From 2ff33b2bf04b7c1f505ef420478d31a1a7c7d257 Mon Sep 17 00:00:00 2001 From: Ankita Katiyar Date: Fri, 17 Jan 2025 15:08:04 +0000 Subject: [PATCH 1/3] Add deployment related attributes Signed-off-by: Ankita Katiyar --- kedro/pipeline/pipeline.py | 40 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/kedro/pipeline/pipeline.py b/kedro/pipeline/pipeline.py index 826acd1b13..8f14bd0c14 100644 --- a/kedro/pipeline/pipeline.py +++ b/kedro/pipeline/pipeline.py @@ -369,6 +369,46 @@ def grouped_nodes(self) -> list[list[Node]]: return [list(group) for group in self._toposorted_groups] + @property + def grouped_by_namespace(self): + """Return a dictionary of the pipeline nodes grouped by namespace. + + Returns: + The pipeline nodes grouped by namespace. + """ + nodes_by_namespace = defaultdict(list) + for node in self.nodes: + if node.namespace: + nodes_by_namespace[node.namespace].append(node) + else: + nodes_by_namespace[node.name].append(node) + return nodes_by_namespace + + @property + def node_dependencies_by_namespace(self): + """Return a dictionary of the pipeline nodes dependencies grouped by namespace. + + Returns: + The pipeline nodes dependencies grouped by namespace. + """ + node_dependencies_by_namespace = defaultdict(dict) + for node in self.nodes: + key = node.namespace if node.namespace else node.name + for parent in self.node_dependencies[node]: + if key not in node_dependencies_by_namespace: + node_dependencies_by_namespace[key] = [] + if parent.namespace and parent.namespace != key: + node_dependencies_by_namespace[key].append(parent.namespace) + elif parent.namespace and parent.namespace == key: + continue + else: + node_dependencies_by_namespace[key].append(parent.name) + + node_dependencies_by_namespace = { + key: set(value) for key, value in node_dependencies_by_namespace.items() + } + return node_dependencies_by_namespace + def only_nodes(self, *node_names: str) -> Pipeline: """Create a new ``Pipeline`` which will contain only the specified nodes by name. From ef4d664e4c1946363a3883bb528acc68e0a4dd8c Mon Sep 17 00:00:00 2001 From: Ankita Katiyar Date: Wed, 29 Jan 2025 13:49:06 +0000 Subject: [PATCH 2/3] Update with feedback Signed-off-by: Ankita Katiyar --- kedro/pipeline/pipeline.py | 46 +++++++++++++------------------------- 1 file changed, 15 insertions(+), 31 deletions(-) diff --git a/kedro/pipeline/pipeline.py b/kedro/pipeline/pipeline.py index 8f14bd0c14..42ce40b63e 100644 --- a/kedro/pipeline/pipeline.py +++ b/kedro/pipeline/pipeline.py @@ -370,44 +370,28 @@ def grouped_nodes(self) -> list[list[Node]]: return [list(group) for group in self._toposorted_groups] @property - def grouped_by_namespace(self): - """Return a dictionary of the pipeline nodes grouped by namespace. - - Returns: - The pipeline nodes grouped by namespace. - """ - nodes_by_namespace = defaultdict(list) - for node in self.nodes: - if node.namespace: - nodes_by_namespace[node.namespace].append(node) - else: - nodes_by_namespace[node.name].append(node) - return nodes_by_namespace - - @property - def node_dependencies_by_namespace(self): - """Return a dictionary of the pipeline nodes dependencies grouped by namespace. - - Returns: - The pipeline nodes dependencies grouped by namespace. - """ - node_dependencies_by_namespace = defaultdict(dict) + def grouped_nodes_by_namespace(self) -> dict[str, dict[str, Any]]: + grouped_nodes: dict[str, dict[str, Any]] = defaultdict(dict) for node in self.nodes: - key = node.namespace if node.namespace else node.name + key = node.namespace or node.name + if key not in grouped_nodes: + grouped_nodes[key] = {} + grouped_nodes[key]["name"] = key + grouped_nodes[key]["type"] = "namespace" if node.namespace else "node" + grouped_nodes[key]["nodes"] = [*grouped_nodes[key].get("nodes", []), node] + deps = set() for parent in self.node_dependencies[node]: - if key not in node_dependencies_by_namespace: - node_dependencies_by_namespace[key] = [] if parent.namespace and parent.namespace != key: - node_dependencies_by_namespace[key].append(parent.namespace) + deps.add(parent.namespace) elif parent.namespace and parent.namespace == key: continue else: - node_dependencies_by_namespace[key].append(parent.name) + deps.add(parent.name) + grouped_nodes[key]["dependencies"] = ( + grouped_nodes[key].get("dependencies", set()) | deps + ) - node_dependencies_by_namespace = { - key: set(value) for key, value in node_dependencies_by_namespace.items() - } - return node_dependencies_by_namespace + return grouped_nodes def only_nodes(self, *node_names: str) -> Pipeline: """Create a new ``Pipeline`` which will contain only the specified From a299ef36304a69bf753e81329332bbff6658ac85 Mon Sep 17 00:00:00 2001 From: Ankita Katiyar Date: Fri, 31 Jan 2025 16:48:28 +0000 Subject: [PATCH 3/3] Minor formatting Signed-off-by: Ankita Katiyar --- kedro/pipeline/pipeline.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kedro/pipeline/pipeline.py b/kedro/pipeline/pipeline.py index 42ce40b63e..ff032fbf69 100644 --- a/kedro/pipeline/pipeline.py +++ b/kedro/pipeline/pipeline.py @@ -371,6 +371,8 @@ def grouped_nodes(self) -> list[list[Node]]: @property def grouped_nodes_by_namespace(self) -> dict[str, dict[str, Any]]: + """Return a dictionary of the pipeline nodes grouped by namespace with + information about the nodes, their type, and dependencies.""" grouped_nodes: dict[str, dict[str, Any]] = defaultdict(dict) for node in self.nodes: key = node.namespace or node.name @@ -379,18 +381,17 @@ def grouped_nodes_by_namespace(self) -> dict[str, dict[str, Any]]: grouped_nodes[key]["name"] = key grouped_nodes[key]["type"] = "namespace" if node.namespace else "node" grouped_nodes[key]["nodes"] = [*grouped_nodes[key].get("nodes", []), node] - deps = set() + dependencies = set() for parent in self.node_dependencies[node]: if parent.namespace and parent.namespace != key: - deps.add(parent.namespace) + dependencies.add(parent.namespace) elif parent.namespace and parent.namespace == key: continue else: - deps.add(parent.name) + dependencies.add(parent.name) grouped_nodes[key]["dependencies"] = ( - grouped_nodes[key].get("dependencies", set()) | deps + grouped_nodes[key].get("dependencies", set()) | dependencies ) - return grouped_nodes def only_nodes(self, *node_names: str) -> Pipeline: