@property
def grouped_nodes_by_namespace(self) -> dict[str, dict[str, Any]]:
    """Return the pipeline nodes grouped by namespace.

    Nodes that share a namespace are collected into a single group keyed by
    that namespace. A node without a namespace forms its own group, keyed by
    the node name.

    Returns:
        A plain dictionary mapping each group key to a dictionary with the
        following entries:

        * ``"name"``: the group key (the namespace, or the node name).
        * ``"type"``: ``"namespace"`` or ``"node"``, decided by the first
          node encountered for that key.
        * ``"nodes"``: list of the nodes in the group, in ``self.nodes``
          order.
        * ``"dependencies"``: set of group keys this group depends on. A
          parent contributes its namespace, or its name when it has no
          namespace; parents inside the group's own namespace are left out.
    """
    # Loop-invariant, so look it up once instead of once per node.
    # NOTE(review): if ``node_dependencies`` is computed on access, this also
    # avoids recomputing it for every node -- confirm against its definition.
    node_dependencies = self.node_dependencies

    # A plain dict (not a defaultdict) so that a caller's lookup of an
    # unknown key raises KeyError instead of silently inserting an empty group.
    grouped: dict[str, dict[str, Any]] = {}

    for node in self.nodes:
        key = node.namespace or node.name
        group = grouped.get(key)
        if group is None:
            group = grouped[key] = {
                "name": key,
                "type": "namespace" if node.namespace else "node",
                "nodes": [],
                "dependencies": set(),
            }
        group["nodes"].append(node)

        for parent in node_dependencies[node]:
            if not parent.namespace:
                # Un-namespaced parents are identified by their node name.
                group["dependencies"].add(parent.name)
            elif parent.namespace != key:
                # Edges within the same namespace are internal to the group
                # and are therefore not reported as dependencies.
                group["dependencies"].add(parent.namespace)

    return grouped