From a78514fdf00a45a64e21e613edafdcf39bc8d905 Mon Sep 17 00:00:00 2001
From: z3z1ma
Date: Thu, 2 Jan 2025 23:08:49 -0700
Subject: [PATCH] feat: allow setting sort-by to choose alphabetical yaml col sorting on a per node/directory basis

---
Reviewer notes (this section is ignored when the patch is applied):

* sort_columns_as_configured() now fans out to itself instead of to
  sort_columns_alphabetically(), so the per-node `sort-by` setting is honoured
  when the function is called without a node (the way cli/main.py calls it).
  Its log messages were corrected and a docstring was added; the osmosis.py
  hunk header, the following hunk offset and the diffstat reflect the 6 extra
  lines.
* This copy was re-flowed from a whitespace-collapsed paste. Indentation of
  context lines and blank context lines are a best-effort reconstruction that
  matches the hunk line counts -- confirm against the tree before applying.
* The blob ids on the osmosis.py `index` line predate the review changes.

 pyproject.toml                  |  2 +-
 src/dbt_osmosis/cli/main.py     |  6 +++---
 src/dbt_osmosis/core/osmosis.py | 39 +++++++++++++++++++++++++++++++++++----
 uv.lock                         |  2 +-
 4 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 7ca0de5..d5df3ca 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "dbt-osmosis"
-version = "1.1.3"
+version = "1.1.4"
 description = "A dbt utility for managing YAML to make developing with dbt more delightful."
 readme = "README.md"
 license = { text = "Apache-2.0" }
diff --git a/src/dbt_osmosis/cli/main.py b/src/dbt_osmosis/cli/main.py
index 717b0f2..896fc35 100644
--- a/src/dbt_osmosis/cli/main.py
+++ b/src/dbt_osmosis/cli/main.py
@@ -25,7 +25,7 @@
     inherit_upstream_column_knowledge,
     inject_missing_columns,
     remove_columns_not_in_database,
-    sort_columns_as_in_database,
+    sort_columns_as_configured,
     sync_node_to_yaml,
     synchronize_data_types,
     synthesize_missing_documentation_with_openai,
@@ -259,7 +259,7 @@ def refactor(
     inject_missing_columns(context=context)
     remove_columns_not_in_database(context=context)
     inherit_upstream_column_knowledge(context=context)
-    sort_columns_as_in_database(context=context)
+    sort_columns_as_configured(context=context)
     synchronize_data_types(context=context)
     if synthesize:
         synthesize_missing_documentation_with_openai(context=context)
@@ -432,7 +432,7 @@ def document(
 
     inject_missing_columns(context=context)
     inherit_upstream_column_knowledge(context=context)
-    sort_columns_as_in_database(context=context)
+    sort_columns_as_configured(context=context)
     if synthesize:
         synthesize_missing_documentation_with_openai(context=context)
     sync_node_to_yaml(context=context)
diff --git a/src/dbt_osmosis/core/osmosis.py b/src/dbt_osmosis/core/osmosis.py
index 36fda17..76637da 100644
--- a/src/dbt_osmosis/core/osmosis.py
+++ b/src/dbt_osmosis/core/osmosis.py
@@ -88,6 +88,7 @@
     "remove_columns_not_in_database",
     "sort_columns_as_in_database",
     "sort_columns_alphabetically",
+    "sort_columns_as_configured",
     "synchronize_data_types",
 ]
 
@@ -876,6 +877,9 @@ def process_column(col: BaseColumn | ColumnMetadata):
     return normalized_cols
 
 
+# TODO: instead of getting specific keys, perhaps we get a NodeConfigContext object scoped to a node / node+column
+# and internally the __getitem__ or similar handles the complex resolution of keys (under the hood, we can
+# probably use a ChainMap)
 def _get_setting_for_node(
     opt: str,
     /,
@@ -1900,6 +1904,33 @@ def sort_columns_alphabetically(
     node.columns = {k: v for k, v in sorted(node.columns.items(), key=lambda i: i[0])}
 
 
+def sort_columns_as_configured(
+    context: YamlRefactorContext, node: ResultNode | None = None
+) -> None:
+    """Sort a node's columns in the order selected by its `sort-by` setting.
+
+    Supported values are "database" (the fallback) and "alphabetical"; any other value raises
+    a ValueError. With no node given, every candidate node is sorted via the context's pool.
+    """
+    if node is None:
+        logger.info(":wave: Sorting columns as configured across all matched nodes.")
+        for _ in context.pool.map(
+            # Dispatch back into this function so every node resolves its own sort-by value.
+            partial(sort_columns_as_configured, context),
+            (n for _, n in _iter_candidate_nodes(context)),
+        ):
+            ...
+        return
+    logger.info(":alphabet_white: Sorting columns as configured => %s", node.unique_id)
+    sort_by = _get_setting_for_node("sort-by", node, fallback="database")
+    if sort_by == "database":
+        sort_columns_as_in_database(context, node)
+    elif sort_by == "alphabetical":
+        sort_columns_alphabetically(context, node)
+    else:
+        raise ValueError(f"Invalid sort-by value: {sort_by} for node: {node.unique_id}")
+
+
 def synchronize_data_types(context: YamlRefactorContext, node: ResultNode | None = None) -> None:
     """Populate data types for columns in a dbt node and it's corresponding yaml section. Changes are implicitly buffered until commit_yamls is called."""
     if node is None:
@@ -2004,16 +2035,16 @@ def synthesize_missing_documentation_with_openai(
                 table_name=node.relation_name or node.name,
                 upstream_docs=upstream_docs,
             )
-        for column_name, col in node.columns.items():
-            if not col.description or col.description in context.placeholders:
+        for column_name, column in node.columns.items():
+            if not column.description or column.description in context.placeholders:
                 logger.info(
                     ":robot: Synthesizing documentation for column => %s in node => %s",
                     column_name,
                     node.unique_id,
                 )
-                col.description = generate_column_doc(
+                column.description = generate_column_doc(
                     column_name,
-                    existing_context=f"DataType={col.data_type or 'unknown'}>\nColumnParent={node.unique_id}\nTableDescription={node.description}",
+                    existing_context=f"DataType={column.data_type or 'unknown'}>\nColumnParent={node.unique_id}\nTableDescription={node.description}",
                     table_name=node.relation_name or node.name,
                     upstream_docs=upstream_docs,
                     temperature=0.7,
diff --git a/uv.lock b/uv.lock
index e8b5cec..40b0726 100644
--- a/uv.lock
+++ b/uv.lock
@@ -391,7 +391,7 @@ wheels = [
 
 [[package]]
 name = "dbt-osmosis"
-version = "1.1.3"
+version = "1.1.4"
 source = { editable = "." }
 dependencies = [
     { name = "click" },