From a78514fdf00a45a64e21e613edafdcf39bc8d905 Mon Sep 17 00:00:00 2001
From: z3z1ma <butler.alex2010@gmail.com>
Date: Thu, 2 Jan 2025 23:08:49 -0700
Subject: [PATCH] feat: allow setting sort-by to choose alphabetical yaml col
 sorting on a per node/directory basis

---
 pyproject.toml                  |  2 +-
 src/dbt_osmosis/cli/main.py     |  6 +++---
 src/dbt_osmosis/core/osmosis.py | 33 +++++++++++++++++++++++++++++----
 uv.lock                         |  2 +-
 4 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 7ca0de5..d5df3ca 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "dbt-osmosis"
-version = "1.1.3"
+version = "1.1.4"
 description = "A dbt utility for managing YAML to make developing with dbt more delightful."
 readme = "README.md"
 license = { text = "Apache-2.0" }
diff --git a/src/dbt_osmosis/cli/main.py b/src/dbt_osmosis/cli/main.py
index 717b0f2..896fc35 100644
--- a/src/dbt_osmosis/cli/main.py
+++ b/src/dbt_osmosis/cli/main.py
@@ -25,7 +25,7 @@
     inherit_upstream_column_knowledge,
     inject_missing_columns,
     remove_columns_not_in_database,
-    sort_columns_as_in_database,
+    sort_columns_as_configured,
     sync_node_to_yaml,
     synchronize_data_types,
     synthesize_missing_documentation_with_openai,
@@ -259,7 +259,7 @@ def refactor(
     inject_missing_columns(context=context)
     remove_columns_not_in_database(context=context)
     inherit_upstream_column_knowledge(context=context)
-    sort_columns_as_in_database(context=context)
+    sort_columns_as_configured(context=context)
     synchronize_data_types(context=context)
     if synthesize:
         synthesize_missing_documentation_with_openai(context=context)
@@ -432,7 +432,7 @@ def document(
 
     inject_missing_columns(context=context)
     inherit_upstream_column_knowledge(context=context)
-    sort_columns_as_in_database(context=context)
+    sort_columns_as_configured(context=context)
     if synthesize:
         synthesize_missing_documentation_with_openai(context=context)
     sync_node_to_yaml(context=context)
diff --git a/src/dbt_osmosis/core/osmosis.py b/src/dbt_osmosis/core/osmosis.py
index 36fda17..76637da 100644
--- a/src/dbt_osmosis/core/osmosis.py
+++ b/src/dbt_osmosis/core/osmosis.py
@@ -88,6 +88,7 @@
     "remove_columns_not_in_database",
     "sort_columns_as_in_database",
     "sort_columns_alphabetically",
+    "sort_columns_as_configured",
     "synchronize_data_types",
 ]
 
@@ -876,6 +877,9 @@ def process_column(col: BaseColumn | ColumnMetadata):
     return normalized_cols
 
 
+# TODO: instead of getting specific keys, perhaps we get a NodeConfigContext object scoped to a node / node+column
+# and internally the __getitem__ or similar handles the complex resolution of keys (under the hood, we can
+# probably use a ChainMap)
 def _get_setting_for_node(
     opt: str,
     /,
@@ -1900,6 +1904,27 @@ def sort_columns_alphabetically(
     node.columns = {k: v for k, v in sorted(node.columns.items(), key=lambda i: i[0])}
 
 
+def sort_columns_as_configured(
+    context: YamlRefactorContext, node: ResultNode | None = None
+) -> None:
+    """Sort columns per the node's "sort-by" setting: "database" (default) or "alphabetical"."""
+    if node is None:
+        logger.info(":wave: Sorting columns as configured across all matched nodes.")
+        # Re-enter this function per node (not a fixed sorter) so each node's sort-by is honored.
+        nodes = (n for _, n in _iter_candidate_nodes(context))
+        for _ in context.pool.map(partial(sort_columns_as_configured, context), nodes):
+            ...
+        return
+    sort_by = _get_setting_for_node("sort-by", node, fallback="database")
+    logger.info(":alphabet_white: Sorting columns by %s => %s", sort_by, node.unique_id)
+    if sort_by == "database":
+        sort_columns_as_in_database(context, node)
+    elif sort_by == "alphabetical":
+        sort_columns_alphabetically(context, node)
+    else:
+        raise ValueError(f"Invalid sort-by value: {sort_by} for node: {node.unique_id}")
+
+
 def synchronize_data_types(context: YamlRefactorContext, node: ResultNode | None = None) -> None:
     """Populate data types for columns in a dbt node and it's corresponding yaml section. Changes are implicitly buffered until commit_yamls is called."""
     if node is None:
@@ -2004,16 +2029,16 @@ def synthesize_missing_documentation_with_openai(
                 table_name=node.relation_name or node.name,
                 upstream_docs=upstream_docs,
             )
-        for column_name, col in node.columns.items():
-            if not col.description or col.description in context.placeholders:
+        for column_name, column in node.columns.items():
+            if not column.description or column.description in context.placeholders:
                 logger.info(
                     ":robot: Synthesizing documentation for column => %s in node => %s",
                     column_name,
                     node.unique_id,
                 )
-                col.description = generate_column_doc(
+                column.description = generate_column_doc(
                     column_name,
-                    existing_context=f"DataType={col.data_type or 'unknown'}>\nColumnParent={node.unique_id}\nTableDescription={node.description}",
+                    existing_context=f"DataType={column.data_type or 'unknown'}>\nColumnParent={node.unique_id}\nTableDescription={node.description}",
                     table_name=node.relation_name or node.name,
                     upstream_docs=upstream_docs,
                     temperature=0.7,
diff --git a/uv.lock b/uv.lock
index e8b5cec..40b0726 100644
--- a/uv.lock
+++ b/uv.lock
@@ -391,7 +391,7 @@ wheels = [
 
 [[package]]
 name = "dbt-osmosis"
-version = "1.1.3"
+version = "1.1.4"
 source = { editable = "." }
 dependencies = [
     { name = "click" },