chore(deps): replace black with ruff #1968

Merged · 1 commit · Dec 15, 2023
4 changes: 2 additions & 2 deletions server/.pre-commit-config.yaml
@@ -14,8 +14,8 @@ repos:
         language: system
 
       - id: system
-        name: Lint with Black
-        entry: black
+        name: Lint with Ruff format
+        entry: ruff format
         types: [python]
         language: system
6 changes: 3 additions & 3 deletions server/Makefile
@@ -1,6 +1,6 @@
 .DEFAULT_GOAL := all
 ruff = ruff src tests playground.py
-black = black src tests playground.py
+format = ruff format src tests playground.py
 
 .PHONY: clean
 clean:
@@ -24,12 +24,12 @@ install-playground:
 .PHONY: format
 format:
 	poetry run $(ruff) --fix
-	poetry run $(black)
+	poetry run $(format)
 
 .PHONY: lint
 lint:
 	$(ruff)
-	$(black) --check
+	$(format) --check
 	mypy
 
 .PHONY: test
52 changes: 11 additions & 41 deletions server/playground.py
@@ -122,14 +122,8 @@ class ColumnType(str, Enum):
 
 DOMAINS = {
     **{splitext(basename(csv_file))[0]: pd.read_csv(csv_file) for csv_file in csv_files},
-    **{
-        splitext(basename(json_file))[0]: pd.read_json(json_file, orient="table")
-        for json_file in json_files
-    },
-    **{
-        splitext(basename(geojson_file))[0]: gpd.read_file(geojson_file)
-        for geojson_file in geojson_files
-    },
+    **{splitext(basename(json_file))[0]: pd.read_json(json_file, orient="table") for json_file in json_files},
+    **{splitext(basename(geojson_file))[0]: gpd.read_file(geojson_file) for geojson_file in geojson_files},
 }


@@ -140,9 +134,7 @@ def get_available_domains():
 def sanitize_table_schema(schema: dict) -> dict:
     return {
         "fields": [
-            {"name": field["name"], "type": "geometry"}
-            if field.get("extDtype") == "geometry"
-            else field
+            {"name": field["name"], "type": "geometry"} if field.get("extDtype") == "geometry" else field
             for field in schema["fields"]
         ]
     }
@@ -467,11 +459,7 @@ def get_table_columns():
     for table in tables_info:
         with suppress(Exception):
             table_name = table[1]
-            infos = (
-                _SNOWFLAKE_CONNECTION.cursor()
-                .execute(f'DESCRIBE TABLE "{table_name}";')
-                .fetchall()
-            )
+            infos = _SNOWFLAKE_CONNECTION.cursor().execute(f'DESCRIBE TABLE "{table_name}";').fetchall()
             tables_columns[table_name] = [info[0] for info in infos if info[2] == "COLUMN"]
     return tables_columns

@@ -497,11 +485,7 @@ async def handle_snowflake_backend_request():
         tables_columns=tables_columns,
     )
 
-    total_count = (
-        _SNOWFLAKE_CONNECTION.cursor()
-        .execute(f"SELECT COUNT(*) FROM ({query})")
-        .fetchone()[0]
-    )
+    total_count = _SNOWFLAKE_CONNECTION.cursor().execute(f"SELECT COUNT(*) FROM ({query})").fetchone()[0]
     # By using snowflake's connector ability to turn results into a DataFrame,
     # we can re-use all the methods to parse this data-interchange format in the front-end
     df_results = (
@@ -558,16 +542,12 @@ def postgresql_type_to_data_type(pg_type: str) -> ColumnType | None:
 @app.route("/postgresql", methods=["GET", "POST"])
 async def handle_postgres_backend_request():
     # improve by using a connexion pool
-    postgresql_connexion = await psycopg.AsyncConnection.connect(
-        os.getenv("POSTGRESQL_CONNECTION_STRING")
-    )
+    postgresql_connexion = await psycopg.AsyncConnection.connect(os.getenv("POSTGRESQL_CONNECTION_STRING"))
     db_schema = "public"
 
     if request.method == "GET":
         async with postgresql_connexion.cursor() as cur:
-            tables_info_exec = await cur.execute(
-                f"SELECT * FROM pg_catalog.pg_tables WHERE schemaname='{db_schema}';"
-            )
+            tables_info_exec = await cur.execute(f"SELECT * FROM pg_catalog.pg_tables WHERE schemaname='{db_schema}';")
             tables_info = await tables_info_exec.fetchall()
             return jsonify([table_infos[1] for table_infos in tables_info])
@@ -598,9 +578,7 @@ async def handle_postgres_backend_request():
     )
 
     async with postgresql_connexion.cursor() as cur:
-        query_total_count_exec = await cur.execute(
-            f"WITH Q AS ({sql_query}) SELECT COUNT(*) FROM Q"
-        )
+        query_total_count_exec = await cur.execute(f"WITH Q AS ({sql_query}) SELECT COUNT(*) FROM Q")
        # fetchone() returns a tuple
         query_total_count = (await query_total_count_exec.fetchone())[0]
@@ -623,9 +601,7 @@ async def handle_postgres_backend_request():
     query_results_columns = [
         {
             "name": c.name,
-            "type": [
-                postgresql_type_to_data_type(t[1]) for t in types if t[0] == c.type_code
-            ][0],
+            "type": [postgresql_type_to_data_type(t[1]) for t in types if t[0] == c.type_code][0],
         }
         for c in query_results_desc
     ]
@@ -681,10 +657,7 @@ async def handle_athena_post_request():
 
     # Find all columns for all available tables
     table_info = _athena_table_info()
-    tables_columns = {
-        row["Table"]: [c.strip() for c in row["Columns"].split(",")]
-        for _, row in table_info.iterrows()
-    }
+    tables_columns = {row["Table"]: [c.strip() for c in row["Columns"].split(",")] for _, row in table_info.iterrows()}
 
     sql_query = (
         pypika_translate_pipeline(
@@ -731,10 +704,7 @@ def _bigquery_tables_list(client: bigquery.Client) -> list[str]:
 
 
 def _bigquery_tables_info(client: bigquery.Client) -> dict[str, list[str]]:
-    return {
-        table: [field.name for field in client.get_table(table).schema]
-        for table in _bigquery_tables_list(client)
-    }
+    return {table: [field.name for field in client.get_table(table).schema] for table in _bigquery_tables_list(client)}
 
 
 @app.get("/google-big-query")
55 changes: 2 additions & 53 deletions server/poetry.lock

Some generated files are not rendered by default.

5 changes: 0 additions & 5 deletions server/pyproject.toml
@@ -42,7 +42,6 @@ snowflake-sqlalchemy = "^1.5.0"
 types-python-dateutil = "^2.8.19"
 pytest = "^7.4.1"
 pytest-xdist = ">=2.5,<4.0"
-black = "^23.7.0"
 mypy = ">=0.990,<2"
 docker = "^6.1.3"
 sqlalchemy = "^1.4.49"
@@ -62,10 +61,6 @@ all = ["pandas", "geopandas", "pypika"]
 # playground
 playground = ["quart", "Quart-CORS", "hypercorn", "pymongo", "pandas", "psycopg", "toucan-connectors"]
 
-[tool.black]
-line-length = 100
-target-version = ["py310"]
-
 [tool.mypy]
 files = "src/"
 exclude = "weaverbird/backends/sql_translator"
@@ -99,9 +99,7 @@ def _add_missing_dates_day_or_month(step: AddMissingDatesStep) -> list[MongoStep]:
                     # use the variable in the following expression, in which we recreate a date whose granularity will
                     # depend on the user-specified granularity
                     "in": {
-                        "$dateFromParts": _generate_date_from_parts(
-                            "$$currentDay", step.dates_granularity
-                        ),
+                        "$dateFromParts": _generate_date_from_parts("$$currentDay", step.dates_granularity),
                     },
                 },
             },
@@ -132,9 +130,7 @@ def _add_missing_dates_day_or_month(step: AddMissingDatesStep) -> list[MongoStep]:
     add_missing_dates = {
         "$map": {
             # loop over unique dates array
-            "input": all_days_range
-            if step.dates_granularity == "day"
-            else unique_days_for_month_granularity,
+            "input": all_days_range if step.dates_granularity == "day" else unique_days_for_month_granularity,
             # use a variable "date" as cursor
             "as": "date",
             # and apply the following expression to every "date"
@@ -167,9 +163,7 @@ def _add_missing_dates_day_or_month(step: AddMissingDatesStep) -> list[MongoStep]:
         {
             "$addFields": {
                 "_vqbDay": {
-                    "$dateFromParts": _generate_date_from_parts(
-                        f"${step.dates_column}", step.dates_granularity
-                    ),
+                    "$dateFromParts": _generate_date_from_parts(f"${step.dates_column}", step.dates_granularity),
                 },
             },
         },
@@ -200,9 +194,7 @@ def _add_missing_dates_day_or_month(step: AddMissingDatesStep) -> list[MongoStep]:
 
 def translate_addmissingdates(step: AddMissingDatesStep) -> list[MongoStep]:
     return (
-        _add_missing_dates_year(step)
-        if step.dates_granularity == "year"
-        else _add_missing_dates_day_or_month(step)
+        _add_missing_dates_year(step) if step.dates_granularity == "year" else _add_missing_dates_day_or_month(step)
     ) + [
         # Get back to 1 row per document
         {"$unwind": "$_vqbAllDates"},
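Reviewer note on the `_generate_date_from_parts` calls collapsed above: the helper returns a date-parts expression whose precision matches the requested granularity, so coarser granularities truncate dates to the start of the year or month. A hypothetical sketch of that idea (not the project's actual helper, shown for illustration only):

def date_parts(date_expr: str, granularity: str) -> dict:
    # Year granularity keeps only the year; month adds the month; day keeps
    # all three. Parts omitted from $dateFromParts default to 1, which is
    # what truncates a date to the start of its period.
    parts = {"year": {"$year": date_expr}}
    if granularity in ("month", "day"):
        parts["month"] = {"$month": date_expr}
    if granularity == "day":
        parts["day"] = {"$dayOfMonth": date_expr}
    return parts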
@@ -19,9 +19,7 @@ def translate_cumsum(step: CumSumStep) -> list[MongoStep]:
         "$project": {
             **{col: f"$_id.{col}" for col in groupby},
             **{
-                new_name
-                if new_name
-                else f"{name}_CUMSUM": {
+                new_name if new_name else f"{name}_CUMSUM": {
                     "$sum": {"$slice": [f"${name}", {"$add": ["$_VQB_INDEX", 1]}]}
                 }
                 for name, new_name in step.to_cumsum
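The `$slice`-plus-`$sum` expression kept intact in this hunk is the heart of the step: for the row at index i, it sums the first i + 1 values of the grouped array, yielding a running total. The same semantics in plain Python (a hypothetical helper, not project code):

def cumulative_sum(values: list[float]) -> list[float]:
    # Mirrors {"$sum": {"$slice": [array, index + 1]}}: each output element
    # is the sum of the input up to and including that index.
    return [sum(values[: i + 1]) for i in range(len(values))]

assert cumulative_sum([1, 2, 3, 4]) == [1, 3, 6, 10]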
@@ -299,9 +299,7 @@ def translate_date_extract(step: DateExtractStep) -> list[MongoStep]:
     # For retrocompatibility
     if step.operation:
         date_info = [step.operation] if step.operation else step.date_info
-        new_columns = [
-            step.new_column_name if step.new_column_name else f"{step.column}_{step.operation}"
-        ]
+        new_columns = [step.new_column_name if step.new_column_name else f"{step.column}_{step.operation}"]
     else:
         date_info = step.date_info.copy()
         new_columns = step.new_columns.copy()
@@ -5,11 +5,7 @@
 
 
 def translate_evolution(step: EvolutionStep) -> list[MongoStep]:
-    new_column = (
-        step.new_column
-        if step.new_column
-        else f"{step.value_col}_EVOL_{step.evolution_format.upper()}"
-    )
+    new_column = step.new_column if step.new_column else f"{step.value_col}_EVOL_{step.evolution_format.upper()}"
     error_msg = "Error: More than one previous date found for the specified index columns"
     add_field_result: dict[str, Any] = {}
@@ -56,9 +52,7 @@ def translate_evolution(step: EvolutionStep) -> list[MongoStep]:
         {
             "$facet": {
                 "_VQB_ORIGINALS": [{"$project": {"_id": 0}}],
-                "_VQB_COPIES_ARRAY": [
-                    {"$group": {"_id": None, "_VQB_ALL_DOCS": {"$push": "$$ROOT"}}}
-                ],
+                "_VQB_COPIES_ARRAY": [{"$group": {"_id": None, "_VQB_ALL_DOCS": {"$push": "$$ROOT"}}}],
             },
         },
         {"$unwind": "$_VQB_ORIGINALS"},
12 changes: 3 additions & 9 deletions server/src/weaverbird/backends/mongo_translator/steps/join.py
@@ -29,9 +29,7 @@ def translate_join(step: JoinStep) -> list[MongoStep]:
         right_without_domain.steps = [s.copy(deep=True) for s in right[1:]]
     else:
         right_domain = DomainStep(**right[0])
-        right_without_domain.steps = [
-            getattr(steps, f"{s['name'].capitalize()}Step")(**s) for s in right[1:]
-        ]
+        right_without_domain.steps = [getattr(steps, f"{s['name'].capitalize()}Step")(**s) for s in right[1:]]
 
     mongo_let: dict[str, str] = {}
     mongo_expr_and: list[dict[str, list[str]]] = []
@@ -57,14 +55,10 @@ def translate_join(step: JoinStep) -> list[MongoStep]:
     if step.type == "inner":
         mongo_pipeline.append({"$unwind": "$_vqbJoinKey"})
     elif step.type == "left":
-        mongo_pipeline.append(
-            {"$unwind": {"path": "$_vqbJoinKey", "preserveNullAndEmptyArrays": True}}
-        )
+        mongo_pipeline.append({"$unwind": {"path": "$_vqbJoinKey", "preserveNullAndEmptyArrays": True}})
     else:
         mongo_pipeline.append({"$match": {"_vqbJoinKey": {"$eq": []}}})
-        mongo_pipeline.append(
-            {"$unwind": {"path": "$_vqbJoinKey", "preserveNullAndEmptyArrays": True}}
-        )
+        mongo_pipeline.append({"$unwind": {"path": "$_vqbJoinKey", "preserveNullAndEmptyArrays": True}})
 
     mongo_pipeline.append(
         {
@@ -26,9 +26,7 @@ def translate_moving_average(step: MovingAverageStep) -> list[MongoStep]:
                 "in": {
                     "$cond": [
                         # If the index is less than the moving window minus 1...
-                        {
-                            "$lt": ["$$idx", (step.moving_window) - 1]
-                        },  # explicit type for typescript
+                        {"$lt": ["$$idx", (step.moving_window) - 1]},  # explicit type for typescript
                         # ... then we cannot apply the moving average computation, and
                         # we just keep the original document without any new field...
                         {"$arrayElemAt": ["$_vqbArray", "$$idx"]},
@@ -40,8 +38,7 @@ def translate_moving_average(step: MovingAverageStep) -> list[MongoStep]:
                         {"$arrayElemAt": ["$_vqbArray", "$$idx"]},
                         # and add the new moving average column
                         {
-                            step.new_column_name
-                            or f"{step.value_column}_MOVING_AVG": {
+                            step.new_column_name or f"{step.value_column}_MOVING_AVG": {
                                 "$avg": {
                                     "$slice": [
                                         f"$_vqbArray.{step.value_column}",
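As the comments in these hunks describe, rows whose index is below the window size minus 1 keep their original document, and every later row gets an `$avg` over a `$slice` ending at the current index. A plain-Python sketch of that windowing logic (hypothetical, for illustration; assumes a simple trailing window):

def moving_average(values: list[float], window: int) -> list[float | None]:
    # Indices below window - 1 cannot be averaged yet (None here), matching
    # the $cond branch that keeps the original document untouched.
    return [
        None if i < window - 1 else sum(values[i - window + 1 : i + 1]) / window
        for i in range(len(values))
    ]

assert moving_average([1.0, 2.0, 3.0, 4.0], 2) == [None, 1.5, 2.5, 3.5]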