diff --git a/.changes/unreleased/Features-20241015-174143.yaml b/.changes/unreleased/Features-20241015-174143.yaml
new file mode 100644
index 00000000..1320ab44
--- /dev/null
+++ b/.changes/unreleased/Features-20241015-174143.yaml
@@ -0,0 +1,7 @@
+kind: Features
+body: Add file_format and table_format configurations
+time: 2024-10-15T17:41:43.584728+02:00
+custom:
+  Author: damian3031
+  Issue: ""
+  PR: "438"
diff --git a/dbt/include/trino/macros/adapters.sql b/dbt/include/trino/macros/adapters.sql
index faf72b2d..98bc4161 100644
--- a/dbt/include/trino/macros/adapters.sql
+++ b/dbt/include/trino/macros/adapters.sql
@@ -58,7 +58,6 @@
 {% macro trino__create_csv_table(model, agate_table) %}
   {%- set column_override = model['config'].get('column_types', {}) -%}
   {%- set quote_seed_column = model['config'].get('quote_columns', None) -%}
-  {%- set _properties = config.get('properties') -%}
 
   {% set sql %}
     create table {{ this.render() }} (
@@ -68,7 +67,7 @@
             {%- set column_name = (col_name | string) -%}
             {{ adapter.quote_seed_column(column_name, quote_seed_column) }} {{ type }} {%- if not loop.last -%}, {%- endif -%}
         {%- endfor -%}
-    ) {{ properties(_properties) }}
+    ) {{ properties() }}
   {% endset %}
 
   {% call statement('_') -%}
@@ -78,10 +77,37 @@
   {{ return(sql) }}
 {% endmacro %}
 
-{% macro properties(properties) %}
-  {%- if properties is not none -%}
+{% macro properties() %}
+  {#- Work on a shallow copy so the updates below never leak into the model's
+      config: if config.get() hands back the stored dict, mutating it would make
+      a second properties() call for the same model raise the "not both" error. -#}
+  {%- set _properties = (config.get('properties') or {}).copy() -%}
+  {%- set table_format = config.get('table_format') -%}
+  {%- set file_format = config.get('file_format') -%}
+
+  {%- if file_format -%}
+    {%- if _properties.format -%}
+      {% set msg %}
+        You can specify either 'file_format' or 'properties.format' configurations, but not both.
+      {% endset %}
+      {% do exceptions.raise_compiler_error(msg) %}
+    {%- endif -%}
+    {%- do _properties.update({'format': file_format}) -%}
+  {%- endif -%}
+
+  {%- if table_format -%}
+    {%- if _properties.type -%}
+      {% set msg %}
+        You can specify either 'table_format' or 'properties.type' configurations, but not both.
+      {% endset %}
+      {% do exceptions.raise_compiler_error(msg) %}
+    {%- endif -%}
+    {%- do _properties.update({'type': table_format}) -%}
+  {%- endif -%}
+
+  {%- if _properties -%}
       WITH (
-          {%- for key, value in properties.items() -%}
+          {%- for key, value in _properties.items() -%}
             {{ key }} = {{ value }}
             {%- if not loop.last -%}{{ ',\n  ' }}{%- endif -%}
           {%- endfor -%}
@@ -100,7 +126,6 @@
 {%- endmacro -%}
 
 {% macro trino__create_table_as(temporary, relation, sql, replace=False) -%}
-  {%- set _properties = config.get('properties') -%}
 
   {%- if replace -%}
     {%- set or_replace = ' or replace' -%}
@@ -117,7 +142,7 @@
     {{ get_assert_columns_equivalent(sql) }}
     {%- set sql = get_select_subquery(sql) %}
     {{ comment(model.get('description')) }}
-    {{ properties(_properties) }}
+    {{ properties() }}
   ;
 
   insert into {{ relation }}
@@ -130,7 +155,7 @@
 
     create{{ or_replace }} table {{ relation }}
       {{ comment(model.get('description')) }}
-      {{ properties(_properties) }}
+      {{ properties() }}
     as (
       {{ sql }}
     );
diff --git a/dbt/include/trino/macros/materializations/materialized_view.sql b/dbt/include/trino/macros/materializations/materialized_view.sql
index 6bee70bb..d85aabdb 100644
--- a/dbt/include/trino/macros/materializations/materialized_view.sql
+++ b/dbt/include/trino/macros/materializations/materialized_view.sql
@@ -1,7 +1,6 @@
 {%- macro trino__get_create_materialized_view_as_sql(target_relation, sql) -%}
-  {%- set _properties = config.get('properties') -%}
   create materialized view {{ target_relation }}
-    {{ properties(_properties) }}
+    {{ properties() }}
   as
   {{ sql }}
   ;
diff --git a/tests/functional/adapter/test_table_properties.py b/tests/functional/adapter/test_table_properties.py
new file mode 100644
index 00000000..4954d45c
--- /dev/null
+++ b/tests/functional/adapter/test_table_properties.py
@@ -0,0 +1,165 @@
+import pytest
+from dbt.tests.util import run_dbt, run_dbt_and_capture
+
+from tests.functional.adapter.materialization.fixtures import model_sql, seed_csv
+
+
+class BaseTableProperties:
+    # Everything that goes in the "seeds" directory
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {
+            "seed.csv": seed_csv,
+        }
+
+    # Everything that goes in the "models" directory
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "model.sql": model_sql,
+        }
+
+
+@pytest.mark.iceberg
+class TestTableProperties(BaseTableProperties):
+    # Configuration in dbt_project.yml
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "name": "properties_test",
+            "models": {
+                "+materialized": "table",
+                "+properties": {
+                    "format": "'PARQUET'",
+                    "format_version": "2",
+                },
+            },
+        }
+
+    def test_table_properties(self, project):
+        # Load the seed data
+        results = run_dbt(["seed"], expect_pass=True)
+        assert len(results) == 1
+
+        # Create model with properties
+        results, logs = run_dbt_and_capture(["--debug", "run"], expect_pass=True)
+        assert len(results) == 1
+        assert "WITH (" in logs
+        assert "format = 'PARQUET'" in logs
+        assert "format_version = 2" in logs
+
+
+@pytest.mark.iceberg
+class TestFileFormatConfig(BaseTableProperties):
+    # Configuration in dbt_project.yml
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "name": "properties_test",
+            "models": {
+                "+materialized": "table",
+                "+file_format": "'PARQUET'",
+            },
+        }
+
+    def test_table_properties(self, project):
+        # Load the seed data
+        results = run_dbt(["seed"], expect_pass=True)
+        assert len(results) == 1
+
+        # Create model; file_format must surface as the `format` table property
+        results, logs = run_dbt_and_capture(["--debug", "run"], expect_pass=True)
+        assert len(results) == 1
+        assert "WITH (" in logs
+        assert "format = 'PARQUET'" in logs
+
+
+@pytest.mark.iceberg
+class TestFileFormatConfigAndFormatTablePropertyFail(BaseTableProperties):
+    # Configuration in dbt_project.yml
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "name": "properties_test",
+            "models": {
+                "+materialized": "table",
+                "+properties": {
+                    "format": "'PARQUET'",
+                },
+                "+file_format": "'ORC'",
+            },
+        }
+
+    def test_table_properties(self, project):
+        # Load the seed data
+        results = run_dbt(["seed"], expect_pass=True)
+        assert len(results) == 1
+
+        # The run must fail: file_format and properties.format are both set
+        results, logs = run_dbt_and_capture(["--debug", "run"], expect_pass=False)
+        assert len(results) == 1
+        assert (
+            "You can specify either 'file_format' or 'properties.format' configurations, but not both."
+            in logs
+        )
+
+
+@pytest.mark.hive
+# Setting `type` property is available only in Starburst Galaxy
+# https://docs.starburst.io/starburst-galaxy/data-engineering/working-with-data-lakes/table-formats/gl-iceberg.html
+@pytest.mark.skip_profile("trino_starburst")
+class TestTableFormatConfig(BaseTableProperties):
+    # Configuration in dbt_project.yml
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "name": "properties_test",
+            "models": {
+                "+materialized": "table",
+                "+table_format": "'iceberg'",
+            },
+        }
+
+    def test_table_properties(self, project):
+        # Load the seed data
+        results = run_dbt(["seed"], expect_pass=True)
+        assert len(results) == 1
+
+        # Create model; table_format must surface as the `type` table property
+        results, logs = run_dbt_and_capture(["--debug", "run"], expect_pass=True)
+        assert len(results) == 1
+        assert "WITH (" in logs
+        assert "type = 'iceberg'" in logs
+
+
+@pytest.mark.hive
+# Setting `type` property is available only in Starburst Galaxy
+# https://docs.starburst.io/starburst-galaxy/data-engineering/working-with-data-lakes/table-formats/gl-iceberg.html
+@pytest.mark.skip_profile("trino_starburst")
+class TestTableFormatConfigAndTypeTablePropertyFail(BaseTableProperties):
+    # Configuration in dbt_project.yml
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "name": "properties_test",
+            "models": {
+                "+materialized": "table",
+                "+properties": {
+                    "type": "'iceberg'",
+                },
+                "+table_format": "'iceberg'",
+            },
+        }
+
+    def test_table_properties(self, project):
+        # Load the seed data
+        results = run_dbt(["seed"], expect_pass=True)
+        assert len(results) == 1
+
+        # The run must fail: table_format and properties.type are both set
+        results, logs = run_dbt_and_capture(["--debug", "run"], expect_pass=False)
+        assert len(results) == 1
+        assert (
+            "You can specify either 'table_format' or 'properties.type' configurations, but not both."
+            in logs
+        )