-
Notifications
You must be signed in to change notification settings - Fork 128
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #44 from dbt-msft/synapse_support
Add support for Azure Synapse External Tables
- Loading branch information
Showing
13 changed files
with
414 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
{#
  Schema test: compares the row count of `model` with the row count of a
  second relation.

  Arguments:
    model         -- relation under test.
    compare_model -- (kwarg) relation to count against; when absent, the
                     `arg` kwarg is used instead.

  Renders a query returning one row with one column, `diff_count`: the
  absolute difference between the two row counts.
#}
{% macro test_sqlserver__equal_rowcount(model) %}

    {#- While parsing (`execute` is false) return an empty string instead of SQL.
        The original author notes this is safe because the macro creates no new refs. -#}
    {%- if not execute -%}
        {{ return('') }}
    {% endif %}

    {#- Relation to compare against; `arg` is the fallback kwarg name. -#}
    {% set other_relation = kwargs.get('compare_model', kwargs.get('arg')) %}

    {#- Both relations are rendered without their database part. #}
    with a as (

        select count(*) as count_a
        from {{ model.include(database=False) }}

    ),
    b as (

        select count(*) as count_b
        from {{ other_relation.include(database=False) }}

    ),
    final as (

        select
            abs(
                (select count_a from a) -
                (select count_b from b)
            ) as diff_count

    )

    select diff_count from final

{% endmacro %}
|
||
|
||
{#
  Schema test: checks that `model` and a comparison relation hold the same
  rows, compared on a chosen list of columns.

  Arguments:
    model           -- relation under test.
    compare_model   -- (kwarg) relation to compare against; when absent, the
                       `arg` kwarg is used instead.
    compare_columns -- (kwarg, optional) list of column expressions to compare.
                       When missing or empty, every column returned by
                       adapter.get_columns_in_relation(model) is compared,
                       using each column's `quoted` attribute.

  Renders a query returning one row with one column, `count`:
      rows(a_minus_b) + rows(b_minus_a) + abs(rows(a_minus_b) - rows(b_minus_a))
  where a_minus_b / b_minus_a combine the two relations with the set operator
  rendered by dbt_utils.except().
#}
{% macro test_sqlserver__equality(model) %}

    {#- While parsing (`execute` is false) return an empty string instead of SQL.
        The original author notes this is safe because the macro creates no new refs. -#}
    {%- if not execute -%}
        {{ return('') }}
    {%- endif -%}

    {#- Setup: run the dbt_utils relation check on `model`. Kept as a Jinja comment
        so no `--` SQL comment is rendered next to whitespace-stripping tags. -#}
    {%- do dbt_utils._is_relation(model, 'test_equality') -%}

    {%- set compare_model = kwargs.get('compare_model', kwargs.get('arg')) -%}
    {%- set compare_columns = kwargs.get('compare_columns', None) -%}

    {#-
      If the compare_columns arg is provided, we can run this test without
      querying the information schema -- this allows the model to be an
      ephemeral model. The adapter lookup therefore lives inside this branch:
      passing it as the default argument of kwargs.get() would evaluate it on
      every call, even when an explicit column list was given.
    -#}
    {%- if not compare_columns -%}
        {%- do dbt_utils._is_ephemeral(model, 'test_equality') -%}
        {%- set compare_columns = adapter.get_columns_in_relation(model) | map(attribute='quoted') -%}
    {%- endif -%}

    {%- set compare_cols_csv = compare_columns | join(', ') %}

    {#- Both relations are rendered without their database part. #}
    with a as (

        select * from {{ model.include(database=False) }}

    ),

    b as (

        select * from {{ compare_model.include(database=False) }}

    ),

    a_minus_b as (

        select {{ compare_cols_csv }} from a
        {{ dbt_utils.except() }}
        select {{ compare_cols_csv }} from b

    ),

    b_minus_a as (

        select {{ compare_cols_csv }} from b
        {{ dbt_utils.except() }}
        select {{ compare_cols_csv }} from a

    ),

    unioned as (

        select * from a_minus_b
        union all
        select * from b_minus_a

    ),

    final as (

        select (select count(*) from unioned) +
            (select abs(
                (select count(*) from a_minus_b) -
                (select count(*) from b_minus_a)
                ))
            as count

    )

    select count from final

{% endmacro %}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,3 +43,41 @@ | |
{% do run_query(create_external_stage) %} | ||
|
||
{% endmacro %} | ||
|
||
{#
  Creates the external data source and the external file format that share
  the `dbt_external_tables_testing` / `dbt_external_ff_testing` names, each
  prefixed with `target.schema`.

  Takes no arguments. For each object the rendered batch first drops it when
  a row with that name exists in the matching sys catalog view, then creates
  it. Each step is logged at info level before its statement is run through
  run_query.
#}
{% macro sqlserver__prep_external() %}

    {#- Object names: "<target schema>.<fixed suffix>". -#}
    {% set data_source_name = target.schema ~ '.dbt_external_tables_testing' %}
    {% set file_format_name = target.schema ~ '.dbt_external_ff_testing' %}

    {#
      NOTE(review): the LOCATION value below reads 'wasbs://[email protected]',
      which looks like an address mangled by e-mail obfuscation in this copy of
      the file. Confirm the real container@account URI before running.
    #}
    {% set data_source_ddl %}
        IF EXISTS ( SELECT * FROM sys.external_data_sources WHERE name = '{{ data_source_name }}' )
        DROP EXTERNAL DATA SOURCE [{{ data_source_name }}];

        CREATE EXTERNAL DATA SOURCE [{{ data_source_name }}] WITH (
            TYPE = HADOOP,
            LOCATION = 'wasbs://[email protected]'
        )
    {% endset %}

    {#- Comma-delimited text; FIRST_ROW = 2 starts reading at the second row. -#}
    {% set file_format_ddl %}
        IF EXISTS ( SELECT * FROM sys.external_file_formats WHERE name = '{{ file_format_name }}' )
        DROP EXTERNAL FILE FORMAT [{{ file_format_name }}];

        CREATE EXTERNAL FILE FORMAT [{{ file_format_name }}]
        WITH (
            FORMAT_TYPE = DELIMITEDTEXT,
            FORMAT_OPTIONS (
                FIELD_TERMINATOR = N',',
                FIRST_ROW = 2,
                USE_TYPE_DEFAULT = True
            )
        )
    {% endset %}

    {#- Data source first, then the file format. -#}
    {% do log('Creating external data source ' ~ data_source_name, info = true) %}
    {% do run_query(data_source_ddl) %}

    {% do log('Creating external file format ' ~ file_format_name, info = true) %}
    {% do run_query(file_format_ddl) %}

{% endmacro %}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
# Source definitions for external tables read through the
# `dbt_external_tables_testing` data source and the `dbt_external_ff_testing`
# file format, both prefixed with the target schema.
# YAML anchors (&csv-people, &cols-of-the-people, &equal-to-the-people) let the
# tables share one external spec, one column list and one test list.
version: 2

sources:
  - name: sqlserver_external
    schema: "{{ target.schema }}"
    loader: ADLSblob

    tables:

      - name: people_csv_unpartitioned
        external: &csv-people
          location: '/csv'
          file_format: "{{ target.schema ~ '.dbt_external_ff_testing' }}"
          data_source: "{{ target.schema ~ '.dbt_external_tables_testing' }}"
          reject_type: VALUE
          reject_value: 0
          ansi_nulls: true
          quoted_identifier: true
        columns: &cols-of-the-people
          - name: id
            data_type: int
          - name: first_name
            data_type: varchar(64)
          - name: last_name
            data_type: varchar(64)
          - name: email
            data_type: varchar(64)
        tests: &equal-to-the-people
          - sqlserver__equality:
              compare_model: ref('people')
              compare_columns:
                - id
                - first_name
                - last_name

      - name: people_csv_partitioned
        external:
          <<: *csv-people
          # Synapse does not do partitions (but we could make a workaround),
          # so the partition spec below stays disabled.
          # partitions: &parts-of-the-people
          #   - name: section
          #     data_type: varchar
          #     expression: "substr(split_part(metadata$filename, 'section=', 2), 1, 1)"
        columns: *cols-of-the-people
        # tests: *equal-to-the-people

      # JSON is not supported by Synapse at the moment; the JSON tables below
      # are kept disabled.

      # - name: people_json_unpartitioned
      #   external: &json-people
      #     location: '@{{ target.schema }}.dbt_external_tables_testing/json'
      #     file_format: '( type = json )'
      #   columns: *cols-of-the-people
      #   tests: *equal-to-the-people

      # - name: people_json_partitioned
      #   external:
      #     <<: *json-people
      #     partitions: *parts-of-the-people
      #   columns: *cols-of-the-people
      #   tests: *equal-to-the-people

      # Tables without a column list currently break things, so these
      # syntax-only cases are disabled as well.
      # just to test syntax
      # - name: people_csv_unpartitioned_no_columns
      #   external: *csv-people
      #   tests: &same-rowcount
      #     - sqlserver__equal_rowcount:
      #         compare_model: ref('people')

      # - name: people_csv_partitioned_no_columns
      #   external:
      #     <<: *csv-people
      #     partitions: *parts-of-the-people
      #   tests: *same-rowcount

      # - name: people_json_unpartitioned_no_columns
      #   external: *csv-people
      #   tests: *same-rowcount

      # - name: people_json_partitioned_no_columns
      #   external:
      #     <<: *json-people
      #     partitions: *parts-of-the-people
      #   tests: *same-rowcount

      # - name: people_json_multipartitioned_no_columns
      #   external:
      #     <<: *json-people
      #     partitions:
      #       - name: file_type
      #         data_type: varchar
      #         expression: "split_part(metadata$filename, 'section=', 1)"
      #       - name: section
      #         data_type: varchar
      #         expression: "substr(split_part(metadata$filename, 'section=', 2), 1, 1)"
      #   tests: *same-rowcount
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.