From dcd29004de3327158ca5b3d0569975c22fa2df55 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Fri, 13 Oct 2017 16:03:09 -0400 Subject: [PATCH 01/81] make the union macro work with dbt 090 --- macros/sql/union.sql | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/macros/sql/union.sql b/macros/sql/union.sql index 7c50ac6e..35fc015c 100644 --- a/macros/sql/union.sql +++ b/macros/sql/union.sql @@ -9,9 +9,10 @@ {%- set _ = table_columns.update({table: []}) %} - {%- set table_parts = table.split('.') %} + {% set schema = table.schema %} + {% set table_name = table.name %} - {%- set cols = adapter.get_columns_in_table(*table_parts) %} + {%- set cols = adapter.get_columns_in_table(schema, table_name) %} {%- for col in cols -%} {# update the list of columns in this table #} From 50d4147f1677da333e22c3e861f3d75e4ae6370b Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Sat, 14 Oct 2017 20:17:08 -0400 Subject: [PATCH 02/81] add pivot macro, fix readme --- README.md | 14 +++++++++++--- macros/sql/pivot.sql | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 3 deletions(-) create mode 100644 macros/sql/pivot.sql diff --git a/README.md b/README.md index 5fc873b5..be73b929 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,7 @@ This macro generates a `select` statement for each field that exists in the `fro Usage: ``` -{{ macro star(from=ref('my_model'), except=["exclude_field_1", "exclude_field_2"]) }} +{{ star(from=ref('my_model'), except=["exclude_field_1", "exclude_field_2"]) }} ``` #### union_tables ([source](macros/sql/union.sql)) @@ -95,7 +95,15 @@ This macro implements an "outer union." The list of tables provided to this macr Usage: ``` -{{ macro union_tables(tables=[ref('table_1', 'table_2')], column_override={"some_field": "varchar(100)"}) }} +{{ union_tables(tables=[ref('table_1'), ref('table_2')], column_override={"some_field": "varchar(100)"}) }} +``` + +#### get_column_values ([source](macros/sql/pivot.sql)) +This macro returns the unique values for a column in a given table. + +Usage: +``` +{{ get_column_values(table=ref('users', column='state', max_records=50) }} ``` --- ### Web @@ -104,7 +112,7 @@ This macro extracts a url parameter from a column containing a url. 
Usage: ``` -{{ macro get_url_parameter(field='page_url', url_parameter='utm_source') }} +{{ get_url_parameter(field='page_url', url_parameter='utm_source') }} ``` ---- diff --git a/macros/sql/pivot.sql b/macros/sql/pivot.sql new file mode 100644 index 00000000..4a2091b6 --- /dev/null +++ b/macros/sql/pivot.sql @@ -0,0 +1,38 @@ + +{# +This macro fetches the unique values for `column` in the table `source_ref` + +Arguments: + source_ref: A model `ref`, or a schema.table string for the table to query (Required) + column: The column to query for unique values + max_records: If provided, the maximum number of unique records to return (default: none) + +#} + +{% macro get_column_values(source_ref, column, max_records=none) -%} + + {%- call statement('get_column_values', fetch_result=True) %} + + select + {{ column }} as value + + from {{ source_ref }} + group by 1 + order by count(*) desc + + {% if max_records is not none %} + limit {{ max_records }} + {% endif %} + + {%- endcall -%} + + {%- set value_list = load_result('get_column_values') -%} + + {%- if value_list and value_list['data'] -%} + {%- set values = value_list['data'] | map(attribute=0) | list %} + {{ values | tojson }} + {%- else -%} + [] + {%- endif -%} + +{%- endmacro %} From 1e516dc06a4ff56c3c0c51ad550c555a152f6a24 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Sat, 14 Oct 2017 20:21:27 -0400 Subject: [PATCH 03/81] fix readme --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index be73b929..e5c3b0e1 100644 --- a/README.md +++ b/README.md @@ -103,7 +103,10 @@ This macro returns the unique values for a column in a given table. Usage: ``` -{{ get_column_values(table=ref('users', column='state', max_records=50) }} +-- Returns a list of the top 50 states in the `users` table +{% states = get_column_values(table=ref('users'), column='state', max_records=50) %} + +... ``` --- ### Web From 04ed19d9238a3424ca6944b98cc46a9fbd09af11 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Sat, 14 Oct 2017 20:24:30 -0400 Subject: [PATCH 04/81] s/source_ref/table --- README.md | 2 +- macros/sql/pivot.sql | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index e5c3b0e1..4ba55a5e 100644 --- a/README.md +++ b/README.md @@ -104,7 +104,7 @@ This macro returns the unique values for a column in a given table. Usage: ``` -- Returns a list of the top 50 states in the `users` table -{% states = get_column_values(table=ref('users'), column='state', max_records=50) %} +{% states = fromjson(get_column_values(table=ref('users'), column='state', max_records=50)) %} ... 
``` diff --git a/macros/sql/pivot.sql b/macros/sql/pivot.sql index 4a2091b6..cdc44eca 100644 --- a/macros/sql/pivot.sql +++ b/macros/sql/pivot.sql @@ -1,22 +1,21 @@ {# -This macro fetches the unique values for `column` in the table `source_ref` +This macro fetches the unique values for `column` in the table `table` Arguments: - source_ref: A model `ref`, or a schema.table string for the table to query (Required) + table: A model `ref`, or a schema.table string for the table to query (Required) column: The column to query for unique values max_records: If provided, the maximum number of unique records to return (default: none) - #} -{% macro get_column_values(source_ref, column, max_records=none) -%} +{% macro get_column_values(table, column, max_records=none) -%} {%- call statement('get_column_values', fetch_result=True) %} select {{ column }} as value - from {{ source_ref }} + from {{ table }} group by 1 order by count(*) desc From bd87133237a1091fbc76b31fc8d940f81004c237 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Sat, 14 Oct 2017 20:30:19 -0400 Subject: [PATCH 05/81] readme updates --- README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4ba55a5e..30f6803a 100644 --- a/README.md +++ b/README.md @@ -99,12 +99,18 @@ Usage: ``` #### get_column_values ([source](macros/sql/pivot.sql)) -This macro returns the unique values for a column in a given table. +This macro returns the unique values for a column in a given table. NOTE: Using untrusted data to build SQL queries +can lead to [SQL Injection](https://en.wikipedia.org/wiki/SQL_injection). Please only use this macro on trusted datasets, +or sanitize the data before using it in a susequent query. Usage: ``` -- Returns a list of the top 50 states in the `users` table -{% states = fromjson(get_column_values(table=ref('users'), column='state', max_records=50)) %} +{% set states = fromjson(get_column_values(table=ref('users'), column='state', max_records=50)) %} + +{% for state in states %} + ... +{% endfor %} ... ``` From 9cdc4907fc5849e70122da98c4ee95c52bfdfd4b Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Sat, 14 Oct 2017 20:31:01 -0400 Subject: [PATCH 06/81] readme --- README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index 30f6803a..65043c05 100644 --- a/README.md +++ b/README.md @@ -99,9 +99,7 @@ Usage: ``` #### get_column_values ([source](macros/sql/pivot.sql)) -This macro returns the unique values for a column in a given table. NOTE: Using untrusted data to build SQL queries -can lead to [SQL Injection](https://en.wikipedia.org/wiki/SQL_injection). Please only use this macro on trusted datasets, -or sanitize the data before using it in a susequent query. +This macro returns the unique values for a column in a given table. 
Usage: ``` From fe0de09a32a450fd1c19e6797ff4471ab54b1a21 Mon Sep 17 00:00:00 2001 From: Erin Ogilvy Date: Fri, 20 Oct 2017 12:47:23 -0400 Subject: [PATCH 07/81] nullcheck macro null check string and text column types from input list of column names --- macros/sql/nullcheck | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 macros/sql/nullcheck diff --git a/macros/sql/nullcheck b/macros/sql/nullcheck new file mode 100644 index 00000000..7a7c04c9 --- /dev/null +++ b/macros/sql/nullcheck @@ -0,0 +1,17 @@ +{% macro nullcheck(cols) %} +{%- for col in cols %} + + {% if col.is_string() -%} + + nullif({{col.name}},'') as {{col.name}} + + {%- else -%} + + {{col.name}} + + {%- endif -%} + +{%- if not loop.last -%} , {%- endif -%} + +{%- endfor -%} +{% endmacro %} From cf3758132e172e8d0ddf5be10d2a687d0b83a6c6 Mon Sep 17 00:00:00 2001 From: Erin Ogilvy Date: Fri, 20 Oct 2017 12:49:14 -0400 Subject: [PATCH 08/81] nullcheck_table using nullcheck macro to null check string and text data types from base table --- macros/sql/nullcheck_table | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 macros/sql/nullcheck_table diff --git a/macros/sql/nullcheck_table b/macros/sql/nullcheck_table new file mode 100644 index 00000000..f2ad3d25 --- /dev/null +++ b/macros/sql/nullcheck_table @@ -0,0 +1,8 @@ +{% macro nullcheck_table(schema, table) %} + + {% set cols = adapter.get_columns_in_table(schema, table) %} + + select {{ nullcheck(cols) }} + from {{schema}}.{{table}} + +{% endmacro %} From a1449baa92b814a8e489df650dbc066630eefe1e Mon Sep 17 00:00:00 2001 From: Erin Ogilvy Date: Mon, 23 Oct 2017 09:06:28 -0400 Subject: [PATCH 09/81] Rename nullcheck_table to nullcheck_table.sql adding .sql --- macros/sql/{nullcheck_table => nullcheck_table.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename macros/sql/{nullcheck_table => nullcheck_table.sql} (100%) diff --git a/macros/sql/nullcheck_table b/macros/sql/nullcheck_table.sql similarity index 100% rename from macros/sql/nullcheck_table rename to macros/sql/nullcheck_table.sql From 8ffa0c5b7cef9375d201f0e6700f4c753e4be93e Mon Sep 17 00:00:00 2001 From: Erin Ogilvy Date: Mon, 23 Oct 2017 09:14:36 -0400 Subject: [PATCH 10/81] Rename nullcheck to nullcheck.sql adding .sql --- macros/sql/{nullcheck => nullcheck.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename macros/sql/{nullcheck => nullcheck.sql} (100%) diff --git a/macros/sql/nullcheck b/macros/sql/nullcheck.sql similarity index 100% rename from macros/sql/nullcheck rename to macros/sql/nullcheck.sql From 43a63167cb2ace90ffaae0d23df988e76a946365 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Wed, 25 Oct 2017 11:46:53 -0400 Subject: [PATCH 11/81] rename pivot macrol file --- macros/sql/{pivot.sql => get_column_values.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename macros/sql/{pivot.sql => get_column_values.sql} (100%) diff --git a/macros/sql/pivot.sql b/macros/sql/get_column_values.sql similarity index 100% rename from macros/sql/pivot.sql rename to macros/sql/get_column_values.sql From 20556cb330f70cfd5a5469c100616cf127b0b2ff Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Wed, 25 Oct 2017 11:47:44 -0400 Subject: [PATCH 12/81] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 65043c05..9a7fd308 100644 --- a/README.md +++ b/README.md @@ -98,7 +98,7 @@ Usage: {{ union_tables(tables=[ref('table_1'), ref('table_2')], column_override={"some_field": "varchar(100)"}) }} ``` 
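As a quick illustration of the `nullcheck` and `nullcheck_table` macros added a few patches back: a model built on them can be as small as the sketch below. The `raw_data.customers` schema and table are made-up placeholders; per the macro bodies above, `nullcheck_table` selects every column of the table and wraps the string-typed ones in `nullif(column, '')` so empty strings come through as real nulls.

```
-- a minimal sketch: expands to "select <nullchecked columns> from raw_data.customers"
{{ nullcheck_table('raw_data', 'customers') }}
```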
-#### get_column_values ([source](macros/sql/pivot.sql)) +#### get_column_values ([source](macros/sql/get_column_values.sql)) This macro returns the unique values for a column in a given table. Usage: From 0d1cc44f2bde36e25eec69b106433fdbffda68d5 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Wed, 25 Oct 2017 11:53:45 -0400 Subject: [PATCH 13/81] 090 compat --- macros/sql/get_column_values.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/sql/get_column_values.sql b/macros/sql/get_column_values.sql index cdc44eca..d312a7e7 100644 --- a/macros/sql/get_column_values.sql +++ b/macros/sql/get_column_values.sql @@ -29,7 +29,7 @@ Arguments: {%- if value_list and value_list['data'] -%} {%- set values = value_list['data'] | map(attribute=0) | list %} - {{ values | tojson }} + {{ tojson(values) }} {%- else -%} [] {%- endif -%} From 2884e9414bc1dcb60b06a9a1417bfe1fa9ce60bf Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Sun, 29 Oct 2017 23:14:45 -0400 Subject: [PATCH 14/81] make star macro work with ref or schema.table --- macros/sql/star.sql | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/macros/sql/star.sql b/macros/sql/star.sql index 65f853af..d98ba900 100644 --- a/macros/sql/star.sql +++ b/macros/sql/star.sql @@ -1,8 +1,13 @@ {% macro star(from, except=[]) -%} - {%- set table_parts = from.split('.') %} + {%- if from.name -%} + {%- set schema_name, table_name = from.schema, from.name -%} + {%- else -%} + {%- set schema_name, table_name = (from | string).split(".") -%} + {%- endif -%} + {%- set include_cols = [] %} - {%- set cols = adapter.get_columns_in_table(*table_parts) %} + {%- set cols = adapter.get_columns_in_table(schema_name, table_name) -%} {%- for col in cols -%} {%- if col.column not in except -%} From 32d40e35ebb95451219b8045f7af959a59cc2495 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Sun, 29 Oct 2017 23:21:31 -0400 Subject: [PATCH 15/81] work with ref or schema.table --- macros/sql/union.sql | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/macros/sql/union.sql b/macros/sql/union.sql index 35fc015c..d5261f65 100644 --- a/macros/sql/union.sql +++ b/macros/sql/union.sql @@ -9,8 +9,11 @@ {%- set _ = table_columns.update({table: []}) %} - {% set schema = table.schema %} - {% set table_name = table.name %} + {%- if table.name -%} + {%- set schema, table_name = table.schema, table.name -%} + {%- else -%} + {%- set schema, table_name = (table | string).split(".") -%} + {%- endif -%} {%- set cols = adapter.get_columns_in_table(schema, table_name) %} {%- for col in cols -%} From 3594edfed3b03bdd4c9f9817815bba2bc03c59c6 Mon Sep 17 00:00:00 2001 From: nave91 Date: Mon, 6 Nov 2017 11:59:17 -0500 Subject: [PATCH 16/81] adding atleast_one and not_constant tests --- macros/schema_tests/atleast_one.sql | 27 +++++++++++++++++++++++++++ macros/schema_tests/not_constant.sql | 26 ++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 macros/schema_tests/atleast_one.sql create mode 100644 macros/schema_tests/not_constant.sql diff --git a/macros/schema_tests/atleast_one.sql b/macros/schema_tests/atleast_one.sql new file mode 100644 index 00000000..03dc3a31 --- /dev/null +++ b/macros/schema_tests/atleast_one.sql @@ -0,0 +1,27 @@ +{% macro test_atleast_one(model, arg) %} + +with validation as ( + + select + {{ arg }} as atleast_one_field + + from {{ model }} + +), + +validation_errors as ( + + + select + count(atleast_one_field) count_of_rows + + from validation +) + +-- returns null if there is 
atleast one non-null value +select count(*) + + from validation_errors + + where count_of_rows = 0 +{% endmacro %} \ No newline at end of file diff --git a/macros/schema_tests/not_constant.sql b/macros/schema_tests/not_constant.sql new file mode 100644 index 00000000..55334b36 --- /dev/null +++ b/macros/schema_tests/not_constant.sql @@ -0,0 +1,26 @@ +{% macro test_not_constant(model, arg) %} + +with validation as ( + + select + {{ arg }} as not_constant_field + + from {{ model }} + where {{ arg }} is not null + +), + +validation_errors as ( + + select + count(distinct not_constant_field) number_of_distinct_rows + + from validation + +) + +select count(*) +from validation_errors + where number_of_distinct_rows = 1 + +{% endmacro %} \ No newline at end of file From 44a710873e068daa8a6e5962f7326518019e1f82 Mon Sep 17 00:00:00 2001 From: nave91 Date: Mon, 6 Nov 2017 18:48:04 -0500 Subject: [PATCH 17/81] adding documentation and changing SQL to not use CTE --- README.md | 25 +++++++++++++++++++++ macros/schema_tests/atleast_one.sql | 28 +++++++++-------------- macros/schema_tests/not_constant.sql | 33 ++++++++++++---------------- 3 files changed, 49 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index 9a7fd308..667e474d 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,31 @@ model_name: - ref('other_table_name') ``` + +#### atleast_one ([source](macros/schema_tests/atleast_one.sql)) +This schema test asserts if column has atleast one value. + +Usage: +``` +model_name: + constraints: + atleast_one: + - column_name + +``` + +#### not_constant ([source](macros/schema_tests/not_constant.sql)) +This schema test asserts if column does not have same value in all rows. + +Usage: +``` +model_name: + constraints: + not_constant: + - column_name + +``` + --- ### SQL helpers #### group_by ([source](macros/sql/groupby.sql)) diff --git a/macros/schema_tests/atleast_one.sql b/macros/schema_tests/atleast_one.sql index 03dc3a31..9965ab9d 100644 --- a/macros/schema_tests/atleast_one.sql +++ b/macros/schema_tests/atleast_one.sql @@ -1,27 +1,19 @@ {% macro test_atleast_one(model, arg) %} -with validation as ( - - select - {{ arg }} as atleast_one_field - - from {{ model }} - -), - -validation_errors as ( +select count(*) +from - select - count(atleast_one_field) count_of_rows + ( + select * - from validation -) + from ( + select + count({{ arg }}) count_of_rows --- returns null if there is atleast one non-null value -select count(*) + from {{ model }} + ) rows_with_values - from validation_errors + where count_of_rows = 0 ) validation_errors - where count_of_rows = 0 {% endmacro %} \ No newline at end of file diff --git a/macros/schema_tests/not_constant.sql b/macros/schema_tests/not_constant.sql index 55334b36..eb262d10 100644 --- a/macros/schema_tests/not_constant.sql +++ b/macros/schema_tests/not_constant.sql @@ -1,26 +1,21 @@ {% macro test_not_constant(model, arg) %} -with validation as ( - - select - {{ arg }} as not_constant_field - - from {{ model }} - where {{ arg }} is not null - -), - -validation_errors as ( +select count(*) +from + ( + select + count(distinct not_constant_field) count_of_distinct_rows + from ( + select + {{ arg }} as not_constant_field - select - count(distinct not_constant_field) number_of_distinct_rows + from {{ model }} - from validation + where {{ arg }} is not null -) + ) not_null_rows + ) discount_rows_count -select count(*) -from validation_errors - where number_of_distinct_rows = 1 + where count_of_distinct_rows = 1 -{% endmacro %} \ No newline at end of 
file +{% endmacro %} From b6301d2214a1b109b8d99ccb8c3696a597826b53 Mon Sep 17 00:00:00 2001 From: nave91 Date: Mon, 6 Nov 2017 22:20:15 -0500 Subject: [PATCH 18/81] using having instead of where sub query --- macros/schema_tests/atleast_one.sql | 18 +++++++----------- macros/schema_tests/not_constant.sql | 21 +++++++++------------ 2 files changed, 16 insertions(+), 23 deletions(-) diff --git a/macros/schema_tests/atleast_one.sql b/macros/schema_tests/atleast_one.sql index 9965ab9d..8166a39a 100644 --- a/macros/schema_tests/atleast_one.sql +++ b/macros/schema_tests/atleast_one.sql @@ -1,19 +1,15 @@ {% macro test_atleast_one(model, arg) %} select count(*) +from ( + select -from + count({{ arg }}) - ( - select * + from {{ model }} - from ( - select - count({{ arg }}) count_of_rows + having count({{ arg }}) = 0 - from {{ model }} - ) rows_with_values +) validation_errors - where count_of_rows = 0 ) validation_errors - -{% endmacro %} \ No newline at end of file +{% endmacro %} diff --git a/macros/schema_tests/not_constant.sql b/macros/schema_tests/not_constant.sql index eb262d10..80b9b0df 100644 --- a/macros/schema_tests/not_constant.sql +++ b/macros/schema_tests/not_constant.sql @@ -1,21 +1,18 @@ + {% macro test_not_constant(model, arg) %} select count(*) -from - ( - select - count(distinct not_constant_field) count_of_distinct_rows - from ( - select - {{ arg }} as not_constant_field - from {{ model }} +from ( + + select + count(distinct {{ arg }}) + + from {{ model }} - where {{ arg }} is not null + having count(distinct {{ arg }}) = 1 - ) not_null_rows - ) discount_rows_count + ) validation_errors - where count_of_distinct_rows = 1 {% endmacro %} From 2a7677cadab57c0f4a219ef1b3bac9299d9b13bc Mon Sep 17 00:00:00 2001 From: nave91 Date: Mon, 6 Nov 2017 22:43:43 -0500 Subject: [PATCH 19/81] at least --- README.md | 6 +++--- macros/schema_tests/atleast_one.sql | 15 --------------- 2 files changed, 3 insertions(+), 18 deletions(-) delete mode 100644 macros/schema_tests/atleast_one.sql diff --git a/README.md b/README.md index 667e474d..0f5dfe17 100644 --- a/README.md +++ b/README.md @@ -73,14 +73,14 @@ model_name: ``` -#### atleast_one ([source](macros/schema_tests/atleast_one.sql)) -This schema test asserts if column has atleast one value. +#### at_least_one ([source](macros/schema_tests/at_least_one.sql)) +This schema test asserts if column has at least one value. 
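For reference, with the `having`-based rewrite above, the generated test is a single aggregate query. A sketch of roughly what it compiles to for a hypothetical column `user_id` on a model materialized as `analytics.users` (both names are placeholders); the outer count is non-zero, and the test fails, only when the column contains no non-null values at all:

```
select count(*)
from (
    select count(user_id)
    from analytics.users
    having count(user_id) = 0
) validation_errors
```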
Usage: ``` model_name: constraints: - atleast_one: + at_least_one: - column_name ``` diff --git a/macros/schema_tests/atleast_one.sql b/macros/schema_tests/atleast_one.sql deleted file mode 100644 index 8166a39a..00000000 --- a/macros/schema_tests/atleast_one.sql +++ /dev/null @@ -1,15 +0,0 @@ -{% macro test_atleast_one(model, arg) %} - -select count(*) -from ( - select - - count({{ arg }}) - - from {{ model }} - - having count({{ arg }}) = 0 - -) validation_errors - -{% endmacro %} From 895c3e6fff8289b334a54b04b396ab500e5975eb Mon Sep 17 00:00:00 2001 From: nave91 Date: Mon, 6 Nov 2017 22:43:50 -0500 Subject: [PATCH 20/81] at least --- macros/schema_tests/at_least_one.sql | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 macros/schema_tests/at_least_one.sql diff --git a/macros/schema_tests/at_least_one.sql b/macros/schema_tests/at_least_one.sql new file mode 100644 index 00000000..94c00fe8 --- /dev/null +++ b/macros/schema_tests/at_least_one.sql @@ -0,0 +1,15 @@ +{% macro test_at_least_one(model, arg) %} + +select count(*) +from ( + select + + count({{ arg }}) + + from {{ model }} + + having count({{ arg }}) = 0 + +) validation_errors + +{% endmacro %} From 196f340a891d91b2775b4c7063fcbb5d0b12b266 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Thu, 30 Nov 2017 10:55:21 -0500 Subject: [PATCH 21/81] Update README.md --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 0f5dfe17..65761536 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,6 @@ # dbt-utils -Current version: 0.1.0 - This package contains macros that can be (re)used across dbt projects. ## Macros From 1e85599d4ee72625ebe955492f6f1f755a3db40f Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Tue, 19 Dec 2017 14:51:16 -0500 Subject: [PATCH 22/81] prefix adapter macro with package name (For 091) --- macros/cross_db_utils/current_timestamp.sql | 6 +++++- macros/cross_db_utils/dateadd.sql | 2 +- macros/cross_db_utils/split_part.sql | 2 +- macros/datetime/date_spine.sql | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/macros/cross_db_utils/current_timestamp.sql b/macros/cross_db_utils/current_timestamp.sql index 38f3766d..50e68816 100644 --- a/macros/cross_db_utils/current_timestamp.sql +++ b/macros/cross_db_utils/current_timestamp.sql @@ -1,5 +1,5 @@ {% macro current_timestamp() %} - {{ adapter_macro('current_timestamp') }} + {{ adapter_macro('dbt_utils.current_timestamp') }} {% endmacro %} {% macro default__current_timestamp() %} @@ -9,3 +9,7 @@ {% macro redshift__current_timestamp() %} current_timestamp::timestamp {% endmacro %} + +{% macro postgres__current_timestamp() %} + current_timestamp::timestamp +{% endmacro %} diff --git a/macros/cross_db_utils/dateadd.sql b/macros/cross_db_utils/dateadd.sql index 189051d0..3969fe9c 100644 --- a/macros/cross_db_utils/dateadd.sql +++ b/macros/cross_db_utils/dateadd.sql @@ -1,5 +1,5 @@ {% macro dateadd(datepart, interval, from_date_or_timestamp) %} - {{ adapter_macro('dateadd', datepart, interval, from_date_or_timestamp) }} + {{ adapter_macro('dbt_utils.dateadd', datepart, interval, from_date_or_timestamp) }} {% endmacro %} diff --git a/macros/cross_db_utils/split_part.sql b/macros/cross_db_utils/split_part.sql index 8b60a666..c2599486 100644 --- a/macros/cross_db_utils/split_part.sql +++ b/macros/cross_db_utils/split_part.sql @@ -1,5 +1,5 @@ {% macro split_part(string_text, delimiter_text, part_number) %} - {{ adapter_macro('split_part', string_text, delimiter_text, part_number) }} + {{ 
adapter_macro('dbt_utils.split_part', string_text, delimiter_text, part_number) }} {% endmacro %} diff --git a/macros/datetime/date_spine.sql b/macros/datetime/date_spine.sql index 0afc0e8a..8bb090a3 100644 --- a/macros/datetime/date_spine.sql +++ b/macros/datetime/date_spine.sql @@ -22,7 +22,7 @@ all_periods as ( select ( {{ - dateadd( + dbt_utils.dateadd( datepart, "row_number() over () - 1", start_date From e552f18ef78a05d7f85c116a2d82ff7b3085b2f8 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Wed, 3 Jan 2018 16:43:23 -0800 Subject: [PATCH 23/81] add cardinality model --- macros/schema_tests/cardinality.sql | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 macros/schema_tests/cardinality.sql diff --git a/macros/schema_tests/cardinality.sql b/macros/schema_tests/cardinality.sql new file mode 100644 index 00000000..b489c446 --- /dev/null +++ b/macros/schema_tests/cardinality.sql @@ -0,0 +1,6 @@ +{% macro test_cardinality(model, arg) %} + +select {{ arg.field }} +from {{ model }} + +{% endmacro %} From 925b93792a078b6833fa67ba014c05081f382fed Mon Sep 17 00:00:00 2001 From: David Wallace Date: Wed, 3 Jan 2018 17:03:58 -0800 Subject: [PATCH 24/81] add logic with rename --- macros/schema_tests/cardinality.sql | 6 ----- macros/schema_tests/cardinality_equality.sql | 28 ++++++++++++++++++++ 2 files changed, 28 insertions(+), 6 deletions(-) delete mode 100644 macros/schema_tests/cardinality.sql create mode 100644 macros/schema_tests/cardinality_equality.sql diff --git a/macros/schema_tests/cardinality.sql b/macros/schema_tests/cardinality.sql deleted file mode 100644 index b489c446..00000000 --- a/macros/schema_tests/cardinality.sql +++ /dev/null @@ -1,6 +0,0 @@ -{% macro test_cardinality(model, arg) %} - -select {{ arg.field }} -from {{ model }} - -{% endmacro %} diff --git a/macros/schema_tests/cardinality_equality.sql b/macros/schema_tests/cardinality_equality.sql new file mode 100644 index 00000000..9b4e27c3 --- /dev/null +++ b/macros/schema_tests/cardinality_equality.sql @@ -0,0 +1,28 @@ +{% macro test_cardinality_equality(model, from, to, field) %} + +with table_a as ( +select + count(1) as num_rows, + {{ from }} +from {{ model }} +group by 2 +), + +table_b as ( +select + count(1) as num_rows, + {{ field }} +from {{ to }} +group by 2 +) + +select count(1) +from ( +select * +from table_a +except +select * +from table_b +) + +{% endmacro %} From f197e1a57ada625fe09844ddda0d7ad763e20489 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Wed, 3 Jan 2018 17:19:11 -0800 Subject: [PATCH 25/81] update readme --- README.md | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 65761536..db4364a9 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,7 @@ model_name: ``` #### at_least_one ([source](macros/schema_tests/at_least_one.sql)) -This schema test asserts if column has at least one value. +This schema test asserts if column has at least one value. Usage: ``` @@ -95,6 +95,17 @@ model_name: ``` +#### cardinality_equality ([source](macros/schema_tests/cardinality_equality.sql)) +This schema test asserts if values in a given column have exactly the same cardinality as values from a different column in a different model. 
+ +Usage: +``` +model_name: + constraints: + cardinality_equality: + - {from: column_name, to: ref('other_model_name'), field: other_column_name} +``` + --- ### SQL helpers #### group_by ([source](macros/sql/groupby.sql)) From 755ef911f49d837d0593442c1f6985ad6f1bc5eb Mon Sep 17 00:00:00 2001 From: David Wallace Date: Thu, 4 Jan 2018 08:02:19 -0800 Subject: [PATCH 26/81] switch position of groupby dimensions --- macros/schema_tests/cardinality_equality.sql | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/macros/schema_tests/cardinality_equality.sql b/macros/schema_tests/cardinality_equality.sql index 9b4e27c3..dc870b88 100644 --- a/macros/schema_tests/cardinality_equality.sql +++ b/macros/schema_tests/cardinality_equality.sql @@ -2,18 +2,18 @@ with table_a as ( select - count(1) as num_rows, - {{ from }} + {{ from }}, + count(1) as num_rows from {{ model }} -group by 2 +group by 1 ), table_b as ( select - count(1) as num_rows, - {{ field }} + { field }}, + count(1) as num_rows from {{ to }} -group by 2 +group by 1 ) select count(1) From cacd6beed66998172ea3f92d3a0aef342dff9d2d Mon Sep 17 00:00:00 2001 From: David Wallace Date: Thu, 4 Jan 2018 08:07:56 -0800 Subject: [PATCH 27/81] address asymmetrical nature of except operator --- macros/schema_tests/cardinality_equality.sql | 26 ++++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/macros/schema_tests/cardinality_equality.sql b/macros/schema_tests/cardinality_equality.sql index dc870b88..867a3d28 100644 --- a/macros/schema_tests/cardinality_equality.sql +++ b/macros/schema_tests/cardinality_equality.sql @@ -14,15 +14,31 @@ select count(1) as num_rows from {{ to }} group by 1 +), + +except_a as ( + select * + from table_a + except + select * + from table_b +), + +except_b as ( + select * + from table_b + except + select * + from table_a ) select count(1) from ( -select * -from table_a -except -select * -from table_b + select * + from except_a + union all + select * + from except_b ) {% endmacro %} From 3b822d166e6d975ffd045062022d81481a515987 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Thu, 4 Jan 2018 09:19:33 -0800 Subject: [PATCH 28/81] typo and add CTE --- macros/schema_tests/cardinality_equality.sql | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/macros/schema_tests/cardinality_equality.sql b/macros/schema_tests/cardinality_equality.sql index 867a3d28..b7fc2924 100644 --- a/macros/schema_tests/cardinality_equality.sql +++ b/macros/schema_tests/cardinality_equality.sql @@ -3,15 +3,15 @@ with table_a as ( select {{ from }}, - count(1) as num_rows + count(*) as num_rows from {{ model }} group by 1 ), table_b as ( select - { field }}, - count(1) as num_rows + {{ field }}, + count(*) as num_rows from {{ to }} group by 1 ), @@ -30,10 +30,9 @@ except_b as ( except select * from table_a -) +), -select count(1) -from ( +unioned as ( select * from except_a union all @@ -41,4 +40,7 @@ from ( from except_b ) +select count(*) +from unioned + {% endmacro %} From ed461c6770dd9397edc9e77b17fd6c3ef62abdd0 Mon Sep 17 00:00:00 2001 From: Jeff Waugh Date: Mon, 8 Jan 2018 09:48:27 +1100 Subject: [PATCH 29/81] get_tables_by_prefix, ideal for union_tables jobs Solution suggested by @drewbanin, enhanced with "exclude" parameter. 
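A minimal sketch of the pairing described here, assuming an `analytics` schema holding tables named `event_*` (both names invented for the example). `get_tables_by_prefix` returns `schema.table` strings, which `union_tables` has accepted alongside `ref()`s since the earlier "work with ref or schema.table" patch:

```
{% set event_tables = get_tables_by_prefix('analytics', 'event_') %}

{{ union_tables(tables=event_tables) }}
```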
--- README.md | 43 +++++++++++++++++++---------- macros/sql/get_tables_by_prefix.sql | 23 +++++++++++++++ 2 files changed, 51 insertions(+), 15 deletions(-) create mode 100644 macros/sql/get_tables_by_prefix.sql diff --git a/README.md b/README.md index db4364a9..07379517 100644 --- a/README.md +++ b/README.md @@ -108,6 +108,34 @@ model_name: --- ### SQL helpers +#### get_column_values ([source](macros/sql/get_column_values.sql)) +This macro returns the unique values for a column in a given table. + +Usage: +``` +-- Returns a list of the top 50 states in the `users` table +{% set states = fromjson(get_column_values(table=ref('users'), column='state', max_records=50)) %} + +{% for state in states %} + ... +{% endfor %} + +... +``` + +#### get_tables_by_prefix ([source](macros/sql/get_tables_by_prefix.sql)) +This macro returns a list of tables that match a given prefix, with an optional +exclusion pattern. It's particularly handy paired with `union_tables`. + +Usage: +``` +-- Returns a list of tables that match schema.prefix% +{{ set tables = get_tables_by_prefix('schema', 'prefix')}} + +-- Returns a list of tables as above, excluding any with underscores +{{ set tables = get_tables_by_prefix('schema', 'prefix', '%_%')}} +``` + #### group_by ([source](macros/sql/groupby.sql)) This macro build a group by statement for fields 1...N @@ -131,21 +159,6 @@ Usage: ``` {{ union_tables(tables=[ref('table_1'), ref('table_2')], column_override={"some_field": "varchar(100)"}) }} ``` - -#### get_column_values ([source](macros/sql/get_column_values.sql)) -This macro returns the unique values for a column in a given table. - -Usage: -``` --- Returns a list of the top 50 states in the `users` table -{% set states = fromjson(get_column_values(table=ref('users'), column='state', max_records=50)) %} - -{% for state in states %} - ... -{% endfor %} - -... -``` --- ### Web #### get_url_parameter ([source](macros/web/get_url_parameter.sql)) diff --git a/macros/sql/get_tables_by_prefix.sql b/macros/sql/get_tables_by_prefix.sql new file mode 100644 index 00000000..1ca19f57 --- /dev/null +++ b/macros/sql/get_tables_by_prefix.sql @@ -0,0 +1,23 @@ +{% macro get_tables_by_prefix(schema, prefix, exclude='') %} + + {%- call statement('tables', fetch_result=True) %} + + select + distinct table_schema || '.' || table_name as ref + from information_schema.tables + where table_schema = '{{ schema }}' + and table_name ilike '{{ prefix }}%' + and table_name not ilike '{{ exclude }}' + + {%- endcall -%} + + {%- set table_list = load_result('tables') -%} + + {%- if table_list and table_list['data'] -%} + {%- set tables = table_list['data'] | map(attribute=0) | list %} + {{ return(tables) }} + {%- else -%} + {{ return([]) }} + {%- endif -%} + +{% endmacro %} From f74ea2f51887421f22d65ae66724a17ca22cb5cf Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Mon, 8 Jan 2018 17:43:53 -0500 Subject: [PATCH 30/81] Update README.md Prefix macros with `dbt_utils.` to avoid confusion --- README.md | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 07379517..86a21e0f 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ This macro adds a time/day interval to the supplied date/timestamp. 
Note: The `d Usage: ``` -{{ dateadd(datepart='day', interval=1, from_date_or_timestamp='2017-01-01') }} +{{ dbt_utils.dateadd(datepart='day', interval=1, from_date_or_timestamp='2017-01-01') }} ``` #### split_part ([source](macros/cross_db_utils/split_part.sql)) @@ -31,7 +31,7 @@ This macro adds a time/day interval to the supplied date/timestamp. Note: The `d Usage: ``` -{{ split_part(string_text='1,2,3', delimiter_text=',', part_number=1) }} +{{ dbt_utils.split_part(string_text='1,2,3', delimiter_text=',', part_number=1) }} ``` --- ### Date/Time @@ -40,7 +40,7 @@ This macro returns the sql required to build a date spine. Usage: ``` -{{ date_spine( +{{ dbt_utils.date_spine( table=ref('organizations'), datepart="minute", start_date="to_date('01/01/2016', 'mm/dd/yyyy')", @@ -55,7 +55,7 @@ This macro calculates the [haversine distance](http://daynebatten.com/2015/09/la Usage: ``` -{{ haversine_distance(lat1=,lon1=,lat2=,lon2=) }} +{{ dbt_utils.haversine_distance(lat1=,lon1=,lat2=,lon2=) }} ``` --- ### Schema Tests @@ -66,7 +66,7 @@ Usage: ``` model_name: constraints: - equality: + dbt_utils.equality: - ref('other_table_name') ``` @@ -78,7 +78,7 @@ Usage: ``` model_name: constraints: - at_least_one: + dbt_utils.at_least_one: - column_name ``` @@ -90,7 +90,7 @@ Usage: ``` model_name: constraints: - not_constant: + dbt_utils.not_constant: - column_name ``` @@ -102,7 +102,7 @@ Usage: ``` model_name: constraints: - cardinality_equality: + dbt_utils.cardinality_equality: - {from: column_name, to: ref('other_model_name'), field: other_column_name} ``` @@ -114,7 +114,7 @@ This macro returns the unique values for a column in a given table. Usage: ``` -- Returns a list of the top 50 states in the `users` table -{% set states = fromjson(get_column_values(table=ref('users'), column='state', max_records=50)) %} +{% set states = fromjson(dbt_utils.get_column_values(table=ref('users'), column='state', max_records=50)) %} {% for state in states %} ... @@ -130,10 +130,10 @@ exclusion pattern. It's particularly handy paired with `union_tables`. Usage: ``` -- Returns a list of tables that match schema.prefix% -{{ set tables = get_tables_by_prefix('schema', 'prefix')}} +{{ set tables = dbt_utils.get_tables_by_prefix('schema', 'prefix')}} -- Returns a list of tables as above, excluding any with underscores -{{ set tables = get_tables_by_prefix('schema', 'prefix', '%_%')}} +{{ set tables = dbt_utils.get_tables_by_prefix('schema', 'prefix', '%_%')}} ``` #### group_by ([source](macros/sql/groupby.sql)) @@ -141,7 +141,7 @@ This macro build a group by statement for fields 1...N Usage: ``` -{{ group_by(n=3) }} --> group by 1,2,3 +{{ dbt_utils.group_by(n=3) }} --> group by 1,2,3 ``` #### star ([source](macros/sql/star.sql)) @@ -149,7 +149,7 @@ This macro generates a `select` statement for each field that exists in the `fro Usage: ``` -{{ star(from=ref('my_model'), except=["exclude_field_1", "exclude_field_2"]) }} +{{ dbt_utils.star(from=ref('my_model'), except=["exclude_field_1", "exclude_field_2"]) }} ``` #### union_tables ([source](macros/sql/union.sql)) @@ -157,7 +157,7 @@ This macro implements an "outer union." The list of tables provided to this macr Usage: ``` -{{ union_tables(tables=[ref('table_1'), ref('table_2')], column_override={"some_field": "varchar(100)"}) }} +{{ dbt_utils.union_tables(tables=[ref('table_1'), ref('table_2')], column_override={"some_field": "varchar(100)"}) }} ``` --- ### Web @@ -166,7 +166,7 @@ This macro extracts a url parameter from a column containing a url. 
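To show where this macro typically lands in a model, a small sketch; the `page_views` model and its `page_url` column are invented names:

```
select
    page_url,
    {{ dbt_utils.get_url_parameter(field='page_url', url_parameter='utm_source') }} as utm_source
from {{ ref('page_views') }}
```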
Usage: ``` -{{ get_url_parameter(field='page_url', url_parameter='utm_source') }} +{{ dbt_utils.get_url_parameter(field='page_url', url_parameter='utm_source') }} ``` ---- From 28e2ddb9f1067c40497caa47e154bc38e65e8cbe Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Mon, 8 Jan 2018 17:50:06 -0500 Subject: [PATCH 31/81] Update README.md --- README.md | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 86a21e0f..ed1028c0 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,15 @@ # dbt-utils -This package contains macros that can be (re)used across dbt projects. +This package contains macros that can be (re)used across dbt projects. To use these macros, add this package as a dependency in your `dbt_project.yml` file: + +```yml +repositories: + # Be sure to replace VERSION_NUMBER below! + - https://github.com/fishtown-analytics/dbt-utils.git@VERSION_NUMBER +``` + +It's a good practice to "tag" your dependencies with version numbers. You can find the latest release of this package [here](https://github.com/fishtown-analytics/dbt-utils/tags). ## Macros ### Cross-database @@ -15,7 +23,7 @@ This macro returns the current timestamp. Usage: ``` -{{ current_timestamp() }} +{{ dbt_utils.current_timestamp() }} ``` #### dateadd ([source](macros/cross_db_utils/dateadd.sql)) From 11936b145399592fe9db28876308b315d93fc6ce Mon Sep 17 00:00:00 2001 From: Tristan Handy Date: Mon, 8 Jan 2018 23:11:27 -0500 Subject: [PATCH 32/81] added new generate_series macro --- macros/sql/generate_series.sql | 53 ++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 macros/sql/generate_series.sql diff --git a/macros/sql/generate_series.sql b/macros/sql/generate_series.sql new file mode 100644 index 00000000..ae41aac9 --- /dev/null +++ b/macros/sql/generate_series.sql @@ -0,0 +1,53 @@ +{% macro get_powers_of_two(upper_bound) %} + + {% if upper_bound <= 0 %} + {{ exceptions.raise_compiler_error("upper bound must be positive") }} + {% endif %} + + {% for _ in range(1, upper_bound + 2) %} + {% if upper_bound <= 2 ** loop.index %}{{ return(loop.index) }}{% endif %} + {% endfor %} + +{% endmacro %} + + +{% macro generate_series(upper_bound) %} + + {% set n = dbt_utils.get_powers_of_two(upper_bound) %} + + with + + {% for i in range(n) %} + + p{{i}} as ( + select 0 as generated_number union all select 1 + ) {% if not loop.last %},{% endif %} + + {% endfor %} + + , unioned as ( + + select + + {% for i in range(n) %} + p{{i}}.generated_number * pow(2, {{i}}) + {% if not loop.last %} + {% endif %} + {% endfor %} + + 1 + as generated_number + + from + + {% for i in range(n) %} + p{{i}} + {% if not loop.last %} cross join {% endif %} + {% endfor %} + + ) + + select * + from unioned + where generated_number <= {{upper_bound}} + order by generated_number + +{% endmacro %} From 2b8e606440eed55c1252c9fbecd3c2c1b089b135 Mon Sep 17 00:00:00 2001 From: Tristan Handy Date: Tue, 9 Jan 2018 10:55:26 -0500 Subject: [PATCH 33/81] added readme doc for generate_series --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index ed1028c0..fe40bdf8 100644 --- a/README.md +++ b/README.md @@ -167,6 +167,14 @@ Usage: ``` {{ dbt_utils.union_tables(tables=[ref('table_1'), ref('table_2')], column_override={"some_field": "varchar(100)"}) }} ``` + +#### generate_series ([source](macros/sql/generate_series.sql)) +This macro implements a cross-database mechanism to generate an arbitrarily long list of numbers. 
Specify the maximum number you'd like in your list and it will create a 1-indexed SQL result set. + +Usage: +``` +{{ dbt_utils.generate_series(upper_bound=1000) }} +``` --- ### Web #### get_url_parameter ([source](macros/web/get_url_parameter.sql)) From e9db777ea498232fec10164eb385993c1356b5c5 Mon Sep 17 00:00:00 2001 From: Tristan Handy Date: Tue, 9 Jan 2018 13:02:47 -0500 Subject: [PATCH 34/81] fixed range bug in generate_series --- macros/sql/generate_series.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/sql/generate_series.sql b/macros/sql/generate_series.sql index ae41aac9..46e2132e 100644 --- a/macros/sql/generate_series.sql +++ b/macros/sql/generate_series.sql @@ -4,7 +4,7 @@ {{ exceptions.raise_compiler_error("upper bound must be positive") }} {% endif %} - {% for _ in range(1, upper_bound + 2) %} + {% for _ in range(1, 100) %} {% if upper_bound <= 2 ** loop.index %}{{ return(loop.index) }}{% endif %} {% endfor %} From af2f0c3bf6b0faf9b7654c0a58df3d3717c05db5 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Wed, 10 Jan 2018 08:03:57 -0800 Subject: [PATCH 35/81] qualify split part macro --- macros/web/get_url_parameter.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/web/get_url_parameter.sql b/macros/web/get_url_parameter.sql index 6420b599..6d467788 100644 --- a/macros/web/get_url_parameter.sql +++ b/macros/web/get_url_parameter.sql @@ -3,7 +3,7 @@ {% set formatted_url_parameter = "'" + url_parameter + "='" %} {{ - split_part(split_part(field, formatted_url_parameter, 2), "'&'", 1) + dbt_utils.split_part(split_part(field, formatted_url_parameter, 2), "'&'", 1) }} {% endmacro %} From 336837a3ab709f0010c5789248a4f55374920325 Mon Sep 17 00:00:00 2001 From: David Wallace Date: Wed, 10 Jan 2018 08:06:56 -0800 Subject: [PATCH 36/81] one more split_part qual --- macros/web/get_url_parameter.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/web/get_url_parameter.sql b/macros/web/get_url_parameter.sql index 6d467788..6da72c2b 100644 --- a/macros/web/get_url_parameter.sql +++ b/macros/web/get_url_parameter.sql @@ -3,7 +3,7 @@ {% set formatted_url_parameter = "'" + url_parameter + "='" %} {{ - dbt_utils.split_part(split_part(field, formatted_url_parameter, 2), "'&'", 1) + dbt_utils.split_part(dbt_utils.split_part(field, formatted_url_parameter, 2), "'&'", 1) }} {% endmacro %} From eb90fa98ee10ccce240038a330b93a09b67b40e3 Mon Sep 17 00:00:00 2001 From: Mike Prentice Date: Wed, 10 Jan 2018 15:21:36 -0500 Subject: [PATCH 37/81] Add pivot macro --- macros/sql/pivot.sql | 61 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 macros/sql/pivot.sql diff --git a/macros/sql/pivot.sql b/macros/sql/pivot.sql new file mode 100644 index 00000000..8080bd55 --- /dev/null +++ b/macros/sql/pivot.sql @@ -0,0 +1,61 @@ +{# +Pivot values from rows to columns. 
+ +Example: + + Input: + + | size | color | + +======+=======+ + | S | red | + | S | blue | + | S | red | + | M | red | + + select + size, + {{ pivot('size', ['red', 'blue']) }} + from + + Output: + + | size | red | blue | + +======+=====+======+ + | S | 2 | 1 | + | M | 1 | 0 | + +Arguments: + column: Column name, required + values: List of row values to turn into columns, required + alias: Whether to create column aliases, default is True + op: SQL aggregation function, default is sum + cmp: SQL value comparison, default is = + prefix: Column alias prefix, default is blank + postfix: Column alias postfix, default is blank + then_value: Value to use if comparison succeeds, default is 1 + else_value: Value to use if comparison fails, default is 0 +#} + +{% macro pivot(column, + values, + alias=True, + op='sum', + cmp='=', + prefix='', + postfix='', + then_value=1, + else_value=0) %} + {% for v in values %} + {{ op }}( + case + when {{ column }} {{ cmp }} '{{ v }}' + then {{ then_value }} + else {{ else_value }} + end + ) + {% if alias %} + as {{ prefix + v + postfix }} + {% endif %} + {% if not loop.last %},{% endif %} + {% endfor %} +{% endmacro %} From a1659f5b34a41eb1efc70178cad04eef122ab5fb Mon Sep 17 00:00:00 2001 From: Mike Prentice Date: Wed, 10 Jan 2018 15:48:40 -0500 Subject: [PATCH 38/81] Tweak doc to add group by size --- macros/sql/pivot.sql | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/macros/sql/pivot.sql b/macros/sql/pivot.sql index 8080bd55..10a217b6 100644 --- a/macros/sql/pivot.sql +++ b/macros/sql/pivot.sql @@ -6,7 +6,7 @@ Example: Input: | size | color | - +======+=======+ + |------+-------| | S | red | | S | blue | | S | red | @@ -16,11 +16,12 @@ Example: size, {{ pivot('size', ['red', 'blue']) }} from
+ group by size Output: | size | red | blue | - +======+=====+======+ + |------+-----+------| | S | 2 | 1 | | M | 1 | 0 | From 3a84de5fd73cdee5babe03ae2257387c8a3d4aa1 Mon Sep 17 00:00:00 2001 From: Tristan Handy Date: Wed, 10 Jan 2018 19:29:31 -0500 Subject: [PATCH 39/81] added recency schema test --- README.md | 11 +++++++++++ macros/schema_tests/recency.sql | 11 +++++++++++ 2 files changed, 22 insertions(+) create mode 100644 macros/schema_tests/recency.sql diff --git a/README.md b/README.md index fe40bdf8..6ee4f519 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,17 @@ model_name: ``` +#### recency ([source](macros/schema_tests/recency.sql)) +This schema test asserts that there is data in the referenced model at least as recent as the defined interval prior to the current timestamp. + +Usage: +``` +model_name: + constraints: + dbt_utils.recency: + - {field: created_at, datepart: day, interval: 1} +``` + #### at_least_one ([source](macros/schema_tests/at_least_one.sql)) This schema test asserts if column has at least one value. diff --git a/macros/schema_tests/recency.sql b/macros/schema_tests/recency.sql new file mode 100644 index 00000000..0bb6e9e2 --- /dev/null +++ b/macros/schema_tests/recency.sql @@ -0,0 +1,11 @@ +{% macro test_recency(model, field, datepart, interval) %} + +select + case when count(*) > 0 then 0 + else 1 + end as error_result +from {{model}} +where {{field}} >= + {{dbt_utils.dateadd(datepart, interval * -1, dbt_utils.current_timestamp())}} + +{% endmacro %} From baa2a2b5322c251721b04b86cf3d12d3547d59da Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Wed, 10 Jan 2018 20:14:57 -0500 Subject: [PATCH 40/81] make get_column_values return instead of json hackery --- README.md | 2 +- macros/sql/get_column_values.sql | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6ee4f519..caf947af 100644 --- a/README.md +++ b/README.md @@ -133,7 +133,7 @@ This macro returns the unique values for a column in a given table. Usage: ``` -- Returns a list of the top 50 states in the `users` table -{% set states = fromjson(dbt_utils.get_column_values(table=ref('users'), column='state', max_records=50)) %} +{% set states = dbt_utils.get_column_values(table=ref('users'), column='state', max_records=50) %} {% for state in states %} ... 
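Because the macro now hands back a Jinja list rather than a JSON string, its result can be passed straight into other macros without `fromjson`. A sketch combining it with the `pivot` macro added earlier in this series (the `payments` model and its `order_id` / `payment_method` columns are placeholders):

```
select
    order_id,
    {{ dbt_utils.pivot(
        'payment_method',
        dbt_utils.get_column_values(ref('payments'), 'payment_method')
    ) }}
from {{ ref('payments') }}
group by 1
```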
diff --git a/macros/sql/get_column_values.sql b/macros/sql/get_column_values.sql index d312a7e7..36e9897b 100644 --- a/macros/sql/get_column_values.sql +++ b/macros/sql/get_column_values.sql @@ -6,6 +6,9 @@ Arguments: table: A model `ref`, or a schema.table string for the table to query (Required) column: The column to query for unique values max_records: If provided, the maximum number of unique records to return (default: none) + +Returns: + A list of distinct values for the specified columns #} {% macro get_column_values(table, column, max_records=none) -%} @@ -29,9 +32,9 @@ Arguments: {%- if value_list and value_list['data'] -%} {%- set values = value_list['data'] | map(attribute=0) | list %} - {{ tojson(values) }} + {{ return(values) }} {%- else -%} - [] + {{ return([]) }} {%- endif -%} {%- endmacro %} From 034eebd455027634c2a0f44f510c4ac05babd30e Mon Sep 17 00:00:00 2001 From: Tristan Handy Date: Thu, 11 Jan 2018 08:15:47 -0500 Subject: [PATCH 41/81] cleans up date spine call signature by building table using generate_series macro --- README.md | 1 - macros/cross_db_utils/datediff.sql | 32 ++++++++++++++++++++++++++++++ macros/datetime/date_spine.sql | 31 +++++++++++++++++++++++++---- macros/sql/generate_series.sql | 14 +++---------- 4 files changed, 62 insertions(+), 16 deletions(-) create mode 100644 macros/cross_db_utils/datediff.sql diff --git a/README.md b/README.md index caf947af..d6b61b87 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,6 @@ This macro returns the sql required to build a date spine. Usage: ``` {{ dbt_utils.date_spine( - table=ref('organizations'), datepart="minute", start_date="to_date('01/01/2016', 'mm/dd/yyyy')", end_date="dateadd(week, 1, current_date)" diff --git a/macros/cross_db_utils/datediff.sql b/macros/cross_db_utils/datediff.sql new file mode 100644 index 00000000..870503e5 --- /dev/null +++ b/macros/cross_db_utils/datediff.sql @@ -0,0 +1,32 @@ +{% macro datediff(first_date, second_date, datepart) %} + {{ adapter_macro('dbt_utils.datediff', first_date, second_date, datepart) }} +{% endmacro %} + + +{% macro default__datediff(first_date, second_date, datepart) %} + + datediff( + {{ datepart }}, + {{ first_date }}, + {{ second_date }} + ) + +{% endmacro %} + + +{% macro bigquery__datediff(first_date, second_date, datepart) %} + + date_diff( + {{first_date}}, + {{second_date}}, + {{datepart}} + ) + +{% endmacro %} + + +{% macro postgres__datediff(first_date, second_date, datepart) %} + + {{ exceptions.raise_compiler_error("macro datediff not implemented for this adapter") }} + +{% endmacro %} diff --git a/macros/datetime/date_spine.sql b/macros/datetime/date_spine.sql index 8bb090a3..3962a730 100644 --- a/macros/datetime/date_spine.sql +++ b/macros/datetime/date_spine.sql @@ -1,11 +1,32 @@ -{% macro date_spine(table, datepart, start_date, end_date) %} +{% macro get_intervals_between(start_date, end_date, datepart) -%} + + {%- call statement('get_intervals_between', fetch_result=True) %} + + select {{dbt_utils.datediff(start_date, end_date, datepart)}} + + {%- endcall -%} + + {%- set value_list = load_result('get_intervals_between') -%} + + {%- if value_list and value_list['data'] -%} + {%- set values = value_list['data'] | map(attribute=0) | list %} + {{ return(values[0]) }} + {%- else -%} + {{ return(1) }} + {%- endif -%} + +{%- endmacro %} + + + + +{% macro date_spine(datepart, start_date, end_date) %} /* call as follows: date_spine( - ref('organizations'), - "minute", + "day", "to_date('01/01/2016', 'mm/dd/yyyy')", "dateadd(week, 1, 
current_date)" ) @@ -14,7 +35,9 @@ date_spine( with rawdata as ( - select * from {{ table }} + {{dbt_utils.generate_series( + dbt_utils.get_intervals_between(start_date, end_date, datepart) + )}} ), diff --git a/macros/sql/generate_series.sql b/macros/sql/generate_series.sql index 46e2132e..68303845 100644 --- a/macros/sql/generate_series.sql +++ b/macros/sql/generate_series.sql @@ -15,17 +15,9 @@ {% set n = dbt_utils.get_powers_of_two(upper_bound) %} - with - - {% for i in range(n) %} - - p{{i}} as ( + with p as ( select 0 as generated_number union all select 1 - ) {% if not loop.last %},{% endif %} - - {% endfor %} - - , unioned as ( + ), unioned as ( select @@ -39,7 +31,7 @@ from {% for i in range(n) %} - p{{i}} + p as p{{i}} {% if not loop.last %} cross join {% endif %} {% endfor %} From b7522148a0202872b0fcdf620727c23c9134f6ea Mon Sep 17 00:00:00 2001 From: David Wallace Date: Thu, 11 Jan 2018 08:31:01 -0800 Subject: [PATCH 42/81] add nullif condition --- macros/web/get_url_parameter.sql | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/macros/web/get_url_parameter.sql b/macros/web/get_url_parameter.sql index 6da72c2b..412d1b32 100644 --- a/macros/web/get_url_parameter.sql +++ b/macros/web/get_url_parameter.sql @@ -1,9 +1,9 @@ -{% macro get_url_parameter(field, url_parameter) %} +{% macro get_url_parameter(field, url_parameter) -%} -{% set formatted_url_parameter = "'" + url_parameter + "='" %} +{%- set formatted_url_parameter = "'" + url_parameter + "='" -%} -{{ - dbt_utils.split_part(dbt_utils.split_part(field, formatted_url_parameter, 2), "'&'", 1) -}} +{%- set split = dbt_utils.split_part(dbt_utils.split_part(field, formatted_url_parameter, 2), "'&'", 1) -%} -{% endmacro %} +nullif({{ split }},'') + +{%- endmacro %} From f823602bdb4d26b319353c769614f444a0625d5a Mon Sep 17 00:00:00 2001 From: Mike Prentice Date: Fri, 12 Jan 2018 15:42:23 -0500 Subject: [PATCH 43/81] Fixup docs and names based on PR feedback --- README.md | 45 ++++++++++++++++++++++++++++++++++++++++++++ macros/sql/pivot.sql | 19 ++++++++++--------- 2 files changed, 55 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index fe40bdf8..213a348a 100644 --- a/README.md +++ b/README.md @@ -175,6 +175,51 @@ Usage: ``` {{ dbt_utils.generate_series(upper_bound=1000) }} ``` + +#### pivot ([source]/(macros/sql/pivot.sql)) +This macro pivots values from rows to columns. 
+ +Usage: +``` +{{ dbt_utils.pivot(, ) }} +``` + +Example: + + Input: `public.test` + + | size | color | + |------|-------| + | S | red | + | S | blue | + | S | red | + | M | red | + + select + size, + {{ dbt_utils.pivot('size', dbt_utils.get_column_values('public.test', + 'color')) }} + from public.test + group by size + + Output: + + | size | red | blue | + |------|-----|------| + | S | 2 | 1 | + | M | 1 | 0 | + +Arguments: + column: Column name, required + values: List of row values to turn into columns, required + alias: Whether to create column aliases, default is True + agg: SQL aggregation function, default is sum + cmp: SQL value comparison, default is = + prefix: Column alias prefix, default is blank + suffix: Column alias postfix, default is blank + then\_value: Value to use if comparison succeeds, default is 1 + else\_value: Value to use if comparison fails, default is 0 + --- ### Web #### get_url_parameter ([source](macros/web/get_url_parameter.sql)) diff --git a/macros/sql/pivot.sql b/macros/sql/pivot.sql index 10a217b6..721e8365 100644 --- a/macros/sql/pivot.sql +++ b/macros/sql/pivot.sql @@ -3,7 +3,7 @@ Pivot values from rows to columns. Example: - Input: + Input: `public.test` | size | color | |------+-------| @@ -14,8 +14,9 @@ Example: select size, - {{ pivot('size', ['red', 'blue']) }} - from
+ {{ dbt_utils.pivot('size', dbt_utils.get_column_values('public.test', + 'color')) }} + from public.test group by size Output: @@ -29,10 +30,10 @@ Arguments: column: Column name, required values: List of row values to turn into columns, required alias: Whether to create column aliases, default is True - op: SQL aggregation function, default is sum + agg: SQL aggregation function, default is sum cmp: SQL value comparison, default is = prefix: Column alias prefix, default is blank - postfix: Column alias postfix, default is blank + suffix: Column alias postfix, default is blank then_value: Value to use if comparison succeeds, default is 1 else_value: Value to use if comparison fails, default is 0 #} @@ -40,14 +41,14 @@ Arguments: {% macro pivot(column, values, alias=True, - op='sum', + agg='sum', cmp='=', prefix='', - postfix='', + suffix='', then_value=1, else_value=0) %} {% for v in values %} - {{ op }}( + {{ agg }}( case when {{ column }} {{ cmp }} '{{ v }}' then {{ then_value }} @@ -55,7 +56,7 @@ Arguments: end ) {% if alias %} - as {{ prefix + v + postfix }} + as {{ adapter.quote(prefix ~ v ~ suffix) }} {% endif %} {% if not loop.last %},{% endif %} {% endfor %} From 7aa43267b6c4ecb1e5395bb511e3f3e6ddaffab6 Mon Sep 17 00:00:00 2001 From: Mike Prentice Date: Fri, 12 Jan 2018 16:19:20 -0500 Subject: [PATCH 44/81] Minor fixup to pivot link in readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 213a348a..0afdbb05 100644 --- a/README.md +++ b/README.md @@ -176,7 +176,7 @@ Usage: {{ dbt_utils.generate_series(upper_bound=1000) }} ``` -#### pivot ([source]/(macros/sql/pivot.sql)) +#### pivot ([source](macros/sql/pivot.sql)) This macro pivots values from rows to columns. Usage: From 48777394f9d4ae95e3c3ea372e859819b7522c94 Mon Sep 17 00:00:00 2001 From: Mike Prentice Date: Fri, 12 Jan 2018 16:23:32 -0500 Subject: [PATCH 45/81] Fixup README formatting for pivot --- README.md | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 0afdbb05..8345d181 100644 --- a/README.md +++ b/README.md @@ -186,7 +186,7 @@ Usage: Example: - Input: `public.test` + Input: public.test | size | color | |------|-------| @@ -210,15 +210,16 @@ Example: | M | 1 | 0 | Arguments: - column: Column name, required - values: List of row values to turn into columns, required - alias: Whether to create column aliases, default is True - agg: SQL aggregation function, default is sum - cmp: SQL value comparison, default is = - prefix: Column alias prefix, default is blank - suffix: Column alias postfix, default is blank - then\_value: Value to use if comparison succeeds, default is 1 - else\_value: Value to use if comparison fails, default is 0 + + - column: Column name, required + - values: List of row values to turn into columns, required + - alias: Whether to create column aliases, default is True + - agg: SQL aggregation function, default is sum + - cmp: SQL value comparison, default is = + - prefix: Column alias prefix, default is blank + - suffix: Column alias postfix, default is blank + - then_value: Value to use if comparison succeeds, default is 1 + - else_value: Value to use if comparison fails, default is 0 --- ### Web From 2fe6f9284f456f10f68c99b2d2841e232abac1f6 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Thu, 18 Jan 2018 17:37:07 -0500 Subject: [PATCH 46/81] Update README.md Fix `pivot` docs -- s/size/color Thanks dwall :) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
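The corrected example reads even more naturally against a dbt model instead of the hard-coded `public.test` table; a sketch, where `ref('shirts')` is an invented model with `size` and `color` columns:

```
select
    size,
    {{ dbt_utils.pivot(
        'color',
        dbt_utils.get_column_values(ref('shirts'), 'color')
    ) }}
from {{ ref('shirts') }}
group by size
```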
diff --git a/README.md b/README.md index b6ba033f..d1068dea 100644 --- a/README.md +++ b/README.md @@ -207,7 +207,7 @@ Example: select size, - {{ dbt_utils.pivot('size', dbt_utils.get_column_values('public.test', + {{ dbt_utils.pivot('color', dbt_utils.get_column_values('public.test', 'color')) }} from public.test group by size From 1ca327c1c9ca448beed3a32423d77db4d83b30c3 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Fri, 26 Jan 2018 15:15:33 -0500 Subject: [PATCH 47/81] fix for orderby on snowflake --- macros/datetime/date_spine.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/datetime/date_spine.sql b/macros/datetime/date_spine.sql index 3962a730..dd9693c3 100644 --- a/macros/datetime/date_spine.sql +++ b/macros/datetime/date_spine.sql @@ -47,7 +47,7 @@ all_periods as ( {{ dbt_utils.dateadd( datepart, - "row_number() over () - 1", + "row_number() over (order by 1) - 1", start_date ) }} From e8017e4abb83da51953685771f4f2f1160e5e7c9 Mon Sep 17 00:00:00 2001 From: Tristan Handy Date: Tue, 30 Jan 2018 22:03:12 -0500 Subject: [PATCH 48/81] fixed issue 49 --- macros/cross_db_utils/datediff.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/cross_db_utils/datediff.sql b/macros/cross_db_utils/datediff.sql index 870503e5..457bd60c 100644 --- a/macros/cross_db_utils/datediff.sql +++ b/macros/cross_db_utils/datediff.sql @@ -17,8 +17,8 @@ {% macro bigquery__datediff(first_date, second_date, datepart) %} date_diff( - {{first_date}}, {{second_date}}, + {{first_date}}, {{datepart}} ) From 6c6529a1a1a5e393262f6ce1026bc0f75543e039 Mon Sep 17 00:00:00 2001 From: Tristan Handy Date: Wed, 31 Jan 2018 21:57:46 -0500 Subject: [PATCH 49/81] added surrogate key functionality --- macros/cross_db_utils/datatypes.sql | 21 +++++++++++++++++++++ macros/sql/surrogate_key.sql | 14 ++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 macros/cross_db_utils/datatypes.sql create mode 100644 macros/sql/surrogate_key.sql diff --git a/macros/cross_db_utils/datatypes.sql b/macros/cross_db_utils/datatypes.sql new file mode 100644 index 00000000..47777edc --- /dev/null +++ b/macros/cross_db_utils/datatypes.sql @@ -0,0 +1,21 @@ +{# string ------------------------------------------------- #} + +{% macro type_string() %} + {{ adapter_macro('dbt_utils.type_string') }} +{% endmacro %} + +{% macro default__type_string() %} + string +{% endmacro %} + +{% macro redshift__type_string() %} + varchar +{% endmacro %} + +{% macro postgres__type_string() %} + varchar +{% endmacro %} + +{% macro snowflake__type_string() %} + varchar +{% endmacro %} diff --git a/macros/sql/surrogate_key.sql b/macros/sql/surrogate_key.sql new file mode 100644 index 00000000..09e9f7b9 --- /dev/null +++ b/macros/sql/surrogate_key.sql @@ -0,0 +1,14 @@ +{% macro surrogate_key(fields) -%} + +md5(concat( + + {%- for field in fields %} + + coalesce(cast({{field}} as {{dbt_utils.type_string()}}), '') + {% if not loop.last %},{% endif %} + + {%- endfor -%} + +)) + +{%- endmacro %} From 942d1aaa490d48fe8e702685ae2de39e01350689 Mon Sep 17 00:00:00 2001 From: Tristan Handy Date: Wed, 31 Jan 2018 22:22:29 -0500 Subject: [PATCH 50/81] added try_cast functionality --- macros/cross_db_utils/safe_cast.sql | 20 ++++++++++++++++++++ macros/sql/surrogate_key.sql | 4 +++- 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 macros/cross_db_utils/safe_cast.sql diff --git a/macros/cross_db_utils/safe_cast.sql b/macros/cross_db_utils/safe_cast.sql new file mode 100644 index 00000000..592a0d39 --- 
/dev/null +++ b/macros/cross_db_utils/safe_cast.sql @@ -0,0 +1,20 @@ +{% macro safe_cast(field, type) %} + {{ adapter_macro('dbt_utils.safe_cast', field, type) }} +{% endmacro %} + + +{% macro default__safe_cast(field, type) %} + {# most databases don't support this function yet + so we just need to use cast #} + cast({{field}} as {{type}}) +{% endmacro %} + + +{% macro snowflake__safe_cast(field, type) %} + try_cast({{field}} as {{type}}) +{% endmacro %} + + +{% macro bigquery__safe_cast(field, type) %} + safe_cast({{field}} as {{type}}) +{% endmacro %} diff --git a/macros/sql/surrogate_key.sql b/macros/sql/surrogate_key.sql index 09e9f7b9..71fbf981 100644 --- a/macros/sql/surrogate_key.sql +++ b/macros/sql/surrogate_key.sql @@ -4,7 +4,9 @@ md5(concat( {%- for field in fields %} - coalesce(cast({{field}} as {{dbt_utils.type_string()}}), '') + coalesce( + {{dbt_utils.safe_cast(field, dbt_utils.type_string())}} + , '') {% if not loop.last %},{% endif %} {%- endfor -%} From 24a58d2217598c6ecb51655690459fe43f31c240 Mon Sep 17 00:00:00 2001 From: Tristan Handy Date: Wed, 31 Jan 2018 22:29:18 -0500 Subject: [PATCH 51/81] added docs for surrogate_key --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index d1068dea..40df9cf3 100644 --- a/README.md +++ b/README.md @@ -186,6 +186,14 @@ Usage: {{ dbt_utils.generate_series(upper_bound=1000) }} ``` +#### surrogate_key ([source](macros/sql/surrogate_key.sql)) +Implements a cross-database way to generate a hashed surrogate key using the array of fields specified. + +Usage: +``` +{{ dbt_utils.surrogate_key(fields=['field_a', 'field_b'...]) }} +``` + #### pivot ([source](macros/sql/pivot.sql)) This macro pivots values from rows to columns. From bccf5ab6732c7e780e13fc21f57721806586b70e Mon Sep 17 00:00:00 2001 From: Tristan Handy Date: Thu, 1 Feb 2018 11:29:10 -0500 Subject: [PATCH 52/81] changed the way that concatenation works in surrogate_key --- macros/cross_db_utils/concat.sql | 27 +++++++++++++++++++++++++++ macros/cross_db_utils/datatypes.sql | 4 ++-- macros/sql/surrogate_key.sql | 25 +++++++++++++++---------- 3 files changed, 44 insertions(+), 12 deletions(-) create mode 100644 macros/cross_db_utils/concat.sql diff --git a/macros/cross_db_utils/concat.sql b/macros/cross_db_utils/concat.sql new file mode 100644 index 00000000..fe7ec273 --- /dev/null +++ b/macros/cross_db_utils/concat.sql @@ -0,0 +1,27 @@ +{% macro concat(fields) %} + {{ adapter_macro('dbt_utils.concat', fields) }} +{% endmacro %} + + +{% macro default__concat(fields) -%} + concat({{ fields|join(', ') }}) +{%- endmacro %} + + +{% macro alternative_concat(fields) %} + {{ fields|join(' || ') }} +{% endmacro %} + + +{% macro redshift__concat(fields) %} + + {{dbt_utils.alternative_concat(fields)}} + +{% endmacro %} + + +{% macro snowflake__dateadd(fields) %} + + {{dbt_utils.alternative_concat(fields)}} + +{% endmacro %} diff --git a/macros/cross_db_utils/datatypes.sql b/macros/cross_db_utils/datatypes.sql index 47777edc..eaf26519 100644 --- a/macros/cross_db_utils/datatypes.sql +++ b/macros/cross_db_utils/datatypes.sql @@ -8,9 +8,9 @@ string {% endmacro %} -{% macro redshift__type_string() %} +{%- macro redshift__type_string() -%} varchar -{% endmacro %} +{%- endmacro -%} {% macro postgres__type_string() %} varchar diff --git a/macros/sql/surrogate_key.sql b/macros/sql/surrogate_key.sql index 71fbf981..40f838b2 100644 --- a/macros/sql/surrogate_key.sql +++ b/macros/sql/surrogate_key.sql @@ -1,16 +1,21 @@ -{% macro surrogate_key(fields) -%} +{%- 
macro surrogate_key() -%} -md5(concat( +{% set fields = [] %} - {%- for field in fields %} +{%- for field in varargs -%} - coalesce( - {{dbt_utils.safe_cast(field, dbt_utils.type_string())}} - , '') - {% if not loop.last %},{% endif %} + {% set _ = fields.append( + "coalesce(cast(" ~ field ~ " as " ~ dbt_utils.type_string() ~ "), '')" + ) %} - {%- endfor -%} + {% if not loop.last %} + {% set _ = fields.append("'-'") %} + {% endif %} -)) +{%- endfor -%} -{%- endmacro %} +md5( + {{dbt_utils.concat(fields)}} +) + +{%- endmacro -%} From 605a979343fcd1828dcf827be01e383233f561e1 Mon Sep 17 00:00:00 2001 From: Tristan Handy Date: Thu, 1 Feb 2018 11:31:02 -0500 Subject: [PATCH 53/81] cleaned up spacing --- macros/cross_db_utils/concat.sql | 4 ---- macros/sql/surrogate_key.sql | 4 +--- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/macros/cross_db_utils/concat.sql b/macros/cross_db_utils/concat.sql index fe7ec273..9e87dac5 100644 --- a/macros/cross_db_utils/concat.sql +++ b/macros/cross_db_utils/concat.sql @@ -14,14 +14,10 @@ {% macro redshift__concat(fields) %} - {{dbt_utils.alternative_concat(fields)}} - {% endmacro %} {% macro snowflake__dateadd(fields) %} - {{dbt_utils.alternative_concat(fields)}} - {% endmacro %} diff --git a/macros/sql/surrogate_key.sql b/macros/sql/surrogate_key.sql index 40f838b2..36ffa3c3 100644 --- a/macros/sql/surrogate_key.sql +++ b/macros/sql/surrogate_key.sql @@ -14,8 +14,6 @@ {%- endfor -%} -md5( - {{dbt_utils.concat(fields)}} -) +md5({{dbt_utils.concat(fields)}}) {%- endmacro -%} From ce40b44846b88b4fbb6796597ac0885951125ad5 Mon Sep 17 00:00:00 2001 From: Tristan Handy Date: Thu, 1 Feb 2018 11:32:11 -0500 Subject: [PATCH 54/81] updated docs for surrogate_key --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 40df9cf3..f423b0cb 100644 --- a/README.md +++ b/README.md @@ -187,11 +187,11 @@ Usage: ``` #### surrogate_key ([source](macros/sql/surrogate_key.sql)) -Implements a cross-database way to generate a hashed surrogate key using the array of fields specified. +Implements a cross-database way to generate a hashed surrogate key using the fields specified. 
Usage: ``` -{{ dbt_utils.surrogate_key(fields=['field_a', 'field_b'...]) }} +{{ dbt_utils.surrogate_key('field_a', 'field_b'[,...]) }} ``` #### pivot ([source](macros/sql/pivot.sql)) From 4439d0231ae55d43f1d600d56a6a0411d5191612 Mon Sep 17 00:00:00 2001 From: Tristan Handy Date: Thu, 1 Feb 2018 11:47:01 -0500 Subject: [PATCH 55/81] fixed typo --- macros/cross_db_utils/concat.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/cross_db_utils/concat.sql b/macros/cross_db_utils/concat.sql index 9e87dac5..ce82dde5 100644 --- a/macros/cross_db_utils/concat.sql +++ b/macros/cross_db_utils/concat.sql @@ -18,6 +18,6 @@ {% endmacro %} -{% macro snowflake__dateadd(fields) %} +{% macro snowflake__concat(fields) %} {{dbt_utils.alternative_concat(fields)}} {% endmacro %} From 3550c6fb8bd29aec4d7ff29ad63a161ac97812f6 Mon Sep 17 00:00:00 2001 From: Tristan Handy Date: Sun, 11 Feb 2018 13:47:04 -0500 Subject: [PATCH 56/81] added last_day functionality --- macros/cross_db_utils/date_trunc.sql | 11 ++++++++++ macros/cross_db_utils/dateadd.sql | 3 +-- macros/cross_db_utils/last_day.sql | 32 ++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 macros/cross_db_utils/date_trunc.sql create mode 100644 macros/cross_db_utils/last_day.sql diff --git a/macros/cross_db_utils/date_trunc.sql b/macros/cross_db_utils/date_trunc.sql new file mode 100644 index 00000000..fa3388da --- /dev/null +++ b/macros/cross_db_utils/date_trunc.sql @@ -0,0 +1,11 @@ +{% macro date_trunc(datepart, date) %} + {{ adapter_macro('dbt_utils.date_trunc', datepart, date) }} +{% endmacro %} + +{% macro default__date_trunc(datepart, date) %} + date_trunc('{{datepart}}', {{date}}) +{% endmacro %} + +{% macro bigquery__date_trunc(datepart, date) %} + date_trunc({{date}}, {{datepart}}) +{% endmacro %} diff --git a/macros/cross_db_utils/dateadd.sql b/macros/cross_db_utils/dateadd.sql index 3969fe9c..ecfc5bc6 100644 --- a/macros/cross_db_utils/dateadd.sql +++ b/macros/cross_db_utils/dateadd.sql @@ -18,8 +18,7 @@ date_add( {{ from_date_or_timestamp }}, - {{ interval }}, - "{{ datepart }}" + INTERVAL {{ interval }} {{ datepart }} ) {% endmacro %} diff --git a/macros/cross_db_utils/last_day.sql b/macros/cross_db_utils/last_day.sql new file mode 100644 index 00000000..f8dba89f --- /dev/null +++ b/macros/cross_db_utils/last_day.sql @@ -0,0 +1,32 @@ +{% macro last_day(date, datepart) %} + {{ adapter_macro('dbt_utils.last_day', date, datepart) }} +{% endmacro %} + + +{% macro default__last_day(date, datepart) -%} + cast( + {{dbt_utils.dateadd('day', '-1', + dbt_utils.dateadd(datepart, '1', dbt_utils.date_trunc(datepart, date)) + )}} + as date) +{%- endmacro %} + + +{% macro postgres__last_day(date, datepart) -%} + + {%- if datepart == 'quarter' -%} + + {{ exceptions.raise_compiler_error( + "dbt_utils.last_day is not supported for datepart 'quarter' on this adapter") }} + + {%- else -%} + + cast( + {{dbt_utils.dateadd('day', '-1', + dbt_utils.dateadd(datepart, '1', dbt_utils.date_trunc(datepart, date)) + )}} + as date) + + {%- endif -%} + +{%- endmacro %} From 20c6b193b0fb3c2963e67aa89264640a78b0ec71 Mon Sep 17 00:00:00 2001 From: Tristan Handy Date: Sun, 11 Feb 2018 13:49:06 -0500 Subject: [PATCH 57/81] fixed case --- macros/cross_db_utils/dateadd.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/cross_db_utils/dateadd.sql b/macros/cross_db_utils/dateadd.sql index ecfc5bc6..73b89871 100644 --- a/macros/cross_db_utils/dateadd.sql +++ 
b/macros/cross_db_utils/dateadd.sql @@ -18,7 +18,7 @@ date_add( {{ from_date_or_timestamp }}, - INTERVAL {{ interval }} {{ datepart }} + interval {{ interval }} {{ datepart }} ) {% endmacro %} From 82c8c7e2237f3041c9c333901f5034cd70b07e8a Mon Sep 17 00:00:00 2001 From: Tristan Handy Date: Sun, 11 Feb 2018 13:52:04 -0500 Subject: [PATCH 58/81] added comment on dateparts --- macros/cross_db_utils/last_day.sql | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/macros/cross_db_utils/last_day.sql b/macros/cross_db_utils/last_day.sql index f8dba89f..758fcd89 100644 --- a/macros/cross_db_utils/last_day.sql +++ b/macros/cross_db_utils/last_day.sql @@ -1,3 +1,8 @@ +/* +This function has been tested with dateparts of month and quarters. Further +testing is required to validate that it will work on other dateparts. +*/ + {% macro last_day(date, datepart) %} {{ adapter_macro('dbt_utils.last_day', date, datepart) }} {% endmacro %} From 8db9abb1dacc6d6dc019d8760145ab691a5b95ff Mon Sep 17 00:00:00 2001 From: Tristan Handy Date: Sun, 11 Feb 2018 13:59:23 -0500 Subject: [PATCH 59/81] added docs --- README.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/README.md b/README.md index f423b0cb..c16abad1 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,25 @@ Usage: ``` {{ dbt_utils.split_part(string_text='1,2,3', delimiter_text=',', part_number=1) }} ``` + +#### date_trunc ([source](macros/cross_db_utils/date_trunc.sql)) +Truncates a date or timestamp to the specified datepart. Note: The `datepart` argument is database-specific. + +Usage: +``` +{{ dbt_utils.date_trunc(datepart, date) }} +``` + +#### last_day ([source](macros/cross_db_utils/last_day.sql)) +Gets the last day for a given date and datepart. Notes: + +- The `datepart` argument is database-specific. +- This macro currently only supports dateparts of `month` and `quarter`. + +Usage: +``` +{{ dbt_utils.last_day(date, datepart) }} +``` --- ### Date/Time #### date_spine ([source](macros/datetime/date_spine.sql)) From 428c5961b020cef56e9d1455ae9dc44a3de1da89 Mon Sep 17 00:00:00 2001 From: Tristan Handy Date: Sun, 11 Feb 2018 14:51:12 -0500 Subject: [PATCH 60/81] made code more dry --- macros/cross_db_utils/last_day.sql | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/macros/cross_db_utils/last_day.sql b/macros/cross_db_utils/last_day.sql index 758fcd89..0efe6257 100644 --- a/macros/cross_db_utils/last_day.sql +++ b/macros/cross_db_utils/last_day.sql @@ -8,30 +8,27 @@ testing is required to validate that it will work on other dateparts. 
{% endmacro %} -{% macro default__last_day(date, datepart) -%} +{%- macro default_last_day(date, datepart) -%} cast( {{dbt_utils.dateadd('day', '-1', dbt_utils.dateadd(datepart, '1', dbt_utils.date_trunc(datepart, date)) )}} as date) +{%- endmacro -%} + + +{% macro default__last_day(date, datepart) -%} + {{dbt_utils.default_last_day(date, datepart)}} {%- endmacro %} {% macro postgres__last_day(date, datepart) -%} {%- if datepart == 'quarter' -%} - {{ exceptions.raise_compiler_error( "dbt_utils.last_day is not supported for datepart 'quarter' on this adapter") }} - {%- else -%} - - cast( - {{dbt_utils.dateadd('day', '-1', - dbt_utils.dateadd(datepart, '1', dbt_utils.date_trunc(datepart, date)) - )}} - as date) - + {{dbt_utils.default_last_day(date, datepart)}} {%- endif -%} {%- endmacro %} From ff905ac849e3a0d1946d184cc066d6ad7ec4b5d6 Mon Sep 17 00:00:00 2001 From: Tristan Handy Date: Wed, 28 Feb 2018 21:59:03 -0500 Subject: [PATCH 61/81] Create LICENSE --- LICENSE | 201 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 201 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..261eeb9e --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
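The `last_day` macro introduced and refactored in the patches above builds its result from `date_trunc` and `dateadd`: truncate to the start of the period, add one period, step back one day, and cast to `date` so the output type is stable whether a date or a timestamp is passed in. The sketch below is illustrative only; it assumes the default `dateadd` renders to a warehouse-native `dateadd(datepart, interval, date)` call (that default is not shown in these patches), and the `created_at` column and `some_table` relation are hypothetical:

```sql
-- Sketch only: approximate rendering of {{ dbt_utils.last_day('created_at', 'month') }}
-- on an adapter with a native dateadd(datepart, interval, date); table/column names are illustrative.
select
    created_at,
    cast(
        dateadd(day, -1,
            dateadd(month, 1, date_trunc('month', created_at))
        ) as date
    ) as last_day_of_month
from some_table
```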
From 9f5212286a7f0b46b69013ce3cb4cf6a2e4874ab Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Thu, 1 Mar 2018 21:37:55 -0500 Subject: [PATCH 62/81] add namespace to macro --- macros/sql/nullcheck_table.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/sql/nullcheck_table.sql b/macros/sql/nullcheck_table.sql index f2ad3d25..bd6e34a0 100644 --- a/macros/sql/nullcheck_table.sql +++ b/macros/sql/nullcheck_table.sql @@ -2,7 +2,7 @@ {% set cols = adapter.get_columns_in_table(schema, table) %} - select {{ nullcheck(cols) }} + select {{ dbt_utils.nullcheck(cols) }} from {{schema}}.{{table}} {% endmacro %} From 34db167792187c3f955902b0a85230dea590ea3f Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Thu, 1 Mar 2018 22:24:41 -0500 Subject: [PATCH 63/81] add integration test project --- integration_tests/.gitignore | 4 + integration_tests/README.md | 7 ++ integration_tests/data/.gitkeep | 0 .../data/cross_db/data_concat.csv | 5 + .../data/cross_db/data_date_trunc.csv | 3 + .../data/cross_db/data_dateadd.csv | 6 ++ .../data/cross_db/data_datediff.csv | 7 ++ .../data/cross_db/data_last_day.csv | 4 + .../data/cross_db/data_safe_cast.csv | 4 + .../data/cross_db/data_split_part.csv | 4 + .../data/datetime/data_date_spine.csv | 11 ++ integration_tests/data/etc/data_people.csv | 101 ++++++++++++++++++ .../schema_tests/data_test_at_least_one.csv | 2 + .../schema_tests/data_test_not_constant.csv | 4 + .../data/sql/data_events_20180101.csv | 3 + .../data/sql/data_events_20180102.csv | 3 + .../data/sql/data_generate_series.csv | 11 ++ .../data/sql/data_get_column_values.csv | 12 +++ .../data/sql/data_nullcheck_table.csv | 4 + integration_tests/data/sql/data_pivot.csv | 4 + .../data/sql/data_pivot_expected.csv | 3 + integration_tests/data/sql/data_star.csv | 4 + .../data/sql/data_star_expected.csv | 4 + .../data/sql/data_surrogate_key.csv | 5 + integration_tests/data/web/data_urls.csv | 3 + integration_tests/dbt_project.yml | 19 ++++ integration_tests/macros/.gitkeep | 0 integration_tests/macros/tests.sql | 13 +++ .../models/cross_db_utils/schema.yml | 41 +++++++ .../models/cross_db_utils/test_concat.sql | 12 +++ .../cross_db_utils/test_current_timestamp.sql | 6 ++ .../models/cross_db_utils/test_date_trunc.sql | 20 ++++ .../models/cross_db_utils/test_dateadd.sql | 18 ++++ .../models/cross_db_utils/test_datediff.sql | 24 +++++ .../models/cross_db_utils/test_last_day.sql | 16 +++ .../models/cross_db_utils/test_safe_cast.sql | 12 +++ .../models/cross_db_utils/test_split_part.sql | 28 +++++ integration_tests/models/datetime/schema.yml | 6 ++ .../models/datetime/test_date_spine.sql | 15 +++ .../models/schema_tests/schema.yml | 29 +++++ .../models/schema_tests/test_recency.sql | 3 + integration_tests/models/sql/schema.yml | 53 +++++++++ .../models/sql/test_generate_series.sql | 8 ++ .../models/sql/test_get_column_values.sql | 13 +++ .../test_get_tables_by_prefix_and_union.sql | 4 + integration_tests/models/sql/test_groupby.sql | 8 ++ .../models/sql/test_nullcheck_table.sql | 15 +++ integration_tests/models/sql/test_pivot.sql | 8 ++ integration_tests/models/sql/test_star.sql | 11 ++ .../models/sql/test_surrogate_key.sql | 12 +++ integration_tests/models/web/schema.yml | 6 ++ integration_tests/models/web/test_urls.sql | 20 ++++ 52 files changed, 638 insertions(+) create mode 100644 integration_tests/.gitignore create mode 100644 integration_tests/README.md create mode 100644 integration_tests/data/.gitkeep create mode 100644 integration_tests/data/cross_db/data_concat.csv create mode 100644 
integration_tests/data/cross_db/data_date_trunc.csv create mode 100644 integration_tests/data/cross_db/data_dateadd.csv create mode 100644 integration_tests/data/cross_db/data_datediff.csv create mode 100644 integration_tests/data/cross_db/data_last_day.csv create mode 100644 integration_tests/data/cross_db/data_safe_cast.csv create mode 100644 integration_tests/data/cross_db/data_split_part.csv create mode 100644 integration_tests/data/datetime/data_date_spine.csv create mode 100644 integration_tests/data/etc/data_people.csv create mode 100644 integration_tests/data/schema_tests/data_test_at_least_one.csv create mode 100644 integration_tests/data/schema_tests/data_test_not_constant.csv create mode 100644 integration_tests/data/sql/data_events_20180101.csv create mode 100644 integration_tests/data/sql/data_events_20180102.csv create mode 100644 integration_tests/data/sql/data_generate_series.csv create mode 100644 integration_tests/data/sql/data_get_column_values.csv create mode 100644 integration_tests/data/sql/data_nullcheck_table.csv create mode 100644 integration_tests/data/sql/data_pivot.csv create mode 100644 integration_tests/data/sql/data_pivot_expected.csv create mode 100644 integration_tests/data/sql/data_star.csv create mode 100644 integration_tests/data/sql/data_star_expected.csv create mode 100644 integration_tests/data/sql/data_surrogate_key.csv create mode 100644 integration_tests/data/web/data_urls.csv create mode 100644 integration_tests/dbt_project.yml create mode 100644 integration_tests/macros/.gitkeep create mode 100644 integration_tests/macros/tests.sql create mode 100644 integration_tests/models/cross_db_utils/schema.yml create mode 100644 integration_tests/models/cross_db_utils/test_concat.sql create mode 100644 integration_tests/models/cross_db_utils/test_current_timestamp.sql create mode 100644 integration_tests/models/cross_db_utils/test_date_trunc.sql create mode 100644 integration_tests/models/cross_db_utils/test_dateadd.sql create mode 100644 integration_tests/models/cross_db_utils/test_datediff.sql create mode 100644 integration_tests/models/cross_db_utils/test_last_day.sql create mode 100644 integration_tests/models/cross_db_utils/test_safe_cast.sql create mode 100644 integration_tests/models/cross_db_utils/test_split_part.sql create mode 100644 integration_tests/models/datetime/schema.yml create mode 100644 integration_tests/models/datetime/test_date_spine.sql create mode 100644 integration_tests/models/schema_tests/schema.yml create mode 100644 integration_tests/models/schema_tests/test_recency.sql create mode 100644 integration_tests/models/sql/schema.yml create mode 100644 integration_tests/models/sql/test_generate_series.sql create mode 100644 integration_tests/models/sql/test_get_column_values.sql create mode 100644 integration_tests/models/sql/test_get_tables_by_prefix_and_union.sql create mode 100644 integration_tests/models/sql/test_groupby.sql create mode 100644 integration_tests/models/sql/test_nullcheck_table.sql create mode 100644 integration_tests/models/sql/test_pivot.sql create mode 100644 integration_tests/models/sql/test_star.sql create mode 100644 integration_tests/models/sql/test_surrogate_key.sql create mode 100644 integration_tests/models/web/schema.yml create mode 100644 integration_tests/models/web/test_urls.sql diff --git a/integration_tests/.gitignore b/integration_tests/.gitignore new file mode 100644 index 00000000..dad33a45 --- /dev/null +++ b/integration_tests/.gitignore @@ -0,0 +1,4 @@ + +target/ +dbt_modules/ +logs/ diff --git 
a/integration_tests/README.md b/integration_tests/README.md new file mode 100644 index 00000000..7c8b8a67 --- /dev/null +++ b/integration_tests/README.md @@ -0,0 +1,7 @@ +### dbt integration test suite for dbt-utils + +database support: + - [x] postgres + - [x] redshift + - [ ] bigquery (partial) + - [ ] snowflake (partial) diff --git a/integration_tests/data/.gitkeep b/integration_tests/data/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/integration_tests/data/cross_db/data_concat.csv b/integration_tests/data/cross_db/data_concat.csv new file mode 100644 index 00000000..eae83aa7 --- /dev/null +++ b/integration_tests/data/cross_db/data_concat.csv @@ -0,0 +1,5 @@ +input_1,input_2,output +a,b,ab +a,,a +,b,b +,, diff --git a/integration_tests/data/cross_db/data_date_trunc.csv b/integration_tests/data/cross_db/data_date_trunc.csv new file mode 100644 index 00000000..cff72975 --- /dev/null +++ b/integration_tests/data/cross_db/data_date_trunc.csv @@ -0,0 +1,3 @@ +updated_at,day,month +2018-01-05 12:00:00,2018-01-05,2018-01-01 +,, diff --git a/integration_tests/data/cross_db/data_dateadd.csv b/integration_tests/data/cross_db/data_dateadd.csv new file mode 100644 index 00000000..a071c5f1 --- /dev/null +++ b/integration_tests/data/cross_db/data_dateadd.csv @@ -0,0 +1,6 @@ +from_time,interval_length,datepart,result +2018-01-01 01:00:00,1,day,2018-01-02 01:00:00 +2018-01-01 01:00:00,1,month,2018-02-01 01:00:00 +2018-01-01 01:00:00,1,year,2019-01-01 01:00:00 +2018-01-01 01:00:00,1,hour,2018-01-01 02:00:00 +,1,day, diff --git a/integration_tests/data/cross_db/data_datediff.csv b/integration_tests/data/cross_db/data_datediff.csv new file mode 100644 index 00000000..5d266969 --- /dev/null +++ b/integration_tests/data/cross_db/data_datediff.csv @@ -0,0 +1,7 @@ +first_date,second_date,datepart,result +2018-01-01 01:00:00,2018-01-02 01:00:00,day,1 +2018-01-01 01:00:00,2018-02-01 01:00:00,month,1 +2018-01-01 01:00:00,2019-01-01 01:00:00,year,1 +2018-01-01 01:00:00,2018-01-01 02:00:00,hour,1 +,2018-01-01 02:00:00,hour, +2018-01-01 02:00:00,,hour, diff --git a/integration_tests/data/cross_db/data_last_day.csv b/integration_tests/data/cross_db/data_last_day.csv new file mode 100644 index 00000000..186465d8 --- /dev/null +++ b/integration_tests/data/cross_db/data_last_day.csv @@ -0,0 +1,4 @@ +date_day,date_part,result +2018-01-02,month,2018-01-31 +2018-01-02,year,2018-12-31 +,month, diff --git a/integration_tests/data/cross_db/data_safe_cast.csv b/integration_tests/data/cross_db/data_safe_cast.csv new file mode 100644 index 00000000..dedb7301 --- /dev/null +++ b/integration_tests/data/cross_db/data_safe_cast.csv @@ -0,0 +1,4 @@ +field,output +abc,abc +123,123 +, diff --git a/integration_tests/data/cross_db/data_split_part.csv b/integration_tests/data/cross_db/data_split_part.csv new file mode 100644 index 00000000..3aa8d7f6 --- /dev/null +++ b/integration_tests/data/cross_db/data_split_part.csv @@ -0,0 +1,4 @@ +parts,split_on,result_1,result_2,result_3 +a|b|c,|,a,b,c +1|2|3,|,1,2,3 +,|,, diff --git a/integration_tests/data/datetime/data_date_spine.csv b/integration_tests/data/datetime/data_date_spine.csv new file mode 100644 index 00000000..ef620817 --- /dev/null +++ b/integration_tests/data/datetime/data_date_spine.csv @@ -0,0 +1,11 @@ +date_day +2018-01-01 +2018-01-02 +2018-01-03 +2018-01-04 +2018-01-05 +2018-01-06 +2018-01-07 +2018-01-08 +2018-01-09 +2018-01-10 diff --git a/integration_tests/data/etc/data_people.csv b/integration_tests/data/etc/data_people.csv new file mode 100644 index 
00000000..90273837 --- /dev/null +++ b/integration_tests/data/etc/data_people.csv @@ -0,0 +1,101 @@ +id,first_name,last_name,email,ip_address,created_at,is_active +1,Dame,Cluley,dcluley0@nih.gov,155.86.204.241,2017-02-07 09:48:26,false +2,Guy,Wittering,gwittering1@reddit.com,221.174.176.36,2017-08-08 00:37:53,false +3,Klement,Bucke,kbucke2@dedecms.com,167.94.85.199,2016-09-05 23:43:19,true +4,Roselia,Dallander,rdallander3@adobe.com,135.10.21.248,2016-08-11 00:00:11,false +5,Arly,Terzza,aterzza4@va.gov,219.66.192.10,2017-03-23 22:11:42,true +6,Arron,Siehard,asiehard5@ibm.com,116.211.108.88,2017-07-07 23:11:50,true +7,Debera,Petrazzi,dpetrazzi6@addthis.com,18.167.49.108,2017-11-12 04:34:50,false +8,Timi,Agget,tagget7@home.pl,170.171.78.217,2016-03-14 02:04:33,true +9,Ines,Brixey,ibrixey8@biblegateway.com,251.141.4.42,2017-10-01 16:41:21,false +10,Karlen,Eggleton,keggleton9@amazon.co.jp,100.179.149.224,2016-04-15 10:05:00,true +11,Hamish,Winfield,hwinfielda@squarespace.com,5.34.205.16,2017-12-29 22:44:52,true +12,Stanton,Tiron,stironb@rambler.ru,171.5.190.125,2017-01-20 23:31:15,true +13,Tyne,Elner,telnerc@jiathis.com,165.155.112.184,2017-06-12 23:42:54,false +14,Lita,Kitley,lkitleyd@gmpg.org,138.131.8.94,2018-01-25 15:03:51,false +15,Alan,Morsley,amorsleye@dell.com,5.81.121.91,2016-03-18 19:37:49,true +16,Erinn,Stokoe,estokoef@walmart.com,244.57.254.248,2017-02-23 22:51:09,true +17,Dela,Oxley,doxleyg@state.gov,163.86.24.94,2017-04-12 20:19:20,true +18,Daryle,Reeve,dreeveh@1und1.de,175.30.172.20,2017-07-09 20:46:10,false +19,Micah,Smitham,msmithami@techcrunch.com,164.75.157.186,2016-02-25 16:17:57,true +20,Bernice,Van der Velde,bvanderveldej@i2i.jp,141.99.132.98,2017-07-28 23:31:24,false +21,Odo,Janacek,ojanacekk@redcross.org,50.195.72.49,2017-05-01 05:59:30,false +22,Lyndsey,Exter,lexterl@scribd.com,244.5.43.160,2017-02-13 11:32:04,false +23,Correy,Brash,cbrashm@loc.gov,233.67.52.95,2018-02-27 05:26:29,false +24,Lyle,Josilevich,ljosilevichn@rambler.ru,99.16.127.176,2016-08-06 03:37:03,false +25,Skip,Castiello,scastielloo@rambler.ru,118.174.3.50,2016-06-07 23:32:19,true +26,Philbert,Daltry,pdaltryp@tamu.edu,181.93.127.23,2016-08-16 12:52:52,true +27,Addie,Sikora,asikoraq@theatlantic.com,120.33.67.44,2016-09-01 12:45:37,true +28,Sibyl,Songist,ssongistr@noaa.gov,151.85.172.142,2016-02-11 01:14:50,false +29,Eyde,Dankersley,edankersleys@illinois.edu,147.170.154.132,2017-08-09 18:14:00,false +30,Dion,Pessler,dpesslert@reverbnation.com,51.92.202.203,2017-01-30 02:05:47,true +31,Rodd,Huntly,rhuntlyu@google.ru,82.198.158.0,2016-04-22 06:44:15,false +32,Inness,Cartmer,icartmerv@tripod.com,44.147.127.200,2017-03-11 12:03:56,false +33,Blakeley,Figgins,bfigginsw@ebay.co.uk,116.54.91.30,2016-05-28 14:25:49,true +34,Yancey,Leeburne,yleeburnex@people.com.cn,8.44.104.205,2016-08-09 03:15:02,false +35,Gustavus,Kemp,gkempy@sourceforge.net,101.126.34.176,2018-02-02 12:15:57,true +36,Annabela,Ardron,aardronz@slideshare.net,135.255.20.212,2017-10-29 03:13:03,true +37,Allister,Janota,ajanota10@yahoo.com,41.139.90.112,2016-09-19 04:21:50,true +38,Yoko,McBryde,ymcbryde11@weather.com,124.17.222.132,2016-08-21 14:32:04,false +39,Aprilette,Colebeck,acolebeck12@elegantthemes.com,14.62.14.45,2017-04-04 04:47:31,true +40,Oralia,Marklew,omarklew13@cnet.com,108.161.10.231,2017-12-29 23:15:15,true +41,Vi,Bryde,vbryde14@harvard.edu,20.91.132.215,2017-12-01 21:02:36,false +42,Koren,Emmanueli,kemmanueli15@fotki.com,151.86.146.63,2016-11-10 22:36:05,true +43,Corrie,Pendry,cpendry16@technorati.com,78.110.104.252,2017-11-22 
07:57:23,true +44,Berton,Jakovijevic,bjakovijevic17@themeforest.net,243.201.191.244,2017-12-22 20:30:37,false +45,Ahmad,Lawerence,alawerence18@bluehost.com,234.146.69.92,2017-07-07 17:37:17,true +46,Walther,Mardee,wmardee19@sciencedirect.com,86.10.226.173,2016-06-23 09:20:51,false +47,Raynor,Reignolds,rreignolds1a@github.com,192.159.109.53,2016-04-19 13:32:00,false +48,Dom,Brodhead,dbrodhead1b@ed.gov,13.193.83.80,2016-09-24 03:16:43,false +49,Patton,Marrett,pmarrett1c@sourceforge.net,73.142.143.198,2016-06-02 19:20:48,true +50,Murielle,Reina,mreina1d@washington.edu,88.67.241.169,2017-10-01 01:56:52,true +51,Markos,Zylberdik,mzylberdik1e@ask.com,169.62.233.37,2017-03-23 19:40:19,true +52,Dorisa,Gosalvez,dgosalvez1f@mit.edu,10.111.156.111,2016-02-24 12:37:30,true +53,Amata,Moar,amoar1g@tinypic.com,214.241.229.183,2016-05-22 05:04:06,true +54,Graehme,Finnigan,gfinnigan1h@trellian.com,229.14.230.4,2016-12-27 00:49:18,true +55,Tanya,Sheers,tsheers1i@house.gov,43.212.37.134,2018-02-04 05:17:30,true +56,Germain,Beavers,gbeavers1j@hexun.com,91.219.240.74,2017-01-26 23:03:39,false +57,Emmye,Cerie,ecerie1k@independent.co.uk,58.183.233.79,2017-04-30 14:13:31,true +58,Reese,Glaisner,rglaisner1l@dropbox.com,63.181.9.68,2016-07-29 05:49:41,true +59,Christie,Phlippsen,cphlippsen1m@ucoz.ru,236.91.248.168,2017-07-07 12:37:10,false +60,Anthia,Tolland,atolland1n@hibu.com,124.60.13.101,2016-02-06 14:38:37,true +61,Annamarie,Pipworth,apipworth1o@ftc.gov,53.219.191.107,2017-06-13 08:29:04,true +62,Price,O'Gready,pogready1p@theatlantic.com,131.188.180.57,2016-09-28 08:44:38,false +63,Sergei,Cicero,scicero1q@telegraph.co.uk,100.97.16.84,2017-10-02 15:58:45,false +64,Dolorita,Lilion,dlilion1r@vimeo.com,150.43.252.51,2017-09-06 12:39:46,true +65,Perrine,Peetermann,ppeetermann1s@fema.gov,93.27.202.229,2017-07-08 08:49:37,false +66,Frieda,Gemelli,fgemelli1t@altervista.org,20.21.177.102,2016-04-18 05:58:59,false +67,Webster,Tully,wtully1u@nba.com,61.55.62.136,2016-02-18 11:01:23,true +68,Clara,Dadd,cdadd1v@rakuten.co.jp,67.84.203.36,2017-06-10 22:20:50,false +69,Gardener,Clarkin,gclarkin1w@bbc.co.uk,211.175.17.92,2017-11-27 23:33:42,true +70,Doll,Celez,dcelez1x@imdb.com,65.124.34.165,2017-01-03 06:40:44,true +71,Willy,Remnant,wremnant1y@nasa.gov,183.190.219.35,2017-05-27 11:05:47,false +72,Felicle,Scoterbosh,fscoterbosh1z@macromedia.com,12.103.23.230,2017-05-04 05:22:27,true +73,Egan,Ryland,eryland20@t.co,227.35.15.147,2016-06-09 11:50:39,true +74,Donny,Clissold,dclissold21@yellowpages.com,210.51.117.212,2016-03-08 22:48:18,true +75,Gwyneth,Brash,gbrash22@vistaprint.com,30.243.157.153,2016-01-23 17:11:17,true +76,Mervin,Woolis,mwoolis23@elegantthemes.com,52.171.162.135,2017-06-17 15:36:58,false +77,Alicea,Mewton,amewton24@com.com,236.53.172.152,2017-12-21 10:35:45,true +78,Whittaker,Biaggiotti,wbiaggiotti25@patch.com,85.219.77.207,2017-12-27 09:25:13,true +79,Teddie,Matevushev,tmatevushev26@nsw.gov.au,121.24.14.214,2017-12-09 16:30:35,false +80,Mal,Mc Gee,mmcgee27@eventbrite.com,85.138.92.81,2016-01-14 03:02:43,true +81,Teressa,Lourenco,tlourenco28@zdnet.com,33.2.78.199,2016-03-17 02:29:47,false +82,Willabella,Danelutti,wdanelutti29@ted.com,221.78.224.255,2016-03-06 14:34:53,true +83,Samaria,Hessle,shessle2a@surveymonkey.com,216.8.59.131,2017-03-30 11:02:45,true +84,Ruperto,Staite,rstaite2b@wikispaces.com,79.47.189.125,2017-08-23 22:09:19,true +85,Ashlee,Scotsbrook,ascotsbrook2c@trellian.com,91.104.127.195,2017-10-02 15:01:49,false +86,Godfry,Lawson,glawson2d@seesaa.net,241.54.44.84,2016-04-03 04:42:19,false 
+87,Rose,Lathleiffure,rlathleiffure2e@instagram.com,21.172.211.218,2016-05-11 04:37:44,true +88,Ricky,Helwig,rhelwig2f@sciencedirect.com,130.213.100.214,2017-12-02 11:58:19,true +89,Hersh,Castleman,hcastleman2g@mediafire.com,196.170.63.20,2016-11-06 15:18:34,false +90,Upton,Midghall,umidghall2h@wordpress.org,29.108.156.94,2017-03-24 03:48:22,true +91,Devi,Lowmass,dlowmass2i@parallels.com,243.189.157.74,2016-07-31 13:35:43,true +92,Cherry,Goldstein,cgoldstein2j@delicious.com,21.78.25.159,2016-06-02 22:19:13,true +93,Alfy,Yakubovics,ayakubovics2k@bigcartel.com,29.28.179.184,2016-10-13 08:03:28,true +94,Ambrosi,Martinyuk,amartinyuk2l@163.com,1.42.244.146,2016-01-24 23:02:40,false +95,Daniel,Duly,dduly2m@engadget.com,74.32.138.66,2017-03-26 09:02:19,false +96,Hazlett,Oakton,hoakton2n@booking.com,248.196.158.127,2016-11-01 10:55:45,true +97,Vivienne,Millery,vmillery2o@nyu.edu,99.21.99.255,2016-04-19 15:25:08,true +98,Glynda,Kupper,gkupper2p@yahoo.co.jp,89.13.224.249,2016-04-05 07:01:28,false +99,Pavlov,MacDwyer,pmacdwyer2q@netvibes.com,147.162.14.191,2018-02-10 06:57:45,true +100,Fonzie,Filip,ffilip2r@tripadvisor.com,244.178.118.180,2016-11-18 00:09:42,false diff --git a/integration_tests/data/schema_tests/data_test_at_least_one.csv b/integration_tests/data/schema_tests/data_test_at_least_one.csv new file mode 100644 index 00000000..8b75a3f5 --- /dev/null +++ b/integration_tests/data/schema_tests/data_test_at_least_one.csv @@ -0,0 +1,2 @@ +field +a diff --git a/integration_tests/data/schema_tests/data_test_not_constant.csv b/integration_tests/data/schema_tests/data_test_not_constant.csv new file mode 100644 index 00000000..25b5f504 --- /dev/null +++ b/integration_tests/data/schema_tests/data_test_not_constant.csv @@ -0,0 +1,4 @@ +field +1 +1 +2 diff --git a/integration_tests/data/sql/data_events_20180101.csv b/integration_tests/data/sql/data_events_20180101.csv new file mode 100644 index 00000000..5570828c --- /dev/null +++ b/integration_tests/data/sql/data_events_20180101.csv @@ -0,0 +1,3 @@ +user_id,event +1,play +2,pause diff --git a/integration_tests/data/sql/data_events_20180102.csv b/integration_tests/data/sql/data_events_20180102.csv new file mode 100644 index 00000000..aaa69a3d --- /dev/null +++ b/integration_tests/data/sql/data_events_20180102.csv @@ -0,0 +1,3 @@ +user_id,event +3,play +4,pause diff --git a/integration_tests/data/sql/data_generate_series.csv b/integration_tests/data/sql/data_generate_series.csv new file mode 100644 index 00000000..1964c4a2 --- /dev/null +++ b/integration_tests/data/sql/data_generate_series.csv @@ -0,0 +1,11 @@ +generated_number +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 diff --git a/integration_tests/data/sql/data_get_column_values.csv b/integration_tests/data/sql/data_get_column_values.csv new file mode 100644 index 00000000..5651149b --- /dev/null +++ b/integration_tests/data/sql/data_get_column_values.csv @@ -0,0 +1,12 @@ +field +a +b +c +d +e +f +g +g +g +g +g diff --git a/integration_tests/data/sql/data_nullcheck_table.csv b/integration_tests/data/sql/data_nullcheck_table.csv new file mode 100644 index 00000000..3b6cdd74 --- /dev/null +++ b/integration_tests/data/sql/data_nullcheck_table.csv @@ -0,0 +1,4 @@ +field_1,field_2,field_3 +a,'',1 +'',b,2 +'','',3 diff --git a/integration_tests/data/sql/data_pivot.csv b/integration_tests/data/sql/data_pivot.csv new file mode 100644 index 00000000..e43cfd90 --- /dev/null +++ b/integration_tests/data/sql/data_pivot.csv @@ -0,0 +1,4 @@ +size,color +S,red +S,blue +M,red diff --git 
a/integration_tests/data/sql/data_pivot_expected.csv b/integration_tests/data/sql/data_pivot_expected.csv new file mode 100644 index 00000000..ea309371 --- /dev/null +++ b/integration_tests/data/sql/data_pivot_expected.csv @@ -0,0 +1,3 @@ +size,red,blue +S,1,1 +M,1,0 diff --git a/integration_tests/data/sql/data_star.csv b/integration_tests/data/sql/data_star.csv new file mode 100644 index 00000000..74150762 --- /dev/null +++ b/integration_tests/data/sql/data_star.csv @@ -0,0 +1,4 @@ +field_1,field_2,field_3 +a,b,c +d,e,f +g,h,i diff --git a/integration_tests/data/sql/data_star_expected.csv b/integration_tests/data/sql/data_star_expected.csv new file mode 100644 index 00000000..f5aa3eed --- /dev/null +++ b/integration_tests/data/sql/data_star_expected.csv @@ -0,0 +1,4 @@ +field_1,field_2 +a,b +d,e +g,h diff --git a/integration_tests/data/sql/data_surrogate_key.csv b/integration_tests/data/sql/data_surrogate_key.csv new file mode 100644 index 00000000..8c858381 --- /dev/null +++ b/integration_tests/data/sql/data_surrogate_key.csv @@ -0,0 +1,5 @@ +field_1,field_2,field_3,expected +a,b,c,7b193b3d33184464106f41ddf733783b +a,,c,c5fd1b92380c6222ab0ef67839208624 +,,c,267743defab4558f1940311b66274e26 +,,,cfab1ba8c67c7c838db98d666f02a132 diff --git a/integration_tests/data/web/data_urls.csv b/integration_tests/data/web/data_urls.csv new file mode 100644 index 00000000..a98bd191 --- /dev/null +++ b/integration_tests/data/web/data_urls.csv @@ -0,0 +1,3 @@ +url,medium,source +http://drewbanin.com/milky?utm_medium=organic,organic, +http://drewbanin.com/milky?utm_medium=organic&utm_source=github,organic,github diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml new file mode 100644 index 00000000..2ca7cda0 --- /dev/null +++ b/integration_tests/dbt_project.yml @@ -0,0 +1,19 @@ + +name: 'dbt_utils_integration_tests' +version: '1.0' + +profile: 'default' + +source-paths: ["models"] +analysis-paths: ["analysis"] +test-paths: ["tests"] +data-paths: ["data"] +macro-paths: ["macros"] + +target-path: "target" # directory which will store compiled SQL files +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_modules" + +packages: + - local: ../ diff --git a/integration_tests/macros/.gitkeep b/integration_tests/macros/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/integration_tests/macros/tests.sql b/integration_tests/macros/tests.sql new file mode 100644 index 00000000..cb3f1582 --- /dev/null +++ b/integration_tests/macros/tests.sql @@ -0,0 +1,13 @@ + +{% macro test_assert_equal(model, actual, expected) %} + +select count(*) from {{ model }} where {{ actual }} != {{ expected }} + +{% endmacro %} + + +{% macro test_not_empty_string(model, arg) %} + +select count(*) from {{ model }} where {{ arg }} = '' + +{% endmacro %} diff --git a/integration_tests/models/cross_db_utils/schema.yml b/integration_tests/models/cross_db_utils/schema.yml new file mode 100644 index 00000000..72e63819 --- /dev/null +++ b/integration_tests/models/cross_db_utils/schema.yml @@ -0,0 +1,41 @@ + +test_concat: + constraints: + assert_equal: + - { actual: actual, expected: expected } + +test_current_timestamp: + constraints: + assert_equal: + - { actual: actual, expected: expected } + +test_date_trunc: + constraints: + assert_equal: + - { actual: actual, expected: expected } + +test_dateadd: + constraints: + assert_equal: + - { actual: actual, expected: expected } + +test_datediff: + constraints: + assert_equal: + - { actual: actual, expected: expected } + 
+test_last_day: + constraints: + assert_equal: + - { actual: actual, expected: expected } + +test_safe_cast: + constraints: + assert_equal: + - { actual: actual, expected: expected } + +test_split_part: + constraints: + assert_equal: + - { actual: actual, expected: expected } + diff --git a/integration_tests/models/cross_db_utils/test_concat.sql b/integration_tests/models/cross_db_utils/test_concat.sql new file mode 100644 index 00000000..9efeb748 --- /dev/null +++ b/integration_tests/models/cross_db_utils/test_concat.sql @@ -0,0 +1,12 @@ + +with data as ( + + select * from {{ ref('data_concat') }} + +) + +select + {{ dbt_utils.concat(['input_1', 'input_2']) }} as actual, + output as expected + +from data diff --git a/integration_tests/models/cross_db_utils/test_current_timestamp.sql b/integration_tests/models/cross_db_utils/test_current_timestamp.sql new file mode 100644 index 00000000..9a777f28 --- /dev/null +++ b/integration_tests/models/cross_db_utils/test_current_timestamp.sql @@ -0,0 +1,6 @@ + +-- how can we test this better? +select + {{ dbt_utils.current_timestamp() }} as actual, + {{ dbt_utils.current_timestamp() }} as expected + diff --git a/integration_tests/models/cross_db_utils/test_date_trunc.sql b/integration_tests/models/cross_db_utils/test_date_trunc.sql new file mode 100644 index 00000000..aefff306 --- /dev/null +++ b/integration_tests/models/cross_db_utils/test_date_trunc.sql @@ -0,0 +1,20 @@ + +with data as ( + + select * from {{ ref('data_date_trunc') }} + +) + +select + {{ dbt_utils.date_trunc('day', 'updated_at') }} as actual, + day as expected + +from data + +union all + +select + {{ dbt_utils.date_trunc('month', 'updated_at') }} as actual, + month as expected + +from data diff --git a/integration_tests/models/cross_db_utils/test_dateadd.sql b/integration_tests/models/cross_db_utils/test_dateadd.sql new file mode 100644 index 00000000..20384d11 --- /dev/null +++ b/integration_tests/models/cross_db_utils/test_dateadd.sql @@ -0,0 +1,18 @@ + +with data as ( + + select * from {{ ref('data_dateadd') }} + +) + +select + case + when datepart = 'hour' then {{ dbt_utils.dateadd('hour', 'interval_length', 'from_time') }} + when datepart = 'day' then {{ dbt_utils.dateadd('day', 'interval_length', 'from_time') }} + when datepart = 'month' then {{ dbt_utils.dateadd('month', 'interval_length', 'from_time') }} + when datepart = 'year' then {{ dbt_utils.dateadd('year', 'interval_length', 'from_time') }} + else null + end as actual, + result as expected + +from data diff --git a/integration_tests/models/cross_db_utils/test_datediff.sql b/integration_tests/models/cross_db_utils/test_datediff.sql new file mode 100644 index 00000000..ea333972 --- /dev/null +++ b/integration_tests/models/cross_db_utils/test_datediff.sql @@ -0,0 +1,24 @@ + +with data as ( + + select * from {{ ref('data_datediff') }} + +) + +select + -- not implemented for postgres + {% if target.type == 'postgres' %} + null as actual, + null as expected + {% else %} + case + when datepart = 'hour' then {{ dbt_utils.datediff('first_date', 'second_date', 'hour') }} + when datepart = 'day' then {{ dbt_utils.datediff('first_date', 'second_date', 'day') }} + when datepart = 'month' then {{ dbt_utils.datediff('first_date', 'second_date', 'month') }} + when datepart = 'year' then {{ dbt_utils.datediff('first_date', 'second_date', 'year') }} + else null + end as actual, + result as expected + {% endif %} + +from data diff --git a/integration_tests/models/cross_db_utils/test_last_day.sql 
b/integration_tests/models/cross_db_utils/test_last_day.sql new file mode 100644 index 00000000..5391ae10 --- /dev/null +++ b/integration_tests/models/cross_db_utils/test_last_day.sql @@ -0,0 +1,16 @@ + +with data as ( + + select * from {{ ref('data_last_day') }} + +) + +select + case + when date_part = 'month' then {{ dbt_utils.last_day('date_day', 'month') }} + when date_part = 'year' then {{ dbt_utils.last_day('date_day', 'year') }} + else null + end as actual, + result as expected + +from data diff --git a/integration_tests/models/cross_db_utils/test_safe_cast.sql b/integration_tests/models/cross_db_utils/test_safe_cast.sql new file mode 100644 index 00000000..8ed74fc3 --- /dev/null +++ b/integration_tests/models/cross_db_utils/test_safe_cast.sql @@ -0,0 +1,12 @@ + +with data as ( + + select * from {{ ref('data_safe_cast') }} + +) + +select + {{ dbt_utils.safe_cast('field', dbt_utils.type_string()) }} as actual, + output as expected + +from data diff --git a/integration_tests/models/cross_db_utils/test_split_part.sql b/integration_tests/models/cross_db_utils/test_split_part.sql new file mode 100644 index 00000000..6a10327c --- /dev/null +++ b/integration_tests/models/cross_db_utils/test_split_part.sql @@ -0,0 +1,28 @@ + +with data as ( + + select * from {{ ref('data_split_part') }} + +) + +select + {{ dbt_utils.split_part('parts', 'split_on', 1) }} as actual, + result_1 as expected + +from data + +union all + +select + {{ dbt_utils.split_part('parts', 'split_on', 2) }} as actual, + result_2 as expected + +from data + +union all + +select + {{ dbt_utils.split_part('parts', 'split_on', 3) }} as actual, + result_3 as expected + +from data diff --git a/integration_tests/models/datetime/schema.yml b/integration_tests/models/datetime/schema.yml new file mode 100644 index 00000000..eb22de94 --- /dev/null +++ b/integration_tests/models/datetime/schema.yml @@ -0,0 +1,6 @@ + + +test_date_spine: + constraints: + dbt_utils.equality: + - ref('data_date_spine') diff --git a/integration_tests/models/datetime/test_date_spine.sql b/integration_tests/models/datetime/test_date_spine.sql new file mode 100644 index 00000000..b8a023e8 --- /dev/null +++ b/integration_tests/models/datetime/test_date_spine.sql @@ -0,0 +1,15 @@ + +with date_spine as ( + + {% if target.type == 'postgres' %} + {{ log("WARNING: Not testing - datediff macro is unsupported on Postgres", info=True) }} + select * from {{ ref('data_date_spine') }} + {% else %} + {{ dbt_utils.date_spine("day", "'2018-01-01'", "'2018-01-10'") }} + {% endif %} + +) + +select date_day +from date_spine + diff --git a/integration_tests/models/schema_tests/schema.yml b/integration_tests/models/schema_tests/schema.yml new file mode 100644 index 00000000..8ec3e40b --- /dev/null +++ b/integration_tests/models/schema_tests/schema.yml @@ -0,0 +1,29 @@ + +data_test_not_constant: + constraints: + dbt_utils.not_constant: + - field + + +data_test_at_least_one: + constraints: + dbt_utils.at_least_one: + - field + + +test_recency: + constraints: + dbt_utils.recency: + - {field: today, datepart: day, interval: 1} + + +data_people: + constraints: + dbt_utils.equality: + - ref('data_people') + + +data_people: + constraints: + dbt_utils.cardinality_equality: + - {from: is_active, to: ref('data_people'), field: is_active} diff --git a/integration_tests/models/schema_tests/test_recency.sql b/integration_tests/models/schema_tests/test_recency.sql new file mode 100644 index 00000000..be0d4738 --- /dev/null +++ b/integration_tests/models/schema_tests/test_recency.sql @@ -0,0 
+1,3 @@ + +select + {{ dbt_utils.date_trunc('day', dbt_utils.current_timestamp()) }} as today diff --git a/integration_tests/models/sql/schema.yml b/integration_tests/models/sql/schema.yml new file mode 100644 index 00000000..e7d1c401 --- /dev/null +++ b/integration_tests/models/sql/schema.yml @@ -0,0 +1,53 @@ + +test_generate_series: + constraints: + dbt_utils.equality: + - ref('data_generate_series') + + +test_get_column_values: + constraints: + accepted_values: + - { field: count_a, values: ['1'] } + - { field: count_b, values: ['1'] } + - { field: count_c, values: ['1'] } + - { field: count_d, values: ['1'] } + - { field: count_e, values: ['1'] } + - { field: count_f, values: ['1'] } + - { field: count_g, values: ['5'] } + +test_get_tables_by_prefix_and_union: + constraints: + not_null: + - user_id + - event + + unique: + - user_id + + dbt_utils.at_least_one: + - user_id + +test_nullcheck_table: + constraints: + not_empty_string: + - field_1 + - field_2 + - field_3 + + +test_pivot: + constraints: + dbt_utils.equality: + - ref('data_pivot_expected') + + +test_star: + constraints: + dbt_utils.equality: + - ref('data_star_expected') + +test_surrogate_key: + constraints: + assert_equal: + - {actual: actual, expected: expected} diff --git a/integration_tests/models/sql/test_generate_series.sql b/integration_tests/models/sql/test_generate_series.sql new file mode 100644 index 00000000..5a0a1656 --- /dev/null +++ b/integration_tests/models/sql/test_generate_series.sql @@ -0,0 +1,8 @@ + +with data as ( + + {{ dbt_utils.generate_series(10) }} + +) + +select generated_number from data diff --git a/integration_tests/models/sql/test_get_column_values.sql b/integration_tests/models/sql/test_get_column_values.sql new file mode 100644 index 00000000..47f45bb9 --- /dev/null +++ b/integration_tests/models/sql/test_get_column_values.sql @@ -0,0 +1,13 @@ + +{% set columns = dbt_utils.get_column_values(ref('data_get_column_values'), 'field') %} + + +select + {% for column in columns -%} + + sum(case when field = '{{ column }}' then 1 else 0 end) as count_{{ column }} + {%- if not loop.last %},{% endif -%} + + {%- endfor %} + +from {{ ref('data_get_column_values') }} diff --git a/integration_tests/models/sql/test_get_tables_by_prefix_and_union.sql b/integration_tests/models/sql/test_get_tables_by_prefix_and_union.sql new file mode 100644 index 00000000..f2b6a983 --- /dev/null +++ b/integration_tests/models/sql/test_get_tables_by_prefix_and_union.sql @@ -0,0 +1,4 @@ + +-- twofer +{% set tables = dbt_utils.get_tables_by_prefix(target.schema, 'data_events_') %} +{{ dbt_utils.union_tables(tables) }} diff --git a/integration_tests/models/sql/test_groupby.sql b/integration_tests/models/sql/test_groupby.sql new file mode 100644 index 00000000..b0f3b17f --- /dev/null +++ b/integration_tests/models/sql/test_groupby.sql @@ -0,0 +1,8 @@ + + +select + 'a' as column_1, + 'b' as column_2, + count(*) as total + +{{ dbt_utils.group_by(2) }} diff --git a/integration_tests/models/sql/test_nullcheck_table.sql b/integration_tests/models/sql/test_nullcheck_table.sql new file mode 100644 index 00000000..e987c1d7 --- /dev/null +++ b/integration_tests/models/sql/test_nullcheck_table.sql @@ -0,0 +1,15 @@ + +{% set tbl = ref('data_nullcheck_table') %} + +with data as ( + + {{ dbt_utils.nullcheck_table(tbl.schema, tbl.name) }} + +) + +select + {{ dbt_utils.safe_cast('field_1', dbt_utils.type_string()) }} as field_1, + {{ dbt_utils.safe_cast('field_2', dbt_utils.type_string()) }} as field_2, + {{ dbt_utils.safe_cast('field_3', 
dbt_utils.type_string()) }} as field_3 + +from data diff --git a/integration_tests/models/sql/test_pivot.sql b/integration_tests/models/sql/test_pivot.sql new file mode 100644 index 00000000..1bffa0ab --- /dev/null +++ b/integration_tests/models/sql/test_pivot.sql @@ -0,0 +1,8 @@ + + +select + size, + {{ dbt_utils.pivot('color', ['red', 'blue']) }} + +from {{ ref('data_pivot') }} +group by size diff --git a/integration_tests/models/sql/test_star.sql b/integration_tests/models/sql/test_star.sql new file mode 100644 index 00000000..6ddcd723 --- /dev/null +++ b/integration_tests/models/sql/test_star.sql @@ -0,0 +1,11 @@ + +with data as ( + + select + {{ dbt_utils.star(from=ref('data_star'), except=['field_3']) }} + + from {{ ref('data_star') }} + +) + +select * from data diff --git a/integration_tests/models/sql/test_surrogate_key.sql b/integration_tests/models/sql/test_surrogate_key.sql new file mode 100644 index 00000000..6e4bfb4d --- /dev/null +++ b/integration_tests/models/sql/test_surrogate_key.sql @@ -0,0 +1,12 @@ + +with data as ( + + select * from {{ ref('data_surrogate_key') }} + +) + +select + {{ dbt_utils.surrogate_key('field_1', 'field_2', 'field_3') }} as actual, + expected + +from data diff --git a/integration_tests/models/web/schema.yml b/integration_tests/models/web/schema.yml new file mode 100644 index 00000000..4e29621b --- /dev/null +++ b/integration_tests/models/web/schema.yml @@ -0,0 +1,6 @@ + +test_urls: + constraints: + assert_equal: + - {actual: actual, expected: expected} + diff --git a/integration_tests/models/web/test_urls.sql b/integration_tests/models/web/test_urls.sql new file mode 100644 index 00000000..60353dbb --- /dev/null +++ b/integration_tests/models/web/test_urls.sql @@ -0,0 +1,20 @@ + +with data as ( + + select * from {{ ref('data_urls') }} + +) + +select + {{ dbt_utils.get_url_parameter('url', 'utm_medium') }} as actual, + medium as expected + +from data + +union all + +select + {{ dbt_utils.get_url_parameter('url', 'utm_source') }} as actual, + source as expected + +from data From 54b0f55bb5bbc13552b7576bd4b4f535471d62ca Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Tue, 6 Mar 2018 23:51:17 -0500 Subject: [PATCH 64/81] Update README.md --- README.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/README.md b/README.md index c16abad1..39ab1f72 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,3 @@ -

- dbt logo -

- ----- - # dbt-utils This package contains macros that can be (re)used across dbt projects. To use these macros, add this package as a dependency in your `dbt_project.yml` file: From 4dc9b67df1e1a561ef3fd3ba0f1dfda09787f8d3 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Wed, 7 Mar 2018 00:01:37 -0500 Subject: [PATCH 65/81] Update README.md --- README.md | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/README.md b/README.md index 39ab1f72..9a678d6f 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,6 @@ # dbt-utils -This package contains macros that can be (re)used across dbt projects. To use these macros, add this package as a dependency in your `dbt_project.yml` file: - -```yml -repositories: - # Be sure to replace VERSION_NUMBER below! - - https://github.com/fishtown-analytics/dbt-utils.git@VERSION_NUMBER -``` - -It's a good practice to "tag" your dependencies with version numbers. You can find the latest release of this package [here](https://github.com/fishtown-analytics/dbt-utils/tags). +This package contains macros that can be (re)used across dbt projects. ## Macros ### Cross-database From 769794490d24b9efa1bede0a7c36451a630f9166 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Wed, 7 Mar 2018 00:02:12 -0500 Subject: [PATCH 66/81] Update README.md --- README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index 9a678d6f..60e412a4 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,4 @@ -# dbt-utils - -This package contains macros that can be (re)used across dbt projects. +This [dbt](https://github.com/fishtown-analytics/dbt) package contains macros that can be (re)used across dbt projects. ## Macros ### Cross-database From 33ae87c0888e3e59f341028566e996c68103ae77 Mon Sep 17 00:00:00 2001 From: Jeremy Cohen Date: Mon, 26 Mar 2018 10:02:45 -0400 Subject: [PATCH 67/81] updated star macro readme --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 60e412a4..f7f6ce70 100644 --- a/README.md +++ b/README.md @@ -166,11 +166,13 @@ Usage: ``` #### star ([source](macros/sql/star.sql)) -This macro generates a `select` statement for each field that exists in the `from` relation. Fields listed in the `except` argument will be excluded from this list. +This macro generates a list of all fields that exist in the `from` relation, excluding any fields listed in the `except` argument. The construction is identical to `select * from {{ref('my_model')}}`, replacing star (`*`) with the star macro. 
Usage: ``` +select {{ dbt_utils.star(from=ref('my_model'), except=["exclude_field_1", "exclude_field_2"]) }} +from {{ref('my_model')}} ``` #### union_tables ([source](macros/sql/union.sql)) From 3d0a4f08c6cd494019bf76d38bddf15a5407eefc Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Wed, 11 Apr 2018 14:49:02 -0400 Subject: [PATCH 68/81] tests passing on snowflake, add CI --- circle.yml | 24 +++++++++++++++++++ integration_tests/Makefile | 23 ++++++++++++++++++ integration_tests/ci/sample.profiles.yml | 20 ++++++++++++++++ integration_tests/ci/setup_db.sh | 10 ++++++++ .../data/datetime/data_date_spine.csv | 1 - integration_tests/dbt_project.yml | 5 +--- .../models/datetime/test_date_spine.sql | 7 ++++++ .../models/sql/test_generate_series.sql | 7 ++++++ .../test_get_tables_by_prefix_and_union.sql | 14 ++++++++--- .../models/sql/test_nullcheck_table.sql | 10 ++++---- integration_tests/models/sql/test_pivot.sql | 11 ++++++++- integration_tests/models/sql/test_star.sql | 7 +++++- integration_tests/packages.yml | 3 +++ 13 files changed, 127 insertions(+), 15 deletions(-) create mode 100644 circle.yml create mode 100644 integration_tests/Makefile create mode 100644 integration_tests/ci/sample.profiles.yml create mode 100644 integration_tests/ci/setup_db.sh create mode 100644 integration_tests/packages.yml diff --git a/circle.yml b/circle.yml new file mode 100644 index 00000000..bbc1f89a --- /dev/null +++ b/circle.yml @@ -0,0 +1,24 @@ +machine: + timezone: + America/New_York + + python: + version: + 3.6.1 + +database: + override: + - bash integration_tests/ci/setup_db.sh + +dependencies: + pre: + - pip install dbt + - mkdir -p ~/.dbt + - cp integration_tests/ci/sample.profiles.yml ~/.dbt/profiles.yml + +test: + override: + - cd integration_tests + - dbt seed + - dbt run + - dbt test diff --git a/integration_tests/Makefile b/integration_tests/Makefile new file mode 100644 index 00000000..243f960f --- /dev/null +++ b/integration_tests/Makefile @@ -0,0 +1,23 @@ + +test-postgres: + dbt seed --target postgres + dbt run --target postgres + dbt test --target postgres + +test-redshift: + dbt seed --target redshift + dbt run --target redshift + dbt test --target redshift + +test-snowflake: + dbt seed --target snowflake + dbt run --target snowflake + dbt test --target snowflake + +test-bigquery: + dbt seed --target bigquery + dbt run --target bigquery + dbt test --target bigquery + +test-all: test-postgres test-redshift test-snowflake test-bigquery + echo "Completed successfully" diff --git a/integration_tests/ci/sample.profiles.yml b/integration_tests/ci/sample.profiles.yml new file mode 100644 index 00000000..d87fa799 --- /dev/null +++ b/integration_tests/ci/sample.profiles.yml @@ -0,0 +1,20 @@ + +# HEY! This file is used in the dbt-utils integrations tests with CircleCI. +# You should __NEVER__ check credentials into version control. 
Thanks for reading :) + +config: + send_anonymous_usage_stats: False + use_colors: False + +default: + target: ci + outputs: + ci: + type: postgres + host: localhost + user: "{{ env_var('CI_DBT_USER') }}" + pass: "{{ env_var('CI_DBT_PASS') }}" + port: "{{ env_var('CI_DBT_PORT') }}" + dbname: "{{ env_var('CI_DBT_DBNAME') }}" + schema: dbt_utils + threads: 1 diff --git a/integration_tests/ci/setup_db.sh b/integration_tests/ci/setup_db.sh new file mode 100644 index 00000000..e3bdee6f --- /dev/null +++ b/integration_tests/ci/setup_db.sh @@ -0,0 +1,10 @@ + +#!/bin/bash +set -x + +createdb dbt +psql -c "CREATE ROLE root WITH PASSWORD 'password';" +psql -c "ALTER ROLE root WITH LOGIN;" +psql -c "GRANT CREATE, CONNECT ON DATABASE dbt TO root;" + +set +x diff --git a/integration_tests/data/datetime/data_date_spine.csv b/integration_tests/data/datetime/data_date_spine.csv index ef620817..72946922 100644 --- a/integration_tests/data/datetime/data_date_spine.csv +++ b/integration_tests/data/datetime/data_date_spine.csv @@ -8,4 +8,3 @@ date_day 2018-01-07 2018-01-08 2018-01-09 -2018-01-10 diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index 2ca7cda0..77ca601e 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -2,7 +2,7 @@ name: 'dbt_utils_integration_tests' version: '1.0' -profile: 'default' +profile: 'integration_tests' source-paths: ["models"] analysis-paths: ["analysis"] @@ -14,6 +14,3 @@ target-path: "target" # directory which will store compiled SQL files clean-targets: # directories to be removed by `dbt clean` - "target" - "dbt_modules" - -packages: - - local: ../ diff --git a/integration_tests/models/datetime/test_date_spine.sql b/integration_tests/models/datetime/test_date_spine.sql index b8a023e8..fddaef0b 100644 --- a/integration_tests/models/datetime/test_date_spine.sql +++ b/integration_tests/models/datetime/test_date_spine.sql @@ -1,4 +1,11 @@ +-- snowflake doesn't like this as a view because the `generate_series` +-- call creates a CTE called `unioned`, as does the `equality` schema test. +-- Ideally, Snowflake would be smart enough to know that these CTE names are +-- different, as they live in different relations. TODO: use a less common cte name + +{{ config(materialized='table') }} + with date_spine as ( {% if target.type == 'postgres' %} diff --git a/integration_tests/models/sql/test_generate_series.sql b/integration_tests/models/sql/test_generate_series.sql index 5a0a1656..a943cf6c 100644 --- a/integration_tests/models/sql/test_generate_series.sql +++ b/integration_tests/models/sql/test_generate_series.sql @@ -1,4 +1,11 @@ +-- snowflake doesn't like this as a view because the `generate_series` +-- call creates a CTE called `unioned`, as does the `equality` schema test. +-- Ideally, Snowflake would be smart enough to know that these CTE names are +-- different, as they live in different relations. 
TODO: use a less common cte name + +{{ config(materialized='table') }} + with data as ( {{ dbt_utils.generate_series(10) }} diff --git a/integration_tests/models/sql/test_get_tables_by_prefix_and_union.sql b/integration_tests/models/sql/test_get_tables_by_prefix_and_union.sql index f2b6a983..ac1e34a0 100644 --- a/integration_tests/models/sql/test_get_tables_by_prefix_and_union.sql +++ b/integration_tests/models/sql/test_get_tables_by_prefix_and_union.sql @@ -1,4 +1,12 @@ --- twofer -{% set tables = dbt_utils.get_tables_by_prefix(target.schema, 'data_events_') %} -{{ dbt_utils.union_tables(tables) }} +{% if target.type == 'snowflake' %} + + {% set tables = dbt_utils.get_tables_by_prefix((target.schema | upper), 'data_events_') %} + {{ dbt_utils.union_tables(tables) }} + +{% else %} + + {% set tables = dbt_utils.get_tables_by_prefix(target.schema, 'data_events_') %} + {{ dbt_utils.union_tables(tables) }} + +{% endif %} diff --git a/integration_tests/models/sql/test_nullcheck_table.sql b/integration_tests/models/sql/test_nullcheck_table.sql index e987c1d7..883b4e52 100644 --- a/integration_tests/models/sql/test_nullcheck_table.sql +++ b/integration_tests/models/sql/test_nullcheck_table.sql @@ -1,15 +1,15 @@ {% set tbl = ref('data_nullcheck_table') %} -with data as ( +with nulled as ( {{ dbt_utils.nullcheck_table(tbl.schema, tbl.name) }} ) select - {{ dbt_utils.safe_cast('field_1', dbt_utils.type_string()) }} as field_1, - {{ dbt_utils.safe_cast('field_2', dbt_utils.type_string()) }} as field_2, - {{ dbt_utils.safe_cast('field_3', dbt_utils.type_string()) }} as field_3 + field_1::varchar as field_1, + field_2::varchar as field_2, + field_3::varchar as field_3 -from data +from nulled diff --git a/integration_tests/models/sql/test_pivot.sql b/integration_tests/models/sql/test_pivot.sql index 1bffa0ab..a63e6e3b 100644 --- a/integration_tests/models/sql/test_pivot.sql +++ b/integration_tests/models/sql/test_pivot.sql @@ -1,8 +1,17 @@ +-- TODO: How do we make this work nicely on Snowflake too? + +{% if target.type == 'snowflake' %} + {% set column_values = ['RED', 'BLUE'] %} + {% set cmp = 'ilike' %} +{% else %} + {% set column_values = ['red', 'blue'] %} + {% set cmp = '=' %} +{% endif %} select size, - {{ dbt_utils.pivot('color', ['red', 'blue']) }} + {{ dbt_utils.pivot('color', column_values, cmp=cmp) }} from {{ ref('data_pivot') }} group by size diff --git a/integration_tests/models/sql/test_star.sql b/integration_tests/models/sql/test_star.sql index 6ddcd723..3c1af078 100644 --- a/integration_tests/models/sql/test_star.sql +++ b/integration_tests/models/sql/test_star.sql @@ -1,8 +1,13 @@ +-- TODO : Should the star macro use a case-insensitive comparison for the `except` field on Snowflake? + +{% set exclude_field = 'FIELD_3' if target.type == 'snowflake' else 'field_3' %} + + with data as ( select - {{ dbt_utils.star(from=ref('data_star'), except=['field_3']) }} + {{ dbt_utils.star(from=ref('data_star'), except=[exclude_field]) }} from {{ ref('data_star') }} diff --git a/integration_tests/packages.yml b/integration_tests/packages.yml new file mode 100644 index 00000000..7468ac55 --- /dev/null +++ b/integration_tests/packages.yml @@ -0,0 +1,3 @@ + +packages: + - local: ../ From 10696c71dbf7da6adbd2fef7db024e0476d3c4d8 Mon Sep 17 00:00:00 2001 From: Peter Fine Date: Tue, 17 Apr 2018 21:53:16 +0100 Subject: [PATCH 69/81] Update pivot.sql The example code doesn't match the output, because it's looking for colours in the size column. This update fixes it as per the example. 
--- macros/sql/pivot.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/macros/sql/pivot.sql b/macros/sql/pivot.sql index 721e8365..685ae110 100644 --- a/macros/sql/pivot.sql +++ b/macros/sql/pivot.sql @@ -14,8 +14,8 @@ Example: select size, - {{ dbt_utils.pivot('size', dbt_utils.get_column_values('public.test', - 'color')) }} + {{ dbt_utils.pivot('color', dbt_utils.get_column_values('public.test', + 'color')) }} from public.test group by size From 1481b6da85a4f39e12d8bb94e4de067c25b75fff Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Mon, 23 Apr 2018 20:25:15 -0400 Subject: [PATCH 70/81] set up circle? --- circle.yml | 24 ------------------------ 1 file changed, 24 deletions(-) delete mode 100644 circle.yml diff --git a/circle.yml b/circle.yml deleted file mode 100644 index bbc1f89a..00000000 --- a/circle.yml +++ /dev/null @@ -1,24 +0,0 @@ -machine: - timezone: - America/New_York - - python: - version: - 3.6.1 - -database: - override: - - bash integration_tests/ci/setup_db.sh - -dependencies: - pre: - - pip install dbt - - mkdir -p ~/.dbt - - cp integration_tests/ci/sample.profiles.yml ~/.dbt/profiles.yml - -test: - override: - - cd integration_tests - - dbt seed - - dbt run - - dbt test From ea63c25fe17af5fb7f578660b60e1b861ad89011 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Mon, 23 Apr 2018 20:25:27 -0400 Subject: [PATCH 71/81] forgot to add circle dir --- .circleci/config.yml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 .circleci/config.yml diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 00000000..e8126649 --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,25 @@ + +machine: + timezone: + America/New_York + + python: + version: + 3.6.1 + +database: + override: + - bash integration_tests/ci/setup_db.sh + +dependencies: + pre: + - pip install dbt + - mkdir -p ~/.dbt + - cp integration_tests/ci/sample.profiles.yml ~/.dbt/profiles.yml + +test: + override: + - cd integration_tests + - dbt seed + - dbt run + - dbt test From c3dd995fa918765ec3440060d291d58129810b2e Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Mon, 23 Apr 2018 20:28:35 -0400 Subject: [PATCH 72/81] bump to kick off circle --- integration_tests/README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/integration_tests/README.md b/integration_tests/README.md index 7c8b8a67..9bfb251f 100644 --- a/integration_tests/README.md +++ b/integration_tests/README.md @@ -5,3 +5,6 @@ database support: - [x] redshift - [ ] bigquery (partial) - [ ] snowflake (partial) + +todo: + - [ ] run in CI for at least postgres From db93b15d07d9bb9829bf69619a6aea7aef432b44 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Mon, 23 Apr 2018 20:35:38 -0400 Subject: [PATCH 73/81] use v2 of circle config --- .circleci/config.yml | 46 +++++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index e8126649..54179a42 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,25 +1,31 @@ -machine: - timezone: - America/New_York +version: 2 - python: - version: - 3.6.1 +jobs: + build: + docker: + - image: circleci/python:3.6.2-stretch-browsers + - image: circleci/postgres:9.6.5-alpine-ram + environment: + CI_DBT_USER: root + CI_DBT_PASS: '' + CI_DBT_PORT: 5432 + CI_DBT_DBNAME: circle_test -database: - override: - - bash integration_tests/ci/setup_db.sh + steps: + - checkout -dependencies: - pre: - - pip install dbt - - mkdir -p ~/.dbt - - cp 
integration_tests/ci/sample.profiles.yml ~/.dbt/profiles.yml + - run: + name: "Setup dbt" + command: | + pip install dbt + mkdir -p ~/.dbt + cp integration_tests/ci/sample.profiles.yml ~/.dbt/profiles.yml -test: - override: - - cd integration_tests - - dbt seed - - dbt run - - dbt test + - run: + name: "Run tests (postgres)" + command: | + cd integration_tests + dbt seed + dbt run + dbt test From 175ff4fcffe8ad93aee8662e0265ef5bcd49356b Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Mon, 23 Apr 2018 20:38:44 -0400 Subject: [PATCH 74/81] try running in venv bc of permission issue --- .circleci/config.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 54179a42..0caf9b5d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -18,6 +18,8 @@ jobs: - run: name: "Setup dbt" command: | + virtualenv env + source env/bin/activate pip install dbt mkdir -p ~/.dbt cp integration_tests/ci/sample.profiles.yml ~/.dbt/profiles.yml @@ -25,6 +27,7 @@ jobs: - run: name: "Run tests (postgres)" command: | + source env/bin/activate cd integration_tests dbt seed dbt run From 02ae3986d2e460d2aa69682dbcbbd034e1cc3abe Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Mon, 23 Apr 2018 20:41:46 -0400 Subject: [PATCH 75/81] use sane image --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 0caf9b5d..d36d793b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -4,7 +4,7 @@ version: 2 jobs: build: docker: - - image: circleci/python:3.6.2-stretch-browsers + - image: circleci/python:3.6.2-stretch - image: circleci/postgres:9.6.5-alpine-ram environment: CI_DBT_USER: root From 1fb06cbcaeb53e3487c7415a44122a4f5ea1c72f Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Mon, 23 Apr 2018 20:44:43 -0400 Subject: [PATCH 76/81] new approach: read docs --- .circleci/config.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index d36d793b..2dff2e94 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -18,8 +18,8 @@ jobs: - run: name: "Setup dbt" command: | - virtualenv env - source env/bin/activate + python3 -m venv venv + . venv/bin/activate pip install dbt mkdir -p ~/.dbt cp integration_tests/ci/sample.profiles.yml ~/.dbt/profiles.yml @@ -27,7 +27,7 @@ jobs: - run: name: "Run tests (postgres)" command: | - source env/bin/activate + . venv/bin/activate cd integration_tests dbt seed dbt run From 0bfd192244c13a7292e3b4d04699f18c1d50f506 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Mon, 23 Apr 2018 20:49:31 -0400 Subject: [PATCH 77/81] cache dbt, run deps, fix profile --- .circleci/config.yml | 9 +++++++++ integration_tests/ci/sample.profiles.yml | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 2dff2e94..cbd23fc9 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -15,6 +15,9 @@ jobs: steps: - checkout + - restore_cache: + key: deps1-{{ .Branch }} + - run: name: "Setup dbt" command: | @@ -29,6 +32,12 @@ jobs: command: | . 
venv/bin/activate cd integration_tests + dbt deps dbt seed dbt run dbt test + + - save_cache: + key: deps1-{{ .Branch }} + paths: + - "venv" diff --git a/integration_tests/ci/sample.profiles.yml b/integration_tests/ci/sample.profiles.yml index d87fa799..0321a35d 100644 --- a/integration_tests/ci/sample.profiles.yml +++ b/integration_tests/ci/sample.profiles.yml @@ -6,7 +6,7 @@ config: send_anonymous_usage_stats: False use_colors: False -default: +integration_tests: target: ci outputs: ci: From 0d3bfa283be5ba0194d519e3a383851dc3d9ed2c Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Mon, 23 Apr 2018 20:52:34 -0400 Subject: [PATCH 78/81] move env vars? --- .circleci/config.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index cbd23fc9..c9bd5579 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -6,11 +6,6 @@ jobs: docker: - image: circleci/python:3.6.2-stretch - image: circleci/postgres:9.6.5-alpine-ram - environment: - CI_DBT_USER: root - CI_DBT_PASS: '' - CI_DBT_PORT: 5432 - CI_DBT_DBNAME: circle_test steps: - checkout @@ -29,6 +24,11 @@ jobs: - run: name: "Run tests (postgres)" + environment: + CI_DBT_USER: root + CI_DBT_PASS: '' + CI_DBT_PORT: 5432 + CI_DBT_DBNAME: circle_test command: | . venv/bin/activate cd integration_tests From a85352072398f02c723c1693c687efda449a1b5b Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Mon, 23 Apr 2018 20:58:10 -0400 Subject: [PATCH 79/81] tiny tweak, cleanup, add colors --- integration_tests/ci/sample.profiles.yml | 2 +- integration_tests/ci/setup_db.sh | 10 ---------- .../models/cross_db_utils/test_datediff.sql | 4 ++-- 3 files changed, 3 insertions(+), 13 deletions(-) delete mode 100644 integration_tests/ci/setup_db.sh diff --git a/integration_tests/ci/sample.profiles.yml b/integration_tests/ci/sample.profiles.yml index 0321a35d..592daaaa 100644 --- a/integration_tests/ci/sample.profiles.yml +++ b/integration_tests/ci/sample.profiles.yml @@ -4,7 +4,7 @@ config: send_anonymous_usage_stats: False - use_colors: False + use_colors: True integration_tests: target: ci diff --git a/integration_tests/ci/setup_db.sh b/integration_tests/ci/setup_db.sh deleted file mode 100644 index e3bdee6f..00000000 --- a/integration_tests/ci/setup_db.sh +++ /dev/null @@ -1,10 +0,0 @@ - -#!/bin/bash -set -x - -createdb dbt -psql -c "CREATE ROLE root WITH PASSWORD 'password';" -psql -c "ALTER ROLE root WITH LOGIN;" -psql -c "GRANT CREATE, CONNECT ON DATABASE dbt TO root;" - -set +x diff --git a/integration_tests/models/cross_db_utils/test_datediff.sql b/integration_tests/models/cross_db_utils/test_datediff.sql index ea333972..acb7c217 100644 --- a/integration_tests/models/cross_db_utils/test_datediff.sql +++ b/integration_tests/models/cross_db_utils/test_datediff.sql @@ -8,8 +8,8 @@ with data as ( select -- not implemented for postgres {% if target.type == 'postgres' %} - null as actual, - null as expected + null::text as actual, + null::text as expected {% else %} case when datepart = 'hour' then {{ dbt_utils.datediff('first_date', 'second_date', 'hour') }} From af7a5e6524ee351b6e8cb34341558d69925b9909 Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Mon, 14 May 2018 15:53:30 -0400 Subject: [PATCH 80/81] Update README.md --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index f7f6ce70..acb049a8 100644 --- a/README.md +++ b/README.md @@ -255,6 +255,13 @@ Usage: ``` ---- + +### Contributing + +We welcome contributions to this repo! 
To contribute a new feature or a fix, please open a Pull Request with 1) your changes, 2) updated documentation for the `README.md` file, and 3) a working integration test. See [this page](integration_tests/README.md) for more information.
+
+----
+
 ### Getting started with dbt

 - [What is dbt]?
From df867a7ed14bf10fc7ce60413fc90be7b9c43a84 Mon Sep 17 00:00:00 2001
From: Drew Banin
Date: Mon, 14 May 2018 15:59:02 -0400
Subject: [PATCH 81/81] Update README.md

---
 integration_tests/README.md | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/integration_tests/README.md b/integration_tests/README.md
index 9bfb251f..894188d4 100644
--- a/integration_tests/README.md
+++ b/integration_tests/README.md
@@ -1,10 +1,20 @@
 ### dbt integration test suite for dbt-utils

-database support:
- - [x] postgres
- - [x] redshift
- - [ ] bigquery (partial)
- - [ ] snowflake (partial)
-
-todo:
- - [ ] run in CI for at least postgres
+This directory contains an example dbt project which tests the macros in the `dbt-utils` package. An integration test typically involves making 1) a new seed file, 2) a new model file, and 3) a schema test.
+
+For an example integration test, check out the tests for the `get_url_parameter` macro:
+
+1. [Macro definition](https://github.com/fishtown-analytics/dbt-utils/blob/master/macros/web/get_url_parameter.sql)
+2. [Seed file with fake data](https://github.com/fishtown-analytics/dbt-utils/blob/master/integration_tests/data/web/data_urls.csv)
+3. [Model to test the macro](https://github.com/fishtown-analytics/dbt-utils/blob/master/integration_tests/models/web/test_urls.sql)
+4. [A schema test to assert the macro works as expected](https://github.com/fishtown-analytics/dbt-utils/blob/master/integration_tests/models/web/schema.yml#L2)
+
+
+Once you've added all of these files, you should be able to run:
+```
+$ dbt seed
+$ dbt run --model {your_model_name}
+$ dbt test --model {your_model_name}
+```
+
+If the tests all pass, then you're good to go! All tests will be run automatically when you create a PR against this repo.
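
As a concrete reference for the three-file pattern described above, here is a condensed sketch of the `get_url_parameter` test added earlier in this series (only the `utm_source` leg of the committed model is shown; the full version also checks `utm_medium`, and `models/web/schema.yml` pairs the model with an `assert_equal` constraint comparing `actual` to `expected`):

```
-- integration_tests/models/web/test_urls.sql (condensed)

with data as (

    -- the data_urls.csv seed provides url, medium, and source columns
    select * from {{ ref('data_urls') }}

)

select
    -- extract utm_source from the url and compare it to the seeded value
    {{ dbt_utils.get_url_parameter('url', 'utm_source') }} as actual,
    source as expected

from data
```

With the seed, model, and schema test in place, the three commands above exercise the macro end to end.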