From c7f17b3b92c3e09cd993dcf280b586000ab64b55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20W=C3=B6hrl?= Date: Fri, 15 Dec 2023 10:15:12 +0100 Subject: [PATCH 1/9] Add a yaml generation macro for activities --- .../activity/generate_activity_yml.sql | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 macros/activity_schema/activity/generate_activity_yml.sql diff --git a/macros/activity_schema/activity/generate_activity_yml.sql b/macros/activity_schema/activity/generate_activity_yml.sql new file mode 100644 index 0000000..091e0b6 --- /dev/null +++ b/macros/activity_schema/activity/generate_activity_yml.sql @@ -0,0 +1,74 @@ +-- Get data_types object from model config block +{% macro get_activity_config(model) -%} + {% set relation = ref(model) %} + {% for node in graph.nodes.values() + | selectattr("resource_type", "equalto", "model") + | selectattr("name", "equalto", relation.identifier) %} + {% do return(node.config) %} + {% endfor %} +{%- endmacro %} + +{% macro generate_activity_yml(activities) %} + +{% set yaml_output = [] %} + +{% do yaml_output.append('version: 2') %} +{% do yaml_output.append('models:') %} + +-- Loop through each activity +{% for activity in activities %} + + {% do yaml_output.append(' - name: ' ~ activity) %} + {% do yaml_output.append(' columns:') %} + + {% set columns = [ + {'name': 'activity_id', 'description': 'Unique identifier for the activity.', 'data_type': 'STRING', 'tests': ['unique', 'not_null']}, + {'name': 'customer_id', 'description': 'Identifier for the customer.', 'data_type': 'STRING'}, + {'name': 'activity', 'description': 'Type of activity performed by the customer.', 'data_type': 'STRING', 'tests': ['not_null']}, + {'name': 'ts', 'description': 'Timestamp of when the activity occurred.', 'data_type': 'TIMESTAMP', 'tests': ['not_null']}, + {'name': 'revenue_impact', 'description': 'Revenue impact of the activity, if applicable.', 'data_type': 'INT64'}, + {'name': 'link', 'description': 'Link 
associated with the activity, if applicable.', 'data_type': 'STRING'}, + {'name': 'feature_json', 'description': 'JSON containing additional feature data related to the activity. Contains the following items:', 'data_type': 'JSON'}, + {'name': 'activity_occurrence', 'description': 'Number of times the activity occurred.', 'data_type': 'INT64'}, + {'name': 'activity_repeated_at', 'description': 'Timestamp of when the activity was repeated, if applicable.', 'data_type': 'TIMESTAMP'} + ] + %} + + -- Check for anonymous_customer_column + {% set stream = get_activity_config(activity).stream %} + {%- if dbt_aql.anonymous_customer_column(stream) is not none -%} + {%- do columns.insert(2, {'name': 'anonymous_customer_id', 'description': 'Anonymous identifier for the customer.', 'data_type': 'STRING', 'tests': ['not_null']}) -%} + {%- endif -%} + + {% for column in columns %} + {% do yaml_output.append(' - name: ' ~ column['name']) %} + {% if column['name'] == 'feature_json' %} + -- Call the macro to get feature_json items + {% set data_types = get_activity_config(activity).data_types %} + {% do yaml_output.append(' description: > ') %} + {% do yaml_output.append(' ' ~ column['description']) %} + {% for key, data_type in data_types.items() %} + {% do yaml_output.append(' - ' ~ key ~ ': ' ~ data_type) %} + {% endfor %} + {% else %} + {% do yaml_output.append(' description: "' ~ column['description'] ~ '"') %} + {% do yaml_output.append(' data_type: ' ~ column['data_type']) %} + {% if column['tests'] %} + {% do yaml_output.append(' tests:') %} + {% for test in column['tests'] %} + {% do yaml_output.append(' - ' ~ test) %} + {% endfor %} + {% endif %} + {% endif %} + {% endfor %} + +{% endfor %} + + +{% if execute %} + {% set joined_yaml = yaml_output | join('\n') %} + {{ print(joined_yaml) }} + {% do return(joined_yaml) %} +{% endif %} + +{% endmacro %} From 25c6a99389894c705837b0d47b03cec0dd4f2dd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20W=C3=B6hrl?= Date: Fri, 15 
Dec 2023 10:15:33 +0100 Subject: [PATCH 2/9] Add documentation for the macro --- README.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/README.md b/README.md index bc2219b..0b6501d 100644 --- a/README.md +++ b/README.md @@ -182,6 +182,28 @@ The `build_activity` macro is a convenience function that will take the data fro All other columns will be added automatically and aliased as configured in the project to the final `select` statement that is generated by the `build_activity` macro.

+## **Generating docs and basic tests** +Since each activity table has a similar schema, it is possible to generate the documentation and add basic tests automatically. +The `generate_activity_yml` macro takes a list of activity names as input and prints out the yaml documentation for these. You can use the output as a basis for your documentation and test setup. + +### **Usage** +In your shell, use `dbt run-operation` to run the macro. Replace the activities list with your specific activity names as needed. +``` +dbt run-operation generate_activity_yml --args '{activities: [customer__visited_page, customer__bought_something]}' +``` +Copy the output to a yaml file. E.g. `activities_models.yml` + +### **Input Expectations** +The macro requires a single argument: + +#### **`activities`** +_Description_: +A list of activity names (strings). Each name in this list will generate a separate model entry in the YAML file. + +### **Output** +For each activity the macro will generate a model entry in yaml format containing columns, data types, the keys of the feature_json object as well as basic tests for uniqueness and the absence of null values. +

+ # **Streams** Each Activity Schema should have exactly 1 stream model. The model should be the name of the stream that is registered in the `streams` variable in `dbt_project.yml`. From 72d05f77c0f2000418cf61da139c77a542b83889 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20W=C3=B6hrl?= Date: Tue, 19 Dec 2023 10:12:26 +0100 Subject: [PATCH 3/9] Use dbt's data type macros --- .../activity/generate_activity_yml.sql | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/macros/activity_schema/activity/generate_activity_yml.sql b/macros/activity_schema/activity/generate_activity_yml.sql index 091e0b6..bc51bff 100644 --- a/macros/activity_schema/activity/generate_activity_yml.sql +++ b/macros/activity_schema/activity/generate_activity_yml.sql @@ -21,18 +21,19 @@ {% do yaml_output.append(' - name: ' ~ activity) %} {% do yaml_output.append(' columns:') %} - {% set columns = [ - {'name': 'activity_id', 'description': 'Unique identifier for the activity.', 'data_type': 'STRING', 'tests': ['unique', 'not_null']}, - {'name': 'customer_id', 'description': 'Identifier for the customer.', 'data_type': 'STRING'}, - {'name': 'activity', 'description': 'Type of activity performed by the customer.', 'data_type': 'STRING', 'tests': ['not_null']}, - {'name': 'ts', 'description': 'Timestamp of when the activity occurred.', 'data_type': 'TIMESTAMP', 'tests': ['not_null']}, - {'name': 'revenue_impact', 'description': 'Revenue impact of the activity, if applicable.', 'data_type': 'INT64'}, - {'name': 'link', 'description': 'Link associated with the activity, if applicable.', 'data_type': 'STRING'}, - {'name': 'feature_json', 'description': 'JSON containing additional feature data related to the activity. 
Contains the following items:', 'data_type': 'JSON'}, - {'name': 'activity_occurrence', 'description': 'Number of times the activity occurred.', 'data_type': 'INT64'}, - {'name': 'activity_repeated_at', 'description': 'Timestamp of when the activity was repeated, if applicable.', 'data_type': 'TIMESTAMP'} - ] - %} +{% set columns = [ + {'name': 'activity_id', 'description': 'Unique identifier for the activity.', 'data_type': type_string(), 'tests': ['unique', 'not_null']}, + {'name': 'customer_id', 'description': 'Identifier for the customer.', 'data_type': type_string()}, + {'name': 'activity', 'description': 'Type of activity performed by the customer.', 'data_type': type_string(), 'tests': ['not_null']}, + {'name': 'ts', 'description': 'Timestamp of when the activity occurred.', 'data_type': type_timestamp(), 'tests': ['not_null']}, + {'name': 'revenue_impact', 'description': 'Revenue impact of the activity, if applicable.', 'data_type': type_int()}, + {'name': 'link', 'description': 'Link associated with the activity, if applicable.', 'data_type': type_string()}, + {'name': 'feature_json', 'description': 'JSON containing additional feature data related to the activity. 
Contains the following items:', 'data_type': dbt_aql.type_json()}, + {'name': 'activity_occurrence', 'description': 'Number of times the activity occurred.', 'data_type': type_int()}, + {'name': 'activity_repeated_at', 'description': 'Timestamp of when the activity was repeated, if applicable.', 'data_type': type_timestamp()} +] +%} + -- Check for anonymous_customer_column {% set stream = get_activity_config(activity).stream %} From 0827ba7f286d8caf1350d258574a877d72e65409 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20W=C3=B6hrl?= Date: Tue, 19 Dec 2023 10:13:04 +0100 Subject: [PATCH 4/9] Remove any mentions of 'customer' from col desc --- macros/activity_schema/activity/generate_activity_yml.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/macros/activity_schema/activity/generate_activity_yml.sql b/macros/activity_schema/activity/generate_activity_yml.sql index bc51bff..aa7d9ba 100644 --- a/macros/activity_schema/activity/generate_activity_yml.sql +++ b/macros/activity_schema/activity/generate_activity_yml.sql @@ -23,8 +23,8 @@ {% set columns = [ {'name': 'activity_id', 'description': 'Unique identifier for the activity.', 'data_type': type_string(), 'tests': ['unique', 'not_null']}, - {'name': 'customer_id', 'description': 'Identifier for the customer.', 'data_type': type_string()}, - {'name': 'activity', 'description': 'Type of activity performed by the customer.', 'data_type': type_string(), 'tests': ['not_null']}, + {'name': 'customer_id', 'description': 'Identifier for the entity.', 'data_type': type_string()}, + {'name': 'activity', 'description': 'Type of activity performed.', 'data_type': type_string(), 'tests': ['not_null']}, {'name': 'ts', 'description': 'Timestamp of when the activity occurred.', 'data_type': type_timestamp(), 'tests': ['not_null']}, {'name': 'revenue_impact', 'description': 'Revenue impact of the activity, if applicable.', 'data_type': type_int()}, {'name': 'link', 'description': 'Link associated with 
the activity, if applicable.', 'data_type': type_string()}, @@ -38,7 +38,7 @@ -- Check for anonymous_customer_column {% set stream = get_activity_config(activity).stream %} {%- if dbt_aql.anonymous_customer_column(stream) is not none -%} - {%- do columns.insert(2, {'name': 'anonymous_customer_id', 'description': 'Anonymous identifier for the customer.', 'data_type': 'STRING', 'tests': ['not_null']}) -%} + {%- do columns.insert(2, {'name': 'anonymous_customer_id', 'description': 'Anonymous identifier for the entity.', 'data_type': 'STRING', 'tests': ['not_null']}) -%} {%- endif -%} {% for column in columns %} From 3aa39c9989038044e65c28465f25a18933344eca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20W=C3=B6hrl?= Date: Tue, 19 Dec 2023 10:16:57 +0100 Subject: [PATCH 5/9] Fix missing data_type for feature_json yml --- macros/activity_schema/activity/generate_activity_yml.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/macros/activity_schema/activity/generate_activity_yml.sql b/macros/activity_schema/activity/generate_activity_yml.sql index aa7d9ba..e674e34 100644 --- a/macros/activity_schema/activity/generate_activity_yml.sql +++ b/macros/activity_schema/activity/generate_activity_yml.sql @@ -51,6 +51,7 @@ {% for key, data_type in data_types.items() %} {% do yaml_output.append(' - ' ~ key ~ ': ' ~ data_type) %} {% endfor %} + {% do yaml_output.append(' data_type: ' ~ column['data_type']) %} {% else %} {% do yaml_output.append(' description: "' ~ column['description'] ~ '"') %} {% do yaml_output.append(' data_type: ' ~ column['data_type']) %} From f49eff43000723d41e31d128ded0bd5d9529a4d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20W=C3=B6hrl?= Date: Tue, 19 Dec 2023 10:35:19 +0100 Subject: [PATCH 6/9] Refactor column descriptions to use macro --- .../activity/generate_activity_yml.sql | 47 +++++++++++-------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/macros/activity_schema/activity/generate_activity_yml.sql 
b/macros/activity_schema/activity/generate_activity_yml.sql index e674e34..75516ed 100644 --- a/macros/activity_schema/activity/generate_activity_yml.sql +++ b/macros/activity_schema/activity/generate_activity_yml.sql @@ -8,6 +8,32 @@ {% endfor %} {%- endmacro %} +-- Get column descriptions and tests +{% macro generate_columns(activity) %} + {% set columns = [ + {'name': 'activity_id', 'description': 'Unique identifier for the activity.', 'data_type': type_string(), 'tests': ['unique', 'not_null']}, + {'name': 'customer_id', 'description': 'Identifier for the entity.', 'data_type': type_string()}, + {'name': 'anonymous_customer_id', 'description': 'Anonymous identifier for the customer.', 'data_type': type_string(), 'tests': ['not_null']}, + {'name': 'activity', 'description': 'Type of activity performed.', 'data_type': type_string(), 'tests': ['not_null']}, + {'name': 'ts', 'description': 'Timestamp of when the activity occurred.', 'data_type': type_timestamp(), 'tests': ['not_null']}, + {'name': 'revenue_impact', 'description': 'Revenue impact of the activity, if applicable.', 'data_type': type_int()}, + {'name': 'link', 'description': 'Link associated with the activity, if applicable.', 'data_type': type_string()}, + {'name': 'feature_json', 'description': 'JSON containing additional feature data related to the activity. 
Contains the following items:', 'data_type': dbt_aql.type_json()}, + {'name': 'activity_occurrence', 'description': 'Number of times the activity occurred.', 'data_type': type_int()}, + {'name': 'activity_repeated_at', 'description': 'Timestamp of when the activity was repeated, if applicable.', 'data_type': type_timestamp()} + ] + %} + + -- Check for anonymous_customer_column + {% set stream = get_activity_config(activity).stream %} + {%- if dbt_aql.anonymous_customer_column(stream) is none -%} + {%- set columns = columns | rejectattr("name", "equalto", "anonymous_customer_id") | list -%} + {%- endif -%} + + {% do return(columns) %} + +{% endmacro %} + {% macro generate_activity_yml(activities) %} {% set yaml_output = [] %} @@ -17,30 +43,11 @@ -- Loop through each activity {% for activity in activities %} + {% set columns = generate_columns(activity) %} {% do yaml_output.append(' - name: ' ~ activity) %} {% do yaml_output.append(' columns:') %} -{% set columns = [ - {'name': 'activity_id', 'description': 'Unique identifier for the activity.', 'data_type': type_string(), 'tests': ['unique', 'not_null']}, - {'name': 'customer_id', 'description': 'Identifier for the entity.', 'data_type': type_string()}, - {'name': 'activity', 'description': 'Type of activity performed.', 'data_type': type_string(), 'tests': ['not_null']}, - {'name': 'ts', 'description': 'Timestamp of when the activity occurred.', 'data_type': type_timestamp(), 'tests': ['not_null']}, - {'name': 'revenue_impact', 'description': 'Revenue impact of the activity, if applicable.', 'data_type': type_int()}, - {'name': 'link', 'description': 'Link associated with the activity, if applicable.', 'data_type': type_string()}, - {'name': 'feature_json', 'description': 'JSON containing additional feature data related to the activity. 
Contains the following items:', 'data_type': dbt_aql.type_json()}, - {'name': 'activity_occurrence', 'description': 'Number of times the activity occurred.', 'data_type': type_int()}, - {'name': 'activity_repeated_at', 'description': 'Timestamp of when the activity was repeated, if applicable.', 'data_type': type_timestamp()} -] -%} - - - -- Check for anonymous_customer_column - {% set stream = get_activity_config(activity).stream %} - {%- if dbt_aql.anonymous_customer_column(stream) is not none -%} - {%- do columns.insert(2, {'name': 'anonymous_customer_id', 'description': 'Anonymous identifier for the entity.', 'data_type': 'STRING', 'tests': ['not_null']}) -%} - {%- endif -%} - {% for column in columns %} {% do yaml_output.append(' - name: ' ~ column['name']) %} {% if column['name'] == 'feature_json' %} From ccba5a6bcdbfa3edf8e160e8af6435ddcd8a1028 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20W=C3=B6hrl?= Date: Tue, 19 Dec 2023 12:03:15 +0100 Subject: [PATCH 7/9] Use dynamic column labels --- .../activity/generate_activity_yml.sql | 53 +++++++++++++------ 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/macros/activity_schema/activity/generate_activity_yml.sql b/macros/activity_schema/activity/generate_activity_yml.sql index 75516ed..0c802a8 100644 --- a/macros/activity_schema/activity/generate_activity_yml.sql +++ b/macros/activity_schema/activity/generate_activity_yml.sql @@ -10,30 +10,49 @@ -- Get column descriptions and tests {% macro generate_columns(activity) %} - {% set columns = [ - {'name': 'activity_id', 'description': 'Unique identifier for the activity.', 'data_type': type_string(), 'tests': ['unique', 'not_null']}, - {'name': 'customer_id', 'description': 'Identifier for the entity.', 'data_type': type_string()}, - {'name': 'anonymous_customer_id', 'description': 'Anonymous identifier for the customer.', 'data_type': type_string(), 'tests': ['not_null']}, - {'name': 'activity', 'description': 'Type of activity performed.', 
'data_type': type_string(), 'tests': ['not_null']}, - {'name': 'ts', 'description': 'Timestamp of when the activity occurred.', 'data_type': type_timestamp(), 'tests': ['not_null']}, - {'name': 'revenue_impact', 'description': 'Revenue impact of the activity, if applicable.', 'data_type': type_int()}, - {'name': 'link', 'description': 'Link associated with the activity, if applicable.', 'data_type': type_string()}, - {'name': 'feature_json', 'description': 'JSON containing additional feature data related to the activity. Contains the following items:', 'data_type': dbt_aql.type_json()}, - {'name': 'activity_occurrence', 'description': 'Number of times the activity occurred.', 'data_type': type_int()}, - {'name': 'activity_repeated_at', 'description': 'Timestamp of when the activity was repeated, if applicable.', 'data_type': type_timestamp()} - ] - %} - - -- Check for anonymous_customer_column {% set stream = get_activity_config(activity).stream %} - {%- if dbt_aql.anonymous_customer_column(stream) is none -%} - {%- set columns = columns | rejectattr("name", "equalto", "anonymous_customer_id") | list -%} + {% set schema_columns = dbt_aql.schema_columns() %} + {% set customer_column = dbt_aql.customer_column(stream) %} + {% set anonymous_customer_column = dbt_aql.anonymous_customer_column(stream) %} + + {% set columns = [ + {'name': 'activity_id', 'description': 'Unique identifier for the activity.', 'data_type': type_string(), 'tests': ['unique', 'not_null']}, + {'name': 'customer_id', 'description': 'Identifier for the entity.', 'data_type': type_string()}, + {'name': 'anonymous_customer_id', 'description': 'Anonymous identifier for the entity.', 'data_type': type_string(), 'tests': ['not_null']}, + {'name': 'activity', 'description': 'Type of activity performed.', 'data_type': type_string(), 'tests': ['not_null']}, + {'name': 'ts', 'description': 'Timestamp of when the activity occurred.', 'data_type': type_timestamp(), 'tests': ['not_null']}, + {'name': 
'revenue_impact', 'description': 'Revenue impact of the activity, if applicable.', 'data_type': type_int()}, + {'name': 'link', 'description': 'Link associated with the activity, if applicable.', 'data_type': type_string()}, + {'name': 'feature_json', 'description': 'JSON containing additional feature data related to the activity. Contains the following items:', 'data_type': dbt_aql.type_json()}, + {'name': 'activity_occurrence', 'description': 'Number of times the activity occurred.', 'data_type': type_int()}, + {'name': 'activity_repeated_at', 'description': 'Timestamp of when the activity was repeated, if applicable.', 'data_type': type_timestamp()} + ] %} + + -- Remove unused columns + {%- if anonymous_customer_column is none -%} + {%- set columns = columns | rejectattr("name", "equalto", "anonymous_customer_id") | list -%} {%- endif -%} + {%- if schema_columns.link is not defined -%} + {%- set columns = columns | rejectattr("name", "equalto", "link") | list -%} + {%- endif -%} + + {%- if schema_columns.revenue_impact is not defined -%} + {%- set columns = columns | rejectattr("name", "equalto", "revenue_impact") | list -%} + {%- endif -%} + + -- Update column names based on schema_columns + {% for column in columns %} + {% if column.name in schema_columns %} + {% do column.update({'name': schema_columns[column.name]}) %} + {% endif %} + {% endfor %} + {% do return(columns) %} {% endmacro %} + {% macro generate_activity_yml(activities) %} {% set yaml_output = [] %} From 35f0282a3756397dde304d9967a8df1ed2d2fa99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20W=C3=B6hrl?= Date: Tue, 19 Dec 2023 14:38:01 +0100 Subject: [PATCH 8/9] Formatting --- .../activity/generate_activity_yml.sql | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/macros/activity_schema/activity/generate_activity_yml.sql b/macros/activity_schema/activity/generate_activity_yml.sql index 0c802a8..794c427 100644 --- 
a/macros/activity_schema/activity/generate_activity_yml.sql +++ b/macros/activity_schema/activity/generate_activity_yml.sql @@ -1,11 +1,11 @@ -- Get data_types object from model config block {% macro get_activity_config(model) -%} - {% set relation = ref(model) %} - {% for node in graph.nodes.values() - | selectattr("resource_type", "equalto", "model") - | selectattr("name", "equalto", relation.identifier) %} - {% do return(node.config) %} - {% endfor %} + {% set relation = ref(model) %} + {% for node in graph.nodes.values() + | selectattr("resource_type", "equalto", "model") + | selectattr("name", "equalto", relation.identifier) %} + {% do return(node.config) %} + {% endfor %} {%- endmacro %} -- Get column descriptions and tests @@ -16,17 +16,17 @@ {% set anonymous_customer_column = dbt_aql.anonymous_customer_column(stream) %} {% set columns = [ - {'name': 'activity_id', 'description': 'Unique identifier for the activity.', 'data_type': type_string(), 'tests': ['unique', 'not_null']}, - {'name': 'customer_id', 'description': 'Identifier for the entity.', 'data_type': type_string()}, - {'name': 'anonymous_customer_id', 'description': 'Anonymous identifier for the entity.', 'data_type': type_string(), 'tests': ['not_null']}, - {'name': 'activity', 'description': 'Type of activity performed.', 'data_type': type_string(), 'tests': ['not_null']}, - {'name': 'ts', 'description': 'Timestamp of when the activity occurred.', 'data_type': type_timestamp(), 'tests': ['not_null']}, - {'name': 'revenue_impact', 'description': 'Revenue impact of the activity, if applicable.', 'data_type': type_int()}, - {'name': 'link', 'description': 'Link associated with the activity, if applicable.', 'data_type': type_string()}, - {'name': 'feature_json', 'description': 'JSON containing additional feature data related to the activity. 
Contains the following items:', 'data_type': dbt_aql.type_json()}, - {'name': 'activity_occurrence', 'description': 'Number of times the activity occurred.', 'data_type': type_int()}, - {'name': 'activity_repeated_at', 'description': 'Timestamp of when the activity was repeated, if applicable.', 'data_type': type_timestamp()} - ] %} + {'name': 'activity_id', 'description': 'Unique identifier for the activity.', 'data_type': type_string(), 'tests': ['unique', 'not_null']}, + {'name': 'customer_id', 'description': 'Identifier for the entity.', 'data_type': type_string()}, + {'name': 'anonymous_customer_id', 'description': 'Anonymous identifier for the entity.', 'data_type': type_string(), 'tests': ['not_null']}, + {'name': 'activity', 'description': 'Type of activity performed.', 'data_type': type_string(), 'tests': ['not_null']}, + {'name': 'ts', 'description': 'Timestamp of when the activity occurred.', 'data_type': type_timestamp(), 'tests': ['not_null']}, + {'name': 'revenue_impact', 'description': 'Revenue impact of the activity, if applicable.', 'data_type': type_int()}, + {'name': 'link', 'description': 'Link associated with the activity, if applicable.', 'data_type': type_string()}, + {'name': 'feature_json', 'description': 'JSON containing additional feature data related to the activity. 
Contains the following items:', 'data_type': dbt_aql.type_json()}, + {'name': 'activity_occurrence', 'description': 'Number of times the activity occurred.', 'data_type': type_int()}, + {'name': 'activity_repeated_at', 'description': 'Timestamp of when the activity was repeated, if applicable.', 'data_type': type_timestamp()} + ] %} -- Remove unused columns {%- if anonymous_customer_column is none -%} From 4b78b46414f7de832aff6c40f1c377c0e2a006d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20W=C3=B6hrl?= Date: Tue, 19 Dec 2023 14:38:44 +0100 Subject: [PATCH 9/9] Rename macro for clarity --- macros/activity_schema/activity/generate_activity_yml.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/macros/activity_schema/activity/generate_activity_yml.sql b/macros/activity_schema/activity/generate_activity_yml.sql index 794c427..e4e5f40 100644 --- a/macros/activity_schema/activity/generate_activity_yml.sql +++ b/macros/activity_schema/activity/generate_activity_yml.sql @@ -9,7 +9,7 @@ {%- endmacro %} -- Get column descriptions and tests -{% macro generate_columns(activity) %} +{% macro get_column_descriptions(activity) %} {% set stream = get_activity_config(activity).stream %} {% set schema_columns = dbt_aql.schema_columns() %} {% set customer_column = dbt_aql.customer_column(stream) %} @@ -62,7 +62,7 @@ -- Loop through each activity {% for activity in activities %} - {% set columns = generate_columns(activity) %} + {% set columns = get_column_descriptions(activity) %} {% do yaml_output.append(' - name: ' ~ activity) %} {% do yaml_output.append(' columns:') %}