From 832cd4e39e137638b79d99579a16993999cf26c3 Mon Sep 17 00:00:00 2001 From: Lucie Milan Date: Tue, 6 Dec 2022 14:45:45 +0100 Subject: [PATCH 01/13] added historical loading documentation --- _data/destinations/snowflake/v1/replication.yml | 7 +++++-- _data/tooltips.yml | 2 ++ _destinations/choosing-a-stitch-destination.md | 3 ++- .../snowflake/guides/connecting-snowflake.md | 5 +++++ _includes/destinations/historical-mode.html | 15 +++++++++++++++ .../loading/understanding-loading-behavior.md | 14 ++++++++++++++ 6 files changed, 43 insertions(+), 3 deletions(-) create mode 100644 _includes/destinations/historical-mode.html diff --git a/_data/destinations/snowflake/v1/replication.yml b/_data/destinations/snowflake/v1/replication.yml index 95f344dd6..ea7cc8137 100644 --- a/_data/destinations/snowflake/v1/replication.yml +++ b/_data/destinations/snowflake/v1/replication.yml @@ -23,17 +23,20 @@ rename-original-column-in-split: false # LOADING BEHAVIOR # # ------------------------------ # -configurable-loading-behavior: false +configurable-loading-behavior: true default-loading-behavior: "Upsert" loading-behavior-types: - "Upsert" + - "Historical" loading-behavior-description: | + Loading behavior can be configured for {{ destination.display_name }} destinations. + The following loading behavior types are supported for {{ destination.display_name }} destinations: - {% for loading-behavior-type in site.data.destinations.microsoft-azure.v1.replication.loading-behavior-types %} + {% for loading-behavior-type in site.data.destinations.snowflake.v1.replication.loading-behavior-types %} - {{ loading-behavior-type }} {% endfor %} diff --git a/_data/tooltips.yml b/_data/tooltips.yml index d66cfbd3b..61bc6dd8d 100755 --- a/_data/tooltips.yml +++ b/_data/tooltips.yml @@ -45,6 +45,8 @@ destination: "Destination is the Stitch word for 'data warehouse.' A data wareho extraction-logs: "Logs detailing the Extraction phase of the replication process for a given integration. 
Includes error info, should an error occur." +historical: "When data is loaded using the Historical behavior, records are appended to the end of the table as new rows. Only the `_sdc_end_date` column is updated in existing rows, to indicate when a new version was added. Multiple versions of a row can exist in a table, creating a log of how a record has changed over time." + historical-replication-job: "A Stitch replication job that replicates historical data." historical-sync: "Historical syncs are how far back from the Stitch connection date, by default, Stitch will fetch historical data." diff --git a/_destinations/choosing-a-stitch-destination.md b/_destinations/choosing-a-stitch-destination.md index db3622bbd..858ca6613 100755 --- a/_destinations/choosing-a-stitch-destination.md +++ b/_destinations/choosing-a-stitch-destination.md @@ -76,10 +76,11 @@ sections: content: | Loading behavior determines how data is loaded into your destination. Specifically, how updates are made to existing rows in the destination. - Stitch supports two loading behavior types: + Stitch supports three loading behavior types: - **Upsert**: {{ site.data.tooltips.upsert }} - **Append-Only**: {{ site.data.tooltips.append-only }} + - **Historical**: {{ site.data.tooltips.historical }} The table below lists the default loading behavior for each destination and whether it can be configured. 
diff --git a/_destinations/snowflake/guides/connecting-snowflake.md b/_destinations/snowflake/guides/connecting-snowflake.md index d784b77e7..22d1348da 100755 --- a/_destinations/snowflake/guides/connecting-snowflake.md +++ b/_destinations/snowflake/guides/connecting-snowflake.md @@ -194,6 +194,11 @@ steps: content: | {% include shared/database-connection-settings.html type="general" %} + - title: "Define loading behavior" + anchor: "define-loading-behavior" + content: | + {% include destinations/historical-mode.html %} + - title: "Save the destination" anchor: "save-destination" content: | diff --git a/_includes/destinations/historical-mode.html b/_includes/destinations/historical-mode.html new file mode 100644 index 000000000..94a6cd6df --- /dev/null +++ b/_includes/destinations/historical-mode.html @@ -0,0 +1,15 @@ +{% capture loading-setting-note %} +**Note**: Loading behavior can't be changed after the destination is created. To change {{ destination.display_name }} loading behavior, you'll need to [delete and re-create the destination]({{ link.destinations.switch-destinations | prepend: site.baseurl }}). +{% endcapture %} + +{% include note.html type="single-line" content=loading-setting-note %} + +The last step is to define how Stitch will handle changes to existing records in your {{ destination.display_name }} destination: + +- **Upsert**: Existing rows will be updated with the most recent version of the record from the source. With this option, only the most recent version of a record will exist in {{ destination.display_name }}. + +- **Historical**: Newer versions of existing records are added as new rows to the end of tables. Each time a new version of a record is added, the `_sdc_end_date` column is updated in the previous version of the same record to indicate that it is no longer the most recent version. 
+ +Refer to the [Understanding loading behavior guide]({{ link.destinations.storage.loading-behavior | prepend: site.baseurl }}) for more info and examples. + +**Note**: This setting may impact your {{ destination.display_name }} costs. [Learn more]({{ link.destinations.overviews.bigquery-pricing | prepend: site.baseurl }}). diff --git a/_replication/loading/understanding-loading-behavior.md b/_replication/loading/understanding-loading-behavior.md index 958525f72..388f57ce3 100644 --- a/_replication/loading/understanding-loading-behavior.md +++ b/_replication/loading/understanding-loading-behavior.md @@ -58,6 +58,15 @@ sections: content: | {{ site.data.tooltips.append-only }} + - title: "Historical" + anchor: "loading-behavior-types--historical" + content: | + {{ site.data.tooltips.historical }} + + When a record is added, the `_sdc_start_date` column is set to the loading date, and the `_sdc_end_date` column is set to `9999-12-31`. + When a new version of the same record is added, the `_sdc_end_date` value of the previous version is updated to the loading date of the new version. + This allows you to create a query that returns the version of the record for a specific date or date range. + - title: "Determining loading behavior" anchor: "loading-behavior-determined" summary: "How loading behavior is determined" @@ -89,6 +98,11 @@ sections: - The data doesn't have defined Primary Keys in the source **or** destination, **or** - The integration or table is pre-configured to use Append-Only loading + - title: "Historical loading" + anchor: "historical-conditions" + content: | + Historical loading is used when the destination only supports or is configured to use Historical loading. 
+ - title: "Examples" anchor: "examples" summary: "Examples of each loading behavior type" From 69d1f835200b37e68aeefedd1337eab44bfcd944 Mon Sep 17 00:00:00 2001 From: Lucie Milan Date: Fri, 9 Dec 2022 09:19:56 +0100 Subject: [PATCH 02/13] updated docs after review --- .../loading/understanding-loading-behavior.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/_replication/loading/understanding-loading-behavior.md b/_replication/loading/understanding-loading-behavior.md index 388f57ce3..95b10e62b 100644 --- a/_replication/loading/understanding-loading-behavior.md +++ b/_replication/loading/understanding-loading-behavior.md @@ -61,11 +61,12 @@ sections: - title: "Historical" anchor: "loading-behavior-types--historical" content: | - {{ site.data.tooltips.historical }} - - When a record is added, the `_sdc_start_date` column is set to the loading date, and the `_sdc_end_date` column is set to `9999-12-31`. + When data is loaded using the Historical behavior, records are appended to the end of the table as new rows. + + When a record is added, the `_sdc_start_date` column is set to the loading date, and the `_sdc_end_date` column is set to `9999-12-31 0:00 +00:00`. When a new version of the same record is added, the `_sdc_end_date` value of the previous version is updated to the loading date of the new version. - This allows you to create a query that returns the version of the record for a specific date or date range. + + Multiple versions of a row can exist in a table, creating a log of how a record has changed over time. This means you can create a query that returns the version of the record for a specific date or date range. - title: "Determining loading behavior" anchor: "loading-behavior-determined" @@ -101,7 +102,7 @@ sections: - title: "Historical loading" anchor: "historical-conditions" content: | - Historical loading is used when the destination only supports or is configured to use Historical loading. 
+ Historical loading is only used when the destination is configured to use Historical loading. - title: "Examples" anchor: "examples" From d602653244eae6deaf0ebcf28809915f660caf0f Mon Sep 17 00:00:00 2001 From: Lucie Milan Date: Fri, 9 Dec 2022 09:49:53 +0100 Subject: [PATCH 03/13] TDL-20828 added note about column limits --- _replication/loading/understanding-loading-behavior.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/_replication/loading/understanding-loading-behavior.md b/_replication/loading/understanding-loading-behavior.md index 95b10e62b..5db9cfba1 100644 --- a/_replication/loading/understanding-loading-behavior.md +++ b/_replication/loading/understanding-loading-behavior.md @@ -63,11 +63,13 @@ sections: content: | When data is loaded using the Historical behavior, records are appended to the end of the table as new rows. - When a record is added, the `_sdc_start_date` column is set to the loading date, and the `_sdc_end_date` column is set to `9999-12-31 0:00 +00:00`. + When a record is added, the `_sdc_start_date` column is set to the loading date, and the `_sdc_end_date` column is set to `9999-12-31` (UTC time). When a new version of the same record is added, the `_sdc_end_date` value of the previous version is updated to the loading date of the new version. Multiple versions of a row can exist in a table, creating a log of how a record has changed over time. This means you can create a query that returns the version of the record for a specific date or date range. + **Note**: Since this loading type adds two system columns in the destination table, it will decrease the maximum number of columns available for your data if the destination has a limited number of columns per table. 
+ - title: "Determining loading behavior" anchor: "loading-behavior-determined" summary: "How loading behavior is determined" From c15b851170227b0c258bc3facdd8c5562ce85d46 Mon Sep 17 00:00:00 2001 From: Lucie Milan Date: Mon, 12 Dec 2022 09:03:21 +0100 Subject: [PATCH 04/13] TDL-20829 created page for query examples --- .../loading/querying-historical-tables.md | 100 ++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 _replication/loading/querying-historical-tables.md diff --git a/_replication/loading/querying-historical-tables.md b/_replication/loading/querying-historical-tables.md new file mode 100644 index 000000000..475b0b7be --- /dev/null +++ b/_replication/loading/querying-historical-tables.md @@ -0,0 +1,100 @@ +--- +# -------------------------- # +# PAGE INFO # +# -------------------------- # + +title: Querying Historical Tables +permalink: /replication/loading/querying-historical-tables +keywords: bigquery, google bigquery data warehouse, bigquery data warehouse, bigquery etl, etl to bigquery, historical +summary: "Learn how Historical Loading works and how to account for it in your queries." + +key: "historical-querying" +type: "" + +layout: general +toc: true +order: 1 +content-type: "guide" + + +# -------------------------- # +# INTRO # +# -------------------------- # + +intro: | + {% capture note %} + - [Destinations configured to use Historical Loading]({{ link.destinations.storage.loading-behavior | prepend: site.baseurl | append:"#reference--destinations-loading-behavior" }}) + + {% include note.html first-line="**This guide is applicable to:**" content=note %} + + When data is loaded using [Historical Loading]({{ link.destinations.storage.loading-behavior | prepend: site.baseurl | append:"#reference--destinations-loading-behavior" }}), records are appended to the end of the table as new rows. Only the `_sdc_end_date` column is updated in existing rows, to indicate when a new version was added. 
Multiple versions of a row can exist in a table, creating a log of how a record has changed over time. + + In this guide, we'll cover: + + {% for section in page.sections %} + - [{{ section.summary }}](#{{ section.anchor }}) + {% endfor %} + + +# -------------------------- # +# CONTENT # +# -------------------------- # + +sections: + - title: "Before using this guide" + anchor: "before-using-guide" + summary: "Things to know before using this guide" + content: | + Before using this guide, note that: + + - You may need to modify the queries in this guide to use them yourself + - Stitch Support's expertise lies in replicating data, and as such does not provide data analysis or querying assistance. We can, however, help with data discrepancies. + + If you'd like assistance with analysis or business intelligence solutions, we recommend reaching out to one of our [analytics partners]({{ site.partners }}){:target="new"}. + + - title: "Retrieving the latest version of every record" + anchor: "latest-version" + summary: "A querying strategy that retrieves the latest version of every record" + content: | + {% include note.html type="single-line" content="**Note**: The queries in this section are only intended to demonstrate one approach to querying. You may need to modify the queries to use them yourself." %} + + Let's take a look at an example. Assume we have an `orders` table that contains: + + - A Primary Key of `id`, + - The system `{{ system-column.prefix }}` columns added by Stitch, and + - Other order attribute columns + + - title: "Retrieving the version of every record for a specific date" + anchor: "specific-date" + summary: "A querying strategy that retrieves the version of every record for a specific date" + content: | + {% include note.html type="single-line" content="**Note**: The queries in this section are only intended to demonstrate one approach to querying. You may need to modify the queries to use them yourself." %} + + Let's take a look at an example. 
Assume we have an `orders` table that contains: + + - A Primary Key of `id`, + - The system `{{ system-column.prefix }}` columns added by Stitch, and + - Other order attribute columns + + - title: "Retrieving the version of a specific record for a date range" + anchor: "date-range" + summary: "A querying strategy that retrieves the version of a specific record for a date range" + content: | + {% include note.html type="single-line" content="**Note**: The queries in this section are only intended to demonstrate one approach to querying. You may need to modify the queries to use them yourself." %} + + Let's take a look at an example. Assume we have an `orders` table that contains: + + - A Primary Key of `id`, + - The system `{{ system-column.prefix }}` columns added by Stitch, and + - Other order attribute columns + + - title: "Create views in your destination" + anchor: "create-destination-views" + summary: "How to simplify querying by creating a view in your destination" + content: | + To make this easier, you can turn queries like the one above into a view. We recommend this approach because a view will encapsulate all the logic and simplify the process of querying against the latest version of your data. 
+ + Refer to the documentation for your destination for more info on creating views: + + - [Snowflake]({{ site.data.destinations.snowflake.resource-links.create-views }}){:target="new"} +--- \ No newline at end of file From 45be3153dcc87beff9a89a9563466f9ec58759f0 Mon Sep 17 00:00:00 2001 From: Lucie Milan Date: Mon, 12 Dec 2022 10:16:05 +0100 Subject: [PATCH 05/13] Update querying-historical-tables.md --- .../loading/querying-historical-tables.md | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/_replication/loading/querying-historical-tables.md b/_replication/loading/querying-historical-tables.md index 475b0b7be..82910201b 100644 --- a/_replication/loading/querying-historical-tables.md +++ b/_replication/loading/querying-historical-tables.md @@ -24,6 +24,7 @@ content-type: "guide" intro: | {% capture note %} - [Destinations configured to use Historical Loading]({{ link.destinations.storage.loading-behavior | prepend: site.baseurl | append:"#reference--destinations-loading-behavior" }}) + {% endcapture %} {% include note.html first-line="**This guide is applicable to:**" content=note %} @@ -64,6 +65,21 @@ sections: - The system `{{ system-column.prefix }}` columns added by Stitch, and - Other order attribute columns + If you wanted to get all current records, you could use the following query: + + {% capture code %} + SELECT * FROM orders + WHERE + _sdc_end_date = "9999-12-31 0:00 +00:00" + {% endcapture %} + + {% assign description = "Querying all current records" %} + + {% include layout/code-snippet.html code=code code-description=description %} + + {% include note.html type="single-line" content="**Note**: Since the `_sdc_end_date` value for current records is set to `9999-12-31` UTC, it is recommended to use `9999-12-31 0:00 +00:00` in your queries to make sure you get the correct result regardless of your local time." 
%} + + - title: "Retrieving the version of every record for a specific date" anchor: "specific-date" summary: "A querying strategy that retrieves the version of every record for a specific date" @@ -76,6 +92,19 @@ sections: - The system `{{ system-column.prefix }}` columns added by Stitch, and - Other order attribute columns + If you wanted to get all records valid on December 1st 2022, you could use the following query: + + {% capture code %} + SELECT * FROM orders + WHERE + _sdc_start_date <= "2022-12-01" + AND _sdc_end_date > "2022-12-01" + {% endcapture %} + + {% assign description = "Querying all records for a specific date" %} + + {% include layout/code-snippet.html code=code code-description=description %} + - title: "Retrieving the version of a specific record for a date range" anchor: "date-range" summary: "A querying strategy that retrieves the version of a specific record for a date range" @@ -88,6 +117,20 @@ sections: - The system `{{ system-column.prefix }}` columns added by Stitch, and - Other order attribute columns + If you wanted to get versions of a record with the id `694` valid in all of December 2022, you could use the following query: + + {% capture code %} + SELECT * FROM orders + WHERE + id = 694 + AND _sdc_start_date <= "2022-12-01" + AND _sdc_end_date >= "2022-12-31" + {% endcapture %} + + {% assign description = "Querying the version of a specific record valid for a date range" %} + + {% include layout/code-snippet.html code=code code-description=description %} + - title: "Create views in your destination" anchor: "create-destination-views" summary: "How to simplify querying by creating a view in your destination" From 7f5ab15acfa7e149a51bef75c10d9b1fec307466 Mon Sep 17 00:00:00 2001 From: Lucie Milan Date: Mon, 12 Dec 2022 10:23:11 +0100 Subject: [PATCH 06/13] added links to new guide --- _data/sidebars/stitchnav.yml | 3 +++ _data/urls.yaml | 1 + _replication/loading/loading-category.md | 4 ++++ 3 files changed, 8 insertions(+) diff 
--git a/_data/sidebars/stitchnav.yml b/_data/sidebars/stitchnav.yml index d2581592e..3d2b11c2c 100755 --- a/_data/sidebars/stitchnav.yml +++ b/_data/sidebars/stitchnav.yml @@ -427,6 +427,9 @@ all-docs: - title: Querying append-only tables url: "{{ link.replication.append-only-querying }}" + + - title: Querying historical tables + url: "{{ link.replication.historical-querying }}" - title: Resolving record rejections url: "{{ link.destinations.storage.rejected-records }}" diff --git a/_data/urls.yaml b/_data/urls.yaml index 62794e366..251915046 100755 --- a/_data/urls.yaml +++ b/_data/urls.yaml @@ -277,6 +277,7 @@ replication: full-table: /replication/replication-methods/full-table append-only-querying: /replication/loading/querying-append-only-tables + historical-querying: /replication/loading/querying-historical-tables deleted-records: "/replication/deleted-record-handling" ## Replication Keys diff --git a/_replication/loading/loading-category.md b/_replication/loading/loading-category.md index f092c4da5..c6a4a6ee8 100755 --- a/_replication/loading/loading-category.md +++ b/_replication/loading/loading-category.md @@ -78,6 +78,10 @@ sections: - title: "Querying Append-Only Tables" url: "{{ link.replication.append-only-querying }}" weight: 4 + + - title: "Querying Historical Tables" + url: "{{ link.replication.historical-querying }}" + weight: 5 content: | Resources and tutorials for interacting with data loaded by Stitch into your destination. 
From d007d934bc1ef06c47bc2aac5ef6e150e6ef9d4a Mon Sep 17 00:00:00 2001 From: Lucie Milan Date: Mon, 12 Dec 2022 10:35:58 +0100 Subject: [PATCH 07/13] fixed indent issue --- .../loading/querying-historical-tables.md | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/_replication/loading/querying-historical-tables.md b/_replication/loading/querying-historical-tables.md index 82910201b..a40ba838f 100644 --- a/_replication/loading/querying-historical-tables.md +++ b/_replication/loading/querying-historical-tables.md @@ -67,15 +67,15 @@ sections: If you wanted to get all current records, you could use the following query: - {% capture code %} - SELECT * FROM orders - WHERE - _sdc_end_date = "9999-12-31 0:00 +00:00" - {% endcapture %} + {% capture code %} + SELECT * FROM orders + WHERE + _sdc_end_date = '9999-12-31 0:00 +00:00' + {% endcapture %} - {% assign description = "Querying all current records" %} + {% assign description = "Querying all current records" %} - {% include layout/code-snippet.html code=code code-description=description %} + {% include layout/code-snippet.html code=code code-description=description %} {% include note.html type="single-line" content="**Note**: Since the `_sdc_end_date` value for current records is set to `9999-12-31` UTC, it is recommended to use `9999-12-31 0:00 +00:00` in your queries to make sure you get the correct result regardless of your local time." 
%} @@ -94,16 +94,16 @@ sections: If you wanted to get all records valid on December 1st 2022, you could use the following query: - {% capture code %} - SELECT * FROM orders - WHERE - _sdc_start_date <= "2022-12-01" - AND _sdc_end_date > "2022-12-01" - {% endcapture %} + {% capture code %} + SELECT * FROM orders + WHERE + _sdc_start_date <= '2022-12-01' + AND _sdc_end_date > '2022-12-01' + {% endcapture %} - {% assign description = "Querying all records for a specific date" %} + {% assign description = "Querying all records for a specific date" %} - {% include layout/code-snippet.html code=code code-description=description %} + {% include layout/code-snippet.html code=code code-description=description %} - title: "Retrieving the version of a specific record for a date range" anchor: "date-range" @@ -119,17 +119,17 @@ sections: If you wanted to get versions of a record with the id `694` valid in all of December 2022, you could use the following query: - {% capture code %} - SELECT * FROM orders - WHERE - id = 694 - AND _sdc_start_date <= "2022-12-01" - AND _sdc_end_date >= "2022-12-31" - {% endcapture %} + {% capture code %} + SELECT * FROM orders + WHERE + id = 694 + AND _sdc_start_date <= '2022-12-01' + AND _sdc_end_date >= '2022-12-31' + {% endcapture %} - {% assign description = "Querying the version of a specific record valid for a date range" %} + {% assign description = "Querying the version of a specific record valid for a date range" %} - {% include layout/code-snippet.html code=code code-description=description %} + {% include layout/code-snippet.html code=code code-description=description %} - title: "Create views in your destination" anchor: "create-destination-views" From 32798bc751a05863323edad3226fe9e4cdc842db Mon Sep 17 00:00:00 2001 From: Lucie Milan Date: Mon, 12 Dec 2022 10:43:37 +0100 Subject: [PATCH 08/13] Update querying-historical-tables.md --- _replication/loading/querying-historical-tables.md | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/_replication/loading/querying-historical-tables.md b/_replication/loading/querying-historical-tables.md index a40ba838f..00d0fdc70 100644 --- a/_replication/loading/querying-historical-tables.md +++ b/_replication/loading/querying-historical-tables.md @@ -117,7 +117,7 @@ sections: - The system `{{ system-column.prefix }}` columns added by Stitch, and - Other order attribute columns - If you wanted to get versions of a record with the id `694` valid in all of December 2022, you could use the following query: + If you wanted to get the version of a record with the ID `694` valid in all of December 2022, you could use the following query: {% capture code %} SELECT * FROM orders From be9e1068e3aa0569819ce41ba85f9d2d91b57154 Mon Sep 17 00:00:00 2001 From: Lucie Milan Date: Thu, 15 Dec 2022 13:37:24 +0100 Subject: [PATCH 09/13] added historical loading example --- .../loading/understanding-loading-behavior.md | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/_replication/loading/understanding-loading-behavior.md b/_replication/loading/understanding-loading-behavior.md index 5db9cfba1..4f1306273 100644 --- a/_replication/loading/understanding-loading-behavior.md +++ b/_replication/loading/understanding-loading-behavior.md @@ -148,6 +148,28 @@ sections: {% include layout/image.html enlarge=true file="/replication/append-only-no-primary-key.png" alt="Click to enlarge: Append-Only loading as a result of no defined Primary Keys" %} + - title: "Historical loading example" + anchor: "example--historical-loading" + summary: "Historical loading" + content: | + In this example, the destination is configured to use Historical loading. The `id` column is the table's Primary Key. + + The following records are added to the destination table in a first replication job. 
The `_sdc_end_date` column is set to `9999-12-31` to indicate that these are the latest versions of these records: + + |id|status|_sdc_start_date|_sdc_end_date| + |---|---|---|---| + |abc-123|Pending|2022-10-21|**9999-12-31**| + |def-456|Pending|2022-06-25|**9999-12-31**| + + + One of the records is then updated in the source. A second replication job creates a new version of the existing record on December 14, 2022. The previous version's `_sdc_end_date` value is updated and the new version is added to the table. The destination table now looks like this: + + |id|status|_sdc_start_date|_sdc_end_date| + |---|---|---|---| + |abc-123|Pending|2022-10-21|**2022-12-14**| + |def-456|Pending|2022-06-25|9999-12-31| + |abc-123|In progress|**2022-12-14**|9999-12-31| + - title: "Reference" anchor: "reference" summary: "References lists for destinations, integrations, and loading behavior" From 34ae7c35e4ef6fe2c2c5531f60af79bb9791c168 Mon Sep 17 00:00:00 2001 From: Lucie Milan Date: Thu, 15 Dec 2022 13:51:12 +0100 Subject: [PATCH 10/13] updated example --- _replication/loading/understanding-loading-behavior.md | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/_replication/loading/understanding-loading-behavior.md b/_replication/loading/understanding-loading-behavior.md index 4f1306273..2b8b1cc31 100644 --- a/_replication/loading/understanding-loading-behavior.md +++ b/_replication/loading/understanding-loading-behavior.md @@ -154,20 +154,16 @@ sections: content: | In this example, the destination is configured to use Historical loading. The `id` column is the table's Primary Key. - The following records are added to the destination table in a first replication job. The `_sdc_end_date` column is set to `9999-12-31` to indicate that these are the latest versions of these records: - + The following record is added to the destination table in a first replication job. 
The `_sdc_end_date` column is set to `9999-12-31` to indicate that this is the latest version of this record: |id|status|_sdc_start_date|_sdc_end_date| |---|---|---|---| |abc-123|Pending|2022-10-21|**9999-12-31**| - |def-456|Pending|2022-06-25|**9999-12-31**| - One of the records is then updated in the source. A second replication job creates a new version of the existing record on December 14, 2022. The previous version's `_sdc_end_date` value is updated and the new version is added to the table. The destination table now looks like this: - + The record is then updated in the source. A second replication job creates a new version of the existing record on December 14, 2022. The previous version's `_sdc_end_date` value is updated and the new version is added to the table. The destination table now looks like this: |id|status|_sdc_start_date|_sdc_end_date| |---|---|---|---| |abc-123|Pending|2022-10-21|**2022-12-14**| - |def-456|Pending|2022-06-25|9999-12-31| |abc-123|In progress|**2022-12-14**|9999-12-31| - title: "Reference" From 9d01d401d15672d07e765ad65e6d5f4c5943aa36 Mon Sep 17 00:00:00 2001 From: Lucie Milan Date: Thu, 15 Dec 2022 15:50:58 +0100 Subject: [PATCH 11/13] Update understanding-loading-behavior.md --- _replication/loading/understanding-loading-behavior.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/_replication/loading/understanding-loading-behavior.md b/_replication/loading/understanding-loading-behavior.md index 2b8b1cc31..bbad5adf5 100644 --- a/_replication/loading/understanding-loading-behavior.md +++ b/_replication/loading/understanding-loading-behavior.md @@ -155,12 +155,14 @@ sections: In this example, the destination is configured to use Historical loading. The `id` column is the table's Primary Key. The following record is added to the destination table in a first replication job. 
The `_sdc_end_date` column is set to `9999-12-31` to indicate that this is the latest version of this record: + |id|status|_sdc_start_date|_sdc_end_date| |---|---|---|---| |abc-123|Pending|2022-10-21|**9999-12-31**| The record is then updated in the source. A second replication job creates a new version of the existing record on December 14, 2022. The previous version's `_sdc_end_date` value is updated and the new version is added to the table. The destination table now looks like this: + |id|status|_sdc_start_date|_sdc_end_date| |---|---|---|---| |abc-123|Pending|2022-10-21|**2022-12-14**| From de91001c18a7199badd027a187f6a04ab9ca1734 Mon Sep 17 00:00:00 2001 From: Lucie Milan Date: Fri, 24 Feb 2023 09:48:24 +0100 Subject: [PATCH 12/13] updated wording to History mode --- .../destinations/snowflake/v1/replication.yml | 2 +- _data/sidebars/stitchnav.yml | 2 +- _data/tooltips.yml | 2 +- _data/urls.yaml | 2 +- .../choosing-a-stitch-destination.md | 2 +- .../snowflake/guides/connecting-snowflake.md | 2 +- ...historical-mode.html => history-mode.html} | 2 +- _replication/loading/loading-category.md | 4 ++-- ...les.md => querying-history-mode-tables.md} | 12 +++++------ .../loading/understanding-loading-behavior.md | 20 +++++++++---------- 10 files changed, 25 insertions(+), 25 deletions(-) rename _includes/destinations/{historical-mode.html => history-mode.html} (78%) rename _replication/loading/{querying-historical-tables.md => querying-history-mode-tables.md} (87%) diff --git a/_data/destinations/snowflake/v1/replication.yml b/_data/destinations/snowflake/v1/replication.yml index ea7cc8137..7fe9d4a74 100644 --- a/_data/destinations/snowflake/v1/replication.yml +++ b/_data/destinations/snowflake/v1/replication.yml @@ -29,7 +29,7 @@ default-loading-behavior: "Upsert" loading-behavior-types: - "Upsert" - - "Historical" + - "History Mode" loading-behavior-description: | Loading behavior can be configured for {{ destination.display_name }} destinations. 
diff --git a/_data/sidebars/stitchnav.yml b/_data/sidebars/stitchnav.yml index 3d2b11c2c..b52ffc36d 100755 --- a/_data/sidebars/stitchnav.yml +++ b/_data/sidebars/stitchnav.yml @@ -429,7 +429,7 @@ all-docs: url: "{{ link.replication.append-only-querying }}" - title: Querying historical tables - url: "{{ link.replication.historical-querying }}" + url: "{{ link.replication.history-mode-querying }}" - title: Resolving record rejections url: "{{ link.destinations.storage.rejected-records }}" diff --git a/_data/tooltips.yml b/_data/tooltips.yml index 61bc6dd8d..604f7ca6c 100755 --- a/_data/tooltips.yml +++ b/_data/tooltips.yml @@ -45,7 +45,7 @@ destination: "Destination is the Stitch word for 'data warehouse.' A data wareho extraction-logs: "Logs detailing the Extraction phase of the replication process for a given integration. Includes error info, should an error occur." -historical: "When data is loaded using the Historical behavior, records are appended to the end of the table as new rows. Only the `_sdc_end_date` column is updated in existing rows, to indicate when a new version was added. Multiple versions of a row can exist in a table, creating a log of how a record has changed over time." +history-mode: "When data is loaded using the History mode, records are appended to the end of the table as new rows. Only the `_sdc_end_date` column is updated in existing rows, to indicate when a new version was added. Multiple versions of a row can exist in a table, creating a log of how a record has changed over time." historical-replication-job: "A Stitch replication job that replicates historical data." 
diff --git a/_data/urls.yaml b/_data/urls.yaml index 251915046..4682d4a6d 100755 --- a/_data/urls.yaml +++ b/_data/urls.yaml @@ -277,7 +277,7 @@ replication: full-table: /replication/replication-methods/full-table append-only-querying: /replication/loading/querying-append-only-tables - historical-querying: /replication/loading/querying-historical-tables + history-mode-querying: /replication/loading/querying-history-mode-tables deleted-records: "/replication/deleted-record-handling" ## Replication Keys diff --git a/_destinations/choosing-a-stitch-destination.md b/_destinations/choosing-a-stitch-destination.md index 858ca6613..bf180b53b 100755 --- a/_destinations/choosing-a-stitch-destination.md +++ b/_destinations/choosing-a-stitch-destination.md @@ -80,7 +80,7 @@ sections: - **Upsert**: {{ site.data.tooltips.upsert }} - **Append-Only**: {{ site.data.tooltips.append-only }} - - **Historical**: {{ site.data.tooltips.historical }} + - **History Mode**: {{ site.data.tooltips.history-mode }} The table below lists the default loading behavior for each destination and whether it can be configured. 
diff --git a/_destinations/snowflake/guides/connecting-snowflake.md b/_destinations/snowflake/guides/connecting-snowflake.md index 22d1348da..be137a3ad 100755 --- a/_destinations/snowflake/guides/connecting-snowflake.md +++ b/_destinations/snowflake/guides/connecting-snowflake.md @@ -197,7 +197,7 @@ steps: - title: "Define loading behavior" anchor: "define-loading-behavior" content: | - {% include destinations/historical-mode.html %} + {% include destinations/history-mode.html %} - title: "Save the destination" anchor: "save-destination" diff --git a/_includes/destinations/historical-mode.html b/_includes/destinations/history-mode.html similarity index 78% rename from _includes/destinations/historical-mode.html rename to _includes/destinations/history-mode.html index 94a6cd6df..6e33d659b 100644 --- a/_includes/destinations/historical-mode.html +++ b/_includes/destinations/history-mode.html @@ -8,7 +8,7 @@ - **Upsert**: Existing rows will be updated with the most recent version of the record from the source. With this option, only the most recent version of a record will exist in {{ destination.display_name }}. -- **Historical**: Newer versions of existing records are added as new rows to the end of tables. Each time a new version of a record is added, the `_sdc_end_date` column is updated in the previous version of the same record to indicate that it is no longer the most recent version. +- **History Mode**: Newer versions of existing records are added as new rows to the end of tables. Each time a new version of a record is added, the `_sdc_end_date` column is updated in the previous version of the same record to indicate that it is no longer the most recent version. Refer to the [Understanding loading behavior guide]({{ link.destinations.storage.loading-behavior | prepend: site.baseurl }}) for more info and examples. 
diff --git a/_replication/loading/loading-category.md b/_replication/loading/loading-category.md index c6a4a6ee8..69092848b 100755 --- a/_replication/loading/loading-category.md +++ b/_replication/loading/loading-category.md @@ -79,8 +79,8 @@ sections: url: "{{ link.replication.append-only-querying }}" weight: 4 - - title: "Querying Historical Tables" - url: "{{ link.replication.historical-querying }}" + - title: "Querying History Mode Tables" + url: "{{ link.replication.history-mode-querying }}" weight: 5 content: | Resources and tutorials for interacting with data loaded by Stitch into your destination. diff --git a/_replication/loading/querying-historical-tables.md b/_replication/loading/querying-history-mode-tables.md similarity index 87% rename from _replication/loading/querying-historical-tables.md rename to _replication/loading/querying-history-mode-tables.md index 00d0fdc70..3e45b20ac 100644 --- a/_replication/loading/querying-historical-tables.md +++ b/_replication/loading/querying-history-mode-tables.md @@ -3,12 +3,12 @@ # PAGE INFO # # -------------------------- # -title: Querying Historical Tables -permalink: /replication/loading/querying-historical-tables +title: Querying History Mode Tables +permalink: /replication/loading/querying-history-mode-tables keywords: bigquery, google bigquery data warehouse, bigquery data warehouse, bigquery etl, etl to bigquery, historical -summary: "Learn how Historical Loading works and how to account for it in your queries." +summary: "Learn how History mode works and how to account for it in your queries." 
-key: "historical-querying" +key: "history-mode-querying" type: "" layout: general @@ -23,12 +23,12 @@ content-type: "guide" intro: | {% capture note %} - - [Destinations configured to use Historical Loading]({{ link.destinations.storage.loading-behavior | prepend: site.baseurl | append:"#reference--destinations-loading-behavior" }}) + - [Destinations configured to use History mode]({{ link.destinations.storage.loading-behavior | prepend: site.baseurl | append:"#reference--destinations-loading-behavior" }}) {% endcapture %} {% include note.html first-line="**This guide is applicable to:**" content=note %} - When data is loaded using [Historical Loading]({{ link.destinations.storage.loading-behavior | prepend: site.baseurl | append:"#reference--destinations-loading-behavior" }}), records are appended to the end of the table as new rows. Only the `_sdc_end_date` column is updated in existing rows, to indicate when a new version was added. Multiple versions of a row can exist in a table, creating a log of how a record has changed over time. + When data is loaded using [History mode]({{ link.destinations.storage.loading-behavior | prepend: site.baseurl | append:"#reference--destinations-loading-behavior" }}), records are appended to the end of the table as new rows. Only the `_sdc_end_date` column is updated in existing rows, to indicate when a new version was added. Multiple versions of a row can exist in a table, creating a log of how a record has changed over time. 
In this guide, we'll cover: diff --git a/_replication/loading/understanding-loading-behavior.md b/_replication/loading/understanding-loading-behavior.md index bbad5adf5..5d09c2db3 100644 --- a/_replication/loading/understanding-loading-behavior.md +++ b/_replication/loading/understanding-loading-behavior.md @@ -58,10 +58,10 @@ sections: content: | {{ site.data.tooltips.append-only }} - - title: "Historical" - anchor: "loading-behavior-types--historical" + - title: "History Mode" + anchor: "loading-behavior-types--history-mode" content: | - When data is loaded using the Historical behavior, records are appended to the end of the table as new rows. + When data is loaded using the History mode, records are appended to the end of the table as new rows. When a record is added, the `_sdc_start_date` column is set to the loading date, and the `_sdc_end_date` column is set to `9999-12-31` (UTC time). When a new version of the same record is added, the `_sdc_end_date` value of the previous version is updated to the loading date of the new version. @@ -101,10 +101,10 @@ sections: - The data doesn't have defined Primary Keys in the source **or** destination, **or** - The integration or table is pre-configured to use Append-Only loading - - title: "Historical loading" - anchor: "historical-conditions" + - title: "History mode" + anchor: "history-mode-conditions" content: | - Historical loading is only used when the destination is configured to use Historical loading. + History mode is only used when the destination is configured to use History mode. 
- title: "Examples" anchor: "examples" @@ -148,11 +148,11 @@ sections: {% include layout/image.html enlarge=true file="/replication/append-only-no-primary-key.png" alt="Click to enlarge: Append-Only loading as a result of no defined Primary Keys" %} - - title: "Historical loading example" - anchor: "example--historical-loading" - summary: "Historical loading" + - title: "History mode example" + anchor: "example--history-mode-loading" + summary: "History mode" content: | - In this example, the destination is configured to use Historical loading. The `id` column is the table's Primary Key. + In this example, the destination is configured to use History mode. The `id` column is the table's Primary Key. The following record is added to the destination table in a first replication job. The `_sdc_end_date` column is set to `9999-12-31` to indicate that this is the latest version of this record: From 65664d62b09f29a226ac1d8f8bf30252b1388cdf Mon Sep 17 00:00:00 2001 From: Lucie Milan Date: Fri, 24 Feb 2023 09:50:02 +0100 Subject: [PATCH 13/13] Update stitchnav.yml --- _data/sidebars/stitchnav.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_data/sidebars/stitchnav.yml b/_data/sidebars/stitchnav.yml index b52ffc36d..27fdc0092 100755 --- a/_data/sidebars/stitchnav.yml +++ b/_data/sidebars/stitchnav.yml @@ -428,7 +428,7 @@ all-docs: - title: Querying append-only tables url: "{{ link.replication.append-only-querying }}" - - title: Querying historical tables + - title: Querying History mode tables url: "{{ link.replication.history-mode-querying }}" - title: Resolving record rejections