forked from DataExpert-io/data-engineer-handbook
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request DataExpert-io#181 from isangwanrahul/main
Add Week2 Lab 3 sql files
- Loading branch information
Showing
2 changed files
with
65 additions
and
0 deletions.
There are no files selected for viewing
59 changes: 59 additions & 0 deletions
59
bootcamp/materials/2-fact-data-modeling/lecture-lab/array_metrics_analysis.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
-- Insert aggregated metrics into the array_metrics table | ||
INSERT INTO array_metrics | ||
WITH daily_aggregate AS ( | ||
-- Aggregate daily site hits per user | ||
SELECT | ||
user_id, | ||
DATE(event_time) AS date, | ||
COUNT(1) AS num_site_hits | ||
FROM events | ||
WHERE DATE(event_time) = DATE('2023-01-01') | ||
AND user_id IS NOT NULL | ||
GROUP BY user_id, DATE(event_time) | ||
), | ||
yesterday_array AS ( | ||
-- Retrieve existing metrics for the month starting from '2023-01-01' | ||
SELECT * | ||
FROM array_metrics | ||
WHERE month_start = DATE('2023-01-01') | ||
) | ||
SELECT | ||
-- Select user_id from either daily_aggregate or yesterday_array | ||
COALESCE( da.user_id, ya.user_id) AS user_id, | ||
-- Determine month_start date | ||
COALESCE(ya.month_start, DATE_TRUNC('month', da.date)) AS month_start, | ||
-- Set metric name to 'site_hits' | ||
'site_hits' AS metric_name, | ||
-- Update metric_array based on existing data and new daily aggregates | ||
CASE | ||
WHEN ya.metric_array IS NOT NULL THEN | ||
ya.metric_array || ARRAY[COALESCE(da.num_site_hits,0)] | ||
WHEN ya.metric_array IS NULL THEN | ||
ARRAY_FILL(0, ARRAY[COALESCE (date - DATE(DATE_TRUNC('month', date)), 0)]) | ||
|| ARRAY[COALESCE(da.num_site_hits,0)] | ||
END AS metric_array | ||
FROM daily_aggregate da | ||
FULL OUTER JOIN yesterday_array ya | ||
ON da.user_id = ya.user_id | ||
ON CONFLICT (user_id, month_start, metric_name) | ||
DO | ||
UPDATE SET metric_array = EXCLUDED.metric_array; | ||
|
||
-- Uncomment and run the following query to verify the cardinality of metric_array | ||
-- SELECT cardinality(metric_array), COUNT(1) | ||
-- FROM array_metrics | ||
-- GROUP BY 1; | ||
|
||
-- Aggregate metrics by summing specific elements in the metric_array | ||
WITH agg AS ( | ||
SELECT metric_name, month_start, ARRAY[SUM(metric_array[1]), SUM(metric_array[2]), SUM(metric_array[3])] AS summed_array | ||
FROM array_metrics | ||
GROUP BY metric_name, month_start | ||
) | ||
-- Select and display the metric_name, date (adjusted by index), and summed value | ||
SELECT | ||
metric_name, | ||
month_start + CAST(CAST(index - 1 AS TEXT) || ' day' AS INTERVAL) AS adjusted_date, | ||
elem AS value | ||
FROM agg | ||
CROSS JOIN UNNEST(agg.summed_array) WITH ORDINALITY AS a(elem, index); |
6 changes: 6 additions & 0 deletions
6
bootcamp/materials/2-fact-data-modeling/tables/array_metrics_ddl.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
CREATE TABLE array_metrics ( | ||
user_id NUMERIC, | ||
month_start DATE, | ||
metric_name TEXT, | ||
metric_array REAL[], | ||
PRIMARY KEY (user_id, month_start, metric_name)) |