From 61df6bf9a3419da340c38cf342c269a2295b1c8f Mon Sep 17 00:00:00 2001
From: rafapereirabr
Date: Wed, 18 Dec 2024 08:58:07 -0300
Subject: [PATCH] rafa tests exploring weighted_avg

---
Notes (ignored by git am, not part of the commit message):
  The query in weighted_avg.R pairs every tableA row with every tableB row
  (LEFT JOIN ... ON TRUE). An equality join on `number` would leave only
  rows whose distance is 0, so every weight would be 1/ABS(0). Exact matches
  are averaged directly; all other ids use inverse-distance weights, with
  NULLIF guarding the zero-distance division.

 tests/tests_rafa/tests_arrow_vs_duckdb.R |  3 ++-
 tests/tests_rafa/weighted_avg.R          | 32 ++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 1 deletion(-)
 create mode 100644 tests/tests_rafa/weighted_avg.R

diff --git a/tests/tests_rafa/tests_arrow_vs_duckdb.R b/tests/tests_rafa/tests_arrow_vs_duckdb.R
index bbea656..051ed5b 100644
--- a/tests/tests_rafa/tests_arrow_vs_duckdb.R
+++ b/tests/tests_rafa/tests_arrow_vs_duckdb.R
@@ -64,7 +64,8 @@ df_duck_dani <- geocodebr:::geocode2(
   addresses_table = input_df,
   address_fields = fields,
   n_cores = 7,
-  progress = T )
+  progress = T
+  )
 tictoc::toc()
 # 900K: 13 secs
 
diff --git a/tests/tests_rafa/weighted_avg.R b/tests/tests_rafa/weighted_avg.R
new file mode 100644
index 0000000..c611934
--- /dev/null
+++ b/tests/tests_rafa/weighted_avg.R
@@ -0,0 +1,32 @@
+I have two tables:
+
+  tableA looks like this:
+
+  id | number
+1 | 100
+2 | 120
+3 | 300
+
+table B looks like this:
+  number | longitude
+100 | -41.1
+100 | -41.101
+150 | -41.3
+300 | -41.2
+
+Write the code in SQL with duckdb to perform left join between tables A and B based on column 'id' so that the longitude of an 'id' will be the average of longitudes in table B weighted by the inverse of the absolute difference between the numbers in table A and table B
+
+
+
+
+"SELECT
+  A.id,
+  -- exact number matches are averaged; otherwise use inverse-distance weights
+  COALESCE(
+    AVG(B.longitude) FILTER (WHERE B.number = A.number),
+    SUM(B.longitude / NULLIF(ABS(A.number - B.number), 0))
+      / SUM(1.0 / NULLIF(ABS(A.number - B.number), 0))
+  ) AS weighted_avg_longitude
+FROM tableA AS A
+LEFT JOIN tableB AS B ON TRUE
+GROUP BY A.id;"