From 4546f1d936d42133dee61878520a172f1dccc9c6 Mon Sep 17 00:00:00 2001 From: rafapereirabr Date: Mon, 16 Dec 2024 23:27:03 -0300 Subject: [PATCH] draft query_aggregate_and_match with precision --- R/geocode.R | 14 ++-------- tests/tests_rafa/tests_arrow_vs_duckdb.R | 35 +++++++++++++++--------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/R/geocode.R b/R/geocode.R index 824eb0b..5d21c29 100644 --- a/R/geocode.R +++ b/R/geocode.R @@ -58,8 +58,7 @@ geocode <- function(input_table, checkmate::assert_logical(cache) checkmate::assert_names( names(input_table), - must.include = "ID", - .var.name = "input_table" + must.include = "ID" ) @@ -572,16 +571,7 @@ geocode <- function(input_table, # DBI::dbReadTable(con, 'output_caso_01') # DBI::dbRemoveTable(con, 'output_caso_01') - # DBI::dbRemoveTable(con, 'output_caso_02') - # DBI::dbRemoveTable(con, 'output_caso_03') - # DBI::dbRemoveTable(con, 'output_caso_04') - # DBI::dbRemoveTable(con, 'output_caso_05') - # DBI::dbRemoveTable(con, 'output_caso_06') - # DBI::dbRemoveTable(con, 'output_caso_07') - # DBI::dbRemoveTable(con, 'output_caso_08') - # DBI::dbRemoveTable(con, 'output_caso_09') - # DBI::dbRemoveTable(con, 'output_caso_10') - # DBI::dbRemoveTable(con, 'output_caso_11') + diff --git a/tests/tests_rafa/tests_arrow_vs_duckdb.R b/tests/tests_rafa/tests_arrow_vs_duckdb.R index 2f223e6..db3f02c 100644 --- a/tests/tests_rafa/tests_arrow_vs_duckdb.R +++ b/tests/tests_rafa/tests_arrow_vs_duckdb.R @@ -113,19 +113,28 @@ range_lat <- sd(df$lat) *2 -query_aggregate_and_match <- glue::glue( - "CREATE TABLE {output_tb} AS - WITH pre_aggregated_cnefe AS ( - SELECT {cols_select}, AVG(lon) AS lon, AVG(lat) AS lat, - MAX(lon) - MIN(lon) as range_lon, MAX(lat) - MIN(lat) as range_lat, - FROM {y} - GROUP BY {cols_group} - ) - SELECT {x}.ID, pre_aggregated_cnefe.lon, pre_aggregated_cnefe.lat - FROM {x} AS {x} - LEFT JOIN pre_aggregated_cnefe - ON {join_condition} - WHERE pre_aggregated_cnefe.lon IS NOT NULL;" 
+# Build the SQL text (sprintf only formats it; nothing is executed here) that
+# creates table `output_tb` by matching rows of `x` against per-group
+# aggregates of `y`:
+#   * the CTE pre_aggregated_cnefe groups `y` by `cols_group` and computes the
+#     mean lon/lat plus 2 * sample standard deviation of lon/lat, exposed as
+#     range_lon / range_lat;
+#   * `x` is LEFT JOINed to the CTE on `join_condition`, and the WHERE clause
+#     then drops rows whose joined lon is NULL (e.g. rows of `x` with no match).
+# The %s placeholders are filled strictly by position, so the argument order
+# below must mirror the order of the placeholders in the string.
+# NOTE(review): there is no comma between the first SELECT placeholder and
+# AVG(lon), so `cols_select` presumably has to end with its own trailing
+# comma -- TODO confirm against where `cols_select` is built.
+# NOTE(review): a sample standard deviation is undefined for one-row groups;
+# verify what the database returns for range_lon / range_lat in that case.
+query_aggregate_and_match <- sprintf(
+  "CREATE TABLE %s AS
+  WITH pre_aggregated_cnefe AS (
+    SELECT %s AVG(lon) AS lon, AVG(lat) AS lat,
+    2 * STDDEV_SAMP(lon) as range_lon, 2 * STDDEV_SAMP(lat) as range_lat
+    FROM %s
+    GROUP BY %s
+  )
+  SELECT %s.ID, pre_aggregated_cnefe.lon, pre_aggregated_cnefe.lat,
+  pre_aggregated_cnefe.range_lon, pre_aggregated_cnefe.range_lat
+  FROM %s AS %s
+  LEFT JOIN pre_aggregated_cnefe
+  ON %s
+  WHERE pre_aggregated_cnefe.lon IS NOT NULL;",
+  output_tb, # name of the table to create (CREATE TABLE %s)
+  cols_select, # column list placed right before AVG(lon) in the CTE SELECT
+  y, # table aggregated inside the CTE (FROM %s)
+  cols_group, # GROUP BY columns of the CTE
+  x, # qualifier of the ID column in the outer SELECT (%s.ID)
+  x, x, # outer FROM: table name and its alias (FROM %s AS %s)
+  join_condition # ON clause of the LEFT JOIN
 )
+