Skip to content

Commit

Permalink
closes #24
Browse files Browse the repository at this point in the history
  • Loading branch information
rafapereirabr committed Jan 22, 2025
1 parent 622f6d7 commit d845920
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 19 deletions.
27 changes: 17 additions & 10 deletions R/geocode.R
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,21 @@ geocode <- function(enderecos,
# keep and rename columns of input_padrao to use the
# same column names used in cnefe data set
data.table::setDT(input_padrao)
cols_to_keep <- names(input_padrao)[! names(input_padrao) %in% campos_endereco]
cols_to_keep <- names(input_padrao)[ names(input_padrao) %like% '_padr']
input_padrao <- input_padrao[, .SD, .SDcols = c(cols_to_keep)]
names(input_padrao) <- c(gsub("_padr", "", names(input_padrao)))

if ('logradouro' %in% names(input_padrao)) {
data.table::setnames(
x = input_padrao,
old = c('logradouro', 'bairro'),
new = c('logradouro_sem_numero', 'localidade'))
x = input_padrao, old = 'logradouro', new = 'logradouro_sem_numero'
)
}

if ('bairro' %in% names(input_padrao)) {
data.table::setnames(
x = input_padrao, old = 'bairro', new = 'localidade'
)
}

# create temp id
input_padrao[, tempidgeocodebr := 1:nrow(input_padrao) ]
Expand All @@ -123,17 +130,17 @@ geocode <- function(enderecos,

# Convert input data frame to DuckDB table
duckdb::dbWriteTable(con, "input_padrao_db", input_padrao,
overwrite = TRUE, temporary = TRUE)

overwrite = TRUE, temporary = TRUE
)

# create an empty output table that will be populated -----------------------------------------------

additional_cols <- ""
if (isTRUE(resultado_completo)) {
additional_cols <- glue::glue(
", endereco_encontrado VARCHAR, logradouro_encontrado VARCHAR, ",
"numero_encontrado VARCHAR, localidade_encontrada VARCHAR, ",
"cep_encontrado VARCHAR, municipio_encontrado VARCHAR, estado_encontrado VARCHAR"
", endereco_encontrado VARCHAR, logradouro_encontrado VARCHAR,
numero_encontrado VARCHAR, localidade_encontrada VARCHAR,
cep_encontrado VARCHAR, municipio_encontrado VARCHAR, estado_encontrado VARCHAR"
)
}

Expand All @@ -143,7 +150,7 @@ geocode <- function(enderecos,
lat NUMERIC,
lon NUMERIC,
tipo_resultado VARCHAR {additional_cols});"
)
)

DBI::dbExecute(con, query_create_empty_output_db)

Expand Down
4 changes: 3 additions & 1 deletion R/match_weighted_cases.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ match_weighted_cases <- function(con,
# read corresponding parquet file
table_name <- paste(key_cols, collapse = "_")
table_name <- gsub('estado_municipio', 'municipio', table_name)
table_name <- gsub('logradouro_sem_numero', 'logradouro_numero', table_name)
table_name <- gsub('logradouro_sem_numero', 'logradouro', table_name)

# build path to local file
path_to_parquet <- paste0(geocodebr::listar_pasta_cache(), "/", table_name, ".parquet")
Expand All @@ -29,6 +29,8 @@ match_weighted_cases <- function(con,
# register filtered_cnefe to db
duckdb::duckdb_register_arrow(con, "filtered_cnefe", filtered_cnefe)

# remove numero from key cols to allow for the matching
key_cols <- key_cols[key_cols != 'numero']

# Create the JOIN condition by concatenating the key columns
join_condition <- paste(
Expand Down
16 changes: 8 additions & 8 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -232,21 +232,21 @@ get_relevant_cols_rafa <- function(case) {


get_relevant_cols_arrow <- function(case) {
relevant_cols <- if (case %in% c('en01', 'pn01') ) {
relevant_cols <- if (case %in% c('en01', 'ei01', 'pn01') ) {
c("estado", "municipio", "logradouro_sem_numero", "numero", "cep", "localidade")
} else if (case %in% c('en02', 'pn02')) {
} else if (case %in% c('en02', 'ei02', 'pn02')) {
c("estado", "municipio", "logradouro_sem_numero", "numero", "cep")
} else if (case %in% c('en03', 'pn03')) {
} else if (case %in% c('en03', 'ei03', 'pn03')) {
c("estado", "municipio", "logradouro_sem_numero", "numero", "localidade")
} else if (case %in% c('en04', 'pn04')) {
} else if (case %in% c('en04', 'ei04', 'pn04')) {
c("estado", "municipio", "logradouro_sem_numero", "numero")
} else if (case %in% c('er01', 'pr01', 'ei01', 'pi01')) {
} else if (case %in% c('er01', 'pr01', 'pi01')) {
c("estado", "municipio", "logradouro_sem_numero", "cep", "localidade")
} else if (case %in% c('er02', 'pr02', 'ei02', 'pi02')) {
} else if (case %in% c('er02', 'pr02', 'pi02')) {
c("estado", "municipio", "logradouro_sem_numero", "cep")
} else if (case %in% c('er03', 'pr03', 'ei03', 'pi03')) {
} else if (case %in% c('er03', 'pr03', 'pi03')) {
c("estado", "municipio", "logradouro_sem_numero", "localidade")
} else if (case %in% c('er04', 'pr04', 'ei04', 'pi04')) {
} else if (case %in% c('er04', 'pr04', 'pi04')) {
c("estado", "municipio", "logradouro_sem_numero")
} else if (case == 'ec01') {
c("estado", "municipio", "cep", "localidade")
Expand Down

0 comments on commit d845920

Please sign in to comment.