Skip to content

Commit

Permalink
factor analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
alejandrohagan committed Aug 31, 2024
1 parent 14823ad commit e8cec07
Show file tree
Hide file tree
Showing 15 changed files with 681 additions and 205 deletions.
6 changes: 5 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@ export("%>%")
export(abc)
export(abc_graph)
export(clean_file_names)
export(contonso_duckdb)
export(convert_dots_to_string)
export(convert_input_to_string)
export(count_plus)
export(divide)
export(make_aggregation_sql)
export(is_yyyy_mm_dd)
export(make_aggregation_tbl)
export(make_cohort_tbl)
export(make_segmentation)
Expand All @@ -17,6 +20,7 @@ export(totalmtd)
export(totalqtd)
export(totalwtd)
export(totalytd)
export(with)
export(wow)
export(yoy)
importFrom(magrittr,"%>%")
120 changes: 120 additions & 0 deletions R/factor.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@

#
# sales_tbl <- fpaR::sales |>
# mutate(
# month=month(order_date)
# ) |>
# group_by(
# month
# ,product_key
# ) |>
# summarize(
# quantity=sum(quantity)
# ,net_price=sum(net_price)
# ,mean=mean(unit_price)
# ,.groups="drop"
# ) |>
# group_by(month) |>
# arrange(product_key,.by_group = TRUE) |>
# ungroup() |>
# mutate(
# price_realization=net_price/quantity
# # ,quantity_lead=lead(quantity,2517)
# # ,net_price_lead=lead(quantity,2517)
# # ,mean_price_lead=lead(mean,2517)
# # ,price_realization_lead=lead(price_realization,2517)
# # ,quantity_delta=lead(quantity,2517)-quantity
# # ,net_price_delta=lead(quantity,2517)-net_price
# # ,mean_delta=lead(mean,2517)-mean
# # ,price_realization_delta=lead(price_realization,2517)-price_realization
# ) |>
# filter(
# month %in% c(1,2)
# )
#
# mod <- lm(net_price_delta~quantity_delta+price_realization_delta-1,sales_tbl |> filter(month==1))
#
#
# sales_wide_tbl <- sales_tbl |>
# group_split(month) |>
# map2(
# .y = c("jan", "feb"),
# .f = \(.x,.y) {
# x <- .x # Assign .x to x for clarity
# y <- .y # Assign .y to y for clarity
#
# x |>
# rename_with(
# .fn = ~ glue::glue("{y}_{.x}"), # Use y and .col explicitly
# .cols = c(quantity, net_price, mean, price_realization)
# ) |>
# select(-month)
# }
# ) |>
# reduce(
# \(.x,prev){
# .x |>
# left_join(
# prev
# ,by=join_by(product_key)
# )
# }
# ) |>
# mutate(
# quantity_delta=jan_quantity-feb_quantity
# ,net_price_delta=jan_net_price-feb_net_price
# ,mean_delta=jan_mean-feb_mean
# ,pr_delta=jan_price_realization-feb_price_realization
# )
#
#
# factor_tbl <- sales_wide_tbl |>
# mutate(
# vol=quantity_delta*jan_price_realization
# ,price=pr_delta*feb_quantity
# ,total_var=vol+price
# ) |>
# relocate(
# vol
# ,price
# ,net_price_delta
# ,total_var
# ) |>
# summarise(
# vol=sum(vol,na.rm=TRUE)
# ,price=sum(price,na.rm = TRUE)
# ,net_price=sum(net_price_delta,na.rm = TRUE)
# ,total_var=sum(total_var,na.rm=TRUE)
# ,quantity=sum(quantity_delta,na.rm=TRUE)
# )
#
#
# stats::model.frame()
#
# stats::terms()
#
# test <- stats::terms(out~inside+inner+outter)
#
#
#
# str(test)
#
# ## pull in columns
# variables_vec <- attr(test,"variables")
# target_vec <- as.character(variables_vec[[2]])
#
# input_vec <- attr(test,"term.labels")
#
# # extract the data base don columns
# model.frame(formula(mpg~cyl+am+vs),data=mtcars)
#
#
# factor_tbl
# sales_tbl |>
# lm(net_price~0+quantity+mean,data=_)
#
#
# sales_wide_tbl |>
# lm(net_price_delta~quantity_delta+jan_price_realization+pr_delta+feb_quantity+product_key-1,data=_) |>
# # broom::tidy()
# broom::augment()
Empty file added R/rlang.R
Empty file.
63 changes: 63 additions & 0 deletions R/utils-misc.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#' Validate an input is YYYY-MM-DD format
#'
#' @param x date column
#'
#' @return logical
#' @export
#'
#' @examples
#' is_yyyy_mm_dd("2024-01-01")
is_yyyy_mm_dd <- function(x) {
grepl("^\\d{4}-\\d{2}-\\d{2}$", x)
}



#' Convert quoted or unquoted input to string
#'
#' @param x quoted or unquoted input
#'
#' @return string
#' @export
#'
#' @examples
#' convert_input_to_string(hello)
convert_input_to_string <- function(x) {


var_quo <- rlang::enquo(x)
var_expr <- rlang::quo_get_expr(var_quo)


if (rlang::is_symbolic(var_expr) || rlang::is_call(var_expr)) {
out <- as_label(var_expr)

return(out)
} else {
out <- x

return(out)
}

}



#' Capture dot arguments and turn into strings
#'
#' @param ... dot args
#'
#' @return strings
#' @export
#'
#' @examples
#' convert_dots_to_string(hello,how,are,you)
convert_dots_to_string <- function(...){

args <- rlang::enquos(...)

group_var <- map_chr(args,\(x) convert_input_to_string({{x}}))

return(group_var)

}
105 changes: 105 additions & 0 deletions R/utils-sql.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#' Helps creates CTE queries
#'
#' @param query sql query
#' @param query_name CTE name
#' @param order CTE order either first, middle, last or single
#'
#' @return sql text object
#' @export
#'
#' @examples
#' query <- dplyr::sql("select * from sales")
#' with(query=query,query_name=sales_cte,order='first')
with <- function(query,query_name,order=c("middle")){


query_name <- rlang::as_label(rlang::enquo(query_name))

initial <- dplyr::sql(
paste0("WITH ",query_name," AS ","(",query,"),")
)

middle <- dplyr::sql(
paste0(query_name," AS ","(",query,"),")
)


last <- dplyr::sql(
paste0(query_name," AS ","(",query,")")
)

single <- dplyr::sql(
paste0("WITH ",query_name," AS ","(",query,")")
)



ordec_vec <- match.arg(
stringr::str_to_lower(order)
,choices=c("first","last","middle","single")
)

if(ordec_vec=="first"){

return(initial)

}else if(ordec_vec=="middle"){

return(middle)

}else if(ordec_vec=="single"){

return(single)

}else{

return(last)

}
}




#' Create duckdb versions of Contoso datasets
#'
#' @return DBI objects
#' @export
#'
#' @examples
#' contonso_duckdb()
contonso_duckdb <- function(){

con <- suppressWarnings(DBI::dbConnect(duckdb::duckdb()))


duckdb::duckdb_register(con,"sales",fpaR::sales,overwrite = TRUE)
duckdb::duckdb_register(con,"product",fpaR::product,overwrite = TRUE)
duckdb::duckdb_register(con,"customer",fpaR::customer,overwrite = TRUE)
duckdb::duckdb_register(con,"date",fpaR::date,overwrite = TRUE)
duckdb::duckdb_register(con,"fx",fpaR::fx,overwrite = TRUE)
duckdb::duckdb_register(con,"store",fpaR::store,overwrite = TRUE)


sales <- dplyr::tbl(con,dplyr::sql("select * from sales"))
product <- dplyr::tbl(con,dplyr::sql("select * from product"))
customer <- dplyr::tbl(con,dplyr::sql("select * from customer"))
store <- dplyr::tbl(con,dplyr::sql("select * from store"))
fx <- dplyr::tbl(con,dplyr::sql("select * from fx"))
date <- dplyr::tbl(con,dplyr::sql("select * from date"))


out <- base::list(
sales=sales
,product=product
,customer=customer
,store=store
,fx=fx
,date=date
)


return(out)
}


10 changes: 6 additions & 4 deletions README.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,18 @@ format: gfm
- translated make_aggregation_tbl to sql (without group logic)
- fixed how to add sub query that contains all previous steps
## 2024-08-15
- fixed with and cte functions to build queries quicker


## to do list
- Figure out how to delay unquoted evaluation so that we can pass args to make aggregtation tbl
- replicate time intellitgence functions
- update examples with contoso exmaple
- Figure out how to delay unquoted evaluation so that we can pass args to make aggregation tbl
- replicate time intelligence functions
- update examples with contoso example
- covert lm to sql equivalents
- design and create robust unit tests
- make 554 calendar date table
- create documentation with examples
- investigate factor anlaysis
- investigate factor analysis

# Introduction to fpaR

Expand Down
Loading

0 comments on commit e8cec07

Please sign in to comment.