Skip to content

Commit

Permalink
new athena infrastructure updates
Browse files Browse the repository at this point in the history
  • Loading branch information
k-florek committed Dec 14, 2023
1 parent 93fd875 commit 15e0874
Show file tree
Hide file tree
Showing 6 changed files with 100 additions and 33 deletions.
4 changes: 3 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ RUN apt-get update && apt-get upgrade -y && apt-get install -y \
libpng-dev \
libssl-dev \
libcurl4-openssl-dev \
libxml2-dev \
libgdal-dev \
libudunits2-dev \
libjq-dev \
Expand All @@ -44,7 +45,8 @@ RUN R -e "install.packages(c(\
'tidyr', \
'geojsonio', \
'sf', \
'RAthena', \
'paws', \
'noctua', \
'lubridate', \
'htmltools' \
), repos = 'http://cran.us.r-project.org')"
Expand Down
23 changes: 17 additions & 6 deletions seqTime/app.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
library(shiny)
library(shinycssloaders)
library(plotly)
library(RAthena)
library(noctua)
library(lubridate)

databytime <- NULL
Expand All @@ -13,12 +13,23 @@ getData <- function(){
print('Fetching data from AWS')
print(Sys.time())
# athena connection
athenaConnection <- dbConnect(athena(),
s3_staging_dir = "s3://prod-wslh-public-d/sc2dashboard/",
work_group = 'prod-sc2dashboard',
region_name='us-east-2')
d <- dbGetQuery(athenaConnection,"SELECT covv_collection_date,total FROM \"sc2dataportal\".\"prod_gisaid_sars_cov_2_variant_counts\"")
pathena = paws::athena()

# get the named query
NamedQuery = pathena$get_named_query("a0147610-9f50-440c-b68e-16f347adda4e")
query = pathena$start_query_execution(
QueryString = NamedQuery$NamedQuery$QueryString,
WorkGroup = "sc2dashboard"
)
# setup athena connection
athenaConnection <- dbConnect(noctua::athena(), work_group = 'sc2dashboard')

# query data
d <- dbGetQuery(athenaConnection, NamedQuery$NamedQuery$QueryString)
dbDisconnect(athenaConnection)

d$covv_collection_date <- as.Date(d$covv_collection_date)

d <- d[order(covv_collection_date),]
d <- within(d, {
weeks <- paste(epiweek(covv_collection_date),epiyear(covv_collection_date),sep='-')
Expand Down
23 changes: 17 additions & 6 deletions seqTotal/app.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
library(shiny)
library(shinycssloaders)
library(plotly)
library(RAthena)
library(noctua)
library(lubridate)

data <- NULL
Expand All @@ -13,12 +13,23 @@ getData <- function(){
print('Fetching data from AWS')
print(Sys.time())
# athena connection
athenaConnection <- dbConnect(athena(),
s3_staging_dir = "s3://prod-wslh-public-d/sc2dashboard/",
work_group = 'prod-sc2dashboard',
region_name='us-east-2')
d <- dbGetQuery(athenaConnection,"SELECT covv_collection_date,total FROM \"sc2dataportal\".\"prod_gisaid_sars_cov_2_variant_counts\"")
pathena = paws::athena()

# get the named query
NamedQuery = pathena$get_named_query("a0147610-9f50-440c-b68e-16f347adda4e")
query = pathena$start_query_execution(
QueryString = NamedQuery$NamedQuery$QueryString,
WorkGroup = "sc2dashboard"
)
# setup athena connection
athenaConnection <- dbConnect(noctua::athena(), work_group = 'sc2dashboard')

# query data
d <- dbGetQuery(athenaConnection, NamedQuery$NamedQuery$QueryString)
dbDisconnect(athenaConnection)

d$covv_collection_date <- as.Date(d$covv_collection_date)

d <- d[order(covv_collection_date),]
data <<- d
}
Expand Down
32 changes: 25 additions & 7 deletions varGeo/app.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ library(shiny)
library(shinycssloaders)
library(leaflet)
library(leaflet.minicharts)
library(RAthena)
library(noctua)
library(lubridate)
library(sf)
library(dplyr)
Expand All @@ -14,12 +14,23 @@ library(htmltools)
# data fetch and light processing function
getData <- function(){
# athena connection
athenaConnection <- dbConnect(athena(),
s3_staging_dir = "s3://prod-wslh-public-data/sc2dashboard/",
work_group = 'prod-sc2dashboard',
region_name='us-east-2')
data <- dbGetQuery(athenaConnection,"SELECT covv_collection_date,variant,total,lat,long,county FROM \"sc2dataportal\".\"prod_gisaid_sars_cov_2_variant_counts_county\"")
pathena = paws::athena()

# get the named query
NamedQuery = pathena$get_named_query("214540e5-dcb7-42ea-96c2-dc6cddeec53c")
query = pathena$start_query_execution(
QueryString = NamedQuery$NamedQuery$QueryString,
WorkGroup = "sc2dashboard"
)
# setup athena connection
athenaConnection <- dbConnect(noctua::athena(), work_group = 'sc2dashboard')

# query data
data <- dbGetQuery(athenaConnection, NamedQuery$NamedQuery$QueryString)
dbDisconnect(athenaConnection)

data$covv_collection_date <- as.Date(data$covv_collection_date)

data <- data[!(is.na(data$variant) | data$variant=="" | data$variant=="Unassigned"), ]
data <- data[!(is.na(data$lat) | data$lat=="" | is.na(data$long) | data$long==""), ]
data <- data %>% mutate(week = floor_date(covv_collection_date, unit = 'week', week_start = 1))
Expand Down Expand Up @@ -106,6 +117,13 @@ server <- function(input, output, session) {

# number of VOCs
voc_count = length(unique(data$lineage))

# unique lineage names, excluding "Other" (it is placed last below)
vocs = as.character(unique(data$lineage))
vocs = vocs[!vocs == "Other"]

# order the columns in the data frame so "Other" is last
chartData <- chartData[,c("county","lat","lng",vocs,"Other")]

# create count matrix for pie chart
chartDataM <- chartData[,-which(names(chartData) %in% c('county','lat','lng'))]
Expand Down Expand Up @@ -140,7 +158,7 @@ server <- function(input, output, session) {
map %>%
addMinicharts(
type="pie",
colorPalette = c(hcl.colors(voc_count-1, "viridis"),"#CCCCCC"),
colorPalette = voc_pallet <- c(hcl.colors(voc_count-1, "viridis"),"#CCCCCC"),
lng = chartData$lng,
lat = chartData$lat,
chartdata = chartDataM,
Expand Down
25 changes: 18 additions & 7 deletions variantSearch/app.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
library(shiny)
library(shinycssloaders)
library(plotly)
library(RAthena)
library(noctua)
library(lubridate)
library(dplyr)

Expand All @@ -19,14 +19,25 @@ getData <- function(){
print(Sys.time())
updateTS <<- format(Sys.time(),"%Y-%m-%d")
# athena connection
athenaConnection <- dbConnect(athena(),
s3_staging_dir = "s3://prod-wslh-public-data/sc2dashboard/",
work_group = 'prod-sc2dashboard',
region_name='us-east-2')
d <- dbGetQuery(athenaConnection,"SELECT covv_collection_date,covv_lineage,total FROM \"sc2dataportal\".\"prod_gisaid_sars_cov_2_variant_counts\"")
pathena = paws::athena()

# get the named query
NamedQuery = pathena$get_named_query("a0147610-9f50-440c-b68e-16f347adda4e")
query = pathena$start_query_execution(
QueryString = NamedQuery$NamedQuery$QueryString,
WorkGroup = "sc2dashboard"
)
# setup athena connection
athenaConnection <- dbConnect(noctua::athena(), work_group = 'sc2dashboard')

# query data
d <- dbGetQuery(athenaConnection, NamedQuery$NamedQuery$QueryString)
dbDisconnect(athenaConnection)

d$covv_collection_date <- as.Date(d$covv_collection_date)

d <- d[!(is.na(d$covv_lineage) | d$covv_lineage=="" | d$covv_lineage=="Unassigned"), ]
latestDataPoint <<- as.character(max(d$covv_collection_date))
latestDataPoint <<- as.character(max(d$covv_collection_date, na.rm = TRUE))
d <- d %>% mutate(week = floor_date(covv_collection_date, unit = 'week', week_start = 1))
d <- aggregate(d$total, by=list(week=d$week,lineage=d$covv_lineage),FUN=sum)
d <- d[order(d$week),]
Expand Down
26 changes: 20 additions & 6 deletions variants/app.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
library(shiny)
library(shinycssloaders)
library(plotly)
library(RAthena)
library(noctua)
library(paws)
library(lubridate)
library(dplyr)

Expand All @@ -14,13 +15,26 @@ data <- NULL
getData <- function(){
print('Fetching data from AWS')
print(Sys.time())

# athena connection
athenaConnection <- dbConnect(athena(),
s3_staging_dir = "s3://prod-wslh-public-data/sc2dashboard/",
work_group = 'prod-sc2dashboard',
region_name='us-east-2')
d <- dbGetQuery(athenaConnection,"SELECT covv_collection_date,variant,total FROM \"sc2dataportal\".\"prod_gisaid_sars_cov_2_variant_counts_voc\"")
pathena = paws::athena()

# get the named query
NamedQuery = pathena$get_named_query("0be277ac-e21a-4a09-9a97-ee55ea707740")
query = pathena$start_query_execution(
QueryString = NamedQuery$NamedQuery$QueryString,
WorkGroup = "sc2dashboard"
)
# setup athena connection
athenaConnection <- dbConnect(noctua::athena(), work_group = 'sc2dashboard')

# query data
d <- dbGetQuery(athenaConnection, NamedQuery$NamedQuery$QueryString)
dbDisconnect(athenaConnection)

d$covv_collection_date <- as.Date(d$covv_collection_date)

# parse data
d <- d[!(is.na(d$variant) | d$variant=="" | d$variant=="Unassigned"), ]
d <- d %>% mutate(week = floor_date(covv_collection_date, unit = 'week', week_start = 1))
d <- aggregate(d$total, by=list(week=d$week,lineage=d$variant),FUN=sum)
Expand Down

0 comments on commit 15e0874

Please sign in to comment.