Skip to content

Commit

Permalink
Change references to column names to match new standard in PortalData
Browse files Browse the repository at this point in the history
  • Loading branch information
gmyenni committed Aug 11, 2017
1 parent 1195aea commit a1c4fd4
Show file tree
Hide file tree
Showing 9 changed files with 69 additions and 69 deletions.
4 changes: 2 additions & 2 deletions PortalForecasts.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ portalr::download_observations()
moons = get_moon_data()

#Beginning and end of the forecast timeperiod
most_recent_newmoon = moons$NewMoonNumber[which.max(moons$Period)]
most_recent_newmoon = moons$newmoonnumber[which.max(moons$period)]
first_forecast_newmoon=most_recent_newmoon+1
last_forecast_newmoon=first_forecast_newmoon + 11
forecast_newmoons = first_forecast_newmoon:last_forecast_newmoon
Expand All @@ -26,7 +26,7 @@ weather_data = get_weather_data(moons, rodent_data$all, first_forecast_newmoon,

#Get only relevent columns now that this is isn't needed to subset weather.
rodent_data$all = rodent_data$all %>%
select(-NewMoonDate,-CensusDate,-Period,-Year,-Month)
select(-newmoondate,-censusdate,-period,-year,-month)

#tscount::tsglm() will not model a timeseries of all 0's. So for those species, which are
#ones that just haven't been observed in a while, make a forecast of all 0's.
Expand Down
14 changes: 7 additions & 7 deletions PortalHindcasts.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ initial_time_NewMoons=403:490
for(this_NewMoon in initial_time_NewMoons){

moons = get_moon_data() %>%
filter(NewMoonNumber<=this_NewMoon)
filter(newmoonnumber<=this_NewMoon)

#Beginning and end of the forecast timeperiod
most_recent_newmoon = moons$NewMoonNumber[which.max(moons$Period)]
most_recent_newmoon_date = as.Date(moons$NewMoonDate[which.max(moons$Period)])
most_recent_newmoon = moons$newmoonnumber[which.max(moons$period)]
most_recent_newmoon_date = as.Date(moons$newmoondate[which.max(moons$period)])
first_forecast_newmoon=most_recent_newmoon+1
last_forecast_newmoon=first_forecast_newmoon + 11
forecast_newmoons = first_forecast_newmoon:last_forecast_newmoon
Expand All @@ -33,16 +33,16 @@ for(this_NewMoon in initial_time_NewMoons){

rodent_data = get_rodent_data(moons, forecast_date, filename_suffix)
rodent_data$all = rodent_data$all %>%
filter(NewMoonNumber <= this_NewMoon)
filter(newmoonnumber <= this_NewMoon)
rodent_data$controls = rodent_data$controls %>%
filter(NewMoonNumber <= this_NewMoon)
filter(newmoonnumber <= this_NewMoon)

weather_data = get_weather_data(moons, rodent_data$all, first_forecast_newmoon, last_forecast_newmoon) %>%
filter(NewMoonNumber <= this_NewMoon)
filter(newmoonnumber <= this_NewMoon)

#Get only relevent columns now that this is isn't needed to subset weather.
rodent_data$all = rodent_data$all %>%
select(-NewMoonDate,-CensusDate,-Period,-Year,-Month)
select(-newmoondate,-censusdate,-period,-year,-month)

#tscount::tsglm() will not model a timeseries of all 0's. So for those species, which are
#ones that just haven't been observed in a while, make a forecast of all 0's.
Expand Down
28 changes: 14 additions & 14 deletions forecast_tools.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ make_ensemble=function(all_forecasts, models_to_use=NA, CI_level = 0.9){
weighted_estimates = all_forecasts %>%
mutate(model_var = ((UpperPI - estimate)/CI_quantile)^2) %>%
left_join(weights, by=c('date','model','currency','level','species','fit_start_newmoon','fit_end_newmoon','initial_newmoon')) %>%
group_by(date, NewMoonNumber, forecastmonth, forecastyear,level, currency, species, fit_start_newmoon, fit_end_newmoon, initial_newmoon) %>%
group_by(date, newmoonnumber, forecastmonth, forecastyear,level, currency, species, fit_start_newmoon, fit_end_newmoon, initial_newmoon) %>%
summarise(ensemble_estimate = sum(estimate*weight),
weighted_ss = sum(weight * (estimate - ensemble_estimate)^2) ,
ensemble_var = sum(model_var * weight) + weighted_ss / (n()*sum(weight)-1),
Expand All @@ -78,17 +78,17 @@ get_sp_predicts = function(data, lvl, lead_time) {
""), format = "%m/%Y")) %>% transform(date = as.Date(date, "%Y-%m-%d"))
data1 = filter(data, level == lvl,
date == max(as.Date(date)))
target_moon = min(data1$NewMoonNumber) + (lead_time - 1)
data2 = filter(data1, NewMoonNumber == target_moon)
target_moon = min(data1$newmoonnumber) + (lead_time - 1)
data2 = filter(data1, newmoonnumber == target_moon)
}

plot_data = function(data) {
newmoons_table = read.csv(
text = getURL(
"https://raw.githubusercontent.com/weecology/PortalData/master/Rodents/moon_dates.csv"))
target_moon=unique(data$NewMoonNumber)
period_code = dplyr::filter(newmoons_table, newmoons_table$NewMoonNumber == target_moon) %>%
dplyr::select(Period) %>%
target_moon=unique(data$newmoonnumber)
period_code = dplyr::filter(newmoons_table, newmoons_table$newmoonnumber == target_moon) %>%
dplyr::select(period) %>%
as.integer()
sp_predict = ggplot(data,
aes(
Expand Down Expand Up @@ -138,7 +138,7 @@ calculate_forecast_error = function(observations, forecasts, error_metric='MSE',

if(!forecast_is_valid(forecasts)) stop('Forecast dataframe not valid')

valid_observation_columns = c('NewMoonNumber','currency','level','species','actual')
valid_observation_columns = c('newmoonnumber','currency','level','species','actual')
if(!all(valid_observation_columns %in% colnames(observations))) stop('observation data.frame does not have valid column names')

#At least 1 matching value must be in each of these columns in the observations and forecasts
Expand All @@ -152,8 +152,8 @@ calculate_forecast_error = function(observations, forecasts, error_metric='MSE',
#Calculate error
if(error_metric == 'MSE'){
comparisons = forecasts %>%
inner_join(observations, by=c('NewMoonNumber','currency','level','species')) %>%
group_by(date, model, NewMoonNumber, currency, level, species) %>%
inner_join(observations, by=c('newmoonnumber','currency','level','species')) %>%
group_by(date, model, newmoonnumber, currency, level, species) %>%
summarise(error=(estimate-actual)^2) %>%
ungroup()
} else if(error_metric == 'Likelihood') {
Expand All @@ -167,13 +167,13 @@ calculate_forecast_error = function(observations, forecasts, error_metric='MSE',
#TODO: Make the lead time the actual days or weeks once more frequent forecasts are being made( see #37)
forecast_date_new_moon_number = comparisons %>%
group_by(date) %>%
summarise(new_moon_of_forecast = min(NewMoonNumber)-1) %>%
summarise(new_moon_of_forecast = min(newmoonnumber)-1) %>%
ungroup()

comparisons_with_lead_time = comparisons %>%
left_join(forecast_date_new_moon_number, by='date') %>%
mutate(lead_time=NewMoonNumber - new_moon_of_forecast) %>%
select(-new_moon_of_forecast, -NewMoonNumber, -date)
mutate(lead_time=newmoonnumber - new_moon_of_forecast) %>%
select(-new_moon_of_forecast, -newmoonnumber, -date)

comparisons_model_summary = comparisons_with_lead_time %>%
group_by(model, currency, level, species, lead_time) %>%
Expand Down Expand Up @@ -215,7 +215,7 @@ forecast_is_valid=function(forecast_df, verbose=FALSE){
is_valid=TRUE
violations=c()
#Define valid valeus
valid_columns = c('date','forecastmonth','forecastyear','NewMoonNumber','model','currency',
valid_columns = c('date','forecastmonth','forecastyear','newmoonnumber','model','currency',
'level','species','estimate','LowerPI','UpperPI','fit_start_newmoon',
'fit_end_newmoon','initial_newmoon')
valid_currencies = c('abundance','richness','biomass','energy')
Expand Down Expand Up @@ -318,7 +318,7 @@ forecast_viz <- function(obs_data, obs_date_col_name, obs_val_col_name, for_data
for_date_col_name, for_val_col_name, for_model_name,
for_lowerpi_col_name, for_upperpi_col_name, start_newmoon){
for_data_sub = filter(for_data, species == obs_val_col_name, model == for_model_name)
obs_data_sub = filter(obs_data, NewMoonNumber >= start_newmoon)
obs_data_sub = filter(obs_data, newmoonnumber >= start_newmoon)
ggplot(obs_data_sub, aes_string(x = obs_date_col_name)) +
geom_ribbon(data = for_data_sub, mapping = aes_string(x = for_date_col_name, ymin = for_lowerpi_col_name, ymax = for_upperpi_col_name), fill = "lightblue") +
geom_line(aes_string(y = obs_val_col_name)) +
Expand Down
10 changes: 5 additions & 5 deletions index.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ obs_data$total = rowSums(select(obs_data, -period))
new_moon_file = portalr::FullPath('PortalData/Rodents/moon_dates.csv',
'~')
new_moons = read.csv(new_moon_file)
obs_data_newmoon = inner_join(obs_data, new_moons, by = c("period" = "Period"))
obs_data_newmoon = inner_join(obs_data, new_moons, by = c("period" = "period"))
#for_data = read.csv("predictions/2017-02-24Allforecasts.csv")
for_data = compile_forecasts() %>%
Expand All @@ -31,10 +31,10 @@ for_data = filter(for_data, date == most_recent_forecast)
for_data$model = "ensemble"
forecast_viz(obs_data = obs_data_newmoon,
obs_date_col_name = "NewMoonNumber",
obs_date_col_name = "newmoonnumber",
obs_val_col_name = "total",
for_data = for_data,
for_date_col_name = "NewMoonNumber",
for_date_col_name = "newmoonnumber",
for_val_col_name = "estimate",
for_model_name = "ensemble",
for_lowerpi_col_name = "LowerPI",
Expand All @@ -61,10 +61,10 @@ most_abund_sp = sp_predictions %>%
for (species in as.vector(most_abund_sp$species)) {
species_forecast = forecast_viz(obs_data = obs_data_newmoon,
obs_date_col_name = "NewMoonNumber",
obs_date_col_name = "newmoonnumber",
obs_val_col_name = species,
for_data = for_data,
for_date_col_name = "NewMoonNumber",
for_date_col_name = "newmoonnumber",
for_val_col_name = "estimate",
for_model_name = "ensemble",
for_lowerpi_col_name = "LowerPI",
Expand Down
56 changes: 28 additions & 28 deletions models/model_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ get_moon_data <- function(){
moons <- read.csv(FullPath('PortalData/Rodents/moon_dates.csv', '~'), header=T)

#Add in year and month to join with the rest of the data
moons$Year=year(moons$NewMoonDate)
moons$Month=month(moons$NewMoonDate)
moons$year=year(moons$newmoondate)
moons$month=month(moons$newmoondate)
return(moons)
}

Expand All @@ -31,17 +31,17 @@ get_rodent_data <- function(moons, forecast_date, filename_suffix){
controls = controls %>%
filter(treatment == 'control') %>%
select(-treatment) %>%
inner_join(moons,by=c("period"="Period")) %>%
subset(NewMoonNumber >= historic_start_newmoon) %>%
select(-NewMoonDate,-CensusDate,-period,-Year,-Month)
inner_join(moons,by=c("period"="period")) %>%
subset(newmoonnumber >= historic_start_newmoon) %>%
select(-newmoondate,-censusdate,-period,-year,-month)

#All plots
all=portalr::abundance(level="Site",type="Rodents",length="all", incomplete = FALSE)
#The total rodent count across the entire site
all$total = rowSums(all[,-(1)])
all=inner_join(moons,all,by=c("Period"="period"))
all=inner_join(moons,all,by=c("period"="period"))

all=subset(all,Period >= historic_start_period)
all=subset(all,period >= historic_start_period)
rodent_data = list()
rodent_data$controls = controls
rodent_data$all = all
Expand All @@ -53,42 +53,42 @@ get_rodent_data <- function(moons, forecast_date, filename_suffix){
get_weather_data <- function(moons, all, first_forecast_newmoon, last_forecast_newmoon){
weather_data=portalr::weather("Monthly") %>%
ungroup() %>%
left_join(moons, by=c('Year','Month'))
left_join(moons, by=c('year','month'))

#Offset the NewMoonNumber to create a 6 month lag between
#rodent observations and weather
weather_data$NewMoonNumber_with_lag = weather_data$NewMoonNumber + 6
weather_data$NewMoonNumber_with_lag = weather_data$newmoonnumber + 6

#Assign weather using lag to rodent observations.
#This will match weather row numbers to corrosponding rows in all and controls
weather_data = weather_data %>%
select(-NewMoonDate, -CensusDate, -Period, -Year, -Month) %>%
right_join(all, by=c('NewMoonNumber_with_lag'='NewMoonNumber')) %>%
select(Year,Month,MinTemp,MaxTemp,MeanTemp,Precipitation,NDVI,NewMoonNumber, NewMoonNumber_with_lag)
select(-newmoondate, -censusdate, -period, -year, -month) %>%
right_join(all, by=c('NewMoonNumber_with_lag'='newmoonnumber')) %>%
select(year,month,mintemp,maxtemp,meantemp,precipitation,NDVI,newmoonnumber, NewMoonNumber_with_lag)

##Get 6 month weather forecast by combining stations data and monthly means of past 3 years
#Used to make predictions for the months 7-12 of a 12 month forecast using a 6 month lag.

#A data.frame of the months that will be in this weather forecast.
weather_forecast_months = moons %>%
filter(NewMoonNumber >= first_forecast_newmoon-5, NewMoonNumber <= last_forecast_newmoon-5)
filter(newmoonnumber >= first_forecast_newmoon-5, newmoonnumber <= last_forecast_newmoon-5)

# x=subset(weather,NewMoonNumber>=first_forecast_newmoon-5) %>%
# subset(NewMoonNumber<=last_forecast_newmoon-5)

weathermeans=weather_data[dim(weather_data)[1]-36:dim(weather_data)[1],] %>%
group_by(Month) %>%
summarize(MinTemp=mean(MinTemp,na.rm=T),MaxTemp=mean(MaxTemp,na.rm=T),MeanTemp=mean(MeanTemp,na.rm=T),
Precipitation=mean(Precipitation,na.rm=T),NDVI=mean(NDVI,na.rm=T)) %>%
slice(match(weather_forecast_months$Month, Month))
group_by(month) %>%
summarize(mintemp=mean(mintemp,na.rm=T),maxtemp=mean(maxtemp,na.rm=T),meantemp=mean(meantemp,na.rm=T),
precipitation=mean(precipitation,na.rm=T),NDVI=mean(NDVI,na.rm=T)) %>%
slice(match(weather_forecast_months$month, month))

#Insert longterm means where there is missing data in the historic weather
weather_data=weather_data %>%
mutate(NDVI = ifelse(is.na(NDVI), mean(NDVI, na.rm = T), NDVI)) %>%
mutate(MinTemp = ifelse(is.na(MinTemp), mean(MinTemp, na.rm = T), MinTemp)) %>%
mutate(MaxTemp = ifelse(is.na(MaxTemp), mean(MaxTemp, na.rm = T), MaxTemp)) %>%
mutate(MeanTemp = ifelse(is.na(MeanTemp), mean(MeanTemp, na.rm = T), MeanTemp)) %>%
mutate(Precipitation = ifelse(is.na(Precipitation), mean(Precipitation, na.rm = T), Precipitation))
mutate(mintemp = ifelse(is.na(mintemp), mean(mintemp, na.rm = T), mintemp)) %>%
mutate(maxtemp = ifelse(is.na(maxtemp), mean(maxtemp, na.rm = T), maxtemp)) %>%
mutate(meantemp = ifelse(is.na(meantemp), mean(meantemp, na.rm = T), meantemp)) %>%
mutate(precipitation = ifelse(is.na(precipitation), mean(precipitation, na.rm = T), precipitation))
}


Expand All @@ -102,7 +102,7 @@ forecastall <- function(abundances, level, weather_data, weatherforecast,
#forecasts is where we will append all forecasts for this level
#all_model_aic is where we will append all model aics for this level
forecasts = data.frame(date=as.Date(character()), forecastmonth=numeric(), forecastyear=numeric(),
NewMoonNumber=numeric(), currency=character(), model=character(), level=character(),
newmoonnumber=numeric(), currency=character(), model=character(), level=character(),
species=character(), estimate=numeric(), LowerPI=numeric(), UpperPI=numeric())
all_model_aic = data.frame(date=as.Date(character()),currency=character(),model=character(),level=character(),
species=character(), aic=numeric())
Expand Down Expand Up @@ -165,12 +165,12 @@ forecastall <- function(abundances, level, weather_data, weatherforecast,
bind_rows(data.frame(pets[2]))

#########Include columns describing the data used in the forecast###############
forecasts$fit_start_newmoon = min(abundances$NewMoonNumber)
forecasts$fit_end_newmoon = max(abundances$NewMoonNumber)
forecasts$initial_newmoon = max(abundances$NewMoonNumber)
all_model_aic$fit_start_newmoon = min(abundances$NewMoonNumber)
all_model_aic$fit_end_newmoon = max(abundances$NewMoonNumber)
all_model_aic$initial_newmoon = max(abundances$NewMoonNumber)
forecasts$fit_start_newmoon = min(abundances$newmoonnumber)
forecasts$fit_end_newmoon = max(abundances$newmoonnumber)
forecasts$initial_newmoon = max(abundances$newmoonnumber)
all_model_aic$fit_start_newmoon = min(abundances$newmoonnumber)
all_model_aic$fit_end_newmoon = max(abundances$newmoonnumber)
all_model_aic$initial_newmoon = max(abundances$newmoonnumber)

#########Write forecasts to file and aics to separate files###############
#Appending a csv without re-writing the header. Needed for hindcasting
Expand Down
2 changes: 1 addition & 1 deletion models/naive01.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ naive01=function(abundances,forecast_date,forecast_months,forecast_years,forecas

model01=forecast(abundances$total,h=num_forecast_months,level=CI_level,BoxCox.lambda(0),allow.multiplicative.trend=T)

forecasts01=data.frame(date=forecast_date, forecastmonth=forecast_months,forecastyear=forecast_years, NewMoonNumber=forecast_newmoons,
forecasts01=data.frame(date=forecast_date, forecastmonth=forecast_months,forecastyear=forecast_years, newmoonnumber=forecast_newmoons,
currency="abundance",model="Forecast", level=level, species="total", estimate=model01$mean,
LowerPI=model01$lower[,which(model01$level==CI_level*100)], UpperPI=model01$upper[,which(model01$level==CI_level*100)])
forecasts01[sapply(forecasts01, is.ts)] <- lapply(forecasts01[sapply(forecasts01, is.ts)],unclass)
Expand Down
2 changes: 1 addition & 1 deletion models/naive02.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ naive02=function(abundances,forecast_date,forecast_months,forecast_years,forecas

model=forecast(auto.arima(abundances$total,lambda = 0),h=num_forecast_months,level=CI_level,fan=T)

forecasts02=data.frame(date=forecast_date, forecastmonth=forecast_months, forecastyear=forecast_years, NewMoonNumber=forecast_newmoons,
forecasts02=data.frame(date=forecast_date, forecastmonth=forecast_months, forecastyear=forecast_years, newmoonnumber=forecast_newmoons,
currency="abundance", model="AutoArima", level=level, species="total", estimate=model$mean,
LowerPI=model$lower[,which(model$level==CI_level*100)], UpperPI=model$upper[,which(model$level==CI_level*100)])
forecasts02[sapply(forecasts02, is.ts)] <- lapply(forecasts02[sapply(forecasts02, is.ts)],unclass)
Expand Down
2 changes: 1 addition & 1 deletion models/neg_binom_ts.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ neg_binom_ts=function(abundances,forecast_date,forecast_months,forecast_years,fo
model_aic = tryCatch(summary(model)$AIC, error = function(x) {1e6})
}
newpred=data.frame(date=rep(forecast_date,num_forecast_months), forecastmonth=forecast_months, forecastyear=forecast_years,
NewMoonNumber=forecast_newmoons, currency="abundance", model=rep("NegBinom Time Series",num_forecast_months),
newmoonnumber=forecast_newmoons, currency="abundance", model=rep("NegBinom Time Series",num_forecast_months),
level=level, species=rep(s,num_forecast_months), estimate=pred$pred,
LowerPI=pred$interval[,1], UpperPI=pred$interval[,2])
allforecasts=rbind(allforecasts,newpred)
Expand Down
Loading

0 comments on commit a1c4fd4

Please sign in to comment.