From a1c4fd496253dcd494580280fd6cea584bf384d6 Mon Sep 17 00:00:00 2001 From: Glenda Yenni Date: Thu, 10 Aug 2017 22:52:41 -0600 Subject: [PATCH] Change references to column names to match new standard in PortalData --- PortalForecasts.R | 4 +-- PortalHindcasts.R | 14 +++++----- forecast_tools.R | 28 ++++++++++---------- index.Rmd | 10 +++---- models/model_functions.R | 56 ++++++++++++++++++++-------------------- models/naive01.R | 2 +- models/naive02.R | 2 +- models/neg_binom_ts.R | 2 +- models/pois_env_ts.R | 20 +++++++------- 9 files changed, 69 insertions(+), 69 deletions(-) diff --git a/PortalForecasts.R b/PortalForecasts.R index 4b41e50bcc..d307b37872 100644 --- a/PortalForecasts.R +++ b/PortalForecasts.R @@ -14,7 +14,7 @@ portalr::download_observations() moons = get_moon_data() #Beginning and end of the forecast timeperiod -most_recent_newmoon = moons$NewMoonNumber[which.max(moons$Period)] +most_recent_newmoon = moons$newmoonnumber[which.max(moons$period)] first_forecast_newmoon=most_recent_newmoon+1 last_forecast_newmoon=first_forecast_newmoon + 11 forecast_newmoons = first_forecast_newmoon:last_forecast_newmoon @@ -26,7 +26,7 @@ weather_data = get_weather_data(moons, rodent_data$all, first_forecast_newmoon, #Get only relevent columns now that this is isn't needed to subset weather. rodent_data$all = rodent_data$all %>% - select(-NewMoonDate,-CensusDate,-Period,-Year,-Month) + select(-newmoondate,-censusdate,-period,-year,-month) #tscount::tsglm() will not model a timeseries of all 0's. So for those species, which are #ones that just haven't been observed in a while, make a forecast of all 0's. diff --git a/PortalHindcasts.R b/PortalHindcasts.R index f394406239..9e672cdcc3 100644 --- a/PortalHindcasts.R +++ b/PortalHindcasts.R @@ -20,11 +20,11 @@ initial_time_NewMoons=403:490 for(this_NewMoon in initial_time_NewMoons){ moons = get_moon_data() %>% - filter(NewMoonNumber<=this_NewMoon) + filter(newmoonnumber<=this_NewMoon) #Beginning and end of the forecast timeperiod - most_recent_newmoon = moons$NewMoonNumber[which.max(moons$Period)] - most_recent_newmoon_date = as.Date(moons$NewMoonDate[which.max(moons$Period)]) + most_recent_newmoon = moons$newmoonnumber[which.max(moons$period)] + most_recent_newmoon_date = as.Date(moons$newmoondate[which.max(moons$period)]) first_forecast_newmoon=most_recent_newmoon+1 last_forecast_newmoon=first_forecast_newmoon + 11 forecast_newmoons = first_forecast_newmoon:last_forecast_newmoon @@ -33,16 +33,16 @@ for(this_NewMoon in initial_time_NewMoons){ rodent_data = get_rodent_data(moons, forecast_date, filename_suffix) rodent_data$all = rodent_data$all %>% - filter(NewMoonNumber <= this_NewMoon) + filter(newmoonnumber <= this_NewMoon) rodent_data$controls = rodent_data$controls %>% - filter(NewMoonNumber <= this_NewMoon) + filter(newmoonnumber <= this_NewMoon) weather_data = get_weather_data(moons, rodent_data$all, first_forecast_newmoon, last_forecast_newmoon) %>% - filter(NewMoonNumber <= this_NewMoon) + filter(newmoonnumber <= this_NewMoon) #Get only relevent columns now that this is isn't needed to subset weather. rodent_data$all = rodent_data$all %>% - select(-NewMoonDate,-CensusDate,-Period,-Year,-Month) + select(-newmoondate,-censusdate,-period,-year,-month) #tscount::tsglm() will not model a timeseries of all 0's. So for those species, which are #ones that just haven't been observed in a while, make a forecast of all 0's. diff --git a/forecast_tools.R b/forecast_tools.R index 8527c6c72f..2ebedcb2f0 100644 --- a/forecast_tools.R +++ b/forecast_tools.R @@ -51,7 +51,7 @@ make_ensemble=function(all_forecasts, models_to_use=NA, CI_level = 0.9){ weighted_estimates = all_forecasts %>% mutate(model_var = ((UpperPI - estimate)/CI_quantile)^2) %>% left_join(weights, by=c('date','model','currency','level','species','fit_start_newmoon','fit_end_newmoon','initial_newmoon')) %>% - group_by(date, NewMoonNumber, forecastmonth, forecastyear,level, currency, species, fit_start_newmoon, fit_end_newmoon, initial_newmoon) %>% + group_by(date, newmoonnumber, forecastmonth, forecastyear,level, currency, species, fit_start_newmoon, fit_end_newmoon, initial_newmoon) %>% summarise(ensemble_estimate = sum(estimate*weight), weighted_ss = sum(weight * (estimate - ensemble_estimate)^2) , ensemble_var = sum(model_var * weight) + weighted_ss / (n()*sum(weight)-1), @@ -78,17 +78,17 @@ get_sp_predicts = function(data, lvl, lead_time) { ""), format = "%m/%Y")) %>% transform(date = as.Date(date, "%Y-%m-%d")) data1 = filter(data, level == lvl, date == max(as.Date(date))) - target_moon = min(data1$NewMoonNumber) + (lead_time - 1) - data2 = filter(data1, NewMoonNumber == target_moon) + target_moon = min(data1$newmoonnumber) + (lead_time - 1) + data2 = filter(data1, newmoonnumber == target_moon) } plot_data = function(data) { newmoons_table = read.csv( text = getURL( "https://raw.githubusercontent.com/weecology/PortalData/master/Rodents/moon_dates.csv")) - target_moon=unique(data$NewMoonNumber) - period_code = dplyr::filter(newmoons_table, newmoons_table$NewMoonNumber == target_moon) %>% - dplyr::select(Period) %>% + target_moon=unique(data$newmoonnumber) + period_code = dplyr::filter(newmoons_table, newmoons_table$newmoonnumber == target_moon) %>% + dplyr::select(period) %>% as.integer() sp_predict = ggplot(data, aes( @@ -138,7 +138,7 @@ calculate_forecast_error = function(observations, forecasts, error_metric='MSE', if(!forecast_is_valid(forecasts)) stop('Forecast dataframe not valid') - valid_observation_columns = c('NewMoonNumber','currency','level','species','actual') + valid_observation_columns = c('newmoonnumber','currency','level','species','actual') if(!all(valid_observation_columns %in% colnames(observations))) stop('observation data.frame does not have valid column names') #At least 1 matching value must be in each of these columns in the observations and forecasts @@ -152,8 +152,8 @@ calculate_forecast_error = function(observations, forecasts, error_metric='MSE', #Calculate error if(error_metric == 'MSE'){ comparisons = forecasts %>% - inner_join(observations, by=c('NewMoonNumber','currency','level','species')) %>% - group_by(date, model, NewMoonNumber, currency, level, species) %>% + inner_join(observations, by=c('newmoonnumber','currency','level','species')) %>% + group_by(date, model, newmoonnumber, currency, level, species) %>% summarise(error=(estimate-actual)^2) %>% ungroup() } else if(error_metric == 'Likelihood') { @@ -167,13 +167,13 @@ calculate_forecast_error = function(observations, forecasts, error_metric='MSE', #TODO: Make the lead time the actual days or weeks once more frequent forecasts are being made( see #37) forecast_date_new_moon_number = comparisons %>% group_by(date) %>% - summarise(new_moon_of_forecast = min(NewMoonNumber)-1) %>% + summarise(new_moon_of_forecast = min(newmoonnumber)-1) %>% ungroup() comparisons_with_lead_time = comparisons %>% left_join(forecast_date_new_moon_number, by='date') %>% - mutate(lead_time=NewMoonNumber - new_moon_of_forecast) %>% - select(-new_moon_of_forecast, -NewMoonNumber, -date) + mutate(lead_time=newmoonnumber - new_moon_of_forecast) %>% + select(-new_moon_of_forecast, -newmoonnumber, -date) comparisons_model_summary = comparisons_with_lead_time %>% group_by(model, currency, level, species, lead_time) %>% @@ -215,7 +215,7 @@ forecast_is_valid=function(forecast_df, verbose=FALSE){ is_valid=TRUE violations=c() #Define valid valeus - valid_columns = c('date','forecastmonth','forecastyear','NewMoonNumber','model','currency', + valid_columns = c('date','forecastmonth','forecastyear','newmoonnumber','model','currency', 'level','species','estimate','LowerPI','UpperPI','fit_start_newmoon', 'fit_end_newmoon','initial_newmoon') valid_currencies = c('abundance','richness','biomass','energy') @@ -318,7 +318,7 @@ forecast_viz <- function(obs_data, obs_date_col_name, obs_val_col_name, for_data for_date_col_name, for_val_col_name, for_model_name, for_lowerpi_col_name, for_upperpi_col_name, start_newmoon){ for_data_sub = filter(for_data, species == obs_val_col_name, model == for_model_name) - obs_data_sub = filter(obs_data, NewMoonNumber >= start_newmoon) + obs_data_sub = filter(obs_data, newmoonnumber >= start_newmoon) ggplot(obs_data_sub, aes_string(x = obs_date_col_name)) + geom_ribbon(data = for_data_sub, mapping = aes_string(x = for_date_col_name, ymin = for_lowerpi_col_name, ymax = for_upperpi_col_name), fill = "lightblue") + geom_line(aes_string(y = obs_val_col_name)) + diff --git a/index.Rmd b/index.Rmd index 68f553adab..e3f479f6a3 100644 --- a/index.Rmd +++ b/index.Rmd @@ -19,7 +19,7 @@ obs_data$total = rowSums(select(obs_data, -period)) new_moon_file = portalr::FullPath('PortalData/Rodents/moon_dates.csv', '~') new_moons = read.csv(new_moon_file) -obs_data_newmoon = inner_join(obs_data, new_moons, by = c("period" = "Period")) +obs_data_newmoon = inner_join(obs_data, new_moons, by = c("period" = "period")) #for_data = read.csv("predictions/2017-02-24Allforecasts.csv") for_data = compile_forecasts() %>% @@ -31,10 +31,10 @@ for_data = filter(for_data, date == most_recent_forecast) for_data$model = "ensemble" forecast_viz(obs_data = obs_data_newmoon, - obs_date_col_name = "NewMoonNumber", + obs_date_col_name = "newmoonnumber", obs_val_col_name = "total", for_data = for_data, - for_date_col_name = "NewMoonNumber", + for_date_col_name = "newmoonnumber", for_val_col_name = "estimate", for_model_name = "ensemble", for_lowerpi_col_name = "LowerPI", @@ -61,10 +61,10 @@ most_abund_sp = sp_predictions %>% for (species in as.vector(most_abund_sp$species)) { species_forecast = forecast_viz(obs_data = obs_data_newmoon, - obs_date_col_name = "NewMoonNumber", + obs_date_col_name = "newmoonnumber", obs_val_col_name = species, for_data = for_data, - for_date_col_name = "NewMoonNumber", + for_date_col_name = "newmoonnumber", for_val_col_name = "estimate", for_model_name = "ensemble", for_lowerpi_col_name = "LowerPI", diff --git a/models/model_functions.R b/models/model_functions.R index 3451a8d2db..320ade528e 100644 --- a/models/model_functions.R +++ b/models/model_functions.R @@ -9,8 +9,8 @@ get_moon_data <- function(){ moons <- read.csv(FullPath('PortalData/Rodents/moon_dates.csv', '~'), header=T) #Add in year and month to join with the rest of the data - moons$Year=year(moons$NewMoonDate) - moons$Month=month(moons$NewMoonDate) + moons$year=year(moons$newmoondate) + moons$month=month(moons$newmoondate) return(moons) } @@ -31,17 +31,17 @@ get_rodent_data <- function(moons, forecast_date, filename_suffix){ controls = controls %>% filter(treatment == 'control') %>% select(-treatment) %>% - inner_join(moons,by=c("period"="Period")) %>% - subset(NewMoonNumber >= historic_start_newmoon) %>% - select(-NewMoonDate,-CensusDate,-period,-Year,-Month) + inner_join(moons,by=c("period"="period")) %>% + subset(newmoonnumber >= historic_start_newmoon) %>% + select(-newmoondate,-censusdate,-period,-year,-month) #All plots all=portalr::abundance(level="Site",type="Rodents",length="all", incomplete = FALSE) #The total rodent count across the entire site all$total = rowSums(all[,-(1)]) - all=inner_join(moons,all,by=c("Period"="period")) + all=inner_join(moons,all,by=c("period"="period")) - all=subset(all,Period >= historic_start_period) + all=subset(all,period >= historic_start_period) rodent_data = list() rodent_data$controls = controls rodent_data$all = all @@ -53,42 +53,42 @@ get_rodent_data <- function(moons, forecast_date, filename_suffix){ get_weather_data <- function(moons, all, first_forecast_newmoon, last_forecast_newmoon){ weather_data=portalr::weather("Monthly") %>% ungroup() %>% - left_join(moons, by=c('Year','Month')) + left_join(moons, by=c('year','month')) #Offset the NewMoonNumber to create a 6 month lag between #rodent observations and weather - weather_data$NewMoonNumber_with_lag = weather_data$NewMoonNumber + 6 + weather_data$NewMoonNumber_with_lag = weather_data$newmoonnumber + 6 #Assign weather using lag to rodent observations. #This will match weather row numbers to corrosponding rows in all and controls weather_data = weather_data %>% - select(-NewMoonDate, -CensusDate, -Period, -Year, -Month) %>% - right_join(all, by=c('NewMoonNumber_with_lag'='NewMoonNumber')) %>% - select(Year,Month,MinTemp,MaxTemp,MeanTemp,Precipitation,NDVI,NewMoonNumber, NewMoonNumber_with_lag) + select(-newmoondate, -censusdate, -period, -year, -month) %>% + right_join(all, by=c('NewMoonNumber_with_lag'='newmoonnumber')) %>% + select(year,month,mintemp,maxtemp,meantemp,precipitation,NDVI,newmoonnumber, NewMoonNumber_with_lag) ##Get 6 month weather forecast by combining stations data and monthly means of past 3 years #Used to make predictions for the months 7-12 of a 12 month forecast using a 6 month lag. #A data.frame of the months that will be in this weather forecast. weather_forecast_months = moons %>% - filter(NewMoonNumber >= first_forecast_newmoon-5, NewMoonNumber <= last_forecast_newmoon-5) + filter(newmoonnumber >= first_forecast_newmoon-5, newmoonnumber <= last_forecast_newmoon-5) # x=subset(weather,NewMoonNumber>=first_forecast_newmoon-5) %>% # subset(NewMoonNumber<=last_forecast_newmoon-5) weathermeans=weather_data[dim(weather_data)[1]-36:dim(weather_data)[1],] %>% - group_by(Month) %>% - summarize(MinTemp=mean(MinTemp,na.rm=T),MaxTemp=mean(MaxTemp,na.rm=T),MeanTemp=mean(MeanTemp,na.rm=T), - Precipitation=mean(Precipitation,na.rm=T),NDVI=mean(NDVI,na.rm=T)) %>% - slice(match(weather_forecast_months$Month, Month)) + group_by(month) %>% + summarize(mintemp=mean(mintemp,na.rm=T),maxtemp=mean(maxtemp,na.rm=T),meantemp=mean(meantemp,na.rm=T), + precipitation=mean(precipitation,na.rm=T),NDVI=mean(NDVI,na.rm=T)) %>% + slice(match(weather_forecast_months$month, month)) #Insert longterm means where there is missing data in the historic weather weather_data=weather_data %>% mutate(NDVI = ifelse(is.na(NDVI), mean(NDVI, na.rm = T), NDVI)) %>% - mutate(MinTemp = ifelse(is.na(MinTemp), mean(MinTemp, na.rm = T), MinTemp)) %>% - mutate(MaxTemp = ifelse(is.na(MaxTemp), mean(MaxTemp, na.rm = T), MaxTemp)) %>% - mutate(MeanTemp = ifelse(is.na(MeanTemp), mean(MeanTemp, na.rm = T), MeanTemp)) %>% - mutate(Precipitation = ifelse(is.na(Precipitation), mean(Precipitation, na.rm = T), Precipitation)) + mutate(mintemp = ifelse(is.na(mintemp), mean(mintemp, na.rm = T), mintemp)) %>% + mutate(maxtemp = ifelse(is.na(maxtemp), mean(maxtemp, na.rm = T), maxtemp)) %>% + mutate(meantemp = ifelse(is.na(meantemp), mean(meantemp, na.rm = T), meantemp)) %>% + mutate(precipitation = ifelse(is.na(precipitation), mean(precipitation, na.rm = T), precipitation)) } @@ -102,7 +102,7 @@ forecastall <- function(abundances, level, weather_data, weatherforecast, #forecasts is where we will append all forecasts for this level #all_model_aic is where we will append all model aics for this level forecasts = data.frame(date=as.Date(character()), forecastmonth=numeric(), forecastyear=numeric(), - NewMoonNumber=numeric(), currency=character(), model=character(), level=character(), + newmoonnumber=numeric(), currency=character(), model=character(), level=character(), species=character(), estimate=numeric(), LowerPI=numeric(), UpperPI=numeric()) all_model_aic = data.frame(date=as.Date(character()),currency=character(),model=character(),level=character(), species=character(), aic=numeric()) @@ -165,12 +165,12 @@ forecastall <- function(abundances, level, weather_data, weatherforecast, bind_rows(data.frame(pets[2])) #########Include columns describing the data used in the forecast############### - forecasts$fit_start_newmoon = min(abundances$NewMoonNumber) - forecasts$fit_end_newmoon = max(abundances$NewMoonNumber) - forecasts$initial_newmoon = max(abundances$NewMoonNumber) - all_model_aic$fit_start_newmoon = min(abundances$NewMoonNumber) - all_model_aic$fit_end_newmoon = max(abundances$NewMoonNumber) - all_model_aic$initial_newmoon = max(abundances$NewMoonNumber) + forecasts$fit_start_newmoon = min(abundances$newmoonnumber) + forecasts$fit_end_newmoon = max(abundances$newmoonnumber) + forecasts$initial_newmoon = max(abundances$newmoonnumber) + all_model_aic$fit_start_newmoon = min(abundances$newmoonnumber) + all_model_aic$fit_end_newmoon = max(abundances$newmoonnumber) + all_model_aic$initial_newmoon = max(abundances$newmoonnumber) #########Write forecasts to file and aics to separate files############### #Appending a csv without re-writing the header. Needed for hindcasting diff --git a/models/naive01.R b/models/naive01.R index 5ce11ea726..aa93ce730a 100644 --- a/models/naive01.R +++ b/models/naive01.R @@ -7,7 +7,7 @@ naive01=function(abundances,forecast_date,forecast_months,forecast_years,forecas model01=forecast(abundances$total,h=num_forecast_months,level=CI_level,BoxCox.lambda(0),allow.multiplicative.trend=T) -forecasts01=data.frame(date=forecast_date, forecastmonth=forecast_months,forecastyear=forecast_years, NewMoonNumber=forecast_newmoons, +forecasts01=data.frame(date=forecast_date, forecastmonth=forecast_months,forecastyear=forecast_years, newmoonnumber=forecast_newmoons, currency="abundance",model="Forecast", level=level, species="total", estimate=model01$mean, LowerPI=model01$lower[,which(model01$level==CI_level*100)], UpperPI=model01$upper[,which(model01$level==CI_level*100)]) forecasts01[sapply(forecasts01, is.ts)] <- lapply(forecasts01[sapply(forecasts01, is.ts)],unclass) diff --git a/models/naive02.R b/models/naive02.R index ae8a96532b..128b8342cc 100644 --- a/models/naive02.R +++ b/models/naive02.R @@ -5,7 +5,7 @@ naive02=function(abundances,forecast_date,forecast_months,forecast_years,forecas model=forecast(auto.arima(abundances$total,lambda = 0),h=num_forecast_months,level=CI_level,fan=T) -forecasts02=data.frame(date=forecast_date, forecastmonth=forecast_months, forecastyear=forecast_years, NewMoonNumber=forecast_newmoons, +forecasts02=data.frame(date=forecast_date, forecastmonth=forecast_months, forecastyear=forecast_years, newmoonnumber=forecast_newmoons, currency="abundance", model="AutoArima", level=level, species="total", estimate=model$mean, LowerPI=model$lower[,which(model$level==CI_level*100)], UpperPI=model$upper[,which(model$level==CI_level*100)]) forecasts02[sapply(forecasts02, is.ts)] <- lapply(forecasts02[sapply(forecasts02, is.ts)],unclass) diff --git a/models/neg_binom_ts.R b/models/neg_binom_ts.R index 8cb45717a0..cd3d695554 100644 --- a/models/neg_binom_ts.R +++ b/models/neg_binom_ts.R @@ -25,7 +25,7 @@ neg_binom_ts=function(abundances,forecast_date,forecast_months,forecast_years,fo model_aic = tryCatch(summary(model)$AIC, error = function(x) {1e6}) } newpred=data.frame(date=rep(forecast_date,num_forecast_months), forecastmonth=forecast_months, forecastyear=forecast_years, - NewMoonNumber=forecast_newmoons, currency="abundance", model=rep("NegBinom Time Series",num_forecast_months), + newmoonnumber=forecast_newmoons, currency="abundance", model=rep("NegBinom Time Series",num_forecast_months), level=level, species=rep(s,num_forecast_months), estimate=pred$pred, LowerPI=pred$interval[,1], UpperPI=pred$interval[,2]) allforecasts=rbind(allforecasts,newpred) diff --git a/models/pois_env_ts.R b/models/pois_env_ts.R index 12fdcd4b65..9f75303a23 100644 --- a/models/pois_env_ts.R +++ b/models/pois_env_ts.R @@ -10,15 +10,15 @@ pois_env_ts=function(abundances,weather_data,weathermeans,forecast_date,forecast allaic=data.frame() #List of candiate environmental covariate models -model_covariates = list(c('MaxTemp','MeanTemp','Precipitation','NDVI'), - c('MaxTemp','MinTemp','Precipitation','NDVI'), - c('MinTemp','MaxTemp','MeanTemp','Precipitation'), - c('Precipitation','NDVI'), - c('MinTemp','NDVI'), - c('MinTemp'), - c('MaxTemp'), - c('MeanTemp'), - c('Precipitation'), +model_covariates = list(c('maxtemp','meantemp','precipitation','NDVI'), + c('maxtemp','mintemp','precipitation','NDVI'), + c('mintemp','maxtemp','meantemp','precipitation'), + c('precipitation','NDVI'), + c('mintemp','NDVI'), + c('mintemp'), + c('maxtemp'), + c('meantemp'), + c('precipitation'), c('NDVI')) for(s in species) { @@ -58,7 +58,7 @@ for(s in species) { } } newpred = data.frame(date=rep(forecast_date,num_forecast_months), forecastmonth=forecast_months, forecastyear=forecast_years, - NewMoonNumber=forecast_newmoons, currency="abundance", model=rep("Poisson Env",num_forecast_months), + newmoonnumber=forecast_newmoons, currency="abundance", model=rep("Poisson Env",num_forecast_months), level=level, species=rep(s,num_forecast_months), estimate=pred$pred, LowerPI=pred$interval[,1],UpperPI=pred$interval[,2]) allforecasts = rbind(allforecasts,newpred)