#!/usr/bin/env python
# coding: utf-8

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('dark')

from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import make_pipeline

from fbprophet import Prophet
from fbprophet.diagnostics import cross_validation, performance_metrics
from fbprophet.plot import plot_cross_validation_metric

from model_persistence import train_test_split


# Import the combined energy + weather data set
def get_prophet_data(start_date, stop_date):
    """Load the combined energy + weather data and slice it to the requested date range."""
    data_comb = pd.read_csv('./data/cleaned_data/energy_weather_2015_2018.csv', parse_dates=True, index_col='time')
    data_comb = data_comb.loc[start_date:stop_date]
    return data_comb
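
# Assumed input schema (inferred from how the columns are used below, not from
# inspecting the raw file): the CSV has an hourly 'time' index, a target column
# 'actual_load', and the weather columns listed in make_weather_data().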
def standard_index(data):
    """Reset the datetime index so Prophet sees a 'ds' column, and re-sort chronologically."""
    data = data.reset_index()
    # now that the datetime index is gone, make doubly sure the rows are ordered correctly
    data.sort_values(by=['ds'], inplace=True)
    return data
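
# Illustrative sketch (the values are made up): after standard_index the training
# frame has the layout Prophet expects -- a 'ds' timestamp column, the scaled
# target 'y', and one column per extra regressor:
#
#                     ds      y   pca1   pca2
#   0 2016-01-01 00:00:00  0.41   0.52   0.33
#   1 2016-01-01 01:00:00  0.39   0.51   0.34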
def prophet_cv_performance(model, train, minmax_pipe):
    """Fit the model, run Prophet's rolling-origin cross validation, and report the errors."""
    # fit the model on the scaled training data
    model.fit(train)
    # run cross validation: train on an initial window of 366 days (2016 is a leap year),
    # then forecast successive 24-hour horizons, stepping forward 24 hours at a time
    cv_results = cross_validation(model, initial='366 days', period='24 hours', horizon='24 hours')
    # inverse-transform the prediction and target columns back to the original load
    # units; the scaler was fitted on a single column, so its parameters broadcast
    # across both 'yhat' and 'y'
    cv_results[['yhat', 'y']] = minmax_pipe.inverse_transform(cv_results[['yhat', 'y']])
    # score the cross-validation output
    cv_performance = performance_metrics(cv_results)
    # display the results
    print('Model Mean Hourly MAE {0:.2f}'.format(np.mean(cv_performance['mae'])))
    print('Model Mean Hourly MAPE {0:.2f}'.format(np.mean(cv_performance['mape'])))
    return cv_results, cv_performance
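
# Illustrative sketch of the rolling-origin scheme above (the dates are
# hypothetical): with initial='366 days', period='24 hours', horizon='24 hours',
# Prophet picks a series of cutoffs one day apart; each fold fits on everything
# up to the cutoff and forecasts the following day, e.g.
#
#   cutoff 2017-01-02 00:00 -> predict 2017-01-02 01:00 .. 2017-01-03 00:00
#   cutoff 2017-01-03 00:00 -> predict 2017-01-03 01:00 .. 2017-01-04 00:00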
def pca_minmax_pipes():
    """Build preprocessing pipelines: PCA + min-max for the weather features, min-max for the target."""
    # initiate a PCA object that reduces the weather features to 2 components
    pca = PCA(n_components=2)
    # pipeline for the weather features: PCA first, then scale to [0, 1].
    # use separate scaler instances so fitting the weather pipeline cannot
    # silently refit the target scaler
    pca_pipe = make_pipeline(pca, MinMaxScaler())
    # pipeline for the target: min-max scaling only
    minmax_pipe = make_pipeline(MinMaxScaler())
    return pca_pipe, minmax_pipe
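
# Minimal usage sketch (illustrative, with made-up names and shapes): the weather
# pipeline maps an (n_samples, 8) weather matrix to (n_samples, 2) scaled
# components, while the target pipeline is kept around so its inverse_transform
# can map predictions back to load units later:
#
#   pca_pipe, minmax_pipe = pca_minmax_pipes()
#   components = pca_pipe.fit_transform(weather_matrix)   # -> values in [0, 1]
#   y_scaled = minmax_pipe.fit_transform(load_column)     # load_column: (n, 1)
#   y_back = minmax_pipe.inverse_transform(y_scaled)      # recover original units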
def make_weather_data(data_comb, pca_pipe):
    """Interpolate the weather columns and project them onto the two PCA components."""
    weather_cols = ['temp', 'pressure', 'wind_speed', 'rain_1h', 'rain_3h', 'snow_3h', 'heatdd', 'cooldd']
    # select the weather columns and make sure there are no NaNs before the transform
    data_weather = data_comb[weather_cols].interpolate(method='linear')
    # fit and apply the PCA pipeline (note: fitted on the full series, train and test alike)
    pca_weather = pca_pipe.fit_transform(data_weather)
    # wrap the transformed components in a dataframe, keeping the datetime index
    pca_weather_df = pd.DataFrame(pca_weather, index=data_weather.index, columns=['pca1', 'pca2'])
    return pca_weather_df
def make_energy_data(data_comb, minmax_pipe):
    """Min-max scale the target load series and return it as a single-column 'y' dataframe."""
    # slice out the energy data
    energy_data = data_comb['actual_load']
    # apply the min-max scaler; reshape to (-1, 1) because the scaler expects a 2-D array, not a Series
    energy_minmax = minmax_pipe.fit_transform(np.reshape(energy_data.values, (-1, 1)))
    # convert back to a dataframe, keeping the datetime index
    energy_minmax_df = pd.DataFrame(energy_minmax, index=energy_data.index, columns=['y'])
    return energy_minmax_df
def make_train_test(pca_weather_df, energy_minmax_df, split_date):
    """Join the scaled target and weather components, then split into train and test sets."""
    # recombine the scaled energy and weather data as features
    data_scaled = pd.concat([energy_minmax_df, pca_weather_df], axis=1)
    # name the index 'ds' so reset_index later yields the column Prophet expects
    data_scaled.index.name = 'ds'
    # split into train and test at the given date
    train_e_w, test_e_w = train_test_split(data_scaled, split_date)
    # move the index into a 'ds' column and re-sort, for both sets
    train_e_w = standard_index(train_e_w)
    test_e_w = standard_index(test_e_w)
    print('Length of the train dataset {}'.format(len(train_e_w)))
    print('Length of the test dataset {}'.format(len(test_e_w)))
    return train_e_w, test_e_w
def initalize_model(yearly_seasonality=True, seasonality_mode='additive', weather_prior=0.8):
    """Initialize a Prophet model with the two PCA weather components as extra regressors."""
    model = Prophet(yearly_seasonality=yearly_seasonality,
                    seasonality_mode=seasonality_mode)
    # add the two PCA weather components as additive regressors
    model.add_regressor('pca1', prior_scale=weather_prior, mode='additive')
    model.add_regressor('pca2', prior_scale=weather_prior, mode='additive')
    return model
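
# Note on extra regressors (a general Prophet requirement, not specific to this
# script): any frame passed to model.predict() must contain the 'pca1' and 'pca2'
# columns for every forecast timestamp. Illustrative sketch with a hypothetical
# future frame:
#
#   future = model.make_future_dataframe(periods=24, freq='H')
#   future[['pca1', 'pca2']] = ...  # scaled weather components for those hours
#   forecast = model.predict(future)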
def run_prophet_weather_model(start_date, stop_date):
    """Prepare the data and an untrained weather model, ready for cross validation."""
    # load the dataset containing both the energy and weather data
    data_comb = get_prophet_data(start_date, stop_date)
    # set up the PCA and min-max transformers
    pca_pipe, minmax_pipe = pca_minmax_pipes()
    # scale and prep the weather data for the Prophet format
    pca_weather_df = make_weather_data(data_comb, pca_pipe)
    # scale and prep the energy data for the Prophet format
    energy_minmax_df = make_energy_data(data_comb, minmax_pipe)
    # build the training and test sets, splitting at the end of 2017
    train_e_w, test_e_w = make_train_test(pca_weather_df, energy_minmax_df, '2017-12-31')
    # set up the Prophet model
    weather_model_2017 = initalize_model(yearly_seasonality=True, seasonality_mode='additive', weather_prior=0.8)
    return weather_model_2017, train_e_w, minmax_pipe
def process_and_save_results(weather_model_2017, train_e_w, minmax_pipe):
    """Fit the model, run cross validation, plot the error, and save the results to CSV."""
    # fit the model and run cross validation; returns the CV results and performance metrics
    weather_cv_results, weather_cv_performance = prophet_cv_performance(weather_model_2017, train_e_w, minmax_pipe)
    # plot the cross-validated error performance
    plot_cross_validation_metric(weather_cv_results, metric='mae', rolling_window=0)
    # save both outputs for later comparison against the other models
    path = './results/prophet/'
    weather_cv_results.to_csv(path + 'weather_cv_results.csv')
    weather_cv_performance.to_csv(path + 'weather_cv_performance.csv')
    print('Results saved at {}'.format(path))
    # display the error plot (a no-op on non-interactive backends)
    plt.show()
if __name__ == '__main__':
    print('Running cross validation of the Prophet weather model with the following parameters')
    print('Yearly seasonality is TRUE')
    print('Seasonality mode is ADDITIVE')
    print('Weather prior is 0.8')
    print('Initial training period of cross validation is 366 days')
    print('Type: Multivariate')
    weather_model_2017, train_e_w, minmax_pipe = run_prophet_weather_model('2016-01-01', '2018-03-31')
    print('Saving results...')
    process_and_save_results(weather_model_2017, train_e_w, minmax_pipe)
    print('Done')