-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5e0f4bd
commit 6598dfe
Showing
3 changed files
with
172 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,3 @@ | ||
LSTMs for timeseries | ||
LSTMs for timeseries. | ||
Adapted from <https://github.com/tgjeon/TensorFlow-Tutorials-for-Time-Series/> | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
import numpy as np | ||
import pandas as pd | ||
import tensorflow as tf | ||
from tensorflow.python.framework import dtypes | ||
from tensorflow.contrib import learn as tflearn | ||
from tensorflow.contrib import layers as tflayers | ||
from tensorflow.contrib import rnn | ||
|
||
def rnn_data(data, time_steps, labels=False):
    """
    Build an RNN-ready array of sliding windows over ``data``.

    Example with l = [1, 2, 3, 4, 5] and time_steps = 2:
        labels=False -> [[1, 2], [2, 3], [3, 4]]   # input windows
        labels=True  -> [3, 4, 5]                  # next-step targets

    :param data: pandas Series or DataFrame of observations.
    :param time_steps: window length (number of past observations per sample).
    :param labels: if True, return the single observation following each
        window instead of the window itself.
    :return: numpy float32 array of windows (or of targets).
    """
    rnn_df = []
    for start in range(len(data) - time_steps):
        if labels:
            target = data.iloc[start + time_steps]
            try:
                # DataFrame row -> flat numpy vector. ``.values`` replaces
                # the long-removed pandas ``.as_matrix()``.
                rnn_df.append(target.values)
            except AttributeError:
                # A Series yields a plain scalar here; append it as-is.
                rnn_df.append(target)
        else:
            # ``.values`` instead of the removed ``.as_matrix()`` (bug fix:
            # this branch previously had no fallback and crashed on
            # pandas >= 0.25).
            window = data.iloc[start: start + time_steps].values
            # A Series window is 1-D; wrap each scalar so every sample has
            # shape (time_steps, n_features). Renamed the inner variable so
            # it no longer shadows the loop index.
            rnn_df.append(window if window.ndim > 1 else [[v] for v in window])

    return np.array(rnn_df, dtype=np.float32)
|
||
|
||
|
||
def split_data(data, val_size=0.1, test_size=0.1):
    """
    Split ``data`` into training, validation and test partitions.

    The last ``test_size`` fraction of rows becomes the test set; of the
    remaining rows, the last ``val_size`` fraction becomes the validation
    set, and everything before that is training data.

    :param data: pandas Series or DataFrame, ordered in time.
    :param val_size: fraction of the non-test rows used for validation.
    :param test_size: fraction of all rows used for testing.
    :return: (train, val, test) partitions, in order.
    """
    test_start = int(round(len(data) * (1 - test_size)))
    val_start = int(round(test_start * (1 - val_size)))

    train_part = data.iloc[:val_start]
    val_part = data.iloc[val_start:test_start]
    test_part = data.iloc[test_start:]

    return train_part, val_part, test_part
|
||
|
||
|
||
def prepare_data(data, time_steps, labels=False, val_size=0.1, test_size=0.1):
    """
    Split ``data`` chronologically and window each partition for an LSTM.

    :param data: pandas Series or DataFrame of observations.
    :param time_steps: window length passed through to ``rnn_data``.
    :param labels: if True, produce next-step targets instead of windows.
    :param val_size: fraction of non-test rows used for validation.
    :param test_size: fraction of all rows used for testing.
    :return: (train, val, test) numpy arrays of windows (or targets).
    """
    partitions = split_data(data, val_size, test_size)
    return tuple(rnn_data(part, time_steps, labels=labels) for part in partitions)
|
||
def load_csvdata(rawdata, time_steps, seperate=False):
    """
    Turn csv-loaded data into train/val/test inputs (X) and targets (y).

    :param rawdata: DataFrame, or anything ``pd.DataFrame`` accepts.
    :param time_steps: window length for the LSTM inputs.
    :param seperate: if True, take inputs from column 'a' and targets from
        column 'b'; otherwise use the whole frame for both. (Parameter name
        spelling kept for interface compatibility.)
    :return: (X, y) where each is a dict with 'train', 'val', 'test' keys.
    """
    if isinstance(rawdata, pd.DataFrame):
        frame = rawdata
    else:
        frame = pd.DataFrame(rawdata)

    x_source = frame['a'] if seperate else frame
    y_source = frame['b'] if seperate else frame

    train_x, val_x, test_x = prepare_data(x_source, time_steps)
    train_y, val_y, test_y = prepare_data(y_source, time_steps, labels=True)

    return (dict(train=train_x, val=val_x, test=test_x),
            dict(train=train_y, val=val_y, test=test_y))
|
||
|
||
def generate_data(fct, x, time_steps, seperate=False):
    """
    Generate windowed train/val/test data from ``fct`` applied to ``x``.

    :param fct: function producing the raw series, e.g. ``np.sin``.
    :param x: input passed to ``fct``.
    :param time_steps: window length for the LSTM inputs.
    :param seperate: if True, inputs come from column 'a' and targets from
        column 'b'; otherwise the whole frame is used for both. (Spelling
        kept for interface compatibility.)
    :return: (X, y) where each is a dict with 'train', 'val', 'test' keys.
    """
    raw = fct(x)
    frame = raw if isinstance(raw, pd.DataFrame) else pd.DataFrame(raw)

    source_x = frame['a'] if seperate else frame
    source_y = frame['b'] if seperate else frame

    train_x, val_x, test_x = prepare_data(source_x, time_steps)
    train_y, val_y, test_y = prepare_data(source_y, time_steps, labels=True)

    inputs = dict(train=train_x, val=val_x, test=test_x)
    targets = dict(train=train_y, val=val_y, test=test_y)
    return inputs, targets
|
||
|
||
def lstm_model(num_units, rnn_layers, dense_layers=None, learning_rate=0.1, optimizer='Adagrad'):
    """
    Creates a deep model based on:
        * stacked lstm cells
        * an optional dense layers
    :param num_units: number of slices the input is unstacked into along
        axis 1 (passed as ``num`` to ``tf.unstack``). NOTE(review): despite
        the name, callers appear to pass the time-step count here — the cell
        sizes come from ``rnn_layers`` — confirm against call sites.
    :param rnn_layers: list of int or dict
        * list of int: the size of each `BasicLSTMCell` to instantiate
        * list of dict: [{'num_units': int, 'keep_prob': float}, ...];
          when 'keep_prob' is present and truthy the cell is wrapped in a
          `DropoutWrapper` with that keep probability
    :param dense_layers: list of node counts for each dense layer, or a dict
        with keys 'layers', 'activation', 'dropout' forwarded to
        `tflayers.stack`; None skips the dense stack entirely
    :param learning_rate: learning rate handed to `optimize_loss`
    :param optimizer: optimizer name handed to `optimize_loss`
    :return: the model definition — a function `(X, y) ->
        (prediction, loss, train_op)` usable as a `tf.contrib.learn`
        Estimator `model_fn`
    """

    def lstm_cells(layers):
        # Dict specs get an optional dropout wrapper; int specs are plain
        # BasicLSTMCells. Only the first element is inspected to decide
        # which form the whole list uses.
        if isinstance(layers[0], dict):
            return [tf.nn.rnn_cell.DropoutWrapper(rnn.BasicLSTMCell(layer['num_units'],
                                                                    state_is_tuple=True),
                                                  layer['keep_prob'])
                    if layer.get('keep_prob') else rnn.BasicLSTMCell(layer['num_units'],
                                                                     state_is_tuple=True)
                    for layer in layers]
        return [rnn.BasicLSTMCell(steps, state_is_tuple=True) for steps in layers]

    def dnn_layers(input_layers, layers):
        # Dict spec carries per-stack activation/dropout options; a plain
        # list stacks fully-connected layers with defaults; falsy means no
        # dense layers at all.
        if layers and isinstance(layers, dict):
            return tflayers.stack(input_layers, tflayers.fully_connected,
                                  layers['layers'],
                                  activation=layers.get('activation'),
                                  dropout=layers.get('dropout'))
        elif layers:
            return tflayers.stack(input_layers, tflayers.fully_connected, layers)
        else:
            return input_layers

    def _lstm_model(X, y):
        # Stack the configured cells, then split X along the time axis into
        # the per-step tensors static_rnn expects.
        stacked_lstm = rnn.MultiRNNCell(lstm_cells(rnn_layers), state_is_tuple=True)
        x_ = tf.unstack(X, num=num_units, axis=1)

        # Only the last time step's output feeds the dense stack / regressor.
        output, layers = rnn.static_rnn(stacked_lstm, x_, dtype=dtypes.float32)
        output = dnn_layers(output[-1], dense_layers)
        prediction, loss = tflearn.models.linear_regression(output, y)
        train_op = tf.contrib.layers.optimize_loss(
            loss, tf.contrib.framework.get_global_step(), optimizer=optimizer,
            learning_rate=learning_rate)
        return prediction, loss, train_op

    return _lstm_model
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
"""Train an LSTM regressor on a sine wave and plot predictions vs. truth.

Fits a tf.contrib.learn Estimator built by ``lstm_model`` on windowed
``np.sin`` samples, reports test MSE, and plots predicted against actual.
"""
import numpy as np
import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt

from tensorflow.contrib import learn
from sklearn.metrics import mean_squared_error

# NOTE(review): 'lstm_predictior' looks misspelled but must match the actual
# module file name on disk -- confirm before renaming.
from lstm_predictior import generate_data, lstm_model

LOG_DIR = 'resources/logs/'
TIMESTEPS = 3                     # window length fed to the LSTM
RNN_LAYERS = [{'num_units': 5}]   # a single LSTM layer with 5 units
DENSE_LAYERS = None               # no dense layers on top of the LSTM
TRAINING_STEPS = 100
# Integer division: every_n_steps must be an int (true division yields a
# float on Python 3, which the original `/` would have produced).
PRINT_STEPS = TRAINING_STEPS // 10
BATCH_SIZE = 100

regressor = learn.Estimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS),
                            model_dir=LOG_DIR)

X, y = generate_data(np.sin, np.linspace(0, 100, 10000, dtype=np.float32), TIMESTEPS, seperate=False)

# Create a validation monitor: evaluates on the validation split every
# PRINT_STEPS steps and stops early after 1000 rounds without improvement.
validation_monitor = learn.monitors.ValidationMonitor(X['val'], y['val'],
                                                      every_n_steps=PRINT_STEPS,
                                                      early_stopping_rounds=1000)

regressor.fit(X['train'], y['train'],
              monitors=[validation_monitor],
              batch_size=BATCH_SIZE,
              steps=TRAINING_STEPS)

# print() calls instead of the original Python 2 print statements, which
# are syntax errors on Python 3.
print(X['test'].shape)
print(y['test'].shape)

predicted = regressor.predict(X['test'], as_iterable=False)
score = mean_squared_error(predicted, y['test'])
print("MSE: %f" % score)

plot_predicted, = plt.plot(predicted, label='predicted')
plot_test, = plt.plot(y['test'], label='test')
plt.legend(handles=[plot_predicted, plot_test])
plt.show()