Skip to content

Commit

Permalink
code for predicting sine curve
Browse files Browse the repository at this point in the history
  • Loading branch information
akash13singh committed Mar 1, 2017
1 parent 5e0f4bd commit 6598dfe
Show file tree
Hide file tree
Showing 3 changed files with 172 additions and 1 deletion.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
LSTMs for timeseries
LSTMs for timeseries.
Adapted from <https://github.com/tgjeon/TensorFlow-Tutorials-for-Time-Series/>

124 changes: 124 additions & 0 deletions lstm_predictior.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.python.framework import dtypes
from tensorflow.contrib import learn as tflearn
from tensorflow.contrib import layers as tflayers
from tensorflow.contrib import rnn

def rnn_data(data, time_steps, labels=False):
    """
    Turn a pandas Series/DataFrame into sliding-window arrays for an RNN.

    * example:
        l = [1, 2, 3, 4, 5]
        time_steps = 2
        -> labels == False [[1, 2], [2, 3], [3, 4]]  # input windows of 2 timesteps
        -> labels == True  [3, 4, 5]                 # value following each window

    :param data: pandas Series or DataFrame (accessed through `.iloc`).
    :param time_steps: number of consecutive rows in each input window.
    :param labels: when True return the row that follows each window instead
        of the window itself.
    :return: float32 numpy array with `len(data) - time_steps` entries
        (empty when `data` has no more than `time_steps` rows). Windows built
        from one-dimensional data get a trailing axis of size 1, so every
        window is two-dimensional.
    """
    rnn_df = []
    for i in range(len(data) - time_steps):
        if labels:
            # np.asarray handles both a scalar (Series input) and a row
            # (DataFrame input); `.as_matrix()` no longer exists in pandas.
            rnn_df.append(np.asarray(data.iloc[i + time_steps]))
        else:
            window = np.asarray(data.iloc[i: i + time_steps])
            # A window cut from a Series is 1-D: make it a column of
            # single-feature timesteps.
            rnn_df.append(window if window.ndim > 1 else window.reshape(-1, 1))

    return np.array(rnn_df, dtype=np.float32)



def split_data(data, val_size=0.1, test_size=0.1):
    """
    Cut `data` into consecutive training, validation and testing slices.

    :param data: pandas Series or DataFrame (sliced through `.iloc`).
    :param val_size: fraction of the non-test rows reserved for validation.
    :param test_size: fraction of all rows reserved for testing (the tail).
    :return: tuple (train, validation, test) in original row order.
    """
    total_rows = len(data)
    test_start = int(round(total_rows * (1 - test_size)))

    # The validation fraction is taken from what remains once the test tail
    # has been cut off.
    rows_before_test = len(data.iloc[:test_start])
    val_start = int(round(rows_before_test * (1 - val_size)))

    train_part = data.iloc[:val_start]
    val_part = data.iloc[val_start:test_start]
    test_part = data.iloc[test_start:]
    return train_part, val_part, test_part



def prepare_data(data, time_steps, labels=False, val_size=0.1, test_size=0.1):
    """
    Split `data` and window every part for an lstm cell.

    The data is first divided by `split_data` into training, validation and
    test parts; each part is then converted by `rnn_data` using the same
    `time_steps` and `labels` settings.

    :return: tuple (train, validation, test) of the converted parts.
    """
    parts = split_data(data, val_size, test_size)
    return tuple(rnn_data(part, time_steps, labels=labels) for part in parts)

def load_csvdata(rawdata, time_steps, seperate=False):
    """
    Build windowed inputs and next-step labels from already-loaded data.

    :param rawdata: a pandas DataFrame, or anything `pd.DataFrame` accepts.
    :param time_steps: window length handed to `prepare_data`.
    :param seperate: when True, inputs come from column 'a' and labels from
        column 'b'; otherwise both are built from the whole frame.
    :return: two dicts (inputs, labels), each keyed by 'train', 'val', 'test'.
    """
    frame = rawdata if isinstance(rawdata, pd.DataFrame) else pd.DataFrame(rawdata)
    split_names = ('train', 'val', 'test')

    inputs = prepare_data(frame['a'] if seperate else frame, time_steps)
    targets = prepare_data(frame['b'] if seperate else frame, time_steps, labels=True)
    return dict(zip(split_names, inputs)), dict(zip(split_names, targets))


def generate_data(fct, x, time_steps, seperate=False):
    """
    Generate windowed inputs and labels from the values of `fct(x)`.

    :param fct: callable applied to `x` to produce the raw data.
    :param x: argument passed to `fct`.
    :param time_steps: window length used for the inputs.
    :param seperate: when True, inputs come from column 'a' and labels from
        column 'b' of the generated data; otherwise both use all of it.
    :return: two dicts (inputs, labels), each keyed by 'train', 'val', 'test'.
    """
    # Everything after evaluating the function is the same pipeline that
    # `load_csvdata` applies to pre-loaded data.
    return load_csvdata(fct(x), time_steps, seperate)


def lstm_model(num_units, rnn_layers, dense_layers=None, learning_rate=0.1, optimizer='Adagrad'):
    """
    Creates a deep model based on:
        * stacked lstm cells
        * optional dense layers
        * a linear regression output

    :param num_units: number of slices the input `X` is unstacked into along
        axis 1. Despite its name this is not a cell size; cell sizes come
        from `rnn_layers`.
    :param rnn_layers: non-empty list describing one LSTM cell per entry. The
        type of the first entry selects how the whole list is read:
        * list of int: the size of each `BasicLSTMCell`
        * list of dict: [{'num_units': int, 'keep_prob': float}, ...] where
          'keep_prob' is optional; when present and truthy the cell is
          wrapped in a `DropoutWrapper`
    :param dense_layers: None/empty for no dense layers, a list forwarded to
        `tflayers.stack` as the per-layer arguments, or a dict with a
        'layers' entry and optional 'activation' and 'dropout' entries
    :param learning_rate: forwarded to `optimize_loss`
    :param optimizer: forwarded to `optimize_loss`
    :return: the model definition, a function `(X, y)` returning
        `(prediction, loss, train_op)`
    """

    def lstm_cells(layers):
        # Build the list of cells to stack, one per entry of `layers`.
        if isinstance(layers[0], dict):
            cells = []
            for layer in layers:
                cell = rnn.BasicLSTMCell(layer['num_units'], state_is_tuple=True)
                if layer.get('keep_prob'):
                    # Take the wrapper from the same `rnn` module as every
                    # other cell here instead of mixing in `tf.nn.rnn_cell`.
                    cell = rnn.DropoutWrapper(cell, layer['keep_prob'])
                cells.append(cell)
            return cells
        return [rnn.BasicLSTMCell(steps, state_is_tuple=True) for steps in layers]

    def dnn_layers(input_layers, layers):
        # Optionally append fully connected layers after the recurrent part.
        if layers and isinstance(layers, dict):
            # NOTE(review): `activation` and `dropout` are handed to
            # `tflayers.stack` as extra keyword arguments; confirm that
            # `fully_connected` accepts these names in the TensorFlow version
            # in use.
            return tflayers.stack(input_layers, tflayers.fully_connected,
                                  layers['layers'],
                                  activation=layers.get('activation'),
                                  dropout=layers.get('dropout'))
        elif layers:
            return tflayers.stack(input_layers, tflayers.fully_connected, layers)
        else:
            return input_layers

    def _lstm_model(X, y):
        stacked_lstm = rnn.MultiRNNCell(lstm_cells(rnn_layers), state_is_tuple=True)
        # static_rnn takes a list of per-step inputs, so split X along axis 1.
        x_ = tf.unstack(X, num=num_units, axis=1)

        output, _ = rnn.static_rnn(stacked_lstm, x_, dtype=dtypes.float32)
        # Only the output of the last step feeds the dense/regression head.
        output = dnn_layers(output[-1], dense_layers)
        prediction, loss = tflearn.models.linear_regression(output, y)
        train_op = tf.contrib.layers.optimize_loss(
            loss, tf.contrib.framework.get_global_step(), optimizer=optimizer,
            learning_rate=learning_rate)
        return prediction, loss, train_op

    return _lstm_model


45 changes: 45 additions & 0 deletions lstm_sin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import numpy as np
import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt

from tensorflow.contrib import learn
from sklearn.metrics import mean_squared_error

from lstm_predictior import generate_data, lstm_model
# Configuration for the sine-curve experiment.
LOG_DIR = 'resources/logs/'
TIMESTEPS = 3
RNN_LAYERS = [{'num_units': 5}]
DENSE_LAYERS = None
TRAINING_STEPS = 100
# Floor division keeps the step count an integer on Python 3 as well as 2.
PRINT_STEPS = TRAINING_STEPS // 10
BATCH_SIZE = 100

# Create the LSTM regressor.
regressor = learn.Estimator(model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS),
                            model_dir=LOG_DIR)

# Sample sin(x) on [0, 100] and window it into train/val/test inputs and labels.
X, y = generate_data(np.sin, np.linspace(0, 100, 10000, dtype=np.float32), TIMESTEPS, seperate=False)

# Evaluate on the validation split every PRINT_STEPS training steps.
validation_monitor = learn.monitors.ValidationMonitor(X['val'], y['val'],
                                                      every_n_steps=PRINT_STEPS,
                                                      early_stopping_rounds=1000)

regressor.fit(X['train'], y['train'],
              monitors=[validation_monitor],
              batch_size=BATCH_SIZE,
              steps=TRAINING_STEPS)

# Parenthesised single-argument prints behave the same on Python 2 and 3.
print(X['test'].shape)
print(y['test'].shape)
predicted = regressor.predict(X['test'], as_iterable=False)
# scikit-learn's documented argument order is (y_true, y_pred).
score = mean_squared_error(y['test'], predicted)
print("MSE: %f" % score)

# Plot predictions against the held-out test labels.
plot_predicted, = plt.plot(predicted, label='predicted')
plot_test, = plt.plot(y['test'], label='test')
plt.legend(handles=[plot_predicted, plot_test])
plt.show()

0 comments on commit 6598dfe

Please sign in to comment.