Lesson 20 - Recurrent Neural Networks for Time Series

In [1]:
import numpy as np
import matplotlib.pyplot as plt
#import pandas as pd
import math

import keras
from keras.layers import Input
from keras.models import Sequential
from keras.layers.core import Dense, Activation 
from keras.layers.recurrent import SimpleRNN, LSTM

from tensorflow import set_random_seed
Using TensorFlow backend.
In [2]:
%run -i "C:\Users\justb\Dropbox\Code\Python\viztools.py"
#%run -i "C:\Users\rbeane\Dropbox\Code\Python\viztools.py"

Helpful Functions

In [3]:
def sine_curve(length=500, freq=1, amp=1, noise=0):
    x = np.linspace(0,length-1,length)
    return amp*np.sin(x*freq*2*math.pi/360) + np.random.normal(0, noise, length)
    
In [4]:
plt.plot(sine_curve(length=360, freq=1, amp=1))
plt.plot(sine_curve(length=360, freq=2, amp=1))
plt.plot(sine_curve(length=360, freq=2, amp=3, noise=0.2))
plt.plot(sine_curve(length=540, freq=0.5, amp=0.5))
plt.show()
In [5]:
def make_samples(data, input_length, output_length):
    
    X = []
    y = []
    n_samples = len(data) + 1 - output_length - input_length
    
    for i in range(n_samples):
        i1 = i + input_length
        i2 = i + input_length + output_length
        X.append(data[i : i1])
        y.append(data[i1 : i2])
        
    return np.array(X)[:,:,np.newaxis], np.array(y)

Example 1

In [6]:
seq_len = 360 * 5
in_len = 60
out_len = 1
sample_len = in_len + out_len

cut = 360 * 3

n_samples = seq_len + 1 - sample_len

n_train = cut + 1 - sample_len
n_val = seq_len - cut

print('Sequence Length:   ', seq_len)
print('Number of samples: ', n_samples)
print('Training samples:  ', n_train)
print('Validation samples:', n_val)
Sequence Length:    1800
Number of samples:  1740
Training samples:   1020
Validation samples: 720

Generate Data

In [7]:
np.random.seed(1)
seq = sine_curve(length=360*5, noise=0.1)

X, y = make_samples(seq, in_len, out_len)

print('seq shape:', seq.shape)
print('X shape:  ', X.shape)
print('y shape:  ', y.shape)
seq shape: (1800,)
X shape:   (1740, 60, 1)
y shape:   (1740, 1)

Training and Validation Sets

In [8]:
# Sequences
seq_train, seq_val = seq[:cut], seq[cut:]

# Timestep ranges
t_train = range(0, cut)
t_val = range(cut, seq_len)

# Training and Validation Samples 
X_train, X_val = X[:n_train,:], X[n_train:,:]
y_train, y_val = y[:n_train], y[n_train:]

print('Training set shapes:')
print('seq:', seq_train.shape)
print('X:  ', X_train.shape)
print('y:  ', y_train.shape)

print()
print('Validation set shapes:')
print('seq:', seq_val.shape)
print('X:  ', X_val.shape)
print('y:  ', y_val.shape)
Training set shapes:
seq: (1080,)
X:   (1020, 60, 1)
y:   (1020, 1)

Validation set shapes:
seq: (720,)
X:   (720, 60, 1)
y:   (720, 1)
In [9]:
plt.figure(figsize=[8,6])
plt.plot(t_train, seq_train, label='Training')
plt.plot(t_val, seq_val, label='Validation')
plt.legend()
plt.show()
curve_train, curve_val = curve[lag:1080], curve[1080:] r_train = range(lag,1080) r_val = range(1080,1800) X_train, X_val = X[:(1080-lag),:], X[(1080-lag):,:] y_train, y_val = y[:(1080-lag)], y[(1080-lag):] print('Training set shapes:') print(curve_train.shape) print(X_train.shape) print(y_train.shape) print() print('Validation set shapes:') print(curve_val.shape) print(X_val.shape) print(y_val.shape)

Create Model: Single Layer SimpleRNN

In [10]:
%%time

np.random.seed(1)
set_random_seed(1)

model = Sequential()
model.add(SimpleRNN(4, activation='tanh', input_shape=(in_len,1), 
                       return_sequences=False))
model.add(Dense(out_len))

opt = keras.optimizers.Adam(lr=0.001)
model.compile(loss='mean_squared_error', optimizer=opt)

h = model.fit(X_train, y_train, epochs=1000, batch_size=50, verbose=0, 
              validation_data=[X_val, y_val])

print(model.evaluate(X_train, y_train, verbose=0))
print(model.evaluate(X_val, y_val, verbose=0))
0.012091113400517725
0.013021708942121929
Wall time: 18min 19s
In [11]:
lag = 0
plt.figure(figsize=[6,4])
plt.plot(range(lag, 1000), h.history['loss'][lag:], label='Training')
plt.plot(range(lag, 1000), h.history['val_loss'][lag:], label='Validation')
plt.legend()
plt.show()

Calculating Validation MSE

In [12]:
val_pred = model.predict(X_val)
print(np.mean( (y_val - val_pred)**2))
0.013021708729046462

Plot Predictions

In [13]:
pred = model.predict(X)
print(pred.shape)
(1740, 1)
In [14]:
plt.figure(figsize=[8,6])
plt.plot(t_train, seq_train, linewidth=2, label='Training')
plt.plot(t_val, seq_val, linewidth=2, label='Validation')

plt.plot(range(in_len,1800), pred, linewidth=1, label='Prediction', c='k')

plt.legend(bbox_to_anchor=(1, 1), loc='upper left', ncol=1)
plt.show()

Forecasting

In [15]:
forecast = seq_train

for i in range(0, 720):
    X_temp = forecast[-in_len:]
    X_temp = X_temp.reshape(1,in_len,1)
    new_val = model.predict(X_temp)
    forecast = np.hstack([forecast, new_val[0,0]])

plt.figure(figsize=[8,6])
plt.plot(t_train, seq_train, linewidth=4, label='Training')
plt.plot(t_val, seq_val, linewidth=4, label='Validation')

plt.plot(forecast, linewidth=1, label='Prediction', c='k')

plt.legend(bbox_to_anchor=(1, 1), loc='upper left', ncol=1)

plt.show()
In [16]:
print(np.mean( (y_val - forecast[cut:])**2))
1.2733513985970055

Example 2

In [17]:
seq_len = 360 * 6
in_len = 360
out_len = 180
sample_len = in_len + out_len

cut = 360 * 4

n_samples = seq_len + 1 - sample_len

n_train = cut + 1 - sample_len
n_val = seq_len - cut

print('Sequence Length:   ', seq_len)
print('Number of samples: ', n_samples)
print('Training samples:  ', n_train)
print('Validation samples:', n_val)
Sequence Length:    2160
Number of samples:  1621
Training samples:   901
Validation samples: 720

Generate Data

In [18]:
np.random.seed(1)
seq = sine_curve(length=seq_len, noise=0.1)

X, y = make_samples(seq, in_len, out_len)

print('seq shape:', seq.shape)
print('X shape:  ', X.shape)
print('y shape:  ', y.shape)
seq shape: (2160,)
X shape:   (1621, 360, 1)
y shape:   (1621, 180)

Training and Validation Sets

In [19]:
# Sequences
seq_train, seq_val = seq[:cut], seq[cut:]

# Timestep ranges
t_train = range(0, cut)
t_val = range(cut, seq_len)

# Training and Validation Samples 
X_train, X_val = X[:n_train,:], X[n_train:,:]
y_train, y_val = y[:n_train], y[n_train:]

print('Training set shapes:')
print('seq:', seq_train.shape)
print('X:  ', X_train.shape)
print('y:  ', y_train.shape)

print()
print('Validation set shapes:')
print('seq:', seq_val.shape)
print('X:  ', X_val.shape)
print('y:  ', y_val.shape)
Training set shapes:
seq: (1440,)
X:   (901, 360, 1)
y:   (901, 180)

Validation set shapes:
seq: (720,)
X:   (720, 360, 1)
y:   (720, 180)
In [20]:
plt.figure(figsize=[8,6])
plt.plot(t_train, seq_train, label='Training')
plt.plot(t_val, seq_val, label='Validation')
plt.legend()
plt.show()

Create Model: Single Layer SimpleRNN

In [21]:
%%time

np.random.seed(1)
set_random_seed(1)

model = Sequential()
model.add(SimpleRNN(4, activation='tanh', input_shape=(in_len,1), 
                       return_sequences=False))
model.add(Dense(out_len))

opt = keras.optimizers.Adam(lr=0.001)
model.compile(loss='mean_squared_error', optimizer=opt)

h = model.fit(X_train, y_train, epochs=1000, batch_size=1000, verbose=0, 
              validation_data=[X_val, y_val])

print(model.evaluate(X_train, y_train, verbose=0))
print(model.evaluate(X_val, y_val, verbose=0))
0.01767716907609779
0.019502141368057994
Wall time: 6min 10s
In [22]:
lag = 0
plt.figure(figsize=[6,4])
plt.plot(range(lag, 1000), h.history['loss'][lag:], label='Training')
plt.plot(range(lag, 1000), h.history['val_loss'][lag:], label='Validation')
plt.legend()
plt.show()

Calculating Validation MSE

In [23]:
val_pred = model.predict(X_val)
print(np.mean( (y_val - val_pred)**2))
0.019502141210698272

Plot Predictions

In [24]:
pred = model.predict(X)
print(pred.shape)
(1621, 180)
In [25]:
ts1 = cut
ts2 = cut + 240
ts3 = cut + 540

input1 = seq[(ts1 - in_len):ts1].reshape(1,in_len,1)
output1 = model.predict(input1)

input2 = seq[(ts2 - in_len):ts2].reshape(1,in_len,1)
output2 = model.predict(input2)

input3 = seq[(ts3 - in_len):ts3].reshape(1,in_len,1)
output3 = model.predict(input3)

plt.figure(figsize=[8,6])
plt.plot(t_train, seq_train, linewidth=2, label='Training')
plt.plot(t_val, seq_val, linewidth=2, label='Validation')

plt.plot(range(ts1, ts1+out_len), output1[0], c='k', label='Prediction')
plt.plot(range(ts2, ts2+out_len), output2[0], c='k', label='Prediction')
plt.plot(range(ts3, ts3+out_len), output3[0], c='k', label='Prediction')

plt.legend(bbox_to_anchor=(1, 1), loc='upper left', ncol=1)
plt.show()

Example 3

In [26]:
seq_len = 360 * 4
in_len = 720
out_len = 360

sequences = 250

np.random.seed(1)
X_list = []
y_list = []
for i in range(sequences):
    #freq = np.random.uniform(1,3)
    freq = 1
    amp = np.random.uniform(1,2)
    noise = np.random.uniform(0.05, 0.2)
    seq = sine_curve(seq_len, freq, amp, noise)
    X_temp, y_temp = make_samples(seq, in_len, out_len) 

    X_list.append(X_temp)
    y_list.append(y_temp)
    
    
X = np.vstack(X_list)
y = np.vstack(y_list)
    
print(X.shape)
print(y.shape)
    
(90250, 720, 1)
(90250, 360)

Create Model: Single Layer SimpleRNN

In [27]:
%%time

np.random.seed(1)
set_random_seed(1)

model = Sequential()
model.add(SimpleRNN(16, activation='tanh', input_shape=(in_len,1), 
                       return_sequences=True))
model.add(SimpleRNN(16, activation='tanh', return_sequences=False))
model.add(Dense(out_len))

opt = keras.optimizers.Adam(lr=0.001)
model.compile(loss='mean_squared_error', optimizer=opt)

h = model.fit(X, y, epochs=25, batch_size=256, verbose=1)

print(model.evaluate(X, y, verbose=0))
Epoch 1/25
90250/90250 [==============================] - 292s 3ms/step - loss: 0.3566
Epoch 2/25
90250/90250 [==============================] - 287s 3ms/step - loss: 0.0816
Epoch 3/25
90250/90250 [==============================] - 289s 3ms/step - loss: 0.0698
Epoch 4/25
90250/90250 [==============================] - 296s 3ms/step - loss: 0.0598
Epoch 5/25
90250/90250 [==============================] - 305s 3ms/step - loss: 0.0546
Epoch 6/25
90250/90250 [==============================] - 312s 3ms/step - loss: 0.0487
Epoch 7/25
90250/90250 [==============================] - 299s 3ms/step - loss: 0.0807
Epoch 8/25
90250/90250 [==============================] - 298s 3ms/step - loss: 0.0714
Epoch 9/25
90250/90250 [==============================] - 294s 3ms/step - loss: 0.0715
Epoch 10/25
90250/90250 [==============================] - 303s 3ms/step - loss: 0.0619
Epoch 11/25
90250/90250 [==============================] - 342s 4ms/step - loss: 0.0567
Epoch 12/25
90250/90250 [==============================] - 300s 3ms/step - loss: 0.0552
Epoch 13/25
90250/90250 [==============================] - 316s 4ms/step - loss: 0.0536
Epoch 14/25
90250/90250 [==============================] - 323s 4ms/step - loss: 0.0497
Epoch 15/25
90250/90250 [==============================] - 318s 4ms/step - loss: 0.0522
Epoch 16/25
90250/90250 [==============================] - 319s 4ms/step - loss: 0.0387
Epoch 17/25
90250/90250 [==============================] - 318s 4ms/step - loss: 0.0338
Epoch 18/25
90250/90250 [==============================] - 311s 3ms/step - loss: 0.0286
Epoch 19/25
90250/90250 [==============================] - 308s 3ms/step - loss: 0.0270
Epoch 20/25
90250/90250 [==============================] - 309s 3ms/step - loss: 0.0256
Epoch 21/25
90250/90250 [==============================] - 309s 3ms/step - loss: 0.0246
Epoch 22/25
90250/90250 [==============================] - 316s 4ms/step - loss: 0.0238
Epoch 23/25
90250/90250 [==============================] - 324s 4ms/step - loss: 0.0230
Epoch 24/25
90250/90250 [==============================] - 313s 3ms/step - loss: 0.0229
Epoch 25/25
90250/90250 [==============================] - 365s 4ms/step - loss: 0.0230
0.023067812756075425
Wall time: 2h 35min 49s
In [28]:
lag = 0
plt.figure(figsize=[6,4])
plt.plot(h.history['loss'][lag:], label='Training')
plt.legend()
plt.show()
In [29]:
freq = 1
amp = np.random.uniform(0.5,3)
noise = np.random.uniform(0.05, 0.2)

test_seq = sine_curve(720 + 360, freq, amp, noise)

X_input = test_seq[:720].reshape(1,720,1)
forecast = model.predict(X_input).reshape(360,)


plt.figure(figsize=[8,6])

plt.plot(range(0,720), test_seq[:720], label='Input')

plt.plot(range(720,1080), test_seq[720:], label='True Values')

plt.plot(range(720,1080), forecast, label='Forecast')

plt.ylim([-2.5,2.5])
plt.legend()
plt.show()