Lesson 19 - Recurrent Neural Networks

The following topics are discussed in this notebook:

  • The architecture of a recurrent neural network.
  • Building recurrent neural networks in Keras.

Additional Resources

  • Deep Learning with Python, Chapter 6
In [1]:
import numpy as np
import matplotlib.pyplot as plt

from tensorflow import set_random_seed
import keras
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, LSTM
Using TensorFlow backend.
In [2]:
%run -i "C:\Users\justb\Dropbox\Code\Python\viztools.py"

Example 1

In [3]:
np.random.seed(1)

# Generate 1000 sequences of length 6 with one feature per time step.
X = np.random.uniform(-1, 1, [1000, 6, 1])
H = np.zeros(1000)
A = np.zeros([1000, 6, 1])

# Run a fixed, hand-coded recurrence over each sequence:
#   hidden state:  H_t = tanh(0.4*X_t + 1.4*H_(t-1) + 0.1)
#   output:        A_t = sigmoid(2.8*H_t - 2)
for i in range(0, 6):
    Z1 = 0.4 * X[:,i,0] + 1.4 * H + 0.1
    A1 = np.tanh(Z1)
    H = A1
    Z2 = 2.8 * A1 - 2
    A2 = 1 / (1 + np.exp(-Z2))
    A[:,i,0] = A2

# Label each sequence by comparing its final output to a random
# threshold drawn uniformly from [0.4, 0.6].
roll = np.random.uniform(0, 1, 1000)
roll = 0.2 * roll + 0.4

y = np.where(A[:,-1,0] <= roll, 0, 1)
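
Because the labels come from a fixed recurrent computation over each sequence, a recurrent network should be able to recover this relationship with very few parameters.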

Train/Validation Split

In [4]:
X_train, X_val = X[:800], X[800:]
y_train, y_val = y[:800], y[800:]
In [5]:
print(X_train.shape)
print(X_val.shape)
(800, 6, 1)
(200, 6, 1)
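
Note the shape convention: Keras recurrent layers expect input of shape (samples, timesteps, features), which here is (800, 6, 1) for the training set.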

Basic Feed Forward Network

In [6]:
np.random.seed(1)
set_random_seed(1)

model = Sequential()

model.add(Dense(16, activation='relu', input_shape=(6,)))
model.add(Dense(1, activation='sigmoid'))

model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_1 (Dense)              (None, 16)                112       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 17        
=================================================================
Total params: 129
Trainable params: 129
Non-trainable params: 0
_________________________________________________________________
In [7]:
opt = keras.optimizers.Adam(lr=0.001)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

h = model.fit(X_train[:,:,0], y_train, epochs=4000, batch_size=800,
              verbose=0, validation_data=[X_val[:,:,0], y_val])

print('Training Metrics:  ', model.evaluate(X_train[:,:,0], y_train, verbose=0))
print('Validation Metrics:', model.evaluate(X_val[:,:,0], y_val, verbose=0))

vis_training(h)
Training Metrics:   [0.1584397777915001, 0.93]
Validation Metrics: [0.2803668689727783, 0.87]
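
To use a dense network at all, each sequence has to be flattened into six ordinary input features (X_train[:,:,0]); the model has no built-in notion of time-step order.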

Simple Recurrent Neural Network

In [8]:
np.random.seed(1)
set_random_seed(1)

# Create the recurrent network: a single tanh SimpleRNN unit whose
# final hidden state feeds one sigmoid output unit.
model = Sequential()
model.add(SimpleRNN(1, activation='tanh', return_sequences=False, input_shape=(6,1)))
model.add(Dense(1, activation='sigmoid'))

model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
simple_rnn_1 (SimpleRNN)     (None, 1)                 3         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 2         
=================================================================
Total params: 5
Trainable params: 5
Non-trainable params: 0
_________________________________________________________________
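
The tiny parameter count can be verified by hand: a SimpleRNN layer has an input weight matrix, a recurrent weight matrix, and a bias vector, for a total of units * (input_dim + units + 1) parameters. A quick sanity check for the model above:

# SimpleRNN parameters = input weights + recurrent weights + biases
#                      = units*input_dim + units*units + units
units, input_dim = 1, 1
print(units * (input_dim + units + 1))   # 3, matching the summary

The Dense head adds one weight and one bias, giving the 5 total parameters shown.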
In [9]:
opt = keras.optimizers.Adam(lr=0.001)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

h = model.fit(X_train, y_train, epochs=4000, batch_size=800, verbose=0, validation_data=[X_val, y_val])

print('Training Metrics:  ', model.evaluate(X_train, y_train, verbose=0))
print('Validation Metrics:', model.evaluate(X_val, y_val, verbose=0))

vis_training(h)
Training Metrics:   [0.24404405266046525, 0.89625]
Validation Metrics: [0.26740086913108824, 0.89]
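
With only 5 trainable parameters, the recurrent model matches the 129-parameter feed-forward network on the validation set (0.89 vs. 0.87), which is what we would expect given that the labels were generated by exactly this kind of recurrence.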

Example 2
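
In this example each observation is a string of ten characters drawn from {a, b, c, n}. A letter earns points equal to its position in the string, an 'n' negates the letter that follows it, and the class is the letter with the highest total score. Getting this right requires tracking both position and context, which makes it a natural task for a recurrent model.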

In [10]:
np.random.seed(1)

length = 10
abc = np.array(['a','b','c','n'])

# Draw 1000 random strings of 10 characters, with 'n' twice as likely
# as each letter, then one-hot encode by broadcasting a comparison
# against the alphabet: X_enc[i,j,k] = (X[i,j] == abc[k]).
X = np.random.choice(abc, 1000 * length, p=[0.2,0.2,0.2,0.4]).reshape(1000, length)
abc_tile = np.repeat(abc.reshape(1,1,4), 1000, axis=0)
X_enc = (X.reshape(1000, length, 1) == abc_tile).astype('int')

# Score each sequence: a letter earns points equal to its (1-based)
# position, plus a tiny tie-breaking term, and an 'n' negates the
# points of whichever character immediately follows it. The class is
# the letter ('a', 'b', or 'c') with the highest total score.
index = {'a':0, 'b':1, 'c':2}
points = np.arange(1, length+1) + 10.0**(-length + np.arange(1, length+1) - 1)

y = np.zeros(1000).astype('int')
scores = []

for i in range(1000):
    obs = X[i]

    neg_next = False
    score = [0., 0., 0.]
    for j in range(length):
        if neg_next:
            neg_next = False
            if obs[j] != 'n':
                score[index[obs[j]]] -= points[j]
        else:
            if obs[j] == 'n':
                neg_next = True
            else:
                score[index[obs[j]]] += points[j]
    scores.append(score)
    y[i] = np.argmax(score)

scores = np.array(scores)

print(X.shape)
print(X_enc.shape)
print(y.shape)
(1000, 10)
(1000, 10, 4)
(1000,)
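
As an aside, the one-hot encoding above can also be built with keras.utils.to_categorical by first mapping characters to integer indices. A minimal sketch (the char_to_int mapping is introduced here only for illustration):

from keras.utils import to_categorical

# Map each character to its index in abc, then one-hot encode;
# this reproduces X_enc exactly since the index order matches abc.
char_to_int = {c: i for i, c in enumerate(abc)}
X_int = np.vectorize(char_to_int.get)(X)           # shape (1000, 10)
X_enc_alt = to_categorical(X_int, num_classes=4)   # shape (1000, 10, 4)
print(np.array_equal(X_enc_alt.astype('int'), X_enc))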
In [11]:
n = np.random.choice(range(1000))

print('X[n] = ', X[n], '\n')
print('scores[n] = ', scores[n].astype('int'), '\n')
print('y[n] = ', y[n], '\n')
print('X_enc[n] = \n', X_enc[n])
X[n] =  ['b' 'c' 'n' 'n' 'c' 'a' 'a' 'c' 'a' 'n'] 

scores[n] =  [22  1 15] 

y[n] =  0 

X_enc[n] = 
 [[0 1 0 0]
 [0 0 1 0]
 [0 0 0 1]
 [0 0 0 1]
 [0 0 1 0]
 [1 0 0 0]
 [1 0 0 0]
 [0 0 1 0]
 [1 0 0 0]
 [0 0 0 1]]

Train/Validation Split

In [12]:
X_train, X_val = X_enc[:800], X_enc[800:]
y_train, y_val = y[:800], y[800:]

Basic Feed Forward Network

In [13]:
np.random.seed(1)
set_random_seed(1)

model = Sequential()

# Each (10, 4) one-hot sequence is flattened into a single vector of
# 40 input features, discarding the explicit time-step structure.
model.add(Dense(16, activation='relu', input_shape=(40,)))
model.add(Dense(3, activation='softmax'))

opt = keras.optimizers.Adam(lr=0.001)
model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_4 (Dense)              (None, 16)                656       
_________________________________________________________________
dense_5 (Dense)              (None, 3)                 51        
=================================================================
Total params: 707
Trainable params: 707
Non-trainable params: 0
_________________________________________________________________
In [14]:
h = model.fit(X_train.reshape(-1,40), y_train, epochs=1000, batch_size=1000,
              verbose=0, validation_data=[X_val.reshape(-1,40), y_val])

print(model.evaluate(X_train.reshape(-1,40), y_train, verbose=0))
print(model.evaluate(X_val.reshape(-1,40), y_val, verbose=0))

vis_training(h)
[0.30180990815162656, 0.91125]
[1.1883641624450683, 0.6]
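
The gap between training accuracy (0.91) and validation accuracy (0.60) shows the dense network mostly memorizing the training strings rather than learning the scoring rule.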

Simple Recurrent Neural Network

In [15]:
np.random.seed(2)
set_random_seed(2)

model = Sequential()
model.add(SimpleRNN(6, activation='tanh', return_sequences=False, input_shape=(10,4)))
model.add(Dense(3, activation='softmax'))

model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
simple_rnn_2 (SimpleRNN)     (None, 6)                 66        
_________________________________________________________________
dense_6 (Dense)              (None, 3)                 21        
=================================================================
Total params: 87
Trainable params: 87
Non-trainable params: 0
_________________________________________________________________
In [16]:
opt = keras.optimizers.Adam(lr=0.001)

model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
h = model.fit(X_train, y_train, epochs=10000, batch_size=800, verbose=0, 
              validation_data=[X_val, y_val])

print(model.evaluate(X_train, y_train, verbose=0))
print(model.evaluate(X_val, y_val, verbose=0))

vis_training(h)
[0.07405739799141883, 0.9825]
[0.35565623369067906, 0.93]

LSTM Network

In [17]:
np.random.seed(1)
set_random_seed(1)

model = Sequential()
model.add(LSTM(6, activation='tanh', recurrent_activation='hard_sigmoid', 
               return_sequences=False, input_shape=(10,4)))
model.add(Dense(3, activation='softmax'))

model.summary()
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
lstm_1 (LSTM)                (None, 6)                 264       
_________________________________________________________________
dense_7 (Dense)              (None, 3)                 21        
=================================================================
Total params: 285
Trainable params: 285
Non-trainable params: 0
_________________________________________________________________
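
The jump from 66 to 264 recurrent-layer parameters also checks out: an LSTM maintains four gate/candidate blocks, each with the same input-weight, recurrent-weight, and bias structure as a SimpleRNN. A quick check:

# LSTM parameters = 4 * (units*input_dim + units*units + units)
units, input_dim = 6, 4
print(4 * units * (input_dim + units + 1))   # 264, matching the summary

The Dense output layer is unchanged, so the two models differ only in the recurrent layer.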
In [18]:
opt = keras.optimizers.Adam(lr=0.001)

model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
h = model.fit(X_train, y_train, epochs=2000, batch_size=800, verbose=0, 
              validation_data=[X_val, y_val])

print(model.evaluate(X_train, y_train, verbose=0))
print(model.evaluate(X_val, y_val, verbose=0))

vis_training(h)
[0.07870874039828778, 0.99375]
[0.3731316101551056, 0.855]
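
Despite its extra capacity, the LSTM does not beat the SimpleRNN on this task (0.855 vs. 0.93 validation accuracy); LSTMs tend to pay off when dependencies span sequences longer than the ten steps used here.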