Lesson 05 - Forward Propagation with Matrices

The following topics are discussed in this notebook:

  • Using matrix multiplication to perform forward propagation.
In [1]:
import numpy as np
import matplotlib.pyplot as plt

import keras
from keras.models import Sequential
from keras.layers import Dense
from tensorflow import set_random_seed

from ClassificationPlotter import plot_regions
Using TensorFlow backend.

Generate Data

In [2]:
from sklearn.datasets import make_classification
In [3]:
np.random.seed(136)
X, y = make_classification(n_samples=200, n_features=2, n_informative=2, n_redundant=0, n_classes=3, n_clusters_per_class=1)
#X = X + np.array([6, 13])
plt.figure(figsize=(8,6))
plt.scatter(X[:,0], X[:,1], c=y, edgecolor='k', s=60, cmap='rainbow')
plt.show()
In [4]:
print(X.shape)
print(y.shape)
(200, 2)
(200,)
In [5]:
print(y[:20])
[2 2 2 1 2 1 0 1 1 0 1 1 1 2 2 1 0 0 0 2]

Design and Train a Neural Network

In [6]:
np.random.seed(1)
set_random_seed(1)

model = Sequential()
model.add(Dense(4, input_shape=(2,), activation='relu'))
model.add(Dense(3, activation='relu'))
model.add(Dense(3, activation='softmax'))

opt = keras.optimizers.Adam(lr=0.1)
model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

h = model.fit(X, y, batch_size=200, epochs=200, verbose=0)

results = model.evaluate(X, y, verbose=0)

print('Training Loss:    ', results[0])
print('Training Accuracy:', results[1])
Training Loss:     0.16523193180561066
Training Accuracy: 0.91

Visualize Classification Regions

In [7]:
plot_regions(model, X, y, fig_size=(8,6), keras=True)

Extract Weights from the Model

In [8]:
np.set_printoptions(precision=2, suppress=True)
In [9]:
wts = model.get_weights()
print(type(wts))
<class 'list'>
In [10]:
for w in wts:
    print(w.shape)
(2, 4)
(4,)
(4, 3)
(3,)
(3, 3)
(3,)
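
The list alternates between each layer's kernel matrix and its bias vector: (2, 4) and (4,) for the first hidden layer, (4, 3) and (3,) for the second, and (3, 3) and (3,) for the output layer. Equivalently, the per-layer arrays can be read from model.layers (a minimal sketch using the same model):

for i, layer in enumerate(model.layers):
    kernel, bias = layer.get_weights()
    print('Layer', i + 1, '- kernel:', kernel.shape, ' bias:', bias.shape)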

Layer 1 Weights

Keras stores each Dense layer's kernel and bias separately. Stacking the bias vector on top of the kernel produces a single weight matrix that applies the bias automatically once the layer's input is augmented with a column of ones.

In [11]:
W1 = np.vstack((wts[1].reshape(1,4), wts[0]))
print(W1)
print(W1.shape)
[[ 0.82 -2.2  -1.37  1.4 ]
 [-2.14 -0.27  2.2   0.36]
 [ 2.68  1.39 -2.27 -1.29]]
(3, 4)
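
The same bias-on-top stacking is used for W2 and W3 below. A small helper could capture the pattern (a sketch; the name augment_weights is my own):

def augment_weights(kernel, bias):
    # Stack the bias as the first row so that [1 | X] @ W_aug equals X @ kernel + bias.
    return np.vstack((bias.reshape(1, -1), kernel))

# W1 = augment_weights(wts[0], wts[1])   # identical to the vstack call above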

Layer 2 Weights

In [12]:
W2 = np.vstack((wts[3].reshape(1,3), wts[2]))
print(W2)
print(W2.shape)
[[-0.56  0.78  0.16]
 [-1.23  1.97  1.83]
 [-0.42  0.41 -7.89]
 [-0.5  -1.25 -1.27]
 [-0.97  1.11 -1.14]]
(5, 3)

Layer 3 Weights

In [13]:
W3 = np.vstack((wts[5].reshape(1,3), wts[4]))
print(W3)
print(W3.shape)
[[-2.31 -0.51  4.92]
 [-0.41 -1.3  -0.03]
 [-1.31  1.47 -0.66]
 [ 2.13 -2.26 -0.35]]
(4, 3)

Create Array of New Observations

In [14]:
X_new = np.array([[0, -1], [1, 0], [0, 1], [0, 0.5]])

We expect the predicted classes for these points to be: [1 or 2, 1 or 2, 0 or 2, 0]

In [15]:
print(X_new.shape)
print(W1.shape)
print(W2.shape)
print(W3.shape)
(4, 2)
(3, 4)
(5, 3)
(4, 3)

Define Activation Functions

The hidden layers use ReLU and the output layer uses softmax, matching the activations specified when the model was defined above.

In [16]:
def relu(M):
    return np.where(M > 0, M, 0)
In [17]:
def softmax(M):
    exp = np.exp(M)
    return exp / np.sum(exp, axis=1, keepdims=True)
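
A quick sanity check of the two helpers on a small, arbitrarily chosen matrix (the values below are only for illustration):

M = np.array([[-1.0, 0.5, 2.0],
              [ 3.0, 0.0, -2.0]])
print(relu(M))                   # negative entries are replaced with 0
print(softmax(M))                # each row becomes a probability distribution
print(softmax(M).sum(axis=1))    # rows sum to 1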

Perform Forward Propagation

Calculate Output of First Hidden Layer

We prepend a column of ones to X_new so that multiplying by W1 applies the bias row along with the kernel weights.

In [18]:
ones = np.ones((X_new.shape[0], 1))
input_matrix = np.hstack((ones, X_new))
print(input_matrix)
print(input_matrix.shape)
[[ 1.   0.  -1. ]
 [ 1.   1.   0. ]
 [ 1.   0.   1. ]
 [ 1.   0.   0.5]]
(4, 3)
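
As a check on the bias trick, multiplying the augmented input by W1 matches applying the original kernel and bias separately (a sketch reusing the wts list extracted above):

print(np.allclose(np.dot(input_matrix, W1),
                  np.dot(X_new, wts[0]) + wts[1]))    # True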
In [19]:
Z1 = np.dot(input_matrix, W1)
print(Z1)
print(Z1.shape)
print()

A1 = relu(Z1)
print(A1)
print(A1.shape)
[[-1.86 -3.6   0.9   2.7 ]
 [-1.32 -2.48  0.82  1.77]
 [ 3.5  -0.81 -3.65  0.11]
 [ 2.16 -1.51 -2.51  0.75]]
(4, 4)

[[0.   0.   0.9  2.7 ]
 [0.   0.   0.82 1.77]
 [3.5  0.   0.   0.11]
 [2.16 0.   0.   0.75]]
(4, 4)

Calculate Output of Second Hidden Layer

In [20]:
ones = np.ones((A1.shape[0], 1))
input_matrix = np.hstack((ones, A1))
print(input_matrix)
print(input_matrix.shape)
[[1.   0.   0.   0.9  2.7 ]
 [1.   0.   0.   0.82 1.77]
 [1.   3.5  0.   0.   0.11]
 [1.   2.16 0.   0.   0.75]]
(4, 5)
In [21]:
Z2 = np.dot(input_matrix, W2)
print(Z2)
print(Z2.shape)
print()

A2 = relu(Z2)
print(A2)
print(A2.shape)
[[-3.63  2.65 -4.07]
 [-2.68  1.72 -2.91]
 [-4.98  7.79  6.45]
 [-3.96  5.88  3.26]]
(4, 3)

[[0.   2.65 0.  ]
 [0.   1.72 0.  ]
 [0.   7.79 6.45]
 [0.   5.88 3.26]]
(4, 3)

Calculate Output of Final Layer

In [22]:
ones = np.ones((A2.shape[0], 1))
input_matrix = np.hstack((ones, A2))
print(input_matrix)
print(input_matrix.shape)
[[1.   0.   2.65 0.  ]
 [1.   0.   1.72 0.  ]
 [1.   0.   7.79 6.45]
 [1.   0.   5.88 3.26]]
(4, 4)
In [23]:
Z3 = np.dot(input_matrix, W3)
print(Z3)
print(Z3.shape)
print()


P = softmax(Z3)
print(P)
print(P.shape)
[[-5.78  3.39  3.17]
 [-4.55  2.01  3.79]
 [ 1.22 -3.66 -2.47]
 [-3.07  0.75 -0.09]]
(4, 3)

[[0.   0.55 0.45]
 [0.   0.14 0.86]
 [0.97 0.01 0.02]
 [0.02 0.69 0.3 ]]
(4, 3)
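
The predicted class for each observation is the column with the largest probability in its row:

print(np.argmax(P, axis=1))    # [1 2 0 1] for the probabilities shown above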

Compare with Keras Results

In [24]:
print(model.predict(X_new))
[[0.   0.55 0.45]
 [0.   0.14 0.86]
 [0.97 0.01 0.02]
 [0.02 0.69 0.3 ]]
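
The three layer-by-layer steps can be collapsed into a single loop. A minimal sketch (the function name forward_propagate is my own) reusing the augmented weight matrices and activation functions defined above:

def forward_propagate(X, weights, activations):
    A = X
    for W, act in zip(weights, activations):
        ones = np.ones((A.shape[0], 1))            # column of ones picks up the bias row of W
        A = act(np.dot(np.hstack((ones, A)), W))   # augment, multiply, then apply the activation
    return A

P_loop = forward_propagate(X_new, [W1, W2, W3], [relu, relu, softmax])
print(P_loop)    # matches model.predict(X_new) up to rounding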