Lesson 09 - Fashion MNIST CNN

The following topics are discussed in this notebook:

  • Using a convolutional neural network (CNN) for classifying image data.
In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import pandas as pd

import keras
from keras.models import Sequential
from keras.layers import Dense, Flatten
import tensorflow as tf

from sklearn.model_selection import train_test_split
from keras.datasets import fashion_mnist
Using TensorFlow backend.

Load the Fashion MNIST Data

The Fashion MNIST dataset consists of 70,000 28x28 grayscale images split into 10 fashion categories. The categories are:

  • 0 - T-shirt/top
  • 1 - Trouser
  • 2 - Pullover
  • 3 - Dress
  • 4 - Coat
  • 5 - Sandal
  • 6 - Shirt
  • 7 - Sneaker
  • 8 - Bag
  • 9 - Ankle boot
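For readability later on, it can help to have these names available in code. A minimal sketch (the class_names list below is our own convenience, not something shipped with the dataset):

class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
print(class_names[9])  # Ankle boot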
In [2]:
(X_train, y_train), (X_holdout, y_holdout) = fashion_mnist.load_data()
In [3]:
# Split the 10,000 holdout images evenly into validation and test sets.
X_val, X_test, y_val, y_test = train_test_split(X_holdout, y_holdout, test_size=0.5, random_state=1)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)
print(X_val.shape)
print(y_val.shape)
(60000, 28, 28)
(60000,)
(5000, 28, 28)
(5000,)
(5000, 28, 28)
(5000,)

Visualizing Samples

In [4]:
# Temporarily widen numpy's print line so each 28-pixel row fits on one line.
np.set_printoptions(linewidth=120)
n = np.random.choice(range(60000))  # pick a random training image
img = X_train[n]
print(img)
np.set_printoptions(linewidth=75)  # restore the default line width
[[  0   0   0   0   0   0   0   0   0   0   0   0 109  95  96  94 113  43   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0 228 249 233 240 239 242 240  91   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   2   0   1   0  10 255 242 226 219 217 221 227 241 234  20   0   4   3   0   0   0   0   0]
 [  0   0   0   0   0   2   0   0   0  79 214 223 226 240 225 237 233 240 214  24   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0 119 225 243 241 230 213 239 184 234 255 231 238 235 203 141   0   0   0   0   0   0]
 [  0   0   0   0   0   0  54 244 229 218 222 248 232 221 191  20 220 234 226 225 234 253 179   0   0   0   0   0]
 [  0   0   0   0   0   0 174 237 215 222 226 220 239 247 216  20 225 224 219 223 221 222 212   0   0   0   0   0]
 [  0   0   0   0   0   0 211 233 218 217 217 219 205 251 183 155 254 218 223 221 226 221 232  29   0   0   0   0]
 [  0   0   0   0   0   0 201 232 222 225 217 209 203 224 194 144 238 203 212 219 221 222 240  65   0   0   0   0]
 [  0   0   0   0   0   0 213 225 225 239 226 211 208 210 229 223 215 208 209 226 237 222 254 100   0   0   0   0]
 [  0   0   0   0   0   0 254 222 232 233 223 213 208 209 218 202 207 214 222 230 239 227 252 134   0   0   0   0]
 [  0   0   0   0   0   9 255 220 232 230 233 223 223 222 222 218 218 229 241 227 231 231 252 157   0   0   0   0]
 [  0   0   0   0   0  61 253 218 240 216 220 216 210 209 215 196 206 214 220 222 227 234 242 179   0   0   0   0]
 [  0   0   0   0   0  88 234 220 252 232 224 217 212 212 212 210 205 217 225 223 247 240 236 190   0   0   0   0]
 [  0   0   0   0   0  99 235 228 216 209 225 212 211 217 216 224 212 218 229 225 216 244 229 211   0   0   0   0]
 [  0   0   0   0   0 112 232 231 171 208 245 211 212 213 211 216 212 214 226 234 136 251 223 218   0   0   0   0]
 [  0   0   0   0   0 125 232 250 137 171 253 219 221 217 211 219 213 223 233 245  89 253 230 226   0   0   0   0]
 [  0   0   0   0   0 125 227 253  98 153 252 207 213 214 214 217 217 218 226 234  77 254 217 227   0   0   0   0]
 [  0   0   0   0   0 135 227 253  90 180 252 219 221 219 216 219 214 223 233 244  37 255 216 227   0   0   0   0]
 [  0   0   0   0   0 143 228 254  92 197 249 214 219 219 214 218 218 219 229 251  17 255 220 232   0   0   0   0]
 [  0   0   0   0   0 130 228 253  45 223 244 213 221 222 217 218 216 220 227 252  16 253 223 228   0   0   0   0]
 [  0   0   0   0   0 125 231 252  27 240 239 223 229 223 215 221 222 228 230 254  40 234 226 230   0   0   0   0]
 [  0   0   0   0   0 123 233 245  40 247 226 221 217 216 220 219 214 219 222 254  51 220 235 232   0   0   0   0]
 [  0   0   0   0   0 120 234 220  62 231 231 225 223 219 215 219 219 226 227 248  46 204 233 224   0   0   0   0]
 [  0   0   0   0   0 120 236 216  59 255 240 240 242 239 231 232 237 243 239 255  30 203 238 218   0   0   0   0]
 [  0   0   0   0   0 110 232 223   0   0  21  29  31  36  29  21  44  32  17   0   0 209 234 208   0   0   0   0]
 [  0   0   0   0   0 124 241 228   0   0   0   0   0   0   0   0   0   0   0   0   0 209 242 201   0   0   0   0]
 [  0   0   0   0   0  64 219 187   0   0   6   2   3   3   3   3   3   2   3   2   0 131 245 167   0   0   0   0]]
In [5]:
#n = np.random.choice(range(60000))
img = X_train[n]
plt.imshow(img, cmap=cm.binary)
plt.axis('off')
plt.show()
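To get a broader feel for the data, we can also display one representative image per category. A minimal sketch of our own, assuming the class_names list defined earlier:

plt.figure(figsize=(12, 5))
for c in range(10):
    idx = np.where(y_train == c)[0][0]  # first training image with label c
    plt.subplot(2, 5, c + 1)
    plt.imshow(X_train[idx], cmap=cm.binary)
    plt.title(class_names[c])
    plt.axis('off')
plt.show()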

Scale and Reshape the Feature Arrays

In [6]:
# Rescale pixel values from [0, 255] to [0, 1] and add a channel axis,
# since Conv2D layers expect inputs of shape (height, width, channels).
Xs_train = X_train.reshape(-1,28,28,1) / 255
Xs_test = X_test.reshape(-1,28,28,1) / 255
Xs_val = X_val.reshape(-1,28,28,1) / 255
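A quick sanity check (our own addition) confirms the expected shape and value range:

print(Xs_train.shape, Xs_train.min(), Xs_train.max())  # expect (60000, 28, 28, 1), values in [0.0, 1.0]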

Convolutional Neural Network

In [7]:
from keras.layers import Conv2D, MaxPooling2D
In [8]:
%%time 

np.random.seed(1)
tf.set_random_seed(1)

model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same', input_shape=(28,28,1), activation='relu'))
model.add(MaxPooling2D((2,2)))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2,2)))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2,2)))

model.add(Flatten())

model.add(Dense(64, activation='relu'))
model.add(Dense(10, activation='softmax'))


opt = keras.optimizers.Adam(lr=0.001)
model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

model.summary()

h = model.fit(Xs_train, y_train,
              batch_size=256, epochs=20, verbose=1,
              validation_data=(Xs_val, y_val))
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d_1 (Conv2D)            (None, 28, 28, 32)        320       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 12, 12, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 6, 6, 64)          0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 4, 4, 64)          36928     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 2, 2, 64)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 256)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 64)                16448     
_________________________________________________________________
dense_2 (Dense)              (None, 10)                650       
=================================================================
Total params: 72,842
Trainable params: 72,842
Non-trainable params: 0
_________________________________________________________________
Train on 60000 samples, validate on 5000 samples
Epoch 1/20
60000/60000 [==============================] - 5s 90us/step - loss: 0.7358 - acc: 0.7449 - val_loss: 0.4642 - val_acc: 0.8458
Epoch 2/20
60000/60000 [==============================] - 2s 32us/step - loss: 0.4101 - acc: 0.8540 - val_loss: 0.4017 - val_acc: 0.8602
Epoch 3/20
60000/60000 [==============================] - 2s 31us/step - loss: 0.3457 - acc: 0.8753 - val_loss: 0.3534 - val_acc: 0.8772
Epoch 4/20
60000/60000 [==============================] - 2s 32us/step - loss: 0.3159 - acc: 0.8856 - val_loss: 0.3370 - val_acc: 0.8748
Epoch 5/20
60000/60000 [==============================] - 2s 32us/step - loss: 0.2936 - acc: 0.8937 - val_loss: 0.3116 - val_acc: 0.8868
Epoch 6/20
60000/60000 [==============================] - 2s 33us/step - loss: 0.2722 - acc: 0.9017 - val_loss: 0.3045 - val_acc: 0.8894
Epoch 7/20
60000/60000 [==============================] - 2s 36us/step - loss: 0.2653 - acc: 0.9042 - val_loss: 0.2799 - val_acc: 0.9014
Epoch 8/20
60000/60000 [==============================] - 2s 38us/step - loss: 0.2519 - acc: 0.9081 - val_loss: 0.2901 - val_acc: 0.8970
Epoch 9/20
60000/60000 [==============================] - 2s 34us/step - loss: 0.2363 - acc: 0.9146 - val_loss: 0.2799 - val_acc: 0.8962
Epoch 10/20
60000/60000 [==============================] - 2s 35us/step - loss: 0.2279 - acc: 0.9157 - val_loss: 0.2629 - val_acc: 0.9026
Epoch 11/20
60000/60000 [==============================] - 2s 35us/step - loss: 0.2176 - acc: 0.9210 - val_loss: 0.2653 - val_acc: 0.9000
Epoch 12/20
60000/60000 [==============================] - 2s 34us/step - loss: 0.2090 - acc: 0.9237 - val_loss: 0.2602 - val_acc: 0.9078
Epoch 13/20
60000/60000 [==============================] - 2s 33us/step - loss: 0.2037 - acc: 0.9264 - val_loss: 0.2511 - val_acc: 0.9074
Epoch 14/20
60000/60000 [==============================] - 2s 32us/step - loss: 0.1945 - acc: 0.9292 - val_loss: 0.2539 - val_acc: 0.9116
Epoch 15/20
60000/60000 [==============================] - 2s 32us/step - loss: 0.1880 - acc: 0.9312 - val_loss: 0.2572 - val_acc: 0.9092
Epoch 16/20
60000/60000 [==============================] - 2s 34us/step - loss: 0.1802 - acc: 0.9349 - val_loss: 0.2410 - val_acc: 0.9162
Epoch 17/20
60000/60000 [==============================] - 2s 33us/step - loss: 0.1710 - acc: 0.9376 - val_loss: 0.2399 - val_acc: 0.9162
Epoch 18/20
60000/60000 [==============================] - 2s 32us/step - loss: 0.1629 - acc: 0.9410 - val_loss: 0.2677 - val_acc: 0.9048
Epoch 19/20
60000/60000 [==============================] - 2s 36us/step - loss: 0.1551 - acc: 0.9440 - val_loss: 0.2427 - val_acc: 0.9176
Epoch 20/20
60000/60000 [==============================] - 2s 32us/step - loss: 0.1497 - acc: 0.9456 - val_loss: 0.2491 - val_acc: 0.9206
Wall time: 44.3 s
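The parameter counts in the summary above can be verified by hand: a Conv2D layer has (kernel height × kernel width × input channels + 1 bias) parameters per filter, and a Dense layer has (inputs + 1) × outputs. Note also that the second and third convolutions use the default padding='valid', so each trims the spatial size by 2 (e.g. 14 - 3 + 1 = 12). A quick check of the figures above:

# Conv2D: (kh * kw * in_channels + 1) * filters
print((3*3*1  + 1) * 32)   # conv2d_1:   320
print((3*3*32 + 1) * 64)   # conv2d_2: 18496
print((3*3*64 + 1) * 64)   # conv2d_3: 36928
# Dense: (inputs + 1) * outputs
print((2*2*64 + 1) * 64)   # dense_1:  16448  (flattened 2x2x64 = 256 inputs)
print((64 + 1) * 10)       # dense_2:    650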

Plot Loss and Accuracy Curves

In [9]:
plt.rcParams["figure.figsize"] = [8,4]
plt.subplot(1,2,1)
plt.plot(h.history['loss'], label='Training')
plt.plot(h.history['val_loss'], label='Validation')
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend()

plt.subplot(1,2,2)
plt.plot(h.history['acc'], label='Training')
plt.plot(h.history['val_acc'], label='Validation')
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend()
plt.show()

Testing Performance

In [10]:
from sklearn.metrics import confusion_matrix, classification_report
In [11]:
testing_results = model.evaluate(Xs_test, y_test, verbose=0)

print('Test Loss =    ', testing_results[0])
print('Test Accuracy =', testing_results[1])
Test Loss =     0.2629030993461609
Test Accuracy = 0.9138
In [12]:
# predict_classes returns the most likely class label for each test image.
y_pred = model.predict_classes(Xs_test)

confmat = confusion_matrix(y_test, y_pred)
df = pd.DataFrame(confmat)

df
Out[12]:
      0    1    2    3    4    5    6    7    8    9
0   464    0    9    7    2    0   18    0    5    0
1     0  484    0    6    1    0    1    0    1    0
2    11    1  423    3   18    0   11    0    1    0
3    13    4    4  444   16    0    6    0    0    0
4     1    0   27   11  447    0   14    0    0    0
5     0    0    0    0    0  502    0    2    0    3
6   100    0   37    9   48    0  333    0    4    0
7     0    0    0    0    0    4    0  487    0   10
8     2    0    2    1    1    2    0    2  493    0
9     0    0    0    0    0    4    0    9    0  492
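Dividing each row of the confusion matrix by its row sum converts counts to per-class recall; the diagonal then matches the recall column of the classification report below. A minimal sketch:

recall_per_class = confmat.diagonal() / confmat.sum(axis=1)
print(np.round(recall_per_class, 2))  # e.g. class 6 (Shirt): 333/531 ≈ 0.63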
In [13]:
print(classification_report(y_test, y_pred))
              precision    recall  f1-score   support

           0       0.79      0.92      0.85       505
           1       0.99      0.98      0.99       493
           2       0.84      0.90      0.87       468
           3       0.92      0.91      0.92       487
           4       0.84      0.89      0.87       500
           5       0.98      0.99      0.99       507
           6       0.87      0.63      0.73       531
           7       0.97      0.97      0.97       501
           8       0.98      0.98      0.98       503
           9       0.97      0.97      0.97       505

   micro avg       0.91      0.91      0.91      5000
   macro avg       0.92      0.92      0.91      5000
weighted avg       0.92      0.91      0.91      5000

View Misclassified Samples

In [14]:
import math

# Find misclassified samples in the test set.
sel = y_pred != y_test
n_mc = np.sum(sel)  # number misclassified

X_mc = X_test[sel]
y_mc = y_test[sel]
yp_mc = y_pred[sel]

# Sort by true label so misclassified images of the same class appear together.
idx = np.argsort(y_mc)
X_mc = X_mc[idx]
y_mc = y_mc[idx]
yp_mc = yp_mc[idx]

rows = math.ceil(n_mc / 4)

plt.figure(figsize=(12,360))
for i in range(0, n_mc):
    plt.subplot(rows, 4, i+1)
    plt.imshow(X_mc[i], cmap=cm.binary)
    # Blue: true label; red: predicted label.
    plt.text(-1, 10, s=str(int(y_mc[i])), fontsize=16, color='b')
    plt.text(-1, 16, s=str(int(yp_mc[i])), fontsize=16, color='r')
    plt.axis('off')
plt.show()