import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import pandas as pd
import keras
from keras.models import Sequential
from keras.layers import Dense, Flatten
from tensorflow import set_random_seed
import tensorflow as tf
from sklearn.model_selection import train_test_split
from keras.datasets import fashion_mnist
The Fashion-MNIST dataset consists of 70,000 28x28 grayscale images split into 10 fashion categories. The categories are:
# Load the Fashion-MNIST arrays: one training pair and one holdout pair.
(X_train, y_train), (X_holdout, y_holdout) = fashion_mnist.load_data()

# Divide the holdout pair evenly into validation and test halves; the fixed
# random_state keeps the split reproducible between runs.
X_val, X_test, y_val, y_test = train_test_split(
    X_holdout,
    y_holdout,
    test_size=0.5,
    random_state=1,
)

# Report the shape of every split, one per line (train, test, validation).
print(
    *(part.shape for part in (X_train, y_train, X_test, y_test, X_val, y_val)),
    sep='\n',
)
# Inspect one randomly chosen training image: first as its raw pixel matrix,
# then rendered as a picture.

# Temporarily widen NumPy's print width so a full image row is less likely
# to wrap when the matrix is printed.
np.set_printoptions(linewidth=120)

# Draw the index from the actual number of training images rather than a
# hard-coded 60000, so this keeps working if the training set is resized.
# An integer argument makes np.random.choice sample from range(that integer).
n = np.random.choice(X_train.shape[0])
img = X_train[n]
print(img)

# Restore the narrower print width (75 is NumPy's default).
np.set_printoptions(linewidth=75)

# Render the same image with the binary colormap and no axes.
plt.imshow(img, cmap=cm.binary)
plt.axis('off')
plt.show()
# Give every image array an explicit trailing channel axis (N, 28, 28, 1),
# the input shape the convolutional model declares, and divide by 255
# (presumably mapping 8-bit pixel values into [0, 1] -- confirm dtype).
Xs_train, Xs_test, Xs_val = (
    images.reshape(-1, 28, 28, 1) / 255
    for images in (X_train, X_test, X_val)
)
from keras.layers import Conv2D, MaxPooling2D
%%time
# Seed both NumPy and TensorFlow before any layer is created.
np.random.seed(1)
tf.set_random_seed(1)

# Three convolution + max-pooling stages followed by a small dense head that
# ends in a 10-way softmax.
model = Sequential([
    Conv2D(32, (3, 3), padding='same', input_shape=(28, 28, 1), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax'),
])

# The sparse loss is used because the labels are passed to fit() as-is,
# without one-hot encoding.
opt = keras.optimizers.Adam(lr=0.001)
model.compile(
    optimizer=opt,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

# Xs_train / Xs_val already have shape (-1, 28, 28, 1), so they are passed
# straight through; the validation split is evaluated after every epoch.
h = model.fit(
    Xs_train,
    y_train,
    batch_size=256,
    epochs=20,
    verbose=1,
    validation_data=(Xs_val, y_val),
)
# Plot the training curves recorded by fit(): loss on the left panel,
# accuracy on the right, each with its training and validation series.
plt.rcParams["figure.figsize"] = [8, 4]

# (training key, validation key, panel title, y-axis label) for each panel.
panels = (
    ('loss', 'val_loss', 'model loss', 'loss'),
    ('acc', 'val_acc', 'model accuracy', 'accuracy'),
)
for position, (train_key, val_key, title, y_label) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(h.history[train_key], label='Training')
    plt.plot(h.history[val_key], label='Validation')
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend()

plt.show()
from sklearn.metrics import confusion_matrix, classification_report
# Score the trained model on the held-out test split; with the compile
# settings used above, evaluate() yields the loss followed by the accuracy.
testing_results = model.evaluate(x=Xs_test, y=y_test, verbose=0)
for caption, value in zip(('Test Loss = ', 'Test Accuracy ='), testing_results):
    print(caption, value)

# Predicted class index for every test image.
y_pred = model.predict_classes(Xs_test)

# Confusion matrix (true labels first, predictions second), wrapped in a
# DataFrame.
confmat = confusion_matrix(y_test, y_pred)
df = pd.DataFrame(data=confmat)
df  # bare expression: shows the table when run as a notebook cell

# Per-class precision / recall / F1 summary.
print(classification_report(y_test, y_pred))
import math
# Find the misclassified samples in the test set and draw them in a grid,
# ordered by true label. Each tile shows the true label in blue and the
# predicted label in red.
sel = y_pred != y_test
n_mc = int(np.sum(sel))  # number misclassified, as a plain int for range()
X_mc = X_test[sel, :]
y_mc = y_test[sel]
yp_mc = y_pred[sel]

# Reorder all three arrays by true label so tiles of the same class sit
# together in the grid.
idx = np.argsort(y_mc)
X_mc = X_mc[idx, :]
y_mc = y_mc[idx]
yp_mc = yp_mc[idx]

n_cols = 4
rows = math.ceil(n_mc / n_cols)

# Scale the figure height with the number of rows (12 wide / 4 columns gives
# 3 units per tile) instead of a fixed 360, so tiles stay roughly square
# whatever the error count; max() keeps the size valid when nothing was
# misclassified.
plt.figure(figsize=(12, 3 * max(rows, 1)))
for i in range(n_mc):
    plt.subplot(rows, n_cols, i + 1)
    plt.imshow(X_mc[i], cmap=cm.binary)
    plt.text(-1, 10, s=str(int(y_mc[i])), fontsize=16, color='b')   # true label
    plt.text(-1, 16, s=str(int(yp_mc[i])), fontsize=16, color='r')  # predicted label
    plt.axis('off')

# Render explicitly, matching the other plotting sections of this file.
plt.show()