import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_lfw_people
# Load the Labeled Faces in the Wild subset, restricted via
# min_faces_per_person=60, and print a quick summary of what came back.
faces = fetch_lfw_people(min_faces_per_person=60)
print(type(faces))

# Flattened pixel features and integer class labels.
X, y = faces.data, faces.target
print(y.shape, X.shape, sep='\n')

# Label index -> person name lookup.
names = faces.target_names
print(names)

# Same pictures as X, kept as 2-D arrays for plotting.
images = faces.images
print(images.shape)
# Display one face picked uniformly at random from the dataset.
plt.close()
plt.rc('figure', figsize=[4, 4])
i = np.random.choice(images.shape[0])  # an int draws from arange(n)
plt.imshow(images[i], cmap='bone')
plt.axis('off')
plt.show()
# Bar chart of how many images each person contributes.
plt.close()
_, name_dist = np.unique(y, return_counts=True)
plt.bar(x=range(len(names)), height=name_dist)
plt.xticks(ticks=range(len(names)), labels=names, rotation='vertical')
plt.show()
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for validation; the fixed seed keeps the
# split reproducible between runs.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.20, random_state=1
)
print(y_train.size, y_val.size, sep='\n')

# Rescale the pixel features by 1/255 for the raw-pixel models.
# NOTE(review): this assumes intensities arrive in [0, 255]; confirm
# against the installed scikit-learn version of fetch_lfw_people.
Xs, Xs_train, Xs_val = (pixels / 255 for pixels in (X, X_train, X_val))
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
%%time
# Logistic regression on the scaled raw pixels; report training accuracy
# followed by validation accuracy.
lr_mod = LogisticRegression(
    solver='lbfgs', multi_class='auto', max_iter=2000
).fit(Xs_train, y_train)
print(
    lr_mod.score(Xs_train, y_train),
    lr_mod.score(Xs_val, y_val),
    sep='\n',
)
# Spot-check the raw-pixel logistic regression on one random validation
# face: true name in blue, predicted name in red.
i = np.random.choice(X_val.shape[0])
pred = lr_mod.predict(Xs_val[i].reshape(1, -1))  # predict expects a 2-D batch
plt.imshow(X_val[i].reshape(62, 47), cmap='bone')
plt.axis('off')
plt.text(50, 5, names[y_val[i]], fontsize=16, color='b')
plt.text(50, 15, names[pred[0]], fontsize=16, color='r')
plt.show()
# Per-class metrics and the confusion matrix for the raw-pixel logistic
# regression on the validation split.
pred_val = lr_mod.predict(Xs_val)
print(classification_report(y_true=y_val, y_pred=pred_val, target_names=names))
print(confusion_matrix(y_true=y_val, y_pred=pred_val))
%%time
# 12-nearest-neighbours classifier on the scaled raw pixels; report
# training accuracy followed by validation accuracy.
KNN_mod = KNeighborsClassifier(n_neighbors=12).fit(Xs_train, y_train)
print(
    KNN_mod.score(Xs_train, y_train),
    KNN_mod.score(Xs_val, y_val),
    sep='\n',
)
%%time
# RBF-kernel support vector classifier on the scaled raw pixels; report
# training accuracy followed by validation accuracy.
svm_mod = SVC(kernel='rbf', gamma=0.001, C=10).fit(Xs_train, y_train)
print(
    svm_mod.score(Xs_train, y_train),
    svm_mod.score(Xs_val, y_val),
    sep='\n',
)
from sklearn.decomposition import PCA
# Learn a 150-component whitened PCA basis from the TRAINING split only.
# Fitting on all of X would let validation images shape the projection
# that the PCA-based models are later scored on (data leakage).
# random_state mirrors the seeded train/validation split so that a
# randomized SVD solver, if scikit-learn selects one, gives repeatable
# components.
pca = PCA(n_components=150, whiten=True, random_state=1)
pca.fit(X_train)

# Whitened scores for every image, so any face can still be looked up by
# its row index in the full dataset.
Z = pca.transform(X)

# Cumulative share of variance captured as components are added.
print(np.cumsum(pca.explained_variance_ratio_))

# Principal axes, one flattened image per row.
pc = pca.components_
# Draw the first 25 principal components as images on a 5x5 grid.
plt.close()
plt.rc('figure', figsize=[12, 12])
for i in range(25):
    plt.subplot(5, 5, i + 1)
    plt.imshow(pc[i].reshape(62, 47), cmap='bone')
    plt.axis('off')
plt.show()
# Show the first face in the dataset, then print its first 25 PCA scores.
plt.rc('figure', figsize=[4, 4])
plt.imshow(images[0], cmap='bone')
plt.axis('off')
plt.show()
print(Z[0, :25])
# Rebuild the first face one principal component at a time and show the
# running approximation after each of the first 25 components.
#
# The PCA was fitted with whiten=True, so each score in Z has been divided
# by the component's standard deviation and the data were centred first.
# A faithful reconstruction therefore has to (a) start from the mean face
# and (b) multiply each score by sqrt(explained_variance_) before
# weighting the component; omitting either does not approximate the image.
cp = pca.mean_.astype(float).reshape(62, 47)  # astype returns a fresh copy
plt.close()
plt.rcParams["figure.figsize"] = [12, 12]
for i in range(25):
    unwhitened_score = Z[0, i] * np.sqrt(pca.explained_variance_[i])
    cp += unwhitened_score * pc[i].reshape(62, 47)
    plt.subplot(5, 5, i + 1)
    plt.imshow(cp, cmap='bone')
    plt.axis('off')
plt.show()
# Reconstruct the first face from all 150 components.
# inverse_transform undoes the whitening and adds the mean face back;
# a plain weighted sum of components_ by the whitened scores does neither.
# Z[:1] keeps the 2-D (1, n_components) shape the estimator expects, and
# [0] returns the single flattened image.
cp = pca.inverse_transform(Z[:1])[0]
plt.rcParams["figure.figsize"] = [4, 4]
plt.imshow(cp.reshape(62, 47), cmap='bone')
plt.axis('off')
plt.show()
# Project the training and validation pixels onto the fitted PCA basis.
Z_train, Z_val = (pca.transform(split) for split in (X_train, X_val))
%%time
# One-vs-rest logistic regression on the PCA scores; report training
# accuracy followed by validation accuracy.
pca_lr_mod = LogisticRegression(
    solver='lbfgs', multi_class='ovr', max_iter=6000
).fit(Z_train, y_train)
print(
    pca_lr_mod.score(Z_train, y_train),
    pca_lr_mod.score(Z_val, y_val),
    sep='\n',
)
# Show one random validation face with its true name (blue) and the name
# predicted by the raw-pixel logistic regression (red).
# NOTE(review): this uses lr_mod on Xs_val even though it follows the
# PCA model fit; confirm this is intended and not a leftover of the
# earlier raw-pixel spot-check.
i = np.random.choice(X_val.shape[0])
pred = lr_mod.predict(Xs_val[i].reshape(1, -1))  # predict expects a 2-D batch
plt.imshow(X_val[i].reshape(62, 47), cmap='bone')
plt.axis('off')
plt.text(50, 5, names[y_val[i]], fontsize=16, color='b')
plt.text(50, 15, names[pred[0]], fontsize=16, color='r')
plt.show()
# Spot-check the PCA-based logistic regression on one random validation
# face: original image on the left, its PCA reconstruction on the right,
# with the true name in blue and the predicted name in red.
i = np.random.choice(range(X_val.shape[0]))
pred = pca_lr_mod.predict([Z_val[i,:]])
plt.rcParams["figure.figsize"] = [8,8]
plt.subplot(1,2,1)
plt.imshow(X_val[i].reshape(62,47), cmap='bone')
plt.axis('off')
plt.subplot(1,2,2)
# The scores are whitened, so the reconstruction must rescale them and add
# the mean face back; inverse_transform does both. Summing
# Z * components_ directly would not reproduce the image.
pca_rep = pca.inverse_transform(Z_val[i:i + 1])[0].reshape(62,47)
plt.imshow(pca_rep, cmap='bone')
plt.axis('off')
plt.text(50, 5, s = names[y_val[i]], fontsize=16, color='b')
plt.text(50, 15, s = names[pred[0]], fontsize=16, color='r')
plt.show()
# Per-class validation metrics for the PCA-based logistic regression.
print(
    classification_report(
        y_true=y_val,
        y_pred=pca_lr_mod.predict(Z_val),
        target_names=names,
    )
)
%%time
# 12-nearest-neighbours classifier on the PCA scores; report training
# accuracy followed by validation accuracy.
pca_KNN_mod = KNeighborsClassifier(n_neighbors=12).fit(Z_train, y_train)
print(
    pca_KNN_mod.score(Z_train, y_train),
    pca_KNN_mod.score(Z_val, y_val),
    sep='\n',
)
%%time
# RBF-kernel support vector classifier on the PCA scores; report training
# accuracy followed by validation accuracy.
pca_svm_mod = SVC(kernel='rbf', gamma=0.001, C=10).fit(Z_train, y_train)
print(
    pca_svm_mod.score(Z_train, y_train),
    pca_svm_mod.score(Z_val, y_val),
    sep='\n',
)