import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from ClassificationPlotter import plot_regions  # local helper module for plotting decision regions
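# ClassificationPlotter is a local course module, not a PyPI package. If it is
# unavailable, a minimal stand-in with the same assumed signature
# plot_regions(model, X, y) might look like this sketch (not the module's
# actual implementation):
def plot_regions_sketch(model, X, y, resolution=200):
    # Evaluate the model over a grid covering the data and shade by predicted class.
    x0 = np.linspace(X[:, 0].min() - 1, X[:, 0].max() + 1, resolution)
    x1 = np.linspace(X[:, 1].min() - 1, X[:, 1].max() + 1, resolution)
    xx0, xx1 = np.meshgrid(x0, x1)
    preds = model.predict(np.column_stack([xx0.ravel(), xx1.ravel()])).reshape(xx0.shape)
    plt.figure(figsize=(8, 6))
    plt.contourf(xx0, xx1, preds, alpha=0.3, cmap='rainbow')
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap='rainbow', edgecolor='k')
    plt.show()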
# Generate a synthetic dataset: 500 points, 4 classes, 2 informative features.
np.random.seed(4643)
X, y = make_classification(n_samples=500, n_classes=4, n_clusters_per_class=1,
                           n_features=2, n_redundant=0, class_sep=0.8)
# Visualize the generated data, colored by class.
plt.figure(figsize=(8, 6))
plt.scatter(X[:,0], X[:,1], c=y, cmap='rainbow', edgecolor='k')
plt.show()
# Hold out half the data as a validation set.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.5, random_state=1)
print(X_train.shape)
print(X_val.shape)
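# With four classes and a 50/50 split, passing stratify=y keeps class
# proportions comparable across the two sets; an optional variant (sketch only):
# X_train, X_val, y_train, y_val = train_test_split(
#     X, y, test_size=0.5, random_state=1, stratify=y)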
# Tune the regularization strength C for logistic regression by sweeping
# log10(C) over [-3, 3] and tracking accuracy on both sets.
tr_acc = []
va_acc = []
exp_list = np.linspace(-3, 3, 100)
for k in exp_list:
    temp_mod = LogisticRegression(solver='lbfgs', C=10**k, multi_class='auto')
    temp_mod.fit(X_train, y_train)
    tr_acc.append(temp_mod.score(X_train, y_train))
    va_acc.append(temp_mod.score(X_val, y_val))
plt.figure(figsize=(6, 4))
plt.plot(exp_list, tr_acc, label='Training Accuracy')
plt.plot(exp_list, va_acc, label='Validation Accuracy')
plt.xlabel('log10(C)')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
# Refit with the C value that gave the highest validation accuracy.
idx = np.argmax(va_acc)
best_logC = exp_list[idx]
print(best_logC)
mod_01 = LogisticRegression(solver='lbfgs', C=10**best_logC, multi_class='auto')
mod_01.fit(X_train, y_train)
print('Training Accuracy: ', mod_01.score(X_train, y_train))
print('Validation Accuracy:', mod_01.score(X_val, y_val))
plot_regions(mod_01, X_train, y_train)
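# As a cross-check on the single-split search above, the same sweep over C
# can be run with 5-fold cross-validation. This is a sketch of an
# alternative, not part of the original workflow:
from sklearn.model_selection import GridSearchCV
param_grid = {'C': 10.0 ** np.linspace(-3, 3, 20)}
cv_search = GridSearchCV(LogisticRegression(solver='lbfgs', multi_class='auto'),
                         param_grid, cv=5)
cv_search.fit(X_train, y_train)
print('CV-selected C:', cv_search.best_params_['C'])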
# Tune the number of neighbors K for a kNN classifier.
tr_acc = []
va_acc = []
K_list = range(1, 40)
for k in K_list:
    temp_mod = KNeighborsClassifier(n_neighbors=k)
    temp_mod.fit(X_train, y_train)
    tr_acc.append(temp_mod.score(X_train, y_train))
    va_acc.append(temp_mod.score(X_val, y_val))
plt.figure(figsize=(6, 4))
plt.plot(K_list, tr_acc, label='Training Accuracy')
plt.plot(K_list, va_acc, label='Validation Accuracy')
plt.xlabel('K')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
# Refit with the K value that gave the highest validation accuracy.
idx = np.argmax(va_acc)
best_K = K_list[idx]
print(best_K)
mod_02 = KNeighborsClassifier(n_neighbors=best_K)
mod_02.fit(X_train, y_train)
print('Training Accuracy: ', mod_02.score(X_train, y_train))
print('Validation Accuracy:', mod_02.score(X_val, y_val))
plot_regions(mod_02, X_train, y_train)
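# Selecting K from a single split can be noisy; averaging accuracy over
# cross-validation folds is a steadier alternative (sketch only):
from sklearn.model_selection import cross_val_score
cv_scores = [cross_val_score(KNeighborsClassifier(n_neighbors=k), X_train, y_train, cv=5).mean()
             for k in K_list]
print('CV-selected K:', K_list[int(np.argmax(cv_scores))])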
# Tune the maximum depth of a decision tree.
tr_acc = []
va_acc = []
depth_list = range(1, 10)
for d in depth_list:
    temp_mod = DecisionTreeClassifier(max_depth=d)
    temp_mod.fit(X_train, y_train)
    tr_acc.append(temp_mod.score(X_train, y_train))
    va_acc.append(temp_mod.score(X_val, y_val))
plt.figure(figsize=(6, 4))
plt.plot(depth_list, tr_acc, label='Training Accuracy')
plt.plot(depth_list, va_acc, label='Validation Accuracy')
plt.xlabel('Max Depth')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
# Refit with the depth that gave the highest validation accuracy.
idx = np.argmax(va_acc)
best_d = depth_list[idx]
print(best_d)
mod_03 = DecisionTreeClassifier(max_depth=best_d)
mod_03.fit(X_train, y_train)
print('Training Accuracy: ', mod_03.score(X_train, y_train))
print('Validation Accuracy:', mod_03.score(X_val, y_val))
plot_regions(mod_03, X_train, y_train)
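# An alternative to tuning max_depth is cost-complexity pruning via the
# ccp_alpha parameter; candidate alphas come from the tree itself. A sketch,
# with random_state=1 added here for reproducibility (not in the original):
path = DecisionTreeClassifier(random_state=1).cost_complexity_pruning_path(X_train, y_train)
prune_scores = [DecisionTreeClassifier(ccp_alpha=a, random_state=1).fit(X_train, y_train).score(X_val, y_val)
                for a in path.ccp_alphas]
print('Best ccp_alpha:', path.ccp_alphas[int(np.argmax(prune_scores))])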
from sklearn.ensemble import VotingClassifier
# Combine the three tuned models; soft voting averages predicted class probabilities.
voting_clf = VotingClassifier(
    estimators=[('lr', mod_01), ('knn', mod_02), ('tree', mod_03)],
    voting='soft'
)
voting_clf.fit(X_train, y_train)
print('Training Accuracy: ', voting_clf.score(X_train, y_train))
print('Validation Accuracy:', voting_clf.score(X_val, y_val))
plot_regions(voting_clf, X_train, y_train)
# Compare each individual model against the ensemble on the validation set.
print('Model 01 Validation Accuracy:', mod_01.score(X_val, y_val))
print('Model 02 Validation Accuracy:', mod_02.score(X_val, y_val))
print('Model 03 Validation Accuracy:', mod_03.score(X_val, y_val))
print('Ensemble Validation Accuracy:', voting_clf.score(X_val, y_val))
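# For comparison, 'hard' voting tallies predicted labels instead of averaging
# predicted probabilities (sketch):
hard_clf = VotingClassifier(
    estimators=[('lr', mod_01), ('knn', mod_02), ('tree', mod_03)],
    voting='hard'
)
hard_clf.fit(X_train, y_train)
print('Hard Voting Validation Accuracy:', hard_clf.score(X_val, y_val))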