import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
# Seed NumPy's global RNG before generating the synthetic data set.
np.random.seed(1)

# Synthetic 7-class problem: 400 samples, 6 features, all of them informative.
X, y = make_classification(
    n_samples=400,
    n_features=6,
    n_informative=6,
    n_redundant=0,
    n_classes=7,
    class_sep=2,
)

# Per-feature summary statistics, printed with two decimals.
np.set_printoptions(suppress=True, precision=2)
print('Distribution of Features:')
print('Min: ', X.min(axis=0))
print('Max: ', X.max(axis=0))
print('Mean:', X.mean(axis=0))
print('SDev:', X.std(axis=0))

# Switch to four decimals for arrays printed from here on.
np.set_printoptions(suppress=True, precision=4)
# --- Logistic regression: tune the inverse regularization strength C ---
# Ten candidate values of C, log-spaced from 1e-3 to 1e3.
param_grid = [
    {'C': 10 ** np.linspace(-3, 3, 10)},
]

# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases;
# it is kept here so the searched parameter key stays 'C' -- confirm against
# the installed version before upgrading.
lin_reg = LogisticRegression(solver='lbfgs', multi_class='ovr')

# 5-fold cross-validated search scored by accuracy. The `iid` keyword is not
# passed: GridSearchCV no longer accepts it (removed in scikit-learn 0.24).
gscv_01 = GridSearchCV(
    lin_reg,
    param_grid,
    cv=5,
    scoring='accuracy',
    refit=True,
)
gscv_01.fit(X, y)

res_01 = gscv_01.cv_results_
print(res_01.keys())
print(res_01['mean_test_score'])

# One line per candidate: mean cross-validated score next to its parameters.
for score, params in zip(res_01['mean_test_score'], res_01['params']):
    print(score, params)

print(gscv_01.best_score_)
print(gscv_01.best_params_)

# refit=True means best_estimator_ has been fitted on all of X, y, so the
# score below is measured on the same data the model was trained on.
lin_reg = gscv_01.best_estimator_
print('Training Score:', lin_reg.score(X, y))
# --- k-nearest neighbours: tune neighbour count and Minkowski power p ---
# n_neighbors runs over 1..19; p is tried at 1 and 2.
param_grid = [
    {'n_neighbors': range(1, 20), 'p': [1, 2]},
]

knn = KNeighborsClassifier()

# 5-fold cross-validated search scored by accuracy. The `iid` keyword is not
# passed: GridSearchCV no longer accepts it (removed in scikit-learn 0.24).
gscv_02 = GridSearchCV(
    knn,
    param_grid,
    cv=5,
    scoring='accuracy',
    refit=True,
)
gscv_02.fit(X, y)

res_02 = gscv_02.cv_results_
print(gscv_02.best_score_)
print(gscv_02.best_params_)

# refit=True means best_estimator_ has been fitted on all of X, y, so the
# score below is measured on the same data the model was trained on.
knn = gscv_02.best_estimator_
print('Training Score:', knn.score(X, y))
# --- Support vector machine: compare polynomial and RBF kernels ---
# Two sub-grids, each searched exhaustively:
#   * poly kernel: degree 1-3, ten log-spaced C values, gamma fixed to 'auto'
#   * rbf kernel: ten log-spaced C values crossed with ten log-spaced gammas
param_grid = [
    {
        'kernel': ['poly'],
        'degree': [1, 2, 3],
        'C': 10 ** np.linspace(-3, 3, 10),
        'gamma': ['auto'],
    },
    {
        'kernel': ['rbf'],
        'C': 10 ** np.linspace(-3, 3, 10),
        'gamma': 10 ** np.linspace(-3, 3, 10),
    },
]

svm = SVC()

# 5-fold cross-validated search scored by accuracy. The `iid` keyword is not
# passed: GridSearchCV no longer accepts it (removed in scikit-learn 0.24).
gscv_03 = GridSearchCV(
    svm,
    param_grid,
    cv=5,
    scoring='accuracy',
    refit=True,
)
gscv_03.fit(X, y)

res_03 = gscv_03.cv_results_
print(gscv_03.best_score_)
print(gscv_03.best_params_)

# refit=True means best_estimator_ has been fitted on all of X, y, so the
# score below is measured on the same data the model was trained on.
svm = gscv_03.best_estimator_
print('Training Score:', svm.score(X, y))
# --- Random forest: tune ensemble size, tree depth and bootstrapping ---
# n_estimators: 100, 200, 300, 400; max_depth: 2..5.
# `bootstrap` must be real booleans: the strings 'True' and 'False' are both
# truthy, so the two settings were never actually distinguished.
param_grid = [
    {
        'n_estimators': np.arange(100, 500, 100),
        'max_depth': range(2, 6),
        'bootstrap': [True, False],
    },
]

forest = RandomForestClassifier()

# 5-fold cross-validated search scored by accuracy. The `iid` keyword is not
# passed: GridSearchCV no longer accepts it (removed in scikit-learn 0.24).
gscv_04 = GridSearchCV(
    forest,
    param_grid,
    cv=5,
    scoring='accuracy',
    refit=True,
)
gscv_04.fit(X, y)

res_04 = gscv_04.cv_results_
print(gscv_04.best_score_)
print(gscv_04.best_params_)

# refit=True means best_estimator_ has been fitted on all of X, y, so the
# score below is measured on the same data the model was trained on.
forest = gscv_04.best_estimator_
print('Training Score:', forest.score(X, y))