import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
# Build a small noisy sample: 40 points drawn uniformly on [-4, 4], with a
# response that follows a degree-7 polynomial plus Gaussian noise (std 2).
np.random.seed(1)  # fixed seed so the sample is reproducible
n = 40
x = np.random.uniform(low=-4, high=4, size=n)
X = x[:, np.newaxis]  # column-vector view of x, as scikit-learn expects 2-D input
noise = np.random.normal(loc=0, scale=2, size=n)
y = 0.3 + 0.05 * x + 0.001 * x**7 + noise

# Quick look at the raw sample.
plt.close()
plt.scatter(x, y)
plt.show()
# Hold out 40% of the synthetic sample for validation (seeded split).
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.4, random_state=1
)
print(X_train.shape)
print(X_val.shape)

# Baseline: ordinary least-squares straight line on the raw feature.
mod1 = LinearRegression().fit(X_train, y_train)
print('Training r-Squared:', mod1.score(X_train, y_train))
# The label reads "Testing", but the score is computed on the validation split.
print('Testing r-Squared: ', mod1.score(X_val, y_val))

# Evaluate the fitted line on an evenly spaced grid covering the sampled range.
x_curve = np.linspace(-4, 4, 100)
y_curve = mod1.predict(x_curve[:, np.newaxis])

# Training points (default colour), validation points (red), fitted line (orange).
plt.close()
plt.scatter(X_train.ravel(), y_train)
plt.scatter(X_val.ravel(), y_val, c='r')
plt.plot(x_curve, y_curve, c='darkorange')
plt.show()
from sklearn.preprocessing import PolynomialFeatures
# Degree-3 fit: expand the single feature into polynomial terms up to degree 3
# and fit an ordinary least-squares model on the expanded training features.
poly = PolynomialFeatures(3)
Xp3_train = poly.fit_transform(X_train)
# The transformer is fitted on the training split only; every other input is
# passed through the already-fitted object with transform() rather than refit.
Xp3_val = poly.transform(X_val)
mod3 = LinearRegression()
mod3.fit(Xp3_train, y_train)
print('Training r-Squared:', mod3.score(Xp3_train, y_train))
# The label reads "Testing", but the score is computed on the validation split.
print('Testing r-Squared: ', mod3.score(Xp3_val, y_val))

# Evaluate the fitted curve on an evenly spaced grid covering the sampled range.
x_curve = np.linspace(-4, 4, 100)
xp_curve = poly.transform(x_curve.reshape(-1, 1))
y_curve = mod3.predict(xp_curve)

# Training points (default colour), validation points (red), fitted curve (orange).
plt.close()
plt.scatter(X_train.reshape(-1,), y_train)
plt.scatter(X_val.reshape(-1,), y_val, c='r')
plt.plot(x_curve, y_curve, c='darkorange')
plt.show()
# Degree-5 fit: expand the single feature into polynomial terms up to degree 5
# and fit an ordinary least-squares model on the expanded training features.
poly = PolynomialFeatures(5)
Xp5_train = poly.fit_transform(X_train)
# The transformer is fitted on the training split only; every other input is
# passed through the already-fitted object with transform() rather than refit.
Xp5_val = poly.transform(X_val)
mod5 = LinearRegression()
mod5.fit(Xp5_train, y_train)
print('Training r-Squared:', mod5.score(Xp5_train, y_train))
# The label reads "Testing", but the score is computed on the validation split.
print('Testing r-Squared: ', mod5.score(Xp5_val, y_val))

# Evaluate the fitted curve on an evenly spaced grid covering the sampled range.
x_curve = np.linspace(-4, 4, 100)
xp_curve = poly.transform(x_curve.reshape(-1, 1))
y_curve = mod5.predict(xp_curve)

# Training points (default colour), validation points (red), fitted curve (orange).
plt.close()
plt.scatter(X_train.reshape(-1,), y_train)
plt.scatter(X_val.reshape(-1,), y_val, c='r')
plt.plot(x_curve, y_curve, c='darkorange')
plt.show()
# Degree-7 fit: expand the single feature into polynomial terms up to degree 7
# and fit an ordinary least-squares model on the expanded training features.
poly = PolynomialFeatures(7)
Xp7_train = poly.fit_transform(X_train)
# The transformer is fitted on the training split only; every other input is
# passed through the already-fitted object with transform() rather than refit.
Xp7_val = poly.transform(X_val)
mod7 = LinearRegression()
mod7.fit(Xp7_train, y_train)
print('Training r-Squared:', mod7.score(Xp7_train, y_train))
# The label reads "Testing", but the score is computed on the validation split.
print('Testing r-Squared: ', mod7.score(Xp7_val, y_val))

# Evaluate the fitted curve on an evenly spaced grid covering the sampled range.
x_curve = np.linspace(-4, 4, 100)
xp_curve = poly.transform(x_curve.reshape(-1, 1))
y_curve = mod7.predict(xp_curve)

# Training points (default colour), validation points (red), fitted curve (orange).
plt.close()
plt.scatter(X_train.reshape(-1,), y_train)
plt.scatter(X_val.reshape(-1,), y_val, c='r')
plt.plot(x_curve, y_curve, c='darkorange')
plt.show()
# Degree-9 fit: expand the single feature into polynomial terms up to degree 9
# and fit an ordinary least-squares model on the expanded training features.
poly = PolynomialFeatures(9)
Xp9_train = poly.fit_transform(X_train)
# The transformer is fitted on the training split only; every other input is
# passed through the already-fitted object with transform() rather than refit.
Xp9_val = poly.transform(X_val)
mod9 = LinearRegression()
mod9.fit(Xp9_train, y_train)
print('Training r-Squared:', mod9.score(Xp9_train, y_train))
# The label reads "Testing", but the score is computed on the validation split.
print('Testing r-Squared: ', mod9.score(Xp9_val, y_val))

# Evaluate the fitted curve on an evenly spaced grid covering the sampled range.
x_curve = np.linspace(-4, 4, 100)
xp_curve = poly.transform(x_curve.reshape(-1, 1))
y_curve = mod9.predict(xp_curve)

# Training points (default colour), validation points (red), fitted curve (orange).
plt.close()
plt.scatter(X_train.reshape(-1,), y_train)
plt.scatter(X_val.reshape(-1,), y_val, c='r')
plt.plot(x_curve, y_curve, c='darkorange')
plt.show()
# Load the tab-separated housing table.
df = pd.read_csv('data/BostonHousingV2.txt', sep='\t')
# NOTE: outside an interactive session this preview is computed and discarded.
df.head(10)

# Three predictor columns and one response column, selected by position.
# (An earlier variant used every column from index 6 onward as predictors.)
# NOTE(review): what these positions mean depends on the file's column order,
# which is not visible here - confirm against the data file.
feature_cols = [11, 16, 18]
target_col = 5
Xb = df.iloc[:, feature_cols].values
yb = df.iloc[:, target_col].values
print(Xb.shape)
print(yb.shape)

# 80% training; the remaining 20% is split evenly into validation and test.
Xb_train, Xb_holdout, yb_train, yb_holdout = train_test_split(
    Xb, yb, test_size=0.2, random_state=1
)
Xb_val, Xb_test, yb_val, yb_test = train_test_split(
    Xb_holdout, yb_holdout, test_size=0.5, random_state=1
)
print(Xb_train.shape)
print(Xb_val.shape)
print(Xb_test.shape)
# Sweep polynomial degrees 1..8 on the housing data, recording the training
# and validation r-squared of a least-squares fit at each degree so the two
# curves can be compared.
degrees = range(1, 9)
tr_r2 = []
va_r2 = []
for d in degrees:
    poly = PolynomialFeatures(d)
    Xp_train = poly.fit_transform(Xb_train)
    # Fit the expansion on training data only; reuse it for validation.
    Xp_val = poly.transform(Xb_val)

    temp_mod = LinearRegression()
    temp_mod.fit(Xp_train, yb_train)

    # Score once per split and reuse the value for both printing and plotting.
    train_score = temp_mod.score(Xp_train, yb_train)
    val_score = temp_mod.score(Xp_val, yb_val)

    print('--- Degree', d, '------------' )
    print('Training r-Squared: ', train_score)
    print('Validation r-Squared:', val_score)
    print()

    tr_r2.append(train_score)
    va_r2.append(val_score)

# Plot r-squared against degree for both splits.
plt.figure(figsize=(8,4))
plt.plot(degrees, tr_r2, label='Training')
plt.plot(degrees, va_r2, label='Validation')
plt.legend()
plt.show()
# Final housing model: degree-4 polynomial expansion with a least-squares fit,
# scored on all three splits (training, validation, and test).
poly = PolynomialFeatures(4)
Xb4_train = poly.fit_transform(Xb_train)
# The expansion is fitted on the training split only; the validation and test
# splits are passed through the fitted transformer rather than refitting it.
Xb4_val = poly.transform(Xb_val)
Xb4_test = poly.transform(Xb_test)
b4_mod = LinearRegression()
b4_mod.fit(Xb4_train, yb_train)
print('Training r-Squared: ', b4_mod.score(Xb4_train, yb_train))
print('Validation r-Squared:', b4_mod.score(Xb4_val, yb_val))
print('Testing r-Squared: ', b4_mod.score(Xb4_test, yb_test))