Lesson 16 - Linear Regression Class

In [1]:
import numpy as np
import matplotlib.pyplot as plt
In [2]:
class LinearRegression:
    """Simple linear regression, y ~ b[0] + b[1] * x, fitted by a fixed-step search.

    Starting from b = [0, 0], every iteration evaluates the sum of squared
    errors (SSE) at the four neighbours obtained by moving exactly one
    coefficient up or down by ``step``, and moves to the best neighbour if it
    is strictly better than the current point.  The search stops when no
    neighbour improves the SSE (or when ``max_iter`` is reached).

    Because each iteration moves a single coefficient by a single step, the
    number of iterations grows with the distance of the solution from the
    origin divided by ``step``, and the result is only accurate to roughly
    one step.

    Attributes:
        x, y: the training data, stored as passed in.
        step: the step size used during the search.
        b: fitted coefficients ``[intercept, slope]``.
        sse: sum of squared errors at ``b``.
        iterations: number of search iterations that were executed.
    """

    def __init__(self, x, y, step=0.0001, max_iter=None):
        """Fit the model to the paired observations ``x`` and ``y``.

        Args:
            x: sequence of predictor values (anything supporting ``len`` and
                integer indexing, e.g. a list or a 1-D NumPy array).
            y: sequence of response values, same length as ``x``.
            step: positive amount by which a coefficient is changed per move.
                The default reproduces the original hard-coded value.
            max_iter: optional cap on the number of iterations; ``None``
                (default) searches until no neighbour improves the SSE.

        Raises:
            ValueError: if ``x`` and ``y`` differ in length, if ``step`` is
                not positive, or if the initial SSE is NaN (which happens
                when the data contain NaN or infinite values).
        """
        if len(x) != len(y):
            raise ValueError(
                "x and y must have the same length (got %d and %d)" % (len(x), len(y))
            )
        if step <= 0:
            raise ValueError("step must be positive")

        self.x = x
        self.y = y
        self.step = step

        b = [0, 0]
        sse = self.find_sse(x, y, b)

        # NaN never compares less than or equal to anything, so the search
        # could neither improve nor detect convergence; fail loudly instead.
        if sse != sse:
            raise ValueError(
                "initial SSE is NaN; x and y must contain only finite numeric values"
            )

        count = 0
        while max_iter is None or count < max_iter:
            count += 1

            # All four neighbours are built from the coefficients held at the
            # start of this iteration, in the order +b0, -b0, +b1, -b1.
            candidates = (
                [b[0] + step, b[1]],
                [b[0] - step, b[1]],
                [b[0], b[1] + step],
                [b[0], b[1] - step],
            )

            improved = False
            for candidate in candidates:
                candidate_sse = self.find_sse(x, y, candidate)
                # Strict comparison: on ties the earlier candidate is kept.
                if candidate_sse < sse:
                    b = candidate
                    sse = candidate_sse
                    improved = True

            if not improved:
                break

        self.b = b
        self.sse = sse
        self.iterations = count

    def predict(self, x0):
        """Return the fitted line's value ``b[0] + b[1] * x0``."""
        return self.b[0] + self.b[1] * x0

    def find_sse(self, x, y, b):
        """Return the sum of squared errors of the line ``b`` on ``(x, y)``.

        Args:
            x: predictor values, indexed ``0 .. len(x) - 1``.
            y: response values, indexed with the same positions as ``x``.
            b: coefficients ``[intercept, slope]``.

        Returns:
            The sum over ``i`` of ``(y[i] - (b[0] + b[1] * x[i])) ** 2``;
            ``0`` when ``x`` is empty.
        """
        # Plain left-to-right accumulation is kept on purpose: the search
        # compares SSE values exactly, so the summation order must not change.
        total = 0
        for i in range(len(x)):
            yhat = b[0] + b[1] * x[i]
            e = y[i] - yhat
            total += e ** 2
        return total
    
In [3]:
# Tiny data set for a quick check of the class.
x_test, y_test = [1, 2, 3, 4], [5, 8, 7, 10]

# Fit the model, then show the coefficients [b0, b1] and the prediction at x = 2.5.
test_model = LinearRegression(x=x_test, y=y_test)
print(test_model.b, test_model.predict(2.5), sep='\n')
[3.999200000004015, 1.4002999999998624]
7.499950000003672

Working with Simulated Data

In [4]:
# Seed NumPy's global generator so the simulated sample is reproducible.
np.random.seed(1)

# 100 predictor values drawn from a normal distribution (mean 10, sd 2); the
# response follows the line 4.3 + 1.7 * x plus normal noise (mean 0, sd 2).
# The x draw must come before the noise draw to keep the same random stream.
x_data = np.random.normal(loc=10, scale=2, size=100)
y_data = 4.3 + 1.7 * x_data + np.random.normal(loc=0, scale=2, size=100)

plt.scatter(x_data, y_data, edgecolor='black', c='orange')
plt.show()

In the cell below, create a LinearRegression model, passing it the arrays x_data and y_data as arguments. Store the resulting model in sim_model.

In [5]:
# Fit the class to the simulated sample.
sim_model = LinearRegression(x=x_data, y=y_data)

# Show the fitted [b0, b1], then the prediction at x = 11, one per line.
print(sim_model.b, sim_model.predict(11), sep='\n')
[3.6228000000032208, 1.7970999999998187]
23.390900000001228
In [6]:
# Predictions of the simulated-data model at x = 14 and x = 16, one per line.
print(sim_model.predict(14), sim_model.predict(16), sep='\n')
28.78220000000068
32.376400000000324

Predicting Car Prices

In [7]:
# Car prices, in thousands of dollars (one entry per car).
price = [
    53.7, 56.8, 58.5, 42.0, 48.9,
    33.2, 22.2, 32.6, 30.3, 19.8,
    26.1, 24.9, 18.1, 11.7, 13.3,
    23.4, 13.2, 13.6, 14.8, 4.6,
]

# Matching mileages, in thousands of miles (same order as `price`).
mileage = [
    3.1, 4.1, 5.3, 7.1, 19.5,
    28.3, 36.8, 37.2, 42.3, 52.3,
    53.3, 53.4, 63.2, 68.4, 82.3,
    83.9, 88.4, 97.6, 99.7, 105.9,
]

# Scatter plot of price against mileage.
plt.scatter(mileage, price, edgecolor='black', c='orange')
plt.xlabel('Mileage (in 1000s of Miles)')
plt.ylabel('Price (in 1000s of Dollars)')
plt.show()
In [8]:
# Fit price as a linear function of mileage and show the coefficients [b0, b1].
car_model = LinearRegression(x=mileage, y=price)
print(car_model.b)
[50.54350000055434, -0.4351999999999684]
In [9]:
# Predicted prices for mileage values of 40 and 80, one per line.
print(car_model.predict(40), car_model.predict(80), sep='\n')
33.1355000005556
15.727500000556866