---
title: "Lesson 27 - Lasso and Ridge Regression"
author: "Robbie Beane"
output:
  html_notebook:
    theme: flatly
    toc: yes
    toc_depth: 2
---

# Load Packages and Generate Data

```{r, message=FALSE, results='hide', echo=FALSE, warning=FALSE}
require(glmnet)   # Provides lasso and ridge implementations
require(reshape)  # Provides melt() for reshaping coefficient tables for plotting
require(ggplot2)  # Used for the coefficient-path plots
```


```{r}
set.seed(125)
n <- 20
x <- runif(n, -4, 4)
y <- 3 + 0.5 * x + 0.01 * x^7 + rnorm(n, 0, 10)   # degree-7 polynomial signal plus Normal(0, sd = 10) noise

plot(y ~ x)
```
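
The noiseless mean function above is $3 + 0.5x + 0.01x^7$. As a quick sanity check (a minimal sketch reusing the `x` and `y` just generated), the true curve can be overlaid on the scatterplot:

```{r}
# Overlay the true mean function on the simulated data (dashed line)
plot(y ~ x)
curve(3 + 0.5 * x + 0.01 * x^7, from = -4, to = 4, add = TRUE, lty = 2)
```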

# Create Training and Testing Sets


```{r}
sel <- sample(1:length(x), 0.7*length(x))   # indices of a random 70% training sample

x_train <- x[sel]
x_test <- x[-sel]
y_train <- y[sel]
y_test <- y[-sel]

c(length(x_train), length(x_test))
```

# Fit Polynomial to Training Data

```{r}
# Rename the training predictor to X so predict() can later be given new values via data.frame(X = ...)
X <- x_train
poly_mod <- lm(y_train ~ X + I(X^2) + I(X^3) + I(X^4) + I(X^5) + I(X^6) + I(X^7) + I(X^8) + I(X^9))
summary(poly_mod)
```
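
Writing out nine `I()` terms is verbose. An equivalent fit (shown only as a sketch; it is not used below) uses `poly()` with raw, non-orthogonal terms:

```{r}
# Equivalent degree-9 polynomial fit in one term
poly_mod_alt <- lm(y_train ~ poly(X, degree = 9, raw = TRUE))
```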

## Plot Fitted Curve

```{r}
xg <- seq(-4, 4, length=100)   # grid of x values for drawing the fitted curve
yg_poly <- predict(poly_mod, data.frame(X=xg))

y_train_pred_poly <- predict(poly_mod, data.frame(X = x_train))
y_test_pred_poly <- predict(poly_mod, data.frame(X = x_test))

plot(y_train ~ x_train, ylim=c(-80, 140), xlab="x", ylab="y", main="Polynomial Fit")
lines(xg, yg_poly)
segments(x_train, y_train, x_train, y_train_pred_poly) 
points(y_test ~ x_test, col="red")
segments(x_test, y_test, x_test, y_test_pred_poly, col="red")
```

## Calculate Training and Testing R-Squared Values

```{r}
sse <- sum((y_train - y_train_pred_poly)^2)
sst <- sum((y_train - mean(y_train))^2)
training_rsq <- 1 - sse / sst

sse <- sum((y_test - y_test_pred_poly)^2)
sst <- sum((y_test - mean(y_test))^2)
testing_rsq <- 1 - sse / sst

poly_rsq <- c(training_rsq, testing_rsq)
names(poly_rsq) <- c('Training', 'Testing')
poly_rsq
```
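
The same SSE/SST computation is repeated for every model below. A small helper (hypothetical name `rsq`, shown only as a sketch; the later chunks keep the explicit computation) would avoid the duplication:

```{r}
# R-squared as 1 - SSE/SST for any vector of predictions
rsq <- function(actual, predicted) {
  1 - sum((actual - predicted)^2) / sum((actual - mean(actual))^2)
}

c(Training = rsq(y_train, y_train_pred_poly),
  Testing  = rsq(y_test, y_test_pred_poly))
```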



# Ridge Regression

Goal: Minimize $Loss\left(\hat\beta_0, ..., \hat\beta_p\right) = \frac{1}{n} SSE + \lambda \sum_{i=1}^{p} \hat\beta_i^2$
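
As a rough illustration of this objective (a sketch only; `glmnet`'s internal objective scales the SSE by $1/(2n)$ and standardizes the predictors before penalizing, so its numbers will not match this exactly), the penalized loss can be written as:

```{r}
# Ridge loss as written above: (1/n) * SSE + lambda * sum of squared slope coefficients
ridge_loss <- function(beta0, beta, X, y, lambda) {
  sse <- sum((y - (beta0 + X %*% beta))^2)
  sse / length(y) + lambda * sum(beta^2)
}
```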

## Create Ridge Models for Several Values of Lambda

```{r}
# Build the design matrix of the nine powers of x, dropping model.matrix()'s intercept column
Xmat <- model.matrix(y ~ x + I(x^2) + I(x^3) + I(x^4) + I(x^5) + I(x^6) + I(x^7) + I(x^8) + I(x^9))[,-1]
colnames(Xmat) <- c('x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x9')
Xmat_train <- Xmat[sel,]
Xmat_test <- Xmat[-sel,]

ridge_grid <- 10^(seq(6, -2, length=100))   # 100 lambda values from 10^6 down to 10^-2

ridge_models <- glmnet(Xmat_train, y_train, alpha = 0, lambda = ridge_grid)
rcf <- coef(ridge_models)
rcf <- as.matrix(rcf)
colnames(rcf) <- as.character(round(ridge_grid, 3))
round(rcf[, c(100:97, 4:1)], 8)   # coefficients at the four smallest and four largest lambdas
```

## Plot Coefficient Estimates against Lambda

```{r}
ridge_cfdf <- data.frame(grid = ridge_grid, x1 = rcf[2,], x2 = rcf[3,], x3 = rcf[4,], x4 = rcf[5,], 
                         x5 = rcf[6,], x6 = rcf[7,], x7 = rcf[8,], x8 = rcf[9,], x9 = rcf[10,])
ridge_melted <- melt(ridge_cfdf, id.vars="grid")

ggplot(ridge_melted, aes(log(grid), value, col=variable)) + geom_line(size=1) +
  xlab('log(lambda)') + ylab('Coefficients') 
```

## Using Cross-Validation to Select Lambda


```{r}
set.seed(1)
# grouped = FALSE because with only 14 training points, 10-fold CV leaves fewer than 3 observations per fold
ridge_cv_out <- cv.glmnet(Xmat_train, y_train, alpha = 0, grouped = FALSE,
                          lambda = exp(seq(from = -4, to = 5, length = 100)))
plot(ridge_cv_out, xlim=c(-4,5))
```

```{r}
ridge_best_lambda <- ridge_cv_out$lambda.min
ridge_best_lambda
```
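
`cv.glmnet` also reports `lambda.1se`, the largest lambda whose cross-validated error is within one standard error of the minimum; it is a common, more conservative choice. The rest of the lesson uses `lambda.min`, but the alternative is easy to inspect:

```{r}
# More heavily regularized alternative to lambda.min
ridge_cv_out$lambda.1se
```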

```{r}
ridge_model <- glmnet(Xmat_train, y_train, alpha = 0, lambda = ridge_best_lambda)
coef(ridge_model)
```

## Calculate Training and Testing R-Squared Values

```{r}
y_train_pred_ridge <- predict(ridge_model, Xmat_train)
sse <- sum((y_train - y_train_pred_ridge)^2)
sst <- sum((y_train - mean(y_train))^2)
training_rsq <- 1 - sse / sst

y_test_pred_ridge <- predict(ridge_model, Xmat_test)
sse <- sum((y_test - y_test_pred_ridge)^2)
sst <- sum((y_test - mean(y_test))^2)
testing_rsq <- 1 - sse / sst

ridge_rsq <- c(training_rsq, testing_rsq)
names(ridge_rsq) <- c('Training', 'Testing')
ridge_rsq
```

## Plot Fitted Curve

```{r}
# Design matrix for the plotting grid; column names must match those used to fit the glmnet models
X_grid <- cbind(xg, xg^2, xg^3, xg^4, xg^5, xg^6, xg^7, xg^8, xg^9)
colnames(X_grid) <- c('x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x9')
yg_ridge <- predict(ridge_model, X_grid)

plot(y_train ~ x_train, ylim=c(-80, 140), xlab="x", ylab="y", main="Ridge Regression")
lines(xg, yg_ridge)
segments(x_train, y_train, x_train, y_train_pred_ridge) 
points(y_test ~ x_test, col="red")
segments(x_test, y_test, x_test, y_test_pred_ridge, col="red")
```


# LASSO Regression

Goal: Minimize $Loss\left(\hat\beta_0, ..., \hat\beta_p\right) = \frac{1}{n} SSE + \lambda \sum_{i=1}^{p} | \hat\beta_i |$
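
Analogous to the ridge sketch above, only the penalty term changes (again illustrative only; `glmnet` uses its own internal scaling):

```{r}
# LASSO loss as written above: (1/n) * SSE + lambda * sum of absolute slope coefficients
lasso_loss <- function(beta0, beta, X, y, lambda) {
  sse <- sum((y - (beta0 + X %*% beta))^2)
  sse / length(y) + lambda * sum(abs(beta))
}
```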

## Create Lasso Models for Several Values of Lambda

```{r}
lasso_grid <- 10^(seq(2, -2, length=100))   # 100 lambda values from 10^2 down to 10^-2

lasso_models <- glmnet(Xmat_train, y_train, alpha = 1, lambda = lasso_grid)
lcf <- coef(lasso_models)
lcf <- as.matrix(lcf)
colnames(lcf) <- as.character(round(lasso_grid, 3))
round(lcf[, c(100:97, 4:1)], 8)   # coefficients at the four smallest and four largest lambdas
```
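
Unlike ridge, the lasso sets coefficients exactly to zero as lambda grows. The fitted `glmnet` object records the number of nonzero coefficients for each lambda in its `df` component, which makes this easy to see (a quick check, not needed for the rest of the lesson):

```{r}
# Number of nonzero slope coefficients at a few points along the lasso path
idx <- c(1, 25, 50, 75, 100)
data.frame(lambda = round(lasso_models$lambda[idx], 4),
           nonzero = lasso_models$df[idx])
```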

## Plot Coefficient Estimates against Lambda

```{r}
lasso_cfdf <- data.frame(grid = lasso_grid, x1 = lcf[2,], x2 = lcf[3,], x3 = lcf[4,], x4 = lcf[5,], 
                         x5 = lcf[6,], x6 = lcf[7,], x7 = lcf[8,], x8 = lcf[9,], x9 = lcf[10,])
lasso_melted <- melt(lasso_cfdf, id.vars="grid")

ggplot(lasso_melted, aes(log(grid), value, col=variable)) + geom_line(size=1) +
  xlab('log(lambda)') + ylab('Coefficients')
```

## Using Cross-Validation to Select Lambda

```{r}
set.seed(1)
# As before, grouped = FALSE avoids the small-fold issue; lambda is left at glmnet's default sequence here
lasso_cv_out <- cv.glmnet(Xmat_train, y_train, alpha = 1, grouped = FALSE)
plot(lasso_cv_out)
```

```{r}
lasso_best_lambda <- lasso_cv_out$lambda.min
lasso_best_lambda
```

```{r}
lasso_model <- glmnet(Xmat_train, y_train, alpha = 1, lambda = lasso_best_lambda)
coef(lasso_model)
```
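
With this lambda the lasso keeps only a few of the nine powers of x. The nonzero terms can also be pulled out programmatically (a small sketch):

```{r}
# Names of the terms the lasso kept (nonzero coefficients), excluding the intercept
lasso_cf <- as.matrix(coef(lasso_model))
selected <- rownames(lasso_cf)[lasso_cf[, 1] != 0]
setdiff(selected, "(Intercept)")
```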

## Calculate Training and Testing R-Squared Values


```{r}
y_train_pred_lasso <- predict(lasso_model, Xmat_train)
sse <- sum((y_train - y_train_pred_lasso)^2)
sst <- sum((y_train - mean(y_train))^2)
training_rsq <- 1 - sse / sst

y_test_pred_lasso <- predict(lasso_model, Xmat_test)
sse <- sum((y_test - y_test_pred_lasso)^2)
sst <- sum((y_test - mean(y_test))^2)
testing_rsq <- 1 - sse / sst

lasso_rsq <- c(training_rsq, testing_rsq)
names(lasso_rsq) <- c('Training', 'Testing')
lasso_rsq
```


## Plot Fitted Curve

```{r}
yg_lasso <- predict(lasso_model, X_grid)

plot(y_train ~ x_train, ylim=c(-80, 140), xlab="x", ylab="y", main="LASSO Regression")
lines(xg, yg_lasso)
segments(x_train, y_train, x_train, y_train_pred_lasso) 
points(y_test ~ x_test, col="red")
segments(x_test, y_test, x_test, y_test_pred_lasso, col="red")
```

# Comparison of Models

## Coefficient Estimates

```{r}
poly <- coef(poly_mod)
ridge <- as.matrix(t(coef(ridge_model)))
lasso <- as.matrix(t(coef(lasso_model)))
                 
names(poly) <- c('Intercept', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x9' )
cf <- rbind(poly, ridge, lasso)
rownames(cf) <- c("Poly", "Ridge", "Lasso")
round(cf, 6)
```

## R-Squared

With this seed, the degree-9 polynomial fits the training data almost perfectly but produces a negative testing R-squared, while ridge and LASSO give up a little training fit in exchange for testing R-squared values near 0.93.

```{r}
rbind(poly_rsq, ridge_rsq, lasso_rsq)
```

## Fitted Curves

```{r}
par(mfrow=c(1,3))

plot(y_train ~ x_train, ylim=c(-80, 140), xlab="x", ylab="y", main="Poly Regression")
lines(xg, yg_poly)
segments(x_train, y_train, x_train, y_train_pred_poly) 
points(y_test ~ x_test, col="red")
segments(x_test, y_test, x_test, y_test_pred_poly, col="red")

plot(y_train ~ x_train, ylim=c(-80, 140), xlab="x", ylab="y", main="Ridge Regression")
lines(xg, yg_ridge)
segments(x_train, y_train, x_train, y_train_pred_ridge) 
points(y_test ~ x_test, col="red")
segments(x_test, y_test, x_test, y_test_pred_ridge, col="red")

plot(y_train ~ x_train, ylim=c(-80, 140), xlab="x", ylab="y", main="LASSO Regression")
lines(xg, yg_lasso)
segments(x_train, y_train, x_train, y_train_pred_lasso) 
points(y_test ~ x_test, col="red")
segments(x_test, y_test, x_test, y_test_pred_lasso, col="red")

par(mfrow=c(1,1))
```


