# Load the tab-delimited study-time file (first row holds the column names)
# and print a per-column summary.
df <- read.table(
  file = "studytime.txt",
  header = TRUE,
  sep = "\t"
)
summary(df)
     Passed         Hours      
 Min.   :0.00   Min.   : 50.0  
 1st Qu.:0.00   1st Qu.:116.2  
 Median :1.00   Median :189.0  
 Mean   :0.56   Mean   :178.2  
 3rd Qu.:1.00   3rd Qu.:241.5  
 Max.   :1.00   Max.   :299.0  
# Plot Passed against Hours while Passed is still a numeric column.
plot(Passed ~ Hours, data = df)
We will convert Passed to a factor variable and then plot the data again.
# Recode the Passed column as a factor, then summarise the data again.
df[["Passed"]] <- factor(df[["Passed"]])
summary(df)
 Passed     Hours      
 0:44   Min.   : 50.0  
 1:56   1st Qu.:116.2  
        Median :189.0  
        Mean   :178.2  
        3rd Qu.:241.5  
        Max.   :299.0  
# With Passed as a factor, plot the relationship in both orientations:
# Passed as the response, then Hours grouped by Passed.
plot(Passed ~ Hours, data = df)
plot(Hours ~ Passed, data = df)
# Fit a logistic regression of Passed on Hours (binomial family, logit link)
m1 <- glm(
  Passed ~ Hours,
  family = binomial(link = "logit"),
  data = df
)
# New data: study times from 50 to 300 hours in steps of 50
newdata <- data.frame(Hours = seq(50, 300, by = 50))
# Default predict() output is on the link scale, i.e. log(odds)
logodds <- predict(m1, newdata = newdata)
logodds
         1          2          3          4          5          6 
-2.2020131 -1.2189354 -0.2358576  0.7472202  1.7302980  2.7133758 
# Exponentiate the log-odds to obtain the predicted odds
odds <- exp(logodds)
odds
         1          2          3          4          5          6 
 0.1105803  0.2955447  0.7898932  2.1111234  5.6423351 15.0800968 
# Convert odds to probabilities: p = odds / (1 + odds)
odds / (odds + 1)
         1          2          3          4          5          6 
0.09956985 0.22812386 0.44130744 0.67857270 0.84945053 0.93781132 
# Ask predict() for the response scale (probabilities) directly
predict(m1, newdata = newdata, type = "response")
         1          2          3          4          5          6 
0.09956985 0.22812386 0.44130744 0.67857270 0.84945053 0.93781132 
# Load the tab-delimited admissions file (first row holds the column names);
# this replaces the previous contents of df. Then print a per-column summary.
df <- read.table(
  file = "admissions.txt",
  header = TRUE,
  sep = "\t"
)
summary(df)
     admit             gre             gpa             rank      
 Min.   :0.0000   Min.   :220.0   Min.   :2.260   Min.   :1.000  
 1st Qu.:0.0000   1st Qu.:520.0   1st Qu.:3.130   1st Qu.:2.000  
 Median :0.0000   Median :580.0   Median :3.395   Median :2.000  
 Mean   :0.3175   Mean   :587.7   Mean   :3.390   Mean   :2.485  
 3rd Qu.:1.0000   3rd Qu.:660.0   3rd Qu.:3.670   3rd Qu.:3.000  
 Max.   :1.0000   Max.   :800.0   Max.   :4.000   Max.   :4.000  
# Treat admit and rank as categorical variables, then summarise again
df$admit <- factor(df$admit)
df$rank <- factor(df$rank)
summary(df)
 admit        gre             gpa        rank   
 0:273   Min.   :220.0   Min.   :2.260   1: 61  
 1:127   1st Qu.:520.0   1st Qu.:3.130   2:151  
         Median :580.0   Median :3.395   3:121  
         Mean   :587.7   Mean   :3.390   4: 67  
         3rd Qu.:660.0   3rd Qu.:3.670          
         Max.   :800.0   Max.   :4.000          
# Plot each predictor (gre, gpa, rank) against the admit factor
plot(gre ~ admit, data = df)
plot(gpa ~ admit, data = df)
plot(rank ~ admit, data = df)
# Fit a logistic regression of admit on gre, gpa and rank
# (binomial family, logit link) and print the model summary
m2 <- glm(
  admit ~ gre + gpa + rank,
  family = binomial(link = "logit"),
  data = df
)
summary(m2)
Call:
glm(formula = admit ~ gre + gpa + rank, family = binomial(link = "logit"), 
    data = df)
Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.6268  -0.8662  -0.6388   1.1490   2.0790  
Coefficients:
             Estimate Std. Error z value Pr(>|z|)    
(Intercept) -3.989979   1.139951  -3.500 0.000465 ***
gre          0.002264   0.001094   2.070 0.038465 *  
gpa          0.804038   0.331819   2.423 0.015388 *  
rank2       -0.675443   0.316490  -2.134 0.032829 *  
rank3       -1.340204   0.345306  -3.881 0.000104 ***
rank4       -1.551464   0.417832  -3.713 0.000205 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
    Null deviance: 499.98  on 399  degrees of freedom
Residual deviance: 458.52  on 394  degrees of freedom
AIC: 470.52
Number of Fisher Scoring iterations: 4
# Applicant profiles to score: two (gre, gpa) combinations, each crossed
# with ranks 1 to 4. rank is supplied as character labels.
nd <- data.frame(
  gre = rep(c(720, 550), each = 4),
  gpa = rep(c(3.2, 3.8), each = 4),
  rank = rep(c("1", "2", "3", "4"), times = 2)
)
# Predicted probabilities (response scale) for each profile
predict(m2, newdata = nd, type = "response")
        1         2         3         4         5         6         7         8 
0.5531306 0.3864841 0.2447380 0.2078168 0.5770800 0.4098356 0.2631993 0.2243201 
# Compute the probabilities by hand: log-odds -> odds -> probability
logodds <- predict(m2, newdata = nd)
odds <- exp(logodds)
prob <- odds / (odds + 1)
prob
        1         2         3         4         5         6         7         8 
0.5531306 0.3864841 0.2447380 0.2078168 0.5770800 0.4098356 0.2631993 0.2243201