-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathChapter 3.R
150 lines (134 loc) · 4.21 KB
/
Chapter 3.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
############ Chapter 3 R Codes #################
# Simple linear regression:
rm(list = ls())
# Multiple linear regression:
# 3.2.4: ANCOVA: Qualitative and Quantitative predictors with Interaction in the Linear Model:
rm(list = ls())
library(ISLR)
attach(Advertising)
median(radio)
plot(TV, sales, pch = as.numeric(I(radio > 22.9))) #pch: 1 if radio > 22.9 & O otherwise.
legend ("topleft", inset = 0.05, legend = c("radio < 22.09", "radio > 22.0"), pch = c(0,1)) # topleft: kep legend on the lefttop; legend: name and order of legend.
title ("Sales vs. TV vs. Radio")
cat.radio = I(radio > 22.9) # categorical variable.
esam = lm(sales ~ TV + cat.radio) #set linear model.
coef(esam) #model coefficients
summarY(esam) #model summary.
abline(a = 4.828, b = 0.046, lwd = 3) # Draw a line. a = intercept, b = slope.
abline(a = 4.828 + 4.794, b = 0.046, lwd = 3, lyt = 2) # Draw a line; more: help(abline).
# Unequal solpe and intercept: Add in above plot.
usim = lm(sales ~ cat.radio + TV*cat.radio)
coef(usim)
abline(a = 7.1, b = 0.03, lwd = 3, col = 2)
abline(a = 7.1 + 0.636, b = 0.03 + 0.028, lwd = 3, col = 2, lyt = 2)
# Continuous interaction:
cim = lm(sales ~ TV + radio + TV*radio)
summary(lm)
# 3.2.5: Linear Models and non0linear relationships:
rm(list = ls())
data(Auto)
names(Auto) #get names of variables in dataset auto.
attach(Auto)
plot (horsepower, mpg)
hp.sq = horsepower*horsepower
qm.hp= lm(mpg ~ horsepower + hp.sq) #quadratic model.
summary(qm.hp)
co = coef(qm.hp) # coefficient of model.
curve(co[1] + co[2]*x + co[3]* x^2, add = T, lwd = 3, col = "red")
# 3.3 Residual assumptions:
plot(horsepower, mpg)
lm.hp = lm(mpg ~ horsepower)
abline(lm.hp)
par(mfrow = c(2,4))
plot(lm.hp, main = "Linear Model")
plot(qm.hp, main = "Quadratic Model")
# Correlated errors;
par(mfrow = c(1,1))
x = 1:50
y = cumsum(rnorm(50))
confint(lm(y~x))
plot(resid(lm(y~x)), type = "b")
abline (h = 0)
summary(lm(y~x))
# Non-constant Variance:
set.seed(1)
x = 1:100/20
y = rlnorm(length(x), x)
plot(y, x)
abline(lm(y~x))
par(mfrow = c(2,2))
plot(lm(y~x))
z = log(y)
par(mfrow = c(1,1))
plot(x,z)
lm(z~x)
par(mfrow = c(2,2))
plot(lm(z~x))
# Outliars:
set.seed(1)
x = 1:10
y = x +rnorm(length(x))
y1 = y
y1[5] = 0 #replacing 5th row in y to zero
y2 = y
y2[9] = 0 #replacing 9th row in y to zero
m1 = lm(y~x)
m2 = lm(y1~x)
m3 = lm(y2~x)
par(mfrow = c(1,3))
plot(x,y, main = "m1")
abline(m1)
plot(x, y1, main = "m2")
abline(m2)
plot(x, y2, main = "m3")
abline(m3)
#Normality:
qm.hp = lm(mpg ~horsepower + hp.sq)
par(mfrow = c(1,1))
hist(rstandard(qm.hp))
qqnorm(rstandard(qm.hp))
qqline(rstandard(qm.hp))
par(mfrow = c(2,2))
plot(qm.hp)
# Multicollinearity:
# data(package = .packages(all.available = TRUE)) #view all data available.
rm(list = ls())
data(Credit)
credit = Credit
rm(Credit)
# View(credit)
pairs(~Balance + Age + Limit + Rating)
# install.packages("car")
library(car)
vif(lm(Balance ~ Age + Limit + Rating)) #Variance inflation factors.
summary(lm(Balance ~ Age + Limit + Rating))$coeff
summary(lm(Balance ~ Age + Limit))$coeff
# 3.5: Summary:
# Main R pseudo code:
# Fitting:
my.lm(y ~ x1 + x2 + , data = my.data)
my.full.lm = lm(y ~ ., data = my.data)
my.full.lm1 = lm(y ~ . -1 , data = my.data) #remove intercept
my.full.lm2 = lm(y ~ . -x1 , data = my.data) #remove x1 variable.
summary(my.lm) #sumarizing and testing.
confint(my.lm) # Interval estimation.
predict(my.lm, newdata = data.frame(x1 = , x2 = , ....), type = )
predict(object, newdata, se.fit = FALSE, scale = NULL, df = Inf,
interval = c("none", "confidence", "prediction"),
level = 0.95, type = c("response", "terms"),
terms = NULL, na.action = na.pass,
pred.var = res.var/weights, weights = 1, ...)
# Diagnostic
plot(my.lm)
library(car) # needed to test VIF.
vif(my.lm) #Variance inflattion factor
############### Chapter 3 Book Excercises ##################
########### Homework 3 ############################
rm(list = ls()) #Clear environment.
setwd("~/Dropbox/OSU/PhD/SemVISp2021/STAT5063ML/Homeworks/hw3") #Mac.
# install.package("MASS")
library("MASS") # Load MASS package
data(Boston, package = "MASS") #Boston dataset from MASS Package.
attach(Boston) # Attach
q1 = lm(crim ~ zn + indus + chas + nox + rm + age + dis + rad + tax + ptratio + black + lstat + medv)
summary(q1)