-
Notifications
You must be signed in to change notification settings - Fork 58
/
Copy pathISLR05.R
98 lines (72 loc) · 2.64 KB
/
ISLR05.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# An Introduction to Statistical Learning with Applications in R
# by Gareth James, Daniela Witten, Trevor Hastie and Robert Tibshirani
# Chaper 5: Resampling Methods
# 5.3 Lab: Cross-Validation and the Bootstrap
# 5.3.1 The Validation Set Approach
library(ISLR)
set.seed(1)
train <- sample(392, 196)
lm.fit <- lm(mpg ~ horsepower, data = Auto, subset = train)
attach(Auto)
mean((mpg - predict(lm.fit,Auto))[-train]^2)
lm.fit2 <- lm(mpg ~ poly(horsepower, 2), data = Auto, subset = train)
mean((mpg - predict(lm.fit2,Auto))[-train]^2)
lm.fit3 <- lm(mpg ~ poly(horsepower, 3), data = Auto, subset = train)
mean((mpg - predict(lm.fit3, Auto))[-train]^2)
set.seed(2)
train <- sample(392, 196)
lm.fit<- lm(mpg ~ horsepower, subset = train)
mean((mpg - predict(lm.fit, Auto))[-train]^2)
lm.fit2 <- lm(mpg ~ poly(horsepower, 2), data = Auto, subset = train)
mean((mpg - predict(lm.fit2, Auto))[-train]^2)
lm.fit3 <- lm(mpg ~ poly(horsepower, 3), data = Auto, subset = train)
mean((mpg - predict(lm.fit3, Auto))[-train]^2)
# 5.3.2 Leave-One-Out Cross-Validation
glm.fit <- glm(mpg ~ horsepower, data = Auto)
coef(glm.fit)
lm.fit <- lm(mpg ~ horsepower, data = Auto)
coef(lm.fit)
library(boot)
glm.fit <- glm(mpg ~ horsepower, data = Auto)
cv.err <- cv.glm(Auto, glm.fit)
cv.err$delta
cv.error <- rep(0, 5)
for (i in 1:5) {
glm.fit <- glm(mpg ~ poly(horsepower, i), data = Auto)
cv.error[i] <- cv.glm(Auto, glm.fit)$delta[1]
}
cv.error
# 5.3.3 k-Fold Cross-Validation
set.seed(17)
cv.error.10 <- rep(0, 10)
for (i in 1:10) {
glm.fit <- glm(mpg ~ poly(horsepower, i), data = Auto)
cv.error.10[i] <- cv.glm(Auto, glm.fit, K = 10)$delta[1]
}
cv.error.10
# 5.3.4 The Bootstrap
# Estimating the Accuracy of a Statistic of Interest
alpha.fn <- function(data, index) {
X <- data$X[index]
Y <- data$Y[index]
return((var(Y) - cov(X,Y))/(var(X) + var(Y) - 2*cov(X,Y)))
}
alpha.fn(Portfolio, 1:100)
set.seed(1)
alpha.fn(Portfolio, sample(100, 100, replace = TRUE))
boot(Portfolio, alpha.fn, R = 1000)
# Estimating the Accuracy of a Linear Regression Model
boot.fn <- function(data, index)
return(coef(lm(mpg ~ horsepower, data = data, subset = index)))
boot.fn(Auto, 1:392)
set.seed(1)
boot.fn(Auto, sample(392, 392, replace = TRUE))
boot.fn(Auto, sample(392, 392, replace = TRUE))
boot(Auto, boot.fn, 1000)
summary(lm(mpg ~ horsepower, data = Auto))$coef
boot.fn <- function(data, index)
coefficients(lm(mpg ~ horsepower + I(horsepower^2), data = data,
subset = index))
set.seed(1)
boot(Auto, boot.fn, 1000)
summary(lm(mpg ~ horsepower + I(horsepower^2), data = Auto))$coef