-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplayWithDistributions.R
167 lines (142 loc) · 4.97 KB
/
playWithDistributions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# Meta -----
# Play with different distributions
# code by: [email protected] (@dataknut)
# Functions ----
checkFolder <- function(name) { #checks if folder exists and if not, creates it
if(dir.exists(plotsf)) {
print(paste0(plotsf, " folder exists, no need to create"))
} else {
print(paste0(plotsf, " folder does not exist, creating"))
dir.create(plotsf)
}
}
# Housekeeping ----
# Working directory is:
getwd()
# change the working directory to where you want to save results (the plots)
setwd("~/UoS One Drive/PG/Southampton/FEEG6025 Data Analysis & Experimental Methods for Engineers/Week 3")
# Set name of folder for storing plots (saves re-typing each time we save a plot)
plotsf <- "plots"
# Use the checkFolder functon to see if the plots folder exists, if not create it so we can save the plots
checkFolder(plotsf)
# clear the workspace
rm(list=ls())
# create a useful separator
sep <- "\n################\n"
# Check working directory is:
getwd()
# print the separator
cat(sep)
# Testing Normal with a mean of 500 and standard deviation of 100 ----
normTest <- rnorm(1000, mean = 500, sd = 100)
summary(normTest)
# if you want to force the x axis to have a specific range a - b, use the option xlim = c(a,b)
hist(normTest)
dev.copy(png,"plots/normTestHist.png")
dev.off()
# use a qnormal plot to test for normality
# R lets us add a theoretical line to the plot for comparison
qqnorm(normTest, main = "Q-Q plot for normal distribution")
qqline(normTest, col = 2)
dev.copy(png,"plots/normTestQQ.png")
dev.off()
# now test for normality using the Shapiro-Wilks test
shapiro.test(normTest)
# a p > 0.05 indicates the distribution is normal
# Create a +ve skewed version by force ----
normTestPSkew <- normTest[normTest > mean(normTest)] # values > than the mean
summary(normTestPSkew)
hist(normTestPSkew, main = "Example of positive skew")
dev.copy(png,"plots/normTestPSkewHist.png")
dev.off()
# use a qnormal plot to test for normality
qqnorm(normTestPSkew, main = "Q-Q plot for positive skew")
qqline(normTestPSkew, col = 2)
dev.copy(png,"plots/normTestPSkewQQ.png")
dev.off()
# now test for normality using the Shapiro-Wilks test
shapiro.test(normTestPSkew)
# is the p value > 0.05 now?
# to test for skew need
# packages.install("e1071")
# or
# packages.install("moments")
# library(e1071)
# skewness(normTestPSkew)
# Create a -ve skewed version by force ----
normTestNSkew <- normTest[normTest < mean(normTest)] # values < than the mean
summary(normTestNSkew)
hist(normTestNSkew, main = "Example of negative skew")
dev.copy(png,"plots/normTestNSkewHist.png")
dev.off()
# use a qnormal plot to test for normality
qqnorm(normTestNSkew, main = "Q-Q plot for negative skew")
qqline(normTestNSkew, col = 2)
dev.copy(png,"plots/normTestNSkewQQ.png")
dev.off()
# skewness(normTestNSkew)
cat(sep)
# Testing Poisson, lambda = average number of events per unit time ----
poisTest <- rpois(1000, lambda = 3)
summary(poisTest)
hist(poisTest)
# Save it into the working directory.
# Seriously, this is what you have to do.
# When we start using ggplot instead of plot things will get better.
# Things can only get better.
dev.copy(png,"plots/poisTestHist.png")
dev.off()
cat(sep)
# Testing Binomial ----
binomTest <- rbinom(1000, 20, 0.2)
hist(binomTest)
summary(binomTest)
dev.copy(png,"plots/binomTestHist.png")
dev.off()
cat(sep)
# Testing Exponential ----
expTest <- rexp(1000, 1)
hist(expTest)
summary(expTest)
dev.copy(png,"plots/expTestHist.png")
dev.off()
cat(sep)
# Testing standard normal - R defaults to mean = 0 & sd = 1 if you don't specifiy ----
stNormTest <- rnorm(1000)
summary(stNormTest)
hist(stNormTest)
# add lower & upper 95% lines
abline(v = c(-1.96,1.96), col="red")
# add text labels
text(-1.96,100, "-1.96", col="red")
text(1.96,100, "1.96", col="red")
text(-1.96, 180, pos = "4", "<----------", col = "red")
text(1.96, 180, pos = "2", "---------->", col = "red")
text(0, 180, "XX% of the distribution is here", col = "red", pos = 3) # how to make background a different colour?
# save it
dev.copy(png,"plots/stNormTestHist.png")
dev.off()
cat(sep)
# Calculating confidence intervals using the initial normal distribution ----
m <- mean(normTest)
s <- sd(normTest)
n <- length(normTest)
se <- s/sqrt(n)
error <- qnorm(0.975)*se
normTestCIu <- m + error
normTestCIl <- m - error
hist(normTest, sub = "Red line = mean, green lines = 95% CI of the mean - not central 95% of the distribution")
# add the mean
abline(v = m, col="red")
# add lower & upper 95% lines
abline(v = c(normTestCIl,normTestCIu), col="green")
# save it
dev.copy(png,"plots/NormTestHistCI.png")
dev.off()
# tell the user the answer rounding the answer to 3 decimal places
print(paste0("normTest upper 95% CI: ", round(normTestCIu, 3)))
print(paste0("normTest mean: ", round(m, 3)))
print(paste0("normTest lower 95% CI: ", round(normTestCIl, 3)))
# now go back to the top and change the value of 1000 to 10,000 in:
# normTest <- rnorm(1000, mean = 500, sd = 100)
# then re-run the whole script - what do you notice about the new 95% CIs?