##################################################################
##
## This resource is designed to introduce rstan
## as a tool for Bayesian data analysis
##
##################################################################
## Load rstan and supporting libraries
library(rstan)
library(shinystan)
library(bayesplot)
library(ggplot2)
library(adegenet)   ## provides transp() for transparent colours
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)
## 1 Simple binomial logistic regressions
## 1.1 Running the stan model
data1 <- read_rdump("Q:\\RProjects\\Teaching_Rstan\\data_IRS.R")
stan_base <- stan(file = "Q:\\RProjects\\Teaching_Rstan\\simple_binomial_regression.stan",
                  data = data1,
                  warmup = 1000,
                  iter = 2000,
                  chains = 4,
                  control = list(adapt_delta = 0.8,
                                 max_treedepth = 20))
## extract the model parameters (and posterior predictive draws, if the model generates them)
base <- extract(stan_base)
# Extract posterior draws for later use
posterior_cp <- as.array(stan_base)
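## (added) sanity check: the draws array should be iterations x chains x parameters
dim(posterior_cp)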
#####################################
## 1.2 Diagnostics
## http://mc-stan.org/bayesplot/articles/visual-mcmc-diagnostics.html
## display important HMC/NUTS diagnostic information
## The simplest way is to print the model
print(stan_base)
lp_cp <- log_posterior(stan_base)
np_cp <- nuts_params(stan_base)
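## (added sketch) rstan can also print a quick textual summary of common HMC
## problems (divergences, saturated treedepth, low E-BFMI):
check_hmc_diagnostics(stan_base)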
## Check the chain acceptance stats
## We can pull out per-iteration data on (see the sketch below):
##   the accept_stat (somewhere between 85% and 95% is good)
##   the step_size (the integrator step size adapted during warmup)
##   the tree_depth (which we can alter via max_treedepth to help the fit)
##   the n_leapfrog (number of leapfrog steps per iteration)
##   divergent transitions (ideally none after warmup)
##   the energy (used for the E-BFMI diagnostic)
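## (added sketch) these per-iteration sampler statistics can be pulled out with
## rstan's get_sampler_params(); here we summarise the mean accept_stat per chain:
sampler_params <- get_sampler_params(stan_base, inc_warmup = FALSE)
sapply(sampler_params, function(x) mean(x[, "accept_stat__"]))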
## We can also visualise these diagnostics with stan_diag()
stan_diag(stan_base,
          information = c("sample", "stepsize", "treedepth", "divergence"),
          chain = 0)
## identifying collinearity between variables (which manifests as narrow bivariate plots)
## as well as the presence of multiplicative non-identifiabilities (banana-like shapes)
color_scheme_set("darkgray") ##any issues will show up as red
mcmc_pairs(posterior_cp, np = np_cp, pars = c("alpha1","alpha2"))
## traceplots - time series plots of the chains converging (ideally!)
color_scheme_set("mix-brightblue-gray")
mcmc_trace(posterior_cp, pars = "alpha1", np = np_cp) +
  xlab("Post-warmup iteration")
## we could also use
traceplot(stan_base, pars = c("alpha1","alpha2"), inc_warmup = TRUE)
traceplot(stan_base, pars = c("alpha1","alpha2"), inc_warmup = FALSE)
## overlaid histograms of the (centered) marginal energy distribution πE and the
## first-differenced distribution πΔE; ideally these look the same (Betancourt, 2017)
color_scheme_set("red")
mcmc_nuts_energy(np_cp)
## We could also pull out histograms for the parameters
stan_hist(stan_base, include = TRUE, unconstrain = FALSE,
          inc_warmup = FALSE, pars = c("alpha1", "alpha2"))
## And visualise the Rhat stat (ideally 0.99 < Rhat < 1.01)
stan_rhat(stan_base, bins = 10)
## or
rhats <- rhat(stan_base)
print(rhats)
color_scheme_set("brightblue") # see help("color_scheme_set")
mcmc_rhat(rhats)
## Checking effective sample sizes
## The effective sample size is an estimate of the number of independent
## draws from the posterior distribution of the estimand of interest
## The larger the ratio of neff to N the better (see Gelman et al. 2013, Stan Development Team 2018 for more details)
ratios_cp <- neff_ratio(stan_base)
print(ratios_cp)
mcmc_neff(ratios_cp, size = 2)
## we only need to worry about any neff/N ratio less than 0.1
## There are different algorithms for MCMC, and these ratios depend not only on
## the model being fit but also on the particular algorithm used:
## the NUTS sampler used by Stan should give higher ratios than, say, a Gibbs sampler.
## Autocorrelation
mcmc_acf(posterior_cp, pars = c("alpha1","alpha2"), lags = 10)
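## (added) rstan's built-in equivalent, assuming the default lag settings are fine:
stan_ac(stan_base, pars = c("alpha1", "alpha2"))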
#################################
## 1.3 Visualising the model predictions against the data
## For the binomial regression we can simply put in the equation and estimated parameters from the model runs
proportion_Y <- data1$d_t / data1$n_t
x <- data1$time
plot(proportion_Y ~ x, ylim = c(0, 1),
     ylab = "Proportion of mosquitoes killed by IRS", xlab = "Time in days",
     cex.axis = 1.6, cex.lab = 1.6, cex = 1.6, pch = 20, col = "aquamarine3")
## 4000 draws = 4 chains x 1000 post-warmup iterations
model_predictions_1 <- array(dim = c(365, 4000))
time <- 1:365
for (i in 1:4000) {
  model_predictions_1[, i] <- 1 / (1 + exp(-base$alpha2[i] * time - base$alpha1[i]))
  lines(model_predictions_1[, i] ~ time, col = transp("grey", 0.01))
}
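## (added sketch) rather than spaghetti lines, we could summarise the draws,
## e.g. a posterior median with an 80% credible interval:
pred_median <- apply(model_predictions_1, 1, median)
pred_lower  <- apply(model_predictions_1, 1, quantile, probs = 0.1)
pred_upper  <- apply(model_predictions_1, 1, quantile, probs = 0.9)
lines(pred_median ~ time, col = "aquamarine4", lwd = 2)
lines(pred_lower  ~ time, col = "aquamarine4", lty = 2)
lines(pred_upper  ~ time, col = "aquamarine4", lty = 2)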
#######################################################
##
## 2 Binomial logistic regressions with random effects
## 2.1 Running the stan model
##
#######################################################
data2 <- read_rdump("Q:\\RProjects\\Teaching_Rstan\\data_IRS_reff.R")
stan2 <- stan(file = "Q:\\RProjects\\Teaching_Rstan\\random_effects_binomial_regression.stan",
              data = data2,
              warmup = 500,
              iter = 1000,
              chains = 4,
              control = list(adapt_delta = 0.9,
                             max_treedepth = 20))
base2 = extract(stan2)
## Adjust the plot for the random effects of different studies
time <- seq(1, 365, length = 365)
N_IRS <- data2$N_IRS
mean_valssp_checker4 <- mean_valssp_checker4u <- mean_valssp_checker4l <-
  array(dim = c(length(time), N_IRS))
for (i in 1:N_IRS) {
  mean_valssp_checker4[, i]  <- 1 / (1 + exp(-mean(base2$alpha1[, i]) - mean(base2$alpha2[, i]) * time))
  mean_valssp_checker4u[, i] <- 1 / (1 + exp(-quantile(base2$alpha1[, i], 0.9) - quantile(base2$alpha2[, i], 0.9) * time))
  mean_valssp_checker4l[, i] <- 1 / (1 + exp(-quantile(base2$alpha1[, i], 0.1) - quantile(base2$alpha2[, i], 0.1) * time))
}
data2$prop_dead <- data2$d_t / data2$n_t
colour_list <- c("darkred", "orange", "aquamarine3", "purple", "blue", "darkgreen")
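## NOTE (added): the points()/lines() calls below draw onto the plot opened in
## section 1.3; if starting from a fresh session, open an empty frame first, e.g.
## plot(NULL, xlim = c(0, 365), ylim = c(0, 1),
##      ylab = "Proportion of mosquitoes killed by IRS", xlab = "Time in days")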
for (i in 1:N_IRS) {
  points(data2$prop_dead[data2$IRS == i] ~ data2$time[data2$IRS == i],
         col = colour_list[i], pch = 14 + i, cex = 1.6)
  lines(mean_valssp_checker4[, i] ~ time, col = colour_list[i])
  polygon(c(time, rev(time)), c(mean_valssp_checker4u[, i], rev(mean_valssp_checker4l[, i])),
          border = FALSE, col = transp(colour_list[i], 0.3))
}
## Or you can use the generated quantities from the stan model,
## e.g. the posterior predictive draws for the 1st study
## (2000 draws = 4 chains x 500 post-warmup iterations)
for (i in 1:2000) {
  lines(base2$sp_ppc[i, 1, ] ~ time, col = transp(colour_list[1], 0.01))
}
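## (added) a legend keyed to the study colours, assuming studies are labelled 1 to N_IRS:
legend("topright", legend = paste("Study", 1:N_IRS),
       col = colour_list[1:N_IRS], pch = 14 + (1:N_IRS), lty = 1, cex = 0.8)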
###########################################################
##
## 3.1 PKPD models example
##
###########################################################
# https://github.com/stan-dev/stancon_talks/tree/master/2017/Contributed-Talks/05_margossian/models/effCpt
RUNDATA <- read_rdump("Q:\\RProjects\\Teaching_Rstan\\template_data.R") ## Coverage data March 2019
## initial estimates will be generated randomly for each chain
init <- function() {
  list(CL = exp(rnorm(1, log(10), 0.2)),
       Q = exp(rnorm(1, log(20), 0.2)),
       V1 = exp(rnorm(1, log(70), 0.2)),
       V2 = exp(rnorm(1, log(70), 0.2)),
       ka = exp(rnorm(1, log(2), 0.2)),
       ke0 = exp(rnorm(1, log(1), 0.2)),
       EC50 = exp(rnorm(1, log(100), 0.2)),
       sigma = 0.5,
       sigmaResp = 20)
}
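## (added) quick sanity check that the generator returns values on a sensible scale:
str(init())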
## Specify the variables for which you want history plots
parametersToPlot <- c("CL", "Q", "V1", "V2", "ka", "sigma")
## Additional variables to monitor
otherRVs <- c("cObsPred")
parameters <- c(parametersToPlot, otherRVs)
parametersToPlot <- c("lp__", parametersToPlot)
nChains <- 4
nPost <- 1000 ## Number of post-warm-up samples per chain after thinning
nBurn <- 1000 ## Number of warm-up samples per chain after thinning
nThin <- 1
nIter <- (nBurn + nPost) * nThin
nBurnin <- nBurn * nThin
fit <- stan(file = "Q:\\RProjects\\Teaching_Rstan\\Template_ODE_model.stan",
            data = RUNDATA,
            pars = parameters,
            iter = nIter,
            warmup = nBurnin,
            thin = nThin,
            init = init,
            chains = nChains,
            cores = min(nChains, parallel::detectCores()))
stan_trace(fit, parametersToPlot)
print(fit)
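## (added sketch) with shinystan loaded, the whole fit can also be explored
## interactively in the browser (uncomment to launch):
## launch_shinystan(fit)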