-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathTianjinSI_VinkWallinga_CC.R
169 lines (134 loc) · 7.65 KB
/
TianjinSI_VinkWallinga_CC.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# WEB APPENDIX 2 from Vinkwallinga 2014
library(fdrtool)
#Note that you have to define your data in vector format
#data<-c()
#see Web Appendix 3 for simulated data and the data sets from the manuscript
#E-step
serial_mix_est = function(data, N, startmu=mean(data), startsig=sd(data)) {
sigma=startsig
mu=startmu
#function for weighted variance, to be used in E-step
#weighted variance
weighted.var <- function(x, w, na.rm = FALSE) {
if (na.rm) {
w <- w[i <- !is.na(x)]
x <- x[i]
}
sum.w <- sum(w)
(sum.w*(sum(w*(x-weighted.mean(x,w))^2))) / (sum.w^2 - sum(w^2))
}
#mixture with 7 components
#we split the folded normal distribution for the PS, PT and PQ route into two parts
#component 1: CP route
#component 2+3: PS route
#component 4+5: PT route
#component 6+7: PQ route
#convolution of the triangular distribution with the mixture component density
#contiunous case. This is bc we have only day of onset, not exact time.
# coprimary transmission: both infected by the same primary case. |x2-x1| -> halfnormal
f10<-function(x,sigma) (2-2*x)*dhalfnorm(x,theta=sqrt(pi/2)/(sqrt(2)*sigma))
f1lower<-function(x,r,sigma) (x-r+1)*dhalfnorm(x,theta=sqrt(pi/2)/(sqrt(2)*sigma))
f1upper<-function(x,r,sigma) (r+1-x)*dhalfnorm(x,theta=sqrt(pi/2)/(sqrt(2)*sigma))
# primary - secondary. here it follows a normal distribution
f20<-function(x,mu,sigma) (2-2*x)*dnorm(x,mean=mu,sd=sigma)
f2lower<-function(x,r,mu,sigma) (x-r+1)*dnorm(x,mean=mu,sd=sigma)
f2upper<-function(x,r,mu,sigma) (r+1-x)*dnorm(x,mean=mu,sd=sigma)
f30<-function(x,mu,sigma) (2-2*x)*dnorm(x,mean=-mu,sd=sigma)
f3lower<-function(x,r,mu,sigma) (x-r+1)*dnorm(x,mean=-mu,sd=sigma)
f3upper<-function(x,r,mu,sigma) (r+1-x)*dnorm(x,mean=-mu,sd=sigma)
# Primary-tertiary transmission, f3. Household member 1 infects person 2, and person 2 infects person 3. The time between symptom onset of person 1 and that of person 2 is X1. The time between symptom onset of person 2 and person 3 is X2. The observed ICC interval between person 1 and person 3 follows the distribution of |X1 + X2|, a folded normal distribution with parameters 2μ and 2σ2.
f40<-function(x,mu,sigma) (2-2*x)*dnorm(x,mean=2*mu,sd=sqrt(2)*sigma)
f4lower<-function(x,r,mu,sigma) (x-r+1)*dnorm(x,mean=2*mu,sd=sqrt(2)*sigma)
f4upper<-function(x,r,mu,sigma) (r+1-x)*dnorm(x,mean=2*mu,sd=sqrt(2)*sigma)
f50<-function(x,mu,sigma) (2-2*x)*dnorm(x,mean=-2*mu,sd=sqrt(2)*sigma)
f5lower<-function(x,r,mu,sigma) (x-r+1)*dnorm(x,mean=-2*mu,sd=sqrt(2)*sigma)
f5upper<-function(x,r,mu,sigma) (r+1-x)*dnorm(x,mean=-2*mu,sd=sqrt(2)*sigma)
# Primary-quaternary transmission, f4. Household member 1 infects person 2, person 2 infects person 3, and person 3 infects person 4. The observed interval between symptom onset of person 1 and that of person 4 follows the distribution of |X1 + X2 + X3|, a folded normal distribution with parameters 3μ and 3σ2.
f60<-function(x,mu,sigma) (2-2*x)*dnorm(x,mean=3*mu,sd=sqrt(3)*sigma)
f6lower<-function(x,r,mu,sigma) (x-r+1)*dnorm(x,mean=3*mu,sd=sqrt(3)*sigma)
f6upper<-function(x,r,mu,sigma) (r+1-x)*dnorm(x,mean=3*mu,sd=sqrt(3)*sigma)
f70<-function(x,mu,sigma) (2-2*x)*dnorm(x,mean=-3*mu,sd=sqrt(3)*sigma)
f7lower<-function(x,r,mu,sigma) (x-r+1)*dnorm(x,mean=-3*mu,sd=sqrt(3)*sigma)
f7upper<-function(x,r,mu,sigma) (r+1-x)*dnorm(x,mean=-3*mu,sd=sqrt(3)*sigma)
#discretization
p10<-function(d,sigma) integrate(f = f10, lower = d, upper = (d+1), sigma = sigma)
p1lower<-function(d,sigma) integrate(f=f1lower, lower=(d-1), upper=d, r = d, sigma = sigma)
p1upper<-function(d,sigma) integrate(f=f1upper, lower=d, upper=(d+1), r = d, sigma = sigma)
p20<-function(d,mu,sigma) integrate(f = f20, lower = d, upper = (d+1), mu = mu, sigma = sigma)
p2lower<-function(d,mu,sigma) integrate(f=f2lower, lower=(d-1), upper=d, r = d, mu = mu, sigma = sigma)
p2upper<-function(d,mu,sigma) integrate(f=f2upper, lower=d, upper=(d+1), r = d, mu = mu, sigma = sigma)
p30<-function(d,mu,sigma) integrate(f = f30, lower = d, upper = (d+1), mu = mu, sigma = sigma)
p3lower<-function(d,mu,sigma) integrate(f=f3lower, lower=(d-1), upper=d, r = d, mu = mu, sigma = sigma)
p3upper<-function(d,mu,sigma) integrate(f=f3upper, lower=d, upper=(d+1), r = d, mu = mu, sigma = sigma)
p40<-function(d,mu,sigma) integrate(f = f40, lower = d, upper = (d+1), mu = mu, sigma = sigma)
p4lower<-function(d,mu,sigma) integrate(f=f4lower, lower=(d-1), upper=d, r = d, mu = mu, sigma = sigma)
p4upper<-function(d,mu,sigma) integrate(f=f4upper, lower=d, upper=(d+1), r = d, mu = mu, sigma = sigma)
p50<-function(d,mu,sigma) integrate(f = f50, lower = d, upper = (d+1), mu = mu, sigma = sigma)
p5lower<-function(d,mu,sigma) integrate(f=f5lower, lower=(d-1), upper=d, r = d, mu = mu, sigma = sigma)
p5upper<-function(d,mu,sigma) integrate(f=f5upper, lower=d, upper=(d+1), r = d, mu = mu, sigma = sigma)
p60<-function(d,mu,sigma) integrate(f = f60, lower = d, upper = (d+1), mu = mu, sigma = sigma)
p6lower<-function(d,mu,sigma) integrate(f=f6lower, lower=(d-1), upper=d, r = d, mu = mu, sigma = sigma)
p6upper<-function(d,mu,sigma) integrate(f=f6upper, lower=d, upper=(d+1), r = d, mu = mu, sigma = sigma)
p70<-function(d,mu,sigma) integrate(f = f70, lower = d, upper = (d+1), mu = mu, sigma = sigma)
p7lower<-function(d,mu,sigma) integrate(f=f7lower, lower=(d-1), upper=d, r = d, mu = mu, sigma = sigma)
p7upper<-function(d,mu,sigma) integrate(f=f7upper, lower=d, upper=(d+1), r = d, mu = mu, sigma = sigma)
j<-length(data)
#EM algoritme:
tau1<-numeric(j) #mixture weights for (coprimary, coprimary) pairs
tau2<-numeric(j) #mixture weights for (primary, secondary) pairs
tau3<-numeric(j) #mixture weights for (secondary, primary) pairs
tau4<-numeric(j) #mixture weights for (primary, tertiary) pairs
tau5<-numeric(j) #mixture weights for (tertiary, primary) pairs
tau6<-numeric(j) #mixture weights for (primary, quaternary) pairs
tau7<-numeric(j) #mixture weights for (quaternary, primary) pairs
#plausible starting values/educated guess
# startmu<-mean(data)
# startsigma<-sd(data)
#number of iterations
#calculate the absolute probability of interval belonging to a component
for(k in 1:N){
for(l in 1:j){
if(data[l]==0){
d1<-p10(data[l],sigma)[[1]]
d2<-p20(data[l],mu,sigma)[[1]]
d3<-p30(data[l],mu,sigma)[[1]]
d4<-p40(data[l],mu,sigma)[[1]]
d5<-p50(data[l],mu,sigma)[[1]]
d6<-p60(data[l],mu,sigma)[[1]]
d7<-p70(data[l],mu,sigma)[[1]]
}
else{
d1<-p1lower(data[l],sigma)[[1]]+p1upper(data[l],sigma)[[1]]
d2<-p2lower(data[l],mu,sigma)[[1]]+p2upper(data[l],mu,sigma)[[1]]
d3<-p3lower(data[l],mu,sigma)[[1]]+p3upper(data[l],mu,sigma)[[1]]
d4<-p4lower(data[l],mu,sigma)[[1]]+p4upper(data[l],mu,sigma)[[1]]
d5<-p5lower(data[l],mu,sigma)[[1]]+p5upper(data[l],mu,sigma)[[1]]
d6<-p6lower(data[l],mu,sigma)[[1]]+p6upper(data[l],mu,sigma)[[1]]
d7<-p7lower(data[l],mu,sigma)[[1]]+p7upper(data[l],mu,sigma)[[1]]
}
#then calculate the relative probability of a data point to belong to one of the components
dummy<-d1+d2+d3+d4+d5+d6+d7
tau1[l]<-d1/dummy
tau2[l]<-d2/dummy
tau3[l]<-d3/dummy
tau4[l]<-d4/dummy
tau5[l]<-d5/dummy
tau6[l]<-d6/dummy
tau7[l]<-d7/dummy
}
#now calculate the weights for each of the components
w1<-sum(tau1)/j
w2<-sum(tau2)/j
w3<-sum(tau3)/j
w4<-sum(tau4)/j
w5<-sum(tau5)/j
w6<-sum(tau6)/j
w7<-sum(tau7)/j
#M-step
#estimates for the mean and standard deviation of the primary-secondary infection component can #be calculated directly
mu<-weighted.mean(data,tau2)
sigma<-sqrt(weighted.var(data, tau2))
print(c(mu,sigma))
}
return(c(mu,sigma))
}