-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcovidPositiveComparison.R
267 lines (237 loc) · 16.5 KB
/
covidPositiveComparison.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
# Last updated: 09-20-2021
# Author: Cong Liu
# checked version: Yes
# source("./cohortCharacterizationAndRefine.R")
# Vaccinated persons flagged as events (status = 1).
# `time` is the day count from the latest dose to the index date, shifted by
# the 14-day offset and counted inclusively (hence "- 14 + 1").
breakthroughCovidPerson <- breakthroughCovidRefined %>%
  mutate(
    is_vaccinated = TRUE,
    time = as.integer(
      difftime(index_date, latest_dose_date, units = "days")
    ) - 14 + 1,
    status = 1
  ) %>%
  dplyr::select(
    person_id, latest_dose_date, index_date, is_vaccinated, time, status
  )
# Vaccinated persons flagged as non-events (status = 0).
# `time` uses the same definition as for the breakthrough group: days from the
# latest dose to the index date, minus the 14-day offset, counted inclusively.
nonBreakthroughPcrCovidPerson <- nonBreakthroughPcrCovidRefined %>%
  mutate(
    is_vaccinated = TRUE,
    time = as.integer(
      difftime(index_date, latest_dose_date, units = "days")
    ) - 14 + 1,
    status = 0
  ) %>%
  dplyr::select(
    person_id, latest_dose_date, index_date, is_vaccinated, time, status
  )
# Pre-vaccine PCR-positive persons: unvaccinated (FALSE) events (status = 1).
# No follow-up time is derived for this group.
preVaccinePcrPositiveCovidPerson <- preVaccinePcrPositiveCovidRefined %>%
  mutate(
    is_vaccinated = FALSE,
    status = 1
  ) %>%
  dplyr::select(person_id, index_date, is_vaccinated, status)
# Pre-vaccine PCR-negative persons: unvaccinated (FALSE) non-events
# (status = 0). No follow-up time is derived for this group.
preVaccinePcrNegativeCovidPerson <- preVaccinePcrNegativeCovidRefined %>%
  mutate(
    is_vaccinated = FALSE,
    status = 0
  ) %>%
  dplyr::select(person_id, index_date, is_vaccinated, status)
# Unvaccinated PCR-positive persons from the post-vaccine table (status = 1).
# `time` is the inclusive day count from entry_date to index_date.
# All input columns are kept (no select), so entry_date stays available.
UnVaccinePcrPositiveCovidPerson <- postVaccinePcrPositiveCovidRefined %>%
  mutate(
    is_vaccinated = FALSE,
    status = 1,
    time = as.integer(difftime(index_date, entry_date, units = "days")) + 1
  )
# Unvaccinated PCR-negative persons from the post-vaccine table (status = 0).
# Follow-up ends at index_date; `time` is the inclusive day count from
# entry_date to that end date. The temporary end_date column is dropped again.
UnVaccinePcrNegativeCovidPerson <- postVaccinePcrNegativeCovidRefined %>%
  mutate(
    is_vaccinated = FALSE,
    status = 0,
    end_date = index_date,
    # Disabled alternative that would prefer censor_date when present:
    # end_date = case_when(is.na(censor_date) ~ index_date, TRUE ~ censor_date)
    time = as.integer(difftime(end_date, entry_date, units = "days")) + 1
  ) %>%
  dplyr::select(-end_date)
# Stack both vaccinated groups into one cohort.
vaccinatedCohort <- rbind(
  breakthroughCovidPerson,
  nonBreakthroughPcrCovidPerson
)

# Attach covariates from the matching feature lists. Each left_join() relies
# on the columns shared between the two tables (no explicit `by`).
vaccinatedCohortCov <- vaccinatedCohort %>%
  left_join(
    rbind(
      breakthroughCovidFeatures$demo,
      nonBreakthroughPcrCovidFeatures$demo
    )
  ) %>%
  left_join(
    rbind(
      breakthroughCovidFeatures$obDays,
      nonBreakthroughPcrCovidFeatures$obDays
    )
  ) %>%
  left_join(
    rbind(
      breakthroughCovidFeatures$visit,
      nonBreakthroughPcrCovidFeatures$visit
    )
  ) %>%
  left_join(
    # Any row in the immuno table marks the person as is_immunoD = TRUE;
    # the table is collapsed to one row per person before joining.
    rbind(
      breakthroughCovidFeatures$immuno,
      nonBreakthroughPcrCovidFeatures$immuno
    ) %>%
      mutate(is_immunoD = TRUE) %>%
      dplyr::select(person_id, is_immunoD) %>%
      distinct()
  ) %>%
  left_join(
    rbind(
      breakthroughCovidFeatures$rollingAvg,
      nonBreakthroughPcrCovidFeatures$rollingAvg
    )
  ) %>%
  distinct() %>%
  mutate(
    # Age in whole years at the index date.
    age_at_index = as.integer(
      difftime(index_date, DOB, units = "days") / 365.24
    ),
    # Four age groups with roughly equal numbers of persons.
    age_category_at_index = cut_number(x = age_at_index, n = 4),
    # Collapse race; anything else (including NA) becomes the fallback level.
    race_category = case_when(
      race == "White" ~ "White",
      race == "Black or African American" ~ "Black",
      race == "Asian" ~ "Asian",
      TRUE ~ "Other Race or Unknown"
    )
  ) %>%
  # Persons without a match in a feature table get neutral defaults.
  replace_na(
    list(
      count_of_visits = 0,
      is_immunoD = FALSE,
      observation_days = 0,
      cases_avg = 0,
      deaths_avg = 0
    )
  )
# Stack the pre-vaccine PCR-negative and PCR-positive groups into one cohort.
prevaccinatedCohort <- rbind(
  preVaccinePcrNegativeCovidPerson,
  preVaccinePcrPositiveCovidPerson
)

# Attach covariates from the matching feature lists. Each left_join() relies
# on the columns shared between the two tables (no explicit `by`).
prevaccinatedCohortCov <- prevaccinatedCohort %>%
  left_join(
    rbind(
      preVaccinePcrPositiveCovidFeatures$demo,
      preVaccinePcrNegativeCovidFeatures$demo
    )
  ) %>%
  left_join(
    rbind(
      preVaccinePcrPositiveCovidFeatures$obDays,
      preVaccinePcrNegativeCovidFeatures$obDays
    )
  ) %>%
  left_join(
    rbind(
      preVaccinePcrPositiveCovidFeatures$visit,
      preVaccinePcrNegativeCovidFeatures$visit
    )
  ) %>%
  left_join(
    # Any row in the immuno table marks the person as is_immunoD = TRUE;
    # the table is collapsed to one row per person before joining.
    rbind(
      preVaccinePcrPositiveCovidFeatures$immuno,
      preVaccinePcrNegativeCovidFeatures$immuno
    ) %>%
      mutate(is_immunoD = TRUE) %>%
      dplyr::select(person_id, is_immunoD) %>%
      distinct()
  ) %>%
  left_join(
    # Fix: take the rolling averages from BOTH feature sets. The positive set
    # used to be stacked with itself, so rows for the PCR-negative group were
    # absent from this table and their cases_avg / deaths_avg were zero-filled
    # by replace_na() below (cases_avg is a matching covariate).
    rbind(
      preVaccinePcrPositiveCovidFeatures$rollingAvg,
      preVaccinePcrNegativeCovidFeatures$rollingAvg
    )
  ) %>%
  distinct() %>%
  mutate(
    # Age in whole years at the index date.
    age_at_index = as.integer(
      difftime(index_date, DOB, units = "days") / 365.24
    ),
    # Four age groups with roughly equal numbers of persons.
    age_category_at_index = cut_number(x = age_at_index, n = 4),
    # Collapse race; anything else (including NA) becomes the fallback level.
    race_category = case_when(
      race == "White" ~ "White",
      race == "Black or African American" ~ "Black",
      race == "Asian" ~ "Asian",
      TRUE ~ "Other Race or Unknown"
    )
  ) %>%
  # Persons without a match in a feature table get neutral defaults.
  replace_na(
    list(
      count_of_visits = 0,
      is_immunoD = FALSE,
      observation_days = 0,
      cases_avg = 0,
      deaths_avg = 0
    )
  )
#### vax vs pre-vax ####
# Pool both cohorts for matching. The vaccinated-only columns
# (latest_dose_date, time) are dropped so both tables share the same columns.
forMatchData <- rbind(
  vaccinatedCohortCov %>% dplyr::select(-latest_dose_date, -time),
  prevaccinatedCohortCov
)

# Fixed seed so the matching run is reproducible.
set.seed(5)

# 1:1 nearest-neighbour matching on the covariates below (takes a minute).
matchIt <- matchit(
  is_vaccinated ~ count_of_visits + observation_days + gender + age_at_index +
    race_category + ethnicity + is_immunoD + cases_avg,
  data = forMatchData,
  method = "nearest",
  ratio = 1
)
plot(summary(matchIt))

# Keep only the leading columns that came from forMatchData; the extra
# columns appended by match.data() are discarded.
matchItData <- match.data(matchIt)[seq_len(ncol(forMatchData))]
# oddsRatioTest(table(matchItData$status, matchItData$is_vaccinated))
# oddsratio(table(matchItData$status, matchItData$is_vaccinated))
# oddsratio(table(matchItData %>% filter(age_at_index <= 65) %>% pull(status), matchItData %>% filter(age_at_index <= 65) %>% pull(is_vaccinated)))
# oddsratio(table(matchItData %>% filter(age_at_index > 65) %>% pull(status), matchItData %>% filter(age_at_index > 65) %>% pull(is_vaccinated)))
# prevalence in vax/pre-vax
# Column 1: number of matched persons per is_vaccinated group, one row per
# stratum (all, age <= 65, age > 65, male, female, immunoD, non-immunoD).
table5col1 <- do.call(
  rbind,
  lapply(
    list(
      matchItData,
      matchItData %>% filter(age_at_index <= 65),
      matchItData %>% filter(age_at_index > 65),
      matchItData %>% filter(gender == "MALE"),
      matchItData %>% filter(gender == "FEMALE"),
      matchItData %>% filter(is_immunoD == TRUE),
      matchItData %>% filter(is_immunoD == FALSE)
    ),
    function(stratum) {
      groupSizes <- stratum %>%
        group_by(is_vaccinated) %>%
        summarise(N = length(status))
      # Transpose and keep only the row holding the counts.
      t(as.matrix(groupSizes))["N", ]
    }
  )
)
# Column 2: sum of `status` (number of events) per is_vaccinated group, one
# row per stratum (all, age <= 65, age > 65, male, female, immunoD,
# non-immunoD).
table5col2 <- do.call(
  rbind,
  lapply(
    list(
      matchItData,
      matchItData %>% filter(age_at_index <= 65),
      matchItData %>% filter(age_at_index > 65),
      matchItData %>% filter(gender == "MALE"),
      matchItData %>% filter(gender == "FEMALE"),
      matchItData %>% filter(is_immunoD == TRUE),
      matchItData %>% filter(is_immunoD == FALSE)
    ),
    function(stratum) {
      eventCounts <- stratum %>%
        group_by(is_vaccinated) %>%
        summarise(N = sum(status))
      # Transpose and keep only the row holding the event totals.
      t(as.matrix(eventCounts))["N", ]
    }
  )
)
# test
# Column 3: is_vaccinated effect from univarTest() with lr = TRUE and no extra
# adjustment covariates (adj = NULL), one row per stratum.
# NOTE(review): univarTest() is defined elsewhere; presumably lr selects a
# logistic-regression fit -- confirm against its definition.
table5col3 <- do.call(
  rbind,
  lapply(
    list(
      matchItData,
      matchItData %>% filter(age_at_index <= 65),
      matchItData %>% filter(age_at_index > 65),
      matchItData %>% filter(gender == "MALE"),
      matchItData %>% filter(gender == "FEMALE"),
      matchItData %>% filter(is_immunoD == TRUE),
      matchItData %>% filter(is_immunoD == FALSE)
    ),
    function(stratum) {
      univarTest(
        forTest = stratum, var = "is_vaccinated", adj = NULL,
        lr = TRUE, cox = FALSE, poisson = FALSE
      )["is_vaccinatedTRUE", ]
    }
  )
)
# further adj.
# Column 4: same test as column 3, additionally adjusted for visit count and
# observation days. The two immunoD strata (last two rows) also adjust for
# age_at_index; the other five strata do not.
table5col4 <- do.call(
  rbind,
  Map(
    function(stratum, covariates) {
      univarTest(
        forTest = stratum, var = "is_vaccinated", adj = covariates,
        lr = TRUE, cox = FALSE, poisson = FALSE
      )["is_vaccinatedTRUE", ]
    },
    list(
      matchItData,
      matchItData %>% filter(age_at_index <= 65),
      matchItData %>% filter(age_at_index > 65),
      matchItData %>% filter(gender == "MALE"),
      matchItData %>% filter(gender == "FEMALE"),
      matchItData %>% filter(is_immunoD == TRUE),
      matchItData %>% filter(is_immunoD == FALSE)
    ),
    # Covariate set per stratum: 5x the base set, then 2x base + age.
    rep(
      list(
        c("count_of_visits", "observation_days"),
        c("count_of_visits", "observation_days", "age_at_index")
      ),
      times = c(5, 2)
    )
  )
)
# vax vs. unvax.
# Stack the unvaccinated PCR-negative and PCR-positive groups into one cohort.
unVaccinatedCohort <- rbind(
  UnVaccinePcrNegativeCovidPerson,
  UnVaccinePcrPositiveCovidPerson
)

# Attach covariates from the matching feature lists. Each left_join() relies
# on the columns shared between the two tables (no explicit `by`).
unVaccinatedCohortCov <- unVaccinatedCohort %>%
  left_join(
    rbind(
      postVaccinePcrPositiveCovidFeatures$demo,
      postVaccinePcrNegativeCovidFeatures$demo
    )
  ) %>%
  left_join(
    rbind(
      postVaccinePcrPositiveCovidFeatures$obDays,
      postVaccinePcrNegativeCovidFeatures$obDays
    )
  ) %>%
  left_join(
    rbind(
      postVaccinePcrPositiveCovidFeatures$visit,
      postVaccinePcrNegativeCovidFeatures$visit
    )
  ) %>%
  left_join(
    # Any row in the immuno table marks the person as is_immunoD = TRUE;
    # the table is collapsed to one row per person before joining.
    rbind(
      postVaccinePcrPositiveCovidFeatures$immuno,
      postVaccinePcrNegativeCovidFeatures$immuno
    ) %>%
      mutate(is_immunoD = TRUE) %>%
      dplyr::select(person_id, is_immunoD) %>%
      distinct()
  ) %>%
  left_join(
    # Fix: take the rolling averages from BOTH feature sets. The positive set
    # used to be stacked with itself, so rows for the PCR-negative group were
    # absent from this table and their cases_avg / deaths_avg were zero-filled
    # by replace_na() below.
    rbind(
      postVaccinePcrPositiveCovidFeatures$rollingAvg,
      postVaccinePcrNegativeCovidFeatures$rollingAvg
    )
  ) %>%
  distinct() %>%
  mutate(
    # Age in whole years at the index date.
    age_at_index = as.integer(
      difftime(index_date, DOB, units = "days") / 365.24
    ),
    # Four age groups with roughly equal numbers of persons.
    age_category_at_index = cut_number(x = age_at_index, n = 4),
    # Collapse race; anything else (including NA) becomes the fallback level.
    race_category = case_when(
      race == "White" ~ "White",
      race == "Black or African American" ~ "Black",
      race == "Asian" ~ "Asian",
      TRUE ~ "Other Race or Unknown"
    )
  ) %>%
  # Persons without a match in a feature table get neutral defaults.
  replace_na(
    list(
      count_of_visits = 0,
      is_immunoD = FALSE,
      observation_days = 0,
      cases_avg = 0,
      deaths_avg = 0
    )
  )
# match
# Pool vaccinated and unvaccinated cohorts; bind_rows() fills columns that
# exist in only one table with NA. entry_date is dropped from the unvaccinated
# side first. ldd_category buckets index_date by calendar month.
forMatchData <- bind_rows(
  vaccinatedCohortCov,
  unVaccinatedCohortCov %>% dplyr::select(-entry_date)
) %>%
  mutate(ldd_category = cut(x = index_date, "months"))

# Fixed seed so the matching run is reproducible.
set.seed(5)

# 1:1 nearest-neighbour matching on the covariates below (takes a minute).
matchIt <- matchit(
  is_vaccinated ~ count_of_visits + observation_days + gender + age_at_index +
    race_category + ethnicity + is_immunoD + ldd_category,
  data = forMatchData,
  method = "nearest",
  ratio = 1
)
plot(summary(matchIt))

# Keep only the leading columns that came from forMatchData; the extra
# columns appended by match.data() are discarded.
matchItData <- match.data(matchIt)[seq_len(ncol(forMatchData))]
# test
# incident rate in vax/unvax
# Column 1: number of matched persons per is_vaccinated group, one row per
# stratum (all, age <= 65, age > 65, male, female, immunoD, non-immunoD).
table6col1 <- do.call(
  rbind,
  lapply(
    list(
      matchItData,
      matchItData %>% filter(age_at_index <= 65),
      matchItData %>% filter(age_at_index > 65),
      matchItData %>% filter(gender == "MALE"),
      matchItData %>% filter(gender == "FEMALE"),
      matchItData %>% filter(is_immunoD == TRUE),
      matchItData %>% filter(is_immunoD == FALSE)
    ),
    function(stratum) {
      groupSizes <- stratum %>%
        group_by(is_vaccinated) %>%
        summarise(N = length(status))
      # Transpose and keep only the row holding the counts.
      t(as.matrix(groupSizes))["N", ]
    }
  )
)
# Column 2: events per 1000 units of follow-up `time` (sum of status divided
# by sum of time, times 1000) per is_vaccinated group, one row per stratum
# (all, age <= 65, age > 65, male, female, immunoD, non-immunoD).
table6col2 <- do.call(
  rbind,
  lapply(
    list(
      matchItData,
      matchItData %>% filter(age_at_index <= 65),
      matchItData %>% filter(age_at_index > 65),
      matchItData %>% filter(gender == "MALE"),
      matchItData %>% filter(gender == "FEMALE"),
      matchItData %>% filter(is_immunoD == TRUE),
      matchItData %>% filter(is_immunoD == FALSE)
    ),
    function(stratum) {
      groupRates <- stratum %>%
        group_by(is_vaccinated) %>%
        summarise(N = 1000 * sum(status) / sum(time))
      # Transpose and keep only the row holding the rates.
      t(as.matrix(groupRates))["N", ]
    }
  )
)
# raw IRR
# Column 3: is_vaccinated effect from univarTest() with poisson = TRUE and no
# extra adjustment covariates (adj = NULL), one row per stratum.
# NOTE(review): univarTest() is defined elsewhere; presumably poisson selects
# a Poisson rate model -- confirm against its definition.
table6col3 <- do.call(
  rbind,
  lapply(
    list(
      matchItData,
      matchItData %>% filter(age_at_index <= 65),
      matchItData %>% filter(age_at_index > 65),
      matchItData %>% filter(gender == "MALE"),
      matchItData %>% filter(gender == "FEMALE"),
      matchItData %>% filter(is_immunoD == TRUE),
      matchItData %>% filter(is_immunoD == FALSE)
    ),
    function(stratum) {
      univarTest(
        forTest = stratum, var = "is_vaccinated", adj = NULL,
        lr = FALSE, cox = FALSE, poisson = TRUE
      )["is_vaccinatedTRUE", ]
    }
  )
)
# further adj.
# Column 4: same test as column 3, additionally adjusted for visit count and
# observation days. The two immunoD strata (last two rows) also adjust for
# age_at_index; the other five strata do not.
table6col4 <- do.call(
  rbind,
  Map(
    function(stratum, covariates) {
      univarTest(
        forTest = stratum, var = "is_vaccinated", adj = covariates,
        lr = FALSE, cox = FALSE, poisson = TRUE
      )["is_vaccinatedTRUE", ]
    },
    list(
      matchItData,
      matchItData %>% filter(age_at_index <= 65),
      matchItData %>% filter(age_at_index > 65),
      matchItData %>% filter(gender == "MALE"),
      matchItData %>% filter(gender == "FEMALE"),
      matchItData %>% filter(is_immunoD == TRUE),
      matchItData %>% filter(is_immunoD == FALSE)
    ),
    # Covariate set per stratum: 5x the base set, then 2x base + age.
    rep(
      list(
        c("count_of_visits", "observation_days"),
        c("count_of_visits", "observation_days", "age_at_index")
      ),
      times = c(5, 2)
    )
  )
)
# by ldd
# res = NULL
# for(i in matchItData$ldd_category %>% unique()){
# r = univarTest(forTest = matchItData %>% filter(ldd_category == i ),var = "is_vaccinated",adj = NULL,lr = F,cox=F,poisson = T)
# res = rbind(res,cbind(i,r[2,1]))
# }
# res
# # further adj.
# res = NULL
# for(i in matchItData$ldd_category %>% unique()){
# r = univarTest(forTest = matchItData %>% filter(ldd_category == i ),var = "is_vaccinated",adj = c("count_of_visits","observation_days","age_at_index"),lr = F,cox=F,poisson = T)
# res = rbind(res,cbind(i,r[2,1]))
# }
# res
# # calculate raw IR.
# icn = (vaccinatedCohort %>% filter(is_vaccinated & status) %>% dim())[1]
# obsTimePerson = (matchItData %>% filter(is_vaccinated) %>% pull(time) %>% sum()/1000)
# icn/obsTimePerson
# IR
# cleanedNoVaccinatedCohortPerson = cleanedVaccinatedCohort %>% left_join(breakthroughCovidPerson) %>%
# filter(is.na(time)) %>%
# mutate(is_vaccinated = T) %>%
# mutate(status = 0)%>%
# mutate(time = as.integer(
# difftime("2021-06-30", latest_dose_date, units = "days")) - 14) %>%
# dplyr::select(person_id,latest_dose_date,index_date,is_vaccinated,time,status) # 182432.
#
# vaccinatedRawCohort = rbind(breakthroughCovidPerson,cleanedNoVaccinatedCohortPerson)
#
# icn = (vaccinatedRawCohort %>% filter(status == 1) %>% dim())[1]
# obsTimePerson = (vaccinatedRawCohort %>% pull(time) %>% sum()/1000)
# icn/obsTimePerson # incident rate by comparing positive vaccinated vs. all vaccinated