This repository has been archived by the owner on Sep 24, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathipachecksetup.ado
318 lines (265 loc) · 10.3 KB
/
ipachecksetup.ado
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
*! Version 1.0.0 Ishmail Azindoo Baako (IPA) 24feb2018
version 13.0
program define ipachecksetup
#d;
syntax using/, template(string) outfile(string) [osp(real -666) REFusal(real -888) DONTKnow(real -999) replace long wide]
;
#d cr
qui {
* tempfiles
tempfile _choices _survey
* import choices data
import excel using "`using'", sheet("choices") first allstr clear
drop if missing(value)
* save choices
save `_choices', replace
* import survey
import excel using "`using'", sheet("survey") firstrow allstr clear
drop if missing(type) | regexm(disabled, "[Yy][Es][Ss]")
save `_survey'
* check if form includes repeat groups and ask user to specify long or wide option
if "`long'`wide'" == "" {
cap assert !regexm(type, "begin repeat|end repeat")
if _rc {
disp as err "must specify either long or wide option. XLS form contains repeat groups"
exit 198
}
}
* check that both long and wide formats are not specified
if "`long'" ~= "" & "`wide'" ~= "" {
disp as err "options long and wide are mutually exclusive"
exit 198
}
* Mark beginning and end of groups and repeats
count if regexm(type, "group|repeat")
gen grp_var = .
gen rpt_grp_var = .
gen begin_row = .
gen begin_fieldname = ""
gen end_row = .
gen end_fieldname = ""
if `r(N)' > 0 {
* generate _n to mark groups
gen _sn = _n
* get the name of all begin groups|repeat and check if the name if their pairs match
levelsof _sn if (regexm(type, "^(begin)") & regexm(type, "group|repeat")), ///
loc (_sns) clean
count if (regexm(type, "^(begin)") & regexm(type, "group|repeat"))
loc b_cnt `r(N)'
count if (regexm(type, "^(end)") & regexm(type, "group|repeat"))
loc e_cnt `r(N)'
if `b_cnt' ~= `e_cnt' {
di as err "Error in XLS form: There are `b_cnt' begin types and `e_cnt' end types"
exit 198
}
foreach _sn in `_sns' {
if regexm(type[`_sn'], "group") loc gtype "grp"
else loc gtype "rpt_grp"
loc b 1
loc e 0
loc curr_sn `_sn'
loc stop 0
while `stop' == 0 {
loc ++curr_sn
cap assert regexm(type, "^(end)") & regexm(type, "group|repeat") in `curr_sn'
if !_rc {
loc ++e
if `b' == `e' {
loc end `curr_sn'
loc stop 1
}
}
else {
if "`gtype'" == "grp" replace grp_var = 1 in `curr_sn'
if "`gtype'" == "rpt_grp" replace rpt_grp_var = 1 in `curr_sn'
cap assert regexm(type, "^(begin)") & regexm(type, "group|repeat") in `curr_sn'
if !_rc loc ++b
}
}
replace begin_row = _sn[`_sn'] in `_sn'
replace begin_fieldname = name[`_sn'] in `_sn'
replace end_row = _sn[`end'] in `_sn'
replace end_fieldname = name[`end'] in `_sn'
}
replace grp_var = 0 if missing(grp_var)
replace rpt_grp_var = 0 if missing(rpt_grp_var)
replace name = subinstr(name, "*", "", .) if (regexm(type, "^(begin)") & regexm(type, "group|repeat")) in `curr_sn'
}
* Check form for repeat groups and mark all repeat group variables
* drop all repeat variables if long option is used
if "`long'" ~= "" {
drop if rpt_grp_var
replace _sn = _n
}
* include a wild card in repeat var names if option excluderepeats is not used
if "`wide'" ~= "" replace name = name + "*" if rpt_grp_var
save `_survey', replace
noi disp
noi disp "Prefilling HFC Inputs ..."
copy "`template'" "`outfile'", replace
* 04. no miss
* Find variables to be added to no miss
drop if inlist(type, "deviceid", "subscriberid", "simserial", "phonenumber", "username", "caseid")
* drop all variables in groups and repeats that have relevance expressions
loc repeat 9
while `repeat' == 9 {
gen n = _n
levelsof name if !missing(relevance) & regexm(type, "begin"), ///
loc (variables) clean
loc variable: word 1 of `variables'
levelsof n if name == "`variable'", loc (indexes)
loc start: word 1 of `indexes'
loc end: word 2 of `indexes'
drop in `start'/`end'
cap assert missing(relevance) if regexm(type, "begin")
loc repeat `=_rc'
drop n
}
* keep only required or scto always generated vars
replace required = lower(required)
keep if regexm(required, "[Yy][Ee][Ss]") | inlist(name, "starttime", "endtime", "duration")
* drop all notes and fields with no relevance
drop if type == "note" | !missing(relevance)
* export variables to nomiss sheet. The first 2 cells will already contain key and skey
export excel name label using "`outfile'", ///
sheet("4. no miss") sheetmodify cell(A2)
noi disp "... 4. no miss complete"
* 06. logic
use `_survey', clear
* Add group|repeat relevance to individual field within groups
gen if_condition = ""
replace relevance = subinstr(relevance, "$", "", .) if !missing(relevance)
levelsof _sn if !missing(relevance) & regexm(type, "begin group|begin repeat"), ///
loc (groups) clean
foreach group in `groups' {
gen n = _n
loc start = _sn[`group']
loc end = _sn[`group']
loc relevance = relevance[`start']
replace if_condition = if_condition + "\(" + "`relevance'" + ")" in `start'/`end'
drop n
}
* drop all field without relevance
drop if missing(relevance) | type == "note" | regexm(type, "group|repeat")
* to cater for no spaces in programming, add white space to either side of =
* trim excess whitespace, change = to ==
foreach var of varlist relevance if_condition {
replace `var' = trim(itrim(subinstr(`var', "=", " # ", .)))
replace `var' = subinstr(`var', "'", char(34), .)
replace `var' = subinstr(`var', "> # ", ">= ", .)
replace `var' = subinstr(`var', "< # ", "<= ", .)
replace `var' = subinstr(`var', "! # ", "!= ", .)
replace `var' = subinstr(`var', "{", "", .)
replace `var' = subinstr(`var', "}", "", .)
replace `var' = subinstr(`var', " and ", " & ", .)
replace `var' = subinstr(`var', " or ", " | ", .)
replace `var' = subinstr(`var', "\(", "", 1)
replace `var' = subinstr(`var', "\", " & ", .)
replace `var' = subinstr(`var', "\", " & ", .)
replace `var' = subinstr(`var', "not(", "!(", .)
replace `var' = subinstr(`var', ")", "", 1) if strpos(`var', "(") == 0 ///
| (strpos(`var', "(") > strpos(`var', ")"))
loc repeat = 9
while `repeat' == 9 {
gen sub = substr(`var', (strpos(`var', "#") + 2), 1)
replace `var' = subinstr(`var', "#", "==", 1) if ///
regexm(sub, "[0-9]|[-]") | regexm(sub, char(34))
replace `var' = subinstr(`var', "#", "=", 1) if ///
regexm(sub, "[a-zA-Z]") | regexm(sub, char(34))
cap assert !regexm(`var', "#")
loc repeat `=_rc'
drop sub
}
* change selected and selected-at with regexm
replace `var' = subinstr(`var', "count-selected", "wordcount", .)
replace `var' = subinstr(`var', "selected-at", "regexm", .)
replace `var' = subinstr(`var', "selected", "regexm", .)
}
* add relevance to if condition
replace if_condition = if_condition + " & (" + relevance + ")" if !missing(if_condition)
replace if_condition = relevance if missing(if_condition)
* generate assertion. Assert for non-missing in all. Manual edits will be needed for addional
* assertions required
gen assertion = name + " == ." if regexm(type, "integer|select_one")
replace assertion = "!missing(" + name + ")" ///
if missing(assertion) & ///
!inlist(type, "begin group", "end group", "begin repeat", "end repeat")
* export variables to skip sheet.
export excel name label assertion if_condition using "`outfile'", sheet("6. logic") sheetmodify cell(A2)
noi disp "... 6. logic"
* 8. constraints
use `_survey', clear
keep type name label constraint
* keep only fields with contraints
keep if !missing(constraint)
* export variable names, label and constraints to first column A
if `=_N' > 0 {
export excel name label constraint using "`outfile'", ///
sheet("8. constraints") sheetmodify cell(A2)
noi disp "... 8. constraint complete"
}
* 9. specify
use `_survey', clear
keep type name relevance
keep if regexm(relevance, "`osp'") & !regexm(type, "begin") & type == "text"
if `=_N' > 0 {
* rename name child and keep only needed variables
ren (name) (child)
keep child relevance
* generate parent
replace relevance = trim(itrim(relevance))
gen parent = substr(relevance, strpos(relevance, "$") + 2, strpos(relevance, "}") - strpos(relevance, "$") - 2)
* Export child and parent variables
export excel child parent using "`outfile'", sheet("9. specify") sheetmodify cell(A2)
noi disp "... 9. specify complete"
}
* 11. outliers
use `_survey', clear
keep type label name
* keep only integer and decimal fields
keep if type == "decimal" | type == "integer"
* Export variable names and multiplier
if `=_N' > 0 {
export excel name label using "`outfile'", sheet("11. outliers") sheetmodify cell(A2)
noi disp "... 11. outliers complete"
}
* enumdb
* Import choices
use `_choices', clear
* keep only list_name and value fields
keep list_name value
* get names of list_names with rf | dk
levelsof list_name if value == "`refusal'" | value == "`dontknow'", loc (dkrf_opts)
* Import survey
use `_survey', clear
keep type name
* Drop group names
drop if regexm(type, "group")
* Loop through and mark variables with dk ref opts
gen dkref_var = 0
foreach opt in `dkrf_opts' {
replace dkref_var = 1 if regexm(type, "`opt'")
}
* keep only dkref vars and text fields
keep if dkref_var == 1 | type == "text"
* Export dk and refusal vars
export excel name using "`outfile'", sheet("enumdb") sheetmodify cell(A2)
* export missing rate
use `_survey', clear
gen include_grp = 0
replace relevance = subinstr(relevance, "$", "", .) if !missing(relevance)
levelsof _sn if !missing(relevance) & regexm(type, "begin group|begin repeat"), ///
loc (groups) clean
foreach group in `groups' {
loc start = _sn[`group']
loc end = _sn[`group']
replace include_grp = 1 in `start'/`end'
}
keep if include_grp | (!missing(relevance) & regexm(type, "note|begin group|end group"))
* Export missing var rate and refusal vars
export excel name using "`outfile'", sheet("enumdb") sheetmodify cell(B2)
* export other specify
use `_survey', clear
export excel name using "`outfile'" if regexm(relevance, "`osp'"), sheet("enumdb") sheetmodify cell(D2)
}
end