-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathrun_by_config.sh
executable file
·230 lines (205 loc) · 5.71 KB
/
run_by_config.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
#!/usr/bin/env bash
#SBATCH --mem=30000
#SBATCH --gres=gpu:1
#SBATCH --time=0
#SBATCH --output=slurm_out/slurm-%j.out
emb=$2
output=$3
args="${@:4}"
# default configs
use_neg=true
max_sent_len=null
tokenizer=space
sentencizer=newline
truncate_span_loss=true
pair_ind_method=null
special_loss=false
use_attentive_span_repr=true
use_context_layer=true
batch_size=32
max_total_seq_len=1600
bucket_widths=false
max_num_sample=null
num_order=0
vocab=output/all_vocab/vocabulary/
if [[ $emb == 'glove' ]]; then
attentive_after_context=true
elif [[ $emb == 'elmo' ]]; then
attentive_after_context=false
fi
if [[ $1 == 'dp' ]]; then
task=dp
max_span_width=1
data_dir=ptb/brat
task_loss=span_pair
spans_per_word=1.0
validation_metric=dp_sp_prf_f
elif [[ $1 == 'dp_conll' ]]; then
task=dp_conll
max_span_width=1
data_dir=conll_dep_2012/brat
task_loss=span_pair
spans_per_word=1.0
validation_metric=dp_conll_sp_prf_f
elif [[ $1 == 'srl' ]]; then
task=srl
max_span_width=30
data_dir=conll_srl_2012/brat
task_loss=span-span_pair
spans_per_word=1.0
max_total_seq_len=512
validation_metric=srl_sp_prf_f
elif [[ $1 == 'ner' ]]; then
task=ner
max_span_width=10
tokenizer=spacy
data_dir=conll_ner_2003/brat
task_loss=span
spans_per_word=0.4
validation_metric=ner_s_prf_f
elif [[ $1 == 'coref' ]]; then
task=coref
max_span_width=10
sentencizer=concat
data_dir=conll_coref_2012/brat
task_loss=span-span_pair
spans_per_word=0.4
pair_ind_method=left:100
special_loss=true
max_total_seq_len=4096
validation_metric=coref_coref_2
elif [[ $1 == 'consti' ]]; then
task=consti
max_span_width=null
data_dir=ptb_consti/brat
task_loss=span
spans_per_word=1000000.0
use_attentive_span_repr=false
validation_metric=consti_bracket_evalb_f1_measure
bucket_widths=true
elif [[ $1 == 'consti_conll' ]]; then
task=consti_conll
max_span_width=null
data_dir=conll_consti_2012/brat
task_loss=span
spans_per_word=1000000.0
use_attentive_span_repr=false
validation_metric=consti_conll_bracket_evalb_f1_measure
bucket_widths=true
elif [[ $1 == 'consti_small' ]]; then
task=consti
max_span_width=10
data_dir=ptb_consti/brat
task_loss=span
spans_per_word=2.0
validation_metric=consti_bracket_evalb_f1_measure
elif [[ $1 == 'consti_conll_small' ]]; then
task=consti_conll
max_span_width=10
data_dir=conll_consti_2012/brat
task_loss=span
spans_per_word=2.0
validation_metric=consti_conll_bracket_evalb_f1_measure
elif [[ $1 == 'rc' ]]; then
task=rc
max_span_width=5
tokenizer=spacy
data_dir=semeval_2010_task8/brat
task_loss=span-span_pair
spans_per_word=5
validation_metric=rc_sp_prf_r
elif [[ $1 == 'oie' ]]; then
task=oie
max_span_width=30
data_dir=openie/brat
task_loss=span-span_pair
spans_per_word=0.8
max_total_seq_len=512
validation_metric=oie_sp_prf_f
elif [[ $1 == 'pos' ]]; then
task=pos
use_neg=false
max_span_width=1
data_dir=ptb_pos/brat
task_loss=span
truncate_span_loss=false
spans_per_word=0
max_total_seq_len=1000000
validation_metric=pos_s_acc
elif [[ $1 == 'pos_conll' ]]; then
task=pos_conll
use_neg=false
max_span_width=1
data_dir=conll_pos_2012/brat
task_loss=span
truncate_span_loss=false
spans_per_word=0
max_total_seq_len=1000000
validation_metric=pos_conll_s_acc
elif [[ $1 == 'wlp' ]]; then
task=wlp
max_span_width=10
tokenizer=spacy
data_dir=wlp/WLP-Dataset
task_loss=span-span_pair
spans_per_word=0.6
validation_metric=wlp_sp_prf_f
elif [[ $1 == 'semeval14_st2' ]]; then
task=semeval14_st2
max_span_width=10
tokenizer=spacy
data_dir=semeval_2014/brat
task_loss=span-span_pair
spans_per_word=0.4
validation_metric=semeval14_st2_s_prf_f
elif [[ $1 == 'orl' ]]; then
task=orl
max_span_width=30
data_dir=mpqa/brat
task_loss=span-span_pair
spans_per_word=0.3
max_total_seq_len=512
validation_metric=orl_sp_prf_f
elif [[ $1 == 'orl_gold' ]]; then
task=orl
max_span_width=30
data_dir=mpqa/brat
task_loss=span-span_pair
pair_ind_method=gold_predicate
spans_per_word=1.0
max_total_seq_len=512
validation_metric=orl_sp_prf_f
fi
temp_file=$(mktemp)
# build json
jsonnet \
--ext-str task=${task} \
--ext-code use_neg=${use_neg} \
--ext-code max_span_width=${max_span_width} \
--ext-code max_sent_len=${max_sent_len} \
--ext-str tokenizer=${tokenizer} \
--ext-str sentencizer=${sentencizer} \
--ext-str data_dir=${data_dir} \
--ext-str task_loss=${task_loss} \
--ext-code truncate_span_loss=${truncate_span_loss} \
--ext-str spans_per_word=${spans_per_word} \
--ext-str pair_ind_method=${pair_ind_method} \
--ext-code special_loss=${special_loss} \
--ext-code use_attentive_span_repr=${use_attentive_span_repr} \
--ext-code batch_size=${batch_size} \
--ext-code max_total_seq_len=${max_total_seq_len} \
--ext-str validation_metric=${validation_metric} \
--ext-code bucket_widths=${bucket_widths} \
--ext-code attentive_after_context=${attentive_after_context} \
--ext-code use_context_layer=${use_context_layer} \
--ext-code max_num_sample=${max_num_sample} \
--ext-code num_order=${num_order} \
--ext-str vocab=${vocab} \
training_config/template/${emb}.jsonnet > ${temp_file}
echo "write config to" ${temp_file}
cat ${temp_file}
if [[ ${vocab} == "null" ]]; then
allennlp make-vocab ${temp_file} -s ${output} --include-package brat_multitask ${args}
else
allennlp train ${temp_file} -s ${output} --include-package brat_multitask ${args}
fi