-
Notifications
You must be signed in to change notification settings - Fork 2
/
arguments.config
106 lines (73 loc) · 3.75 KB
/
arguments.config
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# Experiment selection and storage settings.
# Variables assigned with ?= keep any value that is already set when this
# file is read, so they can be overridden by the caller.
# $(experiment) is used further down as the directory prefix for schema.tt
# and parameter-datasets.tsv.
experiment ?= restaurants
# Empty unless the caller provides a value.
experiment_dialogs ?=
# all dataset names to process
all_names = eval
dlg_side = user
# Flag strings. NOTE(review): presumably forwarded to the canonical-update and
# schema.org processing steps, which are not visible here -- confirm.
update_canonical_flags ?= --skip
process_schemaorg_flags ?= --manual
# Bucket names and glossary variant; their consumers are outside the visible
# part of this file.
input_bucket ?= almond_dataset
glossary_bucket ?= almond_glossary
glossary_type ?= default
# 24 languages
# NOTE(review): `ji` looks like the legacy code for Yiddish (current ISO 639-1
# is `yi`), while Hebrew is listed with the current code `he` -- confirm which
# form the consumer of this list expects.
subset_languages = en fa it zh hr ja ko ru es sv tr hi fi fr de pl ar vi ji pt el he si ta
# 10 languages
# NOTE(review): "oht" is not expanded anywhere in the visible part of this
# file -- confirm its meaning with the consumer of these lists.
oht_languages = ar de es fa fi it ja pl tr zh
# The same list with `en` prepended. Derived from oht_languages instead of
# being repeated by hand, so the two lists cannot drift apart. With the list
# above it expands to "en ar de es fa fi it ja pl tr zh".
oht_languages_plus_en = en $(oht_languages)
# The two lists below are disjoint and together cover all of oht_languages
# (7 + 3 = 10).
# NOTE(review): the marian_/gt_ prefixes presumably select the translation
# backend per language -- confirm.
marian_oht_languages = ar de es fi it pl zh
gt_oht_languages = fa ja tr
# bootstrap languages
# The commented-out line is an earlier 5-language setting; the active list
# below contains the same 10 languages as oht_languages.
# boot_languages = de fa fi ja tr
boot_languages = ar de es fa fi it ja pl tr zh
# NOTE(review): the two splits below cover only de fi fa ja tr (the 5-language
# setting above), not all 10 entries of boot_languages -- confirm whether
# ar es it pl zh are meant to be left out.
marian_boot_languages = de fi
gt_boot_languages = fa ja tr
# Space-separated lists of items flagged as "missing".
# NOTE(review): how these lists are consumed is not visible here; presumably
# they name entries to skip or patch during processing -- confirm.
missing_tsv_values = tt:phone_number tt:number
missing_manifest_languages = ar fa
# Keys have the form <class>:<name>; both org.schema.Hotel and
# org.schema.Restaurant entries are listed, regardless of $(experiment).
missing_manifest_strings = org.schema.Hotel:Place_address_postalCode org.schema.Hotel:Hotel_address_postalCode org.schema.Restaurant:Restaurant_servesCuisine org.schema.Restaurant:Restaurant_address_streetAddress org.schema.Restaurant:Restaurant_address_postalCode org.schema.Restaurant:Restaurant_address_addressLocality
missing_manifest_entities = org.schema.Restaurant:Restaurant org.schema.Hotel:LocationFeatureSpecification
# Expansion factors per split, assigned with := (expanded once, immediately).
# The train-* values are read by default_augment_train_hparams further down.
# The eval-* and test-* values are not referenced in the visible part of this
# file: default_augment_evaltest_hparams hard-codes 1 for all three factors.
train-synthetic-expand-factor := 2
train-quoted-paraphrasing-expand-factor := 4
train-no-quote-paraphrasing-expand-factor := 3
eval-synthetic-expand-factor := 1
eval-quoted-paraphrasing-expand-factor := 1
eval-no-quote-paraphrasing-expand-factor := 1
test-synthetic-expand-factor := 1
test-quoted-paraphrasing-expand-factor := 1
test-no-quote-paraphrasing-expand-factor := 1
# NOTE(review): the consumers of the two *_output_per_example values are not
# visible here.
train_output_per_example = 1
evaltest_output_per_example = 1
# NOTE(review): presumably the number of examples in each eval set -- confirm.
eval_restaurants_oht_size = 377
eval_hotels_oht_size = 330
# Ranges passed to --subset-param-set by the augment hparams further down.
# NOTE(review): the two ranges overlap on 0.2-0.8; confirm that sharing this
# portion between train and eval/test is intended.
train-subset-param-set = 0-0.8
evaltest-subset-param-set = 0.2-1
# Miscellaneous settings; their consumers are outside the visible part of
# this file.
crawl_target_size = 100
remove_duplicate_sents = true
replace_ids = true
preprocess_paraphrased = false
# Dataset-mode settings. The single-turn group is the active one; the
# contextual group is commented out.
# for contextual datasets, uncomment the following lines
#contextual = --contextual
# NOTE(review): as written, the next line would define handle-heuristics in
# terms of itself, which GNU make rejects when the variable is expanded.
# Presumably a literal flag was intended -- confirm before uncommenting.
#handle-heuristics = $(handle-heuristics)
#num_columns=4
#parameter-datasets=dlg-shared-parameter-datasets.tsv
# for single-turn datasets, uncomment the following lines
contextual =
handle-heuristics =
num_columns=3
# Recursive assignment: $(experiment) is resolved each time this variable is
# expanded, so a later override of experiment is picked up.
parameter-datasets=$(experiment)/parameter-datasets.tsv
# Empty by default; the commented-out line shows the enabling value.
#replace-numbers=--replace-numbers
replace-numbers=
# Source and target language codes; both values appear in subset_languages.
source_language = en
target_language = it
# paraphrase
# NOTE(review): the consumers of num_paraphrases and cut_off are not visible
# here.
num_paraphrases = 4
cut_off = 2
# Empty by default; the commented-out line shows the enabling value.
#eval_oracle = --oracle
eval_oracle =
# Default flag string for augmenting the training split.
# Assigned with recursive `=`, so $(contextual), $(experiment),
# $(parameter-datasets), the train-* expand factors, $(train-subset-param-set)
# and $(parallel) are resolved when the variable is used.
# $(parallel) is not defined in the visible part of this file.
# NOTE(review): --replace-numbers is hard-coded here even though a
# replace-numbers variable exists and is empty by default -- confirm that
# bypassing it is intended.
default_augment_train_hparams = --requotable --target-language dlgthingtalk $(contextual) --thingpedia $(experiment)/schema.tt --parameter-datasets $(parameter-datasets) \
--synthetic-expand-factor $(train-synthetic-expand-factor) --quoted-paraphrasing-expand-factor $(train-quoted-paraphrasing-expand-factor) --no-quote-paraphrasing-expand-factor $(train-no-quote-paraphrasing-expand-factor) \
--subset-param-set $(train-subset-param-set) --sampling-type uniform --quoted-fraction 0.0 --replace-numbers --replace-locations --parallelize $(parallel)
# Default flag string for augmenting the eval/test splits.
# Differences from the train variant: all three expand factors are hard-coded
# to 1 (the eval-*/test-* factor variables are not used),
# --subset-param-set takes $(evaltest-subset-param-set), --sampling-type is
# random instead of uniform, and --override-flags S is added.
default_augment_evaltest_hparams = --requotable --target-language dlgthingtalk $(contextual) --thingpedia $(experiment)/schema.tt --parameter-datasets $(parameter-datasets) \
--synthetic-expand-factor 1 --quoted-paraphrasing-expand-factor 1 --no-quote-paraphrasing-expand-factor 1 \
--subset-param-set $(evaltest-subset-param-set) --sampling-type random --quoted-fraction 0.0 --replace-numbers --replace-locations --override-flags S --parallelize $(parallel)
# Default flag string for the translation step (--task translate).
# Assigned with recursive `=`, so $(GENIENLP_EMBEDDINGS) is resolved when the
# variable is used. It is not defined in the visible part of this file.
# NOTE(review): presumably supplied by the environment or an including
# makefile -- confirm; if unset, --cache_dir is followed directly by the next
# flag.
default_translation_hparams = --cache_dir $(GENIENLP_EMBEDDINGS) --batch_size 50 --temperature 0.4 --id_column 0 --output_example_ids_too \
--input_column 1 --gold_column 1 --replace_qp --force_replace_qp \
--repetition_penalty 1.0 --num_samples 1 --skip_heuristics --att_pooling mean --task translate --return_attentions