# config_version_control.toml
# Where to find LaTeXML on the local system.
# Keep machine-specific overrides in config.toml; this file holds the version-controlled defaults.
[executables]
LATEXML_BIN = "/usr/bin/site_perl/latexml"
# Maximum time (in seconds) allowed for LaTeXML to run on a single file
MAXT = 4800
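
# Usage sketch (an assumption, not part of the configuration itself): one way this
# file could be read from Python with the standard-library tomllib (Python 3.11+);
# the variable names are illustrative only.
#
#   import tomllib
#
#   with open("config_version_control.toml", "rb") as f:
#       cfg = tomllib.load(f)
#
#   latexml_bin = cfg["executables"]["LATEXML_BIN"]   # path to the LaTeXML binary
#   max_seconds = cfg["executables"]["MAXT"]          # per-file timeout in seconds
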
[classif-lstm]
batch_size = 5000
glob_data_source = '/training_defs/math*/*.xml.gz'
TVT_split = 0.8              # Train/Validation/Test split
max_seq_len = 400            # Length of padding and of the Embedding layer input
promath_dir = 'promath'      # Name of the dir with the processed arXiv tar files
#save_path = 'glossary/test_lstm'    # Path to save the positive results
min_words = 15               # Minimum number of words for a paragraph to be considered
model_type = 'lstm'          # Options are 'lstm' or 'conv'
profiling = false            # Whether to add the callback for profiling
callbacks = ['epoch_times']
lstm_cells = 128             # Required LSTM layer parameter
epochs = 2
wembed_basename = 'embeddings/glove_model_18-31_15-08'
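
# Illustrative sketch (an assumption, not this repository's training code): how the
# hyperparameters above could drive a small Keras binary classifier. vocab_size and
# embed_dim are hypothetical placeholders normally fixed by the GloVe embeddings,
# and model_type = 'conv' would swap the LSTM layer for a Conv1D stack.
#
#   import tomllib
#   import tensorflow as tf
#
#   with open("config_version_control.toml", "rb") as f:
#       p = tomllib.load(f)["classif-lstm"]
#   vocab_size, embed_dim = 50_000, 200          # hypothetical placeholders
#   model = tf.keras.Sequential([
#       tf.keras.Input(shape=(p["max_seq_len"],)),
#       tf.keras.layers.Embedding(vocab_size, embed_dim),
#       tf.keras.layers.LSTM(p["lstm_cells"]),
#       tf.keras.layers.Dense(1, activation="sigmoid"),
#   ])
#   model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
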
[finetuning]
shrink_data_factor = 0.01
checkpoint = 'bert-base-uncased'
glob_data_source = 'training_defs/math18/*.xml.gz'
data_stream_batch_size = 5000   # Raw examples to read from the hard drive
num_epochs = 1
batch_size = 8
initial_lr = 5e-05              # Learning rates for the PolynomialDecay scheduler
end_lr = 0.0
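
# Illustrative sketch (an assumption): initial_lr/end_lr map onto Keras'
# PolynomialDecay schedule mentioned in the comment above; num_train_steps is a
# hypothetical placeholder normally computed from the dataset size.
#
#   import tomllib
#   from tensorflow.keras.optimizers import Adam
#   from tensorflow.keras.optimizers.schedules import PolynomialDecay
#
#   with open("config_version_control.toml", "rb") as f:
#       p = tomllib.load(f)["finetuning"]
#   num_train_steps = 10_000                     # hypothetical; ~ examples // batch_size * num_epochs
#   schedule = PolynomialDecay(initial_learning_rate=p["initial_lr"],
#                              end_learning_rate=p["end_lr"],
#                              decay_steps=num_train_steps)
#   optimizer = Adam(learning_rate=schedule)
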
[finetuning2]
# Uses the AdamW optimizer
shrink_data_factor = 0.01
#checkpoint = 'mistralai/Mistral-7B-v0.1'
checkpoint = 'gpt2'
glob_data_source = 'training_defs/math18/*.xml.gz'
data_stream_batch_size = 5000   # Raw examples to read from the hard drive
num_epochs = 10
batch_size = 2
initial_lr = 5e-06              # Learning rates for the PolynomialDecay scheduler
end_lr = 0.0
weight_decay = 0.01
max_length = 120
n_workers = 2                   # Number of workers for the dataloaders
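
# Illustrative sketch (an assumption): wiring these values into a PyTorch AdamW
# optimizer and a tokenizer truncated to max_length; the sequence-classification
# head is a guess, since the config does not say which task head is trained.
#
#   import tomllib
#   import torch
#   from transformers import AutoModelForSequenceClassification, AutoTokenizer
#
#   with open("config_version_control.toml", "rb") as f:
#       p = tomllib.load(f)["finetuning2"]
#   tokenizer = AutoTokenizer.from_pretrained(p["checkpoint"])
#   model = AutoModelForSequenceClassification.from_pretrained(p["checkpoint"], num_labels=2)
#   optimizer = torch.optim.AdamW(model.parameters(),
#                                 lr=p["initial_lr"],
#                                 weight_decay=p["weight_decay"])
#   batch = tokenizer("an example paragraph", truncation=True,
#                     max_length=p["max_length"], return_tensors="pt")
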
[peft-p-tuning-lora]
shrink_data_factor = 0.01
#checkpoint = 'mistralai/Mistral-7B-v0.1'
#checkpoint = 'gpt2'
checkpoint = 'bigscience/bloom-1b1'
glob_data_source = 'training_defs/math18/*.xml.gz'
data_stream_batch_size = 5000   # Raw examples to read from the hard drive
num_epochs = 2
batch_size = 4                  # per_device training and eval batch size
initial_lr = 5e-06              # Learning rates for the PolynomialDecay scheduler
end_lr = 0.0
weight_decay = 0.01
max_length = 120
peft_conf = 'lora'              # Options are 'p-tuning' or 'lora'
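
# Illustrative sketch (an assumption): choosing the PEFT adapter from peft_conf with
# the Hugging Face peft library; the task type and the LoRA / p-tuning hyperparameters
# shown are placeholders, not values taken from this repository.
#
#   import tomllib
#   from peft import LoraConfig, PromptEncoderConfig, TaskType, get_peft_model
#   from transformers import AutoModelForSequenceClassification
#
#   with open("config_version_control.toml", "rb") as f:
#       p = tomllib.load(f)["peft-p-tuning-lora"]
#   base_model = AutoModelForSequenceClassification.from_pretrained(p["checkpoint"],
#                                                                   num_labels=2)
#   if p["peft_conf"] == "lora":
#       peft_config = LoraConfig(task_type=TaskType.SEQ_CLS,
#                                r=8, lora_alpha=16, lora_dropout=0.1)
#   else:  # 'p-tuning'
#       peft_config = PromptEncoderConfig(task_type=TaskType.SEQ_CLS,
#                                         num_virtual_tokens=20)
#   model = get_peft_model(base_model, peft_config)
#   model.print_trainable_parameters()
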
[finetuning-ner]
checkpoint = 'bert-base-cased'
label_list = ['O', 'B-DFNDUM', 'I-DFNDUM']
batch_size = 8
batch_size_prep = 8             # Batch size for prepare_tf_dataset
init_lr = 2e-5
end_lr = 0.0
weight_decay_rate = 0.01
num_warmup_steps = 0
num_labels = 3
epochs = 3
shrink_data_factor = 0.2
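
# Illustrative sketch (an assumption): label_list maps to the id2label/label2id
# dictionaries of a token-classification head, and init_lr, num_warmup_steps and
# weight_decay_rate match transformers' create_optimizer helper; num_train_steps
# is a hypothetical placeholder normally computed from the dataset size.
#
#   import tomllib
#   from transformers import (AutoTokenizer, TFAutoModelForTokenClassification,
#                             create_optimizer)
#
#   with open("config_version_control.toml", "rb") as f:
#       p = tomllib.load(f)["finetuning-ner"]
#   id2label = dict(enumerate(p["label_list"]))
#   label2id = {label: i for i, label in id2label.items()}
#   tokenizer = AutoTokenizer.from_pretrained(p["checkpoint"])
#   model = TFAutoModelForTokenClassification.from_pretrained(
#       p["checkpoint"], num_labels=p["num_labels"],
#       id2label=id2label, label2id=label2id)
#   num_train_steps = 10_000                     # hypothetical placeholder
#   optimizer, lr_schedule = create_optimizer(init_lr=p["init_lr"],
#                                             num_warmup_steps=p["num_warmup_steps"],
#                                             num_train_steps=num_train_steps,
#                                             weight_decay_rate=p["weight_decay_rate"])
#   model.compile(optimizer=optimizer)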