# config_version_control.toml
# Where to find LaTeXML on the local system.
# Keep machine-specific overrides in config.toml; this file holds the version-controlled defaults.
[executables]
LATEXML_BIN = "/usr/bin/site_perl/latexml"
# Maximum time (in seconds) allowed for LaTeXML to run on a single file
MAXT = 4800
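
# Usage sketch (an assumption, not part of the configuration itself): one way this
# file could be read from Python with the standard-library tomllib (Python 3.11+);
# the variable names are illustrative only.
#
#   import tomllib
#
#   with open("config_version_control.toml", "rb") as f:
#       cfg = tomllib.load(f)
#
#   latexml_bin = cfg["executables"]["LATEXML_BIN"]   # path to the LaTeXML binary
#   max_seconds = cfg["executables"]["MAXT"]          # per-file timeout in seconds
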
[classif-lstm]
batch_size = 5000
glob_data_source = '/training_defs/math*/*.xml.gz'
TVT_split = 0.8              # Train/Validation/Test split
max_seq_len = 400            # Length of padding and of the Embedding layer input
promath_dir = 'promath'      # Name of the dir with the processed arXiv tar files
#save_path = 'glossary/test_lstm'    # Path to save the positive results
min_words = 15               # Minimum number of words for a paragraph to be considered
model_type = 'lstm'          # Options are 'lstm' or 'conv'
profiling = false            # Whether to add the callback for profiling
callbacks = ['epoch_times']
lstm_cells = 128             # Required LSTM layer parameter
epochs = 2
wembed_basename = 'embeddings/glove_model_18-31_15-08'
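
# Illustrative sketch (an assumption, not this repository's training code): how the
# hyperparameters above could drive a small Keras binary classifier. vocab_size and
# embed_dim are hypothetical placeholders normally fixed by the GloVe embeddings,
# and model_type = 'conv' would swap the LSTM layer for a Conv1D stack.
#
#   import tomllib
#   import tensorflow as tf
#
#   with open("config_version_control.toml", "rb") as f:
#       p = tomllib.load(f)["classif-lstm"]
#   vocab_size, embed_dim = 50_000, 200          # hypothetical placeholders
#   model = tf.keras.Sequential([
#       tf.keras.Input(shape=(p["max_seq_len"],)),
#       tf.keras.layers.Embedding(vocab_size, embed_dim),
#       tf.keras.layers.LSTM(p["lstm_cells"]),
#       tf.keras.layers.Dense(1, activation="sigmoid"),
#   ])
#   model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
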
[finetuning]
shrink_data_factor = 0.01
checkpoint = 'bert-base-uncased'
glob_data_source = 'training_defs/math18/*.xml.gz'
data_stream_batch_size = 5000   # Raw examples to read from the hard drive
num_epochs = 1
batch_size = 8
initial_lr = 5e-05              # Learning rates for the PolynomialDecay scheduler
end_lr = 0.0
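
# Illustrative sketch (an assumption): initial_lr/end_lr map onto Keras'
# PolynomialDecay schedule mentioned in the comment above; num_train_steps is a
# hypothetical placeholder normally computed from the dataset size.
#
#   import tomllib
#   from tensorflow.keras.optimizers import Adam
#   from tensorflow.keras.optimizers.schedules import PolynomialDecay
#
#   with open("config_version_control.toml", "rb") as f:
#       p = tomllib.load(f)["finetuning"]
#   num_train_steps = 10_000                     # hypothetical; ~ examples // batch_size * num_epochs
#   schedule = PolynomialDecay(initial_learning_rate=p["initial_lr"],
#                              end_learning_rate=p["end_lr"],
#                              decay_steps=num_train_steps)
#   optimizer = Adam(learning_rate=schedule)
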
[finetuning2]
# Uses the AdamW optimizer
shrink_data_factor = 0.01
#checkpoint = 'mistralai/Mistral-7B-v0.1'
checkpoint = 'gpt2'
glob_data_source = 'training_defs/math18/*.xml.gz'
data_stream_batch_size = 5000   # Raw examples to read from the hard drive
num_epochs = 10
batch_size = 2
initial_lr = 5e-06              # Learning rates for the PolynomialDecay scheduler
end_lr = 0.0
weight_decay = 0.01
max_length = 120
n_workers = 2                   # Number of workers for the dataloaders
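
# Illustrative sketch (an assumption): wiring these values into a PyTorch AdamW
# optimizer and a tokenizer truncated to max_length; the sequence-classification
# head is a guess, since the config does not say which task head is trained.
#
#   import tomllib
#   import torch
#   from transformers import AutoModelForSequenceClassification, AutoTokenizer
#
#   with open("config_version_control.toml", "rb") as f:
#       p = tomllib.load(f)["finetuning2"]
#   tokenizer = AutoTokenizer.from_pretrained(p["checkpoint"])
#   model = AutoModelForSequenceClassification.from_pretrained(p["checkpoint"], num_labels=2)
#   optimizer = torch.optim.AdamW(model.parameters(),
#                                 lr=p["initial_lr"],
#                                 weight_decay=p["weight_decay"])
#   batch = tokenizer("an example paragraph", truncation=True,
#                     max_length=p["max_length"], return_tensors="pt")
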
[peft-p-tuning-lora]
shrink_data_factor = 0.01
#checkpoint = 'mistralai/Mistral-7B-v0.1'
#checkpoint = 'gpt2'
checkpoint = 'bigscience/bloom-1b1'
glob_data_source = 'training_defs/math18/*.xml.gz'
data_stream_batch_size = 5000   # Raw examples to read from the hard drive
num_epochs = 2
batch_size = 4                  # per_device training and eval batch size
initial_lr = 5e-06              # Learning rates for the PolynomialDecay scheduler
end_lr = 0.0
weight_decay = 0.01
max_length = 120
peft_conf = 'lora'              # Options are 'p-tuning' or 'lora'
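
# Illustrative sketch (an assumption): choosing the PEFT adapter from peft_conf with
# the Hugging Face peft library; the task type and the LoRA / p-tuning hyperparameters
# shown are placeholders, not values taken from this repository.
#
#   import tomllib
#   from peft import LoraConfig, PromptEncoderConfig, TaskType, get_peft_model
#   from transformers import AutoModelForSequenceClassification
#
#   with open("config_version_control.toml", "rb") as f:
#       p = tomllib.load(f)["peft-p-tuning-lora"]
#   base_model = AutoModelForSequenceClassification.from_pretrained(p["checkpoint"],
#                                                                   num_labels=2)
#   if p["peft_conf"] == "lora":
#       peft_config = LoraConfig(task_type=TaskType.SEQ_CLS,
#                                r=8, lora_alpha=16, lora_dropout=0.1)
#   else:  # 'p-tuning'
#       peft_config = PromptEncoderConfig(task_type=TaskType.SEQ_CLS,
#                                         num_virtual_tokens=20)
#   model = get_peft_model(base_model, peft_config)
#   model.print_trainable_parameters()
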
[finetuning-ner]
checkpoint = 'bert-base-cased'
label_list = ['O', 'B-DFNDUM', 'I-DFNDUM']
batch_size = 8
batch_size_prep = 8             # Batch size for prepare_tf_dataset
init_lr = 2e-5
end_lr = 0.0
weight_decay_rate = 0.01
num_warmup_steps = 0
num_labels = 3
epochs = 3
shrink_data_factor = 0.2
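
# Illustrative sketch (an assumption): label_list maps to the id2label/label2id
# dictionaries of a token-classification head, and init_lr, num_warmup_steps and
# weight_decay_rate match transformers' create_optimizer helper; num_train_steps
# is a hypothetical placeholder normally computed from the dataset size.
#
#   import tomllib
#   from transformers import (AutoTokenizer, TFAutoModelForTokenClassification,
#                             create_optimizer)
#
#   with open("config_version_control.toml", "rb") as f:
#       p = tomllib.load(f)["finetuning-ner"]
#   id2label = dict(enumerate(p["label_list"]))
#   label2id = {label: i for i, label in id2label.items()}
#   tokenizer = AutoTokenizer.from_pretrained(p["checkpoint"])
#   model = TFAutoModelForTokenClassification.from_pretrained(
#       p["checkpoint"], num_labels=p["num_labels"],
#       id2label=id2label, label2id=label2id)
#   num_train_steps = 10_000                     # hypothetical placeholder
#   optimizer, lr_schedule = create_optimizer(init_lr=p["init_lr"],
#                                             num_warmup_steps=p["num_warmup_steps"],
#                                             num_train_steps=num_train_steps,
#                                             weight_decay_rate=p["weight_decay_rate"])
#   model.compile(optimizer=optimizer)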