-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig.yaml
27 lines (24 loc) · 1.45 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
---
# Paths for checkpoints, logs, and preprocessed input data.
meta_variable:
  checkpoint_dir: 'checkpoints/'  # Model checkpoints are written here
  training_log_dir: 'logs/'       # Training logs (e.g. for TensorBoard) are written here
  data_path: 'timit/'             # Preprocessed TIMIT data generated by timit_preprocess.sh
# Architecture hyperparameters for the LAS encoder/decoder.
model_parameter:
  max_timestep: 784                  # max_timestep%8 == 0 is required due to listener time resolution reduction
  max_label_len: 77                  # Maximum length of a target label sequence (TODO: confirm against dataset)
  input_feature_dim: 26              # Default feature dimension in the original paper where CTC was proposed
  decoder_hidden_dim: 256            # Default decoder LSTM output dimension from LAS paper
  use_mlp_in_attention: true         # Set to false to exclude phi and psi in attention formula
  mlp_dim_in_attention: 128          # Hidden dimension of the attention MLP (used when use_mlp_in_attention is true)
  mlp_activate_in_attention: 'relu'  # Activation applied inside the attention MLP
  encoder_rnn_layer: 1               # Default RNN layer number
  encoder_hidden_dim: 512            # Default encoder LSTM output dimension from LAS paper
  output_class_dim: 63               # 61 phonemes + 2 for <sos> & <eos>
  rnn_unit: 'LSTM'                   # Default recurrent unit in the original paper
# Optimization and training-loop settings.
training_parameter:
  learning_rate: 0.0001
  seed: 1                  # RNG seed for reproducibility
  num_epochs: 200
  batch_size: 4
  tf_rate_upperbound: 0.8  # Teacher forcing rate during training will be linearly
  tf_rate_lowerbound: 0.0  # decaying from upperbound to lowerbound over the epochs
  verbose_step: 200        # Show progress every verbose_step