forked from sgrvinod/a-PyTorch-Tutorial-to-Image-Captioning
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathModelParameters.txt
175 lines (152 loc) · 8.41 KB
/
ModelParameters.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
Parameters to watch: checkpoint_folder_name, emb_dim, attention_dim, decoder_dim, batch_size,
fine_tune_encoder, training how many images are we taking
Decoder Model No Pretrained Embedding:
# Data parameters
data_folder = '../outdoor_output_full' # folder with data files saved by create_input_files.py
data_name = 'coco_5_cap_per_img_5_min_word_freq' # base name shared by data files
checkpoint_folder_name = 'full_data_no_pretrained_embedding'
# Model parameters
emb_dim = 512 # dimension of word embeddings
attention_dim = 512 # dimension of attention linear layers
decoder_dim = 512 # dimension of decoder RNN
dropout = 0.5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # sets device for model and PyTorch tensors
cudnn.benchmark = True # set to true only if inputs to model are fixed size; otherwise lot of computational overhead
# Training parameters
start_epoch = 0
epochs = 20 # number of epochs to train for (if early stopping is not triggered)
epochs_since_improvement = 0 # keeps track of number of epochs since there's been an improvement in validation BLEU
batch_size = 80
workers = 1 # for data-loading; right now, only 1 works with h5py
encoder_lr = 1e-4 # learning rate for encoder if fine-tuning
decoder_lr = 4e-4 # learning rate for decoder
grad_clip = 5. # clip gradients at an absolute value of
alpha_c = 1. # regularization parameter for 'doubly stochastic attention', as in the paper
best_bleu4 = 0. # BLEU-4 score right now
print_freq = 10 # print training/validation stats every __ batches
fine_tune_encoder = False # fine-tune encoder?
checkpoint = None # path to checkpoint, None if none
pretrained_embeddings = False
Decoder Model with Pretrained Glove Embeddings:
# Data parameters
data_folder = '../outdoor_output_full' # folder with data files saved by create_input_files.py
data_name = 'coco_5_cap_per_img_5_min_word_freq' # base name shared by data files
checkpoint_folder_name = 'full_data_pretained_embedding_11_7'
# Model parameters
emb_dim = 100 # dimension of word embeddings
attention_dim = 512 # dimension of attention linear layers
decoder_dim = 512 # dimension of decoder RNN
dropout = 0.5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # sets device for model and PyTorch tensors
cudnn.benchmark = True # set to true only if inputs to model are fixed size; otherwise lot of computational overhead
# Training parameters
start_epoch = 0
epochs = 10 # number of epochs to train for (if early stopping is not triggered)
epochs_since_improvement = 0 # keeps track of number of epochs since there's been an improvement in validation BLEU
batch_size = 80
workers = 1 # for data-loading; right now, only 1 works with h5py
encoder_lr = 1e-4 # learning rate for encoder if fine-tuning
decoder_lr = 4e-4 # learning rate for decoder
grad_clip = 5. # clip gradients at an absolute value of
alpha_c = 1. # regularization parameter for 'doubly stochastic attention', as in the paper
best_bleu4 = 0. # BLEU-4 score right now
print_freq = 10 # print training/validation stats every __ batches
fine_tune_encoder = False # fine-tune encoder?
checkpoint = None # path to checkpoint, None if none
pretrained_embeddings = True
Encoder Model with 300-dim Pretrained Glove Embeddings, blurred training data:
# Data parameters
data_folder = '../data_outdoor_full_coco_blurred/min1e-10tomax6' # folder with data files saved by create_input_files.py
data_name = 'coco_5_cap_per_img_5_min_word_freq' # base name shared by data files
checkpoint_folder_name = 'checkpoints/full_pretrained_300_blur_finetune'
# Model parameters
emb_dim = 300 # dimension of word embeddings
attention_dim = 512 # dimension of attention linear layers
decoder_dim = 512 # dimension of decoder RNN
dropout = 0.5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # sets device for model and PyTorch tensors
cudnn.benchmark = True # set to true only if inputs to model are fixed size; otherwise lot of computational overhead
# Training parameters
start_epoch = 0
epochs = 22 # number of epochs to train for (if early stopping is not triggered)
epochs_since_improvement = 0 # keeps track of number of epochs since there's been an improvement in validation BLEU
batch_size = 32
workers = 1 # for data-loading; right now, only 1 works with h5py
encoder_lr = 1e-4 # learning rate for encoder if fine-tuning
decoder_lr = 0.8 * 4e-4 # learning rate for decoder
grad_clip = 5. # clip gradients at an absolute value of
alpha_c = 1. # regularization parameter for 'doubly stochastic attention', as in the paper
best_bleu4 = 0. # BLEU-4 score right now
print_freq = 10 # print training/validation stats every __ batches
fine_tune_encoder = True # fine-tune encoder?
checkpoint = 'checkpoints/full_pretrained_300_blur_1e-10to6/BEST_checkpoint_coco_5_cap_per_img_5_min_word_freq.pth.tar' # path to checkpoint, None if none
pretrained_embeddings = True
Decoder Model from Milestone, with full dataset and adaptive learning:
# Data parameters
data_folder = '../data_outdoor_full_coco' # folder with data files saved by create_input_files.py
data_name = 'coco_5_cap_per_img_5_min_word_freq' # base name shared by data files
checkpoint_folder_name = 'checkpoints/full_milestone_decoder_beam'
# Model parameters
emb_dim = 512 # dimension of word embeddings
attention_dim = 512 # dimension of attention linear layers
decoder_dim = 512 # dimension of decoder RNN
dropout = 0.5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # sets device for model and PyTorch tensors
cudnn.benchmark = True # set to true only if inputs to model are fixed size; otherwise lot of computational overhead
# Training parameters
start_epoch = 0
epochs = 14 # number of epochs to train for (if early stopping is not triggered)
epochs_since_improvement = 0 # keeps track of number of epochs since there's been an improvement in validation BLEU
batch_size = 80
workers = 1 # for data-loading; right now, only 1 works with h5py
encoder_lr = 1e-4 # learning rate for encoder if fine-tuning
decoder_lr = 4e-4 # learning rate for decoder
grad_clip = 5. # clip gradients at an absolute value of
alpha_c = 1. # regularization parameter for 'doubly stochastic attention', as in the paper
best_bleu4 = 0. # BLEU-4 score right now
print_freq = 10 # print training/validation stats every __ batches
fine_tune_encoder = False # fine-tune encoder?
checkpoint = None # path to checkpoint, None if none
pretrained_embeddings = False
adaptive_learning = True
## Schedules sampling###
scheduled_sampling_start = 50
scheduled_sampling_increase_every = 4
scheduled_sampling_max_prob = 0.25
scheduled_sampling_increase_prob = 0.08
Eval.py, Encoder finetuning no glove embedding:
nlgeval = NLGEval(no_skipthoughts=True, no_glove=True) # loads the models
print('loaded nlgeval..')
# Parameters
data_folder = '../outdoor_output' # folder with data files saved by create_input_files.py
data_name = 'coco_5_cap_per_img_5_min_word_freq' # base name shared by data files
# checkpoint = 'checkpoints/BEST_checkpoint_coco_5_cap_per_img_5_min_word_freq.pth.tar' # model checkpoint
# checkpoint = 'BEST_checkpoint_encoder_finetune_coco_5_cap_per_img_5_min_word_freq.pth.tar'
checkpoint='BEST_checkpoint_encoder_finetune_coco_5_cap_per_img_5_min_word_freq.pth.tar'
word_map_file = '../outdoor_output/WORDMAP_coco_5_cap_per_img_5_min_word_freq.json' # word map, ensure it's the same the data was encoded with and the model was trained with
# word_map_file = '/data/code/WORDMAP_pretrained_coco.json'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # sets device for model and PyTorch tensors
cudnn.benchmark = True # set to true only if inputs to model are fixed size; otherwise lot of computational overhead
# Load model
checkpoint = torch.load(checkpoint)
decoder = checkpoint['decoder']
decoder = decoder.to(device)
decoder.eval()
encoder = checkpoint['encoder']
encoder = encoder.to(device)
encoder.eval()
# Load word map (word2ix)
with open(word_map_file, 'r') as j:
word_map = json.load(j)
rev_word_map = {v: k for k, v in word_map.items()}
vocab_size = len(word_map)
# Normalization transform
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
torch.manual_seed(17) #we fix the test set blurring this way
#Currently testing on (0.000001, 3) (3, 6) and (0.000001, 6)
min_sigma = 3 #0.000001
max_sigma = 6 #3
blur = transforms.GaussianBlur(5, sigma=(min_sigma, max_sigma))
dataset = 'TEST'
toblur = True