# coding: utf-8
import argparse
import math
import os

import torch
from torch.autograd import Variable

import data
# BBBRNNModel must be importable so torch.load can unpickle the saved model.
from model import BBBRNNModel
parser = argparse.ArgumentParser(description='PyTorch Wikitext-2 RNN/LSTM Language Model')
parser.add_argument('--data', type=str, default='./data/ptb/',
                    help='location of the data corpus')
parser.add_argument('--logdir', default='./logs/',
                    help='directory containing the saved model checkpoint')
parser.add_argument('--cuda', action='store_true',
                    help='use CUDA')
parser.add_argument('--bptt', type=int, default=35,
                    help='sequence length')
args = parser.parse_args()
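# Example invocation (paths are illustrative; the defaults assume a PTB-style
# corpus laid out as expected by the accompanying data.py):
#   python test.py --data ./data/ptb/ --logdir ./logs/ --cuda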
###############################################################################
# Load data
###############################################################################
corpus = data.Corpus(args.data)
# Starting from sequential data, batchify arranges the dataset into columns.
# For instance, with the alphabet as the sequence and batch size 4, we'd get
# ┌ a g m s ┐
# │ b h n t │
# │ c i o u │
# │ d j p v │
# │ e k q w │
# └ f l r x ┘.
# These columns are treated as independent by the model, which means that the
# dependence of e. g. 'g' on 'f' can not be learned, but allows more efficient
# batch processing.
def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    if args.cuda:
        data = data.cuda()
    return data
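# Sanity check on shapes (illustrative numbers): a corpus of 1003 tokens with
# bsz=50 yields a 20 x 50 tensor (nbatch x bsz); the trailing 3 tokens
# (1003 % 50) are trimmed off by the narrow() call above.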
eval_batch_size = 50
train_data = batchify(corpus.train, eval_batch_size)
val_data = batchify(corpus.valid, eval_batch_size)
test_data = batchify(corpus.test, eval_batch_size)
train_rev_data = batchify(corpus.train_rev, eval_batch_size)
val_rev_data = batchify(corpus.valid_rev, eval_batch_size)
test_rev_data = batchify(corpus.test_rev, eval_batch_size)
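# The *_rev splits are exposed by data.Corpus and are presumably token-reversed
# copies of each split. Every split uses the eval batch size, since this
# script only evaluates and never trains.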
###############################################################################
# Vocabulary
###############################################################################
ntokens = len(corpus.dictionary)  # vocabulary size (kept for reference)
###############################################################################
# Evaluation code
###############################################################################
def repackage_hidden(h):
    """Wraps hidden states in new Variables, to detach them from their history."""
    if type(h) == Variable:
        return Variable(h.data)
    else:
        return tuple(repackage_hidden(v) for v in h)
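# For an LSTM the hidden state is an (h, c) tuple, so the recursive branch
# detaches both tensors; for a plain RNN or GRU it is a single Variable.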
# get_batch subdivides the source data into chunks of length args.bptt.
# If source is equal to the example output of the batchify function, with
# a bptt-limit of 2, we'd get the following two Variables for i = 0:
# ┌ a g m s ┐ ┌ b h n t ┐
# └ b h n t ┘ └ c i o u ┘
# Note that despite the name of the function, the subdivision of data is not
# done along the batch dimension (i.e. dimension 1), since that was handled
# by the batchify function. The chunks are along dimension 0, corresponding
# to the seq_len dimension in the LSTM.
def get_batch(source, i, evaluation=False):
    seq_len = min(args.bptt, len(source) - 1 - i)
    data = Variable(source[i:i+seq_len], volatile=evaluation)
    target = Variable(source[i+1:i+1+seq_len].view(-1))
    return data, target
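# data has shape (seq_len, batch) and target is the flattened sequence of
# next tokens, of length seq_len * batch, aligned with the model's output.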
def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_nll = 0
    hidden = model.init_hidden(eval_batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, evaluation=True)
        output, hidden = model(data, hidden, targets)
        NLL = model.get_nll(output, targets).data
        total_nll += len(data) * NLL
        hidden = repackage_hidden(hidden)
    return total_nll[0] / len(data_source)
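# evaluate returns the average per-timestep negative log-likelihood (assuming
# model.get_nll returns a mean over each chunk), so math.exp(loss) below is
# the perplexity. The signatures of model(...), model.get_nll(...) and
# model.init_hidden(...) are taken on trust from BBBRNNModel.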
# Load the best saved model.
model_path = os.path.join(args.logdir, "model.pt")
with open(model_path, 'rb') as f:
    model = torch.load(f)
if args.cuda:
    model.cuda()
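# Note: the checkpoint is assumed to have been written with torch.save(model, f)
# (a fully pickled module), matching the torch.load(f) call above.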
# Run on the train, validation, and test data (forward and reversed).
train_loss = evaluate(train_data)
train_rev_loss = evaluate(train_rev_data)
print('=' * 89)
print('train ppl {:8.2f}, train_rev ppl {:8.2f}'.format(
    math.exp(train_loss), math.exp(train_rev_loss)))
print('=' * 89)
val_loss = evaluate(val_data)
val_rev_loss = evaluate(val_rev_data)
print('=' * 89)
print('val ppl {:8.2f}, val_rev ppl {:8.2f}'.format(
    math.exp(val_loss), math.exp(val_rev_loss)))
print('=' * 89)
test_loss = evaluate(test_data)
test_rev_loss = evaluate(test_rev_data)
print('=' * 89)
print('test ppl {:8.2f}, test_rev ppl {:8.2f}'.format(
    math.exp(test_loss), math.exp(test_rev_loss)))
print('=' * 89)