forked from facebookarchive/MemNN
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparseBabiTask.m
104 lines (92 loc) · 2.89 KB
/
parseBabiTask.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
% Copyright (c) 2015-present, Facebook, Inc.
% All rights reserved.
%
% This source code is licensed under the BSD-style license found in the
% LICENSE file in the root directory of this source tree. An additional grant
% of patent rights can be found in the PATENTS file in the same directory.
function [story, questions, qstory] = parseBabiTask(data_path, dict, include_question)
%PARSEBABITASK Parse bAbI-style task files into word-index arrays.
%
%   [story, questions, qstory] = parseBabiTask(data_path, dict, include_question)
%
%   Input format (as consumed by this parser): every non-blank line starts
%   with an ID token. A line whose ID is '1' begins a new story. A line that
%   contains '?' is a question; the token following the word that ends in
%   '?' is the answer, and any remaining tokens are numbers referring to
%   earlier lines of the same story (the supporting facts).
%
%   Inputs:
%     data_path        - cell array of file paths, parsed in order; a single
%                        char path is also accepted.
%     dict             - map from lower-cased word to integer index. Unknown
%                        words are added with index length(dict)+1. It must
%                        already contain the key 'nil', which is used as the
%                        padding value. NOTE(review): dict is not returned,
%                        so it is presumably a containers.Map (handle) whose
%                        additions remain visible to the caller - confirm.
%     include_question - when true, question words are also written into
%                        story as an extra sentence. Defaults to false.
%
%   Outputs:
%     story     - single array (word, sentence, story) of dict indices.
%     questions - single array, one column per question:
%                   row 1   story index
%                   row 2   number of sentences in the story before the question
%                   row 3   dict index of the answer
%                   row 4+  sentence indices of the supporting facts
%                   row 10  line number of the question within its file
%     qstory    - single array (word, question) of dict indices.
%   Unused cells of story and qstory are filled with dict('nil').
%
%   Errors:
%     parseBabiTask:fileOpen       a file could not be opened.
%     parseBabiTask:missingAnswer  a question has no token after the '?'.

    if nargin < 3
        include_question = false;
    end
    if ischar(data_path)
        data_path = {data_path};
    end

    % Preallocated buffers; trimmed to the used extent before returning.
    story = zeros(20, 1000, 1000, 'single');
    questions = zeros(10, 1000, 'single');
    qstory = zeros(20, 1000, 'single');

    story_ind = 0;
    sentence_ind = 0;
    question_ind = 0;
    max_words = 0;
    max_sentences = 0;
    % sentence_map(i) is the sentence index assigned to the i-th line of the
    % current story; supporting-fact numbers are translated through it.
    sentence_map = [];

    for fi = 1:length(data_path)
        fd = fopen(data_path{fi});
        if fd < 0
            error('parseBabiTask:fileOpen', 'Cannot open file: %s', data_path{fi});
        end
        % Guarantees the file is closed even if parsing throws.
        file_guard = onCleanup(@() fclose(fd));

        line_ind = 0;
        while true
            line = fgets(fd);
            if ~ischar(line)
                break   % end of this file (also covers an empty file)
            end
            line_ind = line_ind + 1;

            words = textscan(line, '%s');
            words = words{1};
            if isempty(words)
                continue   % blank line: counted in line_ind, otherwise ignored
            end

            if strcmp(words{1}, '1')
                story_ind = story_ind + 1;
                sentence_ind = 0;
                sentence_map = [];
            end

            is_question = any(line == '?');
            if is_question
                question_ind = question_ind + 1;
                questions(1, question_ind) = story_ind;
                % Recorded before the optional increment below.
                questions(2, question_ind) = sentence_ind;
                if include_question
                    sentence_ind = sentence_ind + 1;
                end
            else
                sentence_ind = sentence_ind + 1;
            end
            sentence_map(end+1) = sentence_ind;

            % words{1} is the line ID, so the sentence starts at token 2.
            for k = 2:length(words)
                token = words{k};
                w = lower(token);
                if w(end) == '.' || w(end) == '?'
                    w = w(1:end-1);
                end
                if ~isKey(dict, w)
                    dict(w) = length(dict) + 1;
                end
                word_id = dict(w);
                max_words = max(max_words, k-1);

                if ~is_question
                    story(k-1, sentence_ind, story_ind) = word_id;
                    continue
                end

                qstory(k-1, question_ind) = word_id;
                if include_question
                    story(k-1, sentence_ind, story_ind) = word_id;
                end

                if token(end) == '?'
                    if k + 1 > length(words)
                        error('parseBabiTask:missingAnswer', ...
                            'No answer after the question at line %d of %s', ...
                            line_ind, data_path{fi});
                    end
                    answer = lower(words{k+1});
                    if ~isKey(dict, answer)
                        dict(answer) = length(dict) + 1;
                    end
                    questions(3, question_ind) = dict(answer);
                    % Supporting facts fill rows 4, 5, ... in order.
                    % str2double is used instead of str2num so that file
                    % contents are never evaluated as code.
                    for h = k+2:length(words)
                        questions(2+h-k, question_ind) = sentence_map(str2double(words{h}));
                    end
                    questions(10, question_ind) = line_ind;
                    break   % tokens after the '?' word are not sentence words
                end
            end

            max_sentences = max(max_sentences, sentence_ind);
        end

        clear file_guard   % closes fd now, before the next file is opened
    end

    % Trim the buffers to what was actually used and pad with 'nil'.
    story = story(1:max_words, 1:max_sentences, 1:story_ind);
    questions = questions(:, 1:question_ind);
    qstory = qstory(1:max_words, 1:question_ind);
    story(story == 0) = dict('nil');
    qstory(qstory == 0) = dict('nil');
end