-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathco_occurence.py
56 lines (38 loc) · 1.37 KB
/
co_occurence.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from step1_text import get_textual_features
from lda_image import lda_image
import os
import cPickle
import numpy as np
def get_vistex():
    """Build one visual-textual feature matrix per image and stack them.

    For every entry of ``model.doc_topics`` the image path at the same index
    (minus its last four characters -- presumably the file extension, TODO
    confirm) is looked up in the textual data returned by
    ``get_textual_features()``.  Images with no textual entry are skipped.
    For the others a zero matrix of shape ``(textual_vocab, num_topics)`` is
    created and the rows selected by the image's textual words are set to
    the image's topic vector.

    Returns:
        numpy.ndarray of shape
        ``(n_matched_images, textual_vocab, model.num_topics)``.

    Raises:
        IOError: if the image-path cache ``cache/paths.pkl`` does not exist.
        ValueError: if no image could be matched to textual data.
    """
    ipath_cache_file = os.path.join('cache', 'paths.pkl')
    # Fail fast, before any other work is done: without the cached paths no
    # image can be matched.  Previously the missing variable was silently
    # swallowed by a bare ``except`` and surfaced as an unrelated np.stack
    # error.
    if not os.path.isfile(ipath_cache_file):
        raise IOError('Image path cache not found: ' + ipath_cache_file)

    data_path = os.path.join('dataset', 'ImageCLEFmed2009_train.02')
    model = lda_image(data_path)
    textual_data, textual_vocab = get_textual_features()
    print(len(textual_data))

    print('Loading image paths from : ' + ipath_cache_file)
    # NOTE(review): unpickling executes arbitrary code; this assumes the
    # cache file is a trusted, locally generated artifact.
    with open(ipath_cache_file, 'rb') as f:
        ipath = cPickle.load(f)
    print('Done!')

    vistex = []
    for i, doc_topics in enumerate(model.doc_topics):
        try:
            textual_words = textual_data[ipath[i][:-4]]
        except (KeyError, IndexError):
            # Image has no textual entry (or no cached path): skip it.
            continue
        feature = np.zeros((textual_vocab, model.num_topics))
        # Every row belonging to one of the image's words receives the
        # image's topic vector; all other rows stay zero.
        feature[textual_words, :] = doc_topics
        vistex.append(feature)

    if not vistex:
        # np.stack would raise a ValueError here anyway; make it readable.
        raise ValueError('No image could be matched to textual data; '
                         'cannot build the visual-textual features')
    return np.stack(vistex)
if __name__ == '__main__':
    # Smoke test: build the stacked visual-textual features and report
    # the dimensions of the resulting array.
    vistex_features = get_vistex()
    print(vistex_features.shape)