forked from zergey/nupic_nlp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_association_experiment.py
executable file
·124 lines (102 loc) · 3.7 KB
/
run_association_experiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/env python
import os
import sys
import time
from optparse import OptionParser
from nupic_nlp import SDR_Builder, Nupic_Word_Client, Association_Runner
from pycept.cept import RETINA_SIZES
# The CEPT API key is mandatory. When it is missing, print registration
# instructions and abort before any further setup runs.
cept_app_key = os.environ.get('CEPT_APP_KEY')
if cept_app_key is None:
    print('Missing CEPT_APP_KEY environment variables.')
    print('You can retrieve these by registering for the CEPT API at ')
    print('https://cept.3scale.net/')
    # sys.exit() rather than quit(): quit() is injected by the `site` module
    # for interactive sessions and is not available when site is not loaded.
    sys.exit(-1)
# Defaults for the command-line options defined below. The term-count
# defaults are strings because main() treats the parsed values as text
# (e.g. --max-terms also accepts the literal "all").
DEFAULT_MAX_TERMS = '100'
DEFAULT_MIN_sparsity = 2.0  # percent
DEFAULT_PREDICTION_START = '50'
DEFAULT_RETINA = 'eng_gen'

# Directory that main() creates on demand and passes to SDR_Builder.
cache_dir = './cache'

# Command-line interface: one or more positional input files plus options.
parser = OptionParser(usage="%prog input_file [options]")
parser.add_option(
    '-t', '--max-terms',
    default=DEFAULT_MAX_TERMS,
    dest='max_terms',
    help=('Maximum terms to process. Specify "all" to process all '
          'available terms.'))
parser.add_option(
    '-s', '--min-sparsity',
    default=DEFAULT_MIN_sparsity,
    dest='min_sparsity',
    help=('Minimum SDR sparsity threshold. Any words processed with '
          'sparsity lower than this value will be ignored.'))
parser.add_option(
    '-p', '--prediction-start',
    default=DEFAULT_PREDICTION_START,
    dest='prediction_start',
    help=('Start converting predicted values into words using the CEPT '
          'API after this many values have been seen.'))
parser.add_option(
    '-r', '--retina',
    default=DEFAULT_RETINA,
    dest='retina',
    help='Which retina to use from cortical.io')
parser.add_option(
    '--triples',
    action="store_true",
    default=False,
    dest='predict_triples',
    help='If specified, assumes word file contains word triples')
parser.add_option(
    "-v", "--verbose",
    action="store_true",
    default=False,
    dest="verbose",
    help="Prints details about errors and API calls.")
def main(*args, **kwargs):
    """NuPIC NLP main entry point.

    Parses the command line, builds the SDR builder and the NuPIC word
    client, and runs the association experiment selected by the number of
    positional input files and the ``--triples`` flag:

    * one file: ``direct_association`` (``direct_association_triples`` when
      ``--triples`` is given);
    * more than one file without ``--triples``: ``random_dual_association``
      on the first two files;
    * more than one file with ``--triples``: prints a usage hint and returns.

    The ``*args``/``**kwargs`` parameters are accepted but not used; the
    positional arguments are taken from ``parser.parse_args()``.

    Exits with status 1 when no input file is provided.
    """
    (options, input_files) = parser.parse_args()

    # Validate the positional arguments first so a bad invocation fails
    # before the cache directory, SDR builder and NuPIC client are created.
    if not input_files:
        print('no input file provided!')
        sys.exit(1)
    if options.predict_triples and len(input_files) > 1:
        print("Please specify exactly one input file containing triples")
        return

    if options.max_terms.lower() == 'all':
        max_terms = sys.maxint
    else:
        max_terms = int(options.max_terms)
    min_sparsity = float(options.min_sparsity)
    prediction_start = int(options.prediction_start)
    verbosity = 1 if options.verbose else 0
    retina = options.retina

    # Create the cache directory if necessary.
    if not os.path.exists(cache_dir):
        os.mkdir(cache_dir)

    builder = SDR_Builder(cept_app_key, cache_dir,
                          verbosity=verbosity,
                          retina=retina)

    def size_to_thresholds(sdr_size):
        """Scale minThreshold and activationThreshold according to sdr_size.

        The base values (80 and 100) apply to a 128x128 SDR and are scaled
        linearly with the actual SDR size.
        """
        factor = float(sdr_size) / (128 * 128)
        return 80 * factor, 100 * factor

    sdr_size = RETINA_SIZES[retina]['width'] * RETINA_SIZES[retina]['height']
    minThreshold, activationThreshold = size_to_thresholds(sdr_size)

    if options.predict_triples:
        # Instantiate TP with parameters for Fox demo
        nupic = Nupic_Word_Client(numberOfCols=sdr_size,
                                  minThreshold=minThreshold,
                                  activationThreshold=activationThreshold,
                                  pamLength=10)
    else:
        nupic = Nupic_Word_Client(numberOfCols=sdr_size)

    if options.verbose:
        nupic.printParameters()

    runner = Association_Runner(builder, nupic,
                                max_terms, min_sparsity,
                                prediction_start, verbosity=verbosity)

    if len(input_files) == 1:
        if options.predict_triples:
            if options.verbose:
                print("Predicting triples!")
            runner.direct_association_triples(input_files[0])
        else:
            runner.direct_association(input_files[0])
    else:
        # More than one file; the --triples combination was rejected above.
        runner.random_dual_association(input_files[0], input_files[1])
if __name__ == '__main__':
    # Script entry point: run the experiment, then pause for 30 seconds
    # before the process exits.
    # NOTE(review): presumably the pause keeps a console window open long
    # enough to read the output -- confirm it is still wanted.
    main()
    time.sleep(30)