Skip to content

Commit

Permalink
Fixes issue #298 for LdaMulticore
Browse files: browse the repository at this point in the history
  • Loading branch information
cscorley committed May 24, 2015
1 parent e620910 commit b4d4d3b
Showing 1 changed file with 7 additions and 4 deletions.
11 changes: 7 additions & 4 deletions gensim/models/ldamulticore.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,6 @@ def update(self, corpus):
converge for any `decay` in (0.5, 1.0].
"""
# rho is the "speed" of updating, decelerating over time
rho = lambda: pow(self.offset + self.num_updates / self.chunksize, -self.decay)

try:
lencorpus = len(corpus)
except:
Expand Down Expand Up @@ -195,6 +192,12 @@ def update(self, corpus):
job_queue = Queue(maxsize=2 * self.workers)
result_queue = Queue()

# rho is the "speed" of updating; TODO: try other functions.
# Adding pass_ to the scaled num_updates raises the starting t on every pass,
# while still allowing it to "reset" on the first pass of each update.
def rho():
    # t is the effective time step fed into the power-law decay below.
    t = self.offset + pass_ + (self.num_updates / self.chunksize)
    return pow(t, -self.decay)

logger.info("training LDA model using %i processes", self.workers)
pool = Pool(self.workers, worker_e_step, (job_queue, result_queue,))
for pass_ in xrange(self.passes):
Expand All @@ -213,7 +216,7 @@ def process_result_queue(force=False):
queue_size[0] -= 1
merged_new = True
if (force and merged_new and queue_size[0] == 0) or (not self.batch and (other.numdocs >= updateafter)):
self.do_mstep(rho(), other)
self.do_mstep(rho(), other, pass_ > 0)
other.reset()
if self.eval_every is not None and ((force and queue_size[0] == 0) or (self.eval_every != 0 and (self.num_updates / updateafter) % self.eval_every == 0)):
self.log_perplexity(chunk, total_docs=lencorpus)
Expand Down

0 comments on commit b4d4d3b

Please sign in to comment.