Skip to content

Commit

Permalink
Issue #298: Re-introduce rho relying on chunksize.
Browse files (browse the repository at this point in the history)
  • Loading branch information
cscorley committed Apr 27, 2015
1 parent 1ec7acc commit 811c0ae
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions gensim/models/ldamodel.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -491,8 +491,6 @@ def update(self, corpus, chunksize=None, decay=None, offset=None,
"""
# use parameters given in constructor, unless user explicitly overrode them
- if chunksize is None:
-     chunksize = self.chunksize
if decay is None:
decay = self.decay
if offset is None:
Expand All @@ -517,6 +515,9 @@ def update(self, corpus, chunksize=None, decay=None, offset=None,
logger.warning("LdaModel.update() called with an empty corpus")
return

+ if chunksize is None:
+     chunksize = min(lencorpus, self.chunksize)
+
self.state.numdocs += lencorpus

if update_every:
Expand Down Expand Up @@ -544,7 +545,7 @@ def update(self, corpus, chunksize=None, decay=None, offset=None,
# pass_ + num_updates handles increasing the starting t for each pass,
# while allowing it to "reset" on the first pass of each update
def rho():
- return pow(offset + ((pass_ + self.num_updates)), -decay)
+ return pow(offset + pass_ + (self.num_updates / chunksize), -decay)

for pass_ in xrange(passes):
if self.dispatcher:
Expand Down Expand Up @@ -628,7 +629,7 @@ def do_mstep(self, rho, other, extra_pass=False):

if not extra_pass:
# only update if this isn't an additional pass
- self.num_updates += 1
+ self.num_updates += other.numdocs

def bound(self, corpus, gamma=None, subsample_ratio=1.0):
"""
Expand Down

0 comments on commit 811c0ae

Please sign in to comment.