Skip to content

Commit

Permalink
Merge pull request #65 from bluemoon/bradford-recurse-poc
Browse files Browse the repository at this point in the history
Recursion as an option PoC
  • Loading branch information
Adam DePue authored Apr 3, 2017
2 parents e0135de + 33cfd52 commit 48298a2
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 2 deletions.
25 changes: 24 additions & 1 deletion normalize/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,8 @@ def __init__(self, ignore_ws=True, ignore_case=False,
unicode_normal=True, unchanged=False,
ignore_empty_slots=False, ignore_empty_items=False,
duck_type=False, extraneous=False,
compare_filter=None, fuzzy_match=True, moved=False):
compare_filter=None, fuzzy_match=True, moved=False,
recurse=False):
"""Create a new ``DiffOptions`` instance.
args:
Expand Down Expand Up @@ -183,6 +184,11 @@ def __init__(self, ignore_ws=True, ignore_case=False,
Restrict comparison to the fields described by the passed
:py:class:`MultiFieldSelector` (or list of FieldSelector
lists/objects)
``recurse=``\ *BOOL* During diff operations, do a deeper
comparison via recursion. This can be computationally very
expensive if your records are large or deeply nested.
"""
self.ignore_ws = ignore_ws
self.ignore_case = ignore_case
Expand All @@ -194,6 +200,7 @@ def __init__(self, ignore_ws=True, ignore_case=False,
self.moved = moved
self.duck_type = duck_type
self.extraneous = extraneous
self.recurse = recurse
if isinstance(compare_filter, (MultiFieldSelector, types.NoneType)):
self.compare_filter = compare_filter
else:
Expand Down Expand Up @@ -660,6 +667,22 @@ def compare_collection_iter(propval_a, propval_b, fs_a=None, fs_b=None,
rev_key[(pk, seen[pk])] = k
seen[pk] += 1

if options.recurse:
# we can be sure that both records have these keys
set_a = set(rev_keys["a"].values())
set_b = set(rev_keys["b"].values())
shared_keys = set_a.intersection(set_b)
for key in shared_keys:
if (isinstance(propval_a, collections.Iterable) and
isinstance(propval_b, collections.Iterable)):

diffs = _diff_iter(propval_a[key], propval_b[key],
fs_a + [key], fs_b + [key], options)
for diff in diffs:
yield diff
# early exit
return

removed = values['a'] - values['b']
added = values['b'] - values['a']
common = values['a'].intersection(values['b'])
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
install_requires=('richenum>=1.0.0',),
tests_require=('nose', 'unittest2'),
test_suite="run_tests",
version='1.0.1',
version='1.0.2',
url="http://hearsaycorp.github.io/normalize",
classifiers=[
'Development Status :: 5 - Production/Stable',
Expand Down

0 comments on commit 48298a2

Please sign in to comment.