From 8235c218d79c2c5bcb813122097c522fb1b166bc Mon Sep 17 00:00:00 2001 From: Bradford Toney Date: Wed, 22 Mar 2017 13:39:37 -0700 Subject: [PATCH 1/4] Recursion as an option PoC --- normalize/diff.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/normalize/diff.py b/normalize/diff.py index d684fd1..b807f5e 100644 --- a/normalize/diff.py +++ b/normalize/diff.py @@ -128,7 +128,8 @@ def __init__(self, ignore_ws=True, ignore_case=False, unicode_normal=True, unchanged=False, ignore_empty_slots=False, ignore_empty_items=False, duck_type=False, extraneous=False, - compare_filter=None, fuzzy_match=True, moved=False): + compare_filter=None, fuzzy_match=True, moved=False, + recurse=False): """Create a new ``DiffOptions`` instance. args: @@ -183,6 +184,8 @@ def __init__(self, ignore_ws=True, ignore_case=False, Restrict comparison to the fields described by the passed :py:class:`MultiFieldSelector` (or list of FieldSelector lists/objects) + + ``recurse=``\ *BOOL* Do a deeper comparison via recursion """ self.ignore_ws = ignore_ws self.ignore_case = ignore_case @@ -194,6 +197,7 @@ def __init__(self, ignore_ws=True, ignore_case=False, self.moved = moved self.duck_type = duck_type self.extraneous = extraneous + self.recurse = recurse if isinstance(compare_filter, (MultiFieldSelector, types.NoneType)): self.compare_filter = compare_filter else: @@ -660,6 +664,20 @@ def compare_collection_iter(propval_a, propval_b, fs_a=None, fs_b=None, rev_key[(pk, seen[pk])] = k seen[pk] += 1 + if options.recurse: + # we can be sure that both records have these keys + shared_keys = set(rev_key.values()) + for key in shared_keys: + if (isinstance(propval_a, collections.Iterable) and + isinstance(propval_b, collections.Iterable)): + + diffs = _diff_iter(propval_a[key], propval_b[key], + fs_a + [key], fs_b + [key], options) + for diff in diffs: + yield diff + # early exit + return + removed = values['a'] - values['b'] added = values['b'] - values['a'] common = values['a'].intersection(values['b']) From 2eceaad0e56d34ee267c8c3ef165275fabe16a0c Mon Sep 17 00:00:00 2001 From: Bradford Toney Date: Sun, 2 Apr 2017 16:42:40 -0700 Subject: [PATCH 2/4] Change the diffing set --- normalize/diff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/normalize/diff.py b/normalize/diff.py index b807f5e..ec76add 100644 --- a/normalize/diff.py +++ b/normalize/diff.py @@ -666,7 +666,7 @@ def compare_collection_iter(propval_a, propval_b, fs_a=None, fs_b=None, if options.recurse: # we can be sure that both records have these keys - shared_keys = set(rev_key.values()) + shared_keys = set(rev_keys["a"].values()).intersection(rev_keys["b"].values()) for key in shared_keys: if (isinstance(propval_a, collections.Iterable) and isinstance(propval_b, collections.Iterable)): From 02b7dc0c93dc60dd83c786f3a0277ab907586d54 Mon Sep 17 00:00:00 2001 From: Bradford Toney Date: Sun, 2 Apr 2017 21:47:15 -0700 Subject: [PATCH 3/4] Fix diffing long line/Bump version --- normalize/diff.py | 4 +++- setup.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/normalize/diff.py b/normalize/diff.py index ec76add..215e6e0 100644 --- a/normalize/diff.py +++ b/normalize/diff.py @@ -666,7 +666,9 @@ def compare_collection_iter(propval_a, propval_b, fs_a=None, fs_b=None, if options.recurse: # we can be sure that both records have these keys - shared_keys = set(rev_keys["a"].values()).intersection(rev_keys["b"].values()) + set_a = set(rev_keys["a"].values()) + set_b = set(rev_keys["b"].values()) + shared_keys = set_a.intersection(set_b) for key in shared_keys: if (isinstance(propval_a, collections.Iterable) and isinstance(propval_b, collections.Iterable)): diff --git a/setup.py b/setup.py index 9507ed7..d16780a 100755 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ install_requires=('richenum>=1.0.0',), tests_require=('nose', 'unittest2'), test_suite="run_tests", - version='1.0.1', + version='1.0.2', url="http://hearsaycorp.github.io/normalize", classifiers=[ 'Development Status :: 5 - Production/Stable', From 33cfd52635962b40534711468dc1e8cfb5584bd7 Mon Sep 17 00:00:00 2001 From: Bradford Toney Date: Sun, 2 Apr 2017 22:48:12 -0700 Subject: [PATCH 4/4] Write a proper message for `recurse` option --- normalize/diff.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/normalize/diff.py b/normalize/diff.py index 215e6e0..7f45cfc 100644 --- a/normalize/diff.py +++ b/normalize/diff.py @@ -185,7 +185,10 @@ def __init__(self, ignore_ws=True, ignore_case=False, :py:class:`MultiFieldSelector` (or list of FieldSelector lists/objects) - ``recurse=``\ *BOOL* Do a deeper comparison via recursion + ``recurse=``\ *BOOL* During diff operations, do a deeper + comparison via recursion. This may be potentially very + expensive computationally if your records are large or + very nested. """ self.ignore_ws = ignore_ws self.ignore_case = ignore_case