Skip to content

Commit

Permalink
Merge pull request #27327 from dimagi/dm/couch2sql-next
Browse files Browse the repository at this point in the history
Couch to SQL forms & cases: add system for encoding diffs in patch forms
  • Loading branch information
millerdev authored Apr 30, 2020
2 parents 0e8c41d + 2539dbe commit d8332de
Show file tree
Hide file tree
Showing 5 changed files with 414 additions and 41 deletions.
68 changes: 57 additions & 11 deletions corehq/apps/couch_sql_migration/casediff.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import json
import logging
from collections import defaultdict
from contextlib import contextmanager
from functools import partial
from xml.sax.saxutils import unescape

import attr

Expand Down Expand Up @@ -32,6 +34,7 @@
from corehq.util.metrics import metrics_counter

from .diff import filter_case_diffs, filter_ledger_diffs
from .diffrule import ANY
from .rebuildcase import rebuild_and_diff_cases
from .statedb import Change
from .util import retry_on_sql_error
Expand Down Expand Up @@ -115,7 +118,7 @@ def diff(couch_json, sql_json):
return couch_case, diffs, changes
diffs = diff(couch_case, sql_json)
if diffs:
if is_case_patched(diffs):
if is_case_patched(case_id, diffs):
return couch_case, [], []
form_diffs = diff_case_forms(couch_case, sql_json)
if form_diffs:
Expand Down Expand Up @@ -312,15 +315,58 @@ def iter_stock_transactions(self, form_id):
yield report.report_type, tx


def is_case_patched(diffs):
from .casepatch import PatchForm
if not (len(diffs) == 1
and diffs[0].diff_type == "set_mismatch"
and list(diffs[0].path) == ["xform_ids", "[*]"]
and diffs[0].new_value):
def is_case_patched(case_id, diffs):
"""Check if case has been patched
The case has been patched if at least one patch form has been
applied to the SQL case and if all of the given diffs are
unpatchable and match an unpatchable diff encoded in one of the
patch forms.
Additionally, diffs having a MISSING `old_value` are patched with an
empty string, which is semantically equivalent to removing the case
property in CommCare. However, a difference is detectable at the
storage level even after the patch has been applied, and therefore
these subsequent patch diffs are considered to be patched.
The "xform_ids" diff is a special exception because it is not
patchable and is not required to be present in the patch form.
:returns: True if the case has been patched else False.
"""
def is_patched(form_ids):
forms = get_sql_forms(form_ids, ordered=True)
for form in reversed(forms):
if form.xmlns == PatchForm.xmlns:
discard_expected_diffs(form.form_data.get("diff"))
if not unpatched:
return True
return False
form_ids = diffs[0].new_value.split(",")
return any(f.xmlns == PatchForm.xmlns for f in get_sql_forms(form_ids))

def discard_expected_diffs(patch_data):
data = json.loads(unescape(patch_data)) if patch_data else {}
if data.get("case_id") != case_id:
return
for diff in data.get("diffs", []):
diff.pop("reason", None)
path = tuple(diff["path"])
if path in unpatched and diff_to_json(unpatched[path], ANY) == diff:
unpatched.pop(path)

def expected_patch_diff(diff):
return not is_patchable(diff) or (
diff.old_value is MISSING and diff.new_value == "")

from .casepatch import PatchForm, is_patchable, diff_to_json
unpatched = {tuple(d.path): d for d in diffs if expected_patch_diff(d)}
xform_ids = unpatched.pop(("xform_ids", "[*]"), None)
return (
xform_ids is not None
and xform_ids.diff_type == "set_mismatch"
and xform_ids.new_value
and len(diffs) == len(unpatched) + 1 # false if any diffs are patchable
and is_patched(xform_ids.new_value.split(","))
)


def diff_case_forms(couch_json, sql_json):
Expand Down Expand Up @@ -497,5 +543,5 @@ def get_couch_form(form_id):


@retry_on_sql_error
def get_sql_forms(form_id):
return FormAccessorSQL.get_forms(form_id)
def get_sql_forms(form_id, **kw):
return FormAccessorSQL.get_forms(form_id, **kw)
110 changes: 87 additions & 23 deletions corehq/apps/couch_sql_migration/casepatch.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import json
import logging
from datetime import datetime
from functools import partial, wraps
from uuid import uuid4
from xml.sax.saxutils import escape

from django.template.loader import render_to_string

Expand Down Expand Up @@ -98,7 +100,7 @@ def __attrs_post_init__(self):
updates.extend([const.CASE_ACTION_CREATE, const.CASE_ACTION_UPDATE])
self._dynamic_properties = self.case.dynamic_case_properties()
else:
if cannot_patch(self.diffs):
if has_illegal_props(self.diffs):
raise CannotPatch(self.diffs)
props = dict(iter_dynamic_properties(self.diffs))
self._dynamic_properties = props
Expand Down Expand Up @@ -133,8 +135,42 @@ def indices(self):
yield CommCareCaseIndex.wrap(diff.old_value)


ILLEGAL_PROPS = {"actions", "*"}
IGNORE_PROPS = {"opened_by", "external_id"}
def has_illegal_props(diffs):
return any(d.path[0] in ILLEGAL_PROPS for d in diffs)


def has_known_props(diffs):
return any(d.path[0] in KNOWN_PROPERTIES for d in diffs)


def iter_dynamic_properties(diffs):
for diff in diffs:
name = diff.path[0]
if name in STATIC_PROPS:
continue
if diff.old_value is MISSING:
value = ""
elif len(diff.path) > 1 or not isinstance(diff.old_value, str):
raise CannotPatch([diff])
else:
value = diff.old_value
yield name, value


ILLEGAL_PROPS = {"actions", "case_id", "domain", "*"}
UNPATCHABLE_PROPS = {
"closed_by",
"closed_on",
"deleted_on",
"deletion_id",
"external_id",
"modified_by",
"modified_on",
"opened_by",
"opened_on",
"server_modified_on",
"xform_ids",
}
STATIC_PROPS = {
"case_id",
"closed",
Expand Down Expand Up @@ -167,25 +203,6 @@ def indices(self):
}


def cannot_patch(diffs):
return any(d.path[0] in ILLEGAL_PROPS for d in diffs) \
or all(d.path[0] in IGNORE_PROPS for d in diffs)


def has_known_props(diffs):
return any(d.path[0] in KNOWN_PROPERTIES for d in diffs)


def iter_dynamic_properties(diffs):
for diff in diffs:
name = diff.path[0]
if name in STATIC_PROPS:
continue
if len(diff.path) > 1 or not isinstance(diff.old_value, str):
raise CannotPatch([diff])
yield name, diff.old_value


@attr.s
class PatchForm:
_case = attr.ib()
Expand All @@ -204,9 +221,10 @@ def __getattr__(self, name):
def get_xml(self):
updates = self._case._updates
case_block = get_case_xml(self._case, updates, version='2.0')
diff_block = get_diff_block(self._case)
return render_to_string('hqcase/xml/case_block.xml', {
'xmlns': self.xmlns,
'case_block': case_block.decode('utf-8'),
'case_block': case_block.decode('utf-8') + diff_block,
'time': json_format_datetime(self.received_on),
'uid': self.form_id,
'username': "",
Expand Down Expand Up @@ -245,6 +263,52 @@ def add_patch_operation(sql_form):
))


def get_diff_block(case):
"""Get XML element containing case diff data
Some early patch forms were submitted without this element.
:param case: `PatchCase` instance.
:returns: A "<diff>" XML element string containing XML-escaped
JSON-encoded case diff data, some of which may be patched.
```json
{
"case_id": case.case_id,
"diffs": [
{
"path": diff.path,
"old": diff.old_value, # omitted if old_value is MISSING
"new": diff.new_value, # omitted if new_value is MISSING
"patch": true if patched else false
"reason": "...", # omitted if reason for change is unknown
},
...
]
}
```
"""
diffs = [diff_to_json(d) for d in sorted(case.diffs, key=lambda d: d.path)]
data = {"case_id": case.case_id, "diffs": diffs}
return f"<diff>{escape(json.dumps(data))}</diff>"


def diff_to_json(diff, new_value=None):
assert diff.old_value is not MISSING or diff.new_value is not MISSING, diff
obj = {"path": list(diff.path), "patch": is_patchable(diff)}
if diff.old_value is not MISSING:
obj["old"] = diff.old_value
if diff.new_value is not MISSING:
obj["new"] = diff.new_value if new_value is None else new_value
if getattr(diff, "reason", ""):
obj["reason"] = diff.reason
return obj


def is_patchable(diff):
return diff.path[0] not in UNPATCHABLE_PROPS


class CannotPatch(Exception):

def __init__(self, json_diffs):
Expand Down
Loading

0 comments on commit d8332de

Please sign in to comment.