Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SQL Repeat Record Report & Case Data Report #29029

Merged
merged 21 commits into from
Apr 14, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 56 additions & 2 deletions corehq/motech/repeaters/dbaccessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,19 +30,36 @@ def get_cancelled_repeat_record_count(domain, repeater_id):


def get_repeat_record_count(domain, repeater_id=None, state=None):
from .models import are_repeat_records_migrated

if are_repeat_records_migrated(domain):
return get_sql_repeat_record_count(domain, repeater_id, state)
return get_couch_repeat_record_count(domain, repeater_id, state)


def get_couch_repeat_record_count(domain, repeater_id=None, state=None):
from .models import RepeatRecord
kwargs = dict(
include_docs=False,
reduce=True,
descending=True,
)
kwargs.update(_get_startkey_endkey_all_records(domain, repeater_id, state))

result = RepeatRecord.get_db().view('repeaters/repeat_records', **kwargs).one()

return result['value'] if result else 0


def get_sql_repeat_record_count(domain, repeater_id=None, state=None):
from .models import SQLRepeatRecord

queryset = SQLRepeatRecord.objects.filter(domain=domain)
if repeater_id:
queryset = queryset.filter(repeater_stub__repeater_id=repeater_id)
if state:
queryset = queryset.filter(state=state)
return queryset.count()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it important for this count to be exact? If not, would it be good enough to estimate it using the query planner (much faster)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.



def get_overdue_repeat_record_count(overdue_threshold=datetime.timedelta(minutes=10)):
from .models import RepeatRecord
overdue_datetime = datetime.datetime.utcnow() - overdue_threshold
Expand Down Expand Up @@ -75,6 +92,14 @@ def _get_startkey_endkey_all_records(domain, repeater_id=None, state=None):


def get_paged_repeat_records(domain, skip, limit, repeater_id=None, state=None):
from .models import are_repeat_records_migrated

if are_repeat_records_migrated(domain):
return get_paged_sql_repeat_records(domain, skip, limit, repeater_id, state)
return get_paged_couch_repeat_records(domain, skip, limit, repeater_id, state)


def get_paged_couch_repeat_records(domain, skip, limit, repeater_id=None, state=None):
from .models import RepeatRecord
kwargs = {
'include_docs': True,
Expand All @@ -90,6 +115,19 @@ def get_paged_repeat_records(domain, skip, limit, repeater_id=None, state=None):
return [RepeatRecord.wrap(result['doc']) for result in results]


def get_paged_sql_repeat_records(domain, skip, limit, repeater_id=None, state=None):
from .models import SQLRepeatRecord

queryset = SQLRepeatRecord.objects.filter(domain=domain)
if repeater_id:
queryset = queryset.filter(repeater_stub__repeater_id=repeater_id)
if state:
queryset = queryset.filter(state=state)
return (queryset.order_by('-registered_at')[skip:skip + limit]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Offset is inefficient for large offsets and can result in records being skipped in some cases. Would it work to use paginate_query or possibly the technique used in iter_large?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I share your concern.

Thanks for the links. I also looked at Ethan's recent PR along the same lines. But all of them seem to me to be appropriate for iterating rows or models, but not for paginating a report.

I think we have two options here:

  • Use Elasticsearch instead of Postgres for the Repeat Record Report.
  • Hope that users don't care about what happens more than a couple of pages into the report.

I don't really love either of those options. Are my concerns valid? Are there other options I'm not considering?

@dannyroberts @gherceg do you have opinions on whether the Repeat Records Report should be backed by Elasticsearch or Postgres? And if Postgres, how we should paginate it?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder, doesn't couch (which currently backs the report) have the same pagination issue? And doesn't elasticsearch? Maybe I'm missing some special magic that Couch and/or Elasticsearch do, but my guess is that they also just internally do the query for skip + limit items and return the last limit of them.

At some point it also becomes more than is feasible for a person to actually click through. So having more granular filters and an upper limit (10k? 100k?) to the number of items we're willing to show in the results through pagination seems like the best longer term option to me

.select_related('repeater_stub')
.prefetch_related('sqlrepeatrecordattempt_set'))


def iter_repeat_records_by_domain(domain, repeater_id=None, state=None, chunk_size=1000):
from .models import RepeatRecord
kwargs = {
Expand Down Expand Up @@ -124,6 +162,13 @@ def iter_repeat_records_by_repeater(domain, repeater_id, chunk_size=1000):


def get_repeat_records_by_payload_id(domain, payload_id):
repeat_records = get_sql_repeat_records_by_payload_id(domain, payload_id)
if repeat_records:
return repeat_records
return get_couch_repeat_records_by_payload_id(domain, payload_id)


def get_couch_repeat_records_by_payload_id(domain, payload_id):
from .models import RepeatRecord
results = RepeatRecord.get_db().view(
'repeaters/repeat_records_by_payload_id',
Expand All @@ -136,6 +181,15 @@ def get_repeat_records_by_payload_id(domain, payload_id):
return [RepeatRecord.wrap(result['doc']) for result in results]


def get_sql_repeat_records_by_payload_id(domain, payload_id):
from corehq.motech.repeaters.models import SQLRepeatRecord

return (SQLRepeatRecord.objects
.filter(domain=domain, payload_id=payload_id)
.order_by('-registered_at')
.all())


def get_repeaters_by_domain(domain):
from .models import Repeater

Expand Down
56 changes: 55 additions & 1 deletion corehq/motech/repeaters/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -771,6 +771,17 @@ class RepeatRecordAttempt(DocumentSchema):
def message(self):
return self.success_response if self.succeeded else self.failure_reason

@property
def state(self):
state = RECORD_PENDING_STATE
if self.succeeded:
state = RECORD_SUCCESS_STATE
elif self.cancelled:
state = RECORD_CANCELLED_STATE
elif self.failure_reason:
state = RECORD_FAILURE_STATE
return state


class RepeatRecord(Document):
"""
Expand Down Expand Up @@ -1120,7 +1131,31 @@ def attempts(self):

@property
def num_attempts(self):
return self.sqlrepeatrecordattempt_set.count()
# Uses `len(queryset)` instead of `queryset.count()` to use
# prefetched attempts, if available.
return len(self.attempts)

def get_numbered_attempts(self):
for i, attempt in enumerate(self.attempts):
yield i + 1, attempt
kaapstorm marked this conversation as resolved.
Show resolved Hide resolved

@property
def record_id(self):
# Used by Repeater.get_url() ... by SQLRepeatRecordReport._make_row()
return self.pk

@property
def failure_reason(self):
if has_failed(self):
return self.last_message
else:
return ''

@property
def last_message(self):
# Uses `list(queryset)[-1]` instead of `queryset.last()` to use
# prefetched attempts, if available.
return list(self.attempts)[-1].message
kaapstorm marked this conversation as resolved.
Show resolved Hide resolved


class SQLRepeatRecordAttempt(models.Model):
Expand Down Expand Up @@ -1239,6 +1274,14 @@ def later_might_be_better(resp):
RECORD_CANCELLED_STATE) # Don't retry


def is_queued(record):
return record.state in (RECORD_PENDING_STATE, RECORD_FAILURE_STATE)


def has_failed(record):
return record.state in (RECORD_FAILURE_STATE, RECORD_CANCELLED_STATE)


def format_response(response) -> Optional[str]:
if not is_response(response):
return None
Expand All @@ -1256,6 +1299,17 @@ def is_response(duck):
return hasattr(duck, 'status_code') and hasattr(duck, 'reason')


@quickcache(['domain'], timeout=5 * 60)
def are_repeat_records_migrated(domain) -> bool:
"""
Returns True if ``domain`` has SQLRepeatRecords.

.. note:: Succeeded and cancelled RepeatRecords may not have been
migrated to SQLRepeatRecords.
"""
return SQLRepeatRecord.objects.filter(domain=domain).exists()


def domain_can_forward(domain):
return domain and (
domain_has_privilege(domain, ZAPIER_INTEGRATION)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,20 @@
<ul class="list-unstyled">
{% for attempt_number, attempt in record.get_numbered_attempts %}
<li><strong>Attempt #{{ attempt_number }}</strong>
{% if attempt.succeeded %}
{% if attempt.state == 'SUCCESS' %}
<br/><i class="fa fa-check"></i> {% trans "Success" %}
<a href="#" class="toggle-next-attempt">…</a>
<div class="well record-attempt" style="display: none; font-family: monospace;">
{{ attempt.success_response }}
</div>
{% elif attempt.failure_reason %}
{% elif attempt.state == 'FAIL' or attempt.state == 'CANCELLED' %}
<br/><i class="fa fa-exclamation-triangle"></i> {% trans "Failed" %}
{% elif attempt.state == 'PENDING' %}
<br/><i class="fa fa-spinner"></i> {% trans "Unsent" %}
{% endif %}
{% if attempt.message %}
<a href="#" class="toggle-next-attempt">…</a>
<div class="well record-attempt" style="display: none; font-family: monospace;">
{{ attempt.failure_reason }}
{{ attempt.message }}
</div>
{% endif %}
{% if attempt.cancelled %}
{% if attempt.state == 'CANCELLED' %}
<br/><i class="fa fa-remove"></i> {% trans "Cancelled" %}
{% endif %}
</li>
Expand Down
1 change: 1 addition & 0 deletions corehq/motech/repeaters/views/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from .repeat_records import (
DomainForwardingRepeatRecords,
RepeatRecordView,
SQLRepeatRecordReport,
cancel_repeat_record,
requeue_repeat_record,
)
Expand Down
114 changes: 81 additions & 33 deletions corehq/motech/repeaters/views/repeat_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,29 +32,30 @@
from corehq.apps.reports.generic import GenericTabularReport
from corehq.apps.users.decorators import require_can_edit_web_users
from corehq.form_processor.exceptions import XFormNotFound
from corehq.motech.repeaters.const import (
from corehq.motech.utils import pformat_json
from corehq.util.xml_utils import indent_xml

from ..const import (
RECORD_CANCELLED_STATE,
RECORD_FAILURE_STATE,
RECORD_PENDING_STATE,
RECORD_SUCCESS_STATE,
)
from corehq.motech.repeaters.dbaccessors import (
from ..dbaccessors import (
get_cancelled_repeat_record_count,
get_paged_repeat_records,
get_pending_repeat_record_count,
get_repeat_record_count,
get_repeat_records_by_payload_id,
)
from corehq.motech.repeaters.models import RepeatRecord
from corehq.motech.utils import pformat_json
from corehq.util.xml_utils import indent_xml
from ..models import RepeatRecord, is_queued


class DomainForwardingRepeatRecords(GenericTabularReport):
class BaseRepeatRecordReport(GenericTabularReport):
name = 'Repeat Records'
base_template = 'repeaters/repeat_record_report.html'
section_name = 'Project Settings'
slug = 'repeat_record_report'

dispatcher = DomainReportDispatcher
ajax_pagination = True
asynchronous = False
Expand Down Expand Up @@ -199,31 +200,7 @@ def _payload_id_and_search_link(self, payload_id):
)

def _make_row(self, record):
checkbox = mark_safe(
"""<input type="checkbox" class="xform-checkbox"
data-id="{}" name="xform_ids"/>""".format(record.get_id)
)
row = [
checkbox,
self._make_state_label(record),
record.repeater.get_url(record) if record.repeater else _('Unable to generate url for record'),
self._format_date(record.last_checked) if record.last_checked else '---',
self._format_date(record.next_check) if record.next_check else '---',
render_to_string('repeaters/partials/attempt_history.html', {'record': record}),
self._make_view_payload_button(record.get_id),
self._make_resend_payload_button(record.get_id),
]

if record.cancelled and not record.succeeded:
row.append(self._make_requeue_payload_button(record.get_id))
elif not record.cancelled and not record.succeeded:
row.append(self._make_cancel_payload_button(record.get_id))
else:
row.append(None)

if toggles.SUPPORT.enabled_for_request(self.request):
row.insert(2, self._payload_id_and_search_link(record.payload_id))
return row
raise NotImplementedError

@property
def headers(self):
Expand Down Expand Up @@ -253,7 +230,7 @@ def headers(self):

@property
def report_context(self):
context = super(DomainForwardingRepeatRecords, self).report_context
context = super().report_context

total = get_repeat_record_count(self.domain, self.repeater_id)
total_pending = get_pending_repeat_record_count(self.domain, self.repeater_id)
Expand All @@ -276,6 +253,77 @@ def report_context(self):
return context


class DomainForwardingRepeatRecords(BaseRepeatRecordReport):
slug = 'couch_repeat_record_report'

def _make_row(self, record):
checkbox = mark_safe(
"""<input type="checkbox" class="xform-checkbox"
data-id="{}" name="xform_ids"/>""".format(record.get_id)
)
row = [
checkbox,
self._make_state_label(record),
record.repeater.get_url(record) if record.repeater else _('Unable to generate url for record'),
self._format_date(record.last_checked) if record.last_checked else '---',
self._format_date(record.next_check) if record.next_check else '---',
render_to_string('repeaters/partials/attempt_history.html', {'record': record}),
self._make_view_payload_button(record.get_id),
self._make_resend_payload_button(record.get_id),
]

if record.cancelled and not record.succeeded:
row.append(self._make_requeue_payload_button(record.get_id))
elif not record.cancelled and not record.succeeded:
row.append(self._make_cancel_payload_button(record.get_id))
else:
row.append(None)

if toggles.SUPPORT.enabled_for_request(self.request):
row.insert(2, self._payload_id_and_search_link(record.payload_id))
return row


class SQLRepeatRecordReport(BaseRepeatRecordReport):
slug = 'repeat_record_report'

def _make_row(self, record):
checkbox = mark_safe('<input type="checkbox" class="xform-checkbox" '
f'data-id="{record.pk}" name="xform_ids"/>')
if record.attempts:
# Use prefetched `record.attempts` instead of requesting a
# different queryset
created_at = self._format_date(list(record.attempts)[-1].created_at)
else:
created_at = '---'
if record.repeater_stub.next_attempt_at:
next_attempt_at = self._format_date(record.repeater_stub.next_attempt_at)
else:
next_attempt_at = '---'
row = [
checkbox,
self._make_state_label(record),
record.repeater_stub.repeater.get_url(record),
created_at,
next_attempt_at,
render_to_string('repeaters/partials/attempt_history.html',
{'record': record}),
self._make_view_payload_button(record.pk),
self._make_resend_payload_button(record.pk),
]

if record.state == RECORD_CANCELLED_STATE:
row.append(self._make_requeue_payload_button(record.pk))
elif is_queued(record):
row.append(self._make_cancel_payload_button(record.pk))
else:
row.append(None)

if toggles.SUPPORT.enabled_for_request(self.request):
row.insert(2, self._payload_id_and_search_link(record.payload_id))
return row


@method_decorator(domain_admin_required, name='dispatch')
@method_decorator(requires_privilege_with_fallback(privileges.DATA_FORWARDING), name='dispatch')
class RepeatRecordView(View):
Expand Down
Loading