-
-
Notifications
You must be signed in to change notification settings - Fork 218
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
SQL Repeat Record Report & Case Data Report #29029
Changes from 1 commit
b506f4f
42cb272
a4629c1
be898b7
087e7e3
56e3a17
f9a9302
2142e55
cca491a
94a1b0a
923453d
89ba6db
365c159
e6b7c9a
57be9cf
9c6c06f
e686887
550e93d
db0a110
027dc4f
171cbe7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,19 +30,36 @@ def get_cancelled_repeat_record_count(domain, repeater_id): | |
|
||
|
||
def get_repeat_record_count(domain, repeater_id=None, state=None):
    """Return the repeat record count for ``domain``.

    The count comes from the SQL accessor when
    ``are_repeat_records_migrated(domain)`` is truthy, and from the
    Couch accessor otherwise. ``repeater_id`` and ``state`` are passed
    through unchanged to whichever accessor is chosen.
    """
    # Imported lazily, as everywhere else in this module.
    from .models import are_repeat_records_migrated

    count_records = (
        get_sql_repeat_record_count
        if are_repeat_records_migrated(domain)
        else get_couch_repeat_record_count
    )
    return count_records(domain, repeater_id, state)
|
||
|
||
def get_couch_repeat_record_count(domain, repeater_id=None, state=None):
    """Return the repeat record count from the Couch view.

    Queries the ``repeaters/repeat_records`` view with ``reduce=True``
    over the key range produced by ``_get_startkey_endkey_all_records``
    and returns the reduced row's ``'value'``. Returns ``0`` when the
    view yields no row.
    """
    from .models import RepeatRecord

    view_kwargs = {
        'include_docs': False,
        'reduce': True,
        'descending': True,
    }
    key_range = _get_startkey_endkey_all_records(domain, repeater_id, state)
    view_kwargs.update(key_range)

    db = RepeatRecord.get_db()
    reduced_row = db.view('repeaters/repeat_records', **view_kwargs).one()

    if not reduced_row:
        return 0
    return reduced_row['value']
|
||
|
||
def get_sql_repeat_record_count(domain, repeater_id=None, state=None):
    """Return the number of ``SQLRepeatRecord`` rows for ``domain``.

    ``repeater_id`` and ``state`` each narrow the query only when they
    are truthy; a falsy value means "do not filter on this".
    """
    from .models import SQLRepeatRecord

    optional_filters = (
        ('repeater_stub__repeater_id', repeater_id),
        ('state', state),
    )
    records = SQLRepeatRecord.objects.filter(domain=domain)
    for lookup, value in optional_filters:
        if value:
            # Chain one filter() per lookup, in the same order as before.
            records = records.filter(**{lookup: value})
    return records.count()
|
||
|
||
def get_overdue_repeat_record_count(overdue_threshold=datetime.timedelta(minutes=10)): | ||
from .models import RepeatRecord | ||
overdue_datetime = datetime.datetime.utcnow() - overdue_threshold | ||
|
@@ -75,6 +92,14 @@ def _get_startkey_endkey_all_records(domain, repeater_id=None, state=None): | |
|
||
|
||
def get_paged_repeat_records(domain, skip, limit, repeater_id=None, state=None):
    """Return one page of repeat records for ``domain``.

    Uses the SQL accessor when ``are_repeat_records_migrated(domain)``
    is truthy and the Couch accessor otherwise. ``skip``, ``limit``,
    ``repeater_id`` and ``state`` are forwarded unchanged.
    """
    from .models import are_repeat_records_migrated

    if not are_repeat_records_migrated(domain):
        return get_paged_couch_repeat_records(domain, skip, limit, repeater_id, state)
    return get_paged_sql_repeat_records(domain, skip, limit, repeater_id, state)
|
||
|
||
def get_paged_couch_repeat_records(domain, skip, limit, repeater_id=None, state=None): | ||
from .models import RepeatRecord | ||
kwargs = { | ||
'include_docs': True, | ||
|
@@ -90,6 +115,19 @@ def get_paged_repeat_records(domain, skip, limit, repeater_id=None, state=None): | |
return [RepeatRecord.wrap(result['doc']) for result in results] | ||
|
||
|
||
def get_paged_sql_repeat_records(domain, skip, limit, repeater_id=None, state=None): | ||
from .models import SQLRepeatRecord | ||
|
||
queryset = SQLRepeatRecord.objects.filter(domain=domain) | ||
if repeater_id: | ||
queryset = queryset.filter(repeater_stub__repeater_id=repeater_id) | ||
if state: | ||
queryset = queryset.filter(state=state) | ||
return (queryset.order_by('-registered_at')[skip:skip + limit] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Offset is inefficient for large offsets and can result in records being skipped in some cases. Would it work to use […]

There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

I share your concern. Thanks for the links. I also looked at Ethan's recent PR along the same lines. But all of them seem to me to be appropriate for iterating rows or models, not for paginating a report. I think we have two options here:
I don't really love either of those options. Are my concerns valid? Are there other options I'm not considering? @dannyroberts @gherceg do you have opinions on whether the Repeat Records Report should be backed by Elasticsearch or Postgres? And if Postgres, how we should paginate it?

There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

I wonder, doesn't couch (which currently backs the report) have the same pagination issue? And doesn't elasticsearch? Maybe I'm missing some special magic that Couch and/or Elasticsearch do, but my guess is that they also just internally do the query for […]. At some point it also becomes more than is feasible for a person to actually click through. So having more granular filters and an upper limit (10k? 100k?) to the number of items we're willing to show in the results through pagination seems like the best longer term option to me |
||
.select_related('repeater_stub') | ||
.prefetch_related('sqlrepeatrecordattempt_set')) | ||
|
||
|
||
def iter_repeat_records_by_domain(domain, repeater_id=None, state=None, chunk_size=1000): | ||
from .models import RepeatRecord | ||
kwargs = { | ||
|
@@ -124,6 +162,13 @@ def iter_repeat_records_by_repeater(domain, repeater_id, chunk_size=1000): | |
|
||
|
||
def get_repeat_records_by_payload_id(domain, payload_id):
    """Return repeat records for ``payload_id``, preferring SQL.

    The SQL accessor is tried first. If its result is falsy (no
    records), the result of the Couch accessor is returned instead.
    """
    sql_records = get_sql_repeat_records_by_payload_id(domain, payload_id)
    return sql_records or get_couch_repeat_records_by_payload_id(domain, payload_id)
|
||
|
||
def get_couch_repeat_records_by_payload_id(domain, payload_id): | ||
from .models import RepeatRecord | ||
results = RepeatRecord.get_db().view( | ||
'repeaters/repeat_records_by_payload_id', | ||
|
@@ -136,6 +181,15 @@ def get_repeat_records_by_payload_id(domain, payload_id): | |
return [RepeatRecord.wrap(result['doc']) for result in results] | ||
|
||
|
||
def get_sql_repeat_records_by_payload_id(domain, payload_id):
    """Return ``SQLRepeatRecord`` rows matching ``domain`` and ``payload_id``.

    The result is ordered by ``registered_at`` descending.
    """
    from corehq.motech.repeaters.models import SQLRepeatRecord

    matching = SQLRepeatRecord.objects.filter(domain=domain, payload_id=payload_id)
    newest_first = matching.order_by('-registered_at')
    return newest_first.all()
|
||
|
||
def get_repeaters_by_domain(domain): | ||
from .models import Repeater | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it important for this count to be exact? If not, would it be good enough to estimate it using the query planner (much faster)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
db0a110