Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use Elasticsearch to fetch applications for exports #34266

Merged
merged 9 commits into from
Oct 13, 2024
14 changes: 9 additions & 5 deletions corehq/apps/app_manager/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@

from corehq.apps.app_manager.analytics import get_exports_by_application
from corehq.apps.app_manager.dbaccessors import get_app, get_apps_in_domain
from corehq.apps.export.const import ALL_CASE_TYPE_EXPORT
from corehq.apps.hqwebapp import crispy as hqcrispy
from corehq.apps.registry.models import DataRegistry
from corehq.apps.registry.utils import get_data_registry_dropdown_options
from corehq.apps.hqwebapp import crispy as hqcrispy
from corehq.apps.reports.analytics.esaccessors import get_case_types_for_domain
from corehq.apps.userreports.app_manager.data_source_meta import (
DATA_SOURCE_TYPE_CASE,
Expand All @@ -26,7 +27,6 @@
)
from corehq.apps.userreports.dbaccessors import get_datasources_for_domain
from corehq.toggles import AGGREGATE_UCRS
from corehq.apps.export.const import ALL_CASE_TYPE_EXPORT

DataSource = collections.namedtuple('DataSource', ['application', 'source_type', 'source', 'registry_slug'])
RMIDataChoice = collections.namedtuple('RMIDataChoice', ['id', 'text', 'data'])
Expand Down Expand Up @@ -285,8 +285,10 @@ class ApplicationDataRMIHelper(object):
APP_TYPE_NONE = 'no_app'
APP_TYPE_UNKNOWN = 'unknown'

def __init__(self, domain, user, as_dict=True):
def __init__(self, domain, project, user, as_dict=True):
self.domain = domain
self.domain_object = project
Comment on lines +288 to +290
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it work to only pass in the domain object?

Suggested change
def __init__(self, domain, project, user, as_dict=True):
self.domain = domain
self.domain_object = project
def __init__(self, project, user, as_dict=True):
self.domain = project.name
self.domain_object = project

self.domain could also be changed to a property that returns self.domain_object.name


self.user = user
self.as_dict = as_dict
self.form_labels = AppFormRMIPlaceholder(
Expand Down Expand Up @@ -420,14 +422,16 @@ def _sort_key_form(form):
def _all_forms(self):
forms = []
unknown_forms = []
for f in get_exports_by_form(self.domain):

for f in get_exports_by_form(self.domain, use_es=self.domain_object.exports_use_elasticsearch):
form = f['value']
if form.get('app_deleted') and not form.get('submissions'):
continue
if 'app' in form:
form['has_app'] = True
forms.append(form)
else:
elif not self.domain_object.exports_use_elasticsearch:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi Farid, can you explain what does it mean when app is not in form? I would prefer not to exclude those forms, because when I run in a shell, your _get_export_forms_by_app_es
is already quick enough to solve the current issue. 500 error seems like another issue. Loading those either unknown or delete apps used to work without 500 error, at least until 2 months ago. If there is some changes that resulted in those 500 errors, we should fix that, instead of just exclude those forms.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Offlining a response to this)

# If the elasticsearch toggle is on, we don't care about forms without apps
app_id = f['key'][1] or ''
form['app'] = {
'id': app_id
Expand Down
26 changes: 26 additions & 0 deletions corehq/apps/domain/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@
from corehq.apps.sms.phonenumbers_helper import parse_phone_number
from corehq.apps.users.models import CouchUser, WebUser
from corehq.toggles import (
EXPORTS_APPS_USE_ELASTICSEARCH,
HIPAA_COMPLIANCE_CHECKBOX,
MOBILE_UCR,
SECURE_SESSION_TIMEOUT,
Expand Down Expand Up @@ -481,6 +482,16 @@ class DomainGlobalSettingsForm(forms.Form):
)
)

exports_use_elasticsearch = BooleanField(
label=gettext_lazy("Use elasticsearch when fetching apps for exports"),
required=False,
help_text=gettext_lazy(
"""
(Internal) Fetches apps using elasticsearch instead of couch in exports
"""
)
)

def __init__(self, *args, **kwargs):
self.project = kwargs.pop('domain', None)
self.domain = self.project.name
Expand Down Expand Up @@ -516,6 +527,11 @@ def __init__(self, *args, **kwargs):
self._handle_release_mode_setting_value()
self._handle_orphan_case_alerts_setting_value()

if not EXPORTS_APPS_USE_ELASTICSEARCH.enabled(self.domain):
del self.fields['exports_use_elasticsearch']
else:
self._handle_exports_use_elasticsearch_setting_value()

self.helper = hqcrispy.HQFormHelper(self)
self.helper.layout = crispy.Layout(
crispy.Fieldset(
Expand Down Expand Up @@ -556,6 +572,8 @@ def get_extra_fields(self):
])
if MOBILE_UCR.enabled(self.domain):
extra_fields.append('mobile_ucr_sync_interval')
if EXPORTS_APPS_USE_ELASTICSEARCH.enabled(self.domain):
extra_fields.append('exports_use_elasticsearch')
return extra_fields

def _handle_account_confirmation_by_sms_settings(self):
Expand Down Expand Up @@ -592,6 +610,9 @@ def _handle_release_mode_setting_value(self):
def _handle_orphan_case_alerts_setting_value(self):
self.fields['orphan_case_alerts_warning'].initial = self.project.orphan_case_alerts_warning

def _handle_exports_use_elasticsearch_setting_value(self):
self.fields['exports_use_elasticsearch'].initial = self.project.exports_use_elasticsearch

def _add_range_validation_to_integer_input(self, settings_name, min_value, max_value):
setting = self.fields.get(settings_name)
min_validator = MinValueValidator(min_value)
Expand Down Expand Up @@ -748,6 +769,9 @@ def _save_release_mode_setting(self, domain):
def _save_orphan_case_alerts_setting(self, domain):
domain.orphan_case_alerts_warning = self.cleaned_data.get("orphan_case_alerts_warning", False)

def _save_exports_use_elasticsearch(self, domain):
domain.exports_use_elasticsearch = self.cleaned_data.get("exports_use_elasticsearch", True)

def save(self, request, domain):
domain.hr_name = self.cleaned_data['hr_name']
domain.project_description = self.cleaned_data['project_description']
Expand All @@ -768,6 +792,8 @@ def save(self, request, domain):
self._save_account_confirmation_settings(domain)
self._save_release_mode_setting(domain)
self._save_orphan_case_alerts_setting(domain)
if EXPORTS_APPS_USE_ELASTICSEARCH.enabled(self.domain):
self._save_exports_use_elasticsearch(domain)
domain.save()
return True

Expand Down
1 change: 1 addition & 0 deletions corehq/apps/domain/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,7 @@ class Domain(QuickCachedDocumentMixin, BlobMixin, Document, SnapshotMixin):

ga_opt_out = BooleanProperty(default=False)
orphan_case_alerts_warning = BooleanProperty(default=False)
exports_use_elasticsearch = BooleanProperty(default=False)

# For domains that have been migrated to a different environment
redirect_url = StringProperty()
Expand Down
2 changes: 1 addition & 1 deletion corehq/apps/export/views/list.py
Original file line number Diff line number Diff line change
Expand Up @@ -875,7 +875,7 @@ def get_app_data_drilldown_values(request, domain):
permissions = ExportsPermissionsManager(model_type, domain, request.couch_user)
permissions.access_list_exports_or_404(is_deid=False, is_odata=is_odata)

rmi_helper = ApplicationDataRMIHelper(domain, request.couch_user)
rmi_helper = ApplicationDataRMIHelper(domain, request.project, request.couch_user)
if model_type == 'form':
response = rmi_helper.get_form_rmi_response()
elif model_type == 'case':
Expand Down
41 changes: 32 additions & 9 deletions corehq/ex-submodules/couchforms/analytics.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import datetime

from corehq.apps.es import FormES
from corehq.apps.es import AppES, FormES
from corehq.apps.es.aggregations import TermsAggregation
from corehq.const import MISSING_APP_ID
from corehq.util.quickcache import quickcache
Expand Down Expand Up @@ -132,15 +132,18 @@ def get_form_analytics_metadata(domain, app_id, xmlns):
return None


def get_exports_by_form(domain):
def get_exports_by_form(domain, use_es=False):
from corehq.apps.app_manager.models import Application
rows = Application.get_db().view(
'exports_forms_by_app/view',
startkey=[domain],
endkey=[domain, {}],
group=True,
stale=stale_ok()
).all()
if use_es:
rows = _get_export_forms_by_app_es(domain)
else:
rows = Application.get_db().view(
'exports_forms_by_app/view',
startkey=[domain],
endkey=[domain, {}],
group=True,
stale=stale_ok()
).all()
form_count_breakdown = get_form_count_breakdown_for_domain(domain)

for row in rows:
Expand All @@ -155,6 +158,26 @@ def get_exports_by_form(domain):
return rows


def _get_export_forms_by_app_es(domain):
rows = []
apps = AppES().domain(domain).is_build(False).run().hits
for app in apps:
for module_id, module in enumerate(app.get("modules", [])):
for form_id, form in enumerate(module.get("forms", [])):
rows.append({
"key": [app['domain'], app['_id'], form["xmlns"]],
"value": {
"xmlns": form["xmlns"],
"app": {"name": app["name"], "langs": app["langs"], "id": app["_id"]},
"module": {"name": module["name"], "id": module_id},
"form": {"name": form["name"], "id": form_id},
"app_deleted": app["doc_type"] in ["Application-Deleted", "LinkedApplication-Deleted"],
}
})

return rows


def get_form_count_breakdown_for_domain(domain):
query = (FormES(for_export=True)
.domain(domain)
Expand Down
56 changes: 54 additions & 2 deletions corehq/ex-submodules/couchforms/tests/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import uuid

from django.test import TestCase
from corehq.apps.app_manager.tests.util import delete_all_apps

from couchforms.analytics import (
domain_has_submission_in_last_30_days,
Expand All @@ -13,6 +14,7 @@
get_number_of_forms_in_domain,
)

from corehq.apps.es.apps import app_adapter
from corehq.apps.es.client import manager
from corehq.apps.es.forms import form_adapter
from corehq.apps.es.tests.utils import es_test
Expand All @@ -34,7 +36,7 @@
from testapps.test_pillowtop.utils import process_pillow_changes


@es_test(requires=[form_adapter], setup_class=True)
@es_test(requires=[form_adapter, app_adapter], setup_class=True)
@disable_quickcache
class ExportsFormsAnalyticsTest(TestCase, DocTestMixin):
maxDiff = None
Expand All @@ -46,25 +48,36 @@ def setUpClass(cls):

from corehq.apps.app_manager.models import Application, Form, Module
delete_all_xforms()
delete_all_apps()

cls.domain = 'exports_forms_analytics_domain'
cls.app_id_1 = 'a' + uuid.uuid4().hex
cls.app_id_2 = 'b' + uuid.uuid4().hex
cls.app_id_3 = 'c' + uuid.uuid4().hex
cls.xmlns_1 = 'my://crazy.xmlns/'
cls.xmlns_2 = 'my://crazy.xmlns/app'
cls.xmlns_3 = 'my://crazy.xmlns/deleted-app'
cls.apps = [
Application(_id=cls.app_id_2, domain=cls.domain,
modules=[Module(forms=[Form(xmlns=cls.xmlns_2)])])
modules=[Module(forms=[Form(xmlns=cls.xmlns_2)])]),
Application(_id=cls.app_id_3, domain=cls.domain,
modules=[Module(forms=[Form(xmlns=cls.xmlns_3)])])
]
for app in cls.apps:
app.save()
cls.apps[1].delete_app()
cls.apps[1].save()
cls.forms = [
create_form_for_test(domain=cls.domain, app_id=cls.app_id_1, xmlns=cls.xmlns_1, save=False),
create_form_for_test(domain=cls.domain, app_id=cls.app_id_1, xmlns=cls.xmlns_1, save=False),
create_form_for_test(domain=cls.domain, app_id=cls.app_id_2, xmlns=cls.xmlns_2, save=False),
create_form_for_test(domain=cls.domain, app_id=cls.app_id_3, xmlns=cls.xmlns_3, save=False),
]
cls.error_forms = [create_form_for_test(domain=cls.domain, state=XFormInstance.ERROR, save=False)]
cls.all_forms = cls.forms + cls.error_forms

for app in cls.apps:
app_adapter.index(app, refresh=True)
for form in cls.all_forms:
form_adapter.index(form, refresh=True)

Expand Down Expand Up @@ -97,6 +110,8 @@ def test_get_form_analytics_metadata__app(self):

@flaky_slow
def test_get_exports_by_form(self):
# Call this twice, since the couchdb `update_after` to force couchdb to return the right results
get_exports_by_form(self.domain)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does calling it twice actually force it, or does it just take a bit more time, making the second call a bit more likely to return the correct results? In other words, is this possibly a flaky test?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it actually forces it. This is also a flaky test, but it was before I added this change (and is annotated as such)

self.assertEqual(get_exports_by_form(self.domain), [{
'value': {'xmlns': 'my://crazy.xmlns/', 'submissions': 2},
'key': ['exports_forms_analytics_domain', self.app_id_1,
Expand All @@ -110,6 +125,43 @@ def test_get_exports_by_form(self):
'app_deleted': False, 'submissions': 1},
'key': ['exports_forms_analytics_domain', self.app_id_2,
'my://crazy.xmlns/app']
}, {
'value': {
'xmlns': 'my://crazy.xmlns/deleted-app',
'form': {'name': {}, 'id': 0},
'app': {'langs': [], 'name': None, 'id': self.app_id_3},
'module': {'name': {}, 'id': 0},
'app_deleted': True, 'submissions': 1,
},
'key': ['exports_forms_analytics_domain', self.app_id_3,
'my://crazy.xmlns/deleted-app']
}])

def test_get_exports_by_form_es(self):
self.assertEqual(get_exports_by_form(self.domain, use_es=True), [{
'value': {'xmlns': 'my://crazy.xmlns/', 'submissions': 2},
'key': ['exports_forms_analytics_domain', self.app_id_1,
'my://crazy.xmlns/']
}, {
'value': {
'xmlns': 'my://crazy.xmlns/app',
'form': {'name': {}, 'id': 0},
'app': {'langs': [], 'name': None, 'id': self.app_id_2},
'module': {'name': {}, 'id': 0},
'app_deleted': False, 'submissions': 1},
'key': ['exports_forms_analytics_domain', self.app_id_2,
'my://crazy.xmlns/app']
}, {
'value': {
'xmlns': 'my://crazy.xmlns/deleted-app',
'submissions': 1,
'form': {'name': {}, 'id': 0},
'app': {'langs': [], 'name': None, 'id': self.app_id_3},
'module': {'name': {}, 'id': 0},
'app_deleted': True
},
'key': ['exports_forms_analytics_domain', self.app_id_3,
'my://crazy.xmlns/deleted-app']
}])


Expand Down
9 changes: 9 additions & 0 deletions corehq/toggles/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1607,6 +1607,15 @@ def _commtrackify(domain_name, toggle_is_enabled):
[NAMESPACE_DOMAIN],
)


EXPORTS_APPS_USE_ELASTICSEARCH = StaticToggle(
'export_apps_use_elasticsearch',
'Use elasticsearch when fetching apps for exports',
TAG_INTERNAL,
[NAMESPACE_DOMAIN],
)


DISABLE_COLUMN_LIMIT_IN_UCR = StaticToggle(
'disable_column_limit_in_ucr',
'Enikshay: Disable column limit in UCR',
Expand Down
Loading