Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rate limit devices per user on short window #35613

Merged
merged 16 commits into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion corehq/apps/ota/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from iso8601 import iso8601
from looseversion import LooseVersion
from memoized import memoized
from tastypie.http import HttpTooManyRequests
from tastypie.http import HttpNotAcceptable, HttpTooManyRequests

from casexml.apps.case.cleanup import claim_case, get_first_claims
from casexml.apps.case.fixtures import CaseDBFixture
Expand Down Expand Up @@ -60,6 +60,7 @@
RegistryNotFound,
)
from corehq.apps.registry.helper import DataRegistryHelper
from corehq.apps.users.device_rate_limiter import device_rate_limiter, DEVICE_RATE_LIMIT_MESSAGE
from corehq.apps.users.models import CouchUser, UserReportingMetadataStaging
from corehq.const import ONE_DAY, OPENROSA_VERSION_MAP
from corehq.form_processor.exceptions import CaseNotFound
Expand Down Expand Up @@ -294,6 +295,9 @@ def get_restore_response(domain, couch_user, app_id=None, since=None, version='1
silently.
:return: Tuple of (http response, timing context or None)
"""
should_limit = device_rate_limiter.rate_limit_device(domain, user_id, device_id)
if should_limit:
return HttpNotAcceptable(DEVICE_RATE_LIMIT_MESSAGE)

if user_id and user_id != couch_user.user_id:
# sync with a user that has been deleted but a new
Expand Down Expand Up @@ -381,6 +385,12 @@ def heartbeat(request, domain, app_build_id):
mobile simply needs it to be resent back in the JSON, and doesn't
need any validation on it. This is pulled from @uniqueid from profile.xml
"""
should_limit = device_rate_limiter.rate_limit_device(
domain, request.couch_user._id, request.GET.get('device_id')
)
if should_limit:
return HttpNotAcceptable(DEVICE_RATE_LIMIT_MESSAGE)

app_id = request.GET.get('app_id', '')
build_profile_id = request.GET.get('build_profile_id', '')
master_app_id = app_id
Expand Down
1 change: 1 addition & 0 deletions corehq/apps/receiverwrapper/tests/test_audit_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def return_submission_run_resp(*args, **kwargs):

@patch('corehq.apps.receiverwrapper.views.couchforms.get_instance_and_attachment',
new=Mock(return_value=(Mock(), Mock())))
@patch('corehq.apps.receiverwrapper.views.convert_xform_to_json', new=Mock())
@patch('corehq.apps.receiverwrapper.views._record_metrics', new=Mock())
@patch('corehq.apps.receiverwrapper.views.SubmissionPost.run', new=return_submission_run_resp)
class TestAuditLoggingForFormSubmission(TestCase):
Expand Down
14 changes: 13 additions & 1 deletion corehq/apps/receiverwrapper/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from casexml.apps.case.xform import get_case_updates, is_device_report
from corehq.apps.hqwebapp.decorators import waf_allow
from corehq.apps.users.decorators import require_permission
from corehq.apps.users.device_rate_limiter import device_rate_limiter, DEVICE_RATE_LIMIT_MESSAGE
from corehq.apps.users.models import HqPermissions
from couchforms import openrosa_response
from couchforms.const import MAGIC_PROPERTY
Expand Down Expand Up @@ -58,7 +59,7 @@
from corehq.util.metrics import metrics_counter, metrics_histogram
from corehq.util.timer import TimingContext, set_request_duration_reporting_threshold
from couchdbkit import ResourceNotFound
from tastypie.http import HttpTooManyRequests
from tastypie.http import HttpNotAcceptable, HttpTooManyRequests

PROFILE_PROBABILITY = float(os.getenv('COMMCARE_PROFILE_SUBMISSION_PROBABILITY', 0))
PROFILE_LIMIT = os.getenv('COMMCARE_PROFILE_SUBMISSION_LIMIT')
Expand Down Expand Up @@ -137,6 +138,17 @@ def _process_form(request, domain, app_id, user_id, authenticated,
_record_metrics(metric_tags, 'blacklisted', response)
return response

try:
form_json = convert_xform_to_json(instance)
except couchforms.XMLSyntaxError:
# let normal response handle invalid xml
pass
else:
device_id = form_json.get('meta', {}).get('deviceID')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be retrieved directly from the instance with instance.metadata.deviceID? Possibly it should be made forgiving of missing attributes with getattr()? I'm concerned about the overhead of adding convert_xform_to_json() here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@gherceg pointed out in an offline discussion that instance is a byte string here, not a form object as it is in SubmissionPost later on.

It would be nice to pass form_json on from here to anything else that subsequently needs parsed form JSON to avoid having to re-parse in those places.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Attempted in d3e70de

should_limit = device_rate_limiter.rate_limit_device(domain, user_id, device_id)
if should_limit:
return HttpNotAcceptable(DEVICE_RATE_LIMIT_MESSAGE)

with TimingContext() as timer:
app_id, build_id = get_app_and_build_ids(domain, app_id)
submission_post = SubmissionPost(
Expand Down
93 changes: 93 additions & 0 deletions corehq/apps/users/device_rate_limiter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
from datetime import datetime, timezone

from django.conf import settings
from django_redis import get_redis_connection

from corehq import toggles
from corehq.apps.cloudcare.const import DEVICE_ID as CLOUDCARE_DEVICE_ID
from corehq.util.metrics import metrics_counter, metrics_histogram

# Body of the HTTP response returned to a device whose request is rate limited.
DEVICE_RATE_LIMIT_MESSAGE = "Current usage for this user is too high. Please try again in a minute."
# Expiry (in seconds) applied to each per-minute redis set of device ids when it is
# first created; twice the length of the 1 minute window the limiter operates on.
DEVICE_SET_CACHE_TIMEOUT = 2 * 60  # 2 minutes
gherceg marked this conversation as resolved.
Show resolved Hide resolved


class DeviceRateLimiter:
    """
    Limits the number of distinct devices a single user can use within a time window.

    Operates on a time window of 1 minute. The device ids seen for a user are stored
    in a redis set whose key contains the user id and the current UTC time floored to
    the minute, so every new minute starts from an empty set.
    """

    def __init__(self):
        # need to use raw redis connection to use sismember and scard functions
        self.client = get_redis_connection()

    def device_limit_per_user(self, domain):
        """
        Return the number of distinct devices a user in ``domain`` may use per window.

        Domains with the INCREASE_DEVICE_LIMIT_PER_USER toggle enabled get
        settings.INCREASED_DEVICE_LIMIT_PER_USER, all others settings.DEVICE_LIMIT_PER_USER.
        """
        if toggles.INCREASE_DEVICE_LIMIT_PER_USER.enabled(domain):
            return settings.INCREASED_DEVICE_LIMIT_PER_USER
        return settings.DEVICE_LIMIT_PER_USER

    def rate_limit_device(self, domain, user_id, device_id):
        """
        Returns boolean representing if this user_id + device_id combo is rate limited or not
        NOTE: calling this method will result in the device_id being added to the list of used
        device_ids, unless the limit for the current window has already been reached

        :param domain: domain name, used to look up the limit and to tag metrics
        :param user_id: id of the user the device is acting as
        :param device_id: id reported by the device; falsy values are never limited
        :return: False if the request is allowed. When the limit is exceeded the value of
            settings.ENABLE_DEVICE_RATE_LIMITER is returned, so with the setting off the
            event is only recorded in metrics
        """
        if not device_id or self._is_formplayer(device_id):
            # do not track formplayer activity
            return False

        key = self._get_redis_key(user_id)

        if not self._exists(key):
            # first device seen for this user in the current window
            self._track_usage(key, device_id, is_key_new=True)
            return False

        if self._device_has_been_used(key, device_id):
            # devices already counted in this window are always allowed
            return False

        device_count = self._device_count(key)
        if device_count < self.device_limit_per_user(domain):
            self._track_usage(key, device_id)
            # this intentionally doesn't capture users with 1 device, only those with multiple
            metrics_histogram(
                'commcare.devices_per_user.device_count',
                device_count + 1,
                bucket_tag='count',
                buckets=[3, 5, 8, 10],
                tags={'domain': domain, 'user_id': user_id},
            )
            return False

        metrics_counter(
            'commcare.devices_per_user.rate_limit_exceeded', tags={'domain': domain, 'user_id': user_id}
        )
        return settings.ENABLE_DEVICE_RATE_LIMITER

    def _get_redis_key(self, user_id):
        """
        Create a redis key using the user_id and current time to the floored minute
        This ensures a new key is used every minute
        """
        time = datetime.now(timezone.utc)
        formatted_time = time.strftime('%Y-%m-%d_%H:%M')
        key = f"device-limiter_{user_id}_{formatted_time}"
        return key

    def _track_usage(self, redis_key, device_id, is_key_new=False):
        """
        Add device_id to the set stored at redis_key.

        The expiry is only set when the caller indicates the key was just created, so
        later additions in the same window do not extend the lifetime of the set.
        """
        self.client.sadd(redis_key, device_id)
        if is_key_new:
            self.client.expire(redis_key, DEVICE_SET_CACHE_TIMEOUT)

    def _device_has_been_used(self, redis_key, device_id):
        """
        Return True if device_id is already a member of the set stored at redis_key.

        This must be a read-only check (SISMEMBER). Using SREM here would remove the
        device from the set, and redis deletes a set once it becomes empty, so a device
        that was already allowed could be treated as new and rate limited later on.
        """
        return bool(self.client.sismember(redis_key, device_id))

    def _device_count(self, redis_key):
        """Return the number of distinct device ids stored at redis_key."""
        return self.client.scard(redis_key)

    def _exists(self, redis_key):
        """Return a truthy value if redis_key currently exists in redis."""
        return self.client.exists(redis_key)

    def _is_formplayer(self, device_id):
        """Return True for device ids that identify formplayer (web apps) activity."""
        return device_id.startswith("WebAppsLogin") or device_id == CLOUDCARE_DEVICE_ID


# Shared module-level instance; callers import this rather than constructing their own.
# Instantiating it fetches the redis client as soon as this module is imported.
device_rate_limiter = DeviceRateLimiter()
68 changes: 68 additions & 0 deletions corehq/apps/users/tests/test_device_rate_limiter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from django.test import SimpleTestCase, override_settings
from freezegun import freeze_time

from corehq.apps.cloudcare.const import DEVICE_ID as CLOUDCARE_DEVICE_ID
from corehq.apps.users.device_rate_limiter import device_rate_limiter
from corehq.tests.pytest_plugins.reusedb import clear_redis
from corehq.util.test_utils import flag_enabled


@freeze_time("2024-12-10 12:05:43")
@override_settings(DEVICE_LIMIT_PER_USER=1, ENABLE_DEVICE_RATE_LIMITER=True)
class TestDeviceRateLimiter(SimpleTestCase):
    # Exercises the shared device_rate_limiter with a frozen clock, a limit of one
    # device per user and enforcement switched on; individual tests override as needed.

    domain = 'device-rate-limit-test'

    def setUp(self):
        # make sure device sets written by one test never leak into the next
        self.addCleanup(clear_redis)

    def _is_limited(self, device_id, user_id='user-id'):
        # single entry point to the limiter so each test only spells out what varies
        return device_rate_limiter.rate_limit_device(self.domain, user_id, device_id)

    def test_allowed_if_no_devices_have_been_used_yet(self):
        self.assertFalse(self._is_limited('new-device-id'))

    @override_settings(DEVICE_LIMIT_PER_USER=2)
    def test_allowed_if_device_count_is_under_limit(self):
        self._is_limited('existing-device-id')
        self.assertFalse(self._is_limited('new-device-id'))

    def test_rate_limited_if_device_count_exceeds_limit(self):
        self._is_limited('existing-device-id')
        self.assertTrue(self._is_limited('new-device-id'))

    def test_allowed_if_device_has_already_been_used(self):
        self._is_limited('existing-device-id')
        self.assertFalse(self._is_limited('existing-device-id'))

    def test_allowed_if_different_user(self):
        self._is_limited('existing-device-id')
        self.assertFalse(self._is_limited('existing-device-id', user_id='new-user-id'))

    def test_allowed_after_waiting_one_minute(self):
        with freeze_time("2024-12-10 12:05:43") as clock:
            self._is_limited('existing-device-id')
            self.assertTrue(self._is_limited('new-device-id'))
            # the limiter keys on the wall-clock minute, so moving into 12:06
            # starts a fresh window
            clock.move_to("2024-12-10 12:06:15")
            self.assertFalse(self._is_limited('new-device-id'))

    def test_formplayer_activity_is_always_allowed(self):
        self._is_limited('existing-device-id')
        for formplayer_device_id in ('WebAppsLogin*newlogin', CLOUDCARE_DEVICE_ID):
            self.assertFalse(self._is_limited(formplayer_device_id))

    def test_formplayer_activity_does_not_count_towards_limit(self):
        for formplayer_device_id in ('WebAppsLogin*newlogin', CLOUDCARE_DEVICE_ID):
            self._is_limited(formplayer_device_id)
        self.assertFalse(self._is_limited('new-device-id'))

    @override_settings(DEVICE_LIMIT_PER_USER=1, INCREASED_DEVICE_LIMIT_PER_USER=2)
    def test_allowed_after_enabling_ff_to_increase_limit(self):
        self._is_limited('existing-device-id')
        self.assertTrue(self._is_limited('new-device-id'))
        with flag_enabled('INCREASE_DEVICE_LIMIT_PER_USER'):
            self.assertFalse(self._is_limited('new-device-id'))

    @override_settings(ENABLE_DEVICE_RATE_LIMITER=False)
    def test_allowed_if_rate_limiter_is_disabled(self):
        self._is_limited('existing-device-id')
        self.assertFalse(self._is_limited('new-device-id'))
9 changes: 9 additions & 0 deletions corehq/toggles/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2988,3 +2988,12 @@ def domain_has_privilege_from_toggle(privilege_slug, domain):
tag=TAG_CUSTOM,
namespaces=[NAMESPACE_DOMAIN],
)

# Per-domain switch that raises the per-user device limit instead of removing it.
INCREASE_DEVICE_LIMIT_PER_USER = StaticToggle(
    slug='increase_device_per_user_limit',
    label=(
        'In the event that the DEVICE_LIMIT_PER_USER in settings becomes too restrictive, '
        'this flag can be used to increase the limit without completely removing it. '
        'See INCREASED_DEVICE_LIMIT_PER_USER in settings to see the exact value.'
    ),
    tag=TAG_SAAS_CONDITIONAL,
    namespaces=[NAMESPACE_DOMAIN],
)
4 changes: 4 additions & 0 deletions settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -1148,6 +1148,10 @@ def _pkce_required(client_id):
MAX_MOBILE_UCR_LIMIT = 300 # used in corehq.apps.cloudcare.util.should_restrict_web_apps_usage
MAX_MOBILE_UCR_SIZE = 100000 # max number of rows allowed when syncing a mobile UCR

DEVICE_LIMIT_PER_USER = 10  # number of devices allowed per user per minute
INCREASED_DEVICE_LIMIT_PER_USER = 100  # value when INCREASE_DEVICE_LIMIT_PER_USER ff is enabled
# When False, requests over the device limit are still counted in metrics but are
# never rejected; set to True to actually enforce the limit.
ENABLE_DEVICE_RATE_LIMITER = False

# used by periodic tasks that delete soft deleted data older than PERMANENT_DELETION_WINDOW days
PERMANENT_DELETION_WINDOW = 30 # days

Expand Down
Loading