From 90bdf7dbe3bc5d6af32714825c7110e78b26e69e Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Mon, 9 Mar 2020 14:39:55 +0200 Subject: [PATCH 01/20] move datadog init to AppConfig.ready --- corehq/util/datadog/apps.py | 27 +++++++++++++++++++++++++++ settings.py | 14 +------------- 2 files changed, 28 insertions(+), 13 deletions(-) create mode 100644 corehq/util/datadog/apps.py diff --git a/corehq/util/datadog/apps.py b/corehq/util/datadog/apps.py new file mode 100644 index 000000000000..032d76e978d5 --- /dev/null +++ b/corehq/util/datadog/apps.py @@ -0,0 +1,27 @@ +from django.apps import AppConfig +from django.conf import settings + + +class DatadogConfig(AppConfig): + + name = 'corehq.util.datadog' + verbose_name = 'Datadog' + + def ready(self): + if not settings.DATADOG_API_KEY or not settings.DATADOG_APP_KEY: + return + + try: + from datadog import initialize + except ImportError: + pass + else: + initialize(settings.DATADOG_API_KEY, settings.DATADOG_APP_KEY) + + if settings.UNIT_TESTING or settings.DEBUG or 'ddtrace.contrib.django' not in settings.INSTALLED_APPS: + try: + from ddtrace import tracer + tracer.enabled = False + except ImportError: + pass + diff --git a/settings.py b/settings.py index eaa6db1f5837..f4939ce9adcd 100755 --- a/settings.py +++ b/settings.py @@ -340,6 +340,7 @@ 'corehq.motech.openmrs', 'corehq.motech.repeaters', 'corehq.util', + 'corehq.util.datadog.apps.DatadogConfig' 'dimagi.ext', 'corehq.blobs', 'corehq.apps.case_search', @@ -2073,19 +2074,6 @@ if 'dummy' not in CACHES: CACHES['dummy'] = {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'} -try: - from datadog import initialize -except ImportError: - pass -else: - initialize(DATADOG_API_KEY, DATADOG_APP_KEY) - -if UNIT_TESTING or DEBUG or 'ddtrace.contrib.django' not in INSTALLED_APPS: - try: - from ddtrace import tracer - tracer.enabled = False - except ImportError: - pass REST_FRAMEWORK = { 'DATETIME_FORMAT': '%Y-%m-%dT%H:%M:%S.%fZ', From 7ad93fa2d6aeee3b0d337ae28eb5e0f83516e3e1 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Tue, 10 Mar 2020 12:33:08 +0200 Subject: [PATCH 02/20] pluggable metrics --- corehq/util/metrics/__init__.py | 19 ++++ corehq/util/metrics/datadog.py | 49 +++++++++++ corehq/util/metrics/metrics.py | 139 ++++++++++++++++++++++++++++++ corehq/util/metrics/prometheus.py | 46 ++++++++++ settings.py | 3 +- 5 files changed, 255 insertions(+), 1 deletion(-) create mode 100644 corehq/util/metrics/__init__.py create mode 100644 corehq/util/metrics/datadog.py create mode 100644 corehq/util/metrics/metrics.py create mode 100644 corehq/util/metrics/prometheus.py diff --git a/corehq/util/metrics/__init__.py b/corehq/util/metrics/__init__.py new file mode 100644 index 000000000000..7672abf5b545 --- /dev/null +++ b/corehq/util/metrics/__init__.py @@ -0,0 +1,19 @@ +from corehq.util.metrics.datadog import DatadogMetrics +from corehq.util.metrics.metrics import DummyMetrics, DelegatedMetrics, HqMetrics +from corehq.util.metrics.prometheus import PrometheusMetrics + +_metrics = None # singleton/global + + +def get_metrics() -> HqMetrics: + global _metrics + if not _metrics: + enabled = list(filter(lambda m: m.enabled(), [DatadogMetrics(), PrometheusMetrics()])) + if not enabled: + _metrics = DummyMetrics() + + if len(enabled) > 1: + _metrics = DelegatedMetrics(enabled) + + _metrics = enabled[0] + return _metrics diff --git a/corehq/util/metrics/datadog.py b/corehq/util/metrics/datadog.py new file mode 100644 index 000000000000..a0598168e9b1 --- /dev/null +++ b/corehq/util/metrics/datadog.py @@ -0,0 +1,49 @@ +import logging + +from django.conf import settings + +from corehq.util.metrics.metrics import HqCounter, HqGauge, HqMetrics +from datadog import api +from datadog.dogstatsd.base import DogStatsd + +datadog_logger = logging.getLogger('datadog') + +COMMON_TAGS = ['environment:{}'.format(settings.SERVER_ENVIRONMENT)] + +statsd = DogStatsd(constant_tags=COMMON_TAGS) + + +def _format_tags(tag_names, tag_values): + if not tag_names or not tag_values: + return None + + return [ + f'{name}:{value}' for name, value in zip(tag_names, tag_values) + ] + + +def _datadog_record(fn, name, value, tags=None): + try: + fn(name, value, tags=tags) + except Exception: + datadog_logger.exception('Unable to record Datadog stats') + + +class Counter(HqCounter): + def _inc(self, amount=1): + tags = _format_tags(self.tag_names, self.tag_values) + _datadog_record(statsd.increment, self.name, amount, tags) + + +class Gauge(HqGauge): + def _set(self, value): + tags = _format_tags(self.tag_names, self.tag_values) + _datadog_record(statsd.gauge, self.name, value, tags) + + +class DatadogMetrics(HqMetrics): + _counter_class = Counter + _gauge_class = Gauge + + def enabled(self) -> bool: + return api._api_key and api._application_key diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py new file mode 100644 index 000000000000..9b52266bee92 --- /dev/null +++ b/corehq/util/metrics/metrics.py @@ -0,0 +1,139 @@ +import abc +import re +from abc import abstractmethod +from typing import Iterable + +from corehq.util.soft_assert import soft_assert + +METRIC_NAME_RE = re.compile(r'^[a-zA-Z_:.][a-zA-Z0-9_:.]*$') +METRIC_TAG_NAME_RE = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$') +RESERVED_METRIC_TAG_NAME_RE = re.compile(r'^__.*$') +RESERVED_METRIC_TAG_NAMES = ['quantile', 'le'] + + +def _enforce_prefix(name, prefix): + soft_assert(fail_if_debug=True).call( + not prefix or name.startswith(prefix), + "Did you mean to call your metric 'commcare.{}'? ".format(name)) + + +def _validate_tag_names(tag_names): + tag_names = tuple(tag_names) + for l in tag_names: + if not METRIC_TAG_NAME_RE.match(l): + raise ValueError('Invalid metric tag name: ' + l) + if RESERVED_METRIC_TAG_NAME_RE.match(l): + raise ValueError('Reserved metric tag name: ' + l) + if l in RESERVED_METRIC_TAG_NAMES: + raise ValueError('Reserved metric tag name: ' + l) + return tag_names + + +class MetricBase: + def __init__(self, name: str, documentation: str, tag_names: Iterable=tuple(), tag_values=None): + self.name = name + if not METRIC_NAME_RE.match(name): + raise ValueError('Invalid metric name: ' + name) + _enforce_prefix(name, 'commcare') + self.documentation = documentation + self.tag_names = _validate_tag_names(tag_names) + self.tag_values = tag_values + + def tag(self, **tag_kwargs): + if sorted(tag_kwargs) != sorted(self.tag_names): + raise ValueError('Incorrect tag names') + + tag_values = tuple(str(tag_kwargs[t]) for t in self.tag_names) + return self._get_tagged_instance(tag_values) + + def _get_tagged_instance(self, tag_values): + return self.__class__(self.name, self.documentation, self.tag_names, tag_values) + + def _validate_tags(self): + if self.tag_names and not self.tag_values: + raise Exception('Metric has missing tag values.') + + +class HqCounter(MetricBase): + def inc(self, amount: float = 1): + """Increment the counter by the given amount.""" + self._validate_tags() + self._inc(amount) + + def _inc(self, amount: float = 1): + """Override this method to record the metric""" + pass + + +class HqGauge(MetricBase): + def set(self, value: float): + """Set gauge to the given value.""" + self._validate_tags() + self._set(value) + + def _set(self, value: float): + """Override this method to record the metric""" + pass + + +class HqMetrics(metaclass=abc.ABCMeta): + _counter_class = None + _gauge_class = None + + @abstractmethod + def enabled(self) -> bool: + raise NotImplementedError + + def counter(self, name: str, documentation: str, tag_names: Iterable=tuple()) -> HqCounter: + return self._counter_class(name, documentation, tag_names) + + def gauge(self, name: str, documentation: str, tag_names: Iterable=tuple()) -> HqGauge: + return self._gauge_class(name, documentation, tag_names) + + +class DummyMetrics(HqMetrics): + _counter_class = HqCounter + _gauge_class = HqGauge + + def enabled(self) -> bool: + return True + + +class DelegatedMetrics(HqMetrics): + def __init__(self, delegates): + self.delegates = delegates + + def enabled(self) -> bool: + return True + + def counter(self, name: str, documentation: str, tag_names: Iterable=tuple()): + return DelegatingCounter([ + d.counter(name, documentation, tag_names) for d in self.delegates + ]) + + def gauge(self, name: str, documentation: str, tag_names: Iterable=tuple()): + return DelegatingGauge([ + d.gauge(name, documentation, tag_names) for d in self.delegates + ]) + + +class DelegatingMetric: + def __init__(self, delegates): + self._delegates = delegates + + def tag(self, **tag_kwargs): + return self.__class__([ + d.tag(**tag_kwargs) for d in self._delegates + ]) + + +class DelegatingCounter(DelegatingMetric): + def inc(self, amount: float = 1): + for d in self._delegates: + d.inc(amount) + + +class DelegatingGauge(DelegatingMetric): + def set(self, value: float): + for d in self._delegates: + d.set(value) diff --git a/corehq/util/metrics/prometheus.py b/corehq/util/metrics/prometheus.py new file mode 100644 index 000000000000..6637f7097520 --- /dev/null +++ b/corehq/util/metrics/prometheus.py @@ -0,0 +1,46 @@ +from typing import Iterable + +import settings +from corehq.util.metrics.metrics import ( + HqCounter, + HqGauge, + HqMetrics, + MetricBase, +) +from prometheus_client import Counter as PCounter +from prometheus_client import Gauge as PGauge + + +class PrometheusMetricBase(MetricBase): + _metric_class = None + + def __init__(self, name: str, documentation: str, tag_names: Iterable=tuple(), tag_values=None, delegate=None): + name = name.replace('.', '_') + super().__init__(name, documentation, tag_names, tag_values) + self._delegate = delegate or self._metric_class(name, documentation, tag_names, labelvalues=tag_values) + + def _get_tagged_instance(self, tag_values): + delegate = self._delegate.labels(dict(zip(self.tag_names, tag_values))) + return self.__class__(self.name, self.documentation, self.tag_names, self.tag_values, delegate) + + +class Counter(PrometheusMetricBase, HqCounter): + _metric_class = PCounter + + def _inc(self, amount=1): + self._delegate.inc(amount) + + +class Gauge(PrometheusMetricBase, HqGauge): + _metric_class = PGauge + + def _set(self, value: float): + self._delegate.set(value) + + +class PrometheusMetrics(HqMetrics): + _counter_class = Counter + _gauge_class = Gauge + + def enabled(self) -> bool: + return settings.ENABLE_PROMETHEUS_METRICS diff --git a/settings.py b/settings.py index f4939ce9adcd..05431414ab78 100755 --- a/settings.py +++ b/settings.py @@ -340,7 +340,7 @@ 'corehq.motech.openmrs', 'corehq.motech.repeaters', 'corehq.util', - 'corehq.util.datadog.apps.DatadogConfig' + 'corehq.util.datadog.apps.DatadogConfig', 'dimagi.ext', 'corehq.blobs', 'corehq.apps.case_search', @@ -838,6 +838,7 @@ DATADOG_API_KEY = None DATADOG_APP_KEY = None +ENABLE_PROMETHEUS_METRICS = False SYNCLOGS_SQL_DB_ALIAS = 'default' From 0694926fc68420d9ef09b8d5a29f315abd3e8266 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Tue, 10 Mar 2020 17:41:01 +0200 Subject: [PATCH 03/20] add histogram and tests --- corehq/util/metrics/datadog.py | 40 ++++- corehq/util/metrics/metrics.py | 58 ++++++-- corehq/util/metrics/prometheus.py | 43 ++++-- corehq/util/metrics/tests/__init__.py | 0 corehq/util/metrics/tests/test_metrics.py | 171 ++++++++++++++++++++++ corehq/util/metrics/tests/utils.py | 22 +++ 6 files changed, 308 insertions(+), 26 deletions(-) create mode 100644 corehq/util/metrics/tests/__init__.py create mode 100644 corehq/util/metrics/tests/test_metrics.py create mode 100644 corehq/util/metrics/tests/utils.py diff --git a/corehq/util/metrics/datadog.py b/corehq/util/metrics/datadog.py index a0598168e9b1..748718c3f23c 100644 --- a/corehq/util/metrics/datadog.py +++ b/corehq/util/metrics/datadog.py @@ -2,7 +2,13 @@ from django.conf import settings -from corehq.util.metrics.metrics import HqCounter, HqGauge, HqMetrics +from corehq.util.datadog.utils import bucket_value +from corehq.util.metrics.metrics import ( + HqCounter, + HqGauge, + HqHistogram, + HqMetrics, +) from datadog import api from datadog.dogstatsd.base import DogStatsd @@ -30,20 +36,48 @@ def _datadog_record(fn, name, value, tags=None): class Counter(HqCounter): - def _inc(self, amount=1): + def _record(self, amount: float): tags = _format_tags(self.tag_names, self.tag_values) _datadog_record(statsd.increment, self.name, amount, tags) class Gauge(HqGauge): - def _set(self, value): + def _record(self, value): tags = _format_tags(self.tag_names, self.tag_values) _datadog_record(statsd.gauge, self.name, value, tags) +class Histogram(HqHistogram): + """This implementation of histogram uses tagging to record the buckets. + It does not use the Datadog Histogram metric type. + + The metric itself will be incremented by 1 on each call to `observe`. The value + passed to `observe` will be used to create the bucket tag. + + For example: + + h = Histogram( + 'commcare.request.duration', 'description', + bucket_tag='duration', buckets=[1,2,3], bucket_units='ms' + ) + h.observe(1.4) + + # resulting Datadog metric + # commcare.request.duration:1|c|#duration:lt_2ms + + More details: https://help.datadoghq.com/hc/en-us/articles/211545826 + """ + def _record(self, value: float): + tags = _format_tags(self.tag_names, self.tag_values) + bucket = bucket_value(value, self._buckets, self._bucket_unit) + tags.append(f'{self._bucket_tag}:{bucket}') + _datadog_record(statsd.increment, self.name, 1, tags) + + class DatadogMetrics(HqMetrics): _counter_class = Counter _gauge_class = Gauge + _histogram_class = Histogram def enabled(self) -> bool: return api._api_key and api._application_key diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index 9b52266bee92..73f7dad36e75 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -1,9 +1,10 @@ import abc import re from abc import abstractmethod -from typing import Iterable +from typing import Iterable, List from corehq.util.soft_assert import soft_assert +from prometheus_client.utils import INF METRIC_NAME_RE = re.compile(r'^[a-zA-Z_:.][a-zA-Z0-9_:.]*$') METRIC_TAG_NAME_RE = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$') @@ -30,14 +31,19 @@ def _validate_tag_names(tag_names): class MetricBase: - def __init__(self, name: str, documentation: str, tag_names: Iterable=tuple(), tag_values=None): + def __init__(self, name: str, documentation: str, tag_names: Iterable=tuple(), **kwargs): self.name = name if not METRIC_NAME_RE.match(name): raise ValueError('Invalid metric name: ' + name) _enforce_prefix(name, 'commcare') self.documentation = documentation self.tag_names = _validate_tag_names(tag_names) - self.tag_values = tag_values + self.tag_values = kwargs.pop('tag_values', None) + self._kwargs = kwargs + self._init_metric() + + def _init_metric(self): + pass def tag(self, **tag_kwargs): if sorted(tag_kwargs) != sorted(self.tag_names): @@ -47,38 +53,54 @@ def tag(self, **tag_kwargs): return self._get_tagged_instance(tag_values) def _get_tagged_instance(self, tag_values): - return self.__class__(self.name, self.documentation, self.tag_names, tag_values) + return self.__class__( + self.name, self.documentation, tag_names=self.tag_names, tag_values=tag_values, **self._kwargs + ) def _validate_tags(self): if self.tag_names and not self.tag_values: raise Exception('Metric has missing tag values.') + def _record(self, value: float): + pass + class HqCounter(MetricBase): def inc(self, amount: float = 1): """Increment the counter by the given amount.""" self._validate_tags() - self._inc(amount) - - def _inc(self, amount: float = 1): - """Override this method to record the metric""" - pass + self._record(amount) class HqGauge(MetricBase): def set(self, value: float): """Set gauge to the given value.""" self._validate_tags() - self._set(value) + self._record(value) - def _set(self, value: float): - """Override this method to record the metric""" - pass + +DEFAULT_BUCKETS = (.005, .01, .025, .05, .075, .1, .25, .5, .75, 1.0, 2.5, 5.0, 7.5, 10.0, INF) + + +class HqHistogram(MetricBase): + + def _init_metric(self): + self._buckets = self._kwargs.get('buckets') or DEFAULT_BUCKETS + self._bucket_unit = self._kwargs.get('bucket_unit', '') + self._bucket_tag = self._kwargs.get('bucket_tag') + if self._bucket_tag in self.tag_names: + self.tag_names = tuple([name for name in self.tag_names if name != self._bucket_tag]) + + def observe(self, value: float): + """Update histogram with the given value.""" + self._validate_tags() + self._record(value) class HqMetrics(metaclass=abc.ABCMeta): _counter_class = None _gauge_class = None + _histogram_class = None @abstractmethod def enabled(self) -> bool: @@ -90,6 +112,16 @@ def counter(self, name: str, documentation: str, tag_names: Iterable=tuple()) -> def gauge(self, name: str, documentation: str, tag_names: Iterable=tuple()) -> HqGauge: return self._gauge_class(name, documentation, tag_names) + def histogram(self, name: str, documentation: str, + bucket_tag: str, buckets: List[int] = DEFAULT_BUCKETS, bucket_unit: str = '', + tag_names: Iterable=tuple()) -> HqGauge: + """Create a histogram metric. Histogram implementations differ between provider. See provider + implementations for details. + """ + return self._histogram_class( + name, documentation, tag_names, bucket_tag=bucket_tag, buckets=buckets, bucket_unit=bucket_unit + ) + class DummyMetrics(HqMetrics): _counter_class = HqCounter diff --git a/corehq/util/metrics/prometheus.py b/corehq/util/metrics/prometheus.py index 6637f7097520..c98146f3886d 100644 --- a/corehq/util/metrics/prometheus.py +++ b/corehq/util/metrics/prometheus.py @@ -1,46 +1,69 @@ -from typing import Iterable - import settings from corehq.util.metrics.metrics import ( HqCounter, HqGauge, + HqHistogram, HqMetrics, MetricBase, ) from prometheus_client import Counter as PCounter from prometheus_client import Gauge as PGauge +from prometheus_client import Histogram as PHistogram class PrometheusMetricBase(MetricBase): _metric_class = None - def __init__(self, name: str, documentation: str, tag_names: Iterable=tuple(), tag_values=None, delegate=None): - name = name.replace('.', '_') - super().__init__(name, documentation, tag_names, tag_values) - self._delegate = delegate or self._metric_class(name, documentation, tag_names, labelvalues=tag_values) + def _init_metric(self): + super()._init_metric() + self.name = self.name.replace('.', '_') + delegate = self._kwargs.get('delegate') + self._delegate = delegate or self._metric_class(self.name, self.documentation, self.tag_names) def _get_tagged_instance(self, tag_values): - delegate = self._delegate.labels(dict(zip(self.tag_names, tag_values))) - return self.__class__(self.name, self.documentation, self.tag_names, self.tag_values, delegate) + delegate = self._delegate.labels(**dict(zip(self.tag_names, tag_values))) + return self.__class__( + self.name, self.documentation, + tag_names=self.tag_names, tag_values=tag_values, delegate=delegate, **self._kwargs + ) class Counter(PrometheusMetricBase, HqCounter): + """https://prometheus.io/docs/concepts/metric_types/#counter""" _metric_class = PCounter - def _inc(self, amount=1): + def _record(self, amount: float): self._delegate.inc(amount) class Gauge(PrometheusMetricBase, HqGauge): + """https://prometheus.io/docs/concepts/metric_types/#gauge""" _metric_class = PGauge - def _set(self, value: float): + def _record(self, value: float): self._delegate.set(value) +class Histogram(PrometheusMetricBase, HqHistogram): + """This metric class implements the native Prometheus Histogram type + + https://prometheus.io/docs/concepts/metric_types/#histogram + """ + def _init_metric(self): + HqHistogram._init_metric(self) # skip _init_metric on PrometheusMetricBase + self.name = self.name.replace('.', '_') + self._delegate = self._kwargs.get('delegate') + if not self._delegate: + self._delegate = PHistogram(self.name, self.documentation, self.tag_names, buckets=self._buckets) + + def _record(self, value: float): + self._delegate.observe(value) + + class PrometheusMetrics(HqMetrics): _counter_class = Counter _gauge_class = Gauge + _histogram_class = Histogram def enabled(self) -> bool: return settings.ENABLE_PROMETHEUS_METRICS diff --git a/corehq/util/metrics/tests/__init__.py b/corehq/util/metrics/tests/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/corehq/util/metrics/tests/test_metrics.py b/corehq/util/metrics/tests/test_metrics.py new file mode 100644 index 000000000000..caf4eb4c0be6 --- /dev/null +++ b/corehq/util/metrics/tests/test_metrics.py @@ -0,0 +1,171 @@ +from typing import Dict, Tuple + +from django.test import SimpleTestCase + +from corehq.util.metrics import DatadogMetrics, PrometheusMetrics +from corehq.util.metrics.metrics import HqCounter, HqGauge, HqHistogram +from corehq.util.metrics.tests.utils import patch_datadog +from prometheus_client.samples import Sample +from prometheus_client.utils import INF + + +class _TestMetrics(SimpleTestCase): + provider_class = None + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.provider = cls.provider_class() + + def test_counter(self): + counter = self.provider.counter('commcare.test.counter', 'Description', tag_names=['t1', 't2']) + counter.tag(t1='a', t2='b').inc() + counter.tag(t1='c', t2='b').inc(2) + counter.tag(t1='c', t2='b').inc() + self.assertCounterMetric(counter, { + (('t1', 'a'), ('t2', 'b')): 1, + (('t1', 'c'), ('t2', 'b')): 3, + }) + + def test_gauge(self): + gauge = self.provider.gauge('commcare.test.gauge', 'Description', tag_names=['t1', 't2']) + gauge.tag(t1='a', t2='b').set(4.2) + gauge.tag(t1='c', t2='b').set(2) + gauge.tag(t1='c', t2='b').set(5) + self.assertGaugeMetric(gauge, { + (('t1', 'a'), ('t2', 'b')): 4.2, + (('t1', 'c'), ('t2', 'b')): 5, + }) + + def test_histogram(self): + histogram = self.provider.histogram( + 'commcare.test.histogram', 'Description', 'duration', + buckets=[1, 2, 3], bucket_unit='ms', tag_names=['t1', 't2'] + ) + tagged_1 = histogram.tag(t1='a', t2='b') + tagged_1.observe(0.2) + tagged_1.observe(0.7) + tagged_1.observe(2.5) + + tagged_2 = histogram.tag(t1='c', t2='b') + tagged_2.observe(2) + tagged_2.observe(5) + self.assertHistogramMetric(histogram, { + (('t1', 'a'), ('t2', 'b')): {1: 2, 3: 1}, + (('t1', 'c'), ('t2', 'b')): {2: 1, INF: 1} + }) + + def assertCounterMetric(self, metric: HqCounter, expected: Dict[Tuple[Tuple[str, str], ...], float]): + """ + :param metric: metric class + :param expected: dict mapping tag tuples to metric values + """ + raise NotImplementedError + + def assertGaugeMetric(self, metric: HqGauge, expected: Dict[Tuple[Tuple[str, str], ...], float]): + """ + :param metric: metric class + :param expected: dict mapping tag tuples to metric values + """ + raise NotImplementedError + + def assertHistogramMetric(self, + metric: HqHistogram, + expected: Dict[Tuple[Tuple[str, str], ...], Dict[float, int]]): + """ + :param metric: metric class + :param expected: dict mapping tag tuples to a dict of bucket values + """ + raise NotImplementedError + + +class TestDatadogMetrics(_TestMetrics): + provider_class = DatadogMetrics + + def setUp(self) -> None: + super().setUp() + self.patch = patch_datadog() + self.metrics = self.patch.__enter__() + + def tearDown(self) -> None: + self.patch.__exit__(None, None, None) + super().tearDown() + + def assertCounterMetric(self, metric, expected): + self.assertEqual({key[0] for key in self.metrics}, {metric.name}) + actual = { + key[1]: sum(val) for key, val in self.metrics.items() + } + self.assertDictEqual(actual, expected) + + def assertGaugeMetric(self, metric, expected): + self.assertEqual({key[0] for key in self.metrics}, {metric.name}) + actual = { + key[1]: val[-1] for key, val in self.metrics.items() + } + self.assertDictEqual(actual, expected) + + def assertHistogramMetric(self, metric, expected): + self.assertEqual({key[0] for key in self.metrics}, {metric.name}) + expected_samples = {} + for tags, buckets in expected.items(): + for bucket, val in buckets.items(): + prefix = 'lt' + if bucket == INF: + bucket = metric._buckets[-1] + prefix = 'over' + bucket_tag = (metric._bucket_tag, f'{prefix}_{bucket:03d}{metric._bucket_unit}') + expected_samples[tags + (bucket_tag,)] = val + + actual = { + key[1]: sum(val) for key, val in self.metrics.items() + } + self.assertDictEqual(actual, expected_samples) + + +class TestPrometheusMetrics(_TestMetrics): + provider_class = PrometheusMetrics + + def _samples_to_dics(self, samples, filter_name=None): + """Convert a Sample tuple into a dict((name, (labels tuple)) -> value)""" + return { + tuple(sample.labels.items()): sample.value + for sample in samples + if not filter_name or sample.name == filter_name + } + + def assertGaugeMetric(self, metric, expected): + [collected] = metric._delegate.collect() + actual = self._samples_to_dics(collected.samples) + self.assertDictEqual(actual, expected) + + def assertCounterMetric(self, metric, expected): + total_name = f'{metric.name}_total' + [collected] = metric._delegate.collect() + actual = self._samples_to_dics(collected.samples, total_name) + self.assertDictEqual(actual, expected) + + def assertHistogramMetric(self, metric, expected): + # Note that Prometheus histograms are cumulative so we must sum up the successive bucket values + # https://en.wikipedia.org/wiki/Histogram#Cumulative_histogram + [collected] = metric._delegate.collect() + + sample_name = f'{metric.name}_bucket' + expected_samples = [] + for key, value in expected.items(): + cumulative_value = 0 + for bucket in metric._buckets: + val = value.get(bucket, 0) + cumulative_value += val + labels = dict(key + (('le', str(float(bucket))),)) + expected_samples.append(Sample(sample_name, labels, float(cumulative_value), None, None)) + + labels = dict(key + (('le', '+Inf'),)) + cumulative_value += value.get(INF, 0) + expected_samples.append(Sample(sample_name, labels, float(cumulative_value), None, None)) + + actual = [ + s for s in collected.samples + if s.name.endswith('bucket') + ] + self.assertListEqual(actual, expected_samples) diff --git a/corehq/util/metrics/tests/utils.py b/corehq/util/metrics/tests/utils.py new file mode 100644 index 000000000000..9c7f94b2d104 --- /dev/null +++ b/corehq/util/metrics/tests/utils.py @@ -0,0 +1,22 @@ +from collections import defaultdict +from contextlib import contextmanager + +import mock + + +@contextmanager +def patch_datadog(): + def record(fn, name, value, tags=None): + key = tuple([ + name, + tuple([tuple(t.split(':')) for t in (tags or [])]), + ]) + stats[key].append(value) + + stats = defaultdict(list) + patch = mock.patch("corehq.util.metrics.datadog._datadog_record", new=record) + patch.start() + try: + yield stats + finally: + patch.stop() From c1f8270e865523937f4390fb7ea55ba093ec185c Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Tue, 10 Mar 2020 17:41:11 +0200 Subject: [PATCH 04/20] bucket values less than or equal --- corehq/util/datadog/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corehq/util/datadog/utils.py b/corehq/util/datadog/utils.py index 9d495679260f..c61a4d099cb1 100644 --- a/corehq/util/datadog/utils.py +++ b/corehq/util/datadog/utils.py @@ -115,7 +115,7 @@ def bucket_value(value, buckets, unit=''): lt_template = "lt_{:0%s}{}" % number_length over_template = "over_{:0%s}{}" % number_length for bucket in buckets: - if value < bucket: + if value <= bucket: return lt_template.format(bucket, unit) return over_template.format(buckets[-1], unit) From 5034156b76b1918a6b784f6cd8883380cc0bbec3 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Tue, 10 Mar 2020 18:08:00 +0200 Subject: [PATCH 05/20] add prometheus client to requirements --- requirements-python3/dev-requirements.txt | 1 + requirements-python3/prod-requirements.txt | 1 + requirements-python3/requirements.txt | 1 + requirements-python3/test-requirements.txt | 1 + requirements/dev-requirements.txt | 1 + requirements/prod-requirements.txt | 1 + requirements/requirements.in | 1 + requirements/requirements.txt | 1 + requirements/test-requirements.txt | 1 + 9 files changed, 9 insertions(+) diff --git a/requirements-python3/dev-requirements.txt b/requirements-python3/dev-requirements.txt index d0741b0bb668..ddaa038ed973 100644 --- a/requirements-python3/dev-requirements.txt +++ b/requirements-python3/dev-requirements.txt @@ -130,6 +130,7 @@ pillow==6.2.1 pip-tools==4.4.0 ply==3.11 # via eulxml, jsonpath-rw polib==1.1.0 +prometheus-client==0.7.1 prompt-toolkit==1.0.18 # via ipython psutil==5.1.3 psycogreen==1.0.1 diff --git a/requirements-python3/prod-requirements.txt b/requirements-python3/prod-requirements.txt index 5627994d311c..128b82f8f986 100644 --- a/requirements-python3/prod-requirements.txt +++ b/requirements-python3/prod-requirements.txt @@ -109,6 +109,7 @@ pickleshare==0.7.5 # via ipython pillow==6.2.1 ply==3.11 # via eulxml, jsonpath-rw polib==1.1.0 +prometheus-client==0.7.1 prompt-toolkit==1.0.18 # via ipython psycogreen==1.0.1 psycopg2==2.7.7 diff --git a/requirements-python3/requirements.txt b/requirements-python3/requirements.txt index 207e181a974d..148161bb967e 100644 --- a/requirements-python3/requirements.txt +++ b/requirements-python3/requirements.txt @@ -101,6 +101,7 @@ phonenumberslite==8.10.22 pillow==6.2.1 ply==3.11 # via eulxml, jsonpath-rw polib==1.1.0 +prometheus-client==0.7.1 psycogreen==1.0.1 psycopg2==2.7.7 py-kissmetrics==1.0.1 diff --git a/requirements-python3/test-requirements.txt b/requirements-python3/test-requirements.txt index 12bf9a824042..6c5950ac74cf 100644 --- a/requirements-python3/test-requirements.txt +++ b/requirements-python3/test-requirements.txt @@ -113,6 +113,7 @@ pillow==6.2.1 pip-tools==4.4.0 ply==3.11 # via eulxml, jsonpath-rw polib==1.1.0 +prometheus-client==0.7.1 psycogreen==1.0.1 psycopg2==2.7.7 py-kissmetrics==1.0.1 diff --git a/requirements/dev-requirements.txt b/requirements/dev-requirements.txt index d0741b0bb668..ddaa038ed973 100644 --- a/requirements/dev-requirements.txt +++ b/requirements/dev-requirements.txt @@ -130,6 +130,7 @@ pillow==6.2.1 pip-tools==4.4.0 ply==3.11 # via eulxml, jsonpath-rw polib==1.1.0 +prometheus-client==0.7.1 prompt-toolkit==1.0.18 # via ipython psutil==5.1.3 psycogreen==1.0.1 diff --git a/requirements/prod-requirements.txt b/requirements/prod-requirements.txt index 5627994d311c..128b82f8f986 100644 --- a/requirements/prod-requirements.txt +++ b/requirements/prod-requirements.txt @@ -109,6 +109,7 @@ pickleshare==0.7.5 # via ipython pillow==6.2.1 ply==3.11 # via eulxml, jsonpath-rw polib==1.1.0 +prometheus-client==0.7.1 prompt-toolkit==1.0.18 # via ipython psycogreen==1.0.1 psycopg2==2.7.7 diff --git a/requirements/requirements.in b/requirements/requirements.in index c4754c951eea..25295c647c87 100644 --- a/requirements/requirements.in +++ b/requirements/requirements.in @@ -114,3 +114,4 @@ werkzeug==0.11.15 CommcareTranslationChecker==0.9.3.5 WeasyPrint==0.42.3 architect==0.5.6 +prometheus-client==0.7.1 diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 207e181a974d..148161bb967e 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -101,6 +101,7 @@ phonenumberslite==8.10.22 pillow==6.2.1 ply==3.11 # via eulxml, jsonpath-rw polib==1.1.0 +prometheus-client==0.7.1 psycogreen==1.0.1 psycopg2==2.7.7 py-kissmetrics==1.0.1 diff --git a/requirements/test-requirements.txt b/requirements/test-requirements.txt index 12bf9a824042..6c5950ac74cf 100644 --- a/requirements/test-requirements.txt +++ b/requirements/test-requirements.txt @@ -113,6 +113,7 @@ pillow==6.2.1 pip-tools==4.4.0 ply==3.11 # via eulxml, jsonpath-rw polib==1.1.0 +prometheus-client==0.7.1 psycogreen==1.0.1 psycopg2==2.7.7 py-kissmetrics==1.0.1 From 2be4e6a27c6cab64d0ebc696c2f6341915e2e79e Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Wed, 11 Mar 2020 14:46:18 +0200 Subject: [PATCH 06/20] make metrics lazy --- corehq/util/metrics/__init__.py | 20 +++---- corehq/util/metrics/datadog.py | 2 + corehq/util/metrics/metrics.py | 63 +++++++++++------------ corehq/util/metrics/tests/test_metrics.py | 60 ++++++++++++++++++--- 4 files changed, 93 insertions(+), 52 deletions(-) diff --git a/corehq/util/metrics/__init__.py b/corehq/util/metrics/__init__.py index 7672abf5b545..6789a3130bfc 100644 --- a/corehq/util/metrics/__init__.py +++ b/corehq/util/metrics/__init__.py @@ -1,19 +1,15 @@ +from django.utils.functional import SimpleLazyObject + from corehq.util.metrics.datadog import DatadogMetrics from corehq.util.metrics.metrics import DummyMetrics, DelegatedMetrics, HqMetrics from corehq.util.metrics.prometheus import PrometheusMetrics -_metrics = None # singleton/global - -def get_metrics() -> HqMetrics: - global _metrics - if not _metrics: - enabled = list(filter(lambda m: m.enabled(), [DatadogMetrics(), PrometheusMetrics()])) - if not enabled: - _metrics = DummyMetrics() +def _get_metrics(): + enabled = list(filter(lambda m: m.enabled(), [DatadogMetrics(), PrometheusMetrics()])) + if not enabled: + return [DummyMetrics()] + return enabled - if len(enabled) > 1: - _metrics = DelegatedMetrics(enabled) - _metrics = enabled[0] - return _metrics +metrics = DelegatedMetrics(SimpleLazyObject(_get_metrics)) # singleton/global diff --git a/corehq/util/metrics/datadog.py b/corehq/util/metrics/datadog.py index 748718c3f23c..e13d5c3d9a1a 100644 --- a/corehq/util/metrics/datadog.py +++ b/corehq/util/metrics/datadog.py @@ -69,6 +69,8 @@ class Histogram(HqHistogram): """ def _record(self, value: float): tags = _format_tags(self.tag_names, self.tag_values) + if not tags: + tags = [] bucket = bucket_value(value, self._buckets, self._bucket_unit) tags.append(f'{self._bucket_tag}:{bucket}') _datadog_record(statsd.increment, self.name, 1, tags) diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index 73f7dad36e75..c4038abaf6ed 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -3,6 +3,8 @@ from abc import abstractmethod from typing import Iterable, List +from django.utils.functional import SimpleLazyObject + from corehq.util.soft_assert import soft_assert from prometheus_client.utils import INF @@ -31,7 +33,7 @@ def _validate_tag_names(tag_names): class MetricBase: - def __init__(self, name: str, documentation: str, tag_names: Iterable=tuple(), **kwargs): + def __init__(self, name: str, documentation: str, tag_names: Iterable = tuple(), **kwargs): self.name = name if not METRIC_NAME_RE.match(name): raise ValueError('Invalid metric name: ' + name) @@ -62,7 +64,7 @@ def _validate_tags(self): raise Exception('Metric has missing tag values.') def _record(self, value: float): - pass + raise NotImplementedError class HqCounter(MetricBase): @@ -114,7 +116,7 @@ def gauge(self, name: str, documentation: str, tag_names: Iterable=tuple()) -> H def histogram(self, name: str, documentation: str, bucket_tag: str, buckets: List[int] = DEFAULT_BUCKETS, bucket_unit: str = '', - tag_names: Iterable=tuple()) -> HqGauge: + tag_names: Iterable=tuple()) -> HqHistogram: """Create a histogram metric. Histogram implementations differ between provider. See provider implementations for details. """ @@ -131,41 +133,38 @@ def enabled(self) -> bool: return True -class DelegatedMetrics(HqMetrics): +class DelegatedMetrics: def __init__(self, delegates): self.delegates = delegates - - def enabled(self) -> bool: - return True - - def counter(self, name: str, documentation: str, tag_names: Iterable=tuple()): - return DelegatingCounter([ - d.counter(name, documentation, tag_names) for d in self.delegates - ]) - - def gauge(self, name: str, documentation: str, tag_names: Iterable=tuple()): - return DelegatingGauge([ - d.gauge(name, documentation, tag_names) for d in self.delegates - ]) + self._types = { + 'counter': 'inc', + 'gauge': 'set', + 'histogram': 'observe', + } + + def __getattr__(self, item): + if item in self._types: + def _make_type(*args, **kwargs): + return SimpleLazyObject(lambda: DelegatingMetric([ + getattr(d, item)(*args, **kwargs) for d in self.delegates + ], self._types[item])) + return _make_type + raise AttributeError class DelegatingMetric: - def __init__(self, delegates): + def __init__(self, delegates, record_fn_name): self._delegates = delegates + self._record_fn_name = record_fn_name - def tag(self, **tag_kwargs): - return self.__class__([ - d.tag(**tag_kwargs) for d in self._delegates - ]) + def tag(self, *args, **kwargs): + return self.__class__([d.tag(*args, **kwargs) for d in self._delegates]) + def __getattr__(self, item): + if item == self._record_fn_name: + def record(*args, **kwargs): + for metric in self._delegates: + getattr(metric, item)(*args, **kwargs) + return record -class DelegatingCounter(DelegatingMetric): - def inc(self, amount: float = 1): - for d in self._delegates: - d.inc(amount) - - -class DelegatingGauge(DelegatingMetric): - def set(self, value: float): - for d in self._delegates: - d.set(value) + raise AttributeError diff --git a/corehq/util/metrics/tests/test_metrics.py b/corehq/util/metrics/tests/test_metrics.py index caf4eb4c0be6..c6d721921780 100644 --- a/corehq/util/metrics/tests/test_metrics.py +++ b/corehq/util/metrics/tests/test_metrics.py @@ -1,12 +1,19 @@ from typing import Dict, Tuple from django.test import SimpleTestCase +from django.utils.functional import SimpleLazyObject from corehq.util.metrics import DatadogMetrics, PrometheusMetrics -from corehq.util.metrics.metrics import HqCounter, HqGauge, HqHistogram +from corehq.util.metrics.metrics import ( + DelegatedMetrics, + HqCounter, + HqGauge, + HqHistogram, +) from corehq.util.metrics.tests.utils import patch_datadog from prometheus_client.samples import Sample from prometheus_client.utils import INF +from testil import eq class _TestMetrics(SimpleTestCase): @@ -85,28 +92,28 @@ class TestDatadogMetrics(_TestMetrics): def setUp(self) -> None: super().setUp() self.patch = patch_datadog() - self.metrics = self.patch.__enter__() + self.recorded_metrics = self.patch.__enter__() def tearDown(self) -> None: self.patch.__exit__(None, None, None) super().tearDown() def assertCounterMetric(self, metric, expected): - self.assertEqual({key[0] for key in self.metrics}, {metric.name}) + self.assertEqual({key[0] for key in self.recorded_metrics}, {metric.name}) actual = { - key[1]: sum(val) for key, val in self.metrics.items() + key[1]: sum(val) for key, val in self.recorded_metrics.items() } self.assertDictEqual(actual, expected) def assertGaugeMetric(self, metric, expected): - self.assertEqual({key[0] for key in self.metrics}, {metric.name}) + self.assertEqual({key[0] for key in self.recorded_metrics}, {metric.name}) actual = { - key[1]: val[-1] for key, val in self.metrics.items() + key[1]: val[-1] for key, val in self.recorded_metrics.items() } self.assertDictEqual(actual, expected) def assertHistogramMetric(self, metric, expected): - self.assertEqual({key[0] for key in self.metrics}, {metric.name}) + self.assertEqual({key[0] for key in self.recorded_metrics}, {metric.name}) expected_samples = {} for tags, buckets in expected.items(): for bucket, val in buckets.items(): @@ -118,7 +125,7 @@ def assertHistogramMetric(self, metric, expected): expected_samples[tags + (bucket_tag,)] = val actual = { - key[1]: sum(val) for key, val in self.metrics.items() + key[1]: sum(val) for key, val in self.recorded_metrics.items() } self.assertDictEqual(actual, expected_samples) @@ -169,3 +176,40 @@ def assertHistogramMetric(self, metric, expected): if s.name.endswith('bucket') ] self.assertListEqual(actual, expected_samples) + + +def test_delegate_lazy(): + metrics = DelegatedMetrics([DatadogMetrics(), PrometheusMetrics()]) + + def _check(metric): + assert isinstance(metric, SimpleLazyObject), '' + + test_cases = [ + metrics.counter('commcare.name.1', ''), + metrics.gauge('commcare.name.2', ''), + metrics.histogram('commcare.name.3', '', 'duration'), + ] + for metric in test_cases: + yield _check, metric + + +def test_lazy_recording(): + metrics = DelegatedMetrics([DatadogMetrics(), PrometheusMetrics()]) + + def _check(metric, method_name): + with patch_datadog() as stats: + getattr(metric, method_name)(1) + + dd_metric, prom_metric = metric._delegates + [collected] = prom_metric._delegate.collect() + + eq(len(stats), 1, stats) + eq(len(collected.samples) >= 1, True, collected.samples) + + test_cases = [ + (metrics.counter('commcare.name.1', ''), 'inc'), + (metrics.gauge('commcare.name.2', ''), 'set'), + (metrics.histogram('commcare.name.3', '', 'duration'), 'observe'), + ] + for metric, method_name in test_cases: + yield _check, metric, method_name From eb8afe3fdc83b34a41d2531fc47dcd00952c9f6e Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Wed, 11 Mar 2020 16:45:31 +0200 Subject: [PATCH 07/20] example histogram --- corehq/apps/api/odata/utils.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/corehq/apps/api/odata/utils.py b/corehq/apps/api/odata/utils.py index f09608e12274..6992808fa2e4 100644 --- a/corehq/apps/api/odata/utils.py +++ b/corehq/apps/api/odata/utils.py @@ -4,6 +4,7 @@ from corehq.apps.export.models import ExportInstance from corehq.util.datadog.gauges import datadog_counter from corehq.util.datadog.utils import bucket_value +from corehq.util.metrics import metrics FieldMetadata = namedtuple('FieldMetadata', ['name', 'odata_type']) @@ -54,6 +55,13 @@ def _get_dot_path(export_column): return metadata +odata_feed_access_histogram = metrics.histogram( + 'commcare.odata_feed.test_v3', 'Odata Feed Access', + bucket_tag='duration_bucket', buckets=(1, 5, 20, 60, 120, 300, 600), bucket_unit='s', + tag_names=['domain', 'feed_id', 'feed_type', 'username', 'row_count', 'column_count', 'size'] +) + + def record_feed_access_in_datadog(request, config_id, duration, response): config = ExportInstance.get(config_id) username = request.couch_user.username @@ -64,14 +72,12 @@ def record_feed_access_in_datadog(request, config_id, duration, response): column_count = len(rows[0]) except IndexError: column_count = 0 - datadog_counter('commcare.odata_feed.test_v3', tags=[ - 'domain:{}'.format(request.domain), - 'feed_id:{}'.format(config_id), - 'feed_type:{}'.format(config.type), - 'username:{}'.format(username), - 'row_count:{}'.format(row_count), - 'column_count:{}'.format(column_count), - 'size:{}'.format(len(response.content)), - 'duration:{}'.format(duration), - 'duration_bucket:{}'.format(bucket_value(duration, (1, 5, 20, 60, 120, 300, 600), 's')), - ]) + odata_feed_access_histogram.tag( + domain=request.domain, + feed_id=config_id, + feed_type=config.type, + username=username, + row_count=row_count, + column_count=column_count, + size=len(response.content), + ).observe(duration) From 54f2d1bef3026d87557e78fa4b8b5c01ade70951 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Wed, 11 Mar 2020 17:09:47 +0200 Subject: [PATCH 08/20] convert sumbission metrics --- corehq/apps/receiverwrapper/views.py | 69 +++++++++++++++++----------- corehq/util/datadog/metrics.py | 2 - 2 files changed, 43 insertions(+), 28 deletions(-) diff --git a/corehq/apps/receiverwrapper/views.py b/corehq/apps/receiverwrapper/views.py index 10922e0b4e45..6f27ed881bfe 100644 --- a/corehq/apps/receiverwrapper/views.py +++ b/corehq/apps/receiverwrapper/views.py @@ -10,6 +10,7 @@ import couchforms from casexml.apps.case.xform import get_case_updates, is_device_report +from corehq.util.metrics import metrics from couchforms import openrosa_response from couchforms.const import MAGIC_PROPERTY, BadRequest from couchforms.getters import MultimediaBug @@ -50,18 +51,38 @@ convert_xform_to_json, should_use_sql_backend, ) -from corehq.util.datadog.gauges import datadog_counter, datadog_gauge -from corehq.util.datadog.metrics import ( - MULTIMEDIA_SUBMISSION_ERROR_COUNT, - XFORM_LOCKED_COUNT, -) -from corehq.util.datadog.utils import bucket_value from corehq.util.timer import TimingContext PROFILE_PROBABILITY = float(os.getenv('COMMCARE_PROFILE_SUBMISSION_PROBABILITY', 0)) PROFILE_LIMIT = os.getenv('COMMCARE_PROFILE_SUBMISSION_LIMIT') PROFILE_LIMIT = int(PROFILE_LIMIT) if PROFILE_LIMIT is not None else 1 +corrupt_multimedia_counter = metrics.counter( + 'commcare.corrupt_multimedia_submissions', 'Count of requests with corrupt multimedia', + tag_names=['domain', 'authenticated'] +) + +xform_locked_error_counter = metrics.counter( + 'commcare.xformlocked.count', 'Count of locking errors', + tag_names=['domain', 'authenticated'] +) + +submission_counter = metrics.counter( + 'commcare.xform_submissions.count', 'Count of form submissions', + tag_names=['domain', 'backend', 'submission_type', 'status_code'] +) + +submission_duration_histogram = metrics.histogram( + 'commcare.xform_submissions.duration.seconds', 'Count of form submissions', + bucket_tag='duration', buckets=(1, 5, 20, 60, 120, 300, 600), bucket_unit='s', + tag_names=['domain', 'backend', 'submission_type', 'status_code'] +) + +submission_lag_histogram = metrics.histogram( + 'commcare.xform_submissions.lag.days', 'Count of form submissions', + bucket_tag='lag', buckets=(1, 2, 4, 7, 14, 31, 90), bucket_unit='d', + tag_names=['domain', 'backend', 'submission_type', 'status_code'] +) @profile_prod('commcare_receiverwapper_process_form.prof', probability=PROFILE_PROBABILITY, limit=PROFILE_LIMIT) def _process_form(request, domain, app_id, user_id, authenticated, @@ -70,10 +91,10 @@ def _process_form(request, domain, app_id, user_id, authenticated, if rate_limit_submission(domain): return HttpTooManyRequests() - metric_tags = [ - 'backend:sql' if should_use_sql_backend(domain) else 'backend:couch', - 'domain:{}'.format(domain), - ] + metric_tags = { + 'backend': 'sql' if should_use_sql_backend(domain) else 'couch', + 'domain': domain + } try: instance, attachments = couchforms.get_instance_and_attachment(request) @@ -85,9 +106,9 @@ def _process_form(request, domain, app_id, user_id, authenticated, except: meta = {} + corrupt_multimedia_counter.tag(domain=domain, authenticated=authenticated).inc() return _submission_error( - request, "Received a submission with POST.keys()", - MULTIMEDIA_SUBMISSION_ERROR_COUNT, metric_tags, + request, "Received a submission with POST.keys()", metric_tags, domain, app_id, user_id, authenticated, meta, ) @@ -133,8 +154,9 @@ def _process_form(request, domain, app_id, user_id, authenticated, try: result = submission_post.run() except XFormLockError as err: + xform_locked_error_counter.tag(domain=domain, authenticated=authenticated).inc() return _submission_error( - request, "XFormLockError: %s" % err, XFORM_LOCKED_COUNT, + request, "XFormLockError: %s" % err, metric_tags, domain, app_id, user_id, authenticated, status=423, notify=False, ) @@ -145,7 +167,7 @@ def _process_form(request, domain, app_id, user_id, authenticated, return response -def _submission_error(request, message, count_metric, metric_tags, +def _submission_error(request, message, metric_tags, domain, app_id, user_id, authenticated, meta=None, status=400, notify=True): """Notify exception, datadog count, record metrics, construct response @@ -157,7 +179,6 @@ def _submission_error(request, message, count_metric, metric_tags, "domain:{}".format(domain), "authenticated:{}".format(authenticated), ] - datadog_counter(count_metric, tags=details) if notify: details.extend([ "user_id:{}".format(user_id), @@ -172,24 +193,20 @@ def _submission_error(request, message, count_metric, metric_tags, def _record_metrics(tags, submission_type, response, timer=None, xform=None): - if xform and xform.metadata and xform.metadata.timeEnd and xform.received_on: - lag = xform.received_on - xform.metadata.timeEnd - lag_days = lag.total_seconds() / 86400 - tags += [ - 'lag:%s' % bucket_value(lag_days, (1, 2, 4, 7, 14, 31, 90), 'd') - ] - tags += [ 'submission_type:{}'.format(submission_type), 'status_code:{}'.format(response.status_code) ] + if xform and xform.metadata and xform.metadata.timeEnd and xform.received_on: + lag = xform.received_on - xform.metadata.timeEnd + lag_days = lag.total_seconds() / 86400 + submission_lag_histogram.tag(**tags).observe(lag_days) + if timer: - tags += [ - 'duration:%s' % bucket_value(timer.duration, (1, 5, 20, 60, 120, 300, 600), 's'), - ] + submission_duration_histogram.tag(**tags).observe(timer.duration) - datadog_counter('commcare.xform_submissions.count', tags=tags) + submission_counter.tag(**tags).inc() @location_safe diff --git a/corehq/util/datadog/metrics.py b/corehq/util/datadog/metrics.py index 545be07a5a85..d56dea5154db 100644 --- a/corehq/util/datadog/metrics.py +++ b/corehq/util/datadog/metrics.py @@ -2,6 +2,4 @@ ERROR_COUNT = 'commcare.error.count' REPEATER_ERROR_COUNT = 'commcare.repeaters.error' REPEATER_SUCCESS_COUNT = 'commcare.repeaters.success' -MULTIMEDIA_SUBMISSION_ERROR_COUNT = 'commcare.corrupt-multimedia-submission.error.count' DATE_OPENED_CASEBLOCK_ERROR_COUNT = 'commcare.date-opened-caseblock-bug.error.count' -XFORM_LOCKED_COUNT = 'commcare.xformlocked.count' From eba69c4f8937352143272b8d27eda4064bba91cd Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Wed, 11 Mar 2020 17:29:04 +0200 Subject: [PATCH 09/20] docstrings --- corehq/util/metrics/metrics.py | 2 ++ corehq/util/metrics/prometheus.py | 1 + 2 files changed, 3 insertions(+) diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index c4038abaf6ed..65ea9def39b1 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -134,6 +134,8 @@ def enabled(self) -> bool: class DelegatedMetrics: + """This class makes the metric class instantiation lazy and + also multiple metrics providers to be used.""" def __init__(self, delegates): self.delegates = delegates self._types = { diff --git a/corehq/util/metrics/prometheus.py b/corehq/util/metrics/prometheus.py index c98146f3886d..30d4604d0fee 100644 --- a/corehq/util/metrics/prometheus.py +++ b/corehq/util/metrics/prometheus.py @@ -50,6 +50,7 @@ class Histogram(PrometheusMetricBase, HqHistogram): https://prometheus.io/docs/concepts/metric_types/#histogram """ def _init_metric(self): + """Overriding this so that we can pass in the buckets to the Prometheus class""" HqHistogram._init_metric(self) # skip _init_metric on PrometheusMetricBase self.name = self.name.replace('.', '_') self._delegate = self._kwargs.get('delegate') From 01fbc076c50d8a67299340e290d2c5039102ea5a Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Wed, 11 Mar 2020 17:30:30 +0200 Subject: [PATCH 10/20] stickler --- corehq/util/metrics/__init__.py | 2 +- corehq/util/metrics/metrics.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/corehq/util/metrics/__init__.py b/corehq/util/metrics/__init__.py index 6789a3130bfc..1e6a6053ea49 100644 --- a/corehq/util/metrics/__init__.py +++ b/corehq/util/metrics/__init__.py @@ -1,7 +1,7 @@ from django.utils.functional import SimpleLazyObject from corehq.util.metrics.datadog import DatadogMetrics -from corehq.util.metrics.metrics import DummyMetrics, DelegatedMetrics, HqMetrics +from corehq.util.metrics.metrics import DummyMetrics, DelegatedMetrics from corehq.util.metrics.prometheus import PrometheusMetrics diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index 65ea9def39b1..006971c30e9e 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -108,15 +108,15 @@ class HqMetrics(metaclass=abc.ABCMeta): def enabled(self) -> bool: raise NotImplementedError - def counter(self, name: str, documentation: str, tag_names: Iterable=tuple()) -> HqCounter: + def counter(self, name: str, documentation: str, tag_names: Iterable = tuple()) -> HqCounter: return self._counter_class(name, documentation, tag_names) - def gauge(self, name: str, documentation: str, tag_names: Iterable=tuple()) -> HqGauge: + def gauge(self, name: str, documentation: str, tag_names: Iterable = tuple()) -> HqGauge: return self._gauge_class(name, documentation, tag_names) def histogram(self, name: str, documentation: str, bucket_tag: str, buckets: List[int] = DEFAULT_BUCKETS, bucket_unit: str = '', - tag_names: Iterable=tuple()) -> HqHistogram: + tag_names: Iterable = tuple()) -> HqHistogram: """Create a histogram metric. Histogram implementations differ between provider. See provider implementations for details. """ From 450f211179d41bc243c02470b269c0bdfdd3203f Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Thu, 12 Mar 2020 11:17:25 +0200 Subject: [PATCH 11/20] keep tag_values as dict instead of splitting and re-combining --- corehq/util/metrics/datadog.py | 12 ++++++------ corehq/util/metrics/metrics.py | 15 ++++++++------- corehq/util/metrics/prometheus.py | 4 ++-- corehq/util/metrics/tests/test_metrics.py | 2 +- corehq/util/metrics/tests/utils.py | 8 ++------ 5 files changed, 19 insertions(+), 22 deletions(-) diff --git a/corehq/util/metrics/datadog.py b/corehq/util/metrics/datadog.py index e13d5c3d9a1a..574cf8de58bf 100644 --- a/corehq/util/metrics/datadog.py +++ b/corehq/util/metrics/datadog.py @@ -19,12 +19,12 @@ statsd = DogStatsd(constant_tags=COMMON_TAGS) -def _format_tags(tag_names, tag_values): - if not tag_names or not tag_values: +def _format_tags(tag_values: dict): + if not tag_values: return None return [ - f'{name}:{value}' for name, value in zip(tag_names, tag_values) + f'{name}:{value}' for name, value in tag_values.items() ] @@ -37,13 +37,13 @@ def _datadog_record(fn, name, value, tags=None): class Counter(HqCounter): def _record(self, amount: float): - tags = _format_tags(self.tag_names, self.tag_values) + tags = _format_tags(self.tag_values) _datadog_record(statsd.increment, self.name, amount, tags) class Gauge(HqGauge): def _record(self, value): - tags = _format_tags(self.tag_names, self.tag_values) + tags = _format_tags(self.tag_values) _datadog_record(statsd.gauge, self.name, value, tags) @@ -68,7 +68,7 @@ class Histogram(HqHistogram): More details: https://help.datadoghq.com/hc/en-us/articles/211545826 """ def _record(self, value: float): - tags = _format_tags(self.tag_names, self.tag_values) + tags = _format_tags(self.tag_values) if not tags: tags = [] bucket = bucket_value(value, self._buckets, self._bucket_unit) diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index 006971c30e9e..d1bf13680b7f 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -21,7 +21,7 @@ def _enforce_prefix(name, prefix): def _validate_tag_names(tag_names): - tag_names = tuple(tag_names) + tag_names = set(tag_names) for l in tag_names: if not METRIC_TAG_NAME_RE.match(l): raise ValueError('Invalid metric tag name: ' + l) @@ -41,6 +41,10 @@ def __init__(self, name: str, documentation: str, tag_names: Iterable = tuple(), self.documentation = documentation self.tag_names = _validate_tag_names(tag_names) self.tag_values = kwargs.pop('tag_values', None) + if self.tag_values: + assert isinstance(self.tag_values, dict) + if self.tag_values.keys() != self.tag_names: + raise ValueError('Incorrect tag names') self._kwargs = kwargs self._init_metric() @@ -48,13 +52,10 @@ def _init_metric(self): pass def tag(self, **tag_kwargs): - if sorted(tag_kwargs) != sorted(self.tag_names): - raise ValueError('Incorrect tag names') + tag_kwargs = {t: str(v) for t, v in tag_kwargs.items()} + return self._get_tagged_instance(tag_kwargs) - tag_values = tuple(str(tag_kwargs[t]) for t in self.tag_names) - return self._get_tagged_instance(tag_values) - - def _get_tagged_instance(self, tag_values): + def _get_tagged_instance(self, tag_values: dict): return self.__class__( self.name, self.documentation, tag_names=self.tag_names, tag_values=tag_values, **self._kwargs ) diff --git a/corehq/util/metrics/prometheus.py b/corehq/util/metrics/prometheus.py index 30d4604d0fee..604b02ae7aaf 100644 --- a/corehq/util/metrics/prometheus.py +++ b/corehq/util/metrics/prometheus.py @@ -20,8 +20,8 @@ def _init_metric(self): delegate = self._kwargs.get('delegate') self._delegate = delegate or self._metric_class(self.name, self.documentation, self.tag_names) - def _get_tagged_instance(self, tag_values): - delegate = self._delegate.labels(**dict(zip(self.tag_names, tag_values))) + def _get_tagged_instance(self, tag_values: dict): + delegate = self._delegate.labels(**tag_values) return self.__class__( self.name, self.documentation, tag_names=self.tag_names, tag_values=tag_values, delegate=delegate, **self._kwargs diff --git a/corehq/util/metrics/tests/test_metrics.py b/corehq/util/metrics/tests/test_metrics.py index c6d721921780..f77d7e929e9f 100644 --- a/corehq/util/metrics/tests/test_metrics.py +++ b/corehq/util/metrics/tests/test_metrics.py @@ -122,7 +122,7 @@ def assertHistogramMetric(self, metric, expected): bucket = metric._buckets[-1] prefix = 'over' bucket_tag = (metric._bucket_tag, f'{prefix}_{bucket:03d}{metric._bucket_unit}') - expected_samples[tags + (bucket_tag,)] = val + expected_samples[tuple(sorted(tags + (bucket_tag,)))] = val actual = { key[1]: sum(val) for key, val in self.recorded_metrics.items() diff --git a/corehq/util/metrics/tests/utils.py b/corehq/util/metrics/tests/utils.py index 9c7f94b2d104..bbd9119e3113 100644 --- a/corehq/util/metrics/tests/utils.py +++ b/corehq/util/metrics/tests/utils.py @@ -9,14 +9,10 @@ def patch_datadog(): def record(fn, name, value, tags=None): key = tuple([ name, - tuple([tuple(t.split(':')) for t in (tags or [])]), + tuple(sorted([tuple(t.split(':')) for t in (tags or [])])), ]) stats[key].append(value) stats = defaultdict(list) - patch = mock.patch("corehq.util.metrics.datadog._datadog_record", new=record) - patch.start() - try: + with mock.patch("corehq.util.metrics.datadog._datadog_record", new=record): yield stats - finally: - patch.stop() From 2c3edefb732f894c64c891cd960bc42252f5a30f Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Thu, 12 Mar 2020 13:55:57 +0200 Subject: [PATCH 12/20] update links --- corehq/util/metrics/datadog.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/corehq/util/metrics/datadog.py b/corehq/util/metrics/datadog.py index 574cf8de58bf..e463cccfcd53 100644 --- a/corehq/util/metrics/datadog.py +++ b/corehq/util/metrics/datadog.py @@ -65,7 +65,9 @@ class Histogram(HqHistogram): # resulting Datadog metric # commcare.request.duration:1|c|#duration:lt_2ms - More details: https://help.datadoghq.com/hc/en-us/articles/211545826 + For more details see: + * https://github.com/dimagi/commcare-hq/pull/17080 + * https://github.com/dimagi/commcare-hq/pull/17030#issuecomment-315794700 """ def _record(self, value: float): tags = _format_tags(self.tag_values) From 8664161faf69621076752cfb29fb1e07441441c2 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Thu, 12 Mar 2020 13:56:59 +0200 Subject: [PATCH 13/20] remove unnecessary list Co-Authored-By: Daniel Miller --- corehq/util/metrics/metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index d1bf13680b7f..6520d6f5b6cd 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -92,7 +92,7 @@ def _init_metric(self): self._bucket_unit = self._kwargs.get('bucket_unit', '') self._bucket_tag = self._kwargs.get('bucket_tag') if self._bucket_tag in self.tag_names: - self.tag_names = tuple([name for name in self.tag_names if name != self._bucket_tag]) + self.tag_names = tuple(name for name in self.tag_names if name != self._bucket_tag) def observe(self, value: float): """Update histogram with the given value.""" From 0bcedea6a39896985f37058701f8344367523904 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Thu, 12 Mar 2020 13:58:34 +0200 Subject: [PATCH 14/20] replace typle() with () --- corehq/util/metrics/metrics.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index d1bf13680b7f..cef24f31cb87 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -33,7 +33,7 @@ def _validate_tag_names(tag_names): class MetricBase: - def __init__(self, name: str, documentation: str, tag_names: Iterable = tuple(), **kwargs): + def __init__(self, name: str, documentation: str, tag_names: Iterable = (), **kwargs): self.name = name if not METRIC_NAME_RE.match(name): raise ValueError('Invalid metric name: ' + name) @@ -109,15 +109,15 @@ class HqMetrics(metaclass=abc.ABCMeta): def enabled(self) -> bool: raise NotImplementedError - def counter(self, name: str, documentation: str, tag_names: Iterable = tuple()) -> HqCounter: + def counter(self, name: str, documentation: str, tag_names: Iterable = ()) -> HqCounter: return self._counter_class(name, documentation, tag_names) - def gauge(self, name: str, documentation: str, tag_names: Iterable = tuple()) -> HqGauge: + def gauge(self, name: str, documentation: str, tag_names: Iterable = ()) -> HqGauge: return self._gauge_class(name, documentation, tag_names) def histogram(self, name: str, documentation: str, bucket_tag: str, buckets: List[int] = DEFAULT_BUCKETS, bucket_unit: str = '', - tag_names: Iterable = tuple()) -> HqHistogram: + tag_names: Iterable = ()) -> HqHistogram: """Create a histogram metric. Histogram implementations differ between provider. See provider implementations for details. """ From 904e358edeebc5078507700554157cdc933276d4 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Thu, 12 Mar 2020 15:34:36 +0200 Subject: [PATCH 15/20] fix tags --- corehq/apps/receiverwrapper/views.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/corehq/apps/receiverwrapper/views.py b/corehq/apps/receiverwrapper/views.py index 6f27ed881bfe..22e53df3c76d 100644 --- a/corehq/apps/receiverwrapper/views.py +++ b/corehq/apps/receiverwrapper/views.py @@ -193,10 +193,10 @@ def _submission_error(request, message, metric_tags, def _record_metrics(tags, submission_type, response, timer=None, xform=None): - tags += [ - 'submission_type:{}'.format(submission_type), - 'status_code:{}'.format(response.status_code) - ] + tags.update({ + 'submission_type': submission_type, + 'status_code': response.status_code + }) if xform and xform.metadata and xform.metadata.timeEnd and xform.received_on: lag = xform.received_on - xform.metadata.timeEnd From d68e104a2df754e02b3b3154382f34489e03c5de Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Thu, 12 Mar 2020 15:34:42 +0200 Subject: [PATCH 16/20] pass other args --- corehq/util/metrics/metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index 4fdf53446923..3d37368e4a40 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -161,7 +161,7 @@ def __init__(self, delegates, record_fn_name): self._record_fn_name = record_fn_name def tag(self, *args, **kwargs): - return self.__class__([d.tag(*args, **kwargs) for d in self._delegates]) + return self.__class__([d.tag(*args, **kwargs) for d in self._delegates], self._record_fn_name) def __getattr__(self, item): if item == self._record_fn_name: From 5c000528e4c114005cb25faa7e3fbc288722a8c7 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Thu, 12 Mar 2020 16:53:42 +0200 Subject: [PATCH 17/20] revert change to datadog bucketing boundry --- corehq/util/datadog/utils.py | 2 +- corehq/util/metrics/tests/test_metrics.py | 63 +++++++++++++---------- 2 files changed, 37 insertions(+), 28 deletions(-) diff --git a/corehq/util/datadog/utils.py b/corehq/util/datadog/utils.py index c61a4d099cb1..9d495679260f 100644 --- a/corehq/util/datadog/utils.py +++ b/corehq/util/datadog/utils.py @@ -115,7 +115,7 @@ def bucket_value(value, buckets, unit=''): lt_template = "lt_{:0%s}{}" % number_length over_template = "over_{:0%s}{}" % number_length for bucket in buckets: - if value <= bucket: + if value < bucket: return lt_template.format(bucket, unit) return over_template.format(buckets[-1], unit) diff --git a/corehq/util/metrics/tests/test_metrics.py b/corehq/util/metrics/tests/test_metrics.py index f77d7e929e9f..ea6b878f6e8b 100644 --- a/corehq/util/metrics/tests/test_metrics.py +++ b/corehq/util/metrics/tests/test_metrics.py @@ -44,24 +44,6 @@ def test_gauge(self): (('t1', 'c'), ('t2', 'b')): 5, }) - def test_histogram(self): - histogram = self.provider.histogram( - 'commcare.test.histogram', 'Description', 'duration', - buckets=[1, 2, 3], bucket_unit='ms', tag_names=['t1', 't2'] - ) - tagged_1 = histogram.tag(t1='a', t2='b') - tagged_1.observe(0.2) - tagged_1.observe(0.7) - tagged_1.observe(2.5) - - tagged_2 = histogram.tag(t1='c', t2='b') - tagged_2.observe(2) - tagged_2.observe(5) - self.assertHistogramMetric(histogram, { - (('t1', 'a'), ('t2', 'b')): {1: 2, 3: 1}, - (('t1', 'c'), ('t2', 'b')): {2: 1, INF: 1} - }) - def assertCounterMetric(self, metric: HqCounter, expected: Dict[Tuple[Tuple[str, str], ...], float]): """ :param metric: metric class @@ -76,15 +58,6 @@ def assertGaugeMetric(self, metric: HqGauge, expected: Dict[Tuple[Tuple[str, str """ raise NotImplementedError - def assertHistogramMetric(self, - metric: HqHistogram, - expected: Dict[Tuple[Tuple[str, str], ...], Dict[float, int]]): - """ - :param metric: metric class - :param expected: dict mapping tag tuples to a dict of bucket values - """ - raise NotImplementedError - class TestDatadogMetrics(_TestMetrics): provider_class = DatadogMetrics @@ -98,6 +71,24 @@ def tearDown(self) -> None: self.patch.__exit__(None, None, None) super().tearDown() + def test_histogram(self): + histogram = self.provider.histogram( + 'commcare.test.histogram', 'Description', 'duration', + buckets=[1, 2, 3], bucket_unit='ms', tag_names=['t1', 't2'] + ) + tagged_1 = histogram.tag(t1='a', t2='b') + tagged_1.observe(0.2) + tagged_1.observe(0.7) + tagged_1.observe(2.5) + + tagged_2 = histogram.tag(t1='c', t2='b') + tagged_2.observe(2) + tagged_2.observe(5) + self.assertHistogramMetric(histogram, { + (('t1', 'a'), ('t2', 'b')): {1: 2, 3: 1}, + (('t1', 'c'), ('t2', 'b')): {3: 1, INF: 1} + }) + def assertCounterMetric(self, metric, expected): self.assertEqual({key[0] for key in self.recorded_metrics}, {metric.name}) actual = { @@ -133,6 +124,24 @@ def assertHistogramMetric(self, metric, expected): class TestPrometheusMetrics(_TestMetrics): provider_class = PrometheusMetrics + def test_histogram(self): + histogram = self.provider.histogram( + 'commcare.test.histogram', 'Description', 'duration', + buckets=[1, 2, 3], bucket_unit='ms', tag_names=['t1', 't2'] + ) + tagged_1 = histogram.tag(t1='a', t2='b') + tagged_1.observe(0.2) + tagged_1.observe(0.7) + tagged_1.observe(2.5) + + tagged_2 = histogram.tag(t1='c', t2='b') + tagged_2.observe(2) + tagged_2.observe(5) + self.assertHistogramMetric(histogram, { + (('t1', 'a'), ('t2', 'b')): {1: 2, 3: 1}, + (('t1', 'c'), ('t2', 'b')): {2: 1, INF: 1} + }) + def _samples_to_dics(self, samples, filter_name=None): """Convert a Sample tuple into a dict((name, (labels tuple)) -> value)""" return { From e00b7e176bb3c296f003d49d7e39354e5f3b1390 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Mon, 16 Mar 2020 13:40:39 +0200 Subject: [PATCH 18/20] remove unnecessary list --- corehq/util/metrics/tests/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corehq/util/metrics/tests/utils.py b/corehq/util/metrics/tests/utils.py index bbd9119e3113..3c012c0ff9d5 100644 --- a/corehq/util/metrics/tests/utils.py +++ b/corehq/util/metrics/tests/utils.py @@ -9,7 +9,7 @@ def patch_datadog(): def record(fn, name, value, tags=None): key = tuple([ name, - tuple(sorted([tuple(t.split(':')) for t in (tags or [])])), + tuple(sorted(tuple(t.split(':')) for t in (tags or []))), ]) stats[key].append(value) From 20e8a6172ebbef0560bdfd607041b25313a2bbcf Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Mon, 16 Mar 2020 13:44:35 +0200 Subject: [PATCH 19/20] apply tags at the same time as recording the metric This change means we don't have to instantiate a new class when applying the tags which simplifies the class structure a lot. --- corehq/apps/api/odata/utils.py | 9 ++- corehq/apps/receiverwrapper/views.py | 10 ++-- corehq/util/metrics/datadog.py | 12 ++-- corehq/util/metrics/metrics.py | 68 ++++++++++------------- corehq/util/metrics/prometheus.py | 51 +++++++---------- corehq/util/metrics/tests/test_metrics.py | 42 +++++++------- 6 files changed, 83 insertions(+), 109 deletions(-) diff --git a/corehq/apps/api/odata/utils.py b/corehq/apps/api/odata/utils.py index 6992808fa2e4..7628a03eb8c7 100644 --- a/corehq/apps/api/odata/utils.py +++ b/corehq/apps/api/odata/utils.py @@ -2,8 +2,6 @@ from collections import namedtuple from corehq.apps.export.models import ExportInstance -from corehq.util.datadog.gauges import datadog_counter -from corehq.util.datadog.utils import bucket_value from corehq.util.metrics import metrics FieldMetadata = namedtuple('FieldMetadata', ['name', 'odata_type']) @@ -72,12 +70,13 @@ def record_feed_access_in_datadog(request, config_id, duration, response): column_count = len(rows[0]) except IndexError: column_count = 0 - odata_feed_access_histogram.tag( + odata_feed_access_histogram.observe( + duration, domain=request.domain, feed_id=config_id, feed_type=config.type, username=username, row_count=row_count, column_count=column_count, - size=len(response.content), - ).observe(duration) + size=len(response.content) + ) diff --git a/corehq/apps/receiverwrapper/views.py b/corehq/apps/receiverwrapper/views.py index 22e53df3c76d..16d28bf5a38b 100644 --- a/corehq/apps/receiverwrapper/views.py +++ b/corehq/apps/receiverwrapper/views.py @@ -106,7 +106,7 @@ def _process_form(request, domain, app_id, user_id, authenticated, except: meta = {} - corrupt_multimedia_counter.tag(domain=domain, authenticated=authenticated).inc() + corrupt_multimedia_counter.inc(domain=domain, authenticated=authenticated) return _submission_error( request, "Received a submission with POST.keys()", metric_tags, domain, app_id, user_id, authenticated, meta, @@ -154,7 +154,7 @@ def _process_form(request, domain, app_id, user_id, authenticated, try: result = submission_post.run() except XFormLockError as err: - xform_locked_error_counter.tag(domain=domain, authenticated=authenticated).inc() + xform_locked_error_counter.inc(domain=domain, authenticated=authenticated) return _submission_error( request, "XFormLockError: %s" % err, metric_tags, domain, app_id, user_id, authenticated, status=423, @@ -201,12 +201,12 @@ def _record_metrics(tags, submission_type, response, timer=None, xform=None): if xform and xform.metadata and xform.metadata.timeEnd and xform.received_on: lag = xform.received_on - xform.metadata.timeEnd lag_days = lag.total_seconds() / 86400 - submission_lag_histogram.tag(**tags).observe(lag_days) + submission_lag_histogram.observe(lag_days, **tags) if timer: - submission_duration_histogram.tag(**tags).observe(timer.duration) + submission_duration_histogram.observe(timer.duration, **tags) - submission_counter.tag(**tags).inc() + submission_counter.inc(**tags) @location_safe diff --git a/corehq/util/metrics/datadog.py b/corehq/util/metrics/datadog.py index e463cccfcd53..0163e5696924 100644 --- a/corehq/util/metrics/datadog.py +++ b/corehq/util/metrics/datadog.py @@ -36,14 +36,14 @@ def _datadog_record(fn, name, value, tags=None): class Counter(HqCounter): - def _record(self, amount: float): - tags = _format_tags(self.tag_values) + def _record(self, amount: float, tag_values: dict): + tags = _format_tags(tag_values) _datadog_record(statsd.increment, self.name, amount, tags) class Gauge(HqGauge): - def _record(self, value): - tags = _format_tags(self.tag_values) + def _record(self, value: float, tag_values: dict): + tags = _format_tags(tag_values) _datadog_record(statsd.gauge, self.name, value, tags) @@ -69,8 +69,8 @@ class Histogram(HqHistogram): * https://github.com/dimagi/commcare-hq/pull/17080 * https://github.com/dimagi/commcare-hq/pull/17030#issuecomment-315794700 """ - def _record(self, value: float): - tags = _format_tags(self.tag_values) + def _record(self, value: float, tag_values: dict): + tags = _format_tags(tag_values) if not tags: tags = [] bucket = bucket_value(value, self._buckets, self._bucket_unit) diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index 3d37368e4a40..f4a699145ace 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -33,53 +33,43 @@ def _validate_tag_names(tag_names): class MetricBase: - def __init__(self, name: str, documentation: str, tag_names: Iterable = (), **kwargs): + def __init__(self, name: str, documentation: str, tag_names: Iterable = ()): self.name = name if not METRIC_NAME_RE.match(name): raise ValueError('Invalid metric name: ' + name) _enforce_prefix(name, 'commcare') self.documentation = documentation self.tag_names = _validate_tag_names(tag_names) - self.tag_values = kwargs.pop('tag_values', None) - if self.tag_values: - assert isinstance(self.tag_values, dict) - if self.tag_values.keys() != self.tag_names: - raise ValueError('Incorrect tag names') - self._kwargs = kwargs self._init_metric() def _init_metric(self): pass - def tag(self, **tag_kwargs): - tag_kwargs = {t: str(v) for t, v in tag_kwargs.items()} - return self._get_tagged_instance(tag_kwargs) - - def _get_tagged_instance(self, tag_values: dict): - return self.__class__( - self.name, self.documentation, tag_names=self.tag_names, tag_values=tag_values, **self._kwargs - ) - - def _validate_tags(self): - if self.tag_names and not self.tag_values: + def _validate_tags(self, tag_values: dict): + if self.tag_names and not tag_values: raise Exception('Metric has missing tag values.') - def _record(self, value: float): + if tag_values: + assert isinstance(tag_values, dict) + if tag_values.keys() != self.tag_names: + raise ValueError('Incorrect tag names') + + def _record(self, value: float, tags: dict): raise NotImplementedError class HqCounter(MetricBase): - def inc(self, amount: float = 1): + def inc(self, amount: float = 1, **tags): """Increment the counter by the given amount.""" - self._validate_tags() - self._record(amount) + self._validate_tags(tags) + self._record(amount, tags) class HqGauge(MetricBase): - def set(self, value: float): + def set(self, value: float, **tags): """Set gauge to the given value.""" - self._validate_tags() - self._record(value) + self._validate_tags(tags) + self._record(value, tags) DEFAULT_BUCKETS = (.005, .01, .025, .05, .075, .1, .25, .5, .75, 1.0, 2.5, 5.0, 7.5, 10.0, INF) @@ -87,17 +77,20 @@ def set(self, value: float): class HqHistogram(MetricBase): - def _init_metric(self): - self._buckets = self._kwargs.get('buckets') or DEFAULT_BUCKETS - self._bucket_unit = self._kwargs.get('bucket_unit', '') - self._bucket_tag = self._kwargs.get('bucket_tag') - if self._bucket_tag in self.tag_names: - self.tag_names = tuple(name for name in self.tag_names if name != self._bucket_tag) - - def observe(self, value: float): + def __init__(self, name: str, documentation: str, + bucket_tag: str, buckets: List[int] = DEFAULT_BUCKETS, bucket_unit: str = '', + tag_names: Iterable = ()): + self._bucket_tag = bucket_tag + self._buckets = buckets + self._bucket_unit = bucket_unit + if self._bucket_tag in tag_names: + tag_names = tuple(name for name in tag_names if name != bucket_tag) + super().__init__(name, documentation, tag_names) + + def observe(self, value: float, **tags): """Update histogram with the given value.""" - self._validate_tags() - self._record(value) + self._validate_tags(tags) + self._record(value, tags) class HqMetrics(metaclass=abc.ABCMeta): @@ -122,7 +115,7 @@ def histogram(self, name: str, documentation: str, implementations for details. """ return self._histogram_class( - name, documentation, tag_names, bucket_tag=bucket_tag, buckets=buckets, bucket_unit=bucket_unit + name, documentation, bucket_tag, buckets=buckets, bucket_unit=bucket_unit, tag_names=tag_names ) @@ -160,9 +153,6 @@ def __init__(self, delegates, record_fn_name): self._delegates = delegates self._record_fn_name = record_fn_name - def tag(self, *args, **kwargs): - return self.__class__([d.tag(*args, **kwargs) for d in self._delegates], self._record_fn_name) - def __getattr__(self, item): if item == self._record_fn_name: def record(*args, **kwargs): diff --git a/corehq/util/metrics/prometheus.py b/corehq/util/metrics/prometheus.py index 604b02ae7aaf..cce9a0c671ed 100644 --- a/corehq/util/metrics/prometheus.py +++ b/corehq/util/metrics/prometheus.py @@ -11,54 +11,43 @@ from prometheus_client import Histogram as PHistogram -class PrometheusMetricBase(MetricBase): - _metric_class = None +class Counter(HqCounter): + """https://prometheus.io/docs/concepts/metric_types/#counter""" def _init_metric(self): - super()._init_metric() self.name = self.name.replace('.', '_') - delegate = self._kwargs.get('delegate') - self._delegate = delegate or self._metric_class(self.name, self.documentation, self.tag_names) - - def _get_tagged_instance(self, tag_values: dict): - delegate = self._delegate.labels(**tag_values) - return self.__class__( - self.name, self.documentation, - tag_names=self.tag_names, tag_values=tag_values, delegate=delegate, **self._kwargs - ) - - -class Counter(PrometheusMetricBase, HqCounter): - """https://prometheus.io/docs/concepts/metric_types/#counter""" - _metric_class = PCounter + self._delegate = PCounter(self.name, self.documentation, self.tag_names) - def _record(self, amount: float): - self._delegate.inc(amount) + def _record(self, amount: float, tags): + _get_labeled(self._delegate, tags).inc(amount) -class Gauge(PrometheusMetricBase, HqGauge): +class Gauge(HqGauge): """https://prometheus.io/docs/concepts/metric_types/#gauge""" - _metric_class = PGauge - def _record(self, value: float): - self._delegate.set(value) + def _init_metric(self): + self.name = self.name.replace('.', '_') + self._delegate = PGauge(self.name, self.documentation, self.tag_names) + + def _record(self, value: float, tags): + _get_labeled(self._delegate, tags).set(value) -class Histogram(PrometheusMetricBase, HqHistogram): +class Histogram(HqHistogram): """This metric class implements the native Prometheus Histogram type https://prometheus.io/docs/concepts/metric_types/#histogram """ def _init_metric(self): - """Overriding this so that we can pass in the buckets to the Prometheus class""" - HqHistogram._init_metric(self) # skip _init_metric on PrometheusMetricBase self.name = self.name.replace('.', '_') - self._delegate = self._kwargs.get('delegate') - if not self._delegate: - self._delegate = PHistogram(self.name, self.documentation, self.tag_names, buckets=self._buckets) + self._delegate = PHistogram(self.name, self.documentation, self.tag_names, buckets=self._buckets) + + def _record(self, value: float, tags: dict): + _get_labeled(self._delegate, tags).observe(value) + - def _record(self, value: float): - self._delegate.observe(value) +def _get_labeled(metric, labels): + return metric.labels(**labels) if labels else metric class PrometheusMetrics(HqMetrics): diff --git a/corehq/util/metrics/tests/test_metrics.py b/corehq/util/metrics/tests/test_metrics.py index ea6b878f6e8b..bb9c8232a00b 100644 --- a/corehq/util/metrics/tests/test_metrics.py +++ b/corehq/util/metrics/tests/test_metrics.py @@ -26,9 +26,9 @@ def setUpClass(cls): def test_counter(self): counter = self.provider.counter('commcare.test.counter', 'Description', tag_names=['t1', 't2']) - counter.tag(t1='a', t2='b').inc() - counter.tag(t1='c', t2='b').inc(2) - counter.tag(t1='c', t2='b').inc() + counter.inc(t1='a', t2='b') + counter.inc(2, t1='c', t2='b') + counter.inc(t1='c', t2='b') self.assertCounterMetric(counter, { (('t1', 'a'), ('t2', 'b')): 1, (('t1', 'c'), ('t2', 'b')): 3, @@ -36,9 +36,9 @@ def test_counter(self): def test_gauge(self): gauge = self.provider.gauge('commcare.test.gauge', 'Description', tag_names=['t1', 't2']) - gauge.tag(t1='a', t2='b').set(4.2) - gauge.tag(t1='c', t2='b').set(2) - gauge.tag(t1='c', t2='b').set(5) + gauge.set(4.2, t1='a', t2='b') + gauge.set(2, t1='c', t2='b') + gauge.set(5, t1='c', t2='b') self.assertGaugeMetric(gauge, { (('t1', 'a'), ('t2', 'b')): 4.2, (('t1', 'c'), ('t2', 'b')): 5, @@ -76,14 +76,12 @@ def test_histogram(self): 'commcare.test.histogram', 'Description', 'duration', buckets=[1, 2, 3], bucket_unit='ms', tag_names=['t1', 't2'] ) - tagged_1 = histogram.tag(t1='a', t2='b') - tagged_1.observe(0.2) - tagged_1.observe(0.7) - tagged_1.observe(2.5) - - tagged_2 = histogram.tag(t1='c', t2='b') - tagged_2.observe(2) - tagged_2.observe(5) + histogram.observe(0.2, t1='a', t2='b') + histogram.observe(0.7, t1='a', t2='b') + histogram.observe(2.5, t1='a', t2='b') + + histogram.observe(2, t1='c', t2='b') + histogram.observe(5, t1='c', t2='b') self.assertHistogramMetric(histogram, { (('t1', 'a'), ('t2', 'b')): {1: 2, 3: 1}, (('t1', 'c'), ('t2', 'b')): {3: 1, INF: 1} @@ -129,14 +127,12 @@ def test_histogram(self): 'commcare.test.histogram', 'Description', 'duration', buckets=[1, 2, 3], bucket_unit='ms', tag_names=['t1', 't2'] ) - tagged_1 = histogram.tag(t1='a', t2='b') - tagged_1.observe(0.2) - tagged_1.observe(0.7) - tagged_1.observe(2.5) - - tagged_2 = histogram.tag(t1='c', t2='b') - tagged_2.observe(2) - tagged_2.observe(5) + histogram.observe(0.2, t1='a', t2='b') + histogram.observe(0.7, t1='a', t2='b') + histogram.observe(2.5, t1='a', t2='b') + + histogram.observe(2, t1='c', t2='b') + histogram.observe(5, t1='c', t2='b') self.assertHistogramMetric(histogram, { (('t1', 'a'), ('t2', 'b')): {1: 2, 3: 1}, (('t1', 'c'), ('t2', 'b')): {2: 1, INF: 1} @@ -145,7 +141,7 @@ def test_histogram(self): def _samples_to_dics(self, samples, filter_name=None): """Convert a Sample tuple into a dict((name, (labels tuple)) -> value)""" return { - tuple(sample.labels.items()): sample.value + tuple(sorted(sample.labels.items())): sample.value for sample in samples if not filter_name or sample.name == filter_name } From 0ab0006ac2660650e5ca743eb669b5dc841a11e6 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Mon, 16 Mar 2020 15:04:49 +0200 Subject: [PATCH 20/20] dummy metric --- corehq/util/metrics/metrics.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index f4a699145ace..5abb6811f308 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -119,9 +119,20 @@ def histogram(self, name: str, documentation: str, ) +class DummyMetric: + def __init__(self, *args, **kwargs): + pass + + def __getattr__(self, item): + if item in ('inc', 'set', 'observe'): + return lambda *args, **kwargs: None + raise AttributeError + + class DummyMetrics(HqMetrics): - _counter_class = HqCounter - _gauge_class = HqGauge + _counter_class = DummyMetric + _gauge_class = DummyMetric + _histogram_class = DummyMetric def enabled(self) -> bool: return True