-
Notifications
You must be signed in to change notification settings - Fork 9
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
bug-1898345: fix metrics key prefix #1020
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -143,7 +143,7 @@ def extract_payload(self, req): | |
|
||
# Decompress payload if it's compressed | ||
if req.env.get("HTTP_CONTENT_ENCODING") == "gzip": | ||
METRICS.incr("breakpad_resource.gzipped_crash") | ||
METRICS.incr("collector.breakpad_resource.gzipped_crash") | ||
crash_report.payload_compressed = "1" | ||
|
||
# If the content is gzipped, we pull it out and decompress it. We | ||
|
@@ -154,13 +154,13 @@ def extract_payload(self, req): | |
try: | ||
data = zlib.decompress(req.stream.read(content_length), gzip_header) | ||
METRICS.histogram( | ||
"breakpad_resource.gzipped_crash_decompress", | ||
"collector.breakpad_resource.gzipped_crash_decompress", | ||
value=(time.perf_counter() - start_time) * 1000.0, | ||
tags=["result:success"], | ||
) | ||
except zlib.error as exc: | ||
METRICS.histogram( | ||
"breakpad_resource.gzipped_crash_decompress", | ||
"collector.breakpad_resource.gzipped_crash_decompress", | ||
value=(time.perf_counter() - start_time) * 1000.0, | ||
tags=["result:fail"], | ||
) | ||
|
@@ -178,15 +178,15 @@ def extract_payload(self, req): | |
|
||
data = io.BytesIO(data) | ||
METRICS.histogram( | ||
"breakpad_resource.crash_size", | ||
"collector.breakpad_resource.crash_size", | ||
value=content_length, | ||
tags=["payload:compressed"], | ||
) | ||
|
||
else: | ||
data = req.bounded_stream | ||
METRICS.histogram( | ||
"breakpad_resource.crash_size", | ||
"collector.breakpad_resource.crash_size", | ||
value=content_length, | ||
tags=["payload:uncompressed"], | ||
) | ||
|
@@ -261,7 +261,7 @@ def extract_payload(self, req): | |
|
||
if has_json and has_kvpairs: | ||
# If the crash payload has both kvpairs and a JSON blob, then it's malformed | ||
# so we add a note and log it. | ||
# so we add a note and log it, but we don't reject it | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I clarified this here because it used to be that we rejected these crash reports, then we changed the behavior and current me wants future me to be less confused if I have to look at this again. |
||
msg = "includes annotations in both json-encoded extra and formdata parts" | ||
LOGGER.info(msg) | ||
crash_report.notes.append(msg) | ||
|
@@ -302,7 +302,7 @@ def cleanup_crash_report(self, raw_crash): | |
del raw_crash[bad_field] | ||
notes.append("Removed %s from raw crash." % bad_field) | ||
|
||
@METRICS.timer_decorator("breakpad_resource.on_post.time") | ||
@METRICS.timer_decorator("collector.breakpad_resource.on_post.time") | ||
def on_post(self, req, resp): | ||
"""Handle incoming HTTP POSTs. | ||
|
||
|
@@ -324,12 +324,14 @@ def on_post(self, req, resp): | |
except MalformedCrashReport as exc: | ||
# If this is malformed, then reject it with malformed error code. | ||
msg = str(exc) | ||
METRICS.incr("breakpad_resource.malformed", tags=["reason:%s" % msg]) | ||
METRICS.incr( | ||
"collector.breakpad_resource.malformed", tags=["reason:%s" % msg] | ||
) | ||
resp.status = falcon.HTTP_400 | ||
resp.text = "Discarded=malformed_%s" % msg | ||
return | ||
|
||
METRICS.incr("breakpad_resource.incoming_crash") | ||
METRICS.incr("collector.breakpad_resource.incoming_crash") | ||
|
||
raw_crash = crash_report.annotations | ||
|
||
|
@@ -380,9 +382,11 @@ def on_post(self, req, resp): | |
rule_name, | ||
RESULT_TO_TEXT[throttle_result], | ||
) | ||
METRICS.incr("breakpad_resource.throttle_rule", tags=["rule:%s" % rule_name]) | ||
METRICS.incr( | ||
"breakpad_resource.throttle", | ||
"collector.breakpad_resource.throttle_rule", tags=["rule:%s" % rule_name] | ||
) | ||
METRICS.incr( | ||
"collector.breakpad_resource.throttle", | ||
tags=["result:%s" % RESULT_TO_TEXT[throttle_result].lower()], | ||
) | ||
raw_crash["metadata"]["throttle_rule"] = rule_name | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,34 @@ | |
from testlib.mini_poster import compress, multipart_encode | ||
|
||
|
||
class AnyTagValue:
    """Matches a markus metrics tag with any value.

    A tag is a string of the form ``"key:value"`` (or a bare ``"key"``). An
    instance compares equal to any tag string whose key equals ``self.key``,
    whatever the value is, and orders itself relative to tag strings by key
    alone. This lets a test assert that a tag such as ``host`` is present
    without knowing its value.

    :arg key: the tag key to match
    """

    # Instances compare equal to many different strings, so no hash can be
    # consistent with ``__eq__``; keep the class explicitly unhashable.
    __hash__ = None

    def __init__(self, key):
        self.key = key

    def __repr__(self):
        return f"<AnyTagValue {self.key}>"

    def get_other_key(self, other):
        """Return the key portion of a tag string.

        :arg other: a tag string, either ``"key:value"`` or ``"key"``

        :returns: everything before the first ``":"``, or the whole string if
            there is no ``":"``
        """
        # Split on the first colon only: tag values may themselves contain
        # colons (for example "host:10.0.0.1:8080").
        return other.partition(":")[0]

    def _key_of(self, other):
        """Return the key to compare against, or None if not comparable."""
        if isinstance(other, AnyTagValue):
            return other.key
        if isinstance(other, str):
            return self.get_other_key(other)
        return None

    def __eq__(self, other):
        other_key = self._key_of(other)
        if other_key is None:
            return NotImplemented
        return self.key == other_key

    def __lt__(self, other):
        other_key = self._key_of(other)
        if other_key is None:
            return NotImplemented
        return self.key < other_key

    def __le__(self, other):
        other_key = self._key_of(other)
        if other_key is None:
            return NotImplemented
        return self.key <= other_key

    # The reflected comparisons below are required so that ``"tag" < matcher``
    # works: str returns NotImplemented and Python then calls ``matcher.__gt__``.
    # Without them, sorting a list mixing strings and matchers can raise
    # TypeError depending on element order.
    def __gt__(self, other):
        other_key = self._key_of(other)
        if other_key is None:
            return NotImplemented
        return self.key > other_key

    def __ge__(self, other):
        other_key = self._key_of(other)
        if other_key is None:
            return NotImplemented
        return self.key >= other_key
|
||
|
||
class FakeCrashMover: | ||
"""Fake crash mover that raises an error when used""" | ||
|
||
|
@@ -346,13 +374,14 @@ def test_extract_payload_invalid_json_not_dict(self, request_generator): | |
with pytest.raises(MalformedCrashReport, match="invalid_json_value"): | ||
bsp.extract_payload(req) | ||
|
||
def text_extract_payload_kvpairs_and_json(self, request_generator, metricsmock): | ||
# If there's a JSON blob and also kv pairs, then that's a malformed | ||
# crash | ||
def test_extract_payload_kvpairs_and_json(self, request_generator, metricsmock): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This test never ran because its name started with `text_` instead of `test_`. Once I fixed the test method name, it ran and failed because it didn't account for the change in how we handle this situation. I updated this test. |
||
# If there's a JSON blob and also kv pairs, use the annotations from "extra" and | ||
# log a note | ||
data, headers = multipart_encode( | ||
{ | ||
"extra": '{"ProductName":"Firefox","Version":"1.0"}', | ||
"BadKey": "BadValue", | ||
# This annotation is dropped because it's not in "extra" | ||
"IgnoredAnnotation": "someval", | ||
"upload_file_minidump": ("fakecrash.dump", io.BytesIO(b"abcd1234")), | ||
} | ||
) | ||
|
@@ -363,10 +392,20 @@ def text_extract_payload_kvpairs_and_json(self, request_generator, metricsmock): | |
bsp = BreakpadSubmitterResource( | ||
config=EMPTY_CONFIG, crashmover=FakeCrashMover() | ||
) | ||
with metricsmock as metrics: | ||
result = bsp.extract_payload(req) | ||
assert result == ({}, {}) | ||
assert metrics.has_record(stat="malformed", tags=["reason:has_json_and_kv"]) | ||
crash_report = CrashReport( | ||
annotations={ | ||
"ProductName": "Firefox", | ||
"Version": "1.0", | ||
}, | ||
dumps={"upload_file_minidump": b"abcd1234"}, | ||
notes=[ | ||
"includes annotations in both json-encoded extra and formdata parts" | ||
], | ||
payload="json", | ||
payload_compressed="0", | ||
payload_size=542, | ||
) | ||
assert bsp.extract_payload(req) == crash_report | ||
|
||
|
||
@pytest.mark.parametrize( | ||
|
@@ -405,7 +444,7 @@ def test_get_throttle_result(client): | |
|
||
|
||
class TestBreakpadSubmitterResourceIntegration: | ||
def test_submit_crash_report(self, client): | ||
def test_submit_crash_report(self, client, metricsmock): | ||
data, headers = multipart_encode( | ||
{ | ||
"ProductName": "Firefox", | ||
|
@@ -415,7 +454,9 @@ def test_submit_crash_report(self, client): | |
} | ||
) | ||
|
||
result = client.simulate_post("/submit", headers=headers, body=data) | ||
with metricsmock as mm: | ||
result = client.simulate_post("/submit", headers=headers, body=data) | ||
|
||
assert result.status_code == 200 | ||
assert result.headers["Content-Type"].startswith("text/plain") | ||
assert result.content.startswith(b"CrashID=bp") | ||
|
@@ -446,6 +487,22 @@ def test_submit_crash_report(self, client): | |
"version": 2, | ||
} | ||
|
||
mm.assert_histogram("socorro.collector.breakpad_resource.crash_size", value=632) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. None of the tests assert anything about metrics, which is bad. I added this section to this test to assert which metrics should get emitted. |
||
mm.assert_incr("socorro.collector.breakpad_resource.incoming_crash") | ||
mm.assert_incr( | ||
"socorro.collector.breakpad_resource.throttle_rule", | ||
tags=["rule:is_nightly", AnyTagValue("host")], | ||
) | ||
mm.assert_incr( | ||
"socorro.collector.breakpad_resource.throttle", | ||
tags=["result:accept", AnyTagValue("host")], | ||
) | ||
mm.assert_timing("socorro.collector.crashmover.crash_save.time") | ||
mm.assert_timing("socorro.collector.crashmover.crash_publish.time") | ||
mm.assert_incr("socorro.collector.crashmover.save_crash.count") | ||
mm.assert_timing("socorro.collector.crashmover.crash_handling.time") | ||
mm.assert_timing("socorro.collector.breakpad_resource.on_post.time") | ||
|
||
def test_existing_uuid(self, client): | ||
"""Verify if the crash report has a uuid already, it's reused.""" | ||
crash_id = "de1bb258-cbbf-4589-a673-34f800160918" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This changes the key to
socorro.collector.sentry_scrub_error
which is more like the other things we're doing. I'll update the dashboard once this lands.