# regression_detector.py
"""
The regression detector of TorchBench Userbenchmark.
"""
import json
import argparse
import importlib
from dataclasses import dataclass, asdict
import os
import yaml
from pathlib import Path
import time
from datetime import datetime
from typing import Any, Dict, Optional
from userbenchmark.utils import PLATFORMS, USERBENCHMARK_OUTPUT_PREFIX, REPO_PATH, \
    TorchBenchABTestResult, get_date_from_metrics, \
    get_ub_name, get_latest_n_jsons_from_s3
from utils.s3_utils import S3Client, USERBENCHMARK_S3_BUCKET, USERBENCHMARK_S3_OBJECT

GITHUB_ISSUE_TEMPLATE = """
TorchBench CI has detected a performance signal.
Base PyTorch commit: {start}
Affected PyTorch commit: {end}
Affected Tests:
{test_details}
Tests that were no longer run on affected commit:
{control_only_tests}
Tests that were newly added on affected commit:
{treatment_only_tests}
cc {owner}
"""


def call_userbenchmark_detector(detector, start_file: str, end_file: str) -> Optional[TorchBenchABTestResult]:
    return detector(start_file, end_file)


def get_default_output_path(bm_name: str) -> str:
    output_path = os.path.join(REPO_PATH, USERBENCHMARK_OUTPUT_PREFIX, bm_name)
    fname = "regression-{}.yaml".format(datetime.fromtimestamp(time.time()).strftime("%Y%m%d%H%M%S"))
    return os.path.join(output_path, fname)
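
# For illustration only: with USERBENCHMARK_OUTPUT_PREFIX == ".userbenchmark" (as referenced in the
# comment inside process_regressions_into_yaml below), a call such as
# get_default_output_path("torch-nightly") would return a path of the form
#   <REPO_PATH>/.userbenchmark/torch-nightly/regression-20230102030405.yaml
# where the userbenchmark name and the timestamp are hypothetical.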


def generate_regression_dict(control, treatment) -> Dict[Any, Any]:
    assert control["name"] == treatment["name"], \
        f'Expected the same userbenchmark name from metrics files, ' \
        f'but getting {control["name"]} and {treatment["name"]}.'
    bm_name = control["name"]
    detector = importlib.import_module(f"userbenchmark.{bm_name}.regression_detector").run
    # Process control and treatment to include only shared keys
    filtered_control_metrics = {}
    control_only_metrics = {}
    filtered_treatment_metrics = {}
    treatment_only_metrics = {}
    for control_name, control_metric in control["metrics"].items():
        if control_name in treatment["metrics"]:
            filtered_control_metrics[control_name] = control_metric
        else:
            control_only_metrics[control_name] = control_metric
    for treatment_name, treatment_metric in treatment["metrics"].items():
        if treatment_name in control["metrics"]:
            filtered_treatment_metrics[treatment_name] = treatment_metric
        else:
            treatment_only_metrics[treatment_name] = treatment_metric
    control["metrics"] = filtered_control_metrics
    treatment["metrics"] = filtered_treatment_metrics
    assert filtered_control_metrics.keys() == filtered_treatment_metrics.keys()
    # Local file comparison, return the regression detection object
    result = call_userbenchmark_detector(detector, control, treatment)
    # Organize the detection result into a JSON-serializable dictionary. Guard against a detector
    # that returns None, so that asdict() is never called on a non-dataclass value.
    result_dict = asdict(result) if result else {}
    if result_dict or control_only_metrics or treatment_only_metrics:
        result_dict["control_only_metrics"] = control_only_metrics
        result_dict["treatment_only_metrics"] = treatment_only_metrics
    return result_dict
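
# A minimal sketch of the metrics-file shape generate_regression_dict() expects, inferred from the
# keys it reads ("name" and "metrics"); the field names and values below are illustrative only:
#
#   {
#       "name": "torch-nightly",
#       "environ": {"pytorch_git_version": "<control-sha>"},
#       "metrics": {"model_a_latency": 1.23, "model_b_latency": 4.56}
#   }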


def process_regressions_into_yaml(regressions_dict, output_path: str, control_file: str, treatment_file: str) -> None:
    # Write the result to $REPO_PATH/.userbenchmark/<userbenchmark-name>/regression-<time>.yaml
    if regressions_dict == {}:
        print(f"No performance signal detected between file {control_file} and {treatment_file}.")
        return
    # Create the output directory if it doesn't exist
    output_dir = Path(os.path.dirname(output_path))
    output_dir.mkdir(parents=True, exist_ok=True)
    output_yaml_str = yaml.safe_dump(regressions_dict, sort_keys=False)
    print(output_yaml_str)
    with open(output_path, "w") as ofptr:
        ofptr.write(output_yaml_str)
    print(f"Wrote above yaml to {output_path}.")


def process_regressions_into_gh_issue(regressions_dict, owner: str, output_path: str) -> None:
    troubled_tests = ""
    for test, stats in regressions_dict["details"].items():
        delta = stats["delta"]
        if delta != 0:
            sign = "+" if delta > 0 else ""
            troubled_tests += f"- {test}: {sign}{delta:.5f}%\n"
    control_only_tests = ""
    for test, stat in regressions_dict["control_only_metrics"].items():
        control_only_tests += f"- {test}: {stat}\n"
    treatment_only_tests = ""
    for test, stat in regressions_dict["treatment_only_metrics"].items():
        treatment_only_tests += f"- {test}: {stat}\n"
    control_commit = regressions_dict["control_env"]["pytorch_git_version"]
    treatment_commit = regressions_dict["treatment_env"]["pytorch_git_version"]
    if troubled_tests == "" and control_only_tests == "" and treatment_only_tests == "":
        print(f"No regressions found between {control_commit} and {treatment_commit}.")
        return
    # Signal downstream CI steps that a regression was detected by appending to the
    # GitHub Actions environment file.
    fname = os.environ["GITHUB_ENV"]
    content = f"TORCHBENCH_REGRESSION_DETECTED='{treatment_commit}'\n"
    with open(fname, 'a') as fo:
        fo.write(content)
    issue_config: Dict[str, str] = {
        "start": control_commit,
        "end": treatment_commit,
        "test_details": troubled_tests,
        "control_only_tests": control_only_tests,
        "treatment_only_tests": treatment_only_tests,
        "owner": owner,
    }
    issue_body = GITHUB_ISSUE_TEMPLATE.format(**issue_config)
    print(issue_body)
    with open(output_path, "w") as f:
        f.write(issue_body)
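
# Sketch of how a CI workflow might consume the outputs above (a hypothetical workflow step; the
# repository's actual workflow may differ). It gates on the TORCHBENCH_REGRESSION_DETECTED variable
# written to $GITHUB_ENV and uses the issue body written to the --gh-issue-path file:
#
#   - name: Open regression issue
#     if: env.TORCHBENCH_REGRESSION_DETECTED != ''
#     run: gh issue create --title "Performance regression detected" --body-file gh-issue.md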


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # Local metrics file comparison
    parser.add_argument("--control", default=None, help="The control group metrics file for comparison. "
                        "If not provided, the script will attempt to download the most recent JSON from S3 "
                        "within the past week and use it as the control. The --platform flag must be "
                        "specified in this case.")
    parser.add_argument("--treatment", default=None, help="The treatment group metrics file for comparison.")
    # S3 metrics file comparison
    parser.add_argument("--name", help="Name of the userbenchmark to run regression detection on.")
    parser.add_argument("--platform", choices=PLATFORMS, default=None, help="The platform on which the regression occurred.")
    parser.add_argument("--start-date", default=None, help="The start date of the regression detection window.")
    parser.add_argument("--end-date", default=None, help="The end date of the regression detection window.")
    # Output file path
    parser.add_argument("--output", default=None, help="Output path of the regression detection YAML file.")
    # GitHub issue details
    parser.add_argument("--owner", nargs="*", default=["@xuzhao9"], help="Owner(s) to cc on regression issues, e.g., @janeyx99.")
    parser.add_argument("--gh-issue-path", default="gh-issue.md", help="Output path of the GitHub issue body.")
    args = parser.parse_args()

    if args.control and args.treatment:
        # Compare two local metrics files.
        with open(args.control, "r") as cfptr:
            control = json.load(cfptr)
        with open(args.treatment, "r") as tfptr:
            treatment = json.load(tfptr)
        output_path = args.output if args.output else get_default_output_path(control["name"])
        regressions_dict = generate_regression_dict(control, treatment)
        process_regressions_into_yaml(regressions_dict, output_path, args.control, args.treatment)
    elif not args.control and args.treatment:
        # Download the control metrics file from S3 and compare it against the local treatment file.
        if not args.platform:
            raise ValueError("A platform must be specified with the --platform flag to retrieve the "
                             "previous metrics JSONs as control from S3.")
        json_path = Path(args.treatment)
        assert json_path.exists(), f"Specified result json path {args.treatment} does not exist."
        date: str = get_date_from_metrics(json_path.stem)
        userbenchmark_name: str = get_ub_name(args.treatment)
        latest_metrics_jsons = get_latest_n_jsons_from_s3(1, userbenchmark_name, args.platform, date)
        if len(latest_metrics_jsons) == 0:
            raise RuntimeWarning("No previous JSONs found to compare against. No regression info has been generated.")
        s3 = S3Client(USERBENCHMARK_S3_BUCKET, USERBENCHMARK_S3_OBJECT)
        control = s3.get_file_as_json(latest_metrics_jsons[0])
        with open(json_path, "r") as tfptr:
            treatment = json.load(tfptr)
        regressions_dict = generate_regression_dict(control, treatment)
        output_path = args.output if args.output else get_default_output_path(control["name"])
        process_regressions_into_yaml(regressions_dict, output_path, args.control, args.treatment)
        owner = " ".join(args.owner) if args.owner else "@xuzhao9"
        process_regressions_into_gh_issue(regressions_dict, owner, args.gh_issue_path)
    else:
        # S3-to-S3 comparison by userbenchmark name and date range.
        print("Comparison for metrics from Amazon S3 given a userbenchmark name is WIP.")