-
Notifications
You must be signed in to change notification settings - Fork 88
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Move analytics scripts from builder (#6111)
Part of pytorch/builder#2054
- Loading branch information
Showing
7 changed files
with
1,283 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
#!/usr/bin/env python3 | ||
# Tool for analyzing sizes of CUDA kernels for various GPU architectures | ||
import os | ||
import struct | ||
import subprocess | ||
import sys | ||
from tempfile import TemporaryDirectory | ||
from typing import Dict | ||
|
||
|
||
# Try to auto-import elftools; if the module is missing, attempt a one-time
# per-user pip install of pyelftools before giving up with instructions.
try:
    from elftools.elf.elffile import ELFFile
except ModuleNotFoundError:
    # Plain string: the original used an f-string with no placeholders (F541).
    print('elftools module not found, trying to install it from pip')
    from pip._internal import main as pip_main
    try:
        pip_main(["install", "pyelftools", "--user"])
    except SystemExit:
        print(f'PIP installation failed, please install it manually by invoking "{sys.executable} -mpip install pyelftools --user"')
        sys.exit(-1)
    # Retry after the install; a second ModuleNotFoundError propagates.
    from elftools.elf.elffile import ELFFile
|
||
|
||
# From https://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size | ||
def sizeof_fmt(num, suffix='B'): | ||
for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']: | ||
if abs(num) < 1024.0: | ||
return "%3.1f%s%s" % (num, unit, suffix) | ||
num /= 1024.0 | ||
return "%.1f%s%s" % (num, 'Yi', suffix) | ||
|
||
|
||
def compute_cubin_sizes(file_name, section_name='.nv_fatbin', debug=False):
    """Sum embedded CUDA binary sizes per architecture from an ELF section.

    Parses *section_name* of the ELF file *file_name* as a sequence of fatbin
    containers and returns a dict mapping an architecture tag (e.g. 'sm_80'
    for compiled cubins, 'ptx_80' for PTX entries) to the total payload bytes
    for that architecture. Returns an empty dict if the section is absent.
    Raises RuntimeError on unexpected magic numbers or entry versions.
    """
    with open(file_name, 'rb') as f:
        elf_file = ELFFile(f)
        nv_fatbin = elf_file.get_section_by_name(section_name)
        if nv_fatbin is None:
            return {}
        data = nv_fatbin.data()
        idx, offs = 0, 0
        elf_sizes = {}
        # The section is a concatenation of fatbin containers, each starting
        # with a 16-byte header (magic, version, header size, payload size).
        while offs < len(data):
            (magic, version, header_size, fatbin_size) = struct.unpack('IHHL', data[offs: offs + 16])
            if magic != 0xba55ed50 or version != 1:
                raise RuntimeError(f"Unexpected fatbin magic {hex(magic)} or version {version}")
            if debug:
                print(f"Found fatbin at {offs} header_size={header_size} fatbin_size={fatbin_size}")
            offs += header_size
            fatbin_end = offs + fatbin_size
            # Walk the entries inside this container; each has a 30-byte
            # header followed by elf_size bytes of cubin/PTX payload.
            while offs < fatbin_end:
                (kind, version, hdr_size, elf_size, empty, code_ver, sm_ver) = struct.unpack('HHILLIH', data[offs: offs + 30])
                if version != 0x0101 or kind not in [1, 2]:
                    raise RuntimeError(f"Unexpected cubin version {hex(version)} or kind {kind}")
                # kind == 1 is a PTX entry, kind == 2 a compiled cubin.
                sm_ver = f'{"ptx" if kind == 1 else "sm"}_{sm_ver}'
                if debug:
                    print(f" {idx}: elf_size={elf_size} code_ver={hex(code_ver)} sm={sm_ver}")
                if sm_ver not in elf_sizes:
                    elf_sizes[sm_ver] = 0
                elf_sizes[sm_ver] += elf_size
                idx, offs = idx + 1, offs + hdr_size + elf_size
            # Skip any padding up to the declared end of this container.
            offs = fatbin_end
        return elf_sizes
|
||
|
||
class ArFileCtx:
    """Context manager that extracts a static library (`ar` archive) into a
    temporary directory and chdirs into it for the duration of the block.

    Fix: the original never changed into the temporary directory before
    running ``ar x``, so the archive members were extracted into the caller's
    working directory (and never cleaned up), while the caller's
    ``os.listdir('.')`` scan expected them inside the temp dir.
    """

    def __init__(self, ar_name: str) -> None:
        # Resolve to an absolute path now, since we chdir before extracting.
        self.ar_name = os.path.abspath(ar_name)
        self._tmpdir = TemporaryDirectory()

    def __enter__(self) -> str:
        # Remember where we were so __exit__ can restore it.
        self._pwd = os.getcwd()
        rc = self._tmpdir.__enter__()
        # Extract inside the temp dir so members are visible to the caller's
        # os.listdir('.') and are removed together with the directory.
        os.chdir(rc)
        subprocess.check_call(['ar', 'x', self.ar_name])
        return rc

    def __exit__(self, ex, value, tb) -> None:
        os.chdir(self._pwd)
        return self._tmpdir.__exit__(ex, value, tb)
|
||
|
||
def dict_add(rc: Dict[str, int], b: Dict[str, int]) -> Dict[str, int]:
    """Accumulate the counts from *b* into *rc* in place and return *rc*."""
    for key, delta in b.items():
        rc[key] = rc.get(key, 0) + delta
    return rc
|
||
|
||
def main():
    """Report per-architecture CUDA kernel sizes for a shared library or a
    static archive; defaults to the installed torch's libtorch_cuda.so.

    Fixes: the `.a` branch reused `fname` as its loop variable, shadowing the
    archive path; the shared-library branch repeated the section-name list
    instead of using `section_names`.
    """
    if sys.platform != 'linux':
        print('This script only works with Linux ELF files')
        return
    if len(sys.argv) < 2:
        print(f"{sys.argv[0]} invoked without any arguments trying to infer location of libtorch_cuda")
        # Imported lazily: only needed when we have to locate libtorch_cuda.
        import torch
        fname = os.path.join(os.path.dirname(torch.__file__), 'lib', 'libtorch_cuda.so')
    else:
        fname = sys.argv[1]

    if not os.path.exists(fname):
        print(f"Can't find {fname}")
        sys.exit(-1)

    section_names = ['.nv_fatbin', '__nv_relfatbin']
    results = {name: {} for name in section_names}
    print(f"Analyzing {fname}")
    if os.path.splitext(fname)[1] == '.a':
        # Static archive: extract members into a temp dir and scan each .o.
        with ArFileCtx(fname):
            # Distinct loop variable; the original shadowed `fname` here.
            for member in os.listdir("."):
                if not member.endswith(".o"):
                    continue
                for section_name in section_names:
                    elf_sizes = compute_cubin_sizes(member, section_name)
                    dict_add(results[section_name], elf_sizes)
    else:
        for section_name in section_names:
            dict_add(results[section_name], compute_cubin_sizes(fname, section_name))

    for section_name in section_names:
        elf_sizes = results[section_name]
        print(f"{section_name} size {sizeof_fmt(sum(elf_sizes.values()))}")
        for (sm_ver, total_size) in elf_sizes.items():
            print(f" {sm_ver}: {sizeof_fmt(total_size)}")
|
||
|
||
# Run the analysis only when executed as a script, not on import.
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,163 @@ | ||
from collections import defaultdict
from datetime import datetime, timedelta, timezone
import gzip
import os
import re
# Import the submodule explicitly: the code uses urllib.parse.unquote, and
# bare `import urllib` only works if something else happens to load
# urllib.parse first.
import urllib.parse

from tqdm import tqdm
import boto3

# Shared AWS handles for the 'pytorch' S3 bucket.
# NOTE(review): these are created at import time and rely on ambient AWS
# credentials being configured — confirm that is intended for all callers.
S3 = boto3.resource('s3')
CLIENT = boto3.client('s3')
BUCKET = S3.Bucket('pytorch')
|
||
class CacheEntry:
    """One downloadable artifact (wheel/zip) seen in the CloudFront logs.

    Accumulates the total bytes served for the artifact and derives metadata
    (OS, CUDA arch, package name/version) from its download URI.
    """

    # Class-level default; the S3 size lookup caches into an instance attr.
    _size = None

    def __init__(self, download_uri: str):
        self.download_uri = download_uri
        self.bytes_sent = 0

    @property
    def os_type(self) -> str:
        """Coarse OS bucket inferred from the artifact's file name."""
        if "win" in self.download_uri:
            return "windows"
        if "macosx" in self.download_uri:
            return "macos"
        return "linux"

    @property
    def target_arch(self) -> str:
        """CUDA tag (e.g. 'cu117') found in the URI, or 'cpu' when absent."""
        match = re.search(r"cu[0-9]+", self.download_uri)
        return match[0] if match else "cpu"

    @property
    def package_name(self) -> str:
        """Leading dash-separated component of the file name, e.g. 'torch'."""
        return os.path.basename(self.download_uri).split('-')[0]

    @property
    def package_version(self) -> str:
        """Version string parsed from the URI; nightly builds keep the
        `.devYYYYMMDD` suffix. Raises if no version-like token is found."""
        if "dev" in self.download_uri:
            pattern = r"[0-9]+\.[0-9]+\.[0-9]+\.dev[0-9]+"
        else:
            pattern = r"[0-9]+\.[0-9]+\.[0-9]+"
        match = re.search(pattern, self.download_uri)
        if not match:
            raise Exception("Wtf there's no version o.O")
        return match[0]

    @property
    def size(self) -> int:
        """Artifact size in bytes, looked up from S3 once and then cached."""
        if self._size is None:
            prefix = self.download_uri.lstrip("/")
            for obj in BUCKET.objects.filter(Prefix=prefix):
                self._size = obj.size
            if self._size is None:
                raise Exception(
                    f"No object found for prefix {self.download_uri}"
                )
        return self._size

    @property
    def downloads(self):
        """Download count estimated as total bytes served / artifact size."""
        return self.bytes_sent // self.size
|
||
def parse_logs(log_directory: str) -> dict:
    """Walk gzipped CloudFront logs under *log_directory* and accumulate the
    bytes sent per download URI into CacheEntry objects.

    Only rows with a 2xx status and a .whl/.zip URI are counted. Returns a
    dict mapping the decoded download URI to its CacheEntry.
    """
    bytes_cache = {}
    for dirpath, _, filenames in os.walk(log_directory):
        for filename in tqdm(filenames):
            with gzip.open(os.path.join(dirpath, filename), 'r') as gf:
                contents = gf.read().decode("utf-8")
            # Skip the two CloudFront header lines before the data rows.
            for row in contents.splitlines()[2:]:
                columns = row.split('\t')
                bytes_sent = int(columns[3])
                # The URI is double-percent-encoded in the logs.
                download_uri = urllib.parse.unquote(
                    urllib.parse.unquote(columns[7])
                )
                status = columns[8]
                if not (status.startswith("2")
                        and download_uri.endswith((".whl", ".zip"))):
                    continue
                entry = bytes_cache.setdefault(
                    download_uri, CacheEntry(download_uri)
                )
                entry.bytes_sent += bytes_sent
    return bytes_cache
|
||
def output_results(bytes_cache: dict) -> None:
    """Print download tallies by OS, by CUDA arch, and by package/version.

    Entries whose properties raise (unparsable version, missing S3 object)
    are silently skipped, matching the original best-effort behaviour.
    """
    os_results = defaultdict(int)
    arch_results = defaultdict(int)
    package_results = defaultdict(lambda: defaultdict(int))
    for _, entry in tqdm(bytes_cache.items()):
        try:
            # Keep the original statement order: the defaultdict subscripts
            # may insert keys even when a later property access raises.
            os_results[entry.os_type] += entry.downloads
            arch_results[entry.target_arch] += entry.downloads
            package_results[entry.package_name][entry.package_version] += (
                entry.downloads
            )
        except Exception:
            pass
    print("=-=-= Results =-=-=")
    print("=-=-= OS =-=-=")
    total_os_num = sum(os_results.values())
    for os_type, num in os_results.items():
        print(f"\t* {os_type}: {num} ({(num/total_os_num)*100:.2f}%)")

    print("=-=-= ARCH =-=-=")
    total_arch_num = sum(arch_results.values())
    for arch_type, num in arch_results.items():
        print(f"\t* {arch_type}: {num} ({(num/total_arch_num) * 100:.2f}%)")

    print("=-=-= By Package =-=-=")
    for package_name, versions in package_results.items():
        print(f"=-=-= {package_name} =-=-=")
        total_package_num = sum(versions.values())
        for package_version, num in versions.items():
            print(
                f"\t* {package_version}: {num} ({(num/total_package_num) * 100:.2f}%)"
            )
|
||
def download_logs(log_directory: str, since: float):
    """Download yesterday's CloudFront logs from s3://pytorch/cflogs into
    *log_directory*, skipping files that already exist locally.

    NOTE(review): `since` is accepted but never used — the window is always
    the previous UTC day (plus 1h padding); kept for caller compatibility.

    Fix: directory creation now uses `os.makedirs(..., exist_ok=True)`
    instead of an exists-check-then-create, which raced with concurrent runs.
    """
    dt_now = datetime.now(timezone.utc)
    # Start of the current UTC day == end of "yesterday".
    dt_end = datetime(dt_now.year, dt_now.month, dt_now.day, tzinfo=timezone.utc)
    dt_start = dt_end - timedelta(days=1, hours=1)  # Add 1 hour padding to account for potentially missed logs due to timing
    for key in tqdm(BUCKET.objects.filter(Prefix='cflogs')):
        remote_fname = key.key
        local_fname = os.path.join(log_directory, remote_fname)
        # Only download things from yesterday
        dt_modified = key.last_modified.replace(tzinfo=timezone.utc)
        if dt_start >= dt_modified or dt_end < dt_modified:
            continue
        # TODO: Do this in parallel
        if not os.path.exists(local_fname):
            dirname = os.path.dirname(local_fname)
            os.makedirs(dirname, exist_ok=True)
            CLIENT.download_file("pytorch", remote_fname, local_fname)
|
||
|
||
if __name__ == "__main__": | ||
print("Downloading logs") | ||
download_logs('cache', 1) | ||
print("Parsing logs") | ||
cache = parse_logs('cache/cflogs/') | ||
print("Calculating results") | ||
output_results(cache) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
#!/usr/bin/env python3 | ||
from typing import Dict, List | ||
from subprocess import check_output | ||
import os | ||
import sys | ||
|
||
|
||
def get_defined_symbols(fname: str, verbose: bool = False) -> Dict[str, int]:
    """Map each defined symbol in *fname* to its size in bytes using `nm`.

    On macOS `nm` cannot print sizes, so sizes are inferred from the gap
    between consecutive symbol addresses (sorted via `-n`); the last symbol
    gets a placeholder size of 4.
    """
    if verbose:
        print(f"Processing {fname}...", end='', flush=True)
    if sys.platform == 'darwin':
        output = check_output(['nm', '--defined-only', '-n', fname]).decode('ascii')
        lines = output.split("\n")[:-1]
        rc = {}
        for idx, line in enumerate(lines):
            addr, stype, name = line.split(' ')
            if idx + 1 == len(lines):
                # No following symbol to diff against.
                size = 4
            else:
                next_addr = lines[idx + 1].split(' ')[0]
                size = int(next_addr, 16) - int(addr, 16)
            rc[name] = size
    else:
        output = check_output(['nm', '--print-size', '--defined-only', fname]).decode('ascii')
        # Only rows with all four fields (addr, size, type, name) carry sizes.
        rc = {
            fields[3]: int(fields[1], 16)
            for fields in (line.split() for line in output.split('\n'))
            if len(fields) == 4
        }
    if verbose:
        print("done")
    return rc
|
||
|
||
def get_deps(fname: str) -> List[str]:
    """Return the shared-library names *fname* links against.

    Uses `otool -l` (LC_LOAD_DYLIB commands) on macOS and the readelf
    (NEEDED) dynamic entries elsewhere.
    """
    if sys.platform == 'darwin':
        deps = []
        lines = check_output(['otool', '-l', fname]).decode('ascii').split("\n")[1:-1]
        for idx, line in enumerate(lines):
            if line.strip() != 'cmd LC_LOAD_DYLIB':
                continue
            # The dylib path sits two lines below the LC_LOAD_DYLIB command.
            path = lines[idx + 2].strip()
            assert path.startswith('name')
            deps.append(os.path.basename(path.split(' ')[1]))
        return deps
    lines = check_output(['readelf', '--dynamic', fname]).decode('ascii').split('\n')
    return [line.split('[')[1][:-1] for line in lines if '(NEEDED)' in line]
|
||
|
||
def humansize(size):
    """Format a byte count with the historical unit labels of this tool
    (bytes / Kb / Mb / Gb, binary thresholds)."""
    KB, MB, GB = 1024, 1024 ** 2, 1024 ** 3
    if size < KB:
        return f"{size} bytes"
    if size < MB:
        return f"{int(size / KB)} Kb"
    if size < GB:
        return f"{size / MB:.2f} Mb"
    return f"{size / GB:.2f} Gb"
|
||
|
||
def print_sizes(libname, depth: int = 2) -> None:
    """Print the total defined-symbol size of *libname* and the overlap with
    each of its (transitively discovered) same-directory dependencies.

    Fix: the original reassigned ``depth = 2`` inside the body, silently
    discarding the caller-supplied argument.
    """
    libs = [libname]
    symbols = {os.path.basename(libname): get_defined_symbols(libname, verbose=True)}
    for _ in range(depth):
        # NB: `libs` grows while being iterated; newly appended libraries are
        # visited within the same pass and re-scanned on subsequent passes.
        for lib in libs:
            dirname = os.path.dirname(lib)
            for dep in get_deps(lib):
                path = os.path.join(dirname, dep)
                # Only consider deps that live next to the library itself.
                if not os.path.exists(path):
                    continue
                if path not in libs:
                    libs.append(path)
                    symbols[dep] = get_defined_symbols(path, verbose=True)

    for lib in libs:
        lib_symbols = symbols[os.path.basename(lib)]
        lib_keys = set(lib_symbols.keys())
        rc = f"{lib} symbols size {humansize(sum(lib_symbols.values()))}"
        for dep in get_deps(lib):
            if dep not in symbols:
                continue
            # Overlap = symbols defined in both this lib and the dependency,
            # sized by this lib's copy.
            dep_overlap = lib_keys.intersection(set(symbols[dep].keys()))
            overlap_size = sum(lib_symbols[k] for k in dep_overlap)
            if overlap_size > 0:
                rc += f" {dep} overlap is {humansize(overlap_size)}"
        print(rc)
|
||
|
||
def print_symbols_overlap(libname1: str, libname2: str) -> None:
    """Report how much of *libname1*'s defined-symbol size is also defined in
    *libname2*, then list each shared symbol name."""
    sym1 = get_defined_symbols(libname1, verbose=True)
    sym2 = get_defined_symbols(libname2, verbose=True)
    sym1_size = sum(sym1.values())
    shared = set(sym1.keys()).intersection(set(sym2.keys()))
    # Overlap is measured by libname1's copy of each shared symbol.
    overlap_size = sum(sym1[name] for name in shared)
    if overlap_size == 0:
        print(f"{libname1} symbols size {humansize(sym1_size)} does not overlap with {libname2}")
        return
    print(f"{libname1} symbols size {humansize(sym1_size)} overlap {humansize(overlap_size)} ({100.0 * overlap_size/sym1_size :.2f}%)")
    for sym in shared:
        print(sym)
|
||
|
||
# CLI: with two arguments print the symbol overlap between two libraries;
# otherwise print per-library symbol sizes (default: lib/libtorch_cuda.so).
if __name__ == '__main__':
    if len(sys.argv) == 3:
        print_symbols_overlap(sys.argv[1], sys.argv[2])
    else:
        print_sizes(sys.argv[1] if len(sys.argv) > 1 else "lib/libtorch_cuda.so")
Oops, something went wrong.