Move analytics scripts from builder (#6111)
atalman authored Dec 24, 2024
1 parent 9f4ff02 commit 9cef6c2
Showing 7 changed files with 1,283 additions and 0 deletions.
125 changes: 125 additions & 0 deletions tools/analytics/cubinsizes.py
@@ -0,0 +1,125 @@
#!/usr/bin/env python3
# Tool for analyzing sizes of CUDA kernels for various GPU architectures
import os
import struct
import subprocess
import sys
from tempfile import TemporaryDirectory
from typing import Dict


# Try to auto-import elftools
try:
from elftools.elf.elffile import ELFFile
except ModuleNotFoundError:
    print('elftools module not found, trying to install it from pip')
from pip._internal import main as pip_main
try:
pip_main(["install", "pyelftools", "--user"])
except SystemExit:
print(f'PIP installation failed, please install it manually by invoking "{sys.executable} -mpip install pyelftools --user"')
sys.exit(-1)
from elftools.elf.elffile import ELFFile


# From https://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size
def sizeof_fmt(num, suffix='B'):
for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
if abs(num) < 1024.0:
return "%3.1f%s%s" % (num, unit, suffix)
num /= 1024.0
return "%.1f%s%s" % (num, 'Yi', suffix)


def compute_cubin_sizes(file_name, section_name='.nv_fatbin', debug=False):
with open(file_name, 'rb') as f:
elf_file = ELFFile(f)
nv_fatbin = elf_file.get_section_by_name(section_name)
if nv_fatbin is None:
return {}
data = nv_fatbin.data()
idx, offs = 0, 0
elf_sizes = {}
while offs < len(data):
(magic, version, header_size, fatbin_size) = struct.unpack('IHHL', data[offs: offs + 16])
if magic != 0xba55ed50 or version != 1:
raise RuntimeError(f"Unexpected fatbin magic {hex(magic)} or version {version}")
if debug:
print(f"Found fatbin at {offs} header_size={header_size} fatbin_size={fatbin_size}")
offs += header_size
fatbin_end = offs + fatbin_size
while offs < fatbin_end:
(kind, version, hdr_size, elf_size, empty, code_ver, sm_ver) = struct.unpack('HHILLIH', data[offs: offs + 30])
if version != 0x0101 or kind not in [1, 2]:
raise RuntimeError(f"Unexpected cubin version {hex(version)} or kind {kind}")
sm_ver = f'{"ptx" if kind == 1 else "sm"}_{sm_ver}'
if debug:
print(f" {idx}: elf_size={elf_size} code_ver={hex(code_ver)} sm={sm_ver}")
if sm_ver not in elf_sizes:
elf_sizes[sm_ver] = 0
elf_sizes[sm_ver] += elf_size
idx, offs = idx + 1, offs + hdr_size + elf_size
offs = fatbin_end
return elf_sizes


class ArFileCtx:
def __init__(self, ar_name: str) -> None:
self.ar_name = os.path.abspath(ar_name)
self._tmpdir = TemporaryDirectory()

    def __enter__(self) -> str:
        self._pwd = os.getcwd()
        rc = self._tmpdir.__enter__()
        # Extract the archive members inside the temporary directory so the
        # subsequent os.listdir(".") in main() only sees the extracted objects
        os.chdir(rc)
        subprocess.check_call(['ar', 'x', self.ar_name])
        return rc

def __exit__(self, ex, value, tb) -> None:
os.chdir(self._pwd)
return self._tmpdir.__exit__(ex, value, tb)


def dict_add(rc: Dict[str, int], b: Dict[str, int]) -> Dict[str, int]:
for key, val in b.items():
rc[key] = (rc[key] if key in rc else 0) + val
return rc


def main():
if sys.platform != 'linux':
print('This script only works with Linux ELF files')
return
if len(sys.argv) < 2:
print(f"{sys.argv[0]} invoked without any arguments trying to infer location of libtorch_cuda")
import torch
fname = os.path.join(os.path.dirname(torch.__file__), 'lib', 'libtorch_cuda.so')
else:
fname = sys.argv[1]

if not os.path.exists(fname):
print(f"Can't find {fname}")
sys.exit(-1)

section_names = ['.nv_fatbin', '__nv_relfatbin']
results = {name: {} for name in section_names}
print(f"Analyzing {fname}")
if os.path.splitext(fname)[1] == '.a':
with ArFileCtx(fname):
for fname in os.listdir("."):
if not fname.endswith(".o"): continue
for section_name in section_names:
elf_sizes = compute_cubin_sizes(fname, section_name)
dict_add(results[section_name], elf_sizes)
else:
        for section_name in section_names:
dict_add(results[section_name], compute_cubin_sizes(fname, section_name))

for section_name in section_names:
elf_sizes = results[section_name]
print(f"{section_name} size {sizeof_fmt(sum(elf_sizes.values()))}")
for (sm_ver, total_size) in elf_sizes.items():
print(f" {sm_ver}: {sizeof_fmt(total_size)}")


if __name__ == '__main__':
main()
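
A minimal usage sketch for the script above: compute_cubin_sizes can also be called directly from Python. The import path and the library location below are illustrative assumptions, not part of the commit.

from cubinsizes import compute_cubin_sizes, sizeof_fmt  # assumes tools/analytics is on sys.path

# Per-architecture sizes of the cubins/PTX embedded in the .nv_fatbin section (path is hypothetical)
sizes = compute_cubin_sizes('/path/to/libtorch_cuda.so', section_name='.nv_fatbin')
for sm_ver, total in sizes.items():
    print(f"{sm_ver}: {sizeof_fmt(total)}")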
163 changes: 163 additions & 0 deletions tools/analytics/download_count_wheels.py
@@ -0,0 +1,163 @@
from collections import defaultdict
from datetime import datetime, timedelta, timezone
import gzip
import os
import re
import urllib.parse

from tqdm import tqdm
import boto3

S3 = boto3.resource('s3')
CLIENT = boto3.client('s3')
BUCKET = S3.Bucket('pytorch')

class CacheEntry:
_size = None

def __init__(self, download_uri: str):
self.download_uri = download_uri
self.bytes_sent = 0

@property
def os_type(self) -> str:
os_type = "linux"
if "win" in self.download_uri:
os_type = "windows"
elif "macosx" in self.download_uri:
os_type = "macos"
return os_type

@property
def target_arch(self) -> str:
target_arch = "cpu"
result = re.search(r"cu[0-9]+", self.download_uri)
if result:
target_arch = result[0]
return target_arch

@property
def package_name(self) -> str:
filename_contents = os.path.basename(self.download_uri).split('-')
return filename_contents[0]

@property
def package_version(self) -> str:
if "dev" in self.download_uri:
results = re.search(
r"[0-9]+\.[0-9]+\.[0-9]+\.dev[0-9]+",
self.download_uri
)
else:
results = re.search(
r"[0-9]+\.[0-9]+\.[0-9]+", self.download_uri
)
if not results:
raise Exception("Wtf there's no version o.O")
return results[0]

@property
def size(self) -> int:
if self._size is None:
for key in BUCKET.objects.filter(
Prefix=self.download_uri.lstrip("/")
):
self._size = key.size
if self._size is None:
raise Exception(
f"No object found for prefix {self.download_uri}"
)
return self._size

@property
def downloads(self):
return self.bytes_sent // self.size

def parse_logs(log_directory: str) -> dict:
bytes_cache = {}
for (dirpath, _, filenames) in os.walk(log_directory):
for filename in tqdm(filenames):
with gzip.open(os.path.join(dirpath, filename), 'r') as gf:
string = gf.read().decode("utf-8")
entries = []
entries += string.splitlines()[2:]
for entry in entries:
columns = entry.split('\t')
bytes_sent = int(columns[3])
download_uri = urllib.parse.unquote(
urllib.parse.unquote(columns[7])
)
status = columns[8]
if not all([
status.startswith("2"),
download_uri.endswith((".whl", ".zip"))
]):
continue
if not bytes_cache.get(download_uri):
bytes_cache[download_uri] = CacheEntry(download_uri)
bytes_cache[download_uri].bytes_sent += bytes_sent
return bytes_cache

def output_results(bytes_cache: dict) -> None:
os_results = defaultdict(int)
arch_results = defaultdict(int)
package_results = defaultdict(lambda: defaultdict(int))
for _, val in tqdm(bytes_cache.items()):
try:
os_results[val.os_type] += val.downloads
arch_results[val.target_arch] += val.downloads
package_results[val.package_name][val.package_version] += (
val.downloads
)
except Exception:
pass
print("=-=-= Results =-=-=")
print("=-=-= OS =-=-=")
total_os_num = sum(os_results.values())
for os_type, num in os_results.items():
print(
f"\t* {os_type}: {num} ({(num/total_os_num)*100:.2f}%)"
)

print("=-=-= ARCH =-=-=")
total_arch_num = sum(arch_results.values())
for arch_type, num in arch_results.items():
print(
f"\t* {arch_type}: {num} ({(num/total_arch_num) * 100:.2f}%)"
)

print("=-=-= By Package =-=-=")
for package_name, upper_val in package_results.items():
print(f"=-=-= {package_name} =-=-=")
total_package_num = sum(upper_val.values())
for package_version, num in upper_val.items():
print(
f"\t* {package_version}: {num} ({(num/total_package_num) * 100:.2f}%)"
)

def download_logs(log_directory: str, since: float):
dt_now = datetime.now(timezone.utc)
dt_end = datetime(dt_now.year, dt_now.month, dt_now.day, tzinfo=timezone.utc)
dt_start = dt_end - timedelta(days=1, hours=1) # Add 1 hour padding to account for potentially missed logs due to timing
for key in tqdm(BUCKET.objects.filter(Prefix='cflogs')):
remote_fname = key.key
local_fname = os.path.join(log_directory, remote_fname)
# Only download things from yesterday
dt_modified = key.last_modified.replace(tzinfo=timezone.utc)
if dt_start >= dt_modified or dt_end < dt_modified:
continue
# TODO: Do this in parallel
if not os.path.exists(local_fname):
dirname = os.path.dirname(local_fname)
if not os.path.exists(dirname):
os.makedirs(dirname)
CLIENT.download_file("pytorch", remote_fname, local_fname)


if __name__ == "__main__":
print("Downloading logs")
download_logs('cache', 1)
print("Parsing logs")
cache = parse_logs('cache/cflogs/')
print("Calculating results")
output_results(cache)
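
For reference, a sketch of the column mapping parse_logs relies on. The log line below is synthetic; the indices used above (columns[3], columns[7], columns[8]) appear to correspond to the sc-bytes, cs-uri-stem and sc-status fields of CloudFront standard access logs.

import urllib.parse

# Synthetic, abbreviated CloudFront log entry (tab-separated)
line = "2024-12-23\t00:01:02\tIAD89-C1\t1048576\t203.0.113.5\tGET\tdownload.pytorch.org\t/whl/cu124/torch-2.5.1%2Bcu124-cp311-cp311-linux_x86_64.whl\t200"
columns = line.split('\t')
bytes_sent = int(columns[3])                                            # sc-bytes
download_uri = urllib.parse.unquote(urllib.parse.unquote(columns[7]))   # cs-uri-stem, double-unquoted
status = columns[8]                                                     # sc-status
print(bytes_sent, download_uri, status)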
99 changes: 99 additions & 0 deletions tools/analytics/duplicates_analyze.py
@@ -0,0 +1,99 @@
#!/usr/bin/env python3
from typing import Dict, List
from subprocess import check_output
import os
import sys


def get_defined_symbols(fname: str, verbose: bool = False) -> Dict[str, int]:
if verbose:
print(f"Processing {fname}...", end='', flush=True)
if sys.platform == 'darwin':
lines = check_output(['nm', '--defined-only', '-n', fname]).decode('ascii').split("\n")[:-1]
rc = {}
for idx, line in enumerate(lines):
addr, stype, name = line.split(' ')
size = 4 if idx + 1 == len(lines) else (int(lines[idx + 1].split(' ')[0], 16) - int(addr, 16))
rc[name] = size
else:
lines = check_output(['nm', '--print-size', '--defined-only', fname]).decode('ascii').split('\n')
rc = {e[3]: int(e[1], 16) for e in [line.split() for line in lines] if len(e) == 4}
if verbose:
print("done")
return rc


def get_deps(fname: str) -> List[str]:
if sys.platform == 'darwin':
rc = []
lines = check_output(['otool', '-l', fname]).decode('ascii').split("\n")[1:-1]
for idx, line in enumerate(lines):
if line.strip() != 'cmd LC_LOAD_DYLIB':
continue
path = lines[idx + 2].strip()
assert path.startswith('name')
rc.append(os.path.basename(path.split(' ')[1]))
return rc
lines = check_output(['readelf', '--dynamic', fname]).decode('ascii').split('\n')
return [line.split('[')[1][:-1] for line in lines if '(NEEDED)' in line]


def humansize(size):
    if size < 1024:
        return f"{size} bytes"
    if size < 1024**2:
        return f"{int(size / 1024)} KiB"
    if size < 1024**3:
        return f"{size / (1024.0**2):.2f} MiB"
    return f"{size / (1024.0**3):.2f} GiB"


def print_sizes(libname, depth: int = 2) -> None:
    libs = [libname]
    symbols = {os.path.basename(libname): get_defined_symbols(libname, verbose=True)}
for _ in range(depth):
for lib in libs:
dirname = os.path.dirname(lib)
for dep in get_deps(lib):
path = os.path.join(dirname, dep)
if not os.path.exists(path):
continue
if path not in libs:
libs.append(path)
symbols[dep] = get_defined_symbols(path, verbose=True)

for lib in libs:
lib_symbols = symbols[os.path.basename(lib)]
lib_keys = set(lib_symbols.keys())
rc = f"{lib} symbols size {humansize(sum(lib_symbols.values()))}"
for dep in get_deps(lib):
if dep not in symbols:
continue
dep_overlap = lib_keys.intersection(set(symbols[dep].keys()))
overlap_size = sum(lib_symbols[k] for k in dep_overlap)
if overlap_size > 0:
rc += f" {dep} overlap is {humansize(overlap_size)}"
print(rc)


def print_symbols_overlap(libname1: str, libname2: str) -> None:
sym1 = get_defined_symbols(libname1, verbose=True)
sym2 = get_defined_symbols(libname2, verbose=True)
sym1_size = sum(sym1.values())
sym2_size = sum(sym2.values())
sym_overlap = set(sym1.keys()).intersection(set(sym2.keys()))
overlap_size = sum(sym1[s] for s in sym_overlap)
if overlap_size == 0:
print(f"{libname1} symbols size {humansize(sym1_size)} does not overlap with {libname2}")
return
print(f"{libname1} symbols size {humansize(sym1_size)} overlap {humansize(overlap_size)} ({100.0 * overlap_size/sym1_size :.2f}%)")
for sym in sym_overlap:
print(sym)


if __name__ == '__main__':
if len(sys.argv) == 3:
print_symbols_overlap(sys.argv[1], sys.argv[2])
else:
print_sizes(sys.argv[1] if len(sys.argv) > 1 else "lib/libtorch_cuda.so")
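
For reference: invoked with a single library path, the script above prints each library's symbol size and its overlap with every direct dependency; with two paths it prints the symbols defined in both. A small sketch of what the Linux branch of get_defined_symbols parses, using synthetic nm --print-size --defined-only output (address, size, type, name columns):

lines = [
    "0000000000001130 0000000000000025 T my_function",
    "0000000000004020 0000000000000008 B my_global",
    "0000000000001200 t local_helper",  # no size column, skipped by the length check
]
sizes = {e[3]: int(e[1], 16) for e in [line.split() for line in lines] if len(e) == 4}
print(sizes)  # {'my_function': 37, 'my_global': 8}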
(Diffs for the remaining 4 changed files are not shown.)
