Move analytics scripts from builder #6111

Merged · 1 commit · Dec 24, 2024
125 changes: 125 additions & 0 deletions tools/analytics/cubinsizes.py
@@ -0,0 +1,125 @@
#!/usr/bin/env python3
# Tool for analyzing sizes of CUDA kernels for various GPU architectures
import os
import struct
import subprocess
import sys
from tempfile import TemporaryDirectory
from typing import Dict


# Try to auto-import elftools
try:
    from elftools.elf.elffile import ELFFile
except ModuleNotFoundError:
    print('elftools module not found, trying to install it from pip')
    from pip._internal import main as pip_main
    try:
        pip_main(["install", "pyelftools", "--user"])
    except SystemExit:
        print(f'PIP installation failed, please install it manually by invoking "{sys.executable} -mpip install pyelftools --user"')
        sys.exit(-1)
    from elftools.elf.elffile import ELFFile


# From https://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size
def sizeof_fmt(num, suffix='B'):
    for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
        if abs(num) < 1024.0:
            return "%3.1f%s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f%s%s" % (num, 'Yi', suffix)


def compute_cubin_sizes(file_name, section_name='.nv_fatbin', debug=False):
    with open(file_name, 'rb') as f:
        elf_file = ELFFile(f)
        nv_fatbin = elf_file.get_section_by_name(section_name)
        if nv_fatbin is None:
            return {}
        data = nv_fatbin.data()
        idx, offs = 0, 0
        elf_sizes = {}
        # The section is a sequence of fatbin containers, each holding one or
        # more PTX/cubin entries; accumulate per-architecture sizes across all
        while offs < len(data):
            (magic, version, header_size, fatbin_size) = struct.unpack('IHHL', data[offs: offs + 16])
            if magic != 0xba55ed50 or version != 1:
                raise RuntimeError(f"Unexpected fatbin magic {hex(magic)} or version {version}")
            if debug:
                print(f"Found fatbin at {offs} header_size={header_size} fatbin_size={fatbin_size}")
            offs += header_size
            fatbin_end = offs + fatbin_size
            while offs < fatbin_end:
                (kind, version, hdr_size, elf_size, empty, code_ver, sm_ver) = struct.unpack('HHILLIH', data[offs: offs + 30])
                if version != 0x0101 or kind not in [1, 2]:
                    raise RuntimeError(f"Unexpected cubin version {hex(version)} or kind {kind}")
                # kind 1 is PTX, kind 2 is SASS compiled for a specific SM arch
                sm_ver = f'{"ptx" if kind == 1 else "sm"}_{sm_ver}'
                if debug:
                    print(f"  {idx}: elf_size={elf_size} code_ver={hex(code_ver)} sm={sm_ver}")
                if sm_ver not in elf_sizes:
                    elf_sizes[sm_ver] = 0
                elf_sizes[sm_ver] += elf_size
                idx, offs = idx + 1, offs + hdr_size + elf_size
            offs = fatbin_end
        return elf_sizes
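The two `struct.unpack` calls above assume the native-endian fatbin layout: a 16-byte container header followed by 30-byte per-entry headers. A quick sanity check of those sizes on a typical 64-bit Linux build (where `L` is 8 bytes):

    import struct
    # fatbin container header: magic (I), version (H), header_size (H), fatbin_size (L)
    assert struct.calcsize('IHHL') == 16
    # per-entry header: kind, version, hdr_size, elf_size, empty, code_ver, sm_ver
    assert struct.calcsize('HHILLIH') == 30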


class ArFileCtx:
    def __init__(self, ar_name: str) -> None:
        self.ar_name = os.path.abspath(ar_name)
        self._tmpdir = TemporaryDirectory()

    def __enter__(self) -> str:
        self._pwd = os.getcwd()
        rc = self._tmpdir.__enter__()
        os.chdir(rc)  # extract into the temporary directory, not the caller's cwd
        subprocess.check_call(['ar', 'x', self.ar_name])
        return rc

    def __exit__(self, ex, value, tb) -> None:
        os.chdir(self._pwd)
        return self._tmpdir.__exit__(ex, value, tb)


def dict_add(rc: Dict[str, int], b: Dict[str, int]) -> Dict[str, int]:
    for key, val in b.items():
        rc[key] = rc.get(key, 0) + val
    return rc


def main():
    if sys.platform != 'linux':
        print('This script only works with Linux ELF files')
        return
    if len(sys.argv) < 2:
        print(f"{sys.argv[0]} invoked without any arguments, trying to infer location of libtorch_cuda")
        import torch
        fname = os.path.join(os.path.dirname(torch.__file__), 'lib', 'libtorch_cuda.so')
    else:
        fname = sys.argv[1]

    if not os.path.exists(fname):
        print(f"Can't find {fname}")
        sys.exit(-1)

    section_names = ['.nv_fatbin', '__nv_relfatbin']
    results = {name: {} for name in section_names}
    print(f"Analyzing {fname}")
    if os.path.splitext(fname)[1] == '.a':
        # Static archive: extract the object files and sum sizes over all of them
        with ArFileCtx(fname):
            for fname in os.listdir("."):
                if not fname.endswith(".o"):
                    continue
                for section_name in section_names:
                    elf_sizes = compute_cubin_sizes(fname, section_name)
                    dict_add(results[section_name], elf_sizes)
    else:
        for section_name in section_names:
            dict_add(results[section_name], compute_cubin_sizes(fname, section_name))

    for section_name in section_names:
        elf_sizes = results[section_name]
        print(f"{section_name} size {sizeof_fmt(sum(elf_sizes.values()))}")
        for (sm_ver, total_size) in elf_sizes.items():
            print(f"  {sm_ver}: {sizeof_fmt(total_size)}")


if __name__ == '__main__':
    main()
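A minimal sketch of driving the tool from Python instead of the command line (assuming the script is importable as a module; the library path is illustrative):

    from cubinsizes import compute_cubin_sizes, sizeof_fmt

    sizes = compute_cubin_sizes('torch/lib/libtorch_cuda.so')  # hypothetical path
    for sm_ver, total in sizes.items():
        print(sm_ver, sizeof_fmt(total))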
163 changes: 163 additions & 0 deletions tools/analytics/download_count_wheels.py
@@ -0,0 +1,163 @@
from collections import defaultdict
from datetime import datetime, timedelta, timezone
import gzip
import os
import re
import urllib.parse

from tqdm import tqdm
import boto3

S3 = boto3.resource('s3')
CLIENT = boto3.client('s3')
BUCKET = S3.Bucket('pytorch')

class CacheEntry:
    _size = None

    def __init__(self, download_uri: str):
        self.download_uri = download_uri
        self.bytes_sent = 0

    @property
    def os_type(self) -> str:
        os_type = "linux"
        if "win" in self.download_uri:
            os_type = "windows"
        elif "macosx" in self.download_uri:
            os_type = "macos"
        return os_type

    @property
    def target_arch(self) -> str:
        target_arch = "cpu"
        result = re.search(r"cu[0-9]+", self.download_uri)
        if result:
            target_arch = result[0]
        return target_arch

    @property
    def package_name(self) -> str:
        filename_contents = os.path.basename(self.download_uri).split('-')
        return filename_contents[0]

    @property
    def package_version(self) -> str:
        if "dev" in self.download_uri:
            results = re.search(
                r"[0-9]+\.[0-9]+\.[0-9]+\.dev[0-9]+",
                self.download_uri
            )
        else:
            results = re.search(
                r"[0-9]+\.[0-9]+\.[0-9]+", self.download_uri
            )
        if not results:
            raise Exception(f"Unable to parse version from {self.download_uri}")
        return results[0]

    @property
    def size(self) -> int:
        if self._size is None:
            for key in BUCKET.objects.filter(
                Prefix=self.download_uri.lstrip("/")
            ):
                self._size = key.size
            if self._size is None:
                raise Exception(
                    f"No object found for prefix {self.download_uri}"
                )
        return self._size

    @property
    def downloads(self):
        return self.bytes_sent // self.size
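Download counts are estimated rather than measured directly: total bytes served for a URI are divided by the artifact's size, so partial and resumed downloads average out. A worked example with hypothetical numbers:

    size = 750 * 1024 ** 2       # a 750 MiB wheel (hypothetical)
    bytes_sent = 40 * 1024 ** 3  # ~40 GiB served for that URI
    print(bytes_sent // size)    # 54 estimated full downloads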

def parse_logs(log_directory: str) -> dict:
    bytes_cache = {}
    for (dirpath, _, filenames) in os.walk(log_directory):
        for filename in tqdm(filenames):
            with gzip.open(os.path.join(dirpath, filename), 'r') as gf:
                string = gf.read().decode("utf-8")
            # Skip the two CloudFront header lines (#Version and #Fields)
            entries = string.splitlines()[2:]
            for entry in entries:
                columns = entry.split('\t')
                bytes_sent = int(columns[3])
                download_uri = urllib.parse.unquote(
                    urllib.parse.unquote(columns[7])
                )
                status = columns[8]
                # Only count successful (2xx) responses for wheel/zip artifacts
                if not all([
                    status.startswith("2"),
                    download_uri.endswith((".whl", ".zip"))
                ]):
                    continue
                if not bytes_cache.get(download_uri):
                    bytes_cache[download_uri] = CacheEntry(download_uri)
                bytes_cache[download_uri].bytes_sent += bytes_sent
    return bytes_cache
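The hard-coded column indices follow the standard CloudFront access-log field order (date, time, edge location, sc-bytes, client IP, method, host, cs-uri-stem, sc-status, ...), so column 3 is bytes sent, 7 the request URI, and 8 the HTTP status. A synthetic log entry illustrating the assumption:

    entry = "2024-12-23\t01:23:45\tIAD89-C1\t104857600\t203.0.113.5\tGET\tdownload.pytorch.org\t/whl/torch-2.1.0-cp310-linux_x86_64.whl\t200"
    columns = entry.split('\t')
    assert int(columns[3]) == 104857600  # bytes sent
    assert columns[7].endswith(".whl")   # download URI
    assert columns[8].startswith("2")    # 2xx status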

def output_results(bytes_cache: dict) -> None:
    os_results = defaultdict(int)
    arch_results = defaultdict(int)
    package_results = defaultdict(lambda: defaultdict(int))
    for _, val in tqdm(bytes_cache.items()):
        try:
            os_results[val.os_type] += val.downloads
            arch_results[val.target_arch] += val.downloads
            package_results[val.package_name][val.package_version] += (
                val.downloads
            )
        except Exception:
            pass
    print("=-=-= Results =-=-=")
    print("=-=-= OS =-=-=")
    total_os_num = sum(os_results.values())
    for os_type, num in os_results.items():
        print(
            f"\t* {os_type}: {num} ({(num/total_os_num)*100:.2f}%)"
        )

    print("=-=-= ARCH =-=-=")
    total_arch_num = sum(arch_results.values())
    for arch_type, num in arch_results.items():
        print(
            f"\t* {arch_type}: {num} ({(num/total_arch_num) * 100:.2f}%)"
        )

    print("=-=-= By Package =-=-=")
    for package_name, upper_val in package_results.items():
        print(f"=-=-= {package_name} =-=-=")
        total_package_num = sum(upper_val.values())
        for package_version, num in upper_val.items():
            print(
                f"\t* {package_version}: {num} ({(num/total_package_num) * 100:.2f}%)"
            )

def download_logs(log_directory: str, since: float):
    # NB: `since` is currently unused; the window is always the previous UTC day
    dt_now = datetime.now(timezone.utc)
    dt_end = datetime(dt_now.year, dt_now.month, dt_now.day, tzinfo=timezone.utc)
    dt_start = dt_end - timedelta(days=1, hours=1)  # 1 hour of padding to account for potentially missed logs due to timing
    for key in tqdm(BUCKET.objects.filter(Prefix='cflogs')):
        remote_fname = key.key
        local_fname = os.path.join(log_directory, remote_fname)
        # Only download things from yesterday
        dt_modified = key.last_modified.replace(tzinfo=timezone.utc)
        if dt_start >= dt_modified or dt_end < dt_modified:
            continue
        # TODO: Do this in parallel
        if not os.path.exists(local_fname):
            dirname = os.path.dirname(local_fname)
            if not os.path.exists(dirname):
                os.makedirs(dirname)
            CLIENT.download_file("pytorch", remote_fname, local_fname)


if __name__ == "__main__":
    print("Downloading logs")
    download_logs('cache', 1)
    print("Parsing logs")
    cache = parse_logs('cache/cflogs/')
    print("Calculating results")
    output_results(cache)
99 changes: 99 additions & 0 deletions tools/analytics/duplicates_analyze.py
@@ -0,0 +1,99 @@
#!/usr/bin/env python3
from typing import Dict, List
from subprocess import check_output
import os
import sys


def get_defined_symbols(fname: str, verbose: bool = False) -> Dict[str, int]:
    if verbose:
        print(f"Processing {fname}...", end='', flush=True)
    if sys.platform == 'darwin':
        # macOS nm has no --print-size; estimate each symbol's size as the
        # distance to the next symbol's address (4 bytes for the last one)
        lines = check_output(['nm', '--defined-only', '-n', fname]).decode('ascii').split("\n")[:-1]
        rc = {}
        for idx, line in enumerate(lines):
            addr, stype, name = line.split(' ')
            size = 4 if idx + 1 == len(lines) else (int(lines[idx + 1].split(' ')[0], 16) - int(addr, 16))
            rc[name] = size
    else:
        lines = check_output(['nm', '--print-size', '--defined-only', fname]).decode('ascii').split('\n')
        rc = {e[3]: int(e[1], 16) for e in [line.split() for line in lines] if len(e) == 4}
    if verbose:
        print("done")
    return rc
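On Linux the dict comprehension relies on `nm --print-size` emitting four whitespace-separated fields per symbol (address, size, type, name); lines with fewer fields are skipped. For example:

    line = "0000000000401000 000000000000002d T main"
    e = line.split()
    assert (e[3], int(e[1], 16)) == ("main", 45)  # symbol name -> size in bytes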


def get_deps(fname: str) -> List[str]:
    if sys.platform == 'darwin':
        rc = []
        lines = check_output(['otool', '-l', fname]).decode('ascii').split("\n")[1:-1]
        for idx, line in enumerate(lines):
            if line.strip() != 'cmd LC_LOAD_DYLIB':
                continue
            path = lines[idx + 2].strip()
            assert path.startswith('name')
            rc.append(os.path.basename(path.split(' ')[1]))
        return rc
    lines = check_output(['readelf', '--dynamic', fname]).decode('ascii').split('\n')
    return [line.split('[')[1][:-1] for line in lines if '(NEEDED)' in line]


def humansize(size):
    if size < 1024:
        return f"{size} bytes"
    if size < 1024**2:
        return f"{int(size/1024)} Kb"
    if size < 1024**3:
        return f"{size/(1024.0**2):.2f} Mb"
    return f"{size/(1024.0**3):.2f} Gb"


def print_sizes(libname, depth: int = 2) -> None:
    libs = [libname]
    symbols = {os.path.basename(libname): get_defined_symbols(libname, verbose=True)}
    # Transitively collect dependencies that live next to the library, up to `depth` levels
    for _ in range(depth):
        for lib in libs:
            dirname = os.path.dirname(lib)
            for dep in get_deps(lib):
                path = os.path.join(dirname, dep)
                if not os.path.exists(path):
                    continue
                if path not in libs:
                    libs.append(path)
                    symbols[dep] = get_defined_symbols(path, verbose=True)

    for lib in libs:
        lib_symbols = symbols[os.path.basename(lib)]
        lib_keys = set(lib_symbols.keys())
        rc = f"{lib} symbols size {humansize(sum(lib_symbols.values()))}"
        for dep in get_deps(lib):
            if dep not in symbols:
                continue
            dep_overlap = lib_keys.intersection(set(symbols[dep].keys()))
            overlap_size = sum(lib_symbols[k] for k in dep_overlap)
            if overlap_size > 0:
                rc += f" {dep} overlap is {humansize(overlap_size)}"
        print(rc)


def print_symbols_overlap(libname1: str, libname2: str) -> None:
    sym1 = get_defined_symbols(libname1, verbose=True)
    sym2 = get_defined_symbols(libname2, verbose=True)
    sym1_size = sum(sym1.values())
    sym2_size = sum(sym2.values())
    sym_overlap = set(sym1.keys()).intersection(set(sym2.keys()))
    overlap_size = sum(sym1[s] for s in sym_overlap)
    if overlap_size == 0:
        print(f"{libname1} symbols size {humansize(sym1_size)} does not overlap with {libname2}")
        return
    print(f"{libname1} symbols size {humansize(sym1_size)} overlap {humansize(overlap_size)} ({100.0 * overlap_size/sym1_size :.2f}%)")
    for sym in sym_overlap:
        print(sym)


if __name__ == '__main__':
    if len(sys.argv) == 3:
        print_symbols_overlap(sys.argv[1], sys.argv[2])
    else:
        print_sizes(sys.argv[1] if len(sys.argv) > 1 else "lib/libtorch_cuda.so")
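A sketch of the two modes from Python (assuming the script is importable; paths are illustrative): one library prints per-library symbol sizes plus overlap with each dependency, two libraries print the symbols they define in common:

    from duplicates_analyze import print_sizes, print_symbols_overlap

    print_sizes("torch/lib/libtorch_cuda.so", depth=2)  # sizes + dependency overlap
    print_symbols_overlap("torch/lib/libtorch_cpu.so",  # symbols defined in both
                          "torch/lib/libtorch_cuda.so")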