Skip to content

Commit

Permalink
pfm parsing / nvidia gpu profiling init (#33)
Browse files Browse the repository at this point in the history
  • Loading branch information
PhilipDeegan authored Dec 8, 2024
1 parent 8810bad commit 8fbc0eb
Show file tree
Hide file tree
Showing 22 changed files with 421 additions and 25 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,6 @@ bin
__pycache__
dist
phlop.egg-info/
scope_timer.txt

*scope_timer.txt
tpp
32 changes: 26 additions & 6 deletions inc/phlop/timing/threaded_scope_timer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ struct ScopeTimerMan
}
_headers.clear();
thread_storage.clear();
thread_reports.clear();
active = false;
}

Expand Down Expand Up @@ -132,10 +133,16 @@ struct ScopeTimerMan
std::unique_lock<std::mutex> lk(work_);
thread_storage.emplace_back(std::move(pt.reports), std::move(pt.traces));
}
void move(std::shared_ptr<RunTimerReport>& report)
{
std::unique_lock<std::mutex> lk(work_);
thread_reports.emplace_back(std::move(report));
}

std::mutex work_;
std::vector<std::pair<std::vector<RunTimerReport*>, std::vector<RunTimerReportSnapshot*>>>
thread_storage;
std::vector<std::shared_ptr<RunTimerReport>> thread_reports; // keep alive
};


Expand All @@ -159,9 +166,11 @@ struct RunTimerReportSnapshot
std::vector<RunTimerReportSnapshot*> childs;
};


struct RunTimerReport
{
std::string_view k, f;
std::string const k; // key
std::string const f; // function
std::uint32_t l = 0;

RunTimerReport(std::string_view const& _k, std::string_view const& _f, std::uint32_t const& _l)
Expand All @@ -175,13 +184,20 @@ struct RunTimerReport

~RunTimerReport() {}


auto operator()(std::size_t i) { return snapshots[i].get(); }
auto size() { return snapshots.size(); }

std::vector<std::shared_ptr<RunTimerReportSnapshot>> snapshots; // emplace back breaks pointers!
};


/// Holds a shared RunTimerReport and, when destroyed, passes it to
/// ScopeTimerMan::INSTANCE().move(...).
/// NOTE(review): instances appear to be created as `static thread_local` objects,
/// so the hand-off presumably happens at thread exit — confirm against the macro.
struct ThreadLifeWatcher
{
    std::shared_ptr<RunTimerReport> report;

    ~ThreadLifeWatcher()
    {
        ScopeTimerMan::INSTANCE().move(report);
    }
};


struct scope_timer
Expand Down Expand Up @@ -264,7 +280,9 @@ struct BinaryTimerFile
template<typename Trace>
void recurse_traces_for_keys(Trace const& c)
{
std::string s{c->self->k};
assert(c);
assert(c->self);
auto const& s = c->self->k;
if (!key_ids.count(s))
{
auto [it, b] = key_ids.emplace(s, key_ids.size());
Expand Down Expand Up @@ -359,11 +377,13 @@ namespace detail
#endif

#define PHLOP_SCOPE_TIMER(key) \
static phlop::threaded::RunTimerReport PHLOP_STR_CAT(ridx_, __LINE__){key, __FILE__, \
__LINE__}; \
static thread_local auto PHLOP_STR_CAT(ridx_, __LINE__) \
= std::make_shared<phlop::threaded::RunTimerReport>(key, __FILE__, __LINE__); \
static thread_local phlop::threaded::ThreadLifeWatcher PHLOP_STR_CAT(_watcher_, __LINE__){ \
PHLOP_STR_CAT(ridx_, __LINE__)}; \
phlop::threaded::scope_timer PHLOP_STR_CAT(_scope_timer_, \
__LINE__){PHLOP_STR_CAT(ridx_, __LINE__)}; \
phlop::threaded::ScopeTimerMan::local().report_stack_ptr = &PHLOP_STR_CAT(ridx_, __LINE__);
__LINE__){*PHLOP_STR_CAT(ridx_, __LINE__)}; \
phlop::threaded::ScopeTimerMan::local().report_stack_ptr = PHLOP_STR_CAT(ridx_, __LINE__).get();


#endif /*_PHLOP_TIMING_THREADED_SCOPE_TIMER_HPP_*/
29 changes: 29 additions & 0 deletions mkn.pfm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#! clean build test run -p scope_timer,threaded_scope_timer -Oa "-fPIC -std=c++20" -W 9

# run script first: ./sh/setup_pfm.sh

# Project definition; the top-level profile inherits from `base`.
name: phlop.pfm
parent: base

profile:
# Include root shared through `parent: base`.
- name: base
inc: inc

# Headers of the pfm checkout under tpp/ (presumably fetched by ./sh/setup_pfm.sh — confirm).
- name: pfm
inc: tpp/pfm/include

# Static library built from the sources in tpp/pfm/lib.
# NOTE(review): meaning of the trailing ", 0" on `inc` is not shown here — confirm against mkn docs.
- name: pfm_lib
parent: pfm
mode: static
inc: tpp/pfm/lib, 0
src: tpp/pfm/lib

# Example tool: tpp/pfm/examples/check_events.c -> `check_events`, using pfm_lib.
- name: pfm_events
self: pfm_lib
main: tpp/pfm/examples/check_events.c
out: check_events

# Example tool: tpp/pfm/examples/showevtinfo.c -> `showevtinfo`, using pfm_lib.
- name: pfm_info
self: pfm_lib
main: tpp/pfm/examples/showevtinfo.c
out: showevtinfo
2 changes: 2 additions & 0 deletions phlop/app/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
phlop.app.cmake
phlop.app.test_cases
phlop.app.git
phlop.app.nvidia
phlop.app.pfm
phlop.app.perf"""

print(available_modules)
5 changes: 5 additions & 0 deletions phlop/app/nvidia/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#
#
#
#
#
12 changes: 12 additions & 0 deletions phlop/app/nvidia/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#
#
#
#
#


# Listing of the runnable submodules of this package; printed when this file is executed.
available_modules = """Available:
phlop.app.nvidia.csan
phlop.app.nvidia.ncu"""

print(available_modules)
63 changes: 63 additions & 0 deletions phlop/app/nvidia/csan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# compute sanitizer frontend

# https://docs.nvidia.com/compute-sanitizer/ComputeSanitizer/index.html

## samples
# compute-sanitizer --tool memcheck [sanitizer_options] app_name [app_options]
# compute-sanitizer --tool racecheck [sanitizer_options] app_name [app_options]
#
#
#


from phlop.dict import ValDict
from phlop.proc import run

# Metric identifiers (`ncu --metrics` style names); not referenced elsewhere in this module.
metrics = [
"all",
"l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum", # read
"l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum", # write
]


def build_command(cli_args):
    """Assemble the ``compute-sanitizer`` command line as a single string.

    Reads ``cli_args.tool``, ``cli_args.extra`` and ``cli_args.remaining``.
    Empty pieces are dropped; the rest are joined with single spaces.
    """
    pieces = ["compute-sanitizer", f"--tool {cli_args.tool}"]
    if cli_args.extra:
        pieces.append(cli_args.extra)
    if cli_args.remaining:
        pieces.append(" ".join(cli_args.remaining))
    return " ".join(piece for piece in pieces if piece)


def exec(cli_args):
    """Build the compute-sanitizer command and run it in ``cli_args.dir``.

    Returns whatever ``run`` returns; ``check=True`` is passed through to it.
    """
    command = build_command(cli_args)
    return run(command, check=True, cwd=cli_args.dir)


def cli_args_parser(description="compute-sanitizer tool"):
    """Create the argparse parser for the compute-sanitizer frontend.

    Declares one ``remaining`` positional (``argparse.REMAINDER``) followed by
    the ``--dir``, ``--outfile``, ``--tool``, ``--logging`` and ``--extra``
    options. Returns the parser; nothing is parsed here.
    """
    import argparse

    help_text = ValDict(
        dir="working directory",
        quiet="Redirect output to /dev/null",
        logging="0=off, 1=on non zero exit code, 2=always",
        outfile="path for saved file if active",
        tool="Sanitizer tool to use (memcheck, racecheck, initcheck, synccheck)",
        extra="forward string to csan command",
    )

    arg_parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    arg_parser.add_argument("remaining", nargs=argparse.REMAINDER)

    # Options are registered in table order so the generated help keeps its layout.
    option_table = [
        (("-d", "--dir"), dict(default=".", help=help_text.dir)),
        (("-o", "--outfile"), dict(default=None, help=help_text.outfile)),
        (("-t", "--tool"), dict(default="memcheck", help=help_text.tool)),
        (("--logging",), dict(type=int, default=1, help=help_text.logging)),
        (("-e", "--extra"), dict(type=str, default="", help=help_text.extra)),
    ]
    for flags, settings in option_table:
        arg_parser.add_argument(*flags, **settings)

    return arg_parser


def verify_cli_args(cli_args):
    """Validation hook for parsed arguments; currently returns them unchanged."""
    return cli_args
57 changes: 57 additions & 0 deletions phlop/app/nvidia/ncu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Nsight Compute CLI

# https://docs.nvidia.com/nsight-compute/ProfilingGuide/index.html

## samples
# ncu --help
# ncu --metrics all
# ncu --metrics l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum
# ncu --target-processes all -o <report-name> mpirun [mpi arguments] <app> [app arguments]
#


from phlop.dict import ValDict
from phlop.proc import run

# Metric identifiers accepted by `ncu --metrics`; not referenced elsewhere in this module.
metrics = [
"all",
"l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum", # read
"l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum", # write
]


def build_command(cli_args):
    """Assemble the ``ncu`` command line as a single string.

    ``cli_args.remaining`` comes from an ``argparse.REMAINDER`` positional and
    is therefore a list; it is joined with spaces instead of being formatted
    as a Python list repr. A plain string is still accepted and used as is.
    ``cli_args.extra`` (if present and non-empty) is placed between ``ncu``
    and the remaining arguments. Empty pieces are dropped.
    """
    remaining = cli_args.remaining
    if remaining and not isinstance(remaining, str):
        remaining = " ".join(remaining)
    # getattr keeps namespaces built without an ``extra`` attribute working.
    extra = getattr(cli_args, "extra", "")
    return " ".join(filter(None, ["ncu", extra, remaining]))


def exec(cli_args):
    """Build the ncu command and run it in the requested working directory.

    The ``--dir`` option was previously parsed but never applied; it is now
    forwarded as ``cwd``. Falls back to "." (the parser default) when the
    namespace has no ``dir`` attribute. Returns whatever ``run`` returns;
    ``check=True`` is passed through to it.
    """
    working_dir = getattr(cli_args, "dir", ".")
    return run(build_command(cli_args), check=True, cwd=working_dir)


def cli_args_parser(description="ncu tool"):
    """Create the argparse parser for the Nsight Compute (ncu) frontend.

    Declares one ``remaining`` positional (``argparse.REMAINDER``) followed by
    the ``--dir``, ``--outfile``, ``--tool``, ``--logging`` and ``--extra``
    options. Returns the parser; nothing is parsed here.
    """
    import argparse

    _help = ValDict(
        dir="working directory",
        quiet="Redirect output to /dev/null",
        logging="0=off, 1=on non zero exit code, 2=always",
        outfile="path for saved file if active",
        # NOTE(review): empty help and the "stat" default below look inherited
        # from the perf frontend — confirm what --tool should mean for ncu.
        tool="",
        # Help text previously named the csan command in this ncu parser.
        extra="forward string to ncu command",
    )

    parser = argparse.ArgumentParser(
        description=description, formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("remaining", nargs=argparse.REMAINDER)
    parser.add_argument("-d", "--dir", default=".", help=_help.dir)
    parser.add_argument("-o", "--outfile", default=None, help=_help.outfile)
    parser.add_argument("-t", "--tool", default="stat", help=_help.tool)
    parser.add_argument("--logging", type=int, default=1, help=_help.logging)
    parser.add_argument("-e", "--extra", type=str, default="", help=_help.extra)

    return parser


def verify_cli_args(cli_args):
    """Validation hook for parsed arguments; currently returns them unchanged."""
    return cli_args
6 changes: 3 additions & 3 deletions phlop/app/perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,10 @@ def cli_args_parser(description="Perf tool"):
quiet="Redirect output to /dev/null",
cores="Parallism core/thread count",
infiles="infiles",
print_only="Print only, no execution",
regex="Filter out non-matching execution strings",
logging="0=off, 1=on non zero exit code, 2=always",
outfile="path for saved file if active",
tool="stat/record/etc",
extra="forward string to perf command",
)

parser = argparse.ArgumentParser(
Expand All @@ -129,10 +128,11 @@ def cli_args_parser(description="Perf tool"):
"-p", "--print_only", action="store_true", default=False, help=_help.print_only
)
parser.add_argument("-i", "--infiles", default=None, help=_help.infiles)
parser.add_argument("-r", "--regex", default=None, help=_help.regex)
parser.add_argument("-o", "--outfile", default=None, help=_help.outfile)
parser.add_argument("-t", "--tool", default="stat", help=_help.tool)
parser.add_argument("--logging", type=int, default=1, help=_help.logging)
parser.add_argument("-e", "--extra", type=str, default="", help=_help.extra)

return parser


Expand Down
5 changes: 5 additions & 0 deletions phlop/app/pfm/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#
#
#
#
#
12 changes: 12 additions & 0 deletions phlop/app/pfm/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#
#
#
#
#


# Listing of the runnable submodules of this package; printed when this file is executed.
available_modules = """Available:
phlop.app.pfm.check_events
phlop.app.pfm.showevtinfo"""

print(available_modules)
48 changes: 48 additions & 0 deletions phlop/app/pfm/check_events.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#
#
#
#
#


import logging
from pathlib import Path

from phlop.os import pushd
from phlop.proc import run
from phlop.string import decode_bytes

# Absolute directory containing this file.
FILE_DIR = Path(__file__).resolve().parent

logger = logging.getLogger(__name__)
# Presumably a marker line in check_events output; not referenced elsewhere in this module.
check_events_start = "Total events:"


def parse_check_events_output(lines):
    """Extract the event code from the last line of ``check_events`` output.

    Takes the text between the first and second ``:`` of the final line,
    strips surrounding whitespace and replaces every ``0x`` with ``r``.
    """
    last_line = lines[-1]
    fields = last_line.split(":")
    raw_code = fields[1].strip()
    return raw_code.replace("0x", "r")


def run_check_events(code):
    """Run the ``check_events`` example binary for ``code`` and return its output lines.

    The command is executed from the directory three levels above this file,
    so the relative ``./tpp/pfm/examples/check_events`` path resolves there.
    """
    project_root = FILE_DIR.parent.parent.parent
    with pushd(project_root):
        result = run(f"./tpp/pfm/examples/check_events {code}")
        return decode_bytes(result.stdout).splitlines()


def get_evt_perf_code(code):
    """Run ``check_events`` for ``code`` and return the parsed event code string."""
    output_lines = run_check_events(code)
    return parse_check_events_output(output_lines)


if __name__ == "__main__":
    from phlop.app.pfm.showevtinfo import get_evt_info

    # Pick the first event whose umask table contains the key, formatted as "<name>:<code>".
    key = "[MULT_FLOPS]"
    code = next(
        (
            f"{info.name}:{info.umask[key].code}"
            for info in get_evt_info()
            if key in info.umask
        ),
        "",
    )

    # An empty string means no event matched the key.
    assert code != ""

    # print("get_evt_perf_code", get_evt_perf_code(code))
    print(run(f"perf stat -e {get_evt_perf_code(code)} sleep 5"))
Loading

0 comments on commit 8fbc0eb

Please sign in to comment.