diff --git a/.gitignore b/.gitignore index 4f7ae2f..9be92f8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,7 @@ /perforator /cmd/perforator/perforator todo.txt -test/sum +/test/sum + +perforator.1 +/perforator-* diff --git a/Makefile b/Makefile index f547a22..1c41460 100644 --- a/Makefile +++ b/Makefile @@ -4,9 +4,23 @@ GOVARS = -X main.Version=$(VERSION) build: go build -trimpath -ldflags "-s -w $(GOVARS)" ./cmd/perforator + install: go install -trimpath -ldflags "-s -w $(GOVARS)" ./cmd/perforator + +perforator.1: man/perforator.md + pandoc man/perforator.md -s -t man -o perforator.1 + +package: build perforator.1 + mkdir perforator-$(VERSION) + cp README.md perforator-$(VERSION) + cp LICENSE perforator-$(VERSION) + cp perforator.1 perforator-$(VERSION) + cp perforator perforator-$(VERSION) + tar -czf perforator-$(VERSION).tar.gz perforator-$(VERSION) + clean: - rm -f perforator + rm -f perforator perforator.1 perforator-*.tar.gz + rm -rf perforator-*/ -.PHONY: build clean install +.PHONY: build clean install package diff --git a/go.sum b/go.sum index 6daad36..5407556 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,5 @@ acln.ro/perf v0.0.0-20200512125540-4d8e4e566115 h1:LXxt66Jv2YsU40F2p/VykMEUMulv2oRHrZHrEe+0V/0= acln.ro/perf v0.0.0-20200512125540-4d8e4e566115/go.mod h1:YNATxll6AOOkbTRJWdm3bSvTzXor3Hs5U9IzIYpfBCI= -github.com/blang/semver v1.1.0 h1:ol1rO7QQB5uy7umSNV7VAmLugfLRD+17sYJujRNYPhg= github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdnnjpJbkM4JQ= github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= github.com/jessevdk/go-flags v1.4.0 h1:4IU2WS7AumrZ/40jfhf4QVDMsQwqA7VEHozFRrGARJA= diff --git a/man/Makefile b/man/Makefile deleted file mode 100644 index dcda60c..0000000 --- a/man/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -perforator.1: perforator.md - pandoc perforator.md -s -t man -o perforator.1 diff --git a/man/perforator.1 b/man/perforator.1 deleted file mode 100644 index 29c5e54..0000000 --- a/man/perforator.1 +++ /dev/null @@ -1,196 +0,0 @@ -.\" Automatically generated by Pandoc 2.11.2 -.\" -.TH "perforator" "1" "" "" "Perforator Manual" -.hy -.SH NAME -.PP -Perforator - Performance analysis and tracing tool for Linux -.SH SYNOPSIS -.PP -perforator \f[C][--version] [--help] [OPTIONS] COMMAND [ARGS]\f[R] -.SH DESCRIPTION -.PP -Perforator is a tool for measuring performance metrics on individual -functions and regions using the Linux \f[B]perf_event_open\f[R](2) -interface. -Perforator supports measuring instructions executed, cache misses, -branch mispredictions, etc\&... during a single function call or region -of user code. -.SH EVENTS -.PP -Perforator supports tracing the following events (some may not be -available on your system, use \f[C]perforator --list [event-type]\f[R] -to view available events). -The following descriptions are adapted from -\f[B]perf_event_open\f[R](2), the system call used by Perforator to -record metrics. -.TP -\f[I]hardware\f[R] -.IP \[bu] 2 -\f[B]instructions\f[R]: Retired instructions. -Be careful, these can be affected by various issues, most notable -hardware interrupt counts. -.IP \[bu] 2 -\f[B]cpu-cycles\f[R]: Total cycles. -Be wary of what happens during CPU frequency scaling. -.IP \[bu] 2 -\f[B]cache-references\f[R]: Cache accesses. -Usually this indicates Last Level Cache accesses but this may vary -depending on your CPU. -This may include prefetches and coherency messages; again this depends -on the design of your CPU. -.IP \[bu] 2 -\f[B]cache-misses\f[R]: Cache misses. -Usually this indicates Last Level Cache misses; this is intended to be -used in conjunction with \f[B]cache-references\f[R] to calculate cache -miss rates. -.IP \[bu] 2 -\f[B]branch-instructions\f[R]: Retired branch instructions. -.IP \[bu] 2 -\f[B]branch-misses\f[R]: Mispredicted branch instructions. -.IP \[bu] 2 -\f[B]stalled-cycles-frontent\f[R]: Stalled cycles during issue. -.IP \[bu] 2 -\f[B]stalled-cycles-backend\f[R]: Stalled cycles during retirement. -.IP \[bu] 2 -\f[B]ref-cycles\f[R]: Total cycles; not affected by CPU frequency -scaling. -.TP -\f[I]software\f[R] -.IP \[bu] 2 -\f[B]cpu-clock\f[R]: The CPU clock, a high-resolution per-CPU timer. -.IP \[bu] 2 -\f[B]task-clock\f[R]: A clock count specific to the task that is -running. -.IP \[bu] 2 -\f[B]context-switches\f[R]: Context switches. -.IP \[bu] 2 -\f[B]cpu-migrations\f[R]: The number of times the process has migrated -to a new CPU. -.IP \[bu] 2 -\f[B]page-faults\f[R]: The number of page faults. -.IP \[bu] 2 -\f[B]major-faults\f[R]: The number of major page faults. -.IP \[bu] 2 -\f[B]minor-faults\f[R]: The number of minor page faults. -.IP \[bu] 2 -\f[B]alignment-faults\f[R]: The number of alignment faults. -These happen when unaligned memory accesses happen; the kernel can -handle these but it reduces performance. -This happens only on some architectures (never on x86). -.IP \[bu] 2 -\f[B]emulation-faults\f[R]: The number of emulation faults. -The kernel sometimes traps on unimplemented instructions and emulates -them for user space. -This can negatively impact performance. -.TP -\f[I]cache\f[R] -A cache event is made up of three parts: a cache, an operation type, and -an operation result. -The resulting event is written as \f[B]cache-type-result\f[R] \[en] for -example \f[B]l1d-read-misses\f[R]. -.RS -.PP -\f[I]caches\f[R] -.RE -.RS -.IP \[bu] 2 -\f[B]l1d\f[R]: for measuring the Level 1 Data Cache. -.IP \[bu] 2 -\f[B]l1i\f[R]: for measuring the Level 1 Instruction Cache. -.IP \[bu] 2 -\f[B]ll\f[R]: for measuring the Last-Level Cache. -.IP \[bu] 2 -\f[B]dtlb\f[R]: for measuring the Data TLB. -.IP \[bu] 2 -\f[B]itlb\f[R]: for measuring the Instruction TLB. -.IP \[bu] 2 -\f[B]bpu\f[R]: for measuring the branch prediction unit. -.IP \[bu] 2 -\f[B]node\f[R]: for measuring local memory accesses. -.RE -.RS -.PP -\f[I]operation types\f[R] -.RE -.RS -.IP \[bu] 2 -\f[B]read\f[R]: for reads. -.IP \[bu] 2 -\f[B]write\f[R]: for writes. -.IP \[bu] 2 -\f[B]prefetch\f[R]: for prefetches. -.RE -.RS -.PP -\f[I]operation results\f[R] -.RE -.RS -.IP \[bu] 2 -\f[B]accesses\f[R]: to measure accesses. -.IP \[bu] 2 -\f[B]misses\f[R]: to measure misses. -.RE -.TP -\f[I]trace\f[R] -System-dependent. -Usually this includes kernel trace events, such as system call entry -points to count the number of times a system call is executed. -.SH OPTIONS -.TP -\f[B]\f[CB]-l, --list=\f[B]\f[R] -List available events for {hardware, software, cache, trace} event -types. -.TP -\f[B]\f[CB]-e, --events=\f[B]\f[R] -Comma-separated list of events to profile. -.TP -\f[B]\f[CB]-g, --group=\f[B]\f[R] -Comma-separated list of events to profile together as a group. -.TP -\f[B]\f[CB]-r, --region=\f[B]\f[R] -Region(s) to profile: `function' or `start-end'; start/end locations may -be file:line or hex addresses. -.TP -\f[B]\f[CB]--kernel\f[B]\f[R] -Include kernel code in measurements. -.TP -\f[B]\f[CB]--hypervisor\f[B]\f[R] -Include hypervisor code in measurements. -.TP -\f[B]\f[CB]--exclude-user\f[B]\f[R] -Exclude user code from measurements. -.TP -\f[B]\f[CB]-s, --summary\f[B]\f[R] -Instead of printing results immediately, show an aggregated summary -afterwards. -.TP -\f[B]\f[CB]--sort-key=\f[B]\f[R] -Key to sort summary tables with. -.TP -\f[B]\f[CB]--reverse-sort\f[B]\f[R] -Reverse summary table sorting. -.TP -\f[B]\f[CB]--csv\f[B]\f[R] -Write summary output in CSV format. -.TP -\f[B]\f[CB]-o, --output=\f[B]\f[R] -Write summary output to file. -.TP -\f[B]\f[CB]-V, --verbose\f[B]\f[R] -Show verbose debug information. -.TP -\f[B]\f[CB]-v, --version\f[B]\f[R] -Show version information. -.TP -\f[B]\f[CB]-h, --help\f[B]\f[R] -Show this help message. -.SH BUGS -.PP -See GitHub Issues: -.SH AUTHOR -.PP -Zachary Yedidia -.SH SEE ALSO -.PP -\f[B]perf(1)\f[R], \f[B]perf_event_open(2)\f[R]