Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

make subprocess child shows different process name #391

Merged
Merged
3 changes: 3 additions & 0 deletions src/viztracer/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,8 +224,10 @@ def parse(self, argv: List[str]) -> VizProcedureResult:
# It's not practical to cover this line as it requires coverage
# instrumentation on subprocess.
output_file = self.ofile # pragma: no cover
process_name = sys.argv[0]
else:
output_file = os.path.join(self.multiprocess_output_dir, "result.json")
process_name = None

if options.log_multiprocess or options.log_subprocess: # pragma: no cover
color_print(
Expand Down Expand Up @@ -263,6 +265,7 @@ def parse(self, argv: List[str]) -> VizProcedureResult:
"sanitize_function_name": options.sanitize_function_name,
"dump_raw": True,
"minimize_memory": options.minimize_memory,
"process_name": process_name,
}

return True, None
Expand Down
86 changes: 56 additions & 30 deletions src/viztracer/modules/snaptrace.c
Original file line number Diff line number Diff line change
Expand Up @@ -650,20 +650,27 @@ snaptrace_load(TracerObject* self, PyObject* args)
PyObject* dict = PyDict_New();
PyObject* args = PyDict_New();
PyObject* process_name_string = PyUnicode_FromString("process_name");
PyObject* current_process_method = PyObject_GetAttrString(multiprocessing_module, "current_process");
if (!current_process_method) {
perror("Failed to access multiprocessing.current_process()");
exit(-1);
}
PyObject* current_process = PyObject_CallObject(current_process_method, NULL);
if (!current_process_method) {
perror("Failed to access multiprocessing.current_process()");
exit(-1);
}
PyObject* process_name = PyObject_GetAttrString(current_process, "name");
PyObject* process_name = NULL;

Py_DECREF(current_process_method);
Py_DECREF(current_process);
if (self->process_name) {
process_name = self->process_name;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

New reference here as well

Py_INCREF(process_name);
} else {
PyObject* current_process_method = PyObject_GetAttrString(multiprocessing_module, "current_process");
if (!current_process_method) {
perror("Failed to access multiprocessing.current_process()");
exit(-1);
}
PyObject* current_process = PyObject_CallObject(current_process_method, NULL);
if (!current_process_method) {
perror("Failed to access multiprocessing.current_process()");
exit(-1);
}
process_name = PyObject_GetAttrString(current_process, "name");
Py_DECREF(current_process_method);
Py_DECREF(current_process);
}

PyDict_SetItemString(dict, "ph", ph_M);
PyDict_SetItemString(dict, "pid", pid);
PyDict_SetItemString(dict, "tid", pid);
Expand Down Expand Up @@ -898,22 +905,30 @@ snaptrace_dump(TracerObject* self, PyObject* args)
// == Load the metadata first ==
// Process Name
{
PyObject* current_process_method = PyObject_GetAttrString(multiprocessing_module, "current_process");
if (!current_process_method) {
perror("Failed to access multiprocessing.current_process()");
exit(-1);
}
PyObject* current_process = PyObject_CallObject(current_process_method, NULL);
if (!current_process_method) {
perror("Failed to access multiprocessing.current_process()");
exit(-1);
PyObject* process_name = NULL;
if (self->process_name) {
process_name = self->process_name;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are losing reference here. You need to increment the reference because you are decreasing it later.

Py_INCREF(process_name);
} else {
PyObject* current_process_method = PyObject_GetAttrString(multiprocessing_module, "current_process");
if (!current_process_method) {
perror("Failed to access multiprocessing.current_process()");
exit(-1);
}
PyObject* current_process = PyObject_CallObject(current_process_method, NULL);
if (!current_process_method) {
perror("Failed to access multiprocessing.current_process()");
exit(-1);
}
process_name = PyObject_GetAttrString(current_process, "name");
Py_DECREF(current_process_method);
Py_DECREF(current_process);
}
PyObject* process_name = PyObject_GetAttrString(current_process, "name");

Py_DECREF(current_process_method);
Py_DECREF(current_process);
fprintf(fptr, "{\"ph\":\"M\",\"pid\":%lu,\"tid\":%lu,\"name\":\"process_name\",\"args\":{\"name\":\"%s\"}},",
pid, pid, PyUnicode_AsUTF8(process_name));
fprintf(fptr, "{\"ph\":\"M\",\"pid\":%lu,\"tid\":%lu,\"name\":\"process_name\",\"args\":{\"name\":\"",
pid, pid);
fprint_escape(fptr, PyUnicode_AsUTF8(process_name));
fprintf(fptr, "\"}},");
Py_DECREF(process_name);
}

Expand Down Expand Up @@ -1140,11 +1155,12 @@ snaptrace_config(TracerObject* self, PyObject* args, PyObject* kw)
static char* kwlist[] = {"verbose", "lib_file_path", "max_stack_depth",
"include_files", "exclude_files", "ignore_c_function", "ignore_frozen",
"log_func_retval", "log_func_args", "log_async", "trace_self",
"min_duration",
"min_duration", "process_name",
NULL};
int kw_verbose = -1;
int kw_max_stack_depth = 0;
char* kw_lib_file_path = NULL;
PyObject* kw_process_name = NULL;
PyObject* kw_include_files = NULL;
PyObject* kw_exclude_files = NULL;
int kw_ignore_c_function = -1;
Expand All @@ -1154,7 +1170,7 @@ snaptrace_config(TracerObject* self, PyObject* args, PyObject* kw)
int kw_log_async = -1;
int kw_trace_self = -1;
double kw_min_duration = 0;
if (!PyArg_ParseTupleAndKeywords(args, kw, "|isiOOppppppd", kwlist,
if (!PyArg_ParseTupleAndKeywords(args, kw, "|isiOOppppppdO", kwlist,
&kw_verbose,
&kw_lib_file_path,
&kw_max_stack_depth,
Expand All @@ -1166,7 +1182,8 @@ snaptrace_config(TracerObject* self, PyObject* args, PyObject* kw)
&kw_log_func_args,
&kw_log_async,
&kw_trace_self,
&kw_min_duration)) {
&kw_min_duration,
&kw_process_name)) {
return NULL;
}

Expand All @@ -1190,6 +1207,15 @@ snaptrace_config(TracerObject* self, PyObject* args, PyObject* kw)
strcpy(self->lib_file_path, kw_lib_file_path);
}

if (kw_process_name && !Py_IsNone(kw_process_name)) {
if (!PyUnicode_CheckExact(kw_process_name)) {
PyErr_SetString(PyExc_TypeError, "process_name must be a string");
return NULL;
}
Py_INCREF(kw_process_name);
Py_XSETREF(self->process_name, kw_process_name);
}

if (kw_ignore_c_function == 1) {
SET_FLAG(self->check_flags, SNAPTRACE_IGNORE_C_FUNCTION);
} else if (kw_ignore_c_function == 0) {
Expand Down
1 change: 1 addition & 0 deletions src/viztracer/modules/snaptrace.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ typedef struct {
int verbose;
char* lib_file_path;
int max_stack_depth;
PyObject* process_name;
PyObject* include_files;
PyObject* exclude_files;
double min_duration;
Expand Down
5 changes: 4 additions & 1 deletion src/viztracer/tracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ def __init__(
log_gc: bool = False,
log_async: bool = False,
trace_self: bool = False,
min_duration: float = 0) -> None:
min_duration: float = 0,
process_name: Optional[str] = None) -> None:
self.initialized = False
self.enable = False
self.parsed = False
Expand All @@ -53,6 +54,7 @@ def __init__(
self.total_entries = 0
self.gc_start_args: Dict[str, int] = {}
self.initialized = True
self.process_name = process_name

@property
def max_stack_depth(self) -> int:
Expand Down Expand Up @@ -237,6 +239,7 @@ def config(self) -> None:
"log_async": self.log_async,
"trace_self": self.trace_self,
"min_duration": self.min_duration,
"process_name": self.process_name,
}

self._tracer.config(**cfg)
Expand Down
2 changes: 2 additions & 0 deletions src/viztracer/viztracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def __init__(self,
minimize_memory: bool = False,
dump_raw: bool = False,
sanitize_function_name: bool = False,
process_name: Optional[str] = None,
output_file: str = "result.json",
plugins: Sequence[Union[VizPluginBase, str]] = []) -> None:
super().__init__(
Expand All @@ -58,6 +59,7 @@ def __init__(self,
log_async=log_async,
trace_self=trace_self,
min_duration=min_duration,
process_name=process_name,
)
self._tracer: Any
self.verbose = verbose
Expand Down
19 changes: 19 additions & 0 deletions tests/test_multiprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import sys
import tempfile
import unittest
import json

from .cmdline_tmpl import CmdlineTmpl

Expand Down Expand Up @@ -44,6 +45,11 @@ def fib(n):
print(subprocess.call(["python", "-m", "timeit", "-n", "100", "'1+1'"]))
"""

file_subprocess_code = """
import subprocess
p = subprocess.Popen(["python", "-c", "print('test')"])
"""

file_fork = """
import os
import time
Expand Down Expand Up @@ -240,13 +246,26 @@ def test_child_process(self):
self.template(["viztracer", "-o", os.path.join(tmpdir, "result.json"), "--subprocess_child", "child.py"],
expected_output_file=None)
self.assertEqual(len(os.listdir(tmpdir)), 1)
with open(os.path.join(tmpdir, os.listdir(tmpdir)[0])) as f:
trace_events = json.load(f)["traceEvents"]
for entry in trace_events:
if entry["name"] == "process_name":
self.assertNotEqual(entry["args"]["name"], "MainProcess")
break
else:
self.fail("no process_name event found")

def test_module(self):
self.template(["viztracer", "-o", "result.json", "cmdline_test.py"],
expected_output_file="result.json",
expected_stdout=".*100 loops.*",
script=file_subprocess_module)

def test_code(self):
self.template(["viztracer", "-o", "result.json", "cmdline_test.py"],
expected_output_file="result.json",
script=file_subprocess_code)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's also check the process name of the subprocess on this, as it's known - should be python I believe?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the current process_name is site-packages/viztracer/__main__.py because sys.argv is processed in VizUI.run(), which is before the config.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Having a process with a name python is almost as bad as MainProcess. Let's set the process_name in run_code. Update self.init_kwargs before create the VizTracer instance. The subprocess would be distinguishable with the distinct names.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If setting process_name in run_code. For module, the process_name is module name. For run python file, the process_name is python file name. For run code by -c, the process_name is -c.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah "-c" is a bit weird. Let's do another check for cmd_string and if it's set, set the process name to "python -c".


def test_nested(self):
def check_func(data):
pids = set()
Expand Down
Loading