diff --git a/gputop-client-c/gputop-client-c-bindings.cpp b/gputop-client-c/gputop-client-c-bindings.cpp index ab0548bb..fb24c107 100644 --- a/gputop-client-c/gputop-client-c-bindings.cpp +++ b/gputop-client-c/gputop-client-c-bindings.cpp @@ -115,6 +115,8 @@ gputop_cc_handle_i915_perf_message_binding(const v8::FunctionCallbackInfo struct gputop_cc_stream *stream = (struct gputop_cc_stream *)ptr->ptr_; unsigned int len = args[2]->NumberValue(); + unsigned int ctx_hw_id = args[5]->NumberValue(); + unsigned int idle_flag = args[6]->NumberValue(); if (!args[1]->IsArrayBufferView()) { isolate->ThrowException(Exception::TypeError(String::NewFromUtf8(isolate, "Expected 2nd argument to be an ArrayBufferView"))); @@ -167,7 +169,9 @@ gputop_cc_handle_i915_perf_message_binding(const v8::FunctionCallbackInfo static_cast(data_contents.Data()) + offset, len, accumulators, - n_accumulators); + n_accumulators, + ctx_hw_id, + idle_flag); } void diff --git a/gputop-client-c/gputop-client-c-runtime-bindings.cpp b/gputop-client-c/gputop-client-c-runtime-bindings.cpp index 37c95251..a97ffce3 100644 --- a/gputop-client-c/gputop-client-c-runtime-bindings.cpp +++ b/gputop-client-c/gputop-client-c-runtime-bindings.cpp @@ -160,5 +160,15 @@ _gputop_cr_accumulator_end_update(void) fn->Call(gputop, ARRAY_LENGTH(argv), argv); } +void +_gputop_cr_send_idle_flag(int idle_flag) +{ + Isolate* isolate = Isolate::GetCurrent(); + HandleScope scope(isolate); + Local gputop = Local::New(isolate, gputop_cc_singleton); + Local fn = Local::Cast(gputop->Get(String::NewFromUtf8(isolate, "send_idle_flag"))); + Local argv[] = {Number::New(isolate, idle_flag)}; + fn->Call(gputop, ARRAY_LENGTH(argv), argv); +} diff --git a/gputop-client-c/gputop-client-c-runtime.h b/gputop-client-c/gputop-client-c-runtime.h index 2d2e37ca..ac6262fc 100644 --- a/gputop-client-c/gputop-client-c-runtime.h +++ b/gputop-client-c/gputop-client-c-runtime.h @@ -66,7 +66,7 @@ bool _gputop_cr_accumulator_start_update(struct 
gputop_cc_stream *stream, void _gputop_cr_accumulator_append_count(int counter, double max, double value); void _gputop_cr_accumulator_end_update(void); - +void _gputop_cr_send_idle_flag(int idle_flag); #ifdef __cplusplus } #endif diff --git a/gputop-client-c/gputop-client-c.c b/gputop-client-c/gputop-client-c.c index 6b0b5dd4..9a3695f3 100644 --- a/gputop-client-c/gputop-client-c.c +++ b/gputop-client-c/gputop-client-c.c @@ -95,6 +95,30 @@ gputop_cc_get_counter_id(const char *hw_config_guid, const char *counter_symbol_ return -1; } +static void +reset_forward_oa_accumulator_events(struct gputop_cc_stream *stream, + struct gputop_cc_oa_accumulator *oa_accumulator, + uint32_t events) +{ + struct gputop_metric_set *oa_metric_set = stream->oa_metric_set; + + if (!_gputop_cr_accumulator_start_update(stream, + oa_accumulator, + events, + oa_accumulator->first_timestamp, + oa_accumulator->last_timestamp)) + return; + + for (int i = 0; i < oa_metric_set->n_counters; i++) { + + double d_value = 0; + uint64_t max = 0; + _gputop_cr_accumulator_append_count(i, max, d_value); + } + + _gputop_cr_accumulator_end_update(); +} + static void forward_oa_accumulator_events(struct gputop_cc_stream *stream, struct gputop_cc_oa_accumulator *oa_accumulator, @@ -204,14 +228,16 @@ void EMSCRIPTEN_KEEPALIVE gputop_cc_handle_i915_perf_message(struct gputop_cc_stream *stream, uint8_t *data, int data_len, struct gputop_cc_oa_accumulator **accumulators, - int n_accumulators) + int n_accumulators, + int ctx_hw_id, + int idle_flag) { const struct drm_i915_perf_record_header *header; uint8_t *last = NULL; assert(stream); - if (stream->continuation_report) + if (stream->continuation_report && (idle_flag < 4)) last = stream->continuation_report; else { for (int i = 0; i < n_accumulators; i++) { @@ -220,9 +246,14 @@ gputop_cc_handle_i915_perf_message(struct gputop_cc_stream *stream, assert(oa_accumulator); gputop_cc_oa_accumulator_clear(oa_accumulator); + reset_forward_oa_accumulator_events(stream, 
oa_accumulator, 1); + idle_flag = 4; } } + if (idle_flag < 4) + idle_flag++; + //int i = 0; for (header = (void *)data; (uint8_t *)header < (data + data_len); @@ -250,34 +281,35 @@ gputop_cc_handle_i915_perf_message(struct gputop_cc_stream *stream, case DRM_I915_PERF_RECORD_SAMPLE: { struct oa_sample *sample = (struct oa_sample *)header; - if (last) { - for (int i = 0; i < n_accumulators; i++) { - struct gputop_cc_oa_accumulator *oa_accumulator = - accumulators[i]; + if (sample->oa_report[8] == ctx_hw_id || ctx_hw_id == 0) { + idle_flag = 0; + + if (last) { + for (int i = 0; i < n_accumulators; i++) { + struct gputop_cc_oa_accumulator *oa_accumulator = + accumulators[i]; - assert(oa_accumulator); + assert(oa_accumulator); - if (gputop_cc_oa_accumulate_reports(oa_accumulator, + if (gputop_cc_oa_accumulate_reports(oa_accumulator, last, sample->oa_report)) - { - uint64_t elapsed = (oa_accumulator->last_timestamp - + { + uint64_t elapsed = (oa_accumulator->last_timestamp - oa_accumulator->first_timestamp); - uint32_t events = 0; - //gputop_cr_console_log("i915_oa: accumulated reports\n"); - - if (elapsed > oa_accumulator->aggregation_period) { - //gputop_cr_console_log("i915_oa: PERIOD ELAPSED (%d)\n", (int)oa_accumulator->aggregation_period); - events |= ACCUMULATOR_EVENT_PERIOD_ELAPSED; + uint32_t events = 0; + //gputop_cr_console_log("i915_oa: accumulated reports\n"); + + if (elapsed > oa_accumulator->aggregation_period) { + //gputop_cr_console_log("i915_oa: PERIOD ELAPSED (%d)\n", (int)oa_accumulator->aggregation_period); + events |= ACCUMULATOR_EVENT_PERIOD_ELAPSED; + } + if (events) + forward_oa_accumulator_events(stream, oa_accumulator, events); } - - if (events) - forward_oa_accumulator_events(stream, oa_accumulator, events); } } - } - last = sample->oa_report; - + } break; } @@ -287,6 +319,8 @@ gputop_cc_handle_i915_perf_message(struct gputop_cc_stream *stream, } } + _gputop_cr_send_idle_flag(idle_flag); + if (last) { int raw_size = 
stream->oa_metric_set->perf_raw_size; diff --git a/gputop-client-c/gputop-client-c.h b/gputop-client-c/gputop-client-c.h index 39b7899c..48840188 100644 --- a/gputop-client-c/gputop-client-c.h +++ b/gputop-client-c/gputop-client-c.h @@ -92,7 +92,9 @@ int gputop_cc_get_counter_id(const char *guid, const char *counter_symbol_name); void gputop_cc_handle_i915_perf_message(struct gputop_cc_stream *stream, uint8_t *data, int data_len, struct gputop_cc_oa_accumulator **accumulators, - int n_accumulators); + int n_accumulators, + int ctx_hw_id, + int idle_flag); void gputop_cc_reset_system_properties(void); void gputop_cc_set_system_property(const char *name, double value); diff --git a/gputop-client-c/gputop-web-lib.js b/gputop-client-c/gputop-web-lib.js index 21763695..1374574e 100644 --- a/gputop-client-c/gputop-web-lib.js +++ b/gputop-client-c/gputop-web-lib.js @@ -93,6 +93,13 @@ var LibraryGpuTopWeb = { else console.error("Gputop singleton not initialized"); }, + _gputop_cr_send_idle_flag: function(idle_flag) { + var gputop = Module['gputop_singleton']; + if (gputop !== undefined) + gputop.send_idle_flag.call(gputop, idle_flag); + else + console.error("Gputop singleton not initialized"); + }, }; autoAddDeps(LibraryGpuTopWeb, '$GPUTop'); diff --git a/gputop-client/gputop.js b/gputop-client/gputop.js index 17302a1c..3d30558a 100644 --- a/gputop-client/gputop.js +++ b/gputop-client/gputop.js @@ -30,6 +30,11 @@ var is_nodejs = false; var using_emscripten = true; +var ctx_hw_id_ = []; +var vgpu_id_ = []; +var ctx_mode=['Global']; +var map_vgpuID_hwID = [0]; + if (typeof module !== 'undefined' && module.exports) { var WebSocket = require('ws'); @@ -479,7 +484,7 @@ Gputop.prototype.clear_accumulated_metrics = function(metric) { } } -Gputop.prototype.replay_i915_perf_history = function(metric) { +Gputop.prototype.replay_i915_perf_history = function(metric, hw_id) { this.clear_accumulated_metrics(metric); var stream = metric.stream; @@ -515,7 +520,9 @@ 
Gputop.prototype.replay_i915_perf_history = function(metric) { stack_data, data.length, vec, - n_accumulators); + n_accumulators, + hw_id, + this.idle_flag); } else { var vec = []; for (var j = 0; j < n_accumulators; j++) { @@ -527,7 +534,9 @@ Gputop.prototype.replay_i915_perf_history = function(metric) { stack_data, data.length, vec, - n_accumulators); + n_accumulators, + hw_id, + this.idle_flag); } cc.Runtime.stackRestore(sp); @@ -664,6 +673,10 @@ Gputop.prototype.accumulator_end_update = function () { update.events_mask); } +Gputop.prototype.send_idle_flag = function (idle_flag) { + this.idle_flag = idle_flag; +} + Gputop.prototype.accumulator_clear = function (accumulator) { cc._gputop_cc_oa_accumulator_clear(accumulator.cc_accumulator_ptr_); } @@ -737,6 +750,7 @@ Gputop.prototype.set_demo_architecture = function(architecture) { this.demo_architecture = architecture; this.is_connected_ = true; + this.request_hw_id_map(); this.request_features(); } @@ -1383,6 +1397,16 @@ Gputop.prototype.rpc_request = function(method, value, closure) { } } +Gputop.prototype.request_hw_id_map = function() { + if (!this.is_demo()) { + if (this.socket_.readyState == is_nodejs ? 1 : WebSocket.OPEN) { + this.rpc_request('get_hw_id_map', true); + } else { + this.log("Can't request context hardware ID map while not connected", this.ERROR); + } + } +} + Gputop.prototype.request_features = function() { if (!this.is_demo()) { if (this.socket_.readyState == is_nodejs ? 
1 : WebSocket.OPEN) { @@ -1671,6 +1695,9 @@ function gputop_socket_on_message(evt) { stream.dispatchEvent(ev); } break; + case 'hw_id': + this.update_vgpuID_hwID(msg.hw_id); + break; } if (msg.reply_uuid in this.rpc_closures_) { @@ -1720,7 +1747,9 @@ function gputop_socket_on_message(evt) { stack_data, data.length, vec, - n_accumulators); + n_accumulators, + this.current_hw_id, + this.idle_flag); } else { var vec = []; for (var i = 0; i < n_accumulators; i++) { @@ -1732,7 +1761,9 @@ function gputop_socket_on_message(evt) { stack_data, data.length, vec, - n_accumulators); + n_accumulators, + this.current_hw_id, + this.idle_flag); } cc.Runtime.stackRestore(sp); @@ -1824,6 +1855,7 @@ Gputop.prototype.connect = function(address, onopen, onclose, onerror) { this.log('Connecting to ' + websocket_url); this.socket_ = this.connect_web_socket(websocket_url, () => { //onopen this.is_connected_ = true; + this.request_hw_id_map(); this.request_features(); var ev = { type: "open" }; @@ -1839,6 +1871,7 @@ Gputop.prototype.connect = function(address, onopen, onclose, onerror) { }); } else { this.is_connected_ = true; + this.request_hw_id_map(); this.request_features(); var ev = { type: "open" }; diff --git a/gputop-csv/gputop-csv.js b/gputop-csv/gputop-csv.js index 5797057b..04ad5fdf 100755 --- a/gputop-csv/gputop-csv.js +++ b/gputop-csv/gputop-csv.js @@ -59,6 +59,8 @@ function GputopCSV(pretty_print) this.endl = process.platform === "win32" ? 
"\r\n" : "\n"; this.term_row_ = 0; + this.current_hw_id = 0; + this.idle_flag = 0; this.console = { log: (msg) => { @@ -134,6 +136,59 @@ GputopCSV.prototype.list_metric_set_counters = function(metric) { stderr_log.log("\nALL: " + all); } +var ctx_hw_id_ = []; +var vgpu_id_ = []; +var map_vgpuID_hwID = [0]; + +GputopCSV.prototype.get_vgpu_id = function() { + var vgpu_id; + for (var i = 0; i < vgpu_id_.length; i++) { + if (vgpu_id_[i] === parseInt(args.vgpu)) { + vgpu_id = vgpu_id_[i]; + break; + } + } + return vgpu_id; +} + +GputopCSV.prototype.update_vgpuID_hwID = function(hw_id) { /* Append the ids from a hw_id message to ctx_hw_id_/vgpu_id_, rebuild map_vgpuID_hwID, then either list the vGPU ids (args.vgpu === 'list') or set this.current_hw_id for the requested vGPU, exiting if the lookup fails. */ + + hw_id.ctx_hw_id.forEach((ctx_hw_id, i) => { + ctx_hw_id_.push(ctx_hw_id.toInt()); + }); + hw_id.vgpu_id.forEach((vgpu_id, i) => { + vgpu_id_.push(vgpu_id.toInt()); + }); + + + var map_length = vgpu_id_.length; /* the loop pairs vgpu_id_[i] with ctx_hw_id_[i], so it must cover every collected entry rather than stop at the largest id value */ + + for (var i = 0; i < map_length; i++ ) { + map_vgpuID_hwID[vgpu_id_[i]] = ctx_hw_id_[i]; + } + vgpu_id_.sort((a, b) => a - b); /* numeric order; the default sort compares elements as strings */ + + if (args.vgpu === 'list') { + if (vgpu_id_.length === 0) { + stderr_log.log("\nNo vGPU is running now!"); + } else { + stderr_log.log("\nList of vGPU ID selectable with --vgpu=..."); + for (var i = 0; i < vgpu_id_.length; i++) + stderr_log.log(vgpu_id_[i]); + } + } else { + var vgpu_id; + vgpu_id = this.get_vgpu_id(); + this.current_hw_id = map_vgpuID_hwID[vgpu_id]; + + if (this.current_hw_id === undefined) { + stderr_log.error("Failed to look up to vGPU ID " + args.vgpu); + process.exit(1); + return; + } + } +} + GputopCSV.prototype.update_features = function(features) { if (features.supported_oa_uuids.length == 0) { @@ -241,6 +296,7 @@ GputopCSV.prototype.update_features = function(features) var col_width = 0; if (this.pretty_print_csv_) { + if (counter.symbol_name === "Timestamp") { var units = "(ns)"; var camel_name = "TimeStamp"; @@ -350,6 +406,7 @@ GputopCSV.prototype.update_features = function(features) this.column_titles_.map((line) => { this.stream.write(line + this.endl); }); + if (this.pretty_print_csv_) 
this.stream.write(this.column_units_ + this.endl); }, @@ -362,6 +419,8 @@ GputopCSV.prototype.update_features = function(features) } } +var n_rows; + function write_rows(metric, accumulator) { /* Note: this ref[erence] counter is pre-dermined to be one with @@ -375,7 +434,7 @@ function write_rows(metric, accumulator) stderr_log.assert(ref_accumulated_counter.counter === ref_counter, "Spurious reference counter state"); - var n_rows = ref_accumulated_counter.updates.length; + n_rows = ref_accumulated_counter.updates.length; if (n_rows <= 1) return; @@ -456,6 +515,8 @@ function write_rows(metric, accumulator) } } +var flag = 0; + GputopCSV.prototype.notify_accumulator_events = function(metric, accumulator, events_mask) { if (events_mask & 1) //period elapsed this.accumulator_clear(accumulator); @@ -463,12 +524,33 @@ GputopCSV.prototype.notify_accumulator_events = function(metric, accumulator, ev if (this.write_queued_) return; - setTimeout(() => { - this.write_queued_ = false; - write_rows.call(this, metric, accumulator); - }, 0.2); + if (this.idle_flag < 4) { + flag = 0; + setTimeout(() => { + this.write_queued_ = false; + write_rows.call(this, metric, accumulator); + }, 0.2); - this.write_queued_ = true; + this.write_queued_ = true; + } else { + if (flag === 0) { + flag = 1; + if (this.pretty_print_csv_) + stderr_log.error("No context is running on this vGPU now"); + else + this.stream.write("No context is running on this vGPU now\n"); + } + + for (var c = 0; c < this.counters_.length; c++) { + var counter = this.counters_[c]; + if (counter.record_data === true) { + var accumulated_counter = + accumulator.accumulated_counters[counter.cc_counter_id_]; + n_rows = 2; + accumulated_counter.updates.splice(0, n_rows); + } + } + } } var parser = new ArgumentParser({ @@ -485,6 +567,16 @@ parser.addArgument( } ); +parser.addArgument( + [ '-vgpu', '--vgpu' ], + { + help: "specific vgpu mode to observe (default 'list')", + defaultValue: 'list', + constant: 'list', + nargs: 
'?' + } +); + parser.addArgument( [ '-m', '--metrics' ], { diff --git a/gputop-data/gputop.proto b/gputop-data/gputop.proto index b071d1aa..2912028b 100644 --- a/gputop-data/gputop.proto +++ b/gputop-data/gputop.proto @@ -134,6 +134,11 @@ message TracepointInfo required string sample_format = 2; } +message HW_ID +{ + repeated uint64 ctx_hw_id = 1; + repeated uint64 vgpu_id = 2; +} message Message { @@ -149,6 +154,7 @@ message Message ProcessInfo process_info = 8; CpuStatsSet cpu_stats = 9; TracepointInfo tracepoint_info = 10; + HW_ID hw_id = 11; } } @@ -210,5 +216,6 @@ message Request uint32 get_process_info = 5; string test_log=6; string get_tracepoint_info = 7; + bool get_hw_id_map = 8; } } diff --git a/gputop-server/gputop-server.c b/gputop-server/gputop-server.c index a7453117..c3bda8de 100644 --- a/gputop-server/gputop-server.c +++ b/gputop-server/gputop-server.c @@ -69,7 +69,6 @@ static uv_timer_t timer; static bool update_queued; static uv_idle_t update_idle; - enum { WS_MESSAGE_PERF = 1, WS_MESSAGE_PROTOBUF, @@ -973,6 +972,71 @@ gputop_get_cmd_line_pid(uint32_t pid, char *buf, int len) return res; } +void handle_update_hw_id_map(uint64_t *vgpu_id, + uint64_t *ctx_hw_id, + int *current_vgpu_num) +{ /* Scan vgpu_path for directories with 36-character names and read each one's intel_vgpu/vgpu_id and intel_vgpu/hw_id into the parallel output arrays; *current_vgpu_num receives the number of directories visited. NOTE(review): no capacity is passed in, so more matches than the caller's arrays can hold would overrun them. */ + int i = 0; + int UUID_length = 36; + DIR *vgpu_dir; + char *vgpu_path="/sys/bus/pci/devices/0000:00:02.0"; + char *vgpu_id_path, *hw_id_path; + struct dirent *entry; + bool success_vgpu_id, success_hw_id; + + vgpu_dir=opendir(vgpu_path); + if (vgpu_dir == NULL) { + fprintf(stderr, "The path %s doesn't exist!\n", vgpu_path); + return; + } + + while ((entry = readdir(vgpu_dir)) != NULL) { + if (entry->d_type == DT_DIR && (strlen(entry->d_name)==UUID_length)) { + int ret_vgpu_id_path = asprintf(&vgpu_id_path, "%s/%s/intel_vgpu/vgpu_id", vgpu_path, entry->d_name); + assert(ret_vgpu_id_path != -1); + int ret_hw_id_path = asprintf(&hw_id_path, "%s/%s/intel_vgpu/hw_id", vgpu_path, entry->d_name); + assert(ret_hw_id_path != -1); /* test asprintf's return value, not the pointer it fills in */ + success_vgpu_id = 
gputop_read_file_uint64(vgpu_id_path, vgpu_id); + free(vgpu_id_path); + vgpu_id++; + success_hw_id = gputop_read_file_uint64(hw_id_path, ctx_hw_id); + free(hw_id_path); + ctx_hw_id++; + i++; + } + } + + closedir(vgpu_dir); + *current_vgpu_num = i; +} + +static +void handle_get_hw_id_map(h2o_websocket_conn_t *conn, + Gputop__Request *request) +{ /* Reply to a get_hw_id_map request with the vgpu_id/ctx_hw_id arrays filled in by handle_update_hw_id_map(). */ + int i = 0, max_vgpu_num = 7; + int current_vgpu_num = 0; + uint64_t ctx_hw_id[max_vgpu_num]; + uint64_t vgpu_id[max_vgpu_num]; + + handle_update_hw_id_map(vgpu_id, ctx_hw_id, &current_vgpu_num); + + Gputop__Message message = GPUTOP__MESSAGE__INIT; + Gputop__HWID hw_id = GPUTOP__HW__ID__INIT; + + message.reply_uuid = request->uuid; + message.cmd_case = GPUTOP__MESSAGE__CMD_HW_ID; + + hw_id.n_ctx_hw_id = current_vgpu_num; + hw_id.ctx_hw_id = ctx_hw_id; + + hw_id.n_vgpu_id = current_vgpu_num; + hw_id.vgpu_id = vgpu_id; + + message.hw_id = &hw_id; + send_pb_message(conn, &message.base); +} + static void handle_get_process_info(h2o_websocket_conn_t *conn, Gputop__Request *request) @@ -1330,6 +1394,10 @@ static void on_ws_message(h2o_websocket_conn_t *conn, case GPUTOP__REQUEST__REQ_TEST_LOG: fprintf(stderr, "TEST LOG: %s\n", request->test_log); break; + case GPUTOP__REQUEST__REQ_GET_HW_ID_MAP: + fprintf(stderr, "Get HW_ID_MAP request received\n"); + handle_get_hw_id_map(conn, request); + break; case GPUTOP__REQUEST__REQ__NOT_SET: assert(0); } diff --git a/gputop-webui/ajax/metrics.html b/gputop-webui/ajax/metrics.html index 278e4131..e6c3aeed 100644 --- a/gputop-webui/ajax/metrics.html +++ b/gputop-webui/ajax/metrics.html @@ -3,6 +3,12 @@
+