diff --git a/clients/drcachesim/scheduler/scheduler.h b/clients/drcachesim/scheduler/scheduler.h index f3dfcc88b99..f5d6ae4ca4e 100644 --- a/clients/drcachesim/scheduler/scheduler.h +++ b/clients/drcachesim/scheduler/scheduler.h @@ -267,6 +267,10 @@ template class scheduler_tmpl_t { * with no timeout but do not include a corresponding * #TRACE_MARKER_TYPE_SYSCALL_SCHEDULE for wakeup, an input could remain * unscheduled. + * + * Also beware that this can skip over trace header entries (like + * #TRACE_MARKER_TYPE_FILETYPE), which should ideally be obtained from the + * #dynamorio::drmemtrace::memtrace_stream_t API instead. */ std::vector regions_of_interest; }; @@ -1041,7 +1045,8 @@ template class scheduler_tmpl_t { * #TRACE_MARKER_TYPE_VERSION record in the trace header. * This can be queried prior to explicitly retrieving any records from * output streams, unless #dynamorio::drmemtrace::scheduler_tmpl_t:: - * scheduler_options_t.read_inputs_in_init is false. + * scheduler_options_t.read_inputs_in_init is false (which is the + * case for online drmemtrace analysis). */ uint64_t get_version() const override @@ -1055,7 +1060,8 @@ template class scheduler_tmpl_t { * #TRACE_MARKER_TYPE_FILETYPE record in the trace header. * This can be queried prior to explicitly retrieving any records from * output streams, unless #dynamorio::drmemtrace::scheduler_tmpl_t:: - * scheduler_options_t.read_inputs_in_init is false. + * scheduler_options_t.read_inputs_in_init is false (which is the + * case for online drmemtrace analysis). */ uint64_t get_filetype() const override diff --git a/clients/drcachesim/tests/offline-skip_instrs_opcode_mix.templatex b/clients/drcachesim/tests/offline-skip_instrs_opcode_mix.templatex new file mode 100644 index 00000000000..676004cc6c0 --- /dev/null +++ b/clients/drcachesim/tests/offline-skip_instrs_opcode_mix.templatex @@ -0,0 +1,8 @@ +Hello, world! +Opcode mix tool results: + *[0-9]* : total executed instructions + *[0-9]* : [a-z ]* + *[0-9]* : [a-z ]* + *[0-9]* : [a-z ]* + *[0-9]* : [a-z ]* +.* diff --git a/clients/drcachesim/tests/opcode_mix_test.cpp b/clients/drcachesim/tests/opcode_mix_test.cpp index 89074de2f30..c46afd02f96 100644 --- a/clients/drcachesim/tests/opcode_mix_test.cpp +++ b/clients/drcachesim/tests/opcode_mix_test.cpp @@ -82,6 +82,22 @@ class test_opcode_mix_t : public opcode_mix_t { instrlist_t *instrs_; }; +class test_stream_t : public default_memtrace_stream_t { +public: + test_stream_t(uint64_t filetype) + : filetype_(filetype) + { + } + uint64_t + get_filetype() const override + { + return filetype_; + } + +private: + uint64_t filetype_; +}; + std::string check_opcode_mix(void *drcontext, bool use_module_mapper) { @@ -92,10 +108,10 @@ check_opcode_mix(void *drcontext, bool use_module_mapper) instrlist_t *ilist = instrlist_create(drcontext); instrlist_append(ilist, nop); instrlist_append(ilist, ret); + uint64_t filetype = OFFLINE_FILE_TYPE_SYSCALL_NUMBERS | + (use_module_mapper ? 0 : OFFLINE_FILE_TYPE_ENCODINGS); std::vector memref_setup = { - { gen_marker(TID_A, TRACE_MARKER_TYPE_FILETYPE, - OFFLINE_FILE_TYPE_SYSCALL_NUMBERS | - (use_module_mapper ? 0 : OFFLINE_FILE_TYPE_ENCODINGS)), + { gen_marker(TID_A, TRACE_MARKER_TYPE_FILETYPE, static_cast(filetype)), nullptr }, { gen_instr(TID_A), nop }, { gen_instr(TID_A), ret }, @@ -116,9 +132,10 @@ check_opcode_mix(void *drcontext, bool use_module_mapper) // Set up the second nop memref to reuse the same encoding as the first nop. memrefs[3].instr.encoding_is_new = false; } + test_stream_t stream(filetype); test_opcode_mix_t opcode_mix(ilist_for_test); - opcode_mix.initialize(); - void *shard_data = opcode_mix.parallel_shard_init_stream(0, nullptr, nullptr); + opcode_mix.initialize_stream(/*serial_stream=*/nullptr); + void *shard_data = opcode_mix.parallel_shard_init_stream(0, nullptr, &stream); for (const memref_t &memref : memrefs) { if (!opcode_mix.parallel_shard_memref(shard_data, memref)) { return opcode_mix.parallel_shard_error(shard_data); diff --git a/clients/drcachesim/tools/common/decode_cache.h b/clients/drcachesim/tools/common/decode_cache.h index 20ca4db6ecc..d50c70593ed 100644 --- a/clients/drcachesim/tools/common/decode_cache.h +++ b/clients/drcachesim/tools/common/decode_cache.h @@ -423,15 +423,22 @@ template class decode_cache_t : public decode_cache_base_t { * indeed has embedded encodings or not, and initializing the * #dynamorio::drmemtrace::module_mapper_t if the module path is provided. * - * It is important to note that the trace filetype may be obtained using the - * get_filetype() API on a #dynamorio::drmemtrace::memtrace_stream_t. However, - * instances of #dynamorio::drmemtrace::memtrace_stream_t have the filetype - * available at init time (before the #TRACE_MARKER_TYPE_FILETYPE is actually - * returned by the stream) only for offline analysis. In the online analysis - * case, the user would need to call this API after init time when the - * #TRACE_MARKER_TYPE_FILETYPE marker is seen (which is fine as it occurs - * before any instr record). For tools intended for both offline and online - * analysis, following just the latter strategy would work fine. + * It is important to note some nuances in how the filetype can be obtained: + * - the trace filetype may be obtained using the get_filetype() API on the + * #dynamorio::drmemtrace::memtrace_stream_t. However, instances of + * #dynamorio::drmemtrace::memtrace_stream_t have the filetype available at + * init time (in the #dynamorio::drmemtrace::analysis_tool_t::initialize() + * or #dynamorio::drmemtrace::analysis_tool_t::initialize_stream() + * functions) only for offline analysis, not for online analysis. + * - when using the -skip_instrs or -skip_timestamp analyzer options, all + * initial header entries are skipped over. Therefore, the analysis tool + * may not see a #TRACE_MARKER_TYPE_FILETYPE at all. + * + * The most reliable way to obtain the filetype (and call this init() API), + * would be to use #dynamorio::drmemtrace::memtrace_stream_t::get_filetype() + * just before processing the first instruction memref in the + * #dynamorio::drmemtrace::analysis_tool_t::process_memref() or + * #dynamorio::drmemtrace::analysis_tool_t::parallel_shard_memref() APIs. * * If the \p module_file_path parameter is not empty, it instructs the * #dynamorio::drmemtrace::decode_cache_t object that it should look for the diff --git a/clients/drcachesim/tools/opcode_mix.cpp b/clients/drcachesim/tools/opcode_mix.cpp index e58d1ffd20a..19999ff46cf 100644 --- a/clients/drcachesim/tools/opcode_mix.cpp +++ b/clients/drcachesim/tools/opcode_mix.cpp @@ -85,6 +85,17 @@ bool opcode_mix_t::init_decode_cache(shard_data_t *shard, void *dcontext, offline_file_type_t filetype) { + // XXX: Perhaps this should be moved into decode_cache_t::init(). + // We remove OFFLINE_FILE_TYPE_ARCH_REGDEPS from this check since DR_ISA_REGDEPS + // is not a real ISA and can coexist with any real architecture. + if (TESTANY(OFFLINE_FILE_TYPE_ARCH_ALL & ~OFFLINE_FILE_TYPE_ARCH_REGDEPS, filetype) && + !TESTANY(build_target_arch_type(), filetype)) { + shard->error = std::string("Architecture mismatch: trace recorded on ") + + trace_arch_string(static_cast(filetype)) + + " but tool built for " + trace_arch_string(build_target_arch_type()); + return false; + } + shard->decode_cache = std::unique_ptr>(new decode_cache_t( dcontext, @@ -100,9 +111,12 @@ opcode_mix_t::init_decode_cache(shard_data_t *shard, void *dcontext, } std::string -opcode_mix_t::initialize() +opcode_mix_t::initialize_stream(dynamorio::drmemtrace::memtrace_stream_t *serial_stream) { dcontext_.dcontext = dr_standalone_init(); + if (serial_stream != nullptr) { + serial_shard_.stream = serial_stream; + } return ""; } @@ -125,6 +139,7 @@ opcode_mix_t::parallel_shard_init_stream( dynamorio::drmemtrace::memtrace_stream_t *shard_stream) { auto shard = new shard_data_t(); + shard->stream = shard_stream; std::lock_guard guard(shard_map_mutex_); shard_map_[shard_index] = shard; return reinterpret_cast(shard); @@ -145,22 +160,7 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref) { shard_data_t *shard = reinterpret_cast(shard_data); if (memref.marker.type == TRACE_TYPE_MARKER && - memref.marker.marker_type == TRACE_MARKER_TYPE_FILETYPE) { - shard->filetype = static_cast(memref.marker.marker_value); - /* We remove OFFLINE_FILE_TYPE_ARCH_REGDEPS from this check since DR_ISA_REGDEPS - * is not a real ISA and can coexist with any real architecture. - */ - if (TESTANY(OFFLINE_FILE_TYPE_ARCH_ALL & ~OFFLINE_FILE_TYPE_ARCH_REGDEPS, - memref.marker.marker_value) && - !TESTANY(build_target_arch_type(), memref.marker.marker_value)) { - shard->error = std::string("Architecture mismatch: trace recorded on ") + - trace_arch_string(static_cast( - memref.marker.marker_value)) + - " but tool built for " + trace_arch_string(build_target_arch_type()); - return false; - } - } else if (memref.marker.type == TRACE_TYPE_MARKER && - memref.marker.marker_type == TRACE_MARKER_TYPE_VECTOR_LENGTH) { + memref.marker.marker_type == TRACE_MARKER_TYPE_VECTOR_LENGTH) { #ifdef AARCH64 const int new_vl_bits = memref.marker.marker_value * 8; if (dr_get_vector_length() != new_vl_bits) { @@ -177,18 +177,19 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref) } /* At this point we start processing memref instructions, so we're past the header of - * the trace, which contains the trace filetype. If we didn't encounter a - * TRACE_MARKER_TYPE_FILETYPE we return an error and stop processing the trace. - * We expected the value of TRACE_MARKER_TYPE_FILETYPE to contain at least one of the + * the trace, which contains the trace filetype. If -skip_instrs was used, we may not + * have seen TRACE_MARKER_TYPE_FILETYPE itself but the memtrace_stream_t should be + * able to provide it to us. + */ + shard->filetype = static_cast(shard->stream->get_filetype()); + + /* We expected the value of TRACE_MARKER_TYPE_FILETYPE to contain at least one of the * enum values of offline_file_type_t (e.g., OFFLINE_FILE_TYPE_ARCH_), so * OFFLINE_FILE_TYPE_DEFAULT (== 0) should never be present alone and can be used as * uninitialized value of filetype for an error check. * XXX i#6812: we could allow traces that have some shards with no filetype, as long * as there is at least one shard with it, by caching the filetype from shards that * have it and using that one. We can do this using memtrace_stream_t::get_filetype(), - * which requires updating opcode_mix to use parallel_shard_init_stream() rather than - * the current (and deprecated) parallel_shard_init(). However, we should first decide - * whether we want to allow traces that have some shards without a filetype. */ if (shard->filetype == OFFLINE_FILE_TYPE_DEFAULT) { shard->error = "No file type found in this shard"; diff --git a/clients/drcachesim/tools/opcode_mix.h b/clients/drcachesim/tools/opcode_mix.h index 21c8ae11235..48030eb9991 100644 --- a/clients/drcachesim/tools/opcode_mix.h +++ b/clients/drcachesim/tools/opcode_mix.h @@ -62,7 +62,7 @@ class opcode_mix_t : public analysis_tool_t { const std::string &alt_module_dir = ""); virtual ~opcode_mix_t(); std::string - initialize() override; + initialize_stream(dynamorio::drmemtrace::memtrace_stream_t *serial_stream) override; bool process_memref(const memref_t &memref) override; bool @@ -142,20 +142,14 @@ class opcode_mix_t : public analysis_tool_t { struct shard_data_t { shard_data_t() - : instr_count(0) - , last_trace_module_start(nullptr) - , last_trace_module_size(0) - , last_mapped_module_start(nullptr) { } - int64_t instr_count; + int64_t instr_count = 0; std::unordered_map opcode_counts; std::unordered_map category_counts; std::string error; - app_pc last_trace_module_start; - size_t last_trace_module_size; - app_pc last_mapped_module_start; + dynamorio::drmemtrace::memtrace_stream_t *stream = nullptr; std::unique_ptr> decode_cache; offline_file_type_t filetype = OFFLINE_FILE_TYPE_DEFAULT; }; diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index 15a4ce80424..526395785a9 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -4452,6 +4452,7 @@ if (BUILD_CLIENTS) # TODO i#3544: Port tests to RISC-V 64 torunonly_drcacheoff(opcode_mix ${ci_shared_app} "" "@-tool@opcode_mix" "") + # Ensure the tool works without the raw/ subdir. set(tool.drcacheoff.opcode_mix_postcmd "firstglob@${drraw2trace_path}@-indir@${dir_prefix}.*.dir") @@ -4460,6 +4461,9 @@ if (BUILD_CLIENTS) set(tool.drcacheoff.opcode_mix_postcmd3 "firstglob@${drcachesim_path}@-indir@${dir_prefix}.*.dir@-tool@opcode_mix") + torunonly_drcacheoff(skip_instrs_opcode_mix ${ci_shared_app} "" + "@-tool@opcode_mix@-skip_instrs@1" "") + torunonly_drcacheoff(view ${ci_shared_app} "" "@-tool@view@-sim_refs@16384" "") unset(tool.drcacheoff.view_rawtemp) # Use preprocessor