Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

i#7224 skipped header: Get filetype from stream in opcode_mix #7225

Merged
merged 9 commits into from
Jan 28, 2025
10 changes: 8 additions & 2 deletions clients/drcachesim/scheduler/scheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,10 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
* with no timeout but do not include a corresponding
* #TRACE_MARKER_TYPE_SYSCALL_SCHEDULE for wakeup, an input could remain
* unscheduled.
*
* Also beware that this can skip over trace header entries (like
* #TRACE_MARKER_TYPE_FILETYPE), which should ideally be obtained from the
* #dynamorio::drmemtrace::memtrace_stream_t API instead.
abhinav92003 marked this conversation as resolved.
Show resolved Hide resolved
*/
std::vector<range_t> regions_of_interest;
};
Expand Down Expand Up @@ -1041,7 +1045,8 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
* #TRACE_MARKER_TYPE_VERSION record in the trace header.
* This can be queried prior to explicitly retrieving any records from
* output streams, unless #dynamorio::drmemtrace::scheduler_tmpl_t::
* scheduler_options_t.read_inputs_in_init is false.
* scheduler_options_t.read_inputs_in_init is false (which is the
* case for online drmemtrace analysis).
*/
uint64_t
get_version() const override
Expand All @@ -1055,7 +1060,8 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
* #TRACE_MARKER_TYPE_FILETYPE record in the trace header.
* This can be queried prior to explicitly retrieving any records from
* output streams, unless #dynamorio::drmemtrace::scheduler_tmpl_t::
* scheduler_options_t.read_inputs_in_init is false.
* scheduler_options_t.read_inputs_in_init is false (which is the
* case for online drmemtrace analysis).
*/
uint64_t
get_filetype() const override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Hello, world!
Opcode mix tool results:
*[0-9]* : total executed instructions
*[0-9]* : [a-z ]*
*[0-9]* : [a-z ]*
*[0-9]* : [a-z ]*
*[0-9]* : [a-z ]*
.*
27 changes: 22 additions & 5 deletions clients/drcachesim/tests/opcode_mix_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,22 @@ class test_opcode_mix_t : public opcode_mix_t {
instrlist_t *instrs_;
};

class test_stream_t : public default_memtrace_stream_t {
public:
test_stream_t(uint64_t filetype)
: filetype_(filetype)
{
}
uint64_t
get_filetype() const override
{
return filetype_;
}

private:
uint64_t filetype_;
};

std::string
check_opcode_mix(void *drcontext, bool use_module_mapper)
{
Expand All @@ -92,10 +108,10 @@ check_opcode_mix(void *drcontext, bool use_module_mapper)
instrlist_t *ilist = instrlist_create(drcontext);
instrlist_append(ilist, nop);
instrlist_append(ilist, ret);
uint64_t filetype = OFFLINE_FILE_TYPE_SYSCALL_NUMBERS |
(use_module_mapper ? 0 : OFFLINE_FILE_TYPE_ENCODINGS);
std::vector<memref_with_IR_t> memref_setup = {
{ gen_marker(TID_A, TRACE_MARKER_TYPE_FILETYPE,
OFFLINE_FILE_TYPE_SYSCALL_NUMBERS |
(use_module_mapper ? 0 : OFFLINE_FILE_TYPE_ENCODINGS)),
{ gen_marker(TID_A, TRACE_MARKER_TYPE_FILETYPE, static_cast<uintptr_t>(filetype)),
nullptr },
{ gen_instr(TID_A), nop },
{ gen_instr(TID_A), ret },
Expand All @@ -116,9 +132,10 @@ check_opcode_mix(void *drcontext, bool use_module_mapper)
// Set up the second nop memref to reuse the same encoding as the first nop.
memrefs[3].instr.encoding_is_new = false;
}
test_stream_t stream(filetype);
test_opcode_mix_t opcode_mix(ilist_for_test);
opcode_mix.initialize();
void *shard_data = opcode_mix.parallel_shard_init_stream(0, nullptr, nullptr);
opcode_mix.initialize_stream(/*serial_stream=*/nullptr);
void *shard_data = opcode_mix.parallel_shard_init_stream(0, nullptr, &stream);
for (const memref_t &memref : memrefs) {
if (!opcode_mix.parallel_shard_memref(shard_data, memref)) {
return opcode_mix.parallel_shard_error(shard_data);
Expand Down
25 changes: 16 additions & 9 deletions clients/drcachesim/tools/common/decode_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -423,15 +423,22 @@ template <class DecodeInfo> class decode_cache_t : public decode_cache_base_t {
* indeed has embedded encodings or not, and initializing the
* #dynamorio::drmemtrace::module_mapper_t if the module path is provided.
*
* It is important to note that the trace filetype may be obtained using the
* get_filetype() API on a #dynamorio::drmemtrace::memtrace_stream_t. However,
* instances of #dynamorio::drmemtrace::memtrace_stream_t have the filetype
* available at init time (before the #TRACE_MARKER_TYPE_FILETYPE is actually
* returned by the stream) only for offline analysis. In the online analysis
* case, the user would need to call this API after init time when the
* #TRACE_MARKER_TYPE_FILETYPE marker is seen (which is fine as it occurs
* before any instr record). For tools intended for both offline and online
* analysis, following just the latter strategy would work fine.
* It is important to note some nuances in how the filetype can be obtained:
* - the trace filetype may be obtained using the get_filetype() API on the
* #dynamorio::drmemtrace::memtrace_stream_t. However, instances of
* #dynamorio::drmemtrace::memtrace_stream_t have the filetype available at
* init time (in the #dynamorio::drmemtrace::analysis_tool_t::initialize()
* or #dynamorio::drmemtrace::analysis_tool_t::initialize_stream()
* functions) only for offline analysis, not for online analysis.
* - when using the -skip_instrs or -skip_timestamp analyzer options, all
* initial header entries are skipped over. Therefore, the analysis tool
* may not see a #TRACE_MARKER_TYPE_FILETYPE at all.
*
* The most reliable way to obtain the filetype (and call this init() API),
* would be to use #dynamorio::drmemtrace::memtrace_stream_t::get_filetype()
* just before processing the first instruction memref in the
* #dynamorio::drmemtrace::analysis_tool_t::process_memref() or
* #dynamorio::drmemtrace::analysis_tool_t::parallel_shard_memref() APIs.
abhinav92003 marked this conversation as resolved.
Show resolved Hide resolved
*
* If the \p module_file_path parameter is not empty, it instructs the
* #dynamorio::drmemtrace::decode_cache_t object that it should look for the
Expand Down
47 changes: 24 additions & 23 deletions clients/drcachesim/tools/opcode_mix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,17 @@ bool
opcode_mix_t::init_decode_cache(shard_data_t *shard, void *dcontext,
offline_file_type_t filetype)
{
// XXX: Perhaps this should be moved into decode_cache_t::init().
// We remove OFFLINE_FILE_TYPE_ARCH_REGDEPS from this check since DR_ISA_REGDEPS
// is not a real ISA and can coexist with any real architecture.
if (TESTANY(OFFLINE_FILE_TYPE_ARCH_ALL & ~OFFLINE_FILE_TYPE_ARCH_REGDEPS, filetype) &&
!TESTANY(build_target_arch_type(), filetype)) {
shard->error = std::string("Architecture mismatch: trace recorded on ") +
trace_arch_string(static_cast<offline_file_type_t>(filetype)) +
" but tool built for " + trace_arch_string(build_target_arch_type());
return false;
}

shard->decode_cache =
std::unique_ptr<decode_cache_t<opcode_data_t>>(new decode_cache_t<opcode_data_t>(
dcontext,
Expand All @@ -100,9 +111,12 @@ opcode_mix_t::init_decode_cache(shard_data_t *shard, void *dcontext,
}

std::string
opcode_mix_t::initialize()
opcode_mix_t::initialize_stream(dynamorio::drmemtrace::memtrace_stream_t *serial_stream)
{
dcontext_.dcontext = dr_standalone_init();
if (serial_stream != nullptr) {
serial_shard_.stream = serial_stream;
}
return "";
}

Expand All @@ -125,6 +139,7 @@ opcode_mix_t::parallel_shard_init_stream(
dynamorio::drmemtrace::memtrace_stream_t *shard_stream)
{
auto shard = new shard_data_t();
shard->stream = shard_stream;
std::lock_guard<std::mutex> guard(shard_map_mutex_);
shard_map_[shard_index] = shard;
return reinterpret_cast<void *>(shard);
Expand All @@ -145,22 +160,7 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref)
{
shard_data_t *shard = reinterpret_cast<shard_data_t *>(shard_data);
if (memref.marker.type == TRACE_TYPE_MARKER &&
memref.marker.marker_type == TRACE_MARKER_TYPE_FILETYPE) {
shard->filetype = static_cast<offline_file_type_t>(memref.marker.marker_value);
/* We remove OFFLINE_FILE_TYPE_ARCH_REGDEPS from this check since DR_ISA_REGDEPS
* is not a real ISA and can coexist with any real architecture.
*/
if (TESTANY(OFFLINE_FILE_TYPE_ARCH_ALL & ~OFFLINE_FILE_TYPE_ARCH_REGDEPS,
memref.marker.marker_value) &&
!TESTANY(build_target_arch_type(), memref.marker.marker_value)) {
shard->error = std::string("Architecture mismatch: trace recorded on ") +
trace_arch_string(static_cast<offline_file_type_t>(
memref.marker.marker_value)) +
" but tool built for " + trace_arch_string(build_target_arch_type());
return false;
}
} else if (memref.marker.type == TRACE_TYPE_MARKER &&
memref.marker.marker_type == TRACE_MARKER_TYPE_VECTOR_LENGTH) {
memref.marker.marker_type == TRACE_MARKER_TYPE_VECTOR_LENGTH) {
#ifdef AARCH64
const int new_vl_bits = memref.marker.marker_value * 8;
if (dr_get_vector_length() != new_vl_bits) {
Expand All @@ -177,18 +177,19 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref)
}

/* At this point we start processing memref instructions, so we're past the header of
* the trace, which contains the trace filetype. If we didn't encounter a
* TRACE_MARKER_TYPE_FILETYPE we return an error and stop processing the trace.
* We expected the value of TRACE_MARKER_TYPE_FILETYPE to contain at least one of the
* the trace, which contains the trace filetype. If -skip_instrs was used, we may not
* have seen TRACE_MARKER_TYPE_FILETYPE itself but the memtrace_stream_t should be
* able to provide it to us.
*/
shard->filetype = static_cast<offline_file_type_t>(shard->stream->get_filetype());

/* We expected the value of TRACE_MARKER_TYPE_FILETYPE to contain at least one of the
* enum values of offline_file_type_t (e.g., OFFLINE_FILE_TYPE_ARCH_), so
* OFFLINE_FILE_TYPE_DEFAULT (== 0) should never be present alone and can be used as
* uninitialized value of filetype for an error check.
* XXX i#6812: we could allow traces that have some shards with no filetype, as long
* as there is at least one shard with it, by caching the filetype from shards that
* have it and using that one. We can do this using memtrace_stream_t::get_filetype(),
* which requires updating opcode_mix to use parallel_shard_init_stream() rather than
* the current (and deprecated) parallel_shard_init(). However, we should first decide
* whether we want to allow traces that have some shards without a filetype.
*/
if (shard->filetype == OFFLINE_FILE_TYPE_DEFAULT) {
shard->error = "No file type found in this shard";
Expand Down
12 changes: 3 additions & 9 deletions clients/drcachesim/tools/opcode_mix.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ class opcode_mix_t : public analysis_tool_t {
const std::string &alt_module_dir = "");
virtual ~opcode_mix_t();
std::string
initialize() override;
initialize_stream(dynamorio::drmemtrace::memtrace_stream_t *serial_stream) override;
bool
process_memref(const memref_t &memref) override;
bool
Expand Down Expand Up @@ -142,20 +142,14 @@ class opcode_mix_t : public analysis_tool_t {

struct shard_data_t {
shard_data_t()
: instr_count(0)
, last_trace_module_start(nullptr)
, last_trace_module_size(0)
, last_mapped_module_start(nullptr)
{
}

int64_t instr_count;
int64_t instr_count = 0;
std::unordered_map<int, int64_t> opcode_counts;
std::unordered_map<uint, int64_t> category_counts;
std::string error;
app_pc last_trace_module_start;
size_t last_trace_module_size;
app_pc last_mapped_module_start;
dynamorio::drmemtrace::memtrace_stream_t *stream = nullptr;
std::unique_ptr<decode_cache_t<opcode_data_t>> decode_cache;
offline_file_type_t filetype = OFFLINE_FILE_TYPE_DEFAULT;
};
Expand Down
4 changes: 4 additions & 0 deletions suite/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4452,6 +4452,7 @@ if (BUILD_CLIENTS)
# TODO i#3544: Port tests to RISC-V 64
torunonly_drcacheoff(opcode_mix ${ci_shared_app} ""
"@-tool@opcode_mix" "")

# Ensure the tool works without the raw/ subdir.
set(tool.drcacheoff.opcode_mix_postcmd
"firstglob@${drraw2trace_path}@-indir@${dir_prefix}.*.dir")
Expand All @@ -4460,6 +4461,9 @@ if (BUILD_CLIENTS)
set(tool.drcacheoff.opcode_mix_postcmd3
"firstglob@${drcachesim_path}@-indir@${dir_prefix}.*.dir@-tool@opcode_mix")

torunonly_drcacheoff(skip_instrs_opcode_mix ${ci_shared_app} ""
"@-tool@opcode_mix@-skip_instrs@1" "")

torunonly_drcacheoff(view ${ci_shared_app} ""
"@-tool@view@-sim_refs@16384" "")
unset(tool.drcacheoff.view_rawtemp) # Use preprocessor
Expand Down
Loading