Skip to content

Commit

Permalink
!17009 [PROF] Profiler support AicMetrics MemoryAccess feature
Browse files Browse the repository at this point in the history
Merge pull request !17009 from wangjie/memory_access_feature_master
  • Loading branch information
wangjie authored and it-is-a-robot committed Dec 18, 2024
1 parent 6ea73e1 commit 8b26c6d
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 7 deletions.
1 change: 1 addition & 0 deletions third_party/acl/inc/acl/acl_prof.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ typedef enum {
ACL_AICORE_RESOURCE_CONFLICT_RATIO = 4,
ACL_AICORE_MEMORY_UB = 5,
ACL_AICORE_L2_CACHE = 6,
ACL_AICORE_MEMORY_ACCESS = 8,
ACL_AICORE_NONE = 0xFF
} aclprofAicoreMetrics;

Expand Down
2 changes: 2 additions & 0 deletions torch_npu/csrc/profiler/feature_mgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@ const static char* VERSION = "master\0";

// Lookup table from a feature's name string to its FeatureType enumerator.
// NOTE(review): presumably used to translate feature names reported by the backend — confirm against the callers in this file.
static std::unordered_map<std::string, FeatureType> NAME_TABLE = {
{"ATTR", FeatureType::FEATURE_ATTR},
{"MemoryAccess", FeatureType::FEATURE_MEMORY_ACCESS}
};

// Maps each FeatureType to a version string (feature type -> feature version).
// NOTE(review): presumably the feature versions this framework build declares — confirm how FeatureMgr compares them.
static std::unordered_map<FeatureType, std::string> FMK_FEATURES = {
{FeatureType::FEATURE_ATTR, "1"},
{FeatureType::FEATURE_MEMORY_ACCESS, "1"}
};
}

Expand Down
1 change: 1 addition & 0 deletions torch_npu/csrc/profiler/feature_mgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ namespace profiler {
// Enumerates the features known to the profiler's feature manager.
// Enumerators after FEATURE_MIN take consecutive implicit values, so the declaration order is significant.
enum class FeatureType {
FEATURE_MIN = 0,  // NOTE(review): looks like a lower-bound sentinel rather than a real feature — confirm
FEATURE_ATTR,
FEATURE_MEMORY_ACCESS,
FEATURE_MAX,  // NOTE(review): looks like an upper-bound sentinel; presumably new features go above this line — confirm
};
using torch_npu::toolkit::profiler::Utils;
Expand Down
21 changes: 17 additions & 4 deletions torch_npu/csrc/profiler/profiler_mgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ std::map<std::string, aclprofAicoreMetrics> ProfilerMgr::npu_metrics_map_ = {
{"ACL_AICORE_RESOURCE_CONFLICT_RATIO", ACL_AICORE_RESOURCE_CONFLICT_RATIO},
{"ACL_AICORE_MEMORY_UB", ACL_AICORE_MEMORY_UB},
{"ACL_AICORE_L2_CACHE", ACL_AICORE_L2_CACHE},
{"ACL_AICORE_MEMORY_ACCESS", ACL_AICORE_MEMORY_ACCESS},
{"ACL_AICORE_NONE", ACL_AICORE_NONE},
};

Expand All @@ -34,7 +35,19 @@ std::map<std::string, uint64_t> ProfilerMgr::trace_level_map_ = {
};

constexpr uint32_t capacity_ = 1048576; // 2^20, Experience value for default ringbuffer size for single data
constexpr uint32_t trace_capacity_ = 128; // 2^7, Experience value for python trace data ringbuffer size for batch data
constexpr uint32_t trace_capacity_ = 128; // 2^7, Experience value for python trace data ringbuffer size for batch data

/**
 * Validates the requested AI Core metric against the features reported by FeatureMgr.
 *
 * Only ACL_AICORE_MEMORY_ACCESS is gated: when FEATURE_MEMORY_ACCESS is not supported,
 * a warning is emitted (log + stdout) and a fallback metric is returned instead.
 *
 * @param aic_metrics  Metric requested by the caller.
 * @param level        Integer trace level; values >= 1 fall back to
 *                     ACL_AICORE_PIPE_UTILIZATION, anything lower to ACL_AICORE_NONE.
 * @return `aic_metrics` unchanged when it is usable, otherwise the fallback metric.
 */
aclprofAicoreMetrics CheckAicMetricsFeature(aclprofAicoreMetrics aic_metrics, int8_t level)
{
    // Every metric other than MemoryAccess is passed through without a feature query.
    if (aic_metrics != ACL_AICORE_MEMORY_ACCESS) {
        return aic_metrics;
    }

    // MemoryAccess is kept as long as the feature manager reports support for it.
    if (FeatureMgr::GetInstance()->IsSupportFeature(FeatureType::FEATURE_MEMORY_ACCESS)) {
        return aic_metrics;
    }

    // Unsupported: warn through both channels, then pick the fallback for this level.
    ASCEND_LOGW("AiCMetrics is not supported to set to MemoryAccess.");
    printf("[WARN]%s,%s:%u:AiCMetrics is not supported to set to MemoryAccess, reset to default.\n",
           __FUNCTION__, __FILENAME__, __LINE__);

    if (level >= 1) {
        return ACL_AICORE_PIPE_UTILIZATION;
    }
    return ACL_AICORE_NONE;
}

ProfilerMgr::ProfilerMgr()
: report_enable_(false),
Expand Down Expand Up @@ -77,12 +90,14 @@ void ProfilerMgr::Start(const NpuTraceConfig &npu_config, bool cpu_trace)
c10_npu::npuSynchronizeDevice();
if (npu_trace_.load() == true) {
aclprofAicoreMetrics aic_metrics = ACL_AICORE_NONE;
int8_t level_int = trace_level_to_int_.find(npu_config.trace_level) != trace_level_to_int_.end() ?
trace_level_to_int_[npu_config.trace_level] : -1;
auto level_iter = trace_level_map_.find(npu_config.trace_level);
uint64_t datatype_config = (level_iter == trace_level_map_.end()) ? Level0 : trace_level_map_[npu_config.trace_level];
auto metrics_iter = npu_metrics_map_.find(npu_config.metrics);
if (metrics_iter != npu_metrics_map_.end() && npu_config.metrics.compare("ACL_AICORE_NONE") != 0) {
datatype_config |= ACL_PROF_AICORE_METRICS;
aic_metrics = npu_metrics_map_[npu_config.metrics];
aic_metrics = CheckAicMetricsFeature(npu_metrics_map_[npu_config.metrics], level_int);
}
if (npu_config.l2_cache) {
datatype_config |= ACL_PROF_L2CACHE;
Expand Down Expand Up @@ -111,8 +126,6 @@ void ProfilerMgr::Start(const NpuTraceConfig &npu_config, bool cpu_trace)
const uint32_t deviceNum = 1;
uint32_t deviceIdList[deviceNum] = {deviceId};
EnableMsProfiler(deviceIdList, deviceNum, aic_metrics, datatype_config);
int8_t level_int = trace_level_to_int_.find(npu_config.trace_level) != trace_level_to_int_.end() ?
trace_level_to_int_[npu_config.trace_level] : -1;
trace_level_.store(level_int);
}

Expand Down
1 change: 1 addition & 0 deletions torch_npu/profiler/analysis/prof_common_func/_constant.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ class Constant(object):
AicMemoryUB = "ACL_AICORE_MEMORY_UB"
AicResourceConflictRatio = "ACL_AICORE_RESOURCE_CONFLICT_RATIO"
AicL2Cache = "ACL_AICORE_L2_CACHE"
AicMemoryAccess = "ACL_AICORE_MEMORY_ACCESS"
AicMetricsNone = "ACL_AICORE_NONE"
Db = "db"
Text = "text"
Expand Down
7 changes: 4 additions & 3 deletions torch_npu/profiler/experimental_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def supported_profiler_level():
def supported_ai_core_metrics():
return set((AiCMetrics.AiCoreNone, AiCMetrics.PipeUtilization, AiCMetrics.ArithmeticUtilization,
AiCMetrics.Memory, AiCMetrics.MemoryL0, AiCMetrics.MemoryUB,
AiCMetrics.ResourceConflictRatio, AiCMetrics.L2Cache))
AiCMetrics.ResourceConflictRatio, AiCMetrics.L2Cache, AiCMetrics.MemoryAccess))


def supported_export_type():
Expand All @@ -43,6 +43,7 @@ class AiCMetrics:
MemoryUB = Constant.AicMemoryUB
ResourceConflictRatio = Constant.AicResourceConflictRatio
L2Cache = Constant.AicL2Cache
MemoryAccess = Constant.AicMemoryAccess
AiCoreNone = Constant.AicMetricsNone


Expand Down Expand Up @@ -120,10 +121,10 @@ def _check_params(self):
if self._aic_metrics not in (
AiCMetrics.L2Cache, AiCMetrics.MemoryL0, AiCMetrics.Memory, AiCMetrics.MemoryUB,
AiCMetrics.PipeUtilization, AiCMetrics.ArithmeticUtilization, AiCMetrics.ResourceConflictRatio,
Constant.AicMetricsNone):
AiCMetrics.MemoryAccess, AiCMetrics.AiCoreNone):
print_warn_msg("Invalid parameter aic_metrics, reset it to default.")
if self._profiler_level == ProfilerLevel.Level0:
self._aic_metrics = Constant.AicMetricsNone
self._aic_metrics = AiCMetrics.AiCoreNone
else:
self._aic_metrics = AiCMetrics.PipeUtilization
if not isinstance(self._op_attr, bool):
Expand Down

0 comments on commit 8b26c6d

Please sign in to comment.