From a7bda1bff030d2791e0781b9bfdee1671ce3e475 Mon Sep 17 00:00:00 2001
From: Junyi Ye <294572668@qq.com>
Date: Wed, 24 Apr 2024 14:42:15 +0800
Subject: [PATCH 1/3] Change the output order of PmuData to the dictionary order of event names.

---
Review note: comparePmuData compares PmuData::evt with strcmp, so the
std::sort call added at the end of AggregateUncoreData orders newEvData by
event name in strcmp (byte-wise) order.

 pmu/pmu_list.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp
index 95d9760..c2f7787 100644
--- a/pmu/pmu_list.cpp
+++ b/pmu/pmu_list.cpp
@@ -381,6 +381,12 @@ namespace KUNPENG_PMU {
         }
     }
 
+
+    bool comparePmuData(const PmuData& data1, const PmuData& data2)
+    {
+        return strcmp(data1.evt, data2.evt) < 0;
+    }
+
     void PmuList::AggregateUncoreData(const unsigned pd, const vector& evData, vector& newEvData)
     {
         // One count for same parent according to parentEventMap.
@@ -406,6 +412,7 @@ namespace KUNPENG_PMU {
         for (const auto& pair : dataMap) {
             newEvData.emplace_back(pair.second);
         }
+        std::sort(newEvData.begin(), newEvData.end(), comparePmuData);
     }
 
     vector& PmuList::ExchangeToUserData(const unsigned pd)
-- 
Gitee


From 14d8213bbf7fa977c1a2eb6636ccaa99bac59116 Mon Sep 17 00:00:00 2001
From: Junyi Ye <294572668@qq.com>
Date: Wed, 24 Apr 2024 15:42:59 +0800
Subject: [PATCH 2/3] Improve uncore event querying.
---
Review note: the added evtName line appears in this copy as
`substr(devName.size() + , ...)`, which is not valid C++ as shown; PATCH 3/3
replaces that line, so it is kept verbatim here to keep the series applicable.
The new condition applies two strncmp prefix checks (devName and evtName) to
the same uncoreEventList[i] string.
The header names of the three `#include` lines in the pmu_list.cpp hunk are
absent from this copy of the patch and are left as found.

 pmu/pmu.cpp      | 5 +++--
 pmu/pmu_list.cpp | 1 +
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp
index b196a5f..15a9bd9 100644
--- a/pmu/pmu.cpp
+++ b/pmu/pmu.cpp
@@ -126,11 +126,12 @@ void AppendChildEvents(char* evt, unordered_map& eventSplitMap)
     string strName(evt);
     auto findSlash = strName.find('/');
     string devName = strName.substr(0, findSlash);
-    string evtName = strName.substr(devName.size(), strName.size() - devName.size() + 1);
+    string evtName = strName.substr(devName.size() + , strName.size() - 1 - devName.size() + 1);
     auto numEvt = uncoreEventPair.first;
     auto uncoreEventList = uncoreEventPair.second;
     for (int i = 0; i < numEvt; ++i) {
-        if (std::strncmp(uncoreEventList[i], devName.c_str(), devName.length()) == 0) {
+        if (std::strncmp(uncoreEventList[i], devName.c_str(), devName.length()) == 0 &&
+            std::strncmp(uncoreEventList[i], evtName.c_str(), evtName.length()) == 0) {
             eventSplitMap.emplace(uncoreEventList[i], evt);
         }
     }
diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp
index c2f7787..acdaab0 100644
--- a/pmu/pmu_list.cpp
+++ b/pmu/pmu_list.cpp
@@ -14,6 +14,7 @@
  * performance counters in the KUNPENG_PMU namespace
  ******************************************************************************/
 #include
+#include
 #include
 #include "linked_list.h"
 #include "cpu_map.h"
-- 
Gitee


From efbf5130a0fc54d1505aa88199d2aa75b280ebad Mon Sep 17 00:00:00 2001
From: Junyi Ye <294572668@qq.com>
Date: Wed, 24 Apr 2024 17:23:41 +0800
Subject: [PATCH 3/3] Improve uncore event querying.
---
Review note: in the HIP_A core table, L1D_CACHE_REFILL_WR and
L2D_CACHE_REFILL_WR were being added with the "*_refill_rd" strings, i.e. the
same names as their *_REFILL_RD neighbours. The added lines now use
"l1d_cache_refill_wr" and "l2d_cache_refill_wr". Only added ('+') lines were
touched, so hunk sizes and the diffstat are unchanged; the post-image blob id
on the pfm_name.cpp index line was not regenerated.
In pmu.cpp, evtName is now taken from the character after the '/' up to (not
including) the last character of evt, and an uncore event is selected when it
starts with devName and contains evtName (strstr).

 pmu/pfm/pfm_name.cpp | 94 ++++++++++++++++++++++----------------------
 pmu/pmu.cpp          |  9 +++--
 2 files changed, 52 insertions(+), 51 deletions(-)

diff --git a/pmu/pfm/pfm_name.cpp b/pmu/pfm/pfm_name.cpp
index a587dc4..e99d74b 100644
--- a/pmu/pfm/pfm_name.cpp
+++ b/pmu/pfm/pfm_name.cpp
@@ -30,48 +30,48 @@ const char* KUNPENG_PMU::HIP_A::CORE::CYCLES = "cycles";
 const char* KUNPENG_PMU::HIP_A::CORE::INSTRUCTIONS = "instructions";
 const char* KUNPENG_PMU::HIP_A::CORE::STALLED_CYCLES_BACKEND = "stalled-cycles-backend";
 const char* KUNPENG_PMU::HIP_A::CORE::STALLED_CYCLES_FRONTEND = "stalled-cycles-frontend";
-const char* KUNPENG_PMU::HIP_A::CORE::L1_DCACHE_LOAD_MISSES = "l1-dcache-load-misses";
+const char* KUNPENG_PMU::HIP_A::CORE::L1_DCACHE_LOAD_MISSES = "L1-dcache-load-misses";
 const char* KUNPENG_PMU::HIP_A::CORE::IDLE_CYCLES_BACKEND = "idle-cycles-backend";
-const char* KUNPENG_PMU::HIP_A::CORE::L1_ICACHE_LOAD_MISSES = "l1-icache-load-misses";
+const char* KUNPENG_PMU::HIP_A::CORE::L1_ICACHE_LOAD_MISSES = "L1-icache-load-misses";
 const char* KUNPENG_PMU::HIP_A::CORE::IDLE_CYCLES_FRONTEND = "idle-cycles-frontend";
-const char* KUNPENG_PMU::HIP_A::CORE::L1_ICACHE_LOADS = "l1-icache-loads";
-const char* KUNPENG_PMU::HIP_A::CORE::LLC_LOAD_MISSES = "llc-load-misses";
-const char* KUNPENG_PMU::HIP_A::CORE::LLC_LOADS = "llc-loads";
+const char* KUNPENG_PMU::HIP_A::CORE::L1_ICACHE_LOADS = "L1-icache-loads";
+const char* KUNPENG_PMU::HIP_A::CORE::LLC_LOAD_MISSES = "LLC-load-misses";
+const char* KUNPENG_PMU::HIP_A::CORE::LLC_LOADS = "LLC-loads";
 const char* KUNPENG_PMU::HIP_A::CORE::BRANCH_LOAD_MISSES = "branch-load-misses";
 const char* KUNPENG_PMU::HIP_A::CORE::BRANCH_LOADS = "branch-loads";
-const char* KUNPENG_PMU::HIP_A::CORE::DTLB_LOAD_MISSES = "dtlb-load-misses";
-const char* KUNPENG_PMU::HIP_A::CORE::DTLB_LOADS = "dtlb-loads";
-const char* KUNPENG_PMU::HIP_A::CORE::ITLB_LOAD_MISSES = "itlb-load-misses";
-const char* KUNPENG_PMU::HIP_A::CORE::ITLB_LOADS = "itlb-loads";
-const char* KUNPENG_PMU::HIP_A::CORE::L1D_CACHE_RD = "l1d-cache-rd";
-const char* KUNPENG_PMU::HIP_A::CORE::L1D_CACHE_WR = "l1d-cache-wr";
-const char* KUNPENG_PMU::HIP_A::CORE::L1D_CACHE_REFILL_RD = "l1d-cache-refill-rd";
-const char* KUNPENG_PMU::HIP_A::CORE::L1D_CACHE_REFILL_WR = "l1d-cache-refill-rd";
-const char* KUNPENG_PMU::HIP_A::CORE::L1D_CACHE_WB_VICTIM = "l1d-cache-wb-victim";
-const char* KUNPENG_PMU::HIP_A::CORE::L1D_CACHE_WB_CLEAN = "l1d-cache-wb-clean";
-const char* KUNPENG_PMU::HIP_A::CORE::L1D_CACHE_INVAL = "l1d-cache-inval";
-const char* KUNPENG_PMU::HIP_A::CORE::L1D_TLB_REFILL_RD = "l1d-tlb-refill-rd";
-const char* KUNPENG_PMU::HIP_A::CORE::L1D_TLB_REFILL_WR = "l1d-tlb-refill-wr";
-const char* KUNPENG_PMU::HIP_A::CORE::L1D_TLB_RD = "l1d-tlb-rd";
-const char* KUNPENG_PMU::HIP_A::CORE::L1D_TLB_WR = "l1d-tlb-wr";
-const char* KUNPENG_PMU::HIP_A::CORE::L2D_CACHE_RD = "l2d-cache-rd";
-const char* KUNPENG_PMU::HIP_A::CORE::L2D_CACHE_WR = "l2d-cache-wr";
-const char* KUNPENG_PMU::HIP_A::CORE::L2D_CACHE_REFILL_RD = "l2d-cache-refill-rd";
-const char* KUNPENG_PMU::HIP_A::CORE::L2D_CACHE_REFILL_WR = "l2d-cache-refill-rd";
-const char* KUNPENG_PMU::HIP_A::CORE::L2D_CACHE_WB_VICTIM = "l2d-cache-wb-victim";
-const char* KUNPENG_PMU::HIP_A::CORE::L2D_CACHE_WB_CLEAN = "l2d-cache-wb-clean";
-const char* KUNPENG_PMU::HIP_A::CORE::L2D_CACHE_INVAL = "l2d-cache-inval";
-const char* KUNPENG_PMU::HIP_A::CORE::L1I_CACHE_PRF = "l1i-cache-prf";
-const char* KUNPENG_PMU::HIP_A::CORE::L1I_CACHE_PRF_REFILL = "l1i-cache-prf-refill";
-const char* KUNPENG_PMU::HIP_A::CORE::IQ_IS_EMPTY = "iq-is-empty";
-const char* KUNPENG_PMU::HIP_A::CORE::IF_IS_STALL = "if-is-stall";
-const char* KUNPENG_PMU::HIP_A::CORE::FETCH_BUBBLE = "fetch-bubble";
-const char* KUNPENG_PMU::HIP_A::CORE::PRF_REQ = "prf-req";
-const char* KUNPENG_PMU::HIP_A::CORE::HIT_ON_PRF = "hit-on-prf";
-const char* KUNPENG_PMU::HIP_A::CORE::EXE_STALL_CYCLE = "exe-stall-cycle";
-const char* KUNPENG_PMU::HIP_A::CORE::MEM_STALL_ANYLOAD = "mem-stall-anyload";
-const char* KUNPENG_PMU::HIP_A::CORE::MEM_STALL_L1MISS = "mem-stall-l1miss";
-const char* KUNPENG_PMU::HIP_A::CORE::MEM_STALL_L2MISS = "mem-stall-l2miss";
+const char* KUNPENG_PMU::HIP_A::CORE::DTLB_LOAD_MISSES = "dTLB-load-misses";
+const char* KUNPENG_PMU::HIP_A::CORE::DTLB_LOADS = "dTLB-loads";
+const char* KUNPENG_PMU::HIP_A::CORE::ITLB_LOAD_MISSES = "iTLB-load-misses";
+const char* KUNPENG_PMU::HIP_A::CORE::ITLB_LOADS = "iTLB-loads";
+const char* KUNPENG_PMU::HIP_A::CORE::L1D_CACHE_RD = "l1d_cache_rd";
+const char* KUNPENG_PMU::HIP_A::CORE::L1D_CACHE_WR = "l1d_cache_wr";
+const char* KUNPENG_PMU::HIP_A::CORE::L1D_CACHE_REFILL_RD = "l1d_cache_refill_rd";
+const char* KUNPENG_PMU::HIP_A::CORE::L1D_CACHE_REFILL_WR = "l1d_cache_refill_wr";
+const char* KUNPENG_PMU::HIP_A::CORE::L1D_CACHE_WB_VICTIM = "l1d_cache_wb_victim";
+const char* KUNPENG_PMU::HIP_A::CORE::L1D_CACHE_WB_CLEAN = "l1d_cache_wb_clean";
+const char* KUNPENG_PMU::HIP_A::CORE::L1D_CACHE_INVAL = "l1d_cache_inval";
+const char* KUNPENG_PMU::HIP_A::CORE::L1D_TLB_REFILL_RD = "l1d_tlb_refill_rd";
+const char* KUNPENG_PMU::HIP_A::CORE::L1D_TLB_REFILL_WR = "l1d_tlb_refill_wr";
+const char* KUNPENG_PMU::HIP_A::CORE::L1D_TLB_RD = "l1d_tlb_rd";
+const char* KUNPENG_PMU::HIP_A::CORE::L1D_TLB_WR = "l1d_tlb_wr";
+const char* KUNPENG_PMU::HIP_A::CORE::L2D_CACHE_RD = "l2d_cache_rd";
+const char* KUNPENG_PMU::HIP_A::CORE::L2D_CACHE_WR = "l2d_cache_wr";
+const char* KUNPENG_PMU::HIP_A::CORE::L2D_CACHE_REFILL_RD = "l2d_cache_refill_rd";
+const char* KUNPENG_PMU::HIP_A::CORE::L2D_CACHE_REFILL_WR = "l2d_cache_refill_wr";
+const char* KUNPENG_PMU::HIP_A::CORE::L2D_CACHE_WB_VICTIM = "l2d_cache_wb_victim";
+const char* KUNPENG_PMU::HIP_A::CORE::L2D_CACHE_WB_CLEAN = "l2d_cache_wb_clean";
+const char* KUNPENG_PMU::HIP_A::CORE::L2D_CACHE_INVAL = "l2d_cache_inval";
+const char* KUNPENG_PMU::HIP_A::CORE::L1I_CACHE_PRF = "l1i_cache_prf";
+const char* KUNPENG_PMU::HIP_A::CORE::L1I_CACHE_PRF_REFILL = "l1i_cache_prf_refill";
+const char* KUNPENG_PMU::HIP_A::CORE::IQ_IS_EMPTY = "iq_is_empty";
+const char* KUNPENG_PMU::HIP_A::CORE::IF_IS_STALL = "if_is_stall";
+const char* KUNPENG_PMU::HIP_A::CORE::FETCH_BUBBLE = "fetch_bubble";
+const char* KUNPENG_PMU::HIP_A::CORE::PRF_REQ = "prf_req";
+const char* KUNPENG_PMU::HIP_A::CORE::HIT_ON_PRF = "hit_on_prf";
+const char* KUNPENG_PMU::HIP_A::CORE::EXE_STALL_CYCLE = "exe_stall_cycle";
+const char* KUNPENG_PMU::HIP_A::CORE::MEM_STALL_ANYLOAD = "mem_stall_anyload";
+const char* KUNPENG_PMU::HIP_A::CORE::MEM_STALL_L1MISS = "mem_stall_l1miss";
+const char* KUNPENG_PMU::HIP_A::CORE::MEM_STALL_L2MISS = "mem_stall_l2miss";
 
 /**
  * CORE events for HIP_B
@@ -84,16 +84,16 @@ const char* KUNPENG_PMU::HIP_B::CORE::CYCLES = "cycles";
 const char* KUNPENG_PMU::HIP_B::CORE::INSTRUCTIONS = "instructions";
 const char* KUNPENG_PMU::HIP_B::CORE::STALLED_CYCLES_BACKEND = "stalled-cycles-backend";
 const char* KUNPENG_PMU::HIP_B::CORE::STALLED_CYCLES_FRONTEND = "stalled-cycles-frontend";
-const char* KUNPENG_PMU::HIP_B::CORE::L1_DCACHE_LOAD_MISSES = "l1-dcache-load-misses";
+const char* KUNPENG_PMU::HIP_B::CORE::L1_DCACHE_LOAD_MISSES = "L1-dcache-load-misses";
 const char* KUNPENG_PMU::HIP_B::CORE::IDLE_CYCLES_BACKEND = "idle-cycles-backend";
-const char* KUNPENG_PMU::HIP_B::CORE::L1_ICACHE_LOAD_MISSES = "l1-icache-load-misses";
+const char* KUNPENG_PMU::HIP_B::CORE::L1_ICACHE_LOAD_MISSES = "L1-icache-load-misses";
 const char* KUNPENG_PMU::HIP_B::CORE::IDLE_CYCLES_FRONTEND = "idle-cycles-frontend";
-const char* KUNPENG_PMU::HIP_B::CORE::L1_ICACHE_LOADS = "l1-icache-loads";
-const char* KUNPENG_PMU::HIP_B::CORE::LLC_LOAD_MISSES = "llc-load-misses";
-const char* KUNPENG_PMU::HIP_B::CORE::LLC_LOADS = "llc-loads";
+const char* KUNPENG_PMU::HIP_B::CORE::L1_ICACHE_LOADS = "L1-icache-loads";
+const char* KUNPENG_PMU::HIP_B::CORE::LLC_LOAD_MISSES = "LLC-load-misses";
+const char* KUNPENG_PMU::HIP_B::CORE::LLC_LOADS = "LLC-loads";
 const char* KUNPENG_PMU::HIP_B::CORE::BRANCH_LOAD_MISSES = "branch-load-misses";
 const char* KUNPENG_PMU::HIP_B::CORE::BRANCH_LOADS = "branch-loads";
-const char* KUNPENG_PMU::HIP_B::CORE::DTLB_LOAD_MISSES = "dtlb-load-misses";
-const char* KUNPENG_PMU::HIP_B::CORE::DTLB_LOADS = "dtlb-loads";
-const char* KUNPENG_PMU::HIP_B::CORE::ITLB_LOAD_MISSES = "itlb-load-misses";
-const char* KUNPENG_PMU::HIP_B::CORE::ITLB_LOADS = "itlb-loads";
\ No newline at end of file
+const char* KUNPENG_PMU::HIP_B::CORE::DTLB_LOAD_MISSES = "dTLB-load-misses";
+const char* KUNPENG_PMU::HIP_B::CORE::DTLB_LOADS = "dTLB-loads";
+const char* KUNPENG_PMU::HIP_B::CORE::ITLB_LOAD_MISSES = "iTLB-load-misses";
+const char* KUNPENG_PMU::HIP_B::CORE::ITLB_LOADS = "iTLB-loads";
\ No newline at end of file
diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp
index 15a9bd9..5c8d888 100644
--- a/pmu/pmu.cpp
+++ b/pmu/pmu.cpp
@@ -126,13 +126,14 @@ void AppendChildEvents(char* evt, unordered_map& eventSplitMap)
     string strName(evt);
     auto findSlash = strName.find('/');
     string devName = strName.substr(0, findSlash);
-    string evtName = strName.substr(devName.size() + , strName.size() - 1 - devName.size() + 1);
+    string evtName = strName.substr(devName.size() + 1, strName.size() - 1 - (devName.size() + 1));
     auto numEvt = uncoreEventPair.first;
     auto uncoreEventList = uncoreEventPair.second;
     for (int i = 0; i < numEvt; ++i) {
-        if (std::strncmp(uncoreEventList[i], devName.c_str(), devName.length()) == 0 &&
-            std::strncmp(uncoreEventList[i], evtName.c_str(), evtName.length()) == 0) {
-            eventSplitMap.emplace(uncoreEventList[i], evt);
+        auto uncoreEvent = uncoreEventList[i];
+        if (strncmp(uncoreEvent, devName.c_str(), devName.length()) == 0 &&
+            strstr(uncoreEvent, evtName.c_str()) != nullptr) {
+            eventSplitMap.emplace(uncoreEvent, evt);
         }
     }
 }
-- 
Gitee