diff --git a/Doxyfile b/Doxyfile new file mode 100644 index 0000000000000000000000000000000000000000..5381dad67d5f6d7d3b17e83d8dd646df421a963c --- /dev/null +++ b/Doxyfile @@ -0,0 +1,409 @@ +# libkperf doxygen + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- +DOXYFILE_ENCODING = UTF-8 +PROJECT_NAME = "libkperf" +PROJECT_NUMBER = +PROJECT_BRIEF = +PROJECT_LOGO = +OUTPUT_DIRECTORY = ./docs +CREATE_SUBDIRS = NO +CREATE_SUBDIRS_LEVEL = 8 +ALLOW_UNICODE_NAMES = NO +OUTPUT_LANGUAGE = Chinese +BRIEF_MEMBER_DESC = YES +REPEAT_BRIEF = YES +ABBREVIATE_BRIEF = "The $name class" \ + "The $name widget" \ + "The $name file" \ + is \ + provides \ + specifies \ + contains \ + represents \ + a \ + an \ + the +ALWAYS_DETAILED_SEC = NO +INLINE_INHERITED_MEMB = NO +FULL_PATH_NAMES = YES +STRIP_FROM_PATH = +STRIP_FROM_INC_PATH = +SHORT_NAMES = NO +JAVADOC_AUTOBRIEF = NO +JAVADOC_BANNER = NO +QT_AUTOBRIEF = NO +MULTILINE_CPP_IS_BRIEF = NO +PYTHON_DOCSTRING = YES +INHERIT_DOCS = YES +SEPARATE_MEMBER_PAGES = NO +TAB_SIZE = 4 +ALIASES = +OPTIMIZE_OUTPUT_FOR_C = NO +OPTIMIZE_OUTPUT_JAVA = NO +OPTIMIZE_FOR_FORTRAN = NO +OPTIMIZE_OUTPUT_VHDL = NO +OPTIMIZE_OUTPUT_SLICE = NO +EXTENSION_MAPPING = +MARKDOWN_SUPPORT = YES +TOC_INCLUDE_HEADINGS = 5 +MARKDOWN_ID_STYLE = DOXYGEN +AUTOLINK_SUPPORT = YES +BUILTIN_STL_SUPPORT = NO +CPP_CLI_SUPPORT = NO +SIP_SUPPORT = NO +IDL_PROPERTY_SUPPORT = YES +DISTRIBUTE_GROUP_DOC = NO +GROUP_NESTED_COMPOUNDS = NO +SUBGROUPING = YES +INLINE_GROUPED_CLASSES = NO +INLINE_SIMPLE_STRUCTS = NO +TYPEDEF_HIDES_STRUCT = NO +LOOKUP_CACHE_SIZE = 0 +NUM_PROC_THREADS = 1 +TIMESTAMP = NO +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- +EXTRACT_ALL = YES +EXTRACT_PRIVATE = NO 
+EXTRACT_PRIV_VIRTUAL = NO +EXTRACT_PACKAGE = NO +EXTRACT_STATIC = NO +EXTRACT_LOCAL_CLASSES = YES +EXTRACT_LOCAL_METHODS = NO +EXTRACT_ANON_NSPACES = NO +RESOLVE_UNNAMED_PARAMS = YES +HIDE_UNDOC_MEMBERS = NO +HIDE_UNDOC_CLASSES = NO +HIDE_FRIEND_COMPOUNDS = NO +HIDE_IN_BODY_DOCS = NO +INTERNAL_DOCS = NO +CASE_SENSE_NAMES = SYSTEM +HIDE_SCOPE_NAMES = NO +HIDE_COMPOUND_REFERENCE= NO +SHOW_HEADERFILE = YES +SHOW_INCLUDE_FILES = YES +SHOW_GROUPED_MEMB_INC = NO +FORCE_LOCAL_INCLUDES = NO +INLINE_INFO = YES +SORT_MEMBER_DOCS = YES +SORT_BRIEF_DOCS = NO +SORT_MEMBERS_CTORS_1ST = NO +SORT_GROUP_NAMES = NO +SORT_BY_SCOPE_NAME = NO +STRICT_PROTO_MATCHING = NO +GENERATE_TODOLIST = YES +GENERATE_TESTLIST = YES +GENERATE_BUGLIST = YES +GENERATE_DEPRECATEDLIST= YES +ENABLED_SECTIONS = +MAX_INITIALIZER_LINES = 30 +SHOW_USED_FILES = YES +SHOW_FILES = YES +SHOW_NAMESPACES = YES +FILE_VERSION_FILTER = +LAYOUT_FILE = +CITE_BIB_FILES = +#--------------------------------------------------------------------------- +# Configuration options related to warning and progress messages +#--------------------------------------------------------------------------- +QUIET = NO +WARNINGS = YES +WARN_IF_UNDOCUMENTED = YES +WARN_IF_DOC_ERROR = YES +WARN_IF_INCOMPLETE_DOC = YES +WARN_NO_PARAMDOC = NO +WARN_IF_UNDOC_ENUM_VAL = NO +WARN_AS_ERROR = NO +WARN_FORMAT = "$file:$line: $text" +WARN_LINE_FORMAT = "at line $line of file $file" +WARN_LOGFILE = +#--------------------------------------------------------------------------- +# Configuration options related to the input files +#--------------------------------------------------------------------------- +INPUT = +INPUT_ENCODING = UTF-8 +INPUT_FILE_ENCODING = +FILE_PATTERNS = *.c \ + *.cc \ + *.cxx \ + *.cxxm \ + *.cpp \ + *.cppm \ + *.c++ \ + *.c++m \ + *.java \ + *.ii \ + *.ixx \ + *.ipp \ + *.i++ \ + *.inl \ + *.idl \ + *.ddl \ + *.odl \ + *.h \ + *.hh \ + *.hxx \ + *.hpp \ + *.h++ \ + *.ixx \ + *.l \ + *.cs \ + *.d \ + *.php \ + *.php4 \ + *.php5 
\ + *.phtml \ + *.inc \ + *.m \ + *.markdown \ + *.md \ + *.mm \ + *.dox \ + *.py \ + *.pyw \ + *.f90 \ + *.f95 \ + *.f03 \ + *.f08 \ + *.f18 \ + *.f \ + *.for \ + *.vhd \ + *.vhdl \ + *.ucf \ + *.qsf \ + *.ice +RECURSIVE = YES +EXCLUDE = +EXCLUDE_SYMLINKS = NO +EXCLUDE_PATTERNS = +EXCLUDE_SYMBOLS = +EXAMPLE_PATH = docs/ +EXAMPLE_PATTERNS = * +EXAMPLE_RECURSIVE = NO +IMAGE_PATH = +INPUT_FILTER = +FILTER_PATTERNS = +FILTER_SOURCE_FILES = NO +FILTER_SOURCE_PATTERNS = +USE_MDFILE_AS_MAINPAGE = +FORTRAN_COMMENT_AFTER = 72 +#--------------------------------------------------------------------------- +# Configuration options related to source browsing +#--------------------------------------------------------------------------- +SOURCE_BROWSER = YES +INLINE_SOURCES = NO +STRIP_CODE_COMMENTS = YES +REFERENCED_BY_RELATION = YES +REFERENCES_RELATION = YES +REFERENCES_LINK_SOURCE = YES +SOURCE_TOOLTIPS = YES +USE_HTAGS = NO +VERBATIM_HEADERS = YES +CLANG_ASSISTED_PARSING = NO +CLANG_ADD_INC_PATHS = YES +CLANG_OPTIONS = +CLANG_DATABASE_PATH = +#--------------------------------------------------------------------------- +# Configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- +ALPHABETICAL_INDEX = YES +IGNORE_PREFIX = +#--------------------------------------------------------------------------- +# Configuration options related to the HTML output +#--------------------------------------------------------------------------- +GENERATE_HTML = YES +HTML_OUTPUT = html +HTML_FILE_EXTENSION = .html +HTML_HEADER = +HTML_FOOTER = +HTML_STYLESHEET = +HTML_EXTRA_STYLESHEET = +HTML_EXTRA_FILES = +HTML_COLORSTYLE = AUTO_LIGHT +HTML_COLORSTYLE_HUE = 220 +HTML_COLORSTYLE_SAT = 100 +HTML_COLORSTYLE_GAMMA = 80 +HTML_DYNAMIC_MENUS = YES +HTML_DYNAMIC_SECTIONS = NO +HTML_CODE_FOLDING = YES +HTML_INDEX_NUM_ENTRIES = 100 +GENERATE_DOCSET = NO +DOCSET_FEEDNAME = "Doxygen generated docs" +DOCSET_FEEDURL = 
+DOCSET_BUNDLE_ID = org.doxygen.Project +DOCSET_PUBLISHER_ID = org.doxygen.Publisher +DOCSET_PUBLISHER_NAME = Publisher +GENERATE_HTMLHELP = NO +CHM_FILE = +HHC_LOCATION = +GENERATE_CHI = NO +CHM_INDEX_ENCODING = +BINARY_TOC = NO +TOC_EXPAND = NO +SITEMAP_URL = +GENERATE_QHP = NO +QCH_FILE = +QHP_NAMESPACE = org.doxygen.Project +QHP_VIRTUAL_FOLDER = doc +QHP_CUST_FILTER_NAME = +QHP_CUST_FILTER_ATTRS = +QHP_SECT_FILTER_ATTRS = +QHG_LOCATION = +GENERATE_ECLIPSEHELP = NO +ECLIPSE_DOC_ID = org.doxygen.Project +DISABLE_INDEX = NO +GENERATE_TREEVIEW = NO +FULL_SIDEBAR = NO +ENUM_VALUES_PER_LINE = 4 +TREEVIEW_WIDTH = 250 +EXT_LINKS_IN_WINDOW = NO +OBFUSCATE_EMAILS = YES +HTML_FORMULA_FORMAT = png +FORMULA_FONTSIZE = 10 +FORMULA_MACROFILE = +USE_MATHJAX = NO +MATHJAX_VERSION = MathJax_2 +MATHJAX_FORMAT = HTML-CSS +MATHJAX_RELPATH = +MATHJAX_EXTENSIONS = +MATHJAX_CODEFILE = +SEARCHENGINE = YES +SERVER_BASED_SEARCH = NO +EXTERNAL_SEARCH = NO +SEARCHENGINE_URL = +SEARCHDATA_FILE = searchdata.xml +EXTERNAL_SEARCH_ID = +EXTRA_SEARCH_MAPPINGS = +#--------------------------------------------------------------------------- +# Configuration options related to the LaTeX output +#--------------------------------------------------------------------------- +GENERATE_LATEX = YES +LATEX_OUTPUT = latex +LATEX_CMD_NAME = +MAKEINDEX_CMD_NAME = makeindex +LATEX_MAKEINDEX_CMD = makeindex +COMPACT_LATEX = NO +PAPER_TYPE = a4 +EXTRA_PACKAGES = +LATEX_HEADER = +LATEX_FOOTER = +LATEX_EXTRA_STYLESHEET = +LATEX_EXTRA_FILES = +PDF_HYPERLINKS = YES +USE_PDFLATEX = YES +LATEX_BATCHMODE = NO +LATEX_HIDE_INDICES = NO +LATEX_BIB_STYLE = plain +LATEX_EMOJI_DIRECTORY = +#--------------------------------------------------------------------------- +# Configuration options related to the RTF output +#--------------------------------------------------------------------------- +GENERATE_RTF = NO +RTF_OUTPUT = rtf +COMPACT_RTF = NO +RTF_HYPERLINKS = NO +RTF_STYLESHEET_FILE = +RTF_EXTENSIONS_FILE = 
+#--------------------------------------------------------------------------- +# Configuration options related to the man page output +#--------------------------------------------------------------------------- +GENERATE_MAN = NO +MAN_OUTPUT = man +MAN_EXTENSION = .3 +MAN_SUBDIR = +MAN_LINKS = NO +#--------------------------------------------------------------------------- +# Configuration options related to the XML output +#--------------------------------------------------------------------------- +GENERATE_XML = NO +XML_OUTPUT = xml +XML_PROGRAMLISTING = YES +XML_NS_MEMB_FILE_SCOPE = NO +#--------------------------------------------------------------------------- +# Configuration options related to the DOCBOOK output +#--------------------------------------------------------------------------- +GENERATE_DOCBOOK = NO +DOCBOOK_OUTPUT = docbook +#--------------------------------------------------------------------------- +# Configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- +GENERATE_AUTOGEN_DEF = NO +#--------------------------------------------------------------------------- +# Configuration options related to Sqlite3 output +#--------------------------------------------------------------------------- +GENERATE_SQLITE3 = NO +SQLITE3_OUTPUT = sqlite3 +SQLITE3_RECREATE_DB = YES +#--------------------------------------------------------------------------- +# Configuration options related to the Perl module output +#--------------------------------------------------------------------------- +GENERATE_PERLMOD = NO +PERLMOD_LATEX = NO +PERLMOD_PRETTY = YES +PERLMOD_MAKEVAR_PREFIX = +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- +ENABLE_PREPROCESSING = YES +MACRO_EXPANSION = NO +EXPAND_ONLY_PREDEF = NO +SEARCH_INCLUDES = YES 
+INCLUDE_PATH = +INCLUDE_FILE_PATTERNS = +PREDEFINED = +EXPAND_AS_DEFINED = +SKIP_FUNCTION_MACROS = YES +#--------------------------------------------------------------------------- +# Configuration options related to external references +#--------------------------------------------------------------------------- +TAGFILES = +GENERATE_TAGFILE = +ALLEXTERNALS = NO +EXTERNAL_GROUPS = YES +EXTERNAL_PAGES = YES +#--------------------------------------------------------------------------- +# Configuration options related to diagram generator tools +#--------------------------------------------------------------------------- +HIDE_UNDOC_RELATIONS = YES +HAVE_DOT = YES +DOT_NUM_THREADS = 0 +DOT_COMMON_ATTR = "fontname=Helvetica,fontsize=10" +DOT_EDGE_ATTR = "labelfontname=Helvetica,labelfontsize=10" +DOT_NODE_ATTR = "shape=box,height=0.2,width=0.4" +DOT_FONTPATH = +CLASS_GRAPH = YES +COLLABORATION_GRAPH = YES +GROUP_GRAPHS = YES +UML_LOOK = NO +UML_LIMIT_NUM_FIELDS = 10 +DOT_UML_DETAILS = NO +DOT_WRAP_THRESHOLD = 17 +TEMPLATE_RELATIONS = NO +INCLUDE_GRAPH = YES +INCLUDED_BY_GRAPH = YES +CALL_GRAPH = NO +CALLER_GRAPH = NO +GRAPHICAL_HIERARCHY = YES +DIRECTORY_GRAPH = YES +DIR_GRAPH_MAX_DEPTH = 1 +DOT_IMAGE_FORMAT = png +INTERACTIVE_SVG = NO +DOT_PATH = +DOTFILE_DIRS = +DIA_PATH = +DIAFILE_DIRS = +PLANTUML_JAR_PATH = +PLANTUML_CFG_FILE = +PLANTUML_INCLUDE_PATH = +DOT_GRAPH_MAX_NODES = 50 +MAX_DOT_GRAPH_DEPTH = 0 +DOT_MULTI_TARGETS = NO +GENERATE_LEGEND = YES +DOT_CLEANUP = YES +MSCGEN_TOOL = +MSCFILE_DIRS = diff --git a/README.en.md b/README.en.md index 72c61e1cca686eddd99f45f73851dfd97a35f0fb..6cb5c53b10ba65df65a583ebeaba912fee98a9c3 100644 --- a/README.en.md +++ b/README.en.md @@ -1,12 +1,15 @@ -# libkperf +# libkperf English readme #### Description + libkperf is a lightweight performance collection library on linux, that enables developers to perform performance collection in an API fashion. 
libkperf provides performance data in memory and allows develops to process data directly, reducing overhead of writing and reading perf.data. #### Supported CPU Architectures + - Kunpeng #### Supported OS + - openEuler - OpenCloudOS - TencentOS @@ -14,60 +17,75 @@ libkperf is a lightweight performance collection library on linux, that enables - CentOS #### Build + Minimum required GCC version: + - gcc-4.8.5 and glibc-2.17. Minimum required Python version: + - python-3.7. To build a library with C API: + ```shell git clone --recurse-submodules https://gitee.com/openeuler/libkperf.git cd libkperf bash build.sh install_path=/path/to/install ``` + Note: - If the compilation error message indicates that numa.h file is missing, you need to first install the corresponding numactl-devel package. - If you encounter a CMake error related to 'Found PythonInterp' during compilation and linking, you need to first install the required python3-devel package. To build a library with debug version: + ```shell bash build.sh install_path=/path/to/install buildType=debug ``` To build a python package: + ```shell bash build.sh install_path=/path/to/install python=true ``` To uninstall python package: + ```shell python3 -m pip uninstall -y libkperf ``` #### Documents + Refer to ```docs``` directory for detailed docs: + - [Detailed usage](./docs/Details_Usage.md) Refer to ```docs``` directory for python API specification docs: + - [Python API specification](./docs/Python_API.md) #### Instructions + All pmu functions are accomplished by the following interfaces: -* PmuOpen - Input pid, core id and event and Open pmu device. -* PmuEnable - Start collection. -* PmuRead - Read collection data. -* PmuDisable - Stop collection. -* PmuClose - Close pmu device. + +* PmuOpen + Input pid, core id and event and Open pmu device. +* PmuEnable + Start collection. +* PmuRead + Read collection data. +* PmuDisable + Stop collection. +* PmuClose + Close pmu device. 
Here are some examples: + * Get pmu count for a process. + ```C++ int pidList[1]; pidList[0] = pid; @@ -102,6 +120,7 @@ PmuClose(pd); ``` * Sample a process + ```C++ int pidList[1]; pidList[0] = pid; @@ -147,6 +166,7 @@ PmuClose(pd); ``` Python examples: + ```python import time from collections import defaultdict @@ -180,17 +200,19 @@ def Counting(): #### Quick Run Reference for Example Code: * **For C++ Example Code:** -You can place the sample code into the main function of a C++ source file, and include the header files related to this dynamic library (#include "symbol.h", #include "pmu.h", #include "pcerrc.h"). Then, use g++ to compile and link this dynamic library to generate an executable file that can be run. + You can place the sample code into the main function of a C++ source file, and include the header files related to this dynamic library (#include "symbol.h", #include "pmu.h", #include "pcerrc.h"). Then, use g++ to compile and link this dynamic library to generate an executable file that can be run. Compilation Command Reference: + ```bash g++ -o example example.cpp -I /install_path/include -L /install_path/lib -lkperf -lsym ``` * **For Python Example Code:** -You can place the sample code into the main function of a Python source file, and import the packages related to this dynamic library (import kperf, import ksym). Running the Python file will then utilize the functionalities provided by these packages. + You can place the sample code into the main function of a Python source file, and import the packages related to this dynamic library (import kperf, import ksym). Running the Python file will then utilize the functionalities provided by these packages. 
Run Command Reference: + ```bash python example.py -``` \ No newline at end of file +``` diff --git a/README.md b/README.md index 67cfa462af46ab8439a945478c06b26d504892ab..526d7ca41783d6b08492b38d386582173ae14230 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,15 @@ -# libkperf +# libkperf Chinese readme #### 描述 libkperf是一个轻量级linux性能采集库,它能够让开发者以API的方式执行性能采集,包括pmu采样和符号解析。libkperf把采集数据内存化,使开发者能够在内存中直接处理采集数据,避免了读写perf.data带来的开销。 #### 支持的CPU架构 + - 鲲鹏 #### 支持的OS + - openEuler - OpenCloudOS - TencentOS @@ -15,49 +17,62 @@ libkperf是一个轻量级linux性能采集库,它能够让开发者以API的 - CentOS #### 编译 + 最低依赖gcc版本: + - gcc-4.8.5 和 glibc-2.17 最低依赖python版本: + - python-3.7 编译生成动态库和C的API: + ```shell git clone --recurse-submodules https://gitee.com/openeuler/libkperf.git cd libkperf bash build.sh install_path=/path/to/install ``` + 说明: + - 如果编译报错提示没有numa.h文件,需要先安装对应的numactl-devel包。 - 如果编译连接在Found PythonInterp报CMake错误,需要先安装所需的python3-devel包。 如果想要编译调试版本: + ```shell bash build.sh install_path=/path/to/install buildType=debug ``` 如果想要编译python包: + ```shell bash build.sh install_path=/path/to/install python=true ``` 安装后若需要卸载python库, 可以执行下述命令 + ```shell python3 -m pip uninstall -y libkperf ``` #### 文档 + 详细文档可以参考docs目录: + - [详细使用文档](./docs/Details_Usage.md) Python API文档可以参考docs目录: + - [Python API说明文档](./docs/Python_API.md) #### 快速使用 -主要有以下几个API: +主要有以下几个API: + - PmuOpen - 输入pid、core id和event,打开pmu设备。 + 输入pid、core id和event,打开pmu设备。 - PmuEnable 开始收集。 - PmuRead @@ -67,9 +82,9 @@ Python API文档可以参考docs目录: - PmuClose 关闭pmu设备。 -以下是一些示例: +以下是一些示例: -- 获取进程的pmu计数。 +- 获取进程的pmu计数。 ```C++ int pidList[1]; @@ -104,6 +119,7 @@ PmuClose(pd); ``` - 对进程进行采样 + ```C++ int pidList[1]; pidList[0] = pid; @@ -146,6 +162,7 @@ PmuClose(pd); ``` Python 例子: + ```python import time from collections import defaultdict @@ -178,21 +195,22 @@ def Counting(): ``` - #### 示例代码快速运行参考: * **针对C++示例代码:** -可以将示例代码放到一个c++源文件的main函数中,并引用此动态库相关的头文件(#include "symbol.h"、#include "pmu.h"、#include "pcerrc.h"),再使用g++编译链接此动态库,生成可执行文件即可运行。 + 
可以将示例代码放到一个c++源文件的main函数中,并引用此动态库相关的头文件(#include "symbol.h"、#include "pmu.h"、#include "pcerrc.h"),再使用g++编译链接此动态库,生成可执行文件即可运行。 编译指令参考: + ```bash g++ -o example example.cpp -I /install_path/include -L /install_path/lib -lkperf -lsym ``` * **针对python示例代码:** -可以将示例代码放到一个python源文件的main函数中,并导入此动态库相关的头文件包(import kperf、import ksym),在运行此python文件即可。 + 可以将示例代码放到一个python源文件的main函数中,并导入此动态库相关的头文件包(import kperf、import ksym),在运行此python文件即可。 运行指令参考: + ```bash python example.py -``` \ No newline at end of file +``` diff --git a/build.sh b/build.sh index fc60cdf8fefe5ca08142ac4ddb68e4b7604da124..f6b208e8cf0b404ca8215cd033842e4147e6a87c 100644 --- a/build.sh +++ b/build.sh @@ -118,5 +118,4 @@ main() { } # bash build.sh test=true installPath=/home/ build_type=Release .The last three settings are optional. -main $@ - +main $@ \ No newline at end of file diff --git a/docs/Details_Usage.md b/docs/Details_Usage.md index b356b2b46157d9f71abe7868179d6f214644b46c..00debc3c0694f9244b10c03226de2669d3baef0f 100644 --- a/docs/Details_Usage.md +++ b/docs/Details_Usage.md @@ -1,11 +1,15 @@ -Details -============ +Details Usage +============= + ### Counting + libkperf提供Counting模式,类似于perf stat功能。 例如,如下perf命令: + ``` perf stat -e cycles,branch-misses ``` + 该命令是对系统采集cycles和branch-misses这两个事件的计数。 对于libkperf,可以这样来设置PmuAttr: @@ -32,8 +36,10 @@ if pd == -1: print(kperf.error()) exit(1) ``` + 通过调用```PmuOpen```初始化了采集任务,并获得了任务的标识符pd。 然后,可以利用pd来启动采集: + ```c++ // c++代码示例 PmuEnable(pd); @@ -47,23 +53,28 @@ kperf.enable(pd) time.sleep(1) kperf.disable(pd) ``` + 不论是否停止了采集,都可以通过```PmuRead```来读取采集数据: + ```c++ // c++代码示例 PmuData *data = NULL; int len = PmuRead(pd, &data); ``` + ```PmuRead```会返回采集数据的长度。 - + ```python # python代码示例 pmu_data = kperf.read(pd) for data in pmu_data.iter: print(f"cpu {data.cpu} count {data.count} evt {data.evt}") ``` + ```kperf.read```会返回采集数据链表,可以通过遍历的方式读取。 如果是对系统采集,那么PmuData的长度等于core的数量乘以事件的数量,PmuData的数据类似如下: + ``` cpu 0 count 123 evt cycles cpu 1 count 1242354 evt cycles @@ -74,7 +85,9 @@ 
cpu 1 count 124235 evt branch-misses cpu 2 count 789723 evt branch-misses ... ``` + 如果是对进程采集,那么PmuData的长度等于进程内线程的数量乘以事件的数量,PmuData的数据类似如下: + ``` pid 4156 tid 4156 count 123 evt cycles pid 4156 tid 4157 count 534123 evt cycles @@ -87,13 +100,17 @@ pid 4156 tid 4158 count 64574 evt branch-misses ``` ### Sampling + libkperf提供Sampling模式,类似于perf record的如下命令: + ``` perf record -e cycles,branch-misses ``` + 该命令是对系统采样cycles和branch-misses这两个事件。 设置PmuAttr的方式和Counting一样,在调用PmuOpen的时候,把任务类型设置为SAMPLING,并且设置采样频率: + ```c++ // c++代码示例 attr.freq = 1000; // 采样频率是1000HZ @@ -116,6 +133,7 @@ pd = kperf.open(kperf.PmuTaskType.SAMPLING, pmu_attr) 启动采集和读取数据的方式和Counting一致。 如果是对系统采集,PmuData的数据类似如下(长度取决于数据量): + ``` cpu 0 pid 3145 tid 3145 period 12314352 cpu 0 pid 4145 tid 4145 period 12314367 @@ -124,23 +142,30 @@ cpu 1 pid 23423 tid 23423 period 1231241 ... ... ``` + 如果是对进程采集,PmuData的数据类似如下: + ``` cpu 32 pid 7878 tid 7878 period 123144 cpu 32 pid 7878 tid 7879 period 1523342 cpu 32 pid 7878 tid 7879 period 1234342 ... ``` + 每一条记录还包含触发事件的程序地址和符号信息,关于如何获取符号信息,可以参考[获取符号信息](#获取符号信息)这一章节。 ### SPE Sampling + libkperf提供SPE采样模式,类似于perf record的如下命令: + ``` perf record -e arm_spe_0/load_filter=1/ ``` + 该命令是对系统进行spe采样,关于linux spe采样的详细介绍,可以参考[这里](https://www.man7.org/linux/man-pages/man1/perf-arm-spe.1.html)。 对于libkperf,可以这样设置PmuAttr: + ```c++ // c++代码示例 PmuAttr attr = {0}; @@ -161,12 +186,15 @@ pmu_attr = kperf.PmuAttr( # 需要root权限才能运行 pd = kperf.open(kperf.PmuTaskType.SPE_SAMPLING, pmu_attr) ``` + 对于spe采样,不需要设置evtList,而是通过设置dataFilter和evFilter来指定需要采集的事件。dataFilter和evFilter的含义仍然可以参考[perf spe的说明文档](https://www.man7.org/linux/man-pages/man1/perf-arm-spe.1.html)。 采样数据PmuData和Sampling模式差不多,差别是: + - SPE采样的调用栈只有一层,而Sampling可以有多层调用栈。 - SPE的PmuData提供了额外的数据struct PmuDataExt *ext. 
-PmuDataExt包含spe特有的数据:访存的物理地址、虚拟地址和事件bit。 + PmuDataExt包含spe特有的数据:访存的物理地址、虚拟地址和事件bit。 + ```c++ struct PmuDataExt { unsigned long pa; // physical address @@ -174,8 +202,10 @@ struct PmuDataExt { unsigned long event; // event id, which is a bit map of mixed events, event bit is defined in SPE_EVENTS. }; ``` + 其中,物理地址pa需要在启用PA_ENABLE的情况下才能采集。 event是一个bit map,是多个事件的集合,每一个事件占据一个bit,事件对应的bit参考枚举SPE_EVENTS: + ```c++ enum SPE_EVENTS { SPE_EV_EXCEPT = 1 << 0, @@ -194,8 +224,11 @@ enum SPE_EVENTS { SPE_EV_EMPTY_PRED = 1 << 18, }; ``` + ### 获取符号信息 + 结构体PmuData里提供了采样数据的调用栈信息,包含调用栈的地址、符号名称等。 + ```c++ struct Symbol { unsigned long addr; @@ -215,6 +248,7 @@ struct Stack { ``` Stack是链表结构,每一个元素都是一层调用函数。 + ```mermaid graph LR a(Symbol) --> b(Symbol) @@ -223,14 +257,17 @@ c --> d(......) ``` Symbol的字段信息受PmuAttr影响: + - PmuAttr.callStack会决定Stack是完整的调用栈,还是只有一层调用栈(即Stack链表只有一个元素)。 - PmuAttr.symbolMode如果等于NO_SYMBOL_RESOLVE,那么PmuData的stack是空指针。 - PmuAttr.symbolMode如果等于RESOLVE_ELF,那么Symbol的fileName和lineNum没有数据,都等于0,因为没有解析dwarf信息。 - PmuAttr.symbolMode如果等于RESOLVE_ELF_DWARF,那么Symbol的所有信息都有效。 ### 采集uncore事件 + libkperf支持uncore事件的采集,只有Counting模式支持uncore事件的采集(和perf一致)。 可以像这样设置PmuAttr: + ```c++ // c++代码示例 char *evtList[1]; @@ -240,6 +277,7 @@ attr.evtList = evtList; attr.numEvt = 1; int pd = PmuOpen(COUNTING, &attr); ``` + ```python # python代码示例 import kperf @@ -251,6 +289,7 @@ pd = kperf.open(kperf.PmuTaskType.COUNTING, pmu_attr) uncore事件的格式为```//```,上面代码是采集设备hisi_sccl1_ddrc0的flux_rd事件。 也可以把设备索引号省略: + ```c++ // c++代码示例 evtList[0] = "hisi_sccl1_ddrc/flux_rd/"; @@ -260,9 +299,11 @@ evtList[0] = "hisi_sccl1_ddrc/flux_rd/"; # python代码示例 evtList = ["hisi_sccl1_ddrc/flux_rd/"] ``` + 这里把hisi_sccl1_ddrc0改为了hisi_sccl1_ddrc,这样会采集设备hisi_sccl1_ddrc0、hisi_sccl1_ddrc1、hisi_sccl1_ddrc2...,并且采集数据PmuData是所有设备数据的总和:count = count(hisi_sccl1_ddrc0) + count(hisi_sccl1_ddrc1) + count(hisi_sccl1_ddrc2) + ... 
也可以通过```/config=0xxx/```的方式来指定事件名: + ```c++ // c++代码示例 evtList[0] = "hisi_sccl1_ddrc0/config=0x1/"; @@ -276,8 +317,10 @@ evtList = ["hisi_sccl1_ddrc0/config=0x1/"] 这样效果是和指定flux_rd是一样的。 ### 采集tracepoint + libkperf支持tracepoint的采集,支持的tracepoint事件可以通过perf list来查看(通常需要root权限)。 可以这样设置PmuAttr: + ```c++ // c++代码示例 char *evtList[1]; @@ -304,6 +347,7 @@ tracepoint能够获取每个事件特有的数据,比如sched:sched_switch包 想要查询每个事件包含哪些数据,可以查看/sys/kernel/tracing/events下面的文件内容,比如/sys/kernel/tracing/events/sched/sched_switch/format。 libkperf提供了接口PmuGetField来获取tracepoint的数据。比如对于sched:sched_switch,可以这样调用: + ```c++ // c++代码示例 int prev_pid; @@ -332,17 +376,21 @@ for data in pmu_data.iter: print(f"next_comm={next_comm};prev_pid={prev_pid.value}") ``` + 这里调用者需要提前了解数据的类型,并且指定数据的大小。数据的类型和大小仍然可以从/sys/kernel/tracing/下每个事件的format文件来得知。 ### 事件分组 + libkperf提供了事件分组的能力,能够让多个事件同时处于采集状态。 该功能类似于perf的如下使用方式: + ``` perf stat -e "{cycles,branch-loads,branch-load-misses,iTLB-loads}",inst_retired ``` 对于libkperf,可以通过设置PmuAttr的evtAttr字段来设定哪些事件放在一个group内。 比如,可以这样调用: + ```c++ // c++代码示例 unsigned numEvt = 5; @@ -372,6 +420,7 @@ pd = kperf.open(kperf.PmuTaskType.SAMPLING, pmu_attr) for data in pmu_data.iter: print(f"cpu {data.cpu} count {data.count} evt {data.evt}") ``` + 上述代码把前四个事件设定为一个分组,groupId都设定为1,最后一个事件不分组,groupId设定为-1。 事件数组attr.evtList和事件属性数组attr.evtAttr必须一一对应,即长度必须一致。 或者attr.evtAttr也可以是空指针,那么所有事件都不分组。 @@ -380,30 +429,37 @@ for data in pmu_data.iter: 对于同一组的事件,他们的countPercent是相同的。如果一个组的事件过多,超过了硬件计数器的数目,那么这个组的所有事件都不会被采集,countPercent会等于-1. ### 对进程子线程计数采集 + ```mermaid graph TD a(主线程) --perf stat--> b(创建线程) b --> c(子线程) c --end perf--> d(子线程退出) ``` + 考虑上面的场景:用perf stat对进程采集,之后进程创建了子线程,采集一段事件后,停止perf。 查看采集结果,perf只会显示主线程的采集结果,而无法看到子线程的结果:count = count(main thread) + count(thread). perf把子线程的数据聚合到了主线程上。 libkperf提供了采集子线程的能力。如果想要在上面场景中获取子线程的计数,可以把PmuAttr.incluceNewFork设置为1. 
+ ```c++ // c++代码示例 attr.includeNewFork = 1; ``` + ```python # python代码示例 pmu_attr = kperf.PmuAttr(evtList=evtList, includeNewFork=True) ``` + 然后,通过PmuRead获取到的PmuData,便能包含子线程计数信息了。 注意,该功能是针对Counting模式,因为Sampling和SPE Sampling本身就会采集子线程的数据。 ### 采集DDRC带宽 + 基于uncore事件可以计算DDRC的访存带宽,不同硬件平台有不同的计算方式。 鲲鹏芯片上的访存带宽公式可以参考openeuler kernel的tools/perf/pmu-events/arch/arm64/hisilicon/hip09/sys/uncore-ddrc.json: + ```json { "MetricExpr": "flux_wr * 32 / duration_time", @@ -424,6 +480,7 @@ pmu_attr = kperf.PmuAttr(evtList=evtList, includeNewFork=True) ``` 根据公式,采集flux_wr和flux_rd事件,用于计算带宽: + ```c++ // c++代码示例 @@ -479,7 +536,7 @@ pmu_attr = kperf.PmuAttr(evtList=evtList, includeNewFork=True) "hisi_sccl3_ddrc/flux_wr/", "hisi_sccl5_ddrc/flux_wr/", "hisi_sccl7_ddrc/flux_wr/"] # 采集hisi_scclX_ddrc设备下的flux_rd和flux_wr,具体设备名称因硬件而异,可以在/sys/devices/下查询。 - + pmu_attr = kperf.PmuAttr(evtList=evtList) pd = kperf.open(kperf.PmuTaskType.COUNTING, pmu_attr) if pd == -1: @@ -502,6 +559,7 @@ pmu_attr = kperf.PmuAttr(evtList=evtList, includeNewFork=True) ``` 执行上述代码,输出的结果类似如下: + ``` read bandwidth: 17.32 M/s read bandwidth: 5.43 M/s @@ -514,6 +572,7 @@ write bandwidth: 0.97 M/s ``` ### 采集系统调用函数耗时信息 + libkperf基于tracepoint事件采集能力,在原有能力的基础上,重新封装了一组相关的调用API,来提供采集系统调用函数耗时信息的能力,类似于perf trace命令 ``` @@ -522,6 +581,7 @@ perf trace -e read,write 对于libkperf,可以通过设置PmuTraceAttr的funcs字段来需要采集哪些系统调用函数的耗时信息,pidList字段用于设定需要采集耗时的进程,cpuList字段用于设定需要采集哪些cpu上的系统调用耗时信息。三个参数如果任何一个为空,表示采集此字段采集系统上存在的所有信息,比如funcs为空,表示采集所有系统调用耗时信息。 比如,可以这样调用: + ```c++ // c++代码示例 unsigned numFunc = 2; @@ -555,10 +615,12 @@ kperf.trace_disable(pd) pmu_trace_data = kperf.trace_read(pd) for data in pmu_trace_data.iter: print(f"funcName: {data.funcs} elapsedTime: {data.elapsedTime} ms pid: {data.pid} tid: {data.tid} cpu: {data.cpu} comm: {data.comm}") - + kperf.trace_close(pd) ``` + 执行上述代码,输出的结果类似如下: + ``` funcName: read elapsedTime: 0.00110 ms pid: 997235 tid: 997235 cpu: 110 comm: taskset funcName: read elapsedTime: 0.00118 ms pid: 997235 tid: 997235 cpu: 
110 comm: taskset @@ -568,10 +630,13 @@ funcName: write elapsedTime: 0.00105 ms pid: 997235 tid: 997235 cpu: 110 comm: t funcName: write elapsedTime: 0.00107 ms pid: 997235 tid: 997235 cpu: 110 comm: taskset funcName: write elapsedTime: 0.00118 ms pid: 997235 tid: 997235 cpu: 110 comm: taskset ``` + 支持采集的系统调用函数列表,在查看/sys/kernel/tracing/events/syscalls/下所有系统调用对应的enter和exit文件,去掉相同的前缀就是对应的系统调用函数名称;也可以基于提供的PmuSysCallFuncList函数获取对应的系统调用函数列表。 ### 采集BRBE数据 + libkperf基于sampling的能力,增加了对branch sample stack数据的采集能力,用于获取CPU的跳转记录, 通过branchSampleFilter可指定获取不同类型的分支跳转记录。 + ```c++ char* evtList[1] = {"cycles"}; int* cpuList = nullptr; @@ -612,7 +677,9 @@ for (int i = 0; i < len; i++) PmuDataFree(data); PmuClose(pd); ``` + 执行上述代码,输出的结果类似如下: + ``` ffff88f6065c->ffff88f60b0c 35 ffff88f60aa0->ffff88f60618 1 @@ -642,11 +709,13 @@ for data in pmu_data.iter: for item in data.ext.branchRecords.iter: print(f"{hex(item.fromAddr)}->{hex(item.toAddr)} {item.cycles}") ``` + 执行上述代码,输出的结果类似如下: + ``` 0xffff88f6065c->0xffff88f60b0c 35 0xffff88f60aa0->0xffff88f60618 1 0x40065c->0xffff88f60b00 1 0x400824->0x400650 1 0x400838->0x400804 1 -``` \ No newline at end of file +``` diff --git a/docs/Python_API.md b/docs/Python_API.md index 4eb7c51456c523743983a0f7e21425f65fe8a563..9058a550de4b662d96d8e86543967acb4e0999dc 100644 --- a/docs/Python_API.md +++ b/docs/Python_API.md @@ -1,3 +1,5 @@ +# Python API + ### kperf.open kperf.open(collector_type: kperf.PmuTaskType, pmu_attr: kperf.PmuAttr) @@ -59,12 +61,12 @@ kperf.open(collector_type: kperf.PmuTaskType, pmu_attr: kperf.PmuAttr) * KPERF_SAMPLE_BRANCH_HV = 1 << 2 分支目标位于虚拟机管理程序中 * KPERF_SAMPLE_BRANCH_ANY = 1 << 3 任意分支目标 * KPERF_SAMPLE_BRANCH_ANY_CALL = 1 << 4 任意调用分支(包括直接调用,间接调用和远程调用) - * KPERF_SAMPLE_BRANCH_ANY_RETURN = 1 << 5 任意返回分支 + * KPERF_SAMPLE_BRANCH_ANY_RETURN = 1 << 5 任意返回分支 * KPERF_SAMPLE_BRANCH_IND_CALL = 1 << 6 间接调用分支 * KPERF_SAMPLE_BRANCH_ABORT_TX = 1 << 7 事物性内存中止 * KPERF_SAMPLE_BRANCH_IN_TX = 1 << 8 事物内存分支 * KPERF_SAMPLE_BRANCH_NO_TX = 1 << 9 
分支不在事物性内存事物中 - * KPERF_SAMPLE_BRANCH_COND = 1 << 10 条件分支 + * KPERF_SAMPLE_BRANCH_COND = 1 << 10 条件分支 * KPERF_SAMPLE_BRANCH_CALL_STACK = 1 << 11 调用栈分支 * KPERF_SAMPLE_BRANCH_IND_JUMP = 1 << 12 跳跃分支 * KPERF_SAMPLE_BRANCH_CALL = 1 << 13 调用分支 @@ -99,6 +101,7 @@ pd为kperf.open返回值 返回值为int 如果返回值>0,则使能异常 如果返回值=0, 则使能正常 + ### kperf.disable kperf.disable(pd: int) 该接口用于关闭某个pd的采样 @@ -113,11 +116,13 @@ kperf.enable(pd) time.sleep(1) kperf.disable(pd) ``` + ### kperf.read kperf.read(pd: int) 读取pd采样的数据 pd为kperf.open返回值 返回值为PmuData + * class PmuData * len 数据长度 * iter 返回Iterator[ImplPmuData] @@ -155,8 +160,8 @@ pd为kperf.open返回值 * va 虚拟地址 * event 事件ID * branchRecords brbe数据 - * iter brbe数据迭代器 - * len brbe数据长度 + * iter brbe数据迭代器 + * len brbe数据长度 * rawData: tracepointer数据指针,搭配kperf.get_field和Kperf.get_field_exp使用 以下为kperf.read示例 @@ -167,12 +172,15 @@ pmu_data = kperf.read(pd) for data in pmu_data.iter: print(f"cpu {data.cpu} count {data.count} evt {data.evt}") ``` + ### kperf.close kperf.close(pd: int) 该接口用于清理该pd所有的对应数据,并移除该pd + ### kperf.dump dump(pmuData: PmuData, filepath: str, dump_dwf: int) + * pmuData 由kperf.read返回的PmuData数据 * filePath @@ -185,6 +193,7 @@ dump(pmuData: PmuData, filepath: str, dump_dwf: int) get_field(pmu_data: ImplPmuData, field_name: str, value: c_void_p) 获取tracepointer format某个字段数据,format数据可通过/sys/kernel/tracing/events/或者/sys/kernel/debug/tracing/events/进行查找 + * pmu_data: ImplePmuData 详细见kperf.read返回数据说明 * field_name @@ -218,10 +227,12 @@ for data in pmu_data.iter: print(f"next_comm={next_comm};prev_pid={prev_pid.value}") ``` + ### kperf.get_field_exp -get_field_exp(pmu_data: ImplPmuData, field_name: str) +get_field_exp(pmu_data: ImplPmuData, field_name: str) 获取某个字段属性说明 + * pmu_data: ImplePmuData 详细见kperf.read返回数据说明 * field_name @@ -239,10 +250,12 @@ field = kperf.get_field_exp(data, "name") print("field_str={} field_name={} size={} offset={} isSigned={}" .format(field.field_name, field.field_str, field.size, field.offset, field.is_signed)) ``` + ### 
kperf.event_list event_list(event_type: PmuEventType) 查找所有的事件列表 + * class PmuEventType: * CORE_EVENT = 0 获取core事件列表 * UNCORE_EVENT = 1 获取uncore事件列表 @@ -257,9 +270,11 @@ event_list(event_type: PmuEventType) for evt in kperf.event_list(kperf.PmuEventType.CORE_EVENT): print(f"event name: {evt}") ``` + ### kperf.trace_open kperf.trace_open(trace_type: kperf.PmuTraceType, pmu_trace_attr: kperf.PmuTraceAttr) # 初始化采集系统调用函数能力 + * class PmuTraceType: * TRACE_SYS_CALL = 0 采集系统调用函数事件 * class PmuTraceAttr: @@ -281,14 +296,17 @@ if pd == -1: print(kperf.error()) exit(1) ``` + ### kperf.trace_enable、kperf.trace_disable 调用逻辑类似kperf.enable、kperf.disable,用于配置采集启动和结束的时刻,两个调用之间的时间即是采集的时间段 + ### kperf.trace_read kperf.trace_read(pd: int) pd为kperf.trace_open返回值 返回值是PmuTraceData + * class PmuTraceData: * len: 数据长度 * iter: 返回iterator[lmplPmuTraceData] @@ -307,12 +325,15 @@ pmu_trace_data = kperf.trace_read(pd) for pmu_trace in pmu_trace_data.iter: print("funcs: %s, elapsedTime: %d, pid: %d, tid: %d, cpu: %d, comm: %s" % (pmu_trace.funcs, pmu_trace.elapsedTime, pmu_trace.pid, pmu_trace.tid, pmu_trace.cpu, pmu_trace.comm)) ``` + ### kperf.trace_close kperf.trace_close(pd: int): 该接口用于清理该pd所有对应的数据,并移除该pd + ### kperf.sys_call_func_list kperf.sys_call_func_list(): 查找所有的系统调用函数列表 + * 返回数据 iterator[str], 可通过for循环遍历该单元 以下为kperf.sys_call_func_list示例