Mirror of strace – the Linux syscall tracer
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

perf.c 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. /*
  2. * Copyright (c) 2013 Ben Noordhuis <info@bnoordhuis.nl>
  3. * Copyright (c) 2013-2015 Dmitry V. Levin <ldv@altlinux.org>
  4. * Copyright (c) 2016 Eugene Syromyatnikov <evgsyr@gmail.com>
  5. * Copyright (c) 2015-2018 The strace developers.
  6. * All rights reserved.
  7. *
  8. * SPDX-License-Identifier: LGPL-2.1-or-later
  9. */
  10. #include "defs.h"
  11. #include "perf_event_struct.h"
  12. #include "print_fields.h"
  13. #include "xlat/hw_breakpoint_len.h"
  14. #include "xlat/hw_breakpoint_type.h"
  15. #include "xlat/perf_attr_size.h"
  16. #include "xlat/perf_branch_sample_type.h"
  17. #include "xlat/perf_event_open_flags.h"
  18. #include "xlat/perf_event_read_format.h"
  19. #include "xlat/perf_event_sample_format.h"
  20. #include "xlat/perf_hw_cache_id.h"
  21. #include "xlat/perf_hw_cache_op_id.h"
  22. #include "xlat/perf_hw_cache_op_result_id.h"
  23. #include "xlat/perf_hw_id.h"
  24. #include "xlat/perf_sw_ids.h"
  25. #include "xlat/perf_type_id.h"
  26. struct pea_desc {
  27. struct perf_event_attr *attr;
  28. uint32_t size;
  29. };
  30. static void
  31. free_pea_desc(void *pea_desc_ptr)
  32. {
  33. struct pea_desc *desc = pea_desc_ptr;
  34. free(desc->attr);
  35. free(desc);
  36. }
  37. int
  38. fetch_perf_event_attr(struct tcb *const tcp, const kernel_ulong_t addr)
  39. {
  40. struct pea_desc *desc;
  41. struct perf_event_attr *attr;
  42. uint32_t size;
  43. if (umove(tcp, addr + offsetof(struct perf_event_attr, size), &size)) {
  44. printaddr(addr);
  45. return 1;
  46. }
  47. if (size > sizeof(*attr))
  48. size = sizeof(*attr);
  49. if (!size)
  50. size = PERF_ATTR_SIZE_VER0;
  51. /*
  52. * Kernel (rightfully) deems invalid attribute structures with size less
  53. * than first published format size, and we do the same.
  54. */
  55. if (size < PERF_ATTR_SIZE_VER0) {
  56. printaddr(addr);
  57. return 1;
  58. }
  59. if (abbrev(tcp))
  60. size = offsetofend(struct perf_event_attr, config);
  61. /* Size should be multiple of 8, but kernel doesn't check for it */
  62. /* size &= ~7; */
  63. attr = xzalloc(sizeof(*attr));
  64. if (umoven_or_printaddr(tcp, addr, size, attr)) {
  65. free(attr);
  66. return 1;
  67. }
  68. desc = xmalloc(sizeof(*desc));
  69. desc->attr = attr;
  70. desc->size = size;
  71. set_tcb_priv_data(tcp, desc, free_pea_desc);
  72. return 0;
  73. }
  74. void
  75. print_perf_event_attr(struct tcb *const tcp, const kernel_ulong_t addr)
  76. {
  77. static const char *precise_ip_desc[] = {
  78. "arbitrary skid",
  79. "constant skid",
  80. "requested to have 0 skid",
  81. "must have 0 skid",
  82. };
  83. struct pea_desc *desc;
  84. struct perf_event_attr *attr;
  85. uint32_t size;
  86. uint32_t new_size;
  87. int use_new_size = 0;
  88. /*
  89. * Amusingly, kernel accepts structures with only part of the field
  90. * present, so we making check like this (instead of checking
  91. * offsetofend against size) in order to print fields as kernel sees
  92. * them. This also should work great on big endian architectures.
  93. */
  94. #define _PERF_CHECK_FIELD(_field) \
  95. do { \
  96. if (offsetof(struct perf_event_attr, _field) >= size) \
  97. goto print_perf_event_attr_out; \
  98. } while (0)
  99. desc = get_tcb_priv_data(tcp);
  100. attr = desc->attr;
  101. size = desc->size;
  102. /* The only error which expected to change size field currently */
  103. if (tcp->u_error == E2BIG) {
  104. if (umove(tcp, addr + offsetof(struct perf_event_attr, size),
  105. &new_size))
  106. use_new_size = -1;
  107. else
  108. use_new_size = 1;
  109. }
  110. PRINT_FIELD_XVAL("{", *attr, type, perf_type_id, "PERF_TYPE_???");
  111. PRINT_FIELD_XVAL(", ", *attr, size, perf_attr_size,
  112. "PERF_ATTR_SIZE_???");
  113. if (use_new_size) {
  114. tprints(" => ");
  115. if (use_new_size > 0)
  116. printxval(perf_attr_size, new_size,
  117. "PERF_ATTR_SIZE_???");
  118. else
  119. tprints("???");
  120. }
  121. switch (attr->type) {
  122. case PERF_TYPE_HARDWARE:
  123. PRINT_FIELD_XVAL(", ", *attr, config, perf_hw_id,
  124. "PERF_COUNT_HW_???");
  125. break;
  126. case PERF_TYPE_SOFTWARE:
  127. PRINT_FIELD_XVAL(", ", *attr, config, perf_sw_ids,
  128. "PERF_COUNT_SW_???");
  129. break;
  130. case PERF_TYPE_TRACEPOINT:
  131. /*
  132. * "The value to use in config can be obtained from under
  133. * debugfs tracing/events/../../id if ftrace is enabled
  134. * in the kernel."
  135. */
  136. PRINT_FIELD_U(", ", *attr, config);
  137. break;
  138. case PERF_TYPE_HW_CACHE:
  139. /*
  140. * (perf_hw_cache_id) | (perf_hw_cache_op_id << 8) |
  141. * (perf_hw_cache_op_result_id << 16)
  142. */
  143. tprints(", config=");
  144. printxval(perf_hw_cache_id, attr->config & 0xFF,
  145. "PERF_COUNT_HW_CACHE_???");
  146. tprints("|");
  147. printxval(perf_hw_cache_op_id, (attr->config >> 8) & 0xFF,
  148. "PERF_COUNT_HW_CACHE_OP_???");
  149. tprints("<<8|");
  150. /*
  151. * Current code (see set_ext_hw_attr in arch/x86/events/core.c,
  152. * tile_map_cache_event in arch/tile/kernel/perf_event.c,
  153. * arc_pmu_cache_event in arch/arc/kernel/perf_event.c,
  154. * hw_perf_cache_event in arch/blackfin/kernel/perf_event.c,
  155. * _hw_perf_cache_event in arch/metag/kernel/perf/perf_event.c,
  156. * mipspmu_map_cache_event in arch/mips/kernel/perf_event_mipsxx.c,
  157. * hw_perf_cache_event in arch/powerpc/perf/core-book3s.c,
  158. * hw_perf_cache_event in arch/powerpc/perf/core-fsl-emb.c,
  159. * hw_perf_cache_event in arch/sh/kernel/perf_event.c,
  160. * sparc_map_cache_event in arch/sparc/kernel/perf_event.c,
  161. * xtensa_pmu_cache_event in arch/xtensa/kernel/perf_event.c,
  162. * armpmu_map_cache_event in drivers/perf/arm_pmu.c) assumes
  163. * that cache result is 8 bits in size.
  164. */
  165. printxval(perf_hw_cache_op_result_id,
  166. (attr->config >> 16) & 0xFF,
  167. "PERF_COUNT_HW_CACHE_RESULT_???");
  168. tprints("<<16");
  169. if (attr->config >> 24) {
  170. tprintf("|%#" PRIx64 "<<24", attr->config >> 24);
  171. tprints_comment("PERF_COUNT_HW_CACHE_???");
  172. }
  173. break;
  174. case PERF_TYPE_RAW:
  175. /*
  176. * "If type is PERF_TYPE_RAW, then a custom "raw" config
  177. * value is needed. Most CPUs support events that are not
  178. * covered by the "generalized" events. These are
  179. * implementation defined; see your CPU manual (for example the
  180. * Intel Volume 3B documentation or the AMD BIOS and Kernel
  181. * Developer Guide). The libpfm4 library can be used to
  182. * translate from the name in the architectural manuals
  183. * to the raw hex value perf_event_open() expects in this
  184. * field."
  185. */
  186. case PERF_TYPE_BREAKPOINT:
  187. /*
  188. * "If type is PERF_TYPE_BREAKPOINT, then leave config set
  189. * to zero. Its parameters are set in other places."
  190. */
  191. default:
  192. PRINT_FIELD_X(", ", *attr, config);
  193. break;
  194. }
  195. if (abbrev(tcp))
  196. goto print_perf_event_attr_out;
  197. if (attr->freq)
  198. PRINT_FIELD_U(", ", *attr, sample_freq);
  199. else
  200. PRINT_FIELD_U(", ", *attr, sample_period);
  201. PRINT_FIELD_FLAGS(", ", *attr, sample_type, perf_event_sample_format,
  202. "PERF_SAMPLE_???");
  203. PRINT_FIELD_FLAGS(", ", *attr, read_format, perf_event_read_format,
  204. "PERF_FORMAT_???");
  205. tprintf(", disabled=%u"
  206. ", inherit=%u"
  207. ", pinned=%u"
  208. ", exclusive=%u"
  209. ", exclusive_user=%u"
  210. ", exclude_kernel=%u"
  211. ", exclude_hv=%u"
  212. ", exclude_idle=%u"
  213. ", mmap=%u"
  214. ", comm=%u"
  215. ", freq=%u"
  216. ", inherit_stat=%u"
  217. ", enable_on_exec=%u"
  218. ", task=%u"
  219. ", watermark=%u"
  220. ", precise_ip=%u",
  221. attr->disabled,
  222. attr->inherit,
  223. attr->pinned,
  224. attr->exclusive,
  225. attr->exclude_user,
  226. attr->exclude_kernel,
  227. attr->exclude_hv,
  228. attr->exclude_idle,
  229. attr->mmap,
  230. attr->comm,
  231. attr->freq,
  232. attr->inherit_stat,
  233. attr->enable_on_exec,
  234. attr->task,
  235. attr->watermark,
  236. attr->precise_ip);
  237. tprints_comment(precise_ip_desc[attr->precise_ip]);
  238. tprintf(", mmap_data=%u"
  239. ", sample_id_all=%u"
  240. ", exclude_host=%u"
  241. ", exclude_guest=%u"
  242. ", exclude_callchain_kernel=%u"
  243. ", exclude_callchain_user=%u"
  244. ", mmap2=%u"
  245. ", comm_exec=%u"
  246. ", use_clockid=%u"
  247. ", context_switch=%u"
  248. ", write_backward=%u"
  249. ", namespaces=%u",
  250. attr->mmap_data,
  251. attr->sample_id_all,
  252. attr->exclude_host,
  253. attr->exclude_guest,
  254. attr->exclude_callchain_kernel,
  255. attr->exclude_callchain_user,
  256. attr->mmap2,
  257. attr->comm_exec,
  258. attr->use_clockid,
  259. attr->context_switch,
  260. attr->write_backward,
  261. attr->namespaces);
  262. /*
  263. * Print it only in case it is non-zero, since it may contain flags we
  264. * are not aware about.
  265. */
  266. if (attr->__reserved_1) {
  267. tprintf(", __reserved_1=%#" PRIx64,
  268. (uint64_t) attr->__reserved_1);
  269. tprints_comment("Bits 63..29");
  270. }
  271. if (attr->watermark)
  272. PRINT_FIELD_U(", ", *attr, wakeup_watermark);
  273. else
  274. PRINT_FIELD_U(", ", *attr, wakeup_events);
  275. if (attr->type == PERF_TYPE_BREAKPOINT)
  276. /* Any combination of R/W with X is deemed invalid */
  277. PRINT_FIELD_XVAL(", ", *attr, bp_type, hw_breakpoint_type,
  278. (attr->bp_type <=
  279. (HW_BREAKPOINT_X | HW_BREAKPOINT_RW))
  280. ? "HW_BREAKPOINT_INVALID"
  281. : "HW_BREAKPOINT_???");
  282. if (attr->type == PERF_TYPE_BREAKPOINT)
  283. PRINT_FIELD_X(", ", *attr, bp_addr);
  284. else
  285. PRINT_FIELD_X(", ", *attr, config1);
  286. /*
  287. * Fields after bp_addr/config1 are optional and may not present; check
  288. * against size is needed.
  289. */
  290. _PERF_CHECK_FIELD(bp_len);
  291. if (attr->type == PERF_TYPE_BREAKPOINT)
  292. PRINT_FIELD_U(", ", *attr, bp_len);
  293. else
  294. PRINT_FIELD_X(", ", *attr, config2);
  295. _PERF_CHECK_FIELD(branch_sample_type);
  296. if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) {
  297. PRINT_FIELD_FLAGS(", ", *attr, branch_sample_type,
  298. perf_branch_sample_type,
  299. "PERF_SAMPLE_BRANCH_???");
  300. }
  301. _PERF_CHECK_FIELD(sample_regs_user);
  302. /*
  303. * "This bit mask defines the set of user CPU registers to dump on
  304. * samples. The layout of the register mask is architecture-specific and
  305. * described in the kernel header
  306. * arch/ARCH/include/uapi/asm/perf_regs.h."
  307. */
  308. PRINT_FIELD_X(", ", *attr, sample_regs_user);
  309. _PERF_CHECK_FIELD(sample_stack_user);
  310. /*
  311. * "size of the user stack to dump if PERF_SAMPLE_STACK_USER is
  312. * specified."
  313. */
  314. if (attr->sample_type & PERF_SAMPLE_STACK_USER)
  315. PRINT_FIELD_X(", ", *attr, sample_stack_user);
  316. if (attr->use_clockid) {
  317. _PERF_CHECK_FIELD(clockid);
  318. PRINT_FIELD_XVAL(", ", *attr, clockid, clocknames, "CLOCK_???");
  319. }
  320. _PERF_CHECK_FIELD(sample_regs_intr);
  321. PRINT_FIELD_X(", ", *attr, sample_regs_intr);
  322. _PERF_CHECK_FIELD(aux_watermark);
  323. PRINT_FIELD_U(", ", *attr, aux_watermark);
  324. _PERF_CHECK_FIELD(sample_max_stack);
  325. PRINT_FIELD_U(", ", *attr, sample_max_stack);
  326. /* _PERF_CHECK_FIELD(__reserved_2);
  327. PRINT_FIELD_U(", ", *attr, __reserved2); */
  328. print_perf_event_attr_out:
  329. if ((attr->size && (attr->size > size)) ||
  330. (!attr->size && (size < PERF_ATTR_SIZE_VER0)))
  331. tprints(", ...");
  332. tprints("}");
  333. }
  334. SYS_FUNC(perf_event_open)
  335. {
  336. /*
  337. * We try to copy out the whole structure on entering in order to check
  338. * size value on exiting. We do not check the rest of the fields because
  339. * they shouldn't be changed, but copy the whole structure instead
  340. * of just size field because they could.
  341. */
  342. if (entering(tcp)) {
  343. if (!fetch_perf_event_attr(tcp, tcp->u_arg[0]))
  344. return 0;
  345. } else {
  346. print_perf_event_attr(tcp, tcp->u_arg[0]);
  347. }
  348. tprintf(", %d, %d, ",
  349. (int) tcp->u_arg[1],
  350. (int) tcp->u_arg[2]);
  351. printfd(tcp, tcp->u_arg[3]);
  352. tprints(", ");
  353. printflags64(perf_event_open_flags, tcp->u_arg[4], "PERF_FLAG_???");
  354. return RVAL_DECODED | RVAL_FD;
  355. }