1. tracepoint原理
tracepoint是预先在函数的插入点中插桩,当执行到函数的插入点时,则执行插桩函数,进而触发与插入点预先绑定的probe函数。probe函数可以是一个或者多个,并且可以定义为任意的行为,从而起到观测函数内部的作用。目前内核已经不提倡手动创建tracepoint,因此将tracepoint-sample从内核sample代码中删除,取而代之的是trace event,大大简化了tracepoint的使用。但是如果要更好地理解trace event,最好也对tracepoint做一个了解。后面介绍trace event时我们可以再回过头进行比较。
2. 使用tracepoint的步骤
2.1 DECLARE_TRACE
需要在头文件中通过DECLARE_TRACE宏声明,DECLARE_TRACE的关键是定义了register_trace_xxx函数来完成tracepoint与probe的绑定,同时还定义了trace_xxx函数来触发probe的执行,DECLARE_TRACE定义如下:
/*
 * DECLARE_TRACE - declare the tracepoint @name (normally from a header).
 *
 * Thin wrapper around __DECLARE_TRACE: it supplies
 * cpu_online(raw_smp_processor_id()) as the condition, and derives the
 * probe-side prototype/argument list by prepending "void *__data" to @proto
 * and "__data" to @args.
 */
#define DECLARE_TRACE(name, proto, args) \
__DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \
cpu_online(raw_smp_processor_id()), \
PARAMS(void *__data, proto), \
PARAMS(__data, args))
/*
 * __DECLARE_TRACE - emit the declarations and inline helpers for tracepoint
 * @name:
 *   - extern declarations of __traceiter_<name>, the tp_func_<name> static
 *     call and the struct tracepoint __tracepoint_<name>;
 *   - trace_<name>(): expands __DO_TRACE only behind the tracepoint's static
 *     key; when CONFIG_LOCKDEP is enabled and @cond holds it additionally
 *     dereferences ->funcs inside an RCU-sched read section;
 *   - register_trace_<name>(), register_trace_prio_<name>() and
 *     unregister_trace_<name>(): forward to the tracepoint_probe_* functions
 *     with &__tracepoint_<name>;
 *   - check_trace_callback_type_<name>(): empty body (presumably exists only
 *     so that a callback can be type-checked against the probe prototype);
 *   - trace_<name>_enabled(): returns static_key_false() on the tracepoint's
 *     key.
 *
 * Every line of the macro body except the last one must end with a line
 * continuation; a missing one silently ends the macro early and leaves the
 * remaining helpers outside of it.
 */
#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
	extern int __traceiter_##name(data_proto); \
	DECLARE_STATIC_CALL(tp_func_##name, __traceiter_##name); \
	extern struct tracepoint __tracepoint_##name; \
	static inline void trace_##name(proto) \
	{ \
		if (static_key_false(&__tracepoint_##name.key)) \
			__DO_TRACE(name, \
				TP_PROTO(data_proto), \
				TP_ARGS(data_args), \
				TP_CONDITION(cond), 0); \
		if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \
			rcu_read_lock_sched_notrace(); \
			rcu_dereference_sched(__tracepoint_##name.funcs); \
			rcu_read_unlock_sched_notrace(); \
		} \
	} \
	__DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args), \
		PARAMS(cond), PARAMS(data_proto), PARAMS(data_args)) \
	static inline int \
	register_trace_##name(void (*probe)(data_proto), void *data) \
	{ \
		return tracepoint_probe_register(&__tracepoint_##name, \
						(void *)probe, data); \
	} \
	static inline int \
	register_trace_prio_##name(void (*probe)(data_proto), void *data, \
				   int prio) \
	{ \
		return tracepoint_probe_register_prio(&__tracepoint_##name, \
						(void *)probe, data, prio); \
	} \
	static inline int \
	unregister_trace_##name(void (*probe)(data_proto), void *data) \
	{ \
		return tracepoint_probe_unregister(&__tracepoint_##name, \
						(void *)probe, data); \
	} \
	static inline void \
	check_trace_callback_type_##name(void (*cb)(data_proto)) \
	{ \
	} \
	static inline bool \
	trace_##name##_enabled(void) \
	{ \
		return static_key_false(&__tracepoint_##name.key); \
	}
2.2 DEFINE_TRACE
在.c文件中通过DEFINE_TRACE定义,DEFINE_TRACE的关键是定义了struct tracepoint __tracepoint_xxx变量,DEFINE_TRACE定义如下:
/*
 * DEFINE_TRACE - define the tracepoint @name (in a .c file).
 * Equivalent to DEFINE_TRACE_FN with both the register and unregister
 * callbacks set to NULL.
 */
#define DEFINE_TRACE(name, proto, args) \
DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args));
/*
 * DEFINE_TRACE_FN - emit the definitions backing tracepoint @_name:
 *   - __tpstrtab_<name>: the tracepoint name string, placed in the
 *     "__tracepoints_strings" section;
 *   - __tracepoint_<name>: the struct tracepoint itself, placed in the
 *     "__tracepoints" section, with the static key initialised to false,
 *     @_reg/@_unreg stored as regfunc/unregfunc and no probes (.funcs = NULL);
 *   - __traceiter_<name>(): the iterator that calls the registered probes;
 *   - the tp_func_<name> static call, initially pointing at the iterator.
 *
 * The iterator reads ->funcs through rcu_dereference_raw() and walks the
 * array with a do/while loop that stops at the first entry whose ->func is
 * NULL. Because the first entry is used before any check, the loop relies on
 * ->funcs being non-NULL and holding at least one probe when it is called.
 * __data is overwritten with each entry's ->data before the probe is invoked.
 */
#define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args) \
static const char __tpstrtab_##_name[] \
__section("__tracepoints_strings") = #_name; \
extern struct static_call_key STATIC_CALL_KEY(tp_func_##_name); \
int __traceiter_##_name(void *__data, proto); \
struct tracepoint __tracepoint_##_name __used \
__section("__tracepoints") = { \
.name = __tpstrtab_##_name, \
.key = STATIC_KEY_INIT_FALSE, \
.static_call_key = &STATIC_CALL_KEY(tp_func_##_name), \
.static_call_tramp = STATIC_CALL_TRAMP_ADDR(tp_func_##_name), \
.iterator = &__traceiter_##_name, \
.regfunc = _reg, \
.unregfunc = _unreg, \
.funcs = NULL }; \
__TRACEPOINT_ENTRY(_name); \
int __traceiter_##_name(void *__data, proto) \
{ \
struct tracepoint_func *it_func_ptr; \
void *it_func; \
\
it_func_ptr = \
rcu_dereference_raw((&__tracepoint_##_name)->funcs); \
do { \
it_func = (it_func_ptr)->func; \
__data = (it_func_ptr)->data; \
((void(*)(void *, proto))(it_func))(__data, args); \
} while ((++it_func_ptr)->func); \
return 0; \
} \
DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name);
2.3 定义probe
定义插入点执行时对应的probe函数,可以在probe函数中定义自己的行为
2.4 将tracepoint与probe进行绑定
将DEFINE_TRACE定义的tracepoint变量与probe插入点回调进行绑定,这样在执行到插入点时会触发probe函数
3. tracepoint示例
本示例采用内核自带的tracepoint-sample来说明tracepoint的使用步骤,示例源码参考:
https://github.com/jasonactions/Linux-labs/tree/master/trace_point
3.1 DECLARE_TRACE
在tp-samples-trace.h中定义如下:
#ifndef _TP_SAMPLES_TRACE_H
#define _TP_SAMPLES_TRACE_H
#include <linux/proc_fs.h> /* for struct inode and struct file */
#include <linux/tracepoint.h>
/*
 * Declare the subsys_event tracepoint; its probes receive the inode and
 * file pointers given to trace_subsys_event().
 */
DECLARE_TRACE(subsys_event,
	TP_PROTO(struct inode *inode, struct file *file),
	TP_ARGS(inode, file));
#endif /* _TP_SAMPLES_TRACE_H */
对于tracepoint-sample,DECLARE_TRACE展开(简化)为:
/*
 * Declarations produced by the expansion: the probe iterator, the
 * static-call key, a second static-call symbol typed like the iterator
 * (presumably the static-call trampoline), and the tracepoint object itself.
 * The definitions come from DEFINE_TRACE in the .c file.
 */
extern int __traceiter_subsys_event(void *__data, struct inode *inode, struct file *file);
extern struct static_call_key __SCK__tp_func_subsys_event;
extern typeof(__traceiter_subsys_event) __SCT__tp_func_subsys_event;
extern struct tracepoint __tracepoint_subsys_event;
// 触发probe执行
static inline __attribute__((__gnu_inline__)) __attribute__((__unused__)) __attribute__((__no_instrument_function__))
void trace_subsys_event(struct inode *inode, struct file *file)
{
//回调各个probe函数
__traceiter_subsys_event(__data, inode, file);
}
/*
 * Bind a probe to the subsys_event tracepoint.
 * @probe: callback taking (void *__data, struct inode *, struct file *).
 * @data:  pointer forwarded to tracepoint_probe_register() together with
 *         the probe.
 * Returns whatever tracepoint_probe_register() returns.
 */
static inline __attribute__((__gnu_inline__)) __attribute__((__unused__)) __attribute__((__no_instrument_function__))
int register_trace_subsys_event(void (*probe)(void *__data, struct inode *inode, struct file *file), void *data)
{
return tracepoint_probe_register(&__tracepoint_subsys_event, (void *)probe, data);
}
/*
 * Same as register_trace_subsys_event(), but additionally forwards @prio to
 * tracepoint_probe_register_prio(). Returns that function's result.
 */
static inline __attribute__((__gnu_inline__)) __attribute__((__unused__)) __attribute__((__no_instrument_function__))
int register_trace_prio_subsys_event(void (*probe)(void *__data, struct inode *inode, struct file *file), void *data, int prio)
{
return tracepoint_probe_register_prio(&__tracepoint_subsys_event, (void *)probe, data, prio);
}
/*
 * Detach a probe from the subsys_event tracepoint by forwarding @probe and
 * @data to tracepoint_probe_unregister(). Returns that function's result.
 */
static inline __attribute__((__gnu_inline__)) __attribute__((__unused__)) __attribute__((__no_instrument_function__))
int unregister_trace_subsys_event(void (*probe)(void *__data, struct inode *inode, struct file *file), void *data)
{
return tracepoint_probe_unregister(&__tracepoint_subsys_event, (void *)probe, data);
}
/*
 * Intentionally empty. Its only content is the parameter type, which matches
 * the probe prototype; presumably it exists so that passing a callback to it
 * makes the compiler check the callback's signature.
 */
static inline __attribute__((__gnu_inline__)) __attribute__((__unused__)) __attribute__((__no_instrument_function__))
void check_trace_callback_type_subsys_event(void (*cb)(void *__data, struct inode *inode, struct file *file))
{
}
static inline __attribute__((__gnu_inline__)) __attribute__((__unused__)) __
attribute__((__no_instrument_function__)) bool trace_subsys_event_enabled(void)
{
return static_key_false(&__tracepoint_subsys_event.key);
}
可以看出DECLARE_TRACE的关键是定义了register_trace_subsys_event函数来完成tracepoint与probe的绑定,同时它还定义了trace_subsys_event函数,通过调用trace_subsys_event函数来调用__traceiter_subsys_event触发probe执行
3.2 DEFINE_TRACE
在tracepoint-sample.c 中定义如下:
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/proc_fs.h>
#include "tp-samples-trace.h"
/*
 * Define the subsys_event tracepoint. The prototype and argument list
 * repeat the ones given to DECLARE_TRACE in tp-samples-trace.h.
 */
DEFINE_TRACE(subsys_event,
TP_PROTO(struct inode *inode, struct file *file),
TP_ARGS(inode, file));
对于tracepoint-sample,DEFINE_TRACE展开(简化)为:
static const char __tpstrtab_subsys_event[]
__attribute__((__section__("__tracepoints_strings"))) = "subsys_event";
extern struct static_call_key __SCK__tp_func_subsys_event;
int __traceiter_subsys_event(void *__data, struct inode *inode, struct file *file);
/*
 * The tracepoint object itself, placed in the "__tracepoints" section.
 * It starts out with a zeroed static key, no register/unregister callbacks
 * and no probes attached (.funcs is a null pointer).
 */
struct tracepoint __tracepoint_subsys_event __attribute__((__used__))
__attribute__((__section__("__tracepoints"))) = {
.name = __tpstrtab_subsys_event,
.key = { .enabled = { 0 }, { .entries = (void *)0UL } },
.static_call_key = &__SCK__tp_func_subsys_event,
.static_call_tramp = ((void *)0),
.iterator = &__traceiter_subsys_event,
.regfunc = ((void *)0),
.unregfunc = ((void *)0),
.funcs = ((void *)0)
};
// 执行tracepoint的probe函数,一个tracepoint可以有多个回调
int __traceiter_subsys_event(void *__data, struct inode *inode, struct file *file)
{
....
do {
it_func = (it_func_ptr)->func;
__data = (it_func_ptr)->data;
((void (*)(void *, struct inode *inode, struct file *file))(it_func))(__data, inode, file);
} while ((++it_func_ptr)->func);
return 0;
}
/*
 * Static-call definition for the tracepoint: the key is initialised so that
 * its .func points at the iterator __traceiter_subsys_event.
 */
extern struct static_call_key __SCK__tp_func_subsys_event;
extern typeof(__traceiter_subsys_event) __SCT__tp_func_subsys_event;
struct static_call_key __SCK__tp_func_subsys_event = { .func = __traceiter_subsys_event, };
DEFINE_TRACE的关键是定义了struct tracepoint __tracepoint_subsys_event变量,并对其进行初始化;同时它还定义了__traceiter_subsys_event,它主要完成对各个probe函数的回调
3.3 定义probe
本例中定义了如下的probe函数:
/*
 * Probe for the subsys_event tracepoint: logs the addresses of the inode and
 * file it was handed. @__data is not used.
 */
void my_subsys_event(void *__data, struct inode *inode, struct file *file)
{
	unsigned long inode_addr = (unsigned long)inode;
	unsigned long file_addr = (unsigned long)file;

	printk("inode: 0x%lx, file: 0x%lx\n", inode_addr, file_addr);
}
并通过显式调用register_trace_subsys_event来完成tracepoint与probe函数的绑定。注意probe函数的参数类型和个数要与DECLARE_TRACE和DEFINE_TRACE中声明的保持一致(probe的第一个参数额外为void *__data)
3.4 将 tracepoint与probe绑定
在初始化函数中执行如下,完成tracepoint与probe绑定
/*
 * Attach my_subsys_event to the subsys_event tracepoint, with NULL as the
 * probe's private data. Note that the int result is discarded here.
 */
register_trace_subsys_event(my_subsys_event, NULL);
3.5 触发probe
通过trace_subsys_event可触发probe函数的执行
/* Fire the tracepoint: inode and file are handed on to the bound probe(s). */
trace_subsys_event(inode, file);
运行结果如下:
# insmod tracepoint-sample.ko
# cat /proc/tracepoint-sample
[78074.813467] inode: 0xffff000001967a70, file: 0xffff000007f466c0