Bootstrap

tracepoint简介

1. tracepoint原理

tracepoint是预先在函数的插入点中插桩,当执行到函数的插入点时,会执行插桩函数,进而触发与插入点预先绑定的probe函数。probe函数可以是一个或者多个,并且可以定义为任意的行为,从而起到观测函数内部的作用。目前内核已经不提倡手动创建tracepoint,因此将tracepoint-sample从内核sample代码中删除,取而代之的是trace event,它大大简化了tracepoint的使用。但是如果要更好地理解trace event,最好也对tracepoint有所了解。后面介绍trace event时我们可以再回过头进行比较。

2. 使用tracepoint的步骤

2.1 DECLARE_TRACE

需要在头文件中通过DECLARE_TRACE宏声明,DECLARE_TRACE的关键是定义了register_trace_xxx函数来完成tracepoint与probe的绑定,同时还定义了trace_xxx函数来触发probe的执行,DECLARE_TRACE定义如下:

/*
 * DECLARE_TRACE - declare a tracepoint (used in a header file).
 *
 * Forwards to __DECLARE_TRACE with:
 *   - cond:       cpu_online(raw_smp_processor_id())
 *   - data_proto: proto with a leading "void *__data" parameter
 *   - data_args:  args with a leading "__data" argument
 */
#define DECLARE_TRACE(name, proto, args)                                \
        __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),              \
                        cpu_online(raw_smp_processor_id()),             \
                        PARAMS(void *__data, proto),                    \
                        PARAMS(__data, args))

/*
 * __DECLARE_TRACE - emit the declarations and inline helpers for one tracepoint.
 *
 * For a tracepoint called "name" this produces:
 *   - extern declarations of __traceiter_<name>, the tp_func_<name> static
 *     call and struct tracepoint __tracepoint_<name>;
 *   - trace_<name>(): calls __DO_TRACE() when the tracepoint's static key
 *     test succeeds; independently of that, when CONFIG_LOCKDEP is enabled
 *     and cond holds, it dereferences the funcs pointer inside an
 *     rcu_read_lock_sched_notrace() section;
 *   - register_trace_<name>(), register_trace_prio_<name>() and
 *     unregister_trace_<name>(): thin wrappers that pass
 *     &__tracepoint_<name> to the tracepoint_probe_*() functions;
 *   - check_trace_callback_type_<name>(): an empty function whose only
 *     content is its parameter type (the probe prototype);
 *   - trace_<name>_enabled(): returns static_key_false() of the key.
 *
 * Every line of the macro body except the last one must end in a backslash,
 * otherwise the definition is cut short at that line.
 */
#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
        extern int __traceiter_##name(data_proto);                      \
        DECLARE_STATIC_CALL(tp_func_##name, __traceiter_##name);        \
        extern struct tracepoint __tracepoint_##name;                   \
        static inline void trace_##name(proto)                          \
        {                                                               \
                if (static_key_false(&__tracepoint_##name.key))         \
                        __DO_TRACE(name,                                \
                                TP_PROTO(data_proto),                   \
                                TP_ARGS(data_args),                     \
                                TP_CONDITION(cond), 0);                 \
                if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {             \
                        rcu_read_lock_sched_notrace();                  \
                        rcu_dereference_sched(__tracepoint_##name.funcs);\
                        rcu_read_unlock_sched_notrace();                \
                }                                                       \
        }                                                               \
        __DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args),          \
                PARAMS(cond), PARAMS(data_proto), PARAMS(data_args))    \
        static inline int                                               \
        register_trace_##name(void (*probe)(data_proto), void *data)    \
        {                                                               \
                return tracepoint_probe_register(&__tracepoint_##name,  \
                                                (void *)probe, data);   \
        }                                                               \
        static inline int                                               \
        register_trace_prio_##name(void (*probe)(data_proto), void *data,\
                                   int prio)                            \
        {                                                               \
                return tracepoint_probe_register_prio(&__tracepoint_##name, \
                                              (void *)probe, data, prio); \
        }                                                               \
        static inline int                                               \
        unregister_trace_##name(void (*probe)(data_proto), void *data)  \
        {                                                               \
                return tracepoint_probe_unregister(&__tracepoint_##name,\
                                                (void *)probe, data);   \
        }                                                               \
        static inline void                                              \
        check_trace_callback_type_##name(void (*cb)(data_proto))        \
        {                                                               \
        }                                                               \
        static inline bool                                              \
        trace_##name##_enabled(void)                                    \
        {                                                               \
                return static_key_false(&__tracepoint_##name.key);      \
        }

2.2 DEFINE_TRACE

在.c文件中通过DEFINE_TRACE定义,DEFINE_TRACE的关键是定义了struct tracepoint __tracepoint_xxx变量,DEFINE_TRACE定义如下:

/*
 * DEFINE_TRACE - define a tracepoint (used in a .c file).
 *
 * Shorthand for DEFINE_TRACE_FN with both the _reg and _unreg arguments
 * set to NULL.
 */
#define DEFINE_TRACE(name, proto, args)         \
        DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args));

/*
 * DEFINE_TRACE_FN - define the storage and iterator for one tracepoint.
 *
 * For a tracepoint called "_name" this produces:
 *   - __tpstrtab_<_name>: the name string, placed in the
 *     "__tracepoints_strings" section;
 *   - struct tracepoint __tracepoint_<_name>, placed in the "__tracepoints"
 *     section, with its key initialised to STATIC_KEY_INIT_FALSE, regfunc /
 *     unregfunc set to _reg / _unreg, iterator pointing at
 *     __traceiter_<_name> and funcs set to NULL;
 *   - __traceiter_<_name>(): loads the tracepoint's funcs array with
 *     rcu_dereference_raw() and calls each entry's func with that entry's
 *     data pointer followed by args, stopping at the first entry whose func
 *     is NULL; always returns 0;
 *   - the tp_func_<_name> static call, defined with __traceiter_<_name> as
 *     its function.
 *
 * NOTE(review): the do/while loop reads it_func_ptr->func before any NULL
 * test on it_func_ptr itself, so the iterator appears to rely on funcs being
 * non-NULL whenever it is invoked — confirm against the call sites.
 */
#define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args)               \
        static const char __tpstrtab_##_name[]                          \
        __section("__tracepoints_strings") = #_name;                    \
        extern struct static_call_key STATIC_CALL_KEY(tp_func_##_name); \
        int __traceiter_##_name(void *__data, proto);                   \
        struct tracepoint __tracepoint_##_name  __used                  \
        __section("__tracepoints") = {                                  \
                .name = __tpstrtab_##_name,                             \
                .key = STATIC_KEY_INIT_FALSE,                           \
                .static_call_key = &STATIC_CALL_KEY(tp_func_##_name),   \
                .static_call_tramp = STATIC_CALL_TRAMP_ADDR(tp_func_##_name), \
                .iterator = &__traceiter_##_name,                       \
                .regfunc = _reg,                                        \
                .unregfunc = _unreg,                                    \
                .funcs = NULL };                                        \
        __TRACEPOINT_ENTRY(_name);                                      \
        int __traceiter_##_name(void *__data, proto)                    \
        {                                                               \
                struct tracepoint_func *it_func_ptr;                    \
                void *it_func;                                          \
                                                                        \
                it_func_ptr =                                           \
                        rcu_dereference_raw((&__tracepoint_##_name)->funcs); \
                do {                                                    \
                        it_func = (it_func_ptr)->func;                  \
                        __data = (it_func_ptr)->data;                   \
                        ((void(*)(void *, proto))(it_func))(__data, args); \
                } while ((++it_func_ptr)->func);                        \
                return 0;                                               \
        }                                                               \
        DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name);

2.3 定义probe

定义插入点执行时对应的probe函数,可以在probe函数中定义自己的行为

2.4 将tracepoint与probe进行绑定

将DEFINE_TRACE定义的tracepoint变量与probe插入点回调进行绑定,这样在执行到插入点时会触发probe函数

3. tracepoint示例

本示例采用内核自带的tracepoint-sample来说明tracepoint的使用步骤,示例源码参考:
https://github.com/jasonactions/Linux-labs/tree/master/trace_point

3.1 DECLARE_TRACE

在tp-samples-trace.h中定义如下:

/*
 * tp-samples-trace.h - declares the subsys_event tracepoint used by the
 * tracepoint sample.
 */
#ifndef _TP_SAMPLES_TRACE_H
#define _TP_SAMPLES_TRACE_H

#include <linux/proc_fs.h>      /* for struct inode and struct file */
#include <linux/tracepoint.h>

/* Tracepoint "subsys_event": takes (struct inode *inode, struct file *file). */
DECLARE_TRACE(subsys_event,
        TP_PROTO(struct inode *inode, struct file *file),
        TP_ARGS(inode, file));

#endif /* _TP_SAMPLES_TRACE_H */

对于tracepoint-sample,DECLARE_TRACE展开(简化)为:

/* Iterator function: takes the probe data pointer followed by the tracepoint arguments. */
extern int __traceiter_subsys_event(void *__data, struct inode *inode, struct file *file);
/*
 * Static-call symbols for tp_func_subsys_event; presumably what the
 * DECLARE_STATIC_CALL() line of __DECLARE_TRACE expands to — confirm.
 */
extern struct static_call_key __SCK__tp_func_subsys_event;
extern typeof(__traceiter_subsys_event) __SCT__tp_func_subsys_event;
/* The tracepoint object itself; only declared here. */
extern struct tracepoint __tracepoint_subsys_event;
/* Triggers execution of the probes attached to the subsys_event tracepoint. */
static inline __attribute__((__gnu_inline__)) __attribute__((__unused__)) __attribute__((__no_instrument_function__))
    void trace_subsys_event(struct inode *inode, struct file *file)
{
    /*
     * Simplified expansion: call back each registered probe through the
     * iterator.
     * NOTE(review): __data is not declared in this simplified body; in the
     * macro it is part of the arguments handed to __DO_TRACE — confirm
     * against the kernel source.
     */
    __traceiter_subsys_event(__data, inode, file);
}
/*
 * Binds a probe to the subsys_event tracepoint.
 *
 * probe: callback taking (void *__data, struct inode *, struct file *).
 * data:  pointer forwarded unchanged to tracepoint_probe_register().
 *
 * Returns whatever tracepoint_probe_register() returns.
 */
static inline
__attribute__((__gnu_inline__, __unused__, __no_instrument_function__))
int register_trace_subsys_event(void (*probe)(void *__data, struct inode *inode, struct file *file),
                                void *data)
{
    int ret;

    ret = tracepoint_probe_register(&__tracepoint_subsys_event, (void *)probe, data);
    return ret;
}

/*
 * Same as register_trace_subsys_event(), with an extra prio value that is
 * forwarded unchanged to tracepoint_probe_register_prio().
 *
 * Returns whatever tracepoint_probe_register_prio() returns.
 */
static inline
__attribute__((__gnu_inline__, __unused__, __no_instrument_function__))
int register_trace_prio_subsys_event(void (*probe)(void *__data, struct inode *inode, struct file *file),
                                     void *data, int prio)
{
    int ret;

    ret = tracepoint_probe_register_prio(&__tracepoint_subsys_event, (void *)probe, data, prio);
    return ret;
}

/*
 * Removes a probe from the subsys_event tracepoint by forwarding the
 * (probe, data) pair to tracepoint_probe_unregister().
 *
 * Returns whatever tracepoint_probe_unregister() returns.
 */
static inline
__attribute__((__gnu_inline__, __unused__, __no_instrument_function__))
int unregister_trace_subsys_event(void (*probe)(void *__data, struct inode *inode, struct file *file),
                                  void *data)
{
    int ret;

    ret = tracepoint_probe_unregister(&__tracepoint_subsys_event, (void *)probe, data);
    return ret;
}

/*
 * Intentionally empty: the function does nothing at run time. Its parameter
 * type is the probe prototype for subsys_event.
 */
static inline
__attribute__((__gnu_inline__, __unused__, __no_instrument_function__))
void check_trace_callback_type_subsys_event(void (*cb)(void *__data, struct inode *inode, struct file *file))
{
}

static inline __attribute__((__gnu_inline__)) __attribute__((__unused__)) __
    attribute__((__no_instrument_function__)) bool trace_subsys_event_enabled(void) 
{ 
        return static_key_false(&__tracepoint_subsys_event.key); 
}

可以看出DECLARE_TRACE的关键是定义了register_trace_subsys_event函数来完成tracepoint与probe的绑定,同时它还定义了trace_subsys_event函数,通过调用trace_subsys_event函数来调用__traceiter_subsys_event触发probe执行

3.2 DEFINE_TRACE

在tracepoint-sample.c 中定义如下:

#include <linux/module.h>
#include <linux/sched.h>
#include <linux/proc_fs.h>
#include "tp-samples-trace.h"

/*
 * Defines the subsys_event tracepoint; name, prototype and arguments match
 * the DECLARE_TRACE() in tp-samples-trace.h.
 */
DEFINE_TRACE(subsys_event,
        TP_PROTO(struct inode *inode, struct file *file),
        TP_ARGS(inode, file));

对于tracepoint-sample,DEFINE_TRACE展开(简化)为:

/* Tracepoint name string, placed in the "__tracepoints_strings" section. */
static const char __tpstrtab_subsys_event[] 
	__attribute__((__section__("__tracepoints_strings"))) = "subsys_event";
	
extern struct static_call_key __SCK__tp_func_subsys_event;

/* Forward declaration of the iterator so the struct below can point at it. */
int __traceiter_subsys_event(void *__data, struct inode *inode, struct file *file); 

/* Define the tracepoint variable, placed in the "__tracepoints" section. */
struct tracepoint __tracepoint_subsys_event __attribute__((__used__)) 
	__attribute__((__section__("__tracepoints"))) = { 
    .name = __tpstrtab_subsys_event, 
    /* Corresponds to the STATIC_KEY_INIT_FALSE initializer in DEFINE_TRACE_FN. */
    .key = { .enabled = { 0 }, { .entries = (void *)0UL } }, 
    .static_call_key = &__SCK__tp_func_subsys_event, 
    .static_call_tramp = ((void *)0), 
    .iterator = &__traceiter_subsys_event, 
    /* DEFINE_TRACE passes NULL for both _reg and _unreg. */
    .regfunc = ((void *)0), 
    .unregfunc = ((void *)0), 
    /* No probes attached initially. */
    .funcs = ((void *)0)
}; 
/*
 * Runs the tracepoint's probe functions; one tracepoint can have several
 * callbacks. Always returns 0.
 */
int __traceiter_subsys_event(void *__data, struct inode *inode, struct file *file) 
{
    /*
     * The "...." below is the article's elision, not C: it stands for the
     * declarations of it_func_ptr / it_func and the load of the tracepoint's
     * funcs array shown in DEFINE_TRACE_FN.
     */
    ....
    /* Call each entry's func with that entry's data; stop at the first NULL func. */
    do {
        it_func = (it_func_ptr)->func;
        __data = (it_func_ptr)->data;
        ((void (*)(void *, struct inode *inode, struct file *file))(it_func))(__data, inode, file);
    } while ((++it_func_ptr)->func);

    return 0; 
} 
/*
 * Static-call symbols for tp_func_subsys_event. The key is defined here with
 * its func member set to __traceiter_subsys_event.
 */
extern struct static_call_key __SCK__tp_func_subsys_event; 
extern typeof(__traceiter_subsys_event) __SCT__tp_func_subsys_event;
struct static_call_key __SCK__tp_func_subsys_event = { .func = __traceiter_subsys_event, };

DEFINE_TRACE的关键是定义了struct tracepoint __tracepoint_subsys_event变量,并对其进行初始化;同时它还定义了__traceiter_subsys_event,它主要完成对各个probe函数的回调

3.3 定义probe

本例中定义了如下的probe函数:

/*
 * Probe for the subsys_event tracepoint: prints the inode and file pointer
 * values in hexadecimal via printk(). __data is not used.
 */
void my_subsys_event(void *__data, struct inode *inode, struct file *file)
{
        unsigned long inode_addr = (unsigned long)inode;
        unsigned long file_addr = (unsigned long)file;

        printk("inode: 0x%lx, file: 0x%lx\n", inode_addr, file_addr);
}

并通过显式调用register_trace_subsys_event来完成tracepoint与probe函数的绑定。注意probe函数的第一个参数固定为void *__data,其后的参数类型和个数要与DEFINE_TRACE和DECLARE_TRACE中的保持一致

3.4 将 tracepoint与probe绑定

在初始化函数中执行如下,完成tracepoint与probe绑定

/*
 * Attach my_subsys_event to the tracepoint with a NULL data pointer.
 * NOTE(review): the int return value is discarded in this fragment.
 */
register_trace_subsys_event(my_subsys_event, NULL);

3.5 触发probe

通过trace_subsys_event可触发probe函数的执行

/* Fire the tracepoint; the attached probes receive inode and file. */
trace_subsys_event(inode, file);

运行结果如下:

# insmod tracepoint-sample.ko
# cat /proc/tracepoint-sample 
[78074.813467] inode: 0xffff000001967a70, file: 0xffff000007f466c0
;