2023年7月

Kretprobes

使用 Kretprobes 监控内核函数运行时间和返回值.

一个简单的例子

文件名: myretprobe.c

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/ktime.h>
#include <linux/limits.h>
#include <linux/sched.h>

/*
 * Name of the kernel function to probe. Defaults to "ksys_read"; it is
 * exposed as the "func" module parameter so another symbol can be chosen
 * at load time.
 */
static char func_name[NAME_MAX] = "ksys_read";
module_param_string(func, func_name, NAME_MAX, S_IRUGO);
MODULE_PARM_DESC(func, "Function to kretprobe; this module will report the"
            " function's execution time");

/*
 * Per-instance private data. my_kretprobe.data_size is set to the size of
 * this struct, and the handlers reach it through ri->data.
 */
struct my_data {
    ktime_t entry_stamp;    /* timestamp taken by entry_handler() */
};

/* Here we use the entry_hanlder to timestamp function entry */
static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
    struct my_data *data;

    if (!current->mm)
        return 1;    /* Skip kernel threads */

    data = (struct my_data *)ri->data;
    data->entry_stamp = ktime_get();
    return 0;
}

/*
 * Return-probe handler: log the return value and duration. Duration may turn
 * out to be zero consistently, depending upon the granularity of time
 * accounting on the platform.
 *
 * The return register is kept at full width (long): the probed function is
 * selectable through the "func" parameter and may return a pointer-sized or
 * ssize_t value that an int would silently truncate.
 *
 * Always returns 0.
 */
static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
    long retval = (long)regs_return_value(regs);
    struct my_data *data = (struct my_data *)ri->data;
    s64 delta;
    ktime_t now;

    now = ktime_get();
    /* entry_stamp was stored by entry_handler() for this same instance. */
    delta = ktime_to_ns(ktime_sub(now, data->entry_stamp));
    printk(KERN_INFO "%s returned %ld and took %lld ns to execute\n",
            func_name, retval, (long long)delta);
    return 0;
}

/*
 * Return-probe descriptor. kp.symbol_name is not set here; kretprobe_init()
 * assigns it from func_name before registering the probe.
 */
static struct kretprobe my_kretprobe = {
    .handler        = ret_handler,
    .entry_handler        = entry_handler,
    .data_size        = sizeof(struct my_data),
    /* Probe up to 20 instances concurrently. */
    .maxactive        = 20,
};

/*
 * Module entry point: point the kretprobe at func_name and register it.
 *
 * Returns 0 on success, or the negative errno reported by
 * register_kretprobe() (e.g. when the symbol cannot be probed).
 */
static int __init kretprobe_init(void)
{
    int ret;

    my_kretprobe.kp.symbol_name = func_name;
    ret = register_kretprobe(&my_kretprobe);
    if (ret < 0) {
        printk(KERN_ERR "register_kretprobe failed, returned %d\n",
                ret);
        /* Propagate the real error; a bare -1 would read as -EPERM. */
        return ret;
    }
    printk(KERN_INFO "Planted return probe at %s: %p\n",
            my_kretprobe.kp.symbol_name, my_kretprobe.kp.addr);
    return 0;
}

/*
 * Module exit point: unregister the return probe, then report where it was
 * planted and how many invocations were missed.
 */
static void __exit kretprobe_exit(void)
{
    struct kretprobe *rp = &my_kretprobe;

    unregister_kretprobe(rp);
    pr_info("kretprobe at %p unregistered\n", rp->kp.addr);

    /* nmissed > 0 suggests that maxactive was set too low. */
    pr_info("Missed probing %d instances of %s\n",
        rp->nmissed, rp->kp.symbol_name);
}

/* Register the module's load/unload hooks and declare its license. */
module_init(kretprobe_init)
module_exit(kretprobe_exit)
MODULE_LICENSE("GPL");

Makefile

# Out-of-tree build of myretprobe.ko against the headers of kernel $(tag).
obj-m += myretprobe.o

# Kernel release to build for; override with `make tag=<release>`.
tag ?= $(shell uname -r)
KDIR := /lib/modules/$(tag)/build

.PHONY: all clean

# $(MAKE) keeps flags and the jobserver intact for the sub-make; $(CURDIR)
# is set by make itself and does not depend on PWD being exported.
# Recipe lines must start with a TAB character.
all:
	$(MAKE) -C $(KDIR) M=$(CURDIR) modules

clean:
	$(MAKE) -C $(KDIR) M=$(CURDIR) clean

运行并查看结果

$ make all
$ sudo insmod myretprobe.ko

$ sudo rmmod myretprobe
$ tail -n 10 /var/log/syslog
Jul 13 00:27:54 supra kernel: [ 2367.855060] ksys_read returned -32 and took 234 ns to execute
Jul 13 00:27:54 supra kernel: [ 2367.855063] ksys_read returned -32 and took 190 ns to execute
Jul 13 00:27:54 supra kernel: [ 2367.855066] ksys_read returned -32 and took 191 ns to execute
Jul 13 00:27:54 supra kernel: [ 2367.855068] ksys_read returned -32 and took 189 ns to execute

统计返回值 histogram 的例子

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/ktime.h>
#include <linux/limits.h>
#include <linux/sched.h>
#include <linux/init.h>

/* Number of histogram buckets filled by ret_handler(). */
#define MY_ARRAY_SIZE 10
/* Per-bucket hit counters; static storage, so they start at zero. */
static uint my_array[MY_ARRAY_SIZE];

/*
 * Name of the kernel function to probe; overridable at load time through
 * the "func" module parameter. The description below now matches what this
 * module does (a return-value histogram, not timing).
 */
static char func_name[NAME_MAX] = "ksys_read";
module_param_string(func, func_name, NAME_MAX, S_IRUGO);
MODULE_PARM_DESC(func, "Function to kretprobe; this module will report a"
            " histogram of the function's return values");

static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
    int retval = regs_return_value(regs);
    if (retval < 0) {
        my_array[0]++;
    } else if (0 == retval) {
        my_array[1]++;
    } else if (retval < 20) {
        my_array[2]++;
    } else if (retval < 40) {
        my_array[3]++;
    } else if (retval < 80) {
        my_array[4]++;
    } else if (retval < 160) {
        my_array[5]++;
    } else if (retval < 320) {
        my_array[6]++;
    } else if (retval < 640) {
        my_array[7]++;
    } else if (retval < 1280) {
        my_array[8]++;
    } else {
        my_array[9]++;
    }
    //printk(KERN_INFO "%s returned %d \n", func_name, retval);
    return 0;
}

/*
 * Return-probe descriptor: only a return handler is installed (no entry
 * handler, no per-instance data). kp.symbol_name is set in kretprobe_init().
 */
static struct kretprobe my_kretprobe = {
    .handler        = ret_handler,
    .maxactive        = 3,
};

/*
 * Module entry point: point the kretprobe at func_name and register it.
 *
 * my_array needs no explicit clearing: it has static storage duration and
 * therefore starts out zeroed.
 *
 * Returns 0 on success, or the negative errno from register_kretprobe().
 */
static int __init kretprobe_init(void)
{
    int ret;

    my_kretprobe.kp.symbol_name = func_name;
    ret = register_kretprobe(&my_kretprobe);
    if (ret < 0) {
        printk(KERN_ERR "register_kretprobe failed, returned %d\n",
                ret);
        /* Propagate the real error; a bare -1 would read as -EPERM. */
        return ret;
    }
    printk(KERN_INFO "Planted return probe at %s: %p\n",
            my_kretprobe.kp.symbol_name, my_kretprobe.kp.addr);
    return 0;
}

/*
 * Module exit point: unregister the probe, then dump every histogram bucket
 * and the number of missed invocations to the kernel log.
 */
static void __exit kretprobe_exit(void)
{
    int bucket = 0;

    unregister_kretprobe(&my_kretprobe);
    pr_info("kretprobe at %p unregistered\n", my_kretprobe.kp.addr);

    pr_info("my_array values:\n");
    while (bucket < MY_ARRAY_SIZE) {
        pr_info("my_array[%d]: %u\n", bucket, my_array[bucket]);
        bucket++;
    }

    /* nmissed > 0 suggests that maxactive was set too low. */
    pr_info("Missed probing %d instances of %s\n",
        my_kretprobe.nmissed, my_kretprobe.kp.symbol_name);
}

/* Register the module's load/unload hooks and declare its license. */
module_init(kretprobe_init)
module_exit(kretprobe_exit)
MODULE_LICENSE("GPL");

使用Kretprobes 观测系统调用 read 字节数 并放到 proc 文件系统

下面使用 Kretprobes 观测系统调用 ksys_read() 的返回字节数, 并把这些数字做成 histogram 的形式放到 /proc/readpattern 去, 然后读这个文件.

源代码

文件名 dumpreadstat.c:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/ktime.h>
#include <linux/limits.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/proc_fs.h>

/* Number of histogram buckets. */
#define MY_ARRAY_SIZE 10
/* Storage reserved for each bucket label, including the terminating NUL. */
#define MAX_STRING_LENGTH 16
/* Per-bucket hit counters, incremented by ret_handler(). */
static uint my_array[MY_ARRAY_SIZE];
/*
 * Human-readable label for each bucket, in the same order as my_array;
 * padded with spaces so the columns printed by read_proc() line up.
 */
static char desc_arr[MY_ARRAY_SIZE][MAX_STRING_LENGTH] = {
    "< 0        ",
    "= 0        ",
    "0 -> 20    ",
    "20 -> 40   ",
    "40 -> 80   ",
    "80 -> 160  ",
    "160 -> 320 ",
    "320 -> 640 ",
    "640 -> 1280",
    " > 1280    "
};

/* Handle of the /proc/readpattern entry created in kretprobe_init(). */
static struct proc_dir_entry *proc_file;

/*
 * Name of the kernel function to probe; overridable at load time through
 * the "func" module parameter.
 */
static char func_name[NAME_MAX] = "ksys_read";
module_param_string(func, func_name, NAME_MAX, S_IRUGO);
MODULE_PARM_DESC(func, "Function to kretprobe; this module will report a"
            " histogram of the function's return values");

/* open() hook of the proc file: only logs the call. Always returns 0. */
static int open_proc(struct inode *inode, struct file *file)
{
    printk(KERN_ALERT "open proc\n");
    return 0;
}

/* release() hook of the proc file: only logs the call. Always returns 0. */
static int release_proc(struct inode *inode, struct file *file)
{
    printk(KERN_ALERT "release proc\n");
    return 0;
}

/*
 * read() hook of the proc file: render the histogram as text and hand the
 * requested window of it to user space.
 *
 * The text is a header line followed by one "<label>\t: <count>" line per
 * bucket. The file position (*offset) and the caller's buffer size (length)
 * are honoured via simple_read_from_buffer(), so short reads, repeated reads
 * and concurrent readers all behave like a regular file; a read at or past
 * the end returns 0 (EOF). This replaces a global "already read" toggle
 * that was shared by all readers and copied the whole text regardless of
 * how many bytes the caller asked for.
 *
 * Returns the number of bytes copied, 0 at EOF, -EINVAL if the text does
 * not fit the local buffer, or -EFAULT if the user buffer is not writable.
 */
static ssize_t read_proc(struct file *filp, char __user *buffer, size_t length, loff_t *offset)
{
    /*
     * A 15-byte header plus MY_ARRAY_SIZE lines of at most about 30 bytes
     * each fits comfortably; keeping this well under 1 KiB also keeps the
     * stack frame small.
     */
    char output[512];
    size_t used;
    int ret;
    int i;

    printk(KERN_ALERT "read proc\n");

    used = scnprintf(output, sizeof(output), "bytes \t\t:count\n");
    for (i = 0; i < MY_ARRAY_SIZE; i++) {
        ret = snprintf(output + used, sizeof(output) - used,
                       "%s\t: %u\n", desc_arr[i], my_array[i]);
        if (ret < 0 || (size_t)ret >= sizeof(output) - used) {
            printk(KERN_ERR "Failed to concatenate my_array values\n");
            return -EINVAL;
        }
        used += ret;
    }

    /*
     * The counters may change between two partial reads of the same open
     * file; each call renders a fresh snapshot.
     */
    return simple_read_from_buffer(buffer, length, offset, output, used);
}

static ssize_t write_proc(struct file *filp, const char *buffer, size_t len, loff_t *off)
{
    printk(KERN_ALERT "write proc\n");
    return 0;
}

/* File operations wired to the proc entry created in kretprobe_init(). */
static const struct proc_ops proc_fops = {
    .proc_open = open_proc,
    .proc_read = read_proc,
    .proc_write = write_proc,
    .proc_release = release_proc,
};

static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
    int retval = regs_return_value(regs);
    if (retval < 0) {
        my_array[0]++;
    } else if (0 == retval) {
        my_array[1]++;
    } else if (retval < 20) {
        my_array[2]++;
    } else if (retval < 40) {
        my_array[3]++;
    } else if (retval < 80) {
        my_array[4]++;
    } else if (retval < 160) {
        my_array[5]++;
    } else if (retval < 320) {
        my_array[6]++;
    } else if (retval < 640) {
        my_array[7]++;
    } else if (retval < 1280) {
        my_array[8]++;
    } else {
        my_array[9]++;
    }
    //printk(KERN_INFO "%s returned %d \n", func_name, retval);
    return 0;
}

/*
 * Return-probe descriptor: only a return handler is installed (no entry
 * handler, no per-instance data). kp.symbol_name is set in kretprobe_init().
 */
static struct kretprobe my_kretprobe = {
    .handler        = ret_handler,
    .maxactive        = 3,
};

/*
 * Module entry point: register the return probe on func_name, then create
 * /proc/readpattern to expose the histogram.
 *
 * If the proc entry cannot be created, the probe is unregistered again
 * before failing: a failed init means the module is unloaded, and a probe
 * left registered would keep calling a handler that no longer exists.
 *
 * my_array needs no explicit clearing; it has static storage duration.
 *
 * Returns 0 on success, the negative errno from register_kretprobe(), or
 * -ENOMEM when the proc entry cannot be created.
 */
static int __init kretprobe_init(void)
{
    int ret;

    my_kretprobe.kp.symbol_name = func_name;
    ret = register_kretprobe(&my_kretprobe);
    if (ret < 0) {
        printk(KERN_ERR "register_kretprobe failed, returned %d\n",
                ret);
        /* Propagate the real error; a bare -1 would read as -EPERM. */
        return ret;
    }
    printk(KERN_INFO "Planted return probe at %s: %p\n",
            my_kretprobe.kp.symbol_name, my_kretprobe.kp.addr);

    /* Create the proc file /proc/readpattern */
    proc_file = proc_create("readpattern", 0666, NULL, &proc_fops);
    if (!proc_file) {
        printk(KERN_ERR "Failed to create proc file\n");
        unregister_kretprobe(&my_kretprobe);
        return -ENOMEM;
    }

    return 0;
}

/*
 * Module exit point: unregister the probe, dump every histogram bucket to
 * the kernel log, remove the proc entry and report missed invocations.
 */
static void __exit kretprobe_exit(void)
{
    int bucket = 0;

    unregister_kretprobe(&my_kretprobe);
    pr_info("kretprobe at %p unregistered\n", my_kretprobe.kp.addr);

    pr_info("my_array values:\n");
    while (bucket < MY_ARRAY_SIZE) {
        pr_info("my_array[%d]: %u\n", bucket, my_array[bucket]);
        bucket++;
    }

    if (proc_file != NULL) {
        proc_remove(proc_file);
        pr_info("Removed /proc/%s file\n", "readpattern");
    }

    /* nmissed > 0 suggests that maxactive was set too low. */
    pr_info("Missed probing %d instances of %s\n",
        my_kretprobe.nmissed, my_kretprobe.kp.symbol_name);
}

/* Register the module's load/unload hooks and declare its license. */
module_init(kretprobe_init)
module_exit(kretprobe_exit)
MODULE_LICENSE("GPL");

Makefile

# Out-of-tree build of dumpreadstat.ko against the headers of kernel $(tag).
obj-m += dumpreadstat.o

# Kernel release to build for; override with `make tag=<release>`.
tag ?= $(shell uname -r)
KDIR := /lib/modules/$(tag)/build

.PHONY: all clean

# $(MAKE) keeps flags and the jobserver intact for the sub-make; $(CURDIR)
# is set by make itself and does not depend on PWD being exported.
# Recipe lines must start with a TAB character.
all:
	$(MAKE) -C $(KDIR) M=$(CURDIR) modules

clean:
	$(MAKE) -C $(KDIR) M=$(CURDIR) clean

执行并观测

$ make all
$ sudo insmod dumpreadstat.ko
$ cat /proc/readpattern
bytes         :count
< 0            : 1
= 0            : 27
0 -> 20        : 65
20 -> 40       : 24
40 -> 80       : 7
80 -> 160      : 8
160 -> 320     : 3
320 -> 640     : 2
640 -> 1280    : 11
 > 1280        : 10

# 卸载模块
$ sudo rmmod dumpreadstat

# 观测系统日志
$ tail -n 100 /var/log/syslog

Kprobes

Kprobes 允许开发者在内核函数的开始,结束,及任意偏移位置插入代码, 监视内核函数的执行, 并收集参数, 返回值,及运行时间等数据.

概念

  1. 有2种: kprobes, kretprobes
  2. 通常使用内核模块来注册 Kprobes, 在模块的 init 代码注册action handler, exit 代码注销;
  3. register_kprobe 注册在哪个内核函数位置注入, 以及要注入的代码块;
  4. unregister_kprobe 用来注销;
  5. 可以批量注册/注销 Kprobes;
  6. 有些特定的内核函数属于 blacklist, 是不允许插入代码的;

    1. 可以 probe 的函数列表 /sys/kernel/tracing/available_filter_functions;
    2. 不可以 probe 的: inline functions & /sys/kernel/debug/kprobes/blacklist

简单例子

一个简单的kernel 模块来注册/注销 Kprobes. 传入不同的参数, 可以注入到不同的kernel 代码位置.
文件名 mykprobe.c

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>

/* Size of the buffer holding the probed symbol's name. */
#define MAX_SYMBOL_LEN    64
/*
 * Symbol to probe; defaults to "vfs_write" and is exposed as the "symbol"
 * module parameter (mode 0644).
 */
static char symbol[MAX_SYMBOL_LEN] = "vfs_write";
module_param_string(symbol, symbol, sizeof(symbol), 0644);

/* For each probe you need to allocate a kprobe structure */
static struct kprobe kp = {
    .symbol_name    = symbol,
};

/*
 * argN(r): read the register that carries the N-th integer argument of the
 * probed function out of a struct pt_regs pointer `r`.
 *
 * x86-64 exposes six such registers here (di, si, dx, cx, r8, r9); arm64
 * exposes eight (regs[0]..regs[7]). Any other architecture is rejected at
 * build time.
 */
#if defined(CONFIG_X86_64)
#define arg0(r)    ((r)->di)
#define arg1(r)    ((r)->si)
#define arg2(r)    ((r)->dx)
#define arg3(r)    ((r)->cx)
#define arg4(r)    ((r)->r8)
#define arg5(r)    ((r)->r9)
#elif defined(CONFIG_ARM64)
#define arg0(r)    ((r)->regs[0])
#define arg1(r)    ((r)->regs[1])
#define arg2(r)    ((r)->regs[2])
#define arg3(r)    ((r)->regs[3])
#define arg4(r)    ((r)->regs[4])
#define arg5(r)    ((r)->regs[5])
#define arg6(r)    ((r)->regs[6])
#define arg7(r)    ((r)->regs[7])
#else
#error "Unsupported architecture"
#endif


/*
 * kprobe pre_handler: called just before the probed instruction is executed.
 *
 * Logs the probe address, the instruction pointer, the flags register and
 * the third argument register (arg2). The "count" label matches the third
 * parameter of the default target vfs_write; for another symbol it is just
 * whatever that register holds.
 *
 * The register names differ per architecture (ip/flags on x86-64,
 * pc/pstate on arm64), so the log statement is selected at build time to
 * match the architectures supported by the argN() macros.
 *
 * Always returns 0.
 */
static int __kprobes handler_pre(struct kprobe *p, struct pt_regs *regs)
{
#if defined(CONFIG_X86_64)
    pr_info("<%s> p->addr = 0x%p, ip = %lx, flags = 0x%lx, count = %lu \n",
        p->symbol_name, p->addr, regs->ip, regs->flags, arg2(regs));
#elif defined(CONFIG_ARM64)
    pr_info("<%s> p->addr = 0x%p, pc = 0x%lx, pstate = 0x%lx, count = %lu \n",
        p->symbol_name, p->addr, (unsigned long)regs->pc,
        (unsigned long)regs->pstate, (unsigned long)arg2(regs));
#endif

    /* A dump_stack() here will give a stack backtrace */
    return 0;
}

/*
 * kprobe post_handler: called after the probed instruction is executed.
 *
 * Logs the probe address and the flags register. The register is named
 * flags on x86-64 and pstate on arm64, so the statement is selected at
 * build time.
 */
static void __kprobes handler_post(struct kprobe *p, struct pt_regs *regs,
                unsigned long flags)
{
#if defined(CONFIG_X86_64)
    pr_info("<%s> p->addr = 0x%p, flags = 0x%lx\n",
        p->symbol_name, p->addr, regs->flags);
#elif defined(CONFIG_ARM64)
    pr_info("<%s> p->addr = 0x%p, pstate = 0x%lx\n",
        p->symbol_name, p->addr, (unsigned long)regs->pstate);
#endif
}

/*
 * Module entry point: attach both handlers to the probe and register it.
 *
 * Returns 0 on success, or the negative value returned by register_kprobe().
 */
static int __init kprobe_init(void)
{
    int err;

    kp.pre_handler = handler_pre;
    kp.post_handler = handler_post;

    err = register_kprobe(&kp);
    if (err >= 0) {
        pr_info("Planted kprobe at %p\n", kp.addr);
        return 0;
    }

    pr_err("register_kprobe failed, returned %d\n", err);
    return err;
}

/* Module exit point: unregister the probe and log where it had been planted. */
static void __exit kprobe_exit(void)
{
    struct kprobe *probe = &kp;

    unregister_kprobe(probe);
    pr_info("kprobe at %p unregistered\n", probe->addr);
}

/* Register the module's load/unload hooks and declare its license. */
module_init(kprobe_init)
module_exit(kprobe_exit)
MODULE_LICENSE("GPL");

同目录的 Makefile 文件:

# Out-of-tree build of mykprobe.ko against the headers of kernel $(tag).
# The object name must match the source file (mykprobe.c).
obj-m += mykprobe.o

# Kernel release to build for; override with `make tag=<release>`.
tag ?= $(shell uname -r)
KDIR := /lib/modules/$(tag)/build

.PHONY: all clean

# $(MAKE) keeps flags and the jobserver intact for the sub-make; $(CURDIR)
# is set by make itself and does not depend on PWD being exported.
# Recipe lines must start with a TAB character.
all:
	$(MAKE) -C $(KDIR) M=$(CURDIR) modules

clean:
	$(MAKE) -C $(KDIR) M=$(CURDIR) clean

编译并加载模块

$ make all
$ sudo insmod mykprobe.ko

查看结果

$ tail -f -n 10 /var/log/syslog
Jul 12 11:46:18 supra kernel: [64756.049707] <vfs_write> p->addr = 0x000000007d821bae, flags = 0x293
Jul 12 11:46:18 supra kernel: [64756.049736] <vfs_write> p->addr = 0x000000007d821bae, ip = ffffffffafd7b621, flags = 0x293, count = 478
````

### 卸载模块

$ sudo rmmod mykprobe


# kprobe 结构体定义
参见: https://github.com/torvalds/linux/blob/master/include/linux/kprobes.h#L60

/*
 * Excerpt of struct kprobe; field notes follow the comments in the upstream
 * include/linux/kprobes.h header linked above.
 */
struct kprobe {

struct hlist_node hlist;
/* list of kprobes for multi-handler support */
struct list_head list;
/* number of times this probe was temporarily disarmed */
unsigned long nmissed;
/* location of the probe point */
kprobe_opcode_t *addr;
/* lets the user name the symbol of the probe point */
const char *symbol_name;
/* offset into the symbol */
unsigned int offset;
/* called before the instruction at addr is executed */
kprobe_pre_handler_t pre_handler;
/* called after the instruction at addr is executed */
kprobe_post_handler_t post_handler;
/* saved opcode (the one replaced by the breakpoint) */
kprobe_opcode_t opcode;
/* copy of the original instruction */
struct arch_specific_insn ainsn;
/* status flags */
u32 flags;

};


# 定义插入位置
1. 通过 `symbol_name`;
2. 通过 `addr`:
  要么通过 `symbol_name` 要么通过 `addr`. 上面的例子中使用 `symbol_name`, 如果要替换成 `addr`, 方法如下:
  1. 通过查找 `/proc/kallsyms` 定位地址:
cat /proc/kallsyms | grep vfs_write
ffffffffafd7b620 T vfs_write
```
  1. 替换结构体 .addr = (kprobe_opcode_t *)0xffffffffafd7b620 (同时去掉 .symbol_name; 两者同时指定时注册会失败并返回 -EINVAL)
  1. 通过 (symbol_name | addr) + offset

    .addr = (kprobe_opcode_t *)0xffffffffafd7b620,
    .offset = 5,

how it works

如下图, 把指定位置处的指令替换成断点指令 (x86 上是 int3, 操作码 0xCC, 触发 3 号异常), 然后引导到新构建的代码块, 里面包含 pre_handler, 原指令, post_handler.
trap.png

其它

Kprobes events

写一个包含 Kprobes 注册/注销的模块稍微有点复杂, 使用 Kprobes events 就相对简单一些. 它们类似基于 tracepoint 的 events, 但是使用 Kprobes 实现, 可以动态地添加和删除.

概念

  1. 要使用此功能, 编译 Kernel 的时候, 必须 CONFIG_KPROBE_EVENTS=y;
  2. 类似其它 event tracer, 不需要通过 current_tracer 激活;
  3. 通过 /sys/kernel/tracing/kprobe_events 或 /sys/kernel/tracing/dynamic_events 添加 Kprobes events;
  4. 通过 /sys/kernel/tracing/events/kprobes/<EVENT>/enable 开启 events;
  5. /sys/kernel/tracing/kprobe_profile 统计 hit 多少, miss 多少.

一个简单的例子

下面的例子使用探测 uptime_proc_show 函数, 建立以 trace_sample 为event 名字的 Kprobes event.

可以看到动态添加这个 Kprobes events 之后, 对应的文件夹被建立了.

并且不需要设置 current_tracer.

# 保证环境正确
$ echo 0 > /sys/kernel/tracing/tracing_on
$ echo "" > /sys/kernel/tracing/trace

# 设置 Kprobes events
$ echo "p:trace_sample uptime_proc_show" > /sys/kernel/debug/tracing/kprobe_events

# 查看新生成的 Kprobes 文件夹
$ ls /sys/kernel/tracing/events/kprobes/trace_sample/
enable  filter  format  hist  id  inject  trigger

$ echo 1 > /sys/kernel/tracing/events/kprobes/trace_sample/enable

# 开启 probe 
$ echo 1 > /sys/kernel/tracing/tracing_on

# 执行包含该函数的命令
$ uptime

# 查看结果
$ cat /sys/kernel/tracing/trace
# tracer: nop
#
# entries-in-buffer/entries-written: 1/1   #P:8
#
#                                _-----=> irqs-off
#                               / _----=> need-resched
#                              | / _---=> hardirq/softirq
#                              || / _--=> preempt-depth
#                              ||| / _-=> migrate-disable
#                              |||| /     delay
#           TASK-PID     CPU#  |||||  TIMESTAMP  FUNCTION
#              | |         |   |||||     |         |
          uptime-40063   [007] ..... 67233.823241: trace_sample: (uptime_proc_show+0x0/0x1d0)

# 清理
$ echo 0 > /sys/kernel/tracing/tracing_on
$ echo 0 > /sys/kernel/tracing/events/kprobes/trace_sample/enable
$ echo -:trace_sample > /sys/kernel/debug/tracing/kprobe_events

查看 ksys_read 系统调用返回值的例子

下面的内容都放到 test.sh 文件里, 然后执行 sh test.sh

# Trace the return value of ksys_read for this shell process only.
# Must be run as root: it writes to tracefs.

# Print our PID so the trace output can be matched against it.
echo $$

T=/sys/kernel/tracing

# Prepare: stop tracing and clear the ring buffer.
echo 0 > $T/tracing_on
echo "" > $T/trace

# Register the return probe. Single quotes are required here: inside double
# quotes the shell would expand $retval (to an empty string) before the
# kernel ever sees it, and the probe would record no return value.
echo 'r:myreturnprobe ksys_read $retval' > $T/kprobe_events
# Only record events generated by this shell's PID.
echo $$ > $T/set_event_pid

# Enable the event and start tracing.
echo 1 > $T/events/kprobes/myreturnprobe/enable
echo 1 > $T/tracing_on

# Wait a little, then issue a read from this very process. The `read`
# builtin performs the read(2) call in the shell itself; an external
# command such as `cat` runs in a forked child with a different PID, which
# set_event_pid filters out.
sleep 5
read -r _line < "$0"

# Stop tracing.
echo 0 > $T/tracing_on

# Show the result.
cat $T/trace

# Clean up: disable the event, drop the PID filter, and remove only our
# probe (appending with >> leaves other registered probes untouched).
echo 0 > $T/events/kprobes/myreturnprobe/enable
echo > $T/set_event_pid
echo '-:myreturnprobe' >> $T/kprobe_events 2>/dev/null

echo "done"

Kprobes events 格式

更多说明在这里: https://docs.kernel.org/trace/kprobetrace.html

 p[:[GRP/][EVENT]] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS]        : Set a probe
 r[MAXACTIVE][:[GRP/][EVENT]] [MOD:]SYM[+0] [FETCHARGS]        : Set a return probe
 p[:[GRP/][EVENT]] [MOD:]SYM[+0]%return [FETCHARGS]            : Set a return probe
 -:[GRP/][EVENT]                                               : Clear a probe

参考:
https://docs.kernel.org/trace/kprobetrace.html

几个Linux 内核模块的例子

本文写几个Linux 内核模块的例子.

系列:

  1. 写一个 Linux 内核 hello world 模块
  2. 写一个有参数的 Linux 内核模块
  3. 写一个有依赖的Linux 内核模块

打印系统进程的 kernel 模块

文件名: printthread.c

#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/oom.h>

/*
 * Module entry point: log one line per thread in the system (tgid, thread
 * pid, parent pid, command name and state).
 *
 * The task list is walked inside an RCU read-side critical section so that
 * the tasks being visited cannot be freed during the walk; real_parent is
 * an RCU-managed pointer and is read with rcu_dereference(). __state is an
 * unsigned value and is printed with %u.
 *
 * Always returns 0, so the module stays loaded after printing.
 */
static int __init tprocs_init(void)
{
    struct task_struct *p, *t;

    pr_info("print_threads: tid, pid, command, state\n\n");

    rcu_read_lock();
    for_each_process_thread(p, t) {
        pr_info("tgid=%d, thread_pid=%d, parent_pid=%d, comm=%s, state=%u\n",
                t->tgid, t->pid, rcu_dereference(t->real_parent)->pid,
                t->comm, READ_ONCE(t->__state));
    }
    rcu_read_unlock();

    return 0;
}

/* Module exit point: nothing to release, only logs a farewell line. */
static void __exit tprocs_exit(void)
{
    printk(KERN_INFO "print_threads has left the building...\n");
}

/* Register the module's load/unload hooks and declare its license. */
module_init(tprocs_init);
module_exit(tprocs_exit);
MODULE_LICENSE("GPL v2");

打印当前进程及它的子进程

文件名: list_children.c

#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/oom.h>

void list_children(struct task_struct *parent) {
    struct task_struct *child;
    struct list_head *list;

    // Iterate through the list of children
    list_for_each(list, &parent->children) {
        child = list_entry(list, struct task_struct, sibling);

        // Print information about the child process
        printk(KERN_INFO "Child process: %s [%d]\n", child->comm, child->pid);
    }
}

/*
 * Module entry point: log every process (pid, command name, state) followed
 * by its direct children.
 *
 * The process list is walked inside an RCU read-side critical section so
 * that the tasks being visited cannot be freed during the walk. __state is
 * an unsigned value and is printed with %u.
 *
 * Always returns 0, so the module stays loaded after printing.
 */
static int __init pprocs_init(void)
{
    struct task_struct *p;

    pr_info("print_procs: pid, command, state\n\n");

    rcu_read_lock();
    for_each_process(p) {
        pr_info("pid=%d, comm=%s, state=%u\n",
                p->pid, p->comm, READ_ONCE(p->__state));
        list_children(p);
    }
    rcu_read_unlock();

    return 0;
}

/* Module exit point: nothing to release, only logs a farewell line. */
static void __exit pprocs_exit(void)
{
    printk(KERN_INFO "print_procs has left the building...\n");
}

/* Register the module's load/unload hooks and declare its license. */
module_init(pprocs_init);
module_exit(pprocs_exit);
MODULE_LICENSE("GPL v2");

dump 某个进程的内存信息

文件名 dumpprocmm.c

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kstrtox.h>
#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/mm_types_task.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/pid.h>
#include <linux/proc_fs.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/uaccess.h>

/* The /proc/dumpprocmm directory; created in hello_init(). */
static struct proc_dir_entry *parent;
/* Text last written to the "pid" file (NUL-terminated); shown by read_proc1(). */
char buff_array[32] = "123";
/* Memory report for that PID (NUL-terminated); shown by read_proc2(). */
char mm_array[500] = "initial value\n";

/* open() hook shared by both proc files: only logs the call. Returns 0. */
static int open_proc(struct inode *inode, struct file *file)
{
    printk(KERN_ALERT "Open dump_something.....\n");
    return 0;
}

/* release() hook shared by both proc files: only logs the call. Returns 0. */
static int release_proc(struct inode *inode, struct file *file)
{
    printk(KERN_ALERT "Close dump_something.....\n");
    return 0;
}

/*
 * read() hook of the "pid" file: return the text currently stored in
 * buff_array.
 *
 * The file position (*offset) and the caller's buffer size (length) are
 * honoured via simple_read_from_buffer(), so a read at or past the end
 * returns 0 (EOF) and no more than `length` bytes are ever copied. This
 * replaces a global "already read" toggle that was shared by all readers
 * and copied the whole string regardless of the requested size.
 *
 * Returns the number of bytes copied, 0 at EOF, or -EFAULT if the user
 * buffer is not writable.
 */
static ssize_t read_proc1(struct file *filp, char __user *buffer, size_t length, loff_t *offset)
{
    printk(KERN_ALERT "Read dump_something.....\n");
    return simple_read_from_buffer(buffer, length, offset,
                                   buff_array, strlen(buff_array));
}

/*
 * read() hook of the "dump_mm" file: return the text currently stored in
 * mm_array. Same position/size handling and return values as read_proc1().
 */
static ssize_t read_proc2(struct file *filp, char __user *buffer, size_t length, loff_t *offset)
{
    printk(KERN_ALERT "Read dump_something.....\n");
    return simple_read_from_buffer(buffer, length, offset,
                                   mm_array, strlen(mm_array));
}

static struct task_struct *get_task_struct_from_pid(const char *pid_buffer)
{
    pid_t pid;
    struct task_struct *task = NULL;

    // Convert the PID string to an integer
    if (kstrtoint(pid_buffer, 10, &pid) != 0)
    {
        printk(KERN_ERR "Invalid PID: %s\n", pid_buffer);
        return NULL;
    }

    // Get the task_struct pointer from the PID
    task = pid_task(find_vpid(pid), PIDTYPE_PID);
    if (task == NULL)
    {
        printk(KERN_ERR "Process with PID %d not found\n", pid);
        return NULL;
    }

    return task;
}

static void get_memory_information(struct mm_struct *mm, char *info_buffer)
{
    unsigned long total_vm = mm->total_vm;
    unsigned long anon = get_mm_counter(mm, MM_ANONPAGES);
    unsigned long file = get_mm_counter(mm, MM_FILEPAGES);
    unsigned long swap = get_mm_counter(mm, MM_SWAPENTS);

    snprintf(info_buffer, 400, "Total virtual memory: %lu kB\nRssAnon: %lu kB\nRssFile: %lu kB\nVmSwap: %lu kB\n",
             total_vm << (PAGE_SHIFT - 10), anon << (PAGE_SHIFT - 10), file << (PAGE_SHIFT - 10), swap << (PAGE_SHIFT - 10));
}

static ssize_t write_proc1(struct file *filp, const char *buffer, size_t len, loff_t *off)
{
    printk(KERN_ALERT "try to write to pid file.....\n");

    if (len >= sizeof(buff_array))
    {
        printk(KERN_ERR "Invalid PID: Length exceeds buffer size\n");
        return -EINVAL;
    }

    if (copy_from_user(buff_array, buffer, len))
    {
        printk(KERN_ERR "Data Write: Err!\n");
        return -EFAULT;
    }
    buff_array[len] = '\0';

    struct task_struct *task = get_task_struct_from_pid(buff_array);
    if (!task)
        return -EINVAL;

    struct mm_struct *mm = task->mm;

    // Check if mm_struct is present
    if (mm != NULL)
    {
        // Access memory information
        unsigned long rss = get_mm_rss(mm);

        // Print memory information
        printk(KERN_INFO "Process memory information:\n");
        printk(KERN_INFO "Resident set size (RSS): %lu\n", rss);

        get_memory_information(mm, mm_array);
    }

    return len;
}

static ssize_t write_proc2(struct file *filp, const char *buffer, size_t len, loff_t *off)
{
    printk(KERN_ALERT "try to write to dump_mm file.....\n");
    return 0;
}

/* Operations of the "pid" file: reads show buff_array, writes select a PID. */
static const struct proc_ops proc_fops1 = {
    .proc_open = open_proc,
    .proc_read = read_proc1,
    .proc_write = write_proc1,
    .proc_release = release_proc,
};

/* Operations of the "dump_mm" file: reads show mm_array. */
static const struct proc_ops proc_fops2 = {
    .proc_open = open_proc,
    .proc_read = read_proc2,
    .proc_write = write_proc2,
    .proc_release = release_proc,
};

/*
 * Module entry point: create /proc/dumpprocmm with its two files, "pid"
 * and "dump_mm".
 *
 * If either file cannot be created, the directory (and anything already
 * created under it) is removed again so a failed load leaves nothing
 * behind in /proc.
 *
 * Returns 0 on success or -ENOMEM if any proc entry cannot be created.
 */
static int __init hello_init(void)
{
    printk(KERN_ALERT "Hello !\n");

    /* Create proc directory under /proc */
    parent = proc_mkdir("dumpprocmm", NULL);

    if (NULL == parent)
    {
        printk(KERN_ERR "Failed creating proc entry dumpprocmm\n");
        return -ENOMEM;
    }

    /* Create proc files under /proc/dumpprocmm */
    if (!proc_create("pid", 0666, parent, &proc_fops1) ||
        !proc_create("dump_mm", 0666, parent, &proc_fops2))
    {
        printk(KERN_ERR "Failed creating proc files under dumpprocmm\n");
        proc_remove(parent);
        return -ENOMEM;
    }

    return 0;
}

/* Module exit point: remove the proc directory created at load time. */
static void __exit hello_exit(void)
{
    proc_remove(parent);
    pr_alert("Goodbye !\n");
}

/* Register the module's load/unload hooks. */
module_init(hello_init);
module_exit(hello_exit);

/* Module metadata. */
MODULE_DESCRIPTION("dump process memory");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Eric Tian");

执行

$ make all
$ sudo insmod dumpprocmm.ko
$ echo "755" > /proc/dumpprocmm/pid
$ cat /proc/dumpprocmm/pid
$ cat /proc/dumpprocmm/dump_mm