/*
 * Read a file with io_uring
 *
 * GitHub Gist by @harrisonturton (abecaf00f3c3b35b7aa3881f6937990b),
 * last active May 27, 2024 01:48.
 */
#include <errno.h>
#include <fcntl.h>
#include <linux/io_uring.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
// Number of entries requested when main() calls io_uring_setup().
#define RING_ENTRIES 256
// Size in bytes of the read buffer that main() declares.
#define BUFLEN 8192 // 8KiB
/**
* Submission queue ring. Mmapped from the kernel.
*
* Every member is a pointer into the region that sq_ring_setup() maps at
* IORING_OFF_SQ_RING: the mapping base plus the matching `sq_off` offset taken
* from `struct io_uring_params`.
*/
struct sq_ring {
uint32_t *head; // base + sq_off.head
uint32_t *tail; // base + sq_off.tail; read and advanced by submit_sqe()
uint32_t *ring_mask; // base + sq_off.ring_mask; ANDed with the tail to pick a slot
uint32_t *ring_entries; // base + sq_off.ring_entries
uint32_t *flags; // base + sq_off.flags
uint32_t *dropped; // base + sq_off.dropped
uint32_t *array; // base + sq_off.array; submit_sqe() stores the SQE index here
};
/**
* Completion queue ring. Mmapped from the kernel.
*
* Every member is a pointer into the region that cq_ring_setup() maps at
* IORING_OFF_CQ_RING: the mapping base plus the matching `cq_off` offset taken
* from `struct io_uring_params`.
*/
struct cq_ring {
uint32_t *head; // base + cq_off.head; recv_cqe() stores the consumed position here
uint32_t *tail; // base + cq_off.tail; recv_cqe() compares its position against this
uint32_t *ring_mask; // base + cq_off.ring_mask; ANDed with the position to index `cqes`
uint32_t *ring_entries; // base + cq_off.ring_entries
uint32_t *flags; // base + cq_off.flags
uint32_t *overflow; // base + cq_off.overflow
struct io_uring_cqe *cqes; // base + cq_off.cqes; the completion entries themselves
};
/**
* Everything main() needs to drive one ring: the ring file descriptor plus the
* three mapped regions (submission ring, completion ring, SQE buffer).
*/
struct uring {
unsigned int fd; // ring descriptor returned by io_uring_setup(); passed to io_uring_enter()
struct sq_ring sq_ring; // filled in by sq_ring_setup()
struct cq_ring cq_ring; // filled in by cq_ring_setup()
struct io_uring_sqe *sqes; // SQE buffer mapped by sqe_buffer_setup(); indexed by submit_sqe()
};
/**
 * Thin wrapper around the raw `io_uring_setup` syscall.
 *
 * @param entries  Forwarded unchanged as the syscall's first argument.
 * @param params   Forwarded unchanged as the syscall's second argument.
 * @return The syscall's non-negative result on success (main() uses it as the
 *         ring file descriptor), or the negated `errno` on failure.
 */
static inline int io_uring_setup(unsigned int entries,
                                 struct io_uring_params *params)
{
    int rc = syscall(__NR_io_uring_setup, entries, params);

    if (rc < 0)
        return -errno;
    return rc;
}
/**
 * Execute the `io_uring_enter` syscall.
 *
 * The raw syscall takes a *pointer* to the signal mask followed by the mask
 * size in bytes. Passing the `sigset_t` itself through the variadic
 * `syscall()` does not put a pointer in that argument slot, so the address of
 * this function's own copy is passed instead.
 *
 * @param fd            Ring file descriptor.
 * @param to_submit     Forwarded unchanged to the syscall.
 * @param min_complete  Forwarded unchanged to the syscall.
 * @param flags         Forwarded unchanged to the syscall.
 * @param sig           Signal mask, taken by value; its address is handed to
 *                      the kernel for the duration of the call.
 * @return The syscall's non-negative result on success, or the negated
 *         `errno` on failure.
 */
static inline int io_uring_enter(unsigned int fd, unsigned int to_submit,
                                 unsigned int min_complete, unsigned int flags,
                                 sigset_t sig)
{
    // `sig` is a by-value parameter, so `&sig` stays valid until we return.
    int ret = syscall(__NR_io_uring_enter, fd, to_submit, min_complete, flags,
                      &sig, _NSIG / 8);
    return ret < 0 ? -errno : ret;
}
/**
 * Setup the submission queue ring and mmap it from kernel space.
 *
 * Maps `sq_off.array + sq_entries * sizeof(uint32_t)` bytes of `ringfd` at
 * offset IORING_OFF_SQ_RING, then points every member of `sq_ring` at the
 * mapping base plus the corresponding `sq_off` offset.
 *
 * @param ringfd   Ring file descriptor to map.
 * @param params   Parameters holding `sq_entries` and the `sq_off` offsets.
 * @param sq_ring  Output; left untouched when the mapping fails.
 * @return 0 on success, -1 if mmap() fails.
 */
int sq_ring_setup(int ringfd, const struct io_uring_params *params,
                  struct sq_ring *sq_ring)
{
    size_t len = params->sq_off.array + params->sq_entries * sizeof(uint32_t);
    void *ptr = mmap(NULL, len, PROT_READ | PROT_WRITE,
                     MAP_SHARED | MAP_POPULATE, ringfd, IORING_OFF_SQ_RING);
    if (ptr == MAP_FAILED)
        return -1;

    // Byte-wise offsets go through a char pointer: arithmetic on `void *` is
    // a compiler extension, not standard C.
    char *base = ptr;

    sq_ring->head = (uint32_t *)(base + params->sq_off.head);
    sq_ring->tail = (uint32_t *)(base + params->sq_off.tail);
    sq_ring->ring_mask = (uint32_t *)(base + params->sq_off.ring_mask);
    sq_ring->ring_entries = (uint32_t *)(base + params->sq_off.ring_entries);
    sq_ring->flags = (uint32_t *)(base + params->sq_off.flags);
    sq_ring->dropped = (uint32_t *)(base + params->sq_off.dropped);
    sq_ring->array = (uint32_t *)(base + params->sq_off.array);
    return 0;
}
/**
 * Setup the completion queue ring and mmap it from kernel space.
 *
 * Maps `cq_off.cqes + cq_entries * sizeof(struct io_uring_cqe)` bytes of
 * `ringfd` at offset IORING_OFF_CQ_RING, then points every member of
 * `cq_ring` at the mapping base plus the corresponding `cq_off` offset.
 *
 * @param ringfd   Ring file descriptor to map.
 * @param params   Parameters holding `cq_entries` and the `cq_off` offsets.
 * @param cq_ring  Output; left untouched when the mapping fails.
 * @return 0 on success, -1 if mmap() fails.
 */
int cq_ring_setup(int ringfd, const struct io_uring_params *params,
                  struct cq_ring *cq_ring)
{
    size_t len = params->cq_off.cqes +
                 params->cq_entries * sizeof(struct io_uring_cqe);
    void *ptr = mmap(NULL, len, PROT_READ | PROT_WRITE,
                     MAP_SHARED | MAP_POPULATE, ringfd, IORING_OFF_CQ_RING);
    if (ptr == MAP_FAILED)
        return -1;

    // Byte-wise offsets go through a char pointer: arithmetic on `void *` is
    // a compiler extension, not standard C.
    char *base = ptr;

    cq_ring->head = (uint32_t *)(base + params->cq_off.head);
    cq_ring->tail = (uint32_t *)(base + params->cq_off.tail);
    cq_ring->ring_mask = (uint32_t *)(base + params->cq_off.ring_mask);
    cq_ring->ring_entries = (uint32_t *)(base + params->cq_off.ring_entries);
    cq_ring->overflow = (uint32_t *)(base + params->cq_off.overflow);
    cq_ring->cqes = (struct io_uring_cqe *)(base + params->cq_off.cqes);
    cq_ring->flags = (uint32_t *)(base + params->cq_off.flags);
    return 0;
}
/**
 * Map the SQE buffer of `ringfd` into this process.
 *
 * The mapping covers `sq_entries * sizeof(struct io_uring_sqe)` bytes at
 * offset IORING_OFF_SQES.
 *
 * @param ringfd  Ring file descriptor to map.
 * @param params  Parameters holding `sq_entries`.
 * @param sqes    Output; receives the mapping address on success and is left
 *                untouched on failure.
 * @return 0 on success, -1 if mmap() fails.
 */
int sqe_buffer_setup(int ringfd, const struct io_uring_params *params,
                     struct io_uring_sqe **sqes)
{
    const size_t map_len = params->sq_entries * sizeof(struct io_uring_sqe);
    void *mapping = mmap(NULL, map_len, PROT_READ | PROT_WRITE,
                         MAP_SHARED | MAP_POPULATE, ringfd, IORING_OFF_SQES);

    if (mapping != MAP_FAILED) {
        *sqes = mapping;
        return 0;
    }
    return -1;
}
/**
 * Fill in `sqe` as a request to read `buflen` bytes from `filefd` into `buf`.
 *
 * Despite the function's name, the opcode written is IORING_OP_READ. Only
 * `opcode`, `fd`, `addr` and `len` are assigned; every other field keeps
 * whatever value the caller left in it.
 *
 * @param sqe     Entry to fill in.
 * @param filefd  File descriptor to read from.
 * @param buf     Destination buffer; its address is stored in `addr`.
 * @param buflen  Number of bytes requested; stored in `len`.
 */
void prep_readv_sqe(struct io_uring_sqe *sqe, unsigned int filefd, char *buf,
                    size_t buflen)
{
    const unsigned long dest = (unsigned long)buf;

    sqe->len = buflen;
    sqe->addr = dest;
    sqe->fd = filefd;
    sqe->opcode = IORING_OP_READ;
}
/**
 * Submit an arbitrary SQE to the uring and wait for it to complete.
 *
 * Copies `sqe_to_submit` into the slot selected by the current tail, records
 * that slot in the SQ index array, publishes the advanced tail, and then calls
 * io_uring_enter() with one entry to submit, one completion to wait for, and
 * IORING_ENTER_GETEVENTS.
 *
 * @param uring          Ring state whose mappings are already set up.
 * @param sqe_to_submit  Entry to copy into the SQE buffer.
 * @return 0 on success, -1 if io_uring_enter() reports an error.
 */
int submit_sqe(struct uring *uring, struct io_uring_sqe *sqe_to_submit)
{
    uint32_t curr_tail = *uring->sq_ring.tail;
    uint32_t next_tail = curr_tail + 1;
    uint32_t index = curr_tail & *uring->sq_ring.ring_mask;

    // Copy the SQE into the SQE buffer. This could be constructed in place,
    // but the copy is fine for this example.
    struct io_uring_sqe *sqe = &uring->sqes[index];
    memcpy(sqe, sqe_to_submit, sizeof(struct io_uring_sqe));
    uring->sq_ring.array[index] = index;

    // The entry and its index must be visible before the new tail is: the
    // tail is what tells the consumer that the slot is ready to be read.
    __sync_synchronize();
    *uring->sq_ring.tail = next_tail;
    __sync_synchronize();

    sigset_t sigset;
    sigemptyset(&sigset);
    if (io_uring_enter(uring->fd, 1, 1, IORING_ENTER_GETEVENTS, sigset) < 0) {
        return -1;
    }
    return 0;
}
/**
 * Read one CQE from the uring.
 *
 * Takes the entry at the completion ring's current head, copies it into the
 * caller's `*cqe`, and advances the head by one so the slot can be reused.
 *
 * @param uring  Ring state whose mappings are already set up.
 * @param cqe    Output; receives a copy of the completion entry. Left
 *               untouched when the ring is empty.
 * @return 0 when an entry was copied out, -1 when the ring is empty (a
 *         message is printed to stderr in that case).
 */
int recv_cqe(struct uring *uring, struct io_uring_cqe *cqe)
{
    // Continue from where the ring currently stands, not from zero, so that
    // repeated calls keep working.
    unsigned int head = *uring->cq_ring.head;
    unsigned int tail = *uring->cq_ring.tail;

    // The tail must be read before the entry it announces.
    __sync_synchronize();
    if (head == tail) {
        fprintf(stderr, "Tried to read empty completion ring\n");
        return -1;
    }

    // Copy the entry out; assigning to the `cqe` pointer itself would only
    // change this function's local variable and leave the caller's struct
    // untouched.
    *cqe = uring->cq_ring.cqes[head & (*uring->cq_ring.ring_mask)];

    // Finish reading the entry before handing the slot back.
    __sync_synchronize();
    *uring->cq_ring.head = head + 1;
    __sync_synchronize();
    return 0;
}
/**
 * Read the start of the file named by argv[1] through an io_uring and print
 * it.
 *
 * Opens the file, creates a ring with RING_ENTRIES entries, maps the
 * submission ring, completion ring and SQE buffer, submits a single read of
 * up to BUFLEN - 1 bytes, and prints the buffer as a string.
 *
 * @return 0 on success; exits with status 1 on any failure.
 */
int main(int argc, char *argv[])
{
    if (argc < 2) {
        fprintf(stderr, "Usage: %s [filename]\n", argv[0]);
        exit(1);
    }

    // Read the file
    const char *pathname = argv[1];
    // open() returns a signed int; stored in an unsigned variable the failure
    // value could never compare below zero.
    int fd = open(pathname, O_RDONLY);
    if (fd < 0) {
        fprintf(stderr, "Failed to open file\n");
        exit(1);
    }
    printf("Reading file with fd %d\n", fd);

    // Setup the uring
    struct io_uring_params *params = calloc(1, sizeof *params);
    if (!params) {
        // errno is positive; strerror() expects it as-is.
        fprintf(stderr, "Failed to allocate memory for params: %s\n",
                strerror(errno));
        exit(1);
    }
    int ringfd = io_uring_setup(RING_ENTRIES, params);
    if (ringfd < 0) {
        fprintf(stderr, "Failed to create uring: %s\n", strerror(-ringfd));
        exit(1);
    }
    struct uring uring = {
        .fd = ringfd,
    };
    if (sq_ring_setup(ringfd, params, &uring.sq_ring) < 0) {
        fprintf(stderr, "Failed to setup sq_ring\n");
        exit(1);
    }
    if (cq_ring_setup(ringfd, params, &uring.cq_ring) < 0) {
        fprintf(stderr, "Failed to setup cq_ring\n");
        exit(1);
    }
    if (sqe_buffer_setup(ringfd, params, &uring.sqes) < 0) {
        fprintf(stderr, "Failed to setup sqe buffer\n");
        exit(1);
    }
    // The offsets in params are only needed while the mappings are set up.
    free(params);

    // Create and execute the read operation. The buffer starts zeroed and the
    // request leaves its last byte alone, so whatever the read delivers is
    // always followed by a NUL and can be printed with %s.
    char buf[BUFLEN] = {0};
    struct io_uring_sqe sqe = {0};
    struct io_uring_cqe cqe = {0};
    prep_readv_sqe(&sqe, fd, buf, sizeof buf - 1);
    if (submit_sqe(&uring, &sqe) < 0) {
        fprintf(stderr, "Failed to submit read sqe\n");
        exit(1);
    }
    if (recv_cqe(&uring, &cqe) < 0) {
        fprintf(stderr, "Failed to receive read cqe\n");
        exit(1);
    }
    if (cqe.res < 0) {
        // A failed completion carries a negated errno; flip the sign back.
        fprintf(stderr, "Received CQE with error: %s\n", strerror(-cqe.res));
        exit(1);
    }
    printf("Received: %s\n", buf);

    close(ringfd);
    close(fd);
    return 0;
}
/* Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment */