/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
/*
 * Header file for the io_uring interface.
 *
 * Copyright (C) 2019 Jens Axboe
 * Copyright (C) 2019 Christoph Hellwig
 */
#ifndef LINUX_IO_URING_H
#define LINUX_IO_URING_H

#include <linux/fs.h>
#include <linux/types.h>
/*
 * This file is shared with liburing, which has to autodetect whether
 * linux/time_types.h is available. If it is not, liburing can define
 * UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H to skip the include.
 */
#ifndef UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H
#include <linux/time_types.h>
#endif

#ifdef __cplusplus
extern "C" {
#endif

/*
 * IO submission data structure (Submission Queue Entry)
 */
struct io_uring_sqe {
	…
};

/*
 * If sqe->file_index is set to this for opcodes that instantiate a new
 * direct descriptor (like openat/openat2/accept), then io_uring will allocate
 * an available direct descriptor instead of having the application pass one
 * in. The picked direct descriptor will be returned in cqe->res, or -ENFILE
 * if the space is full.
 */
#define IORING_FILE_INDEX_ALLOC		…

enum io_uring_sqe_flags_bit {
	…
};

/*
 * sqe->flags
 */
/* use fixed fileset */
#define IOSQE_FIXED_FILE	…
/* issue after inflight IO */
#define IOSQE_IO_DRAIN		…
/* links next sqe */
#define IOSQE_IO_LINK		…
/* like LINK, but stronger */
#define IOSQE_IO_HARDLINK	…
/* always go async */
#define IOSQE_ASYNC		…
/* select buffer from sqe->buf_group */
#define IOSQE_BUFFER_SELECT	…
/* don't post CQE if request succeeded */
#define IOSQE_CQE_SKIP_SUCCESS	…

/*
 * io_uring_setup() flags
 */
#define IORING_SETUP_IOPOLL	…
#define IORING_SETUP_SQPOLL	…
#define IORING_SETUP_SQ_AFF	…
#define IORING_SETUP_CQSIZE	…
#define IORING_SETUP_CLAMP	…
#define IORING_SETUP_ATTACH_WQ	…
#define IORING_SETUP_R_DISABLED	…
#define IORING_SETUP_SUBMIT_ALL	…
/*
 * Cooperative task running. When requests complete, they often require
 * forcing the submitter to transition to the kernel to complete. If this
 * flag is set, work will be done when the task transitions anyway, rather
 * than forcing an inter-processor interrupt reschedule. This avoids
 * interrupting a task running in userspace, and saves an IPI.
 */
#define IORING_SETUP_COOP_TASKRUN	…
/*
 * If COOP_TASKRUN is set, get notified if task work is available for
 * running and a kernel transition would be needed to run it. This sets
 * IORING_SQ_TASKRUN in the sq ring flags. Only valid in combination with
 * COOP_TASKRUN (or DEFER_TASKRUN).
 */
#define IORING_SETUP_TASKRUN_FLAG	…
#define IORING_SETUP_SQE128	…
#define IORING_SETUP_CQE32	…
/*
 * Only one task is allowed to submit requests
 */
#define IORING_SETUP_SINGLE_ISSUER	…

/*
 * Defer running task work to get events.
 * Rather than running bits of task work whenever the task transitions,
 * try to do it just before it is needed.
 */
#define IORING_SETUP_DEFER_TASKRUN	…

/*
 * Application provides the memory for the rings
 */
#define IORING_SETUP_NO_MMAP	…

/*
 * Register the ring fd in itself for use with
 * IORING_REGISTER_USE_REGISTERED_RING; return a registered fd index rather
 * than an fd.
 */
#define IORING_SETUP_REGISTERED_FD_ONLY	…

/*
 * Removes indirection through the SQ index array.
 */
#define IORING_SETUP_NO_SQARRAY	…
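/*
 * Example (illustrative sketch, not taken from the kernel sources): how an
 * application might combine the setup flags above when creating a ring with
 * the raw io_uring_setup(2) syscall. Assumes <unistd.h> and <sys/syscall.h>
 * are included; error handling is elided.
 *
 *	struct io_uring_params p = { 0 };
 *
 *	// Ask for a bigger CQ ring than the default of 2 * sq_entries, and
 *	// let the kernel clamp both ring sizes to the supported maximum
 *	// instead of failing with -EINVAL.
 *	p.flags = IORING_SETUP_CQSIZE | IORING_SETUP_CLAMP;
 *	p.cq_entries = 4096;
 *
 *	int ring_fd = syscall(__NR_io_uring_setup, 128, &p);
 *	// On success, p.features carries the IORING_FEAT_* bits (see below)
 *	// and p.sq_off/p.cq_off describe where to mmap(2) the rings.
 */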
enum io_uring_op {
	…
};

/*
 * sqe->uring_cmd_flags		top 8 bits aren't available for userspace
 * IORING_URING_CMD_FIXED	use registered buffer; pass this flag
 *				along with setting sqe->buf_index.
 */
#define IORING_URING_CMD_FIXED	…
#define IORING_URING_CMD_MASK	…

/*
 * sqe->fsync_flags
 */
#define IORING_FSYNC_DATASYNC	…

/*
 * sqe->timeout_flags
 */
#define IORING_TIMEOUT_ABS	…
#define IORING_TIMEOUT_UPDATE	…
#define IORING_TIMEOUT_BOOTTIME	…
#define IORING_TIMEOUT_REALTIME	…
#define IORING_LINK_TIMEOUT_UPDATE	…
#define IORING_TIMEOUT_ETIME_SUCCESS	…
#define IORING_TIMEOUT_MULTISHOT	…
#define IORING_TIMEOUT_CLOCK_MASK	…
#define IORING_TIMEOUT_UPDATE_MASK	…

/*
 * sqe->splice_flags
 * extends splice(2) flags
 */
#define SPLICE_F_FD_IN_FIXED	…

/*
 * POLL_ADD flags. Note that since sqe->poll_events is the flag space, the
 * command flags for POLL_ADD are stored in sqe->len.
 *
 * IORING_POLL_ADD_MULTI	Multishot poll. Sets IORING_CQE_F_MORE if
 *				the poll handler will continue to report
 *				CQEs on behalf of the same SQE.
 *
 * IORING_POLL_UPDATE		Update existing poll request, matching
 *				sqe->addr as the old user_data field.
 *
 * IORING_POLL_LEVEL		Level triggered poll.
 */
#define IORING_POLL_ADD_MULTI	…
#define IORING_POLL_UPDATE_EVENTS	…
#define IORING_POLL_UPDATE_USER_DATA	…
#define IORING_POLL_ADD_LEVEL	…

/*
 * ASYNC_CANCEL flags.
 *
 * IORING_ASYNC_CANCEL_ALL	Cancel all requests that match the given key
 * IORING_ASYNC_CANCEL_FD	Key off 'fd' for cancelation rather than the
 *				request 'user_data'
 * IORING_ASYNC_CANCEL_ANY	Match any request
 * IORING_ASYNC_CANCEL_FD_FIXED	'fd' passed in is a fixed descriptor
 * IORING_ASYNC_CANCEL_USERDATA	Match on user_data, default for no other key
 * IORING_ASYNC_CANCEL_OP	Match request based on opcode
 */
#define IORING_ASYNC_CANCEL_ALL	…
#define IORING_ASYNC_CANCEL_FD	…
#define IORING_ASYNC_CANCEL_ANY	…
#define IORING_ASYNC_CANCEL_FD_FIXED	…
#define IORING_ASYNC_CANCEL_USERDATA	…
#define IORING_ASYNC_CANCEL_OP	…

/*
 * send/sendmsg and recv/recvmsg flags (sqe->ioprio)
 *
 * IORING_RECVSEND_POLL_FIRST	If set, instead of first attempting to send
 *				or receive and arm poll if that yields an
 *				-EAGAIN result, arm poll upfront and skip
 *				the initial transfer attempt.
 *
 * IORING_RECV_MULTISHOT	Multishot recv. Sets IORING_CQE_F_MORE if
 *				the handler will continue to report
 *				CQEs on behalf of the same SQE.
 *
 * IORING_RECVSEND_FIXED_BUF	Use registered buffers, the index is stored in
 *				the buf_index field.
 *
 * IORING_SEND_ZC_REPORT_USAGE
 *				If set, SEND[MSG]_ZC should report
 *				the zerocopy usage in cqe.res
 *				for the IORING_CQE_F_NOTIF cqe.
 *				0 is reported if zerocopy was actually possible.
 *				IORING_NOTIF_USAGE_ZC_COPIED if data was copied
 *				(at least partially).
 *
 * IORING_RECVSEND_BUNDLE	Used with IOSQE_BUFFER_SELECT. If set, send or
 *				recv will grab as many buffers as are available
 *				from the given buffer group ID and send them
 *				all. The completion result will be the number
 *				of buffers sent, with the starting buffer ID in
 *				cqe->flags as per usual for provided buffer
 *				usage. The buffers will be contiguous from the
 *				starting buffer ID.
 */
#define IORING_RECVSEND_POLL_FIRST	…
#define IORING_RECV_MULTISHOT		…
#define IORING_RECVSEND_FIXED_BUF	…
#define IORING_SEND_ZC_REPORT_USAGE	…
#define IORING_RECVSEND_BUNDLE		…
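/*
 * Example (illustrative sketch, not taken from the kernel sources): arming
 * a multishot receive that picks buffers from a provided buffer group.
 * Assumes sqe points at a zeroed struct io_uring_sqe in the SQ ring, sockfd
 * is a connected socket, and buffer group 0 was registered beforehand
 * (e.g. via IORING_REGISTER_PBUF_RING).
 *
 *	sqe->opcode = IORING_OP_RECV;
 *	sqe->fd = sockfd;
 *	sqe->ioprio = IORING_RECV_MULTISHOT;	// recv flags live in sqe->ioprio
 *	sqe->flags = IOSQE_BUFFER_SELECT;	// let the kernel pick a buffer
 *	sqe->buf_group = 0;			// provided buffer group ID
 *
 *	// Each completion sets IORING_CQE_F_BUFFER with the chosen buffer ID
 *	// in the upper 16 bits of cqe->flags, and IORING_CQE_F_MORE for as
 *	// long as the request remains armed.
 */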
/*
 * cqe.res for IORING_CQE_F_NOTIF if
 * IORING_SEND_ZC_REPORT_USAGE was requested
 *
 * It should be treated as a flag; all other
 * bits of cqe.res should be treated as reserved!
 */
#define IORING_NOTIF_USAGE_ZC_COPIED	…

/*
 * accept flags stored in sqe->ioprio
 */
#define IORING_ACCEPT_MULTISHOT	…
#define IORING_ACCEPT_DONTWAIT	…
#define IORING_ACCEPT_POLL_FIRST	…

/*
 * IORING_OP_MSG_RING command types, stored in sqe->addr
 */
enum io_uring_msg_ring_flags {
	…
};

/*
 * IORING_OP_MSG_RING flags (sqe->msg_ring_flags)
 *
 * IORING_MSG_RING_CQE_SKIP	Don't post a CQE to the target ring. Not
 *				applicable for IORING_MSG_DATA, obviously.
 */
#define IORING_MSG_RING_CQE_SKIP	…
/* Pass through the flags from sqe->file_index to cqe->flags */
#define IORING_MSG_RING_FLAGS_PASS	…

/*
 * IORING_OP_FIXED_FD_INSTALL flags (sqe->install_fd_flags)
 *
 * IORING_FIXED_FD_NO_CLOEXEC	Don't mark the fd as O_CLOEXEC
 */
#define IORING_FIXED_FD_NO_CLOEXEC	…

/*
 * IORING_OP_NOP flags (sqe->nop_flags)
 *
 * IORING_NOP_INJECT_RESULT	Inject result from sqe->result
 */
#define IORING_NOP_INJECT_RESULT	…

/*
 * IO completion data structure (Completion Queue Entry)
 */
struct io_uring_cqe {
	…
};

/*
 * cqe->flags
 *
 * IORING_CQE_F_BUFFER	If set, the upper 16 bits are the buffer ID
 * IORING_CQE_F_MORE	If set, parent SQE will generate more CQE entries
 * IORING_CQE_F_SOCK_NONEMPTY	If set, more data to read after socket recv
 * IORING_CQE_F_NOTIF	Set for notification CQEs. Can be used to distinguish
 *			them from sends.
 * IORING_CQE_F_BUF_MORE If set, the buffer ID set in the completion will get
 *			more completions. In other words, the buffer is being
 *			partially consumed, and will be used by the kernel for
 *			more completions. This is only set for buffers used via
 *			incremental buffer consumption, as provided by
 *			a ring buffer setup with IOU_PBUF_RING_INC. For any
 *			other provided buffer type, any buffer passed back in
 *			a completion is automatically returned to the
 *			application.
 */
#define IORING_CQE_F_BUFFER	…
#define IORING_CQE_F_MORE	…
#define IORING_CQE_F_SOCK_NONEMPTY	…
#define IORING_CQE_F_NOTIF	…
#define IORING_CQE_F_BUF_MORE	…

#define IORING_CQE_BUFFER_SHIFT	…

/*
 * Magic offsets for the application to mmap the data it needs
 */
#define IORING_OFF_SQ_RING	…
#define IORING_OFF_CQ_RING	…
#define IORING_OFF_SQES		…
#define IORING_OFF_PBUF_RING	…
#define IORING_OFF_PBUF_SHIFT	…
#define IORING_OFF_MMAP_MASK	…

/*
 * Filled with the offset for mmap(2)
 */
struct io_sqring_offsets {
	…
};

/*
 * sq_ring->flags
 */
#define IORING_SQ_NEED_WAKEUP	…
#define IORING_SQ_CQ_OVERFLOW	…
#define IORING_SQ_TASKRUN	…

struct io_cqring_offsets {
	…
};

/*
 * cq_ring->flags
 */
/* disable eventfd notifications */
#define IORING_CQ_EVENTFD_DISABLED	…

/*
 * io_uring_enter(2) flags
 */
#define IORING_ENTER_GETEVENTS	…
#define IORING_ENTER_SQ_WAKEUP	…
#define IORING_ENTER_SQ_WAIT	…
#define IORING_ENTER_EXT_ARG	…
#define IORING_ENTER_REGISTERED_RING	…
#define IORING_ENTER_ABS_TIMER	…
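/*
 * Example (illustrative sketch, not taken from the kernel sources): a
 * minimal submit-and-wait with the raw io_uring_enter(2) syscall, assuming
 * to_submit SQEs have already been written to the SQ ring and the ring tail
 * has been published.
 *
 *	// Submit the queued SQEs and block until at least one CQE is
 *	// available. The last two arguments are the sigset_t pointer and
 *	// its size, unused here.
 *	int ret = syscall(__NR_io_uring_enter, ring_fd, to_submit, 1,
 *			  IORING_ENTER_GETEVENTS, NULL, 0);
 *	// On success, ret is the number of SQEs consumed.
 */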
/*
 * Passed in for io_uring_setup(2). Copied back with updated info on success
 */
struct io_uring_params {
	…
};

/*
 * io_uring_params->features flags
 */
#define IORING_FEAT_SINGLE_MMAP	…
#define IORING_FEAT_NODROP	…
#define IORING_FEAT_SUBMIT_STABLE	…
#define IORING_FEAT_RW_CUR_POS	…
#define IORING_FEAT_CUR_PERSONALITY	…
#define IORING_FEAT_FAST_POLL	…
#define IORING_FEAT_POLL_32BITS	…
#define IORING_FEAT_SQPOLL_NONFIXED	…
#define IORING_FEAT_EXT_ARG	…
#define IORING_FEAT_NATIVE_WORKERS	…
#define IORING_FEAT_RSRC_TAGS	…
#define IORING_FEAT_CQE_SKIP	…
#define IORING_FEAT_LINKED_FILE	…
#define IORING_FEAT_REG_REG_RING	…
#define IORING_FEAT_RECVSEND_BUNDLE	…
#define IORING_FEAT_MIN_TIMEOUT	…

/*
 * io_uring_register(2) opcodes and arguments
 */
enum io_uring_register_op {
	…
};

/* io-wq worker categories */
enum io_wq_type {
	…
};

/* deprecated, see struct io_uring_rsrc_update */
struct io_uring_files_update {
	…
};

/*
 * Register a fully sparse file space, rather than pass in an array of all
 * -1 file descriptors.
 */
#define IORING_RSRC_REGISTER_SPARSE	…

struct io_uring_rsrc_register {
	…
};

struct io_uring_rsrc_update {
	…
};

struct io_uring_rsrc_update2 {
	…
};

/* Skip updating fd indexes set to this value in the fd table */
#define IORING_REGISTER_FILES_SKIP	…

#define IO_URING_OP_SUPPORTED	…

struct io_uring_probe_op {
	…
};

struct io_uring_probe {
	…
};

struct io_uring_restriction {
	…
};

struct io_uring_clock_register {
	…
};

enum {
	…
};

struct io_uring_clone_buffers {
	…
};

struct io_uring_buf {
	…
};

struct io_uring_buf_ring {
	…
};

/*
 * Flags for IORING_REGISTER_PBUF_RING.
 *
 * IOU_PBUF_RING_MMAP:	If set, kernel will allocate the memory for the ring.
 *			The application must not set a ring_addr in struct
 *			io_uring_buf_reg, instead it must subsequently call
 *			mmap(2) with the offset set as:
 *			IORING_OFF_PBUF_RING | (bgid << IORING_OFF_PBUF_SHIFT)
 *			to get a virtual mapping for the ring.
 * IOU_PBUF_RING_INC:	If set, buffers consumed from this buffer ring can be
 *			consumed incrementally. Normally one (or more) buffers
 *			are fully consumed. With incremental consumption, it's
 *			feasible to register big ranges of buffers, and each
 *			use of it will consume only as much as it needs. This
 *			requires that both the kernel and application keep
 *			track of where the current read/recv index is at.
 */
enum io_uring_register_pbuf_ring_flags {
	…
};

/* argument for IORING_(UN)REGISTER_PBUF_RING */
struct io_uring_buf_reg {
	…
};

/* argument for IORING_REGISTER_PBUF_STATUS */
struct io_uring_buf_status {
	…
};

/* argument for IORING_(UN)REGISTER_NAPI */
struct io_uring_napi {
	…
};

/*
 * io_uring_restriction->opcode values
 */
enum io_uring_register_restriction_op {
	…
};

struct io_uring_getevents_arg {
	…
};

/*
 * Argument for IORING_REGISTER_SYNC_CANCEL
 */
struct io_uring_sync_cancel_reg {
	…
};

/*
 * Argument for IORING_REGISTER_FILE_ALLOC_RANGE
 * The range is specified as [off, off + len)
 */
struct io_uring_file_index_range {
	…
};

struct io_uring_recvmsg_out {
	…
};

/*
 * Argument for IORING_OP_URING_CMD when file is a socket
 */
enum io_uring_socket_op {
	…
};

#ifdef __cplusplus
}
#endif

#endif
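/*
 * Example (illustrative sketch, not taken from the kernel sources):
 * registering an application-allocated provided buffer ring with
 * IORING_REGISTER_PBUF_RING through the raw io_uring_register(2) syscall.
 * Assumes ring_mem points at page-aligned memory large enough for eight
 * struct io_uring_buf entries; ring_entries must be a power of two.
 *
 *	struct io_uring_buf_reg reg = { 0 };
 *
 *	reg.ring_addr = (__u64)(unsigned long)ring_mem;
 *	reg.ring_entries = 8;
 *	reg.bgid = 0;		// buffer group ID used by IOSQE_BUFFER_SELECT
 *
 *	int ret = syscall(__NR_io_uring_register, ring_fd,
 *			  IORING_REGISTER_PBUF_RING, &reg, 1);
 */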