diff --git a/configure.ac b/configure.ac index e0ae5df3f..6a804fb79 100644 --- a/configure.ac +++ b/configure.ac @@ -966,6 +966,27 @@ fi AM_CONDITIONAL([AIO], [[[[ $aio = yes ]]]]) +dnl +dnl Linux io_uring support +dnl + +AM_COND_IF(LINUX, +[ + AC_ARG_ENABLE(io_uring, AC_HELP_STRING([--disable-io_uring], [Disable kernel io_uring support]), + [ + io_uring=$enableval + ], [ + io_uring="$ac_cv_header_linux_io_uring_h" + ]) +]) + +AM_CONDITIONAL([IOU], [[[[ $io_uring = yes ]]]]) + +AM_COND_IF([IOU], +[ + IRCD_DEFINE(USE_IOU, [1], [Linux io_uring is supported and may be used]) +]) + dnl *************************************************************************** dnl @@ -1900,6 +1921,7 @@ echo "Crypto support .................... $have_crypto" echo "Magic support ..................... $have_magic" echo "ImageMagick support ............... $have_imagemagick" echo "Linux AIO support ................. $aio" +echo "Linux io_uring support ............ $io_uring" echo "IPv6 support ...................... $ipv6" echo "Precompiled headers ............... $build_pch" echo "Developer debug ................... $debug" diff --git a/construct/construct.cc b/construct/construct.cc index 888ab1a75..585f9c938 100644 --- a/construct/construct.cc +++ b/construct/construct.cc @@ -27,6 +27,7 @@ bool pitrecdb; bool nojs; bool nodirect; bool noaio; +bool noiou; bool no6; bool yes6; bool norun; @@ -51,6 +52,7 @@ lgetopt opts[] { "nojs", &nojs, lgetopt::BOOL, "Disable SpiderMonkey JS subsystem from initializing. (noop when not available)." }, { "nodirect", &nodirect, lgetopt::BOOL, "Disable direct IO (O_DIRECT) for unsupporting filesystems." }, { "noaio", &noaio, lgetopt::BOOL, "Disable the AIO interface in favor of traditional syscalls. " }, + { "noiou", &noiou, lgetopt::BOOL, "Disable the io_uring interface and fallback to AIO or system calls. " }, { "no6", &no6, lgetopt::BOOL, "Disable IPv6 operations (default)" }, { "6", &yes6, lgetopt::BOOL, "Enable IPv6 operations" }, { "norun", &norun, lgetopt::BOOL, "[debug & testing only] Initialize but never run the event loop." }, @@ -360,8 +362,9 @@ applyargs() if(noaio) ircd::fs::aio::enable.set("false"); - else - ircd::fs::aio::enable.set("true"); + + if(noiou) + ircd::fs::iou::enable.set("false"); if(yes6) ircd::net::enable_ipv6.set("true"); diff --git a/include/ircd/fs/fs.h b/include/ircd/fs/fs.h index e407e7fdc..df88e589f 100644 --- a/include/ircd/fs/fs.h +++ b/include/ircd/fs/fs.h @@ -63,6 +63,7 @@ namespace ircd::fs #include "write.h" #include "sync.h" #include "aio.h" +#include "iou.h" #include "stdin.h" #include "support.h" @@ -87,6 +88,7 @@ namespace ircd::fs /// Filesystem interface init / fini held by ircd::main(). struct ircd::fs::init { + iou::init _iou_; aio::init _aio_; init(); diff --git a/include/ircd/fs/iou.h b/include/ircd/fs/iou.h new file mode 100644 index 000000000..82648dace --- /dev/null +++ b/include/ircd/fs/iou.h @@ -0,0 +1,104 @@ +// Matrix Construct +// +// Copyright (C) Matrix Construct Developers, Authors & Contributors +// Copyright (C) 2016-2019 Jason Volk +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice is present in all copies. The +// full license for this software is available in the LICENSE file. + +#pragma once +#define HAVE_IRCD_FS_IOU_H + +// Public and unconditional interface for io_uring. This file is part of the +// standard include stack and available whether or not this platform is Linux +// with io_uring (>= 5.1), and whether or not it's enabled, etc. If it is not +// most of this stuff does nothing and will have null values. + +extern "C" +{ + struct io_uring_sqe; +} + +/// Input/Output Userspace Ring buffering. +/// +/// Note that fs::aio and fs::iou are never used simultaneously. If io_uring +/// is supported by both the compilation and the kernel at runtime then it +/// will be selected over AIO. +namespace ircd::fs::iou +{ + struct init; + struct stats; + struct system; + struct request; + enum state :uint8_t; + + // a priori + extern const bool support; + extern const size_t MAX_EVENTS; + + // configuration + extern conf::item enable; + extern conf::item max_events; + extern conf::item max_submit; + + // runtime state + extern struct stats stats; + extern struct system *system; + + // util + string_view reflect(const state &); + const_iovec_view iovec(const request &); + const struct ::io_uring_sqe &sqe(const request &); + struct ::io_uring_sqe &sqe(request &); + + // iterate requests + static bool for_each(const state &, const std::function &); + static bool for_each(const std::function &); + + // count requests + static size_t count(const state &, const op &); + static size_t count(const state &); + static size_t count(const op &); +} + +struct ircd::fs::iou::request +{ + const fs::opts *opts {nullptr}; + fs::op op {fs::op::NOOP}; + std::error_code ec; + int32_t res {-1}; + int32_t id {-1}; + + request() = default; + request(const fd &, const const_iovec_view &, const fs::opts *const &); + ~request() noexcept; +}; + +/// Enumeration of states for a request. +enum ircd::fs::iou::state +:uint8_t +{ + INVALID, + QUEUED, + SUBMITTED, + COMPLETED, + + _NUM +}; + +/// Reuse the same stats structure as fs::aio in fs::iou +struct ircd::fs::iou::stats +:aio::stats +{ + using aio::stats::stats; +}; + +/// Internal use; this is simply declared here for when internal headers are +/// not available for this build so a weak no-op definition can be defined. +struct ircd::fs::iou::init +{ + init(); + ~init() noexcept; +}; diff --git a/include/ircd/fs/op.h b/include/ircd/fs/op.h index faec793ee..e50421ecf 100644 --- a/include/ircd/fs/op.h +++ b/include/ircd/fs/op.h @@ -23,6 +23,11 @@ namespace ircd::fs::aio op translate(const int &); } +namespace ircd::fs::iou +{ + op translate(const int &); +} + /// The enumerated operation code to identify the type of request being /// made at runtime from any abstract list of requests. /// diff --git a/include/ircd/fs/opts.h b/include/ircd/fs/opts.h index c40096ac3..32abd78d3 100644 --- a/include/ircd/fs/opts.h +++ b/include/ircd/fs/opts.h @@ -44,11 +44,13 @@ struct ircd::fs::opts /// file is ineffective for regular files. bool blocking {true}; - /// Determines whether this operation is conducted via AIO. If not, a - /// direct syscall is made. Using AIO will only block one ircd::ctx while - /// a direct syscall will block the thread (all contexts). If AIO is not - /// available or not enabled, or doesn't support this operation, setting - /// this has no effect. + /// Determines whether this operation is conducted via AIO/io_uring. If + /// not, a direct syscall is made. Using AIO will only block one ircd::ctx + /// while a direct syscall will block the thread (all contexts). If AIO is + /// not available or not enabled, or doesn't support this operation, + /// setting this has no effect. If this system supports io_uring this + /// option is reused to indicate use of io_uring (as AIO and io_uring are + /// not used simultaneously). bool aio {true}; /// The enumerated operation code to identify the type of request being diff --git a/ircd/Makefile.am b/ircd/Makefile.am index 80f666e78..f163376a1 100644 --- a/ircd/Makefile.am +++ b/ircd/Makefile.am @@ -157,6 +157,9 @@ libircd_la_SOURCES += ctx_ole.cc if AIO libircd_la_SOURCES += fs_aio.cc endif +if IOU +libircd_la_SOURCES += fs_iou.cc +endif libircd_la_SOURCES += mods.cc if LINUX libircd_la_SOURCES += mods_ldso.cc diff --git a/ircd/fs.cc b/ircd/fs.cc index 1c5a3eb95..306365fc9 100644 --- a/ircd/fs.cc +++ b/ircd/fs.cc @@ -20,6 +20,10 @@ #include "fs_aio.h" #endif +#ifdef IRCD_USE_IOU + #include "fs_iou.h" +#endif + namespace ircd::fs { static uint posix_flags(const std::ios::openmode &mode); @@ -479,6 +483,11 @@ ircd::fs::flush(const fd &fd, { assert(opts.op == op::SYNC); + #ifdef IRCD_USE_IOU + if(iou::system && opts.aio) + return iou::fsync(fd, opts); + #endif + #ifdef IRCD_USE_AIO if(aio::system && opts.aio) { @@ -738,6 +747,11 @@ ircd::fs::read(const fd &fd, { assert(opts.op == op::READ); + #ifdef IRCD_USE_IOU + if(iou::system && opts.aio) + return iou::read(fd, iov, opts); + #endif + #ifdef IRCD_USE_AIO if(aio::system && opts.aio) return aio::read(fd, iov, opts); @@ -1114,6 +1128,11 @@ ircd::fs::write(const fd &fd, { assert(opts.op == op::WRITE); + #ifdef IRCD_USE_IOU + if(iou::system && opts.aio) + return iou::write(fd, iov, opts); + #endif + #ifdef IRCD_USE_AIO if(aio::system && opts.aio) return aio::write(fd, iov, opts); @@ -1327,49 +1346,23 @@ ircd::fs::reflect(const ready &ready) // fs/aio.h // -// -// These symbols can be overriden by ircd/aio.cc if it is compiled and linked; -// otherwise on non-supporting platforms these will be the defaults here. -// - decltype(ircd::fs::aio::support) -extern __attribute__((weak)) ircd::fs::aio::support { - false -}; - -decltype(ircd::fs::aio::support_fsync) -extern __attribute__((weak)) -ircd::fs::aio::support_fsync -{ - info::kernel_version[0] > 4 || - (info::kernel_version[0] >= 4 && info::kernel_version[1] >= 18) -}; - -decltype(ircd::fs::aio::support_fdsync) -extern __attribute__((weak)) -ircd::fs::aio::support_fdsync -{ - info::kernel_version[0] > 4 || - (info::kernel_version[0] >= 4 && info::kernel_version[1] >= 18) -}; - -decltype(ircd::fs::aio::MAX_EVENTS) -extern __attribute__((weak)) -ircd::fs::aio::MAX_EVENTS -{ - 0 + #ifdef IRCD_USE_AIO + true + #else + false + #endif }; decltype(ircd::fs::aio::MAX_REQPRIO) -extern __attribute__((weak)) ircd::fs::aio::MAX_REQPRIO { - 20 + info::aio_reqprio_max }; -/// Conf item to control whether AIO is enabled or bypassed. +/// Conf item to control whether aio is enabled or bypassed. decltype(ircd::fs::aio::enable) ircd::fs::aio::enable { @@ -1378,22 +1371,6 @@ ircd::fs::aio::enable { "persist", false }, }; -decltype(ircd::fs::aio::max_events) -ircd::fs::aio::max_events -{ - { "name", "ircd.fs.aio.max_events" }, - { "default", long(aio::MAX_EVENTS) }, - { "persist", false }, -}; - -decltype(ircd::fs::aio::max_submit) -ircd::fs::aio::max_submit -{ - { "name", "ircd.fs.aio.max_submit" }, - { "default", 0L }, - { "persist", false }, -}; - /// Global stats structure decltype(ircd::fs::aio::stats) ircd::fs::aio::stats; @@ -1406,24 +1383,68 @@ ircd::fs::aio::system; // init // -#ifndef IRCD_USE_AIO +__attribute__((weak)) ircd::fs::aio::init::init() { assert(!system); - log::warning - { - log, "No support for asynchronous local filesystem IO with AIO..." - }; } -#endif -#ifndef IRCD_USE_AIO +__attribute__((weak)) ircd::fs::aio::init::~init() noexcept { assert(!system); } -#endif + +/////////////////////////////////////////////////////////////////////////////// +// +// fs/iou.h +// + +decltype(ircd::fs::iou::support) +ircd::fs::iou::support +{ + #ifdef IRCD_USE_IOU + info::kernel_version[0] > 5 || + (info::kernel_version[0] >= 5 && info::kernel_version[1] >= 1) + #else + false + #endif +}; + +/// Conf item to control whether iou is enabled or bypassed. +decltype(ircd::fs::iou::enable) +ircd::fs::iou::enable +{ + { "name", "ircd.fs.iou.enable" }, + { "default", false }, + { "persist", false }, +}; + +/// Global stats structure +decltype(ircd::fs::iou::stats) +ircd::fs::iou::stats; + +/// Non-null when iou is available for use +decltype(ircd::fs::iou::system) +ircd::fs::iou::system; + +// +// init +// + +__attribute__((weak)) +ircd::fs::iou::init::init() +{ + assert(!system); +} + +__attribute__((weak)) +ircd::fs::iou::init::~init() +noexcept +{ + assert(!system); +} /////////////////////////////////////////////////////////////////////////////// // @@ -1877,6 +1898,14 @@ ircd::fs::aio::translate(const int &val) } #endif +#ifndef IRCD_USE_IOU +ircd::fs::op +ircd::fs::iou::translate(const int &val) +{ + return op::NOOP; +} +#endif + /////////////////////////////////////////////////////////////////////////////// // // fs/iov.h @@ -2361,9 +2390,17 @@ ircd::fs::error::error(const boost::filesystem::filesystem_error &e) void ircd::fs::debug_support() { + const bool support_async + { + false + || iou::support + || aio::support + }; + log::info { - log, "Supports preadv2:%b pwritev2:%b SYNC:%b DSYNC:%b HIPRI:%b NOWAIT:%b APPEND:%b RWH:%b WLH:%b", + log, "Supports async:%b preadv2:%b pwritev2:%b SYNC:%b DSYNC:%b HIPRI:%b NOWAIT:%b APPEND:%b RWH:%b WLH:%b", + support_async, support_preadv2, support_pwritev2, support_sync, @@ -2374,6 +2411,24 @@ ircd::fs::debug_support() support_rwh_write_life, support_rwf_write_life, }; + + if(support_async) + log::info + { + log, "Asynchronous filesystem IO provided by %s %s.", + "Linux", + iou::support? + "io_uring": + aio::support? + "AIO": + "?????", + }; + else + log::warning + { + log, "Support for asynchronous filesystem IO has not been" + " established. Filesystem IO is degraded to synchronous system calls." + }; } void diff --git a/ircd/fs_aio.cc b/ircd/fs_aio.cc index c5cead44c..43258279d 100644 --- a/ircd/fs_aio.cc +++ b/ircd/fs_aio.cc @@ -16,17 +16,6 @@ /////////////////////////////////////////////////////////////////////////////// // // ircd/fs/aio.h -// -// The contents of this section override weak symbols in ircd/fs.cc when this -// unit is conditionally compiled and linked on AIO-supporting platforms. On -// non-supporting platforms, or for items not listed here, the definitions in -// ircd/fs.cc are the default. - -decltype(ircd::fs::aio::support) -ircd::fs::aio::support -{ - true -}; /// True if IOCB_CMD_FSYNC is supported by AIO. If this is false then /// fs::fsync_opts::async=true flag is ignored. @@ -60,10 +49,20 @@ ircd::fs::aio::MAX_EVENTS 128L //TODO: get this info }; -decltype(ircd::fs::aio::MAX_REQPRIO) -ircd::fs::aio::MAX_REQPRIO +decltype(ircd::fs::aio::max_events) +ircd::fs::aio::max_events { - info::aio_reqprio_max + { "name", "ircd.fs.aio.max_events" }, + { "default", long(aio::MAX_EVENTS) }, + { "persist", false }, +}; + +decltype(ircd::fs::aio::max_submit) +ircd::fs::aio::max_submit +{ + { "name", "ircd.fs.aio.max_submit" }, + { "default", 0L }, + { "persist", false }, }; // @@ -73,9 +72,17 @@ ircd::fs::aio::MAX_REQPRIO ircd::fs::aio::init::init() { assert(!system); - if(!bool(aio::enable)) + if(!aio::enable) return; + // Don't init AIO if the io_uring is established. If it is, that means it + // was supported by the build, this kernel, and didn't encounter an error + // to construct. In all other cases AIO can serve as a fallback. + #if defined(IRCD_USE_IOU) + if(iou::system) + return; + #endif + system = new struct aio::system ( size_t(max_events), diff --git a/ircd/fs_iou.cc b/ircd/fs_iou.cc new file mode 100644 index 000000000..adf5ac0ee --- /dev/null +++ b/ircd/fs_iou.cc @@ -0,0 +1,603 @@ +// Matrix Construct +// +// Copyright (C) Matrix Construct Developers, Authors & Contributors +// Copyright (C) 2016-2019 Jason Volk +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice is present in all copies. The +// full license for this software is available in the LICENSE file. + +#include +#include +#include +#include "fs_iou.h" + +decltype(ircd::fs::iou::MAX_EVENTS) +ircd::fs::iou::MAX_EVENTS +{ + 128L //TODO: get this info +}; + +decltype(ircd::fs::iou::max_events) +ircd::fs::iou::max_events +{ + { "name", "ircd.fs.iou.max_events" }, + { "default", long(iou::MAX_EVENTS) }, + { "persist", false }, +}; + +decltype(ircd::fs::iou::max_submit) +ircd::fs::iou::max_submit +{ + { "name", "ircd.fs.iou.max_submit" }, + { "default", 0L }, + { "persist", false }, +}; + +// +// init +// + +ircd::fs::iou::init::init() +{ + assert(!system); + if(!iou::enable) + return; + + system = new struct iou::system + ( + size_t(max_events), + size_t(max_submit) + ); +} + +ircd::fs::iou::init::~init() +noexcept +{ + delete system; + system = nullptr; +} + +/////////////////////////////////////////////////////////////////////////////// +// +// ircd/fs/op.h +// +// The contents of this section override weak symbols in ircd/fs.cc when this +// unit is conditionally compiled and linked on IOU-supporting platforms. + +ircd::fs::op +ircd::fs::iou::translate(const int &val) +{ + switch(val) + { + case IORING_OP_NOP: return op::NOOP; + case IORING_OP_READV: return op::READ; + case IORING_OP_WRITEV: return op::WRITE; + case IORING_OP_FSYNC: return op::SYNC; + case IORING_OP_READ_FIXED: return op::READ; + case IORING_OP_WRITE_FIXED: return op::WRITE; + case IORING_OP_POLL_ADD: return op::NOOP; + case IORING_OP_POLL_REMOVE: return op::NOOP; + case IORING_OP_SYNC_FILE_RANGE: return op::SYNC; + } + + return op::NOOP; +} + +/////////////////////////////////////////////////////////////////////////////// +// +// fs/iou.h +// + +size_t +ircd::fs::iou::count(const op &op) +{ + return 0; +} + +size_t +ircd::fs::iou::count(const state &state) +{ + return 0; +} + +size_t +ircd::fs::iou::count(const state &state, + const op &op) +{ + return 0; +} + +bool +ircd::fs::iou::for_each(const state &state, + const std::function &closure) +{ + assert(system); + return true; +} + +bool +ircd::fs::iou::for_each(const std::function &closure) +{ + assert(system); + return true; +} + +struct ::io_uring_sqe & +ircd::fs::iou::sqe(request &request) +{ + assert(system); + ::io_uring_sqe *const ret + { + nullptr + }; + + if(request.id < 0) + throw std::out_of_range + { + "request has no entry on the submit queue." + }; + + assert(ret); + return *ret; +} + +const struct ::io_uring_sqe & +ircd::fs::iou::sqe(const request &request) +{ + assert(system); + const ::io_uring_sqe *const ret + { + nullptr + }; + + if(request.id < 0) + throw std::out_of_range + { + "request has no entry on the submit queue." + }; + + assert(ret); + return *ret; +} + +ircd::string_view +ircd::fs::iou::reflect(const state &s) +{ + switch(s) + { + case state::INVALID: return "INVALID"; + case state::QUEUED: return "QUEUED"; + case state::SUBMITTED: return "SUBMITTED"; + case state::COMPLETED: return "COMPLETED"; + case state::_NUM: break; + } + + return "?????"; +} + +ircd::fs::const_iovec_view +ircd::fs::iou::iovec(const request &request) +{ + return + { + //reinterpret_cast(iou_buf), iou_nbytes + }; +} + +// +// request::request +// + +ircd::fs::iou::request::request(const fs::fd &fd, + const const_iovec_view &iov, + const fs::opts *const &opts) +:opts +{ + opts +} +,op +{ + fs::op::NOOP +} +{ +} + +ircd::fs::iou::request::~request() +noexcept +{ +} + +// +// system::system +// + +ircd::fs::iou::system::system(const size_t &max_events, + const size_t &max_submit) +try +:p +{ + 0 +} +,fd +{ + int(syscall<__NR_io_uring_setup>(max_events, &p)) +} +,sq_len +{ + p.sq_off.array + p.sq_entries * sizeof(uint32_t) +} +,cq_len +{ + p.cq_off.cqes + p.cq_entries * sizeof(::io_uring_cqe) +} +,sqe_len +{ + p.sq_entries * sizeof(::io_uring_sqe) +} +,sq_p +{ + [this] + { + static const auto prot(PROT_READ | PROT_WRITE); + static const auto flags(MAP_SHARED | MAP_POPULATE); + void *const &map + { + ::mmap(NULL, sq_len, prot, flags, fd, IORING_OFF_SQ_RING) + }; + + if(unlikely(map == MAP_FAILED)) + { + throw_system_error(errno); + __builtin_unreachable(); + } + + return reinterpret_cast(map); + }(), + [this](uint8_t *const ptr) + { + syscall(::munmap, ptr, sq_len); + } +} +,cq_p +{ + [this] + { + static const auto prot(PROT_READ | PROT_WRITE); + static const auto flags(MAP_SHARED | MAP_POPULATE); + void *const &map + { + ::mmap(NULL, cq_len, prot, flags, fd, IORING_OFF_CQ_RING) + }; + + if(unlikely(map == MAP_FAILED)) + { + throw_system_error(errno); + __builtin_unreachable(); + } + + return reinterpret_cast(map); + }(), + [this](uint8_t *const ptr) + { + syscall(::munmap, ptr, cq_len); + } +} +,sqe_p +{ + [this] + { + static const auto prot(PROT_READ | PROT_WRITE); + static const auto flags(MAP_SHARED | MAP_POPULATE); + void *const &map + { + ::mmap(NULL, sqe_len, prot, flags, fd, IORING_OFF_SQES) + }; + + if(unlikely(map == MAP_FAILED)) + { + throw_system_error(errno); + __builtin_unreachable(); + } + + return reinterpret_cast(map); + }(), + [this](uint8_t *const ptr) + { + syscall(::munmap, ptr, sqe_len); + } +} +,head +{ + reinterpret_cast(sq_p.get() + p.sq_off.head), + reinterpret_cast(cq_p.get() + p.cq_off.head), +} +,tail +{ + reinterpret_cast(sq_p.get() + p.sq_off.tail), + reinterpret_cast(cq_p.get() + p.cq_off.tail), +} +,ring_mask +{ + reinterpret_cast(sq_p.get() + p.sq_off.ring_mask), + reinterpret_cast(cq_p.get() + p.cq_off.ring_mask), +} +,ring_entries +{ + reinterpret_cast(sq_p.get() + p.sq_off.ring_entries), + reinterpret_cast(cq_p.get() + p.cq_off.ring_entries), +} +,flags +{ + reinterpret_cast(sq_p.get() + p.sq_off.flags), + nullptr +} +,dropped +{ + reinterpret_cast(sq_p.get() + p.sq_off.dropped), + nullptr +} +,overflow +{ + nullptr, + reinterpret_cast(cq_p.get() + p.cq_off.overflow), +} +,sq +{ + reinterpret_cast(sq_p.get() + p.sq_off.array) +} +,sqe +{ + reinterpret_cast<::io_uring_sqe *>(sqe_p.get()) +} +,cqe +{ + reinterpret_cast<::io_uring_cqe *>(cq_p.get() + p.cq_off.cqes) +} +,ev_count +{ + 0 +} +,ev_fd +{ + ios::get(), int(syscall(::eventfd, ev_count, EFD_CLOEXEC | EFD_NONBLOCK)) +} +,handle_set +{ + false +} +,handle_size +{ + 0 +} +{ + log::debug + { + log, "io_uring sq_entries:%u cq_entries:%u flags:%u sq_thread_cpu:%u sq_thread_idle:%u", + p.sq_entries, + p.cq_entries, + p.flags, + p.sq_thread_cpu, + p.sq_thread_idle, + }; + + log::debug + { + log, "io_uring maps sq:%p len:%zu sqe:%p len:%zu cq:%p len:%zu", + sq_p.get(), + sq_len, + sqe_p.get(), + sqe_len, + cq_p.get(), + cq_len, + }; + + log::debug + { + log, "io_sqring head:%u tail:%u ring_mask:%u ring_entries:%u flags:%u dropped:%u array:%u map:%p len:%zu", + p.sq_off.head, + p.sq_off.tail, + p.sq_off.ring_mask, + p.sq_off.ring_entries, + p.sq_off.flags, + p.sq_off.dropped, + p.sq_off.array, + sq_p.get(), + sq_len, + }; + + log::debug + { + log, "io_cqring head:%u tail:%u ring_mask:%u ring_entries:%u overflow:%u cqes:%u map:%p len:%zu", + p.cq_off.head, + p.cq_off.tail, + p.cq_off.ring_mask, + p.cq_off.ring_entries, + p.cq_off.overflow, + p.cq_off.cqes, + cq_p.get(), + cq_len, + }; + + assert(0); +} +catch(const std::exception &e) +{ + log::error + { + log, "Error starting iou context %p :%s", + (const void *)this, + e.what() + }; +} + +ircd::fs::iou::system::~system() +noexcept try +{ + const ctx::uninterruptible::nothrow ui; + + interrupt(); + wait(); + + boost::system::error_code ec; + ev_fd.close(ec); +} +catch(const std::exception &e) +{ + log::critical + { + log, "Error shutting down iou context %p :%s", + (const void *)this, + e.what() + }; +} + +bool +ircd::fs::iou::system::interrupt() +{ + if(!ev_fd.is_open()) + return false; + + if(handle_set) + ev_fd.cancel(); + else + ev_count = -1; + + return true; +} + +bool +ircd::fs::iou::system::wait() +{ + if(!ev_fd.is_open()) + return false; + + log::debug + { + log, "Waiting for iou context %p", this + }; + + dock.wait([this] + { + return ev_count == uint64_t(-1); + }); + + return true; +} + +void +ircd::fs::iou::system::set_handle() +try +{ + assert(!handle_set); + handle_set = true; + ev_count = 0; + + const asio::mutable_buffers_1 bufs + { + &ev_count, sizeof(ev_count) + }; + + auto handler + { + std::bind(&system::handle, this, ph::_1, ph::_2) + }; + + ev_fd.async_read_some(bufs, ios::handle(handle_descriptor, std::move(handler))); +} +catch(...) +{ + handle_set = false; + throw; +} + +decltype(ircd::fs::iou::system::handle_descriptor) +ircd::fs::iou::system::handle_descriptor +{ + "ircd::fs::iou sigfd", + + // allocator; custom allocation strategy because this handler + // appears to excessively allocate and deallocate 120 bytes; this + // is a simple asynchronous operation, we can do better (and perhaps + // even better than this below). + [](auto &handler, const size_t &size) + { + assert(ircd::fs::iou::system); + auto &system(*ircd::fs::iou::system); + + if(unlikely(!system.handle_data)) + { + system.handle_size = size; + system.handle_data = std::make_unique(size); + } + + assert(system.handle_size == size); + return system.handle_data.get(); + }, + + // no deallocation; satisfied by class member unique_ptr + [](auto &handler, void *const &ptr, const auto &size) {} +}; + +/// Handle notifications that requests are complete. +void +ircd::fs::iou::system::handle(const boost::system::error_code &ec, + const size_t bytes) +noexcept try +{ + namespace errc = boost::system::errc; + + assert((bytes == 8 && !ec && ev_count >= 1) || (bytes == 0 && ec)); + assert(!ec || ec.category() == asio::error::get_system_category()); + assert(handle_set); + + handle_set = false; + switch(ec.value()) + { + case errc::success: + handle_events(); + break; + + case errc::interrupted: + break; + + case errc::operation_canceled: + throw ctx::interrupted(); + __builtin_unreachable(); + + default: + throw_system_error(ec); + __builtin_unreachable(); + } + + set_handle(); +} +catch(const ctx::interrupted &) +{ + log::debug + { + log, "iou context %p interrupted", this + }; + + ev_count = -1; + dock.notify_all(); +} + +void +ircd::fs::iou::system::handle_events() +noexcept try +{ + assert(!ctx::current); + + +} +catch(const std::exception &e) +{ + log::error + { + log, "iou(%p) handle_events: %s", + this, + e.what() + }; +} diff --git a/ircd/fs_iou.h b/ircd/fs_iou.h new file mode 100644 index 000000000..28964a672 --- /dev/null +++ b/ircd/fs_iou.h @@ -0,0 +1,62 @@ +// Matrix Construct +// +// Copyright (C) Matrix Construct Developers, Authors & Contributors +// Copyright (C) 2016-2019 Jason Volk +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice is present in all copies. The +// full license for this software is available in the LICENSE file. + +#pragma once +#define HAVE_FS_IOU_H +#include + +namespace ircd::fs::iou +{ + struct system; + struct request; + + size_t write(const fd &, const const_iovec_view &, const write_opts &); + size_t read(const fd &, const const_iovec_view &, const read_opts &); + void fsync(const fd &, const sync_opts &); +} + +struct ircd::fs::iou::system +{ + ctx::dock dock; + + ::io_uring_params p; + fs::fd fd; + size_t sq_len, cq_len, sqe_len; + custom_ptr sq_p, cq_p, sqe_p; + uint32_t *head[2]; + uint32_t *tail[2]; + uint32_t *ring_mask[2]; + uint32_t *ring_entries[2]; + uint32_t *flags[2]; + uint32_t *dropped[2]; + uint32_t *overflow[2]; + uint32_t *sq; + ::io_uring_sqe *sqe; + ::io_uring_cqe *cqe; + + size_t ev_count; + asio::posix::stream_descriptor ev_fd; + bool handle_set; + size_t handle_size; + std::unique_ptr handle_data; + static ios::descriptor handle_descriptor; + + void handle_events() noexcept; + void handle(const boost::system::error_code &ec, const size_t bytes) noexcept; + void set_handle(); + + bool interrupt(); + bool wait(); + + system(const size_t &max_events, + const size_t &max_submit); + + ~system() noexcept; +};