diff --git a/construct/construct.cc b/construct/construct.cc index b5afd1c37..7a7e7c38a 100644 --- a/construct/construct.cc +++ b/construct/construct.cc @@ -604,20 +604,15 @@ applyargs() // behavior of the boost::asio event loop by executing more queued tasks // before dropping to epoll_wait(2). This reduces the number of syscalls to // epoll_wait(2), which tend to occur at the start of every epoch except in a -// minority of cases. These syscalls produced nothing 99% of the time. -// -// boost::asio tends to call epoll_wait(2) with timeout=0 (non-blocking) when -// it has more work queued that it will execute. If there's nothing queued it -// will set a timeout. We don't need to collect epoll events so aggressively, -// Instead, we want to exhaust all work in userspace first, and then collect -// events from the kernel. We lose some responsiveness with asio::signal_set -// but gain overall performance in a post-meltdown/post-spectre virtualized -// reality. +// minority of cases, and produce nothing most of the time. See addl docs. // #if defined(BOOST_ASIO_HAS_EPOLL) extern "C" int -__real_epoll_wait(int __epfd, struct epoll_event *__events, int __maxevents, int __timeout); +__real_epoll_wait(int __epfd, + struct epoll_event *__events, + int __maxevents, + int __timeout); extern "C" int __wrap_epoll_wait(int __epfd, @@ -625,60 +620,14 @@ __wrap_epoll_wait(int __epfd, int __maxevents, int __timeout) { - static const uint64_t freq {12}; - static uint64_t calls, peeks, skips, results, stall[4]; - - const bool peek - { - __timeout == 0 - }; - - const bool tick - { - peeks % freq == 0 - }; - - const bool skip - { - peek && !tick - }; - - const auto ret - { - !skip? - __real_epoll_wait(__epfd, __events, __maxevents, __timeout): 0 - }; - - calls += 1; - peeks += peek; - skips += skip; - results += ret > 0? 
ret: 0; - stall[0] += ret >= 12; - stall[1] += ret >= 24; - stall[2] += ret >= 48; - stall[3] += ret >= 96; - - if constexpr(ircd::ios::profile::logging) - if(!skip) - ircd::log::logf - { - ircd::ios::log, ircd::log::DEBUG, - "EPOLL * timeout:%d results:%lu calls:%lu skips:%lu peeks:%lu stall[%6lu][%6lu][%6lu][%6lu] = %d", - __timeout, - results, - calls, - skips, - peeks, - stall[0], - stall[1], - stall[2], - stall[3], - ret, - }; - - assert(!skip || ret == 0); - assert(ret <= 128); - return ret; + // see addl documentation in ircd/ios + return ircd::ios::epoll_wait<__real_epoll_wait> + ( + __epfd, + __events, + __maxevents, + __timeout + ); } #endif diff --git a/include/ircd/ios/empt.h b/include/ircd/ios/empt.h new file mode 100644 index 000000000..c7c1b6c30 --- /dev/null +++ b/include/ircd/ios/empt.h @@ -0,0 +1,35 @@ +// The Construct +// +// Copyright (C) The Construct Developers, Authors & Contributors +// Copyright (C) 2016-2020 Jason Volk +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice is present in all copies. The +// full license for this software is available in the LICENSE file. + +#pragma once +#define HAVE_IRCD_IOS_EMPT_H + +/// Emption interface. +/// +/// On supporting systems and with the cooperation of libircd's embedder these +/// items can aid with optimizing and/or profiling the boost::asio core event +/// loop. See epoll.h for an epoll_wait(2) use of these items. This is a +/// separate header/namespace so this can remain abstract and applied to +/// different platforms. 
+/// +namespace ircd::ios::empt +{ + extern conf::item freq; + + extern stats::item peek; + extern stats::item skip; + extern stats::item call; + extern stats::item none; + extern stats::item result; + extern stats::item load_low; + extern stats::item load_med; + extern stats::item load_high; + extern stats::item load_stall; +} diff --git a/include/ircd/ios/epoll.h b/include/ircd/ios/epoll.h new file mode 100644 index 000000000..e01fd37f0 --- /dev/null +++ b/include/ircd/ios/epoll.h @@ -0,0 +1,113 @@ +// The Construct +// +// Copyright (C) The Construct Developers, Authors & Contributors +// Copyright (C) 2016-2020 Jason Volk +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice is present in all copies. The +// full license for this software is available in the LICENSE file. + +#pragma once +#define HAVE_IRCD_IOS_EPOLL_H + +// Forward declarations because sys/epoll.h is not included here +extern "C" +{ + struct epoll_event; +} + +// This interface provides special workarounds and optimizations for the epoll +// reactor in boost::asio on linux systems. It has to be used voluntarily by +// the embedder of libircd, who can hook calls to epoll_wait(2), and forward +// those calls to this interface. Our implementation then makes the syscall. +namespace ircd::ios +{ + using epoll_wait_proto = int (int, struct ::epoll_event *, int, int); + + template + int epoll_wait(int, struct ::epoll_event *, int, int) noexcept; +} + +/// This reduces the number of syscalls to epoll_wait(2), which tend to occur +/// at the start of every epoch except in a minority of cases. These syscalls +/// produce no ready events 99% of the time. +/// +/// boost::asio tends to call epoll_wait(2) with timeout=0 (non-blocking) when +/// it has more work queued that it will execute. If there's nothing queued it +/// will set a timeout. 
We don't need to collect epoll events so aggressively. +/// It's incumbent upon us to not spam thousands of non-blocking syscalls which +/// yield no results, especially when it negates the efficiency of ircd::ctx's +/// fast userspace context switching. We trade some responsiveness for things +/// like asio::signal_set but gain overall performance which now has actual +/// impact in the post-meltdown/spectre virtualized reality. +/// +template +[[using gnu: hot, always_inline]] +inline int +ircd::ios::epoll_wait(int _epfd, + struct ::epoll_event *const _events, + int _maxevents, + int _timeout) +noexcept +{ + const bool peek + { + _timeout == 0 + }; + + const uint64_t freq + { + empt::freq + }; + + const bool skip_tick + { + !freq || (uint64_t(empt::peek) % freq) + }; + + // Always allow blocking calls; only allow non-blocking calls which + // satisfy our conditions. + const bool call + { + !peek || !skip_tick + }; + + const int ret + { + call? + _real_epoll_wait(_epfd, _events, _maxevents, _timeout): 0 + }; + + // Update stats + empt::peek += peek; + empt::skip += !call; + empt::call += call; + empt::none += call && ret == 0; + empt::result += ret & boolmask(ret >= 0); + empt::load_low += ret >= _maxevents / 8; + empt::load_med += ret >= _maxevents / 4; + empt::load_high += ret >= _maxevents / 2; + empt::load_stall += ret >= _maxevents / 1; + + if constexpr(profile::logging) if(call) + log::logf + { + log, ircd::log::DEBUG, + "EPOLL %5d peek:%lu skip:%lu call:%lu none:%lu result:%lu low:%lu med:%lu high:%lu stall:%lu", + ret, + uint64_t(empt::peek), + uint64_t(empt::skip), + uint64_t(empt::call), + uint64_t(empt::none), + uint64_t(empt::result), + uint64_t(empt::load_low), + uint64_t(empt::load_med), + uint64_t(empt::load_high), + uint64_t(empt::load_stall), + }; + + assert(call || ret == 0); + assert(ret <= _maxevents); + return ret; +} diff --git a/include/ircd/ios/ios.h b/include/ircd/ios/ios.h index 2281754fe..095c3bf55 100644 --- a/include/ircd/ios/ios.h +++ 
b/include/ircd/ios/ios.h @@ -50,6 +50,8 @@ namespace ircd::ios::profile #include "handler.h" #include "asio.h" #include "dispatch.h" +#include "empt.h" +#include "epoll.h" inline const uint64_t & __attribute__((always_inline)) diff --git a/ircd/ios.cc b/ircd/ios.cc index af6f34685..e984567f9 100644 --- a/ircd/ios.cc +++ b/ircd/ios.cc @@ -97,6 +97,143 @@ noexcept return bool(main); } +// +// emption +// + +namespace ircd::ios::empt +{ + [[gnu::visibility("internal")]] + extern const string_view freq_help; + + [[gnu::visibility("internal")]] + extern uint64_t stats[9]; +} + +decltype(ircd::ios::empt::freq_help) +ircd::ios::empt::freq_help +{R"( + Coarse frequency to make non-blocking polls to the kernel for events at the + beginning of every iteration of the core event loop. boost::asio takes an + opportunity to first make a non-blocking poll to gather more events from + the kernel even when one or more tasks are already queued, this setting + allows more such tasks to first be executed and reduce syscall overhead + including a large number of unnecessary calls as would be the case + otherwise. + + When the frequency is set to 1, the above-described default behavior is + unaltered. When greater than 1, voluntary non-blocking polls are only made + after N number of tasks. This reduces syscalls to increase overall + performance, but may cost in responsiveness and cause stalls. For example, + when set to 2, kernel context-switch is made every other userspace context + switch. When set to 0, voluntary non-blocking polls are never made. + + This value may be rounded down to nearest base2 so we can avoid invoking + the FPU in the core event loop's codepath. +)"}; + +decltype(ircd::ios::empt::stats) +ircd::ios::empt::stats; + +/// Voluntary kernel poll frequency. +decltype(ircd::ios::empt::freq) +ircd::ios::empt::freq +{ + { "name", "ircd.ios.empt.freq" }, + { "default", 32L }, + { "help", freq_help }, +}; + +/// Non-blocking call count.
+decltype(ircd::ios::empt::peek) +ircd::ios::empt::peek +{ + stats + 0, + { + { "name", "ircd.ios.empt.peek" }, + }, +}; + +/// Skipped call count. +decltype(ircd::ios::empt::skip) +ircd::ios::empt::skip +{ + stats + 1, + { + { "name", "ircd.ios.empt.skip" }, + }, +}; + +/// Non-skipped call count. +decltype(ircd::ios::empt::call) +ircd::ios::empt::call +{ + stats + 2, + { + { "name", "ircd.ios.empt.call" }, + }, +}; + +/// Count of calls which reported zero ready events. +decltype(ircd::ios::empt::none) +ircd::ios::empt::none +{ + stats + 3, + { + { "name", "ircd.ios.empt.none" }, + }, +}; + +/// Total number of events reported from all calls. +decltype(ircd::ios::empt::result) +ircd::ios::empt::result +{ + stats + 4, + { + { "name", "ircd.ios.empt.result" }, + }, +}; + +/// Count of calls which reported more events than the low threshold. +decltype(ircd::ios::empt::load_low) +ircd::ios::empt::load_low +{ + stats + 5, + { + { "name", "ircd.ios.empt.load.low" }, + }, +}; + +/// Count of calls which reported more events than the medium threshold. +decltype(ircd::ios::empt::load_med) +ircd::ios::empt::load_med +{ + stats + 6, + { + { "name", "ircd.ios.empt.load.med" }, + }, +}; + +/// Count of calls which reported more events than the high threshold. +decltype(ircd::ios::empt::load_high) +ircd::ios::empt::load_high +{ + stats + 7, + { + { "name", "ircd.ios.empt.load.high" }, + }, +}; + +/// Count of calls which reported the maximum number of events. +decltype(ircd::ios::empt::load_stall) +ircd::ios::empt::load_stall +{ + stats + 8, + { + { "name", "ircd.ios.empt.load.stall" }, + } +}; + // // descriptor //