ircd::ios: Add legit framework for core loop optimizations. (related 7df5592ebe)

2024-12-26 07:23:53 +01:00 · 2020-12-13 22:44:33 -08:00 · 2020-12-13 22:44:33 -08:00 · 6696693f8b
commit 6696693f8b
parent a10429c15f
5 changed files with 300 additions and 64 deletions
--- a/construct/construct.cc
+++ b/construct/construct.cc
@ -604,20 +604,15 @@ applyargs()
 // behavior of the boost::asio event loop by executing more queued tasks
 // before dropping to epoll_wait(2). This reduces the number of syscalls to
 // epoll_wait(2), which tend to occur at the start of every epoch except in a
-// minority of cases. These syscalls produced nothing 99% of the time.
-//
-// boost::asio tends to call epoll_wait(2) with timeout=0 (non-blocking) when
-// it has more work queued that it will execute. If there's nothing queued it
-// will set a timeout. We don't need to collect epoll events so aggressively,
-// Instead, we want to exhaust all work in userspace first, and then collect
-// events from the kernel. We lose some responsiveness with asio::signal_set
-// but gain overall performance in a post-meltdown/post-spectre virtualized
-// reality.
+// minority of cases, and produce nothing most of the time. See ircd/ios.
 //
 #if defined(BOOST_ASIO_HAS_EPOLL)

 extern "C" int
-__real_epoll_wait(int __epfd, struct epoll_event *__events, int __maxevents, int __timeout);
+__real_epoll_wait(int __epfd,
+                  struct epoll_event *__events,
+                  int __maxevents,
+                  int __timeout);

 extern "C" int
 __wrap_epoll_wait(int __epfd,
@ -625,60 +620,14 @@ __wrap_epoll_wait(int __epfd,
                  int __maxevents,
                  int __timeout)
 {
-	static const uint64_t freq {12};
-	static uint64_t calls, peeks, skips, results, stall[4];
-
-	const bool peek
-	{
-		__timeout == 0
-	};
-
-	const bool tick
-	{
-		peeks % freq == 0
-	};
-
-	const bool skip
-	{
-		peek && !tick
-	};
-
-	const auto ret
-	{
-		!skip?
-			__real_epoll_wait(__epfd, __events, __maxevents, __timeout): 0
-	};
-
-	calls += 1;
-	peeks += peek;
-	skips += skip;
-	results += ret > 0? ret: 0;
-	stall[0] += ret >= 12;
-	stall[1] += ret >= 24;
-	stall[2] += ret >= 48;
-	stall[3] += ret >= 96;
-
-	if constexpr(ircd::ios::profile::logging)
-		if(!skip)
-			ircd::log::logf
-			{
-				ircd::ios::log, ircd::log::DEBUG,
-				"EPOLL     * timeout:%d results:%lu calls:%lu skips:%lu peeks:%lu stall[%6lu][%6lu][%6lu][%6lu] = %d",
-				__timeout,
-				results,
-				calls,
-				skips,
-				peeks,
-				stall[0],
-				stall[1],
-				stall[2],
-				stall[3],
-				ret,
-			};
-
-	assert(!skip || ret == 0);
-	assert(ret <= 128);
-	return ret;
+	// see addl documentation in ircd/ios
+	return ircd::ios::epoll_wait<__real_epoll_wait>
+	(
+		__epfd,
+		__events,
+		__maxevents,
+		__timeout
+	);
 }

 #endif
--- a/include/ircd/ios/empt.h
+++ b/include/ircd/ios/empt.h
@ -0,0 +1,35 @@
+// The Construct
+//
+// Copyright (C) The Construct Developers, Authors & Contributors
+// Copyright (C) 2016-2020 Jason Volk <jason@zemos.net>
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice is present in all copies. The
+// full license for this software is available in the LICENSE file.
+
+#pragma once
+#define HAVE_IRCD_IOS_EMPT_H
+
+/// Emption interface.
+///
+/// On supporting systems and with the cooperation of libircd's embedder these
+/// items can aid with optimizing and/or profiling the boost::asio core event
+/// loop. See epoll.h for an epoll_wait(2) use of these items. This is a
+/// separate header/namespace so this can remain abstract and applied to
+/// different platforms.
+///
+namespace ircd::ios::empt
+{
+	extern conf::item<uint64_t> freq;              // voluntary kernel poll frequency
+
+	extern stats::item<uint64_t *> peek;           // non-blocking (timeout=0) calls seen
+	extern stats::item<uint64_t *> skip;           // calls not forwarded to the real function
+	extern stats::item<uint64_t *> call;           // calls forwarded to the real function
+	extern stats::item<uint64_t *> none;           // forwarded calls which returned zero events
+	extern stats::item<uint64_t *> result;         // running total of events returned
+	extern stats::item<uint64_t *> load_low;       // results >= maxevents / 8
+	extern stats::item<uint64_t *> load_med;       // results >= maxevents / 4
+	extern stats::item<uint64_t *> load_high;      // results >= maxevents / 2
+	extern stats::item<uint64_t *> load_stall;     // results >= maxevents
+}
--- a/include/ircd/ios/epoll.h
+++ b/include/ircd/ios/epoll.h
@ -0,0 +1,113 @@
+// The Construct
+//
+// Copyright (C) The Construct Developers, Authors & Contributors
+// Copyright (C) 2016-2020 Jason Volk <jason@zemos.net>
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice is present in all copies. The
+// full license for this software is available in the LICENSE file.
+
+#pragma once
+#define HAVE_IRCD_IOS_EPOLL_H
+
+// Forward declarations because sys/epoll.h is not included here
+extern "C"
+{
+	struct epoll_event;
+}
+
+// This interface provides special workarounds and optimizations for the epoll
+// reactor in boost::asio on linux systems. It has to be used voluntarily by
+// the embedder of libircd, who can hook calls to epoll_wait(2), and forward
+// those calls to this interface, which then makes (or skips) the real call.
+namespace ircd::ios
+{
+	using epoll_wait_proto = int (int, struct ::epoll_event *, int, int); // same parameter list as epoll_wait below
+
+	template<epoll_wait_proto *> // template argument: the real function calls are forwarded to
+	int epoll_wait(int, struct ::epoll_event *, int, int) noexcept;
+}
+
+/// This reduces the number of syscalls to epoll_wait(2), which tend to occur
+/// at the start of every epoch except in a minority of cases. These syscalls
+/// produce no ready events 99% of the time.
+///
+/// boost::asio tends to call epoll_wait(2) with timeout=0 (non-blocking) when
+/// it has more work queued that it will execute. If there's nothing queued it
+/// will set a timeout. We don't need to collect epoll events so aggressively.
+/// It's incumbent upon us to not spam thousands of non-blocking syscalls which
+/// yield no results, especially when it negates the efficiency of ircd::ctx's
+/// fast userspace context switching. We trade some responsiveness for things
+/// like asio::signal_set but gain overall performance which now has actual
+/// impact in the post-meltdown/spectre virtualized reality.
+/// Returns the real call's result, or 0 when a non-blocking call is skipped.
+template<ircd::ios::epoll_wait_proto *_real_epoll_wait>
+[[using gnu: hot, always_inline]]
+inline int
+ircd::ios::epoll_wait(int _epfd,
+                      struct ::epoll_event *const _events,
+                      int _maxevents,
+                      int _timeout)
+noexcept
+{
+	const bool peek
+	{
+		_timeout == 0 // zero timeout: the caller wants a non-blocking poll
+	};
+
+	const uint64_t freq
+	{
+		empt::freq // configured poll frequency, read once per call
+	};
+
+	const bool skip_tick
+	{
+		!freq || (uint64_t(empt::peek) % freq) // freq == 0, or peek count is not a multiple of freq
+	};
+
+	// Always allow blocking calls; only allow non-blocking calls which
+	// satisfy our conditions.
+	const bool call
+	{
+		!peek || !skip_tick
+	};
+
+	const int ret
+	{
+		call?
+			_real_epoll_wait(_epfd, _events, _maxevents, _timeout): 0 // skipped calls report zero events
+	};
+
+	// Update stats; each boolean condition adds 0 or 1 to its counter.
+	empt::peek += peek;
+	empt::skip += !call;
+	empt::call += call;
+	empt::none += call && ret == 0;
+	empt::result += ret & boolmask<uint>(ret >= 0); // NOTE(review): presumably masks negative returns to 0 -- confirm boolmask
+	empt::load_low += ret >= _maxevents / 8;        // at least an eighth of _maxevents
+	empt::load_med += ret >= _maxevents / 4;        // at least a quarter of _maxevents
+	empt::load_high += ret >= _maxevents / 2;       // at least half of _maxevents
+	empt::load_stall += ret >= _maxevents / 1;      // the result reached _maxevents
+
+	if constexpr(profile::logging) if(call) // only forwarded calls are logged
+		log::logf
+		{
+			log, ircd::log::DEBUG,
+			"EPOLL %5d peek:%lu skip:%lu call:%lu none:%lu result:%lu low:%lu med:%lu high:%lu stall:%lu",
+			ret,
+			uint64_t(empt::peek),
+			uint64_t(empt::skip),
+			uint64_t(empt::call),
+			uint64_t(empt::none),
+			uint64_t(empt::result),
+			uint64_t(empt::load_low),
+			uint64_t(empt::load_med),
+			uint64_t(empt::load_high),
+			uint64_t(empt::load_stall),
+		};
+
+	assert(call || ret == 0);  // a skipped call must report no events
+	assert(ret <= _maxevents); // never more events than were requested
+	return ret;
+}
--- a/include/ircd/ios/ios.h
+++ b/include/ircd/ios/ios.h
@ -50,6 +50,8 @@ namespace ircd::ios::profile
 #include "handler.h"
 #include "asio.h"
 #include "dispatch.h"
+#include "empt.h"
+#include "epoll.h"

 inline const uint64_t &
 __attribute__((always_inline))
--- a/ircd/ios.cc
+++ b/ircd/ios.cc
@ -97,6 +97,143 @@ noexcept
 	return bool(main);
 }

+//
+// emption
+//
+
+namespace ircd::ios::empt
+{
+	[[gnu::visibility("internal")]]
+	extern const string_view freq_help; // help text passed to the freq conf item
+
+	[[gnu::visibility("internal")]]
+	extern uint64_t stats[9]; // counter storage; the stats items point at slots 0..8
+}
+
+decltype(ircd::ios::empt::freq_help) // help text supplied to the freq conf item
+ircd::ios::empt::freq_help
+{R"(
+	Coarse frequency to make non-blocking polls to the kernel for events at the
+	beginning of every iteration of the core event loop. boost::asio takes an
+	opportunity to first make a non-blocking poll to gather more events from
+	the kernel even when one or more tasks are already queued, this setting
+	allows more such tasks to first be executed and reduce syscall overhead
+	including a large number of unnecessary calls as would be the case
+	otherwise.
+
+	When the frequency is set to 1, the above-described default behavior is
+	unaltered. When greater than 1, voluntary non-blocking polls are only made
+	after N number of tasks. This reduces syscalls to increase overall
+	performance, but may cost in responsiveness and cause stalls. For example,
+	when set to 2, kernel context-switch is made every other userspace context
+	switch. When set to 0, voluntary non-blocking polls are never made.
+
+	This value may be rounded down to nearest base2 so we can avoid invoking
+	the FPU in the core event loop's codepath.
+)"};
+
+decltype(ircd::ios::empt::stats)
+ircd::ios::empt::stats; // no initializer: static storage, so every counter starts at zero
+
+/// Voluntary kernel poll frequency; see freq_help for the meaning of 0, 1 and N.
+decltype(ircd::ios::empt::freq)
+ircd::ios::empt::freq
+{
+	{ "name",      "ircd.ios.empt.freq" },
+	{ "default",   32L                  },
+	{ "help",      freq_help            },
+};
+
+/// Count of non-blocking (timeout=0) calls requested, whether forwarded or skipped.
+decltype(ircd::ios::empt::peek)
+ircd::ios::empt::peek
+{
+	stats + 0,
+	{
+		{ "name", "ircd.ios.empt.peek" },
+	},
+};
+
+/// Count of non-blocking calls which were skipped rather than forwarded.
+decltype(ircd::ios::empt::skip)
+ircd::ios::empt::skip
+{
+	stats + 1,
+	{
+		{ "name", "ircd.ios.empt.skip" },
+	},
+};
+
+/// Count of calls forwarded to the real function (i.e. not skipped).
+decltype(ircd::ios::empt::call)
+ircd::ios::empt::call
+{
+	stats + 2,
+	{
+		{ "name", "ircd.ios.empt.call" },
+	},
+};
+
+/// Count of forwarded calls which reported zero ready events.
+decltype(ircd::ios::empt::none)
+ircd::ios::empt::none
+{
+	stats + 3,
+	{
+		{ "name", "ircd.ios.empt.none" },
+	},
+};
+
+/// Running total of the number of events reported from all calls.
+decltype(ircd::ios::empt::result)
+ircd::ios::empt::result
+{
+	stats + 4,
+	{
+		{ "name", "ircd.ios.empt.result" },
+	},
+};
+
+/// Count of calls which reported at least the low threshold (maxevents / 8).
+decltype(ircd::ios::empt::load_low)
+ircd::ios::empt::load_low
+{
+	stats + 5,
+	{
+		{ "name", "ircd.ios.empt.load.low" },
+	},
+};
+
+/// Count of calls which reported at least the medium threshold (maxevents / 4).
+decltype(ircd::ios::empt::load_med)
+ircd::ios::empt::load_med
+{
+	stats + 6,
+	{
+		{ "name", "ircd.ios.empt.load.med" },
+	},
+};
+
+/// Count of calls which reported at least the high threshold (maxevents / 2).
+decltype(ircd::ios::empt::load_high)
+ircd::ios::empt::load_high
+{
+	stats + 7,
+	{
+		{ "name", "ircd.ios.empt.load.high" },
+	},
+};
+
+/// Count of calls which reported the maximum number of events (maxevents).
+decltype(ircd::ios::empt::load_stall)
+ircd::ios::empt::load_stall
+{
+	stats + 8,
+	{
+		{ "name", "ircd.ios.empt.load.stall" },
+	}
+};
+
 //
 // descriptor
 //