From 30b59f4736d927b715aeda4aec52ba3e595209db Mon Sep 17 00:00:00 2001 From: Jason Volk Date: Thu, 25 Apr 2019 20:49:04 -0700 Subject: [PATCH] ircd::prof: Reorg; deinline various; add comments. --- include/ircd/prof.h | 76 ++++++----- ircd/ctx.cc | 2 +- ircd/fs_aio.cc | 2 +- ircd/prof.cc | 307 ++++++++++++++++++++++++++++---------------- 4 files changed, 238 insertions(+), 149 deletions(-) diff --git a/include/ircd/prof.h b/include/ircd/prof.h index 02a5b1d06..6e4df13e9 100644 --- a/include/ircd/prof.h +++ b/include/ircd/prof.h @@ -66,6 +66,7 @@ namespace ircd::prof::vg struct enable; struct disable; + bool enabled(); void dump(const char *const reason = nullptr); void toggle(); void reset(); @@ -73,23 +74,50 @@ namespace ircd::prof::vg void stop() noexcept; } +// Exports to ircd:: namespace ircd { using prof::cycles; } +/// Enable callgrind profiling for the scope struct ircd::prof::vg::enable { - enable() noexcept { start(); } - ~enable() noexcept { stop(); } + enable() noexcept; + ~enable() noexcept; }; +/// Disable any enabled callgrind profiling for the scope; then restore. struct ircd::prof::vg::disable { - disable() noexcept { stop(); } - ~disable() noexcept { start(); } + disable() noexcept; + ~disable() noexcept; }; +/// This suite of devices is intended to figure out when a system call is +/// really slow or "blocking." The original use-case is for io_submit() in +/// fs::aio. +/// +/// The sample is conducted with times(2) which is itself a system call +/// though reasonably fast, and the result has poor resolution meaning +/// the result of at() is generally 0 unless the system call was very slow. +/// +/// It is started on construction. The user must later call sample() +/// which returns the value of at() as well. +struct ircd::prof::syscall_timer +{ + uint64_t started, stopped; + + public: + uint64_t at() const; + uint64_t sample(); + + syscall_timer() noexcept; +}; + +/// Frontend to times(2). This has low resolution in practice, but it's +/// very cheap as far as syscalls go; x-platform implementation courtesy +/// of boost::chrono. struct ircd::prof::times { uint64_t real {0}; @@ -100,35 +128,8 @@ struct ircd::prof::times times() = default; }; -/// This device intends to figure out when a system call is really slow -/// or "blocking." The original use-case is for io_submit() in fs::aio. -/// The sample is conducted with times(2) which is itself a system call, -/// and the result has poor resolution meaning the result of at() is -/// generally 0 unless the system call was actually slow. -struct ircd::prof::syscall_timer -{ - uint64_t started - { - time_kern() - }; - - uint64_t stopped - { - 0 - }; - - uint64_t at() const - { - return stopped - started; - } - - uint64_t stop() - { - stopped = time_kern(); - return at(); - } -}; - +/// Frontend to getrusage(2). This has higher resolution than prof::times +/// in practice with slight added expense. struct ircd::prof::resource :std::array { @@ -151,6 +152,7 @@ struct ircd::prof::resource {} }; +/// Frontend to perf_event_open(2). This has the highest resolution. struct ircd::prof::system :std::array, 7> { @@ -174,6 +176,10 @@ struct ircd::prof::system {} }; +/// Type descriptor for prof events. This structure is used to aggregate +/// information that describes a profiling event type, including whether +/// the kernel or the user is being profiled (dpl), the principal counter +/// type being profiled (counter) and any other contextual attributes. struct ircd::prof::type { enum dpl dpl {0}; @@ -189,8 +195,8 @@ struct ircd::prof::type enum ircd::prof::dpl :std::underlying_type::type { - KERNEL, - USER + KERNEL = 0, + USER = 1, }; enum ircd::prof::counter diff --git a/ircd/ctx.cc b/ircd/ctx.cc index d4f763398..f2d8d1119 100644 --- a/ircd/ctx.cc +++ b/ircd/ctx.cc @@ -877,7 +877,7 @@ noexcept { timer.stopped? timer.at(): - timer.stop() + timer.sample() }; if(likely(!total)) diff --git a/ircd/fs_aio.cc b/ircd/fs_aio.cc index f4501d17a..93fe71abd 100644 --- a/ircd/fs_aio.cc +++ b/ircd/fs_aio.cc @@ -896,7 +896,7 @@ try }; #ifdef RB_DEBUG - stats.stalls += warning.timer.stop() > 0; + stats.stalls += warning.timer.sample() > 0; #endif assert(!qcount || ret > 0); diff --git a/ircd/prof.cc b/ircd/prof.cc index 1f4747a3f..da5867015 100644 --- a/ircd/prof.cc +++ b/ircd/prof.cc @@ -188,6 +188,201 @@ catch(const std::exception &e) return nullptr; } +// +// prof::vg +// + +namespace ircd::prof::vg +{ + static bool _enabled; +} + +// +// prof::vg::enable +// + +ircd::prof::vg::enable::enable() +noexcept +{ + start(); +} + +ircd::prof::vg::enable::~enable() +noexcept +{ + stop(); +} + +// +// prof::vg::disable +// + +ircd::prof::vg::disable::disable() +noexcept +{ + stop(); +} + +ircd::prof::vg::disable::~disable() +noexcept +{ + start(); +} + +// +// prof::vg util +// + +void +ircd::prof::vg::stop() +noexcept +{ + #ifdef HAVE_VALGRIND_CALLGRIND_H + CALLGRIND_STOP_INSTRUMENTATION; + assert(_enabled); + _enabled = false; + #endif +} + +void +ircd::prof::vg::start() +noexcept +{ + #ifdef HAVE_VALGRIND_CALLGRIND_H + assert(!_enabled); + _enabled = true; + CALLGRIND_START_INSTRUMENTATION; + #endif +} + +void +ircd::prof::vg::reset() +{ + #ifdef HAVE_VALGRIND_CALLGRIND_H + CALLGRIND_ZERO_STATS; + #endif +} + +void +ircd::prof::vg::toggle() +{ + #ifdef HAVE_VALGRIND_CALLGRIND_H + CALLGRIND_TOGGLE_COLLECT; + #endif +} + +void +ircd::prof::vg::dump(const char *const reason) +{ + #ifdef HAVE_VALGRIND_CALLGRIND_H + CALLGRIND_DUMP_STATS_AT(reason); + #endif +} + +bool +ircd::prof::vg::enabled() +{ + return _enabled; +} + +// +// syscall_timer +// + +ircd::prof::syscall_timer::syscall_timer() +noexcept +:started +{ + time_kern() +} +,stopped +{ + 0UL +} +{ +} + +uint64_t +ircd::prof::syscall_timer::sample() +{ + stopped = time_kern(); + return at(); +} + +uint64_t +ircd::prof::syscall_timer::at() +const +{ + return stopped - started; +} + + +// +// time_*() suite +// + +uint64_t +ircd::prof::time_thrd() +{ + struct ::timespec tv; + syscall(::clock_gettime, CLOCK_THREAD_CPUTIME_ID, &tv); + return ulong(tv.tv_sec) * 1000000000UL + tv.tv_nsec; +} + +uint64_t +ircd::prof::time_proc() +{ + struct ::timespec tv; + syscall(::clock_gettime, CLOCK_PROCESS_CPUTIME_ID, &tv); + return ulong(tv.tv_sec) * 1000000000UL + tv.tv_nsec; +} + +// +// Interface (cross-platform) +// + +uint64_t +ircd::prof::time_real() +{ + return boost::chrono::process_real_cpu_clock::now().time_since_epoch().count(); +} + +uint64_t +ircd::prof::time_kern() +{ + return boost::chrono::process_system_cpu_clock::now().time_since_epoch().count(); +} + +uint64_t +ircd::prof::time_user() +{ + return boost::chrono::process_user_cpu_clock::now().time_since_epoch().count(); +} + +// +// times +// + +ircd::prof::times::times(sample_t) +:real{} +,kern{} +,user{} +{ + const auto tp + { + boost::chrono::process_cpu_clock::now() + }; + + const auto d + { + tp.time_since_epoch() + }; + + this->real = d.count().real; + this->kern = d.count().system; + this->user = d.count().user; +} + // // resource // @@ -730,115 +925,3 @@ ircd::prof::debug(std::ostream &s, return s; } - -// -// Interface -// - -uint64_t -ircd::prof::time_thrd() -{ - struct ::timespec tv; - syscall(::clock_gettime, CLOCK_THREAD_CPUTIME_ID, &tv); - return ulong(tv.tv_sec) * 1000000000UL + tv.tv_nsec; -} - -uint64_t -ircd::prof::time_proc() -{ - struct ::timespec tv; - syscall(::clock_gettime, CLOCK_PROCESS_CPUTIME_ID, &tv); - return ulong(tv.tv_sec) * 1000000000UL + tv.tv_nsec; -} - -// -// Interface (cross-platform) -// - -uint64_t -ircd::prof::time_real() -{ - return boost::chrono::process_real_cpu_clock::now().time_since_epoch().count(); -} - -uint64_t -ircd::prof::time_kern() -{ - return boost::chrono::process_system_cpu_clock::now().time_since_epoch().count(); -} - -uint64_t -ircd::prof::time_user() -{ - return boost::chrono::process_user_cpu_clock::now().time_since_epoch().count(); -} - -// -// prof::vg -// - -void -ircd::prof::vg::stop() -noexcept -{ - #ifdef HAVE_VALGRIND_CALLGRIND_H - CALLGRIND_STOP_INSTRUMENTATION; - #endif -} - -void -ircd::prof::vg::start() -noexcept -{ - #ifdef HAVE_VALGRIND_CALLGRIND_H - CALLGRIND_START_INSTRUMENTATION; - #endif -} - -void -ircd::prof::vg::reset() -{ - #ifdef HAVE_VALGRIND_CALLGRIND_H - CALLGRIND_ZERO_STATS; - #endif -} - -void -ircd::prof::vg::toggle() -{ - #ifdef HAVE_VALGRIND_CALLGRIND_H - CALLGRIND_TOGGLE_COLLECT; - #endif -} - -void -ircd::prof::vg::dump(const char *const reason) -{ - #ifdef HAVE_VALGRIND_CALLGRIND_H - CALLGRIND_DUMP_STATS_AT(reason); - #endif -} - -// -// times -// - -ircd::prof::times::times(sample_t) -:real{} -,kern{} -,user{} -{ - const auto tp - { - boost::chrono::process_cpu_clock::now() - }; - - const auto d - { - tp.time_since_epoch() - }; - - this->real = d.count().real; - this->kern = d.count().system; - this->user = d.count().user; -}