mirror of
https://github.com/matrix-construct/construct
synced 2024-11-29 02:02:38 +01:00
ircd::prof: Split header into directory.
ircd::prof: Split linux/perf_event specific to unit.
This commit is contained in:
parent
5e3f81685c
commit
fefaaca2c2
14 changed files with 1099 additions and 921 deletions
|
@ -52,7 +52,7 @@
|
|||
#include "magics.h"
|
||||
#include "conf.h"
|
||||
#include "stats.h"
|
||||
#include "prof.h"
|
||||
#include "prof/prof.h"
|
||||
#include "fs/fs.h"
|
||||
#include "ios.h"
|
||||
#include "ctx/ctx.h"
|
||||
|
|
|
@ -1,306 +0,0 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2019 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_PROF_H
|
||||
|
||||
namespace ircd::prof
|
||||
{
|
||||
struct init;
|
||||
struct type;
|
||||
struct event;
|
||||
struct times;
|
||||
struct system;
|
||||
struct resource;
|
||||
struct syscall_timer;
|
||||
struct instructions;
|
||||
enum dpl :uint8_t;
|
||||
using group = std::vector<std::unique_ptr<event>>;
|
||||
IRCD_OVERLOAD(sample)
|
||||
IRCD_EXCEPTION(ircd::error, error)
|
||||
|
||||
uint64_t cycles(); ///< Monotonic reference cycles (since system boot)
|
||||
uint64_t time_user(); ///< Nanoseconds of CPU time in userspace.
|
||||
uint64_t time_kern(); ///< Nanoseconds of CPU time in kernelland.
|
||||
uint64_t time_real(); ///< Nanoseconds of CPU time real.
|
||||
uint64_t time_proc(); ///< Nanoseconds of CPU time for process.
|
||||
uint64_t time_thrd(); ///< Nanoseconds of CPU time for thread.
|
||||
|
||||
system &hotsample(system &) noexcept;
|
||||
system &operator+=(system &a, const system &b);
|
||||
system &operator-=(system &a, const system &b);
|
||||
system operator+(const system &a, const system &b);
|
||||
system operator-(const system &a, const system &b);
|
||||
|
||||
resource &operator+=(resource &a, const resource &b);
|
||||
resource &operator-=(resource &a, const resource &b);
|
||||
resource operator+(const resource &a, const resource &b);
|
||||
resource operator-(const resource &a, const resource &b);
|
||||
|
||||
using read_closure = std::function<void (const type &, const uint64_t &val)>;
|
||||
void for_each(const const_buffer &read, const read_closure &);
|
||||
|
||||
// Control
|
||||
void stop(group &);
|
||||
void start(group &);
|
||||
void reset(group &);
|
||||
}
|
||||
|
||||
/// X86 platform related
|
||||
namespace ircd::prof::x86
|
||||
{
|
||||
unsigned long long rdpmc(const uint &);
|
||||
unsigned long long rdtscp();
|
||||
unsigned long long rdtsc();
|
||||
}
|
||||
|
||||
/// Callgrind hypercall suite
|
||||
namespace ircd::prof::vg
|
||||
{
|
||||
struct enable;
|
||||
struct disable;
|
||||
|
||||
bool enabled();
|
||||
void dump(const char *const reason = nullptr);
|
||||
void toggle();
|
||||
void reset();
|
||||
void start() noexcept;
|
||||
void stop() noexcept;
|
||||
}
|
||||
|
||||
// Exports to ircd::
|
||||
namespace ircd
|
||||
{
|
||||
using prof::cycles;
|
||||
}
|
||||
|
||||
/// Enable callgrind profiling for the scope
|
||||
struct ircd::prof::vg::enable
|
||||
{
|
||||
enable() noexcept;
|
||||
~enable() noexcept;
|
||||
};
|
||||
|
||||
/// Disable any enabled callgrind profiling for the scope; then restore.
|
||||
struct ircd::prof::vg::disable
|
||||
{
|
||||
disable() noexcept;
|
||||
~disable() noexcept;
|
||||
};
|
||||
|
||||
/// Gadget for hardware profiling of instructions for a scope.
|
||||
///
|
||||
struct ircd::prof::instructions
|
||||
{
|
||||
prof::group group;
|
||||
uint64_t retired {0};
|
||||
|
||||
public:
|
||||
const uint64_t &at() const;
|
||||
const uint64_t &sample();
|
||||
|
||||
instructions();
|
||||
instructions(instructions &&) = delete;
|
||||
instructions(const instructions &) = delete;
|
||||
~instructions() noexcept;
|
||||
};
|
||||
|
||||
/// This suite of devices is intended to figure out when a system call is
|
||||
/// really slow or "blocking." The original use-case is for io_submit() in
|
||||
/// fs::aio.
|
||||
///
|
||||
/// The sample is conducted with times(2) which is itself a system call
|
||||
/// though reasonably fast, and the result has poor resolution meaning
|
||||
/// the result of at() is generally 0 unless the system call was very slow.
|
||||
///
|
||||
/// It is started on construction. The user must later call sample()
|
||||
/// which returns the value of at() as well.
|
||||
struct ircd::prof::syscall_timer
|
||||
{
|
||||
struct high_resolution;
|
||||
|
||||
uint64_t started, stopped;
|
||||
|
||||
public:
|
||||
uint64_t at() const;
|
||||
uint64_t sample();
|
||||
|
||||
syscall_timer() noexcept;
|
||||
};
|
||||
|
||||
/// This is a higher resolution alternative. The sample may be conducted
|
||||
/// with getrusage() or perf events; the exact method is TBD and may be
|
||||
/// expensive/intrusive. This device should be used temporarily by developers
|
||||
/// and not left in place in committed code.
|
||||
struct ircd::prof::syscall_timer::high_resolution
|
||||
{
|
||||
uint64_t started, stopped;
|
||||
|
||||
public:
|
||||
uint64_t at() const;
|
||||
uint64_t sample();
|
||||
|
||||
high_resolution() noexcept;
|
||||
};
|
||||
|
||||
/// Frontend to times(2). This has low resolution in practice, but it's
|
||||
/// very cheap as far as syscalls go; x-platform implementation courtesy
|
||||
/// of boost::chrono.
|
||||
struct ircd::prof::times
|
||||
{
|
||||
uint64_t real {0};
|
||||
uint64_t kern {0};
|
||||
uint64_t user {0};
|
||||
|
||||
times(sample_t);
|
||||
times() = default;
|
||||
};
|
||||
|
||||
/// Frontend to getrusage(2). This has higher resolution than prof::times
|
||||
/// in practice with slight added expense.
|
||||
struct ircd::prof::resource
|
||||
:std::array<uint64_t, 9>
|
||||
{
|
||||
enum
|
||||
{
|
||||
TIME_USER, // microseconds
|
||||
TIME_KERN, // microseconds
|
||||
RSS_MAX,
|
||||
PF_MINOR,
|
||||
PF_MAJOR,
|
||||
BLOCK_IN,
|
||||
BLOCK_OUT,
|
||||
SCHED_YIELD,
|
||||
SCHED_PREEMPT,
|
||||
};
|
||||
|
||||
resource(sample_t);
|
||||
resource()
|
||||
:std::array<uint64_t, 9>{{0}}
|
||||
{}
|
||||
};
|
||||
|
||||
/// Frontend to perf_event_open(2). This has the highest resolution.
|
||||
struct ircd::prof::system
|
||||
:std::array<std::array<uint64_t, 2>, 7>
|
||||
{
|
||||
using array_type = std::array<std::array<uint64_t, 2>, 7>;
|
||||
|
||||
static prof::group group;
|
||||
|
||||
// [N][0] = KERNEL, [N][1] = USER
|
||||
//
|
||||
// 0: TIME_PROF,
|
||||
// 1: TIME_CPU,
|
||||
// 2: TIME_TASK,
|
||||
// 3: PF_MINOR,
|
||||
// 4: PF_MAJOR,
|
||||
// 5: SWITCH_TASK,
|
||||
// 6: SWITCH_CPU,
|
||||
|
||||
system(sample_t) noexcept;
|
||||
system()
|
||||
:array_type{{{0}}}
|
||||
{}
|
||||
};
|
||||
|
||||
/// Type descriptor for prof events. This structure is used to aggregate
|
||||
/// information that describes a profiling event type, including whether
|
||||
/// the kernel or the user is being profiled (dpl), the principal counter
|
||||
/// type being profiled (counter) and any other contextual attributes.
|
||||
struct ircd::prof::type
|
||||
{
|
||||
enum dpl dpl {0};
|
||||
uint8_t type_id {0};
|
||||
uint8_t counter {0};
|
||||
uint8_t cacheop {0};
|
||||
uint8_t cacheres {0};
|
||||
|
||||
type(const event &);
|
||||
type(const enum dpl & = (enum dpl)0,
|
||||
const uint8_t &attr_type = 0,
|
||||
const uint8_t &counter = 0,
|
||||
const uint8_t &cacheop = 0,
|
||||
const uint8_t &cacheres = 0);
|
||||
};
|
||||
|
||||
enum ircd::prof::dpl
|
||||
:std::underlying_type<ircd::prof::dpl>::type
|
||||
{
|
||||
KERNEL = 0,
|
||||
USER = 1,
|
||||
};
|
||||
|
||||
struct ircd::prof::init
|
||||
{
|
||||
init();
|
||||
~init() noexcept;
|
||||
};
|
||||
|
||||
#if defined(__x86_64__) || defined(__i386__)
|
||||
extern inline uint64_t
|
||||
__attribute__((flatten, always_inline, gnu_inline, artificial))
|
||||
ircd::prof::cycles()
|
||||
{
|
||||
return x86::rdtsc();
|
||||
}
|
||||
#else
|
||||
ircd::prof::cycles()
|
||||
{
|
||||
static_assert(false, "Select reference cycle counter for platform.");
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) || defined(__i386__)
|
||||
extern inline unsigned long long
|
||||
__attribute__((always_inline, gnu_inline, artificial))
|
||||
ircd::prof::x86::rdtsc()
|
||||
{
|
||||
return __builtin_ia32_rdtsc();
|
||||
}
|
||||
#else
|
||||
inline unsigned long long
|
||||
ircd::prof::x86::rdtsc()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) || defined(__i386__)
|
||||
extern inline unsigned long long
|
||||
__attribute__((always_inline, gnu_inline, artificial))
|
||||
ircd::prof::x86::rdtscp()
|
||||
{
|
||||
uint32_t ia32_tsc_aux;
|
||||
return __builtin_ia32_rdtscp(&ia32_tsc_aux);
|
||||
}
|
||||
#else
|
||||
inline unsigned long long
|
||||
ircd::prof::x86::rdtscp()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) || defined(__i386__)
|
||||
extern inline unsigned long long
|
||||
__attribute__((always_inline, gnu_inline, artificial))
|
||||
ircd::prof::x86::rdpmc(const uint &c)
|
||||
{
|
||||
return __builtin_ia32_rdpmc(c);
|
||||
}
|
||||
#else
|
||||
inline unsigned long long
|
||||
ircd::prof::x86::rdpmc(const uint &c)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
6
include/ircd/prof/README.md
Normal file
6
include/ircd/prof/README.md
Normal file
|
@ -0,0 +1,6 @@
|
|||
# Profiling & Performance Instruments
|
||||
|
||||
This is an accumulated collection of tools available to developers to aid with
|
||||
optimization and debugging. Some items here leverage hardware and OS-specific
|
||||
features, and their interfaces may not yet be implemented or available on all
|
||||
platforms.
|
34
include/ircd/prof/instructions.h
Normal file
34
include/ircd/prof/instructions.h
Normal file
|
@ -0,0 +1,34 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2019 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_PROF_INSTRUCTIONS_H
|
||||
|
||||
namespace ircd::prof
|
||||
{
|
||||
struct instructions;
|
||||
}
|
||||
|
||||
/// Gadget for hardware profiling of instructions for a scope.
|
||||
///
|
||||
struct ircd::prof::instructions
|
||||
{
|
||||
prof::group group;
|
||||
uint64_t retired {0};
|
||||
|
||||
public:
|
||||
const uint64_t &at() const;
|
||||
const uint64_t &sample();
|
||||
|
||||
instructions();
|
||||
instructions(instructions &&) = delete;
|
||||
instructions(const instructions &) = delete;
|
||||
~instructions() noexcept;
|
||||
};
|
101
include/ircd/prof/prof.h
Normal file
101
include/ircd/prof/prof.h
Normal file
|
@ -0,0 +1,101 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2019 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_PROF_H
|
||||
|
||||
namespace ircd::prof
|
||||
{
|
||||
struct init;
|
||||
struct type;
|
||||
struct event;
|
||||
enum dpl :uint8_t;
|
||||
using group = std::vector<std::unique_ptr<event>>;
|
||||
IRCD_EXCEPTION(ircd::error, error)
|
||||
IRCD_OVERLOAD(sample)
|
||||
|
||||
// Samples
|
||||
uint64_t cycles(); ///< Monotonic reference cycles (since system boot)
|
||||
uint64_t time_user(); ///< Nanoseconds of CPU time in userspace.
|
||||
uint64_t time_kern(); ///< Nanoseconds of CPU time in kernelland.
|
||||
uint64_t time_real(); ///< Nanoseconds of CPU time real.
|
||||
uint64_t time_proc(); ///< Nanoseconds of CPU time for process.
|
||||
uint64_t time_thrd(); ///< Nanoseconds of CPU time for thread.
|
||||
|
||||
// Control panel
|
||||
void stop(group &);
|
||||
void start(group &);
|
||||
void reset(group &);
|
||||
|
||||
// Config
|
||||
extern conf::item<bool> enable;
|
||||
}
|
||||
|
||||
#include "x86.h"
|
||||
#include "vg.h"
|
||||
#include "syscall_timer.h"
|
||||
#include "instructions.h"
|
||||
#include "resource.h"
|
||||
#include "times.h"
|
||||
#include "system.h"
|
||||
|
||||
// Exports to ircd::
|
||||
namespace ircd
|
||||
{
|
||||
using prof::cycles;
|
||||
}
|
||||
|
||||
/// Type descriptor for prof events. This structure is used to aggregate
|
||||
/// information that describes a profiling event type, including whether
|
||||
/// the kernel or the user is being profiled (dpl), the principal counter
|
||||
/// type being profiled (counter) and any other contextual attributes.
|
||||
struct ircd::prof::type
|
||||
{
|
||||
enum dpl dpl {0};
|
||||
uint8_t type_id {0};
|
||||
uint8_t counter {0};
|
||||
uint8_t cacheop {0};
|
||||
uint8_t cacheres {0};
|
||||
|
||||
type(const event &);
|
||||
type(const enum dpl & = (enum dpl)0,
|
||||
const uint8_t &attr_type = 0,
|
||||
const uint8_t &counter = 0,
|
||||
const uint8_t &cacheop = 0,
|
||||
const uint8_t &cacheres = 0);
|
||||
};
|
||||
|
||||
enum ircd::prof::dpl
|
||||
:std::underlying_type<ircd::prof::dpl>::type
|
||||
{
|
||||
KERNEL = 0,
|
||||
USER = 1,
|
||||
};
|
||||
|
||||
struct ircd::prof::init
|
||||
{
|
||||
init();
|
||||
~init() noexcept;
|
||||
};
|
||||
|
||||
#if defined(__x86_64__) || defined(__i386__)
|
||||
extern inline uint64_t
|
||||
__attribute__((flatten, always_inline, gnu_inline, artificial))
|
||||
ircd::prof::cycles()
|
||||
{
|
||||
return x86::rdtsc();
|
||||
}
|
||||
#else
|
||||
ircd::prof::cycles()
|
||||
{
|
||||
static_assert(false, "Select reference cycle counter for platform.");
|
||||
return 0;
|
||||
}
|
||||
#endif
|
46
include/ircd/prof/resource.h
Normal file
46
include/ircd/prof/resource.h
Normal file
|
@ -0,0 +1,46 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2019 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_PROF_RESOURCE_H
|
||||
|
||||
namespace ircd::prof
|
||||
{
|
||||
struct resource;
|
||||
|
||||
resource &operator+=(resource &a, const resource &b);
|
||||
resource &operator-=(resource &a, const resource &b);
|
||||
resource operator+(const resource &a, const resource &b);
|
||||
resource operator-(const resource &a, const resource &b);
|
||||
}
|
||||
|
||||
/// Frontend to getrusage(2). This has higher resolution than prof::times
|
||||
/// in practice with slight added expense.
|
||||
struct ircd::prof::resource
|
||||
:std::array<uint64_t, 9>
|
||||
{
|
||||
enum
|
||||
{
|
||||
TIME_USER, // microseconds
|
||||
TIME_KERN, // microseconds
|
||||
RSS_MAX,
|
||||
PF_MINOR,
|
||||
PF_MAJOR,
|
||||
BLOCK_IN,
|
||||
BLOCK_OUT,
|
||||
SCHED_YIELD,
|
||||
SCHED_PREEMPT,
|
||||
};
|
||||
|
||||
resource(sample_t);
|
||||
resource()
|
||||
:std::array<uint64_t, 9>{{0}}
|
||||
{}
|
||||
};
|
55
include/ircd/prof/syscall_timer.h
Normal file
55
include/ircd/prof/syscall_timer.h
Normal file
|
@ -0,0 +1,55 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2019 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_PROF_SYSCALL_TIMER_H
|
||||
|
||||
namespace ircd::prof
|
||||
{
|
||||
struct syscall_timer;
|
||||
}
|
||||
|
||||
/// This suite of devices is intended to figure out when a system call is
|
||||
/// really slow or "blocking." The original use-case is for io_submit() in
|
||||
/// fs::aio.
|
||||
///
|
||||
/// The sample is conducted with times(2) which is itself a system call
|
||||
/// though reasonably fast, and the result has poor resolution meaning
|
||||
/// the result of at() is generally 0 unless the system call was very slow.
|
||||
///
|
||||
/// It is started on construction. The user must later call sample()
|
||||
/// which returns the value of at() as well.
|
||||
struct ircd::prof::syscall_timer
|
||||
{
|
||||
struct high_resolution;
|
||||
|
||||
uint64_t started, stopped;
|
||||
|
||||
public:
|
||||
uint64_t at() const;
|
||||
uint64_t sample();
|
||||
|
||||
syscall_timer() noexcept;
|
||||
};
|
||||
|
||||
/// This is a higher resolution alternative. The sample may be conducted
|
||||
/// with getrusage() or perf events; the exact method is TBD and may be
|
||||
/// expensive/intrusive. This device should be used temporarily by developers
|
||||
/// and not left in place in committed code.
|
||||
struct ircd::prof::syscall_timer::high_resolution
|
||||
{
|
||||
uint64_t started, stopped;
|
||||
|
||||
public:
|
||||
uint64_t at() const;
|
||||
uint64_t sample();
|
||||
|
||||
high_resolution() noexcept;
|
||||
};
|
50
include/ircd/prof/system.h
Normal file
50
include/ircd/prof/system.h
Normal file
|
@ -0,0 +1,50 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2019 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_PROF_SYSTEM_H
|
||||
|
||||
namespace ircd::prof
|
||||
{
|
||||
struct system;
|
||||
|
||||
using read_closure = std::function<void (const type &, const uint64_t &val)>;
|
||||
void for_each(const const_buffer &read, const read_closure &);
|
||||
|
||||
system &hotsample(system &) noexcept;
|
||||
system &operator+=(system &a, const system &b);
|
||||
system &operator-=(system &a, const system &b);
|
||||
system operator+(const system &a, const system &b);
|
||||
system operator-(const system &a, const system &b);
|
||||
}
|
||||
|
||||
/// Frontend to perf_event_open(2). This has the highest resolution.
|
||||
struct ircd::prof::system
|
||||
:std::array<std::array<uint64_t, 2>, 7>
|
||||
{
|
||||
using array_type = std::array<std::array<uint64_t, 2>, 7>;
|
||||
|
||||
static prof::group group;
|
||||
|
||||
// [N][0] = KERNEL, [N][1] = USER
|
||||
//
|
||||
// 0: TIME_PROF,
|
||||
// 1: TIME_CPU,
|
||||
// 2: TIME_TASK,
|
||||
// 3: PF_MINOR,
|
||||
// 4: PF_MAJOR,
|
||||
// 5: SWITCH_TASK,
|
||||
// 6: SWITCH_CPU,
|
||||
|
||||
system(sample_t) noexcept;
|
||||
system()
|
||||
:array_type{{{0}}}
|
||||
{}
|
||||
};
|
30
include/ircd/prof/times.h
Normal file
30
include/ircd/prof/times.h
Normal file
|
@ -0,0 +1,30 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2019 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_PROF_TIMES_H
|
||||
|
||||
namespace ircd::prof
|
||||
{
|
||||
struct times;
|
||||
}
|
||||
|
||||
/// Frontend to times(2). This has low resolution in practice, but it's
|
||||
/// very cheap as far as syscalls go; x-platform implementation courtesy
|
||||
/// of boost::chrono.
|
||||
struct ircd::prof::times
|
||||
{
|
||||
uint64_t real {0};
|
||||
uint64_t kern {0};
|
||||
uint64_t user {0};
|
||||
|
||||
times(sample_t);
|
||||
times() = default;
|
||||
};
|
40
include/ircd/prof/vg.h
Normal file
40
include/ircd/prof/vg.h
Normal file
|
@ -0,0 +1,40 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2019 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_PROF_VG_H
|
||||
|
||||
/// Callgrind hypercall suite
|
||||
namespace ircd::prof::vg
|
||||
{
|
||||
struct enable;
|
||||
struct disable;
|
||||
|
||||
bool enabled();
|
||||
void dump(const char *const reason = nullptr);
|
||||
void toggle();
|
||||
void reset();
|
||||
void start() noexcept;
|
||||
void stop() noexcept;
|
||||
}
|
||||
|
||||
/// Enable callgrind profiling for the scope
|
||||
struct ircd::prof::vg::enable
|
||||
{
|
||||
enable() noexcept;
|
||||
~enable() noexcept;
|
||||
};
|
||||
|
||||
/// Disable any enabled callgrind profiling for the scope; then restore.
|
||||
struct ircd::prof::vg::disable
|
||||
{
|
||||
disable() noexcept;
|
||||
~disable() noexcept;
|
||||
};
|
66
include/ircd/prof/x86.h
Normal file
66
include/ircd/prof/x86.h
Normal file
|
@ -0,0 +1,66 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2019 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
#pragma once
|
||||
#define HAVE_IRCD_PROF_X86_H
|
||||
|
||||
/// X86 platform related. Each function wraps the compiler builtin for the
/// instruction of the same name; when not compiling for x86 every function
/// is a stub returning 0.
namespace ircd::prof::x86
{
	unsigned long long rdtsc();
	unsigned long long rdtscp();
	unsigned long long rdpmc(const uint &);
}

#if defined(__x86_64__) || defined(__i386__)

/// Read the time-stamp counter.
extern inline unsigned long long
__attribute__((always_inline, gnu_inline, artificial))
ircd::prof::x86::rdtsc()
{
	return __builtin_ia32_rdtsc();
}

/// Read the time-stamp counter with RDTSCP; the auxiliary value the
/// instruction also produces is discarded.
extern inline unsigned long long
__attribute__((always_inline, gnu_inline, artificial))
ircd::prof::x86::rdtscp()
{
	uint32_t aux_discarded;
	return __builtin_ia32_rdtscp(&aux_discarded);
}

/// Read the performance-monitoring counter selected by `c`.
extern inline unsigned long long
__attribute__((always_inline, gnu_inline, artificial))
ircd::prof::x86::rdpmc(const uint &c)
{
	return __builtin_ia32_rdpmc(c);
}

#else // not x86

inline unsigned long long
ircd::prof::x86::rdtsc()
{
	return 0;
}

inline unsigned long long
ircd::prof::x86::rdtscp()
{
	return 0;
}

inline unsigned long long
ircd::prof::x86::rdpmc(const uint &c)
{
	return 0;
}

#endif
|
|
@ -146,6 +146,9 @@ libircd_la_SOURCES += rfc1035.cc
|
|||
libircd_la_SOURCES += http.cc
|
||||
libircd_la_SOURCES += http2.cc
|
||||
libircd_la_SOURCES += prof.cc
|
||||
if LINUX
|
||||
libircd_la_SOURCES += prof_linux.cc
|
||||
endif
|
||||
libircd_la_SOURCES += fs.cc
|
||||
libircd_la_SOURCES += ios.cc
|
||||
libircd_la_SOURCES += ctx.cc
|
||||
|
|
657
ircd/prof.cc
657
ircd/prof.cc
|
@ -12,71 +12,10 @@
|
|||
#include <RB_INC_SYS_IOCTL_H
|
||||
#include <RB_INC_SYS_MMAN_H
|
||||
#include <RB_INC_SYS_RESOURCE_H
|
||||
#include <linux/perf_event.h>
|
||||
|
||||
#include <boost/chrono/chrono.hpp>
|
||||
#include <boost/chrono/process_cpu_clocks.hpp>
|
||||
|
||||
#ifndef __clang__
|
||||
#define IRCD_PROF_ALWAYS_OPTIMIZE __attribute__((optimize("s"), flatten))
|
||||
#else
|
||||
#define IRCD_PROF_ALWAYS_OPTIMIZE
|
||||
#endif
|
||||
|
||||
namespace ircd::prof
|
||||
{
|
||||
std::ostream &debug(std::ostream &, const ::perf_event_mmap_page &);
|
||||
|
||||
template<class... args> event *
|
||||
create(group &,
|
||||
const uint32_t &,
|
||||
const uint64_t &,
|
||||
args&&...);
|
||||
|
||||
static event &leader(group &);
|
||||
static event *leader(group *const &);
|
||||
|
||||
extern conf::item<bool> enable;
|
||||
}
|
||||
|
||||
struct ircd::prof::event
|
||||
:instance_list<event>
|
||||
{
|
||||
perf_event_attr attr;
|
||||
fs::fd fd;
|
||||
uint64_t id {0};
|
||||
size_t map_size {0};
|
||||
char *map {nullptr};
|
||||
perf_event_mmap_page *head {nullptr};
|
||||
const_buffer body;
|
||||
|
||||
uint64_t rdpmc() const;
|
||||
long ioctl(const ulong &req, const long &arg = 0);
|
||||
void reset(const long & = 0);
|
||||
void enable(const long & = 0);
|
||||
void disable(const long & = 0);
|
||||
|
||||
event(const int &group,
|
||||
const uint32_t &type,
|
||||
const uint64_t &config,
|
||||
const bool &user,
|
||||
const bool &kernel,
|
||||
const bool &use_map = true);
|
||||
|
||||
~event() noexcept;
|
||||
};
|
||||
|
||||
template<>
|
||||
decltype(ircd::util::instance_list<ircd::prof::event>::allocator)
|
||||
ircd::util::instance_list<ircd::prof::event>::allocator
|
||||
{};
|
||||
|
||||
template<>
|
||||
decltype(ircd::util::instance_list<ircd::prof::event>::list)
|
||||
ircd::util::instance_list<ircd::prof::event>::list
|
||||
{
|
||||
allocator
|
||||
};
|
||||
|
||||
decltype(ircd::prof::enable)
|
||||
ircd::prof::enable
|
||||
{
|
||||
|
@ -85,124 +24,56 @@ ircd::prof::enable
|
|||
{ "persist", false },
|
||||
};
|
||||
|
||||
uint64_t
|
||||
ircd::prof::time_real()
|
||||
{
|
||||
return boost::chrono::process_real_cpu_clock::now().time_since_epoch().count();
|
||||
}
|
||||
|
||||
uint64_t
|
||||
ircd::prof::time_kern()
|
||||
{
|
||||
return boost::chrono::process_system_cpu_clock::now().time_since_epoch().count();
|
||||
}
|
||||
|
||||
uint64_t
|
||||
ircd::prof::time_user()
|
||||
{
|
||||
return boost::chrono::process_user_cpu_clock::now().time_since_epoch().count();
|
||||
}
|
||||
|
||||
uint64_t
|
||||
__attribute__((weak))
|
||||
ircd::prof::time_thrd()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
__attribute__((weak))
|
||||
ircd::prof::time_proc()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
//
|
||||
// init
|
||||
//
|
||||
|
||||
__attribute__((weak))
|
||||
ircd::prof::init::init()
|
||||
try
|
||||
{
|
||||
if(!enable)
|
||||
return;
|
||||
|
||||
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, true, false);
|
||||
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, false, true);
|
||||
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK, true, false);
|
||||
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK, false, true);
|
||||
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN, true, false);
|
||||
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN, false, true);
|
||||
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ, true, false);
|
||||
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ, false, true);
|
||||
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, true, false);
|
||||
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, false, true);
|
||||
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, true, false);
|
||||
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, false, true);
|
||||
}
|
||||
catch(const std::exception &e)
|
||||
{
|
||||
log::error
|
||||
{
|
||||
"Profiling system initialization :%s",
|
||||
e.what()
|
||||
};
|
||||
|
||||
system::group.clear();
|
||||
throw;
|
||||
}
|
||||
|
||||
__attribute__((weak))
|
||||
ircd::prof::init::~init()
|
||||
noexcept
|
||||
{
|
||||
system::group.clear();
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// interface
|
||||
//
|
||||
|
||||
void
|
||||
ircd::prof::reset(group &group)
|
||||
{
|
||||
leader(group).reset(PERF_IOC_FLAG_GROUP);
|
||||
}
|
||||
|
||||
void
|
||||
IRCD_PROF_ALWAYS_OPTIMIZE
|
||||
ircd::prof::start(group &group)
|
||||
{
|
||||
leader(group).enable(PERF_IOC_FLAG_GROUP);
|
||||
}
|
||||
|
||||
void
|
||||
IRCD_PROF_ALWAYS_OPTIMIZE
|
||||
ircd::prof::stop(group &group)
|
||||
{
|
||||
auto &leader(*group.front());
|
||||
leader.disable(PERF_IOC_FLAG_GROUP);
|
||||
assert(!group.empty());
|
||||
}
|
||||
|
||||
ircd::prof::event &
|
||||
IRCD_PROF_ALWAYS_OPTIMIZE
|
||||
ircd::prof::leader(group &group)
|
||||
{
|
||||
assert(!group.empty() && group.front());
|
||||
return *group.front();
|
||||
}
|
||||
|
||||
/// Nullable lookup of a group's leader, i.e. its first element.
/// Returns nullptr when `group` is null or refers to an empty group;
/// otherwise returns the raw pointer held by the first element.
ircd::prof::event *
ircd::prof::leader(group *const &group)
{
	if(!group || group->empty())
		return nullptr;

	return group->front().get();
}
|
||||
|
||||
template<class... args>
|
||||
ircd::prof::event *
|
||||
ircd::prof::create(group &group,
|
||||
const uint32_t &type,
|
||||
const uint64_t &config,
|
||||
args&&... a)
|
||||
try
|
||||
{
|
||||
const int gfd
|
||||
{
|
||||
leader(&group)? leader(group).fd : -1
|
||||
};
|
||||
|
||||
group.emplace_back(std::make_unique<event>
|
||||
(
|
||||
gfd, type, config, std::forward<args>(a)...
|
||||
));
|
||||
|
||||
return group.back().get();
|
||||
}
|
||||
catch(const std::exception &e)
|
||||
{
|
||||
log::dwarning
|
||||
{
|
||||
"Failed to create event type:%u config:%lu :%s",
|
||||
type,
|
||||
config,
|
||||
e.what()
|
||||
};
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
//
|
||||
// prof::vg
|
||||
// prof/vg.h
|
||||
//
|
||||
// note: further definitions calling valgrind isolated to ircd/vg.cc
|
||||
|
||||
|
@ -238,42 +109,11 @@ noexcept
|
|||
start();
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// instructions
|
||||
// prof/syscall_timer.h
|
||||
//
|
||||
|
||||
ircd::prof::instructions::instructions()
|
||||
{
|
||||
if(!create(this->group, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, true, false))
|
||||
throw error
|
||||
{
|
||||
"Cannot sample instruction counter."
|
||||
};
|
||||
|
||||
reset(this->group);
|
||||
start(this->group);
|
||||
}
|
||||
|
||||
ircd::prof::instructions::~instructions()
|
||||
noexcept
|
||||
{
|
||||
}
|
||||
|
||||
const uint64_t &
|
||||
IRCD_PROF_ALWAYS_OPTIMIZE
|
||||
ircd::prof::instructions::sample()
|
||||
{
|
||||
retired = prof::leader(group).rdpmc();
|
||||
return retired;
|
||||
}
|
||||
|
||||
const uint64_t &
|
||||
ircd::prof::instructions::at()
|
||||
const
|
||||
{
|
||||
return retired;
|
||||
}
|
||||
|
||||
//
|
||||
// syscall_timer
|
||||
//
|
||||
|
@ -336,50 +176,9 @@ const
|
|||
return stopped - started;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// time_*() suite
|
||||
//
|
||||
|
||||
uint64_t
|
||||
ircd::prof::time_thrd()
|
||||
{
|
||||
struct ::timespec tv;
|
||||
syscall(::clock_gettime, CLOCK_THREAD_CPUTIME_ID, &tv);
|
||||
return ulong(tv.tv_sec) * 1000000000UL + tv.tv_nsec;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
ircd::prof::time_proc()
|
||||
{
|
||||
struct ::timespec tv;
|
||||
syscall(::clock_gettime, CLOCK_PROCESS_CPUTIME_ID, &tv);
|
||||
return ulong(tv.tv_sec) * 1000000000UL + tv.tv_nsec;
|
||||
}
|
||||
|
||||
//
|
||||
// Interface (cross-platform)
|
||||
//
|
||||
|
||||
uint64_t
|
||||
ircd::prof::time_real()
|
||||
{
|
||||
return boost::chrono::process_real_cpu_clock::now().time_since_epoch().count();
|
||||
}
|
||||
|
||||
uint64_t
|
||||
ircd::prof::time_kern()
|
||||
{
|
||||
return boost::chrono::process_system_cpu_clock::now().time_since_epoch().count();
|
||||
}
|
||||
|
||||
uint64_t
|
||||
ircd::prof::time_user()
|
||||
{
|
||||
return boost::chrono::process_user_cpu_clock::now().time_since_epoch().count();
|
||||
}
|
||||
|
||||
//
|
||||
// times
|
||||
// prof/times.h
|
||||
//
|
||||
|
||||
ircd::prof::times::times(sample_t)
|
||||
|
@ -402,8 +201,9 @@ ircd::prof::times::times(sample_t)
|
|||
this->user = d.count().user;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// resource
|
||||
// prof/resource.h
|
||||
//
|
||||
|
||||
ircd::prof::resource
|
||||
|
@ -463,374 +263,3 @@ ircd::prof::resource::resource(sample_t)
|
|||
at(SCHED_YIELD) = ru.ru_nvcsw;
|
||||
at(SCHED_PREEMPT) = ru.ru_nivcsw;
|
||||
}
|
||||
|
||||
//
|
||||
// system
|
||||
//
|
||||
|
||||
decltype(ircd::prof::system::group)
|
||||
ircd::prof::system::group;
|
||||
|
||||
ircd::prof::system
|
||||
ircd::prof::operator-(const system &a,
|
||||
const system &b)
|
||||
{
|
||||
system ret(a);
|
||||
ret -= b;
|
||||
return ret;
|
||||
}
|
||||
|
||||
ircd::prof::system
|
||||
ircd::prof::operator+(const system &a,
|
||||
const system &b)
|
||||
{
|
||||
system ret(a);
|
||||
ret += b;
|
||||
return ret;
|
||||
}
|
||||
|
||||
ircd::prof::system &
|
||||
ircd::prof::operator-=(system &a,
|
||||
const system &b)
|
||||
{
|
||||
for(size_t i(0); i < a.size(); ++i)
|
||||
for(size_t j(0); j < a[i].size(); ++j)
|
||||
a[i][j] -= b[i][j];
|
||||
|
||||
return a;
|
||||
}
|
||||
|
||||
ircd::prof::system &
|
||||
ircd::prof::operator+=(system &a,
|
||||
const system &b)
|
||||
{
|
||||
for(size_t i(0); i < a.size(); ++i)
|
||||
for(size_t j(0); j < a[i].size(); ++j)
|
||||
a[i][j] += b[i][j];
|
||||
|
||||
return a;
|
||||
}
|
||||
|
||||
ircd::prof::system &
|
||||
ircd::prof::hotsample(system &s)
|
||||
noexcept
|
||||
{
|
||||
thread_local char buf[1024];
|
||||
|
||||
auto &leader
|
||||
{
|
||||
prof::leader(system::group)
|
||||
};
|
||||
|
||||
const const_buffer read
|
||||
{
|
||||
buf, size_t(syscall(::read, int(leader.fd), buf, sizeof(buf)))
|
||||
};
|
||||
|
||||
for_each(read, [&s]
|
||||
(const type &type, const uint64_t &val)
|
||||
{
|
||||
auto &r0
|
||||
{
|
||||
s.at(size_t(type.counter))
|
||||
};
|
||||
|
||||
auto &r1
|
||||
{
|
||||
r0.at(size_t(type.dpl))
|
||||
};
|
||||
|
||||
r1 = val;
|
||||
});
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
void
|
||||
ircd::prof::for_each(const const_buffer &buf,
|
||||
const read_closure &closure)
|
||||
{
|
||||
struct head
|
||||
{
|
||||
uint64_t nr, te, tr;
|
||||
}
|
||||
const *const &head
|
||||
{
|
||||
reinterpret_cast<const struct head *>(data(buf))
|
||||
};
|
||||
|
||||
struct body
|
||||
{
|
||||
uint64_t val, id;
|
||||
}
|
||||
const *const &body
|
||||
{
|
||||
reinterpret_cast<const struct body *>(data(buf) + sizeof(struct head))
|
||||
};
|
||||
|
||||
// Start with the pseudo-results; these should always be the same for
|
||||
// non-hw profiling, so the DPL is meaningless.
|
||||
closure(type{dpl::KERNEL, uint8_t(-1)}, head->te);
|
||||
closure(type{dpl::USER, uint8_t(-1)}, head->tr);
|
||||
|
||||
// Iterate the result list
|
||||
for(size_t i(0); i < head->nr; ++i)
|
||||
for(auto it(begin(event::list)); it != end(event::list); ++it)
|
||||
if((*it)->id == body[i].id)
|
||||
return closure(type(**it), body[i].val);
|
||||
}
|
||||
|
||||
ircd::prof::system::system(sample_t)
|
||||
noexcept
|
||||
{
|
||||
stop(group);
|
||||
hotsample(*this);
|
||||
start(group);
|
||||
}
|
||||
|
||||
//
|
||||
// event
|
||||
//
|
||||
|
||||
//
|
||||
// event::event
|
||||
//
|
||||
|
||||
ircd::prof::event::event(const int &group,
|
||||
const uint32_t &type,
|
||||
const uint64_t &config,
|
||||
const bool &user,
|
||||
const bool &kernel,
|
||||
const bool &use_map)
|
||||
:attr{[&]
|
||||
{
|
||||
struct ::perf_event_attr ret {0};
|
||||
ret.size = sizeof(ret);
|
||||
|
||||
ret.type = type;
|
||||
ret.config = config;
|
||||
ret.exclude_user = !user;
|
||||
ret.exclude_kernel = !kernel;
|
||||
|
||||
ret.read_format |= PERF_FORMAT_GROUP;
|
||||
ret.read_format |= PERF_FORMAT_ID;
|
||||
ret.read_format |= PERF_FORMAT_TOTAL_TIME_ENABLED;
|
||||
ret.read_format |= PERF_FORMAT_TOTAL_TIME_RUNNING;
|
||||
|
||||
ret.exclude_idle = true;
|
||||
ret.exclude_host = false;
|
||||
ret.exclude_hv = true;
|
||||
ret.exclude_guest = true;
|
||||
ret.exclude_callchain_user = true;
|
||||
ret.exclude_callchain_kernel = true;
|
||||
|
||||
ret.disabled = true;
|
||||
return ret;
|
||||
}()}
|
||||
,fd{[this, &group]
|
||||
{
|
||||
ulong flags(0);
|
||||
flags |= PERF_FLAG_FD_CLOEXEC;
|
||||
|
||||
const int cpu(-1);
|
||||
const pid_t pid(0);
|
||||
return int(syscall<SYS_perf_event_open>(&attr, pid, cpu, group, flags));
|
||||
}()}
|
||||
,id{[this]
|
||||
{
|
||||
uint64_t ret;
|
||||
syscall(::ioctl, int(fd), PERF_EVENT_IOC_ID, &ret);
|
||||
return ret;
|
||||
}()}
|
||||
,map_size
|
||||
{
|
||||
use_map && type == PERF_TYPE_HARDWARE?
|
||||
size_t(1UL + 0UL) * info::page_size:
|
||||
0UL
|
||||
}
|
||||
,map{[this]
|
||||
{
|
||||
int prot(0);
|
||||
prot |= PROT_READ;
|
||||
prot |= PROT_WRITE;
|
||||
|
||||
int flags(0);
|
||||
flags |= MAP_SHARED;
|
||||
|
||||
void *const ret
|
||||
{
|
||||
map_size?
|
||||
::mmap(nullptr, map_size, prot, flags, int(this->fd), 0):
|
||||
nullptr
|
||||
};
|
||||
|
||||
if(ret == (void *)-1)
|
||||
throw std::system_error
|
||||
{
|
||||
errno, std::system_category()
|
||||
};
|
||||
|
||||
if(map_size && ret == nullptr)
|
||||
throw error
|
||||
{
|
||||
"mmap(2) failed on event (fd:%d)", int(fd)
|
||||
};
|
||||
|
||||
return reinterpret_cast<char *>(ret);
|
||||
}()}
|
||||
,head
|
||||
{
|
||||
map?
|
||||
reinterpret_cast<::perf_event_mmap_page *>(map):
|
||||
nullptr
|
||||
}
|
||||
,body
|
||||
{
|
||||
head?
|
||||
map + head->data_offset:
|
||||
nullptr,
|
||||
head?
|
||||
head->data_size:
|
||||
0UL
|
||||
}
|
||||
{
|
||||
assert(size(body) % info::page_size == 0);
|
||||
assert(map_size % info::page_size == 0);
|
||||
}
|
||||
|
||||
ircd::prof::event::~event()
|
||||
noexcept
|
||||
{
|
||||
assert(!map || map_size);
|
||||
assert(!map_size || map);
|
||||
|
||||
if(map)
|
||||
syscall(::munmap, map, map_size);
|
||||
}
|
||||
|
||||
inline void
|
||||
IRCD_PROF_ALWAYS_OPTIMIZE
|
||||
ircd::prof::event::disable(const long &arg)
|
||||
{
|
||||
::ioctl(int(fd), PERF_EVENT_IOC_DISABLE, arg);
|
||||
}
|
||||
|
||||
inline void
|
||||
IRCD_PROF_ALWAYS_OPTIMIZE
|
||||
ircd::prof::event::enable(const long &arg)
|
||||
{
|
||||
const int &fd(this->fd);
|
||||
__builtin_ia32_mfence();
|
||||
__builtin_ia32_lfence();
|
||||
::ioctl(fd, PERF_EVENT_IOC_ENABLE, arg);
|
||||
}
|
||||
|
||||
void
|
||||
ircd::prof::event::reset(const long &arg)
|
||||
{
|
||||
ioctl(PERF_EVENT_IOC_RESET, arg);
|
||||
}
|
||||
|
||||
long
|
||||
ircd::prof::event::ioctl(const ulong &req,
|
||||
const long &arg)
|
||||
{
|
||||
return syscall(::ioctl, int(fd), req, arg);
|
||||
}
|
||||
|
||||
inline uint64_t
|
||||
IRCD_PROF_ALWAYS_OPTIMIZE
|
||||
ircd::prof::event::rdpmc()
|
||||
const
|
||||
{
|
||||
assert(head->cap_user_time);
|
||||
assert(head->cap_user_rdpmc);
|
||||
|
||||
uint64_t ret;
|
||||
uint32_t seq; do
|
||||
{
|
||||
seq = head->lock;
|
||||
__sync_synchronize();
|
||||
//assert(head->time_enabled == head->time_running);
|
||||
ret = head->offset;
|
||||
ret += head->index? x86::rdpmc(head->index - 1) : 0UL;
|
||||
__sync_synchronize();
|
||||
}
|
||||
while(head->lock != seq);
|
||||
return ret;
|
||||
}
|
||||
|
||||
//
|
||||
// type
|
||||
//
|
||||
|
||||
ircd::prof::type::type(const enum dpl &dpl,
|
||||
const uint8_t &type_id,
|
||||
const uint8_t &counter,
|
||||
const uint8_t &cacheop,
|
||||
const uint8_t &cacheres)
|
||||
:dpl{dpl}
|
||||
,type_id{type_id}
|
||||
,counter{counter}
|
||||
,cacheop{cacheop}
|
||||
,cacheres{cacheres}
|
||||
{
|
||||
}
|
||||
|
||||
ircd::prof::type::type(const event &event)
|
||||
:dpl
|
||||
{
|
||||
event.attr.exclude_kernel? dpl::USER : dpl::KERNEL
|
||||
}
|
||||
,type_id
|
||||
{
|
||||
uint8_t(event.attr.type)
|
||||
}
|
||||
,counter
|
||||
{
|
||||
uint8_t(event.attr.config)
|
||||
}
|
||||
,cacheop
|
||||
{
|
||||
uint8_t(event.attr.config >> 8)
|
||||
}
|
||||
,cacheres
|
||||
{
|
||||
uint8_t(event.attr.config >> 16)
|
||||
}
|
||||
{
|
||||
}
|
||||
|
||||
//
|
||||
// internal
|
||||
//
|
||||
|
||||
std::ostream &
|
||||
ircd::prof::debug(std::ostream &s,
|
||||
const ::perf_event_mmap_page &head)
|
||||
{
|
||||
s << "version: " << head.version << std::endl;
|
||||
s << "compat: " << head.compat_version << std::endl;
|
||||
s << "lock: " << head.lock << std::endl;
|
||||
s << "index: " << head.index << std::endl;
|
||||
s << "offset: " << head.offset << std::endl;
|
||||
s << "time_enabled: " << head.time_enabled << std::endl;
|
||||
s << "time_running: " << head.time_running << std::endl;
|
||||
s << "cap_user_rdpmc: " << head.cap_user_rdpmc << std::endl;
|
||||
s << "cap_user_time: " << head.cap_user_time << std::endl;
|
||||
s << "cap_user_time_zero: " << head.cap_user_time_zero << std::endl;
|
||||
s << "pmc_width: " << head.pmc_width << std::endl;
|
||||
s << "time_shift: " << head.time_shift << std::endl;
|
||||
s << "time_mult: " << head.time_mult << std::endl;
|
||||
s << "time_offset: " << head.time_offset << std::endl;
|
||||
s << "data_head: " << head.data_head << std::endl;
|
||||
s << "data_tail: " << head.data_tail << std::endl;
|
||||
s << "data_offset: " << head.data_offset << std::endl;
|
||||
s << "data_size: " << head.data_size << std::endl;
|
||||
s << "aux_head: " << head.aux_head << std::endl;
|
||||
s << "aux_tail: " << head.aux_tail << std::endl;
|
||||
s << "aux_offset: " << head.aux_offset << std::endl;
|
||||
s << "aux_size: " << head.aux_size << std::endl;
|
||||
|
||||
return s;
|
||||
}
|
||||
|
|
624
ircd/prof_linux.cc
Normal file
624
ircd/prof_linux.cc
Normal file
|
@ -0,0 +1,624 @@
|
|||
// Matrix Construct
|
||||
//
|
||||
// Copyright (C) Matrix Construct Developers, Authors & Contributors
|
||||
// Copyright (C) 2016-2019 Jason Volk <jason@zemos.net>
|
||||
//
|
||||
// Permission to use, copy, modify, and/or distribute this software for any
|
||||
// purpose with or without fee is hereby granted, provided that the above
|
||||
// copyright notice and this permission notice is present in all copies. The
|
||||
// full license for this software is available in the LICENSE file.
|
||||
|
||||
static_assert
|
||||
(
|
||||
__linux__,
|
||||
"This unit is only compiled for linux targets."
|
||||
);
|
||||
|
||||
#include <RB_INC_SYS_SYSCALL_H
|
||||
#include <RB_INC_SYS_IOCTL_H
|
||||
#include <RB_INC_SYS_MMAN_H
|
||||
#include <RB_INC_SYS_RESOURCE_H
|
||||
#include <linux/perf_event.h>
|
||||
|
||||
#ifndef __clang__
|
||||
#define IRCD_PROF_ALWAYS_OPTIMIZE __attribute__((optimize("s"), flatten))
|
||||
#else
|
||||
#define IRCD_PROF_ALWAYS_OPTIMIZE
|
||||
#endif
|
||||
|
||||
namespace ircd::prof
|
||||
{
|
||||
std::ostream &debug(std::ostream &, const ::perf_event_mmap_page &);
|
||||
|
||||
template<class... args> event *
|
||||
create(group &,
|
||||
const uint32_t &,
|
||||
const uint64_t &,
|
||||
args&&...);
|
||||
|
||||
static event &leader(group &);
|
||||
static event *leader(group *const &);
|
||||
}
|
||||
|
||||
struct ircd::prof::event
|
||||
:instance_list<event>
|
||||
{
|
||||
perf_event_attr attr;
|
||||
fs::fd fd;
|
||||
uint64_t id {0};
|
||||
size_t map_size {0};
|
||||
char *map {nullptr};
|
||||
perf_event_mmap_page *head {nullptr};
|
||||
const_buffer body;
|
||||
|
||||
uint64_t rdpmc() const;
|
||||
long ioctl(const ulong &req, const long &arg = 0);
|
||||
void reset(const long & = 0);
|
||||
void enable(const long & = 0);
|
||||
void disable(const long & = 0);
|
||||
|
||||
event(const int &group,
|
||||
const uint32_t &type,
|
||||
const uint64_t &config,
|
||||
const bool &user,
|
||||
const bool &kernel,
|
||||
const bool &use_map = true);
|
||||
|
||||
~event() noexcept;
|
||||
};
|
||||
|
||||
template<>
|
||||
decltype(ircd::util::instance_list<ircd::prof::event>::allocator)
|
||||
ircd::util::instance_list<ircd::prof::event>::allocator
|
||||
{};
|
||||
|
||||
template<>
|
||||
decltype(ircd::util::instance_list<ircd::prof::event>::list)
|
||||
ircd::util::instance_list<ircd::prof::event>::list
|
||||
{
|
||||
allocator
|
||||
};
|
||||
|
||||
//
|
||||
// prof
|
||||
//
|
||||
|
||||
void
|
||||
ircd::prof::reset(group &group)
|
||||
{
|
||||
leader(group).reset(PERF_IOC_FLAG_GROUP);
|
||||
}
|
||||
|
||||
void
|
||||
IRCD_PROF_ALWAYS_OPTIMIZE
|
||||
ircd::prof::start(group &group)
|
||||
{
|
||||
leader(group).enable(PERF_IOC_FLAG_GROUP);
|
||||
}
|
||||
|
||||
void
|
||||
IRCD_PROF_ALWAYS_OPTIMIZE
|
||||
ircd::prof::stop(group &group)
|
||||
{
|
||||
auto &leader(*group.front());
|
||||
leader.disable(PERF_IOC_FLAG_GROUP);
|
||||
assert(!group.empty());
|
||||
}
|
||||
|
||||
/// Returns the first event of the group by reference.
/// Precondition (asserted): the group is non-empty and its first slot is
/// not null.
ircd::prof::event &
IRCD_PROF_ALWAYS_OPTIMIZE
ircd::prof::leader(group &group)
{
	assert(!group.empty());

	const auto &first(group.front());
	assert(first);

	return *first;
}
|
||||
|
||||
/// Returns the first event of the group, or nullptr when the group pointer
/// is null or the group is empty.
ircd::prof::event *
ircd::prof::leader(group *const &group)
{
	if(!group || group->empty())
		return nullptr;

	return group->front().get();
}
|
||||
|
||||
template<class... args>
|
||||
ircd::prof::event *
|
||||
ircd::prof::create(group &group,
|
||||
const uint32_t &type,
|
||||
const uint64_t &config,
|
||||
args&&... a)
|
||||
try
|
||||
{
|
||||
const int gfd
|
||||
{
|
||||
leader(&group)? leader(group).fd : -1
|
||||
};
|
||||
|
||||
group.emplace_back(std::make_unique<event>
|
||||
(
|
||||
gfd, type, config, std::forward<args>(a)...
|
||||
));
|
||||
|
||||
return group.back().get();
|
||||
}
|
||||
catch(const std::exception &e)
|
||||
{
|
||||
log::dwarning
|
||||
{
|
||||
"Failed to create event type:%u config:%lu :%s",
|
||||
type,
|
||||
config,
|
||||
e.what()
|
||||
};
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
//
|
||||
// init
|
||||
//
|
||||
|
||||
ircd::prof::init::init()
|
||||
try
|
||||
{
|
||||
if(!enable)
|
||||
return;
|
||||
|
||||
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, true, false);
|
||||
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, false, true);
|
||||
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK, true, false);
|
||||
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK, false, true);
|
||||
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN, true, false);
|
||||
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN, false, true);
|
||||
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ, true, false);
|
||||
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ, false, true);
|
||||
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, true, false);
|
||||
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, false, true);
|
||||
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, true, false);
|
||||
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, false, true);
|
||||
}
|
||||
catch(const std::exception &e)
|
||||
{
|
||||
log::error
|
||||
{
|
||||
"Profiling system initialization :%s",
|
||||
e.what()
|
||||
};
|
||||
|
||||
system::group.clear();
|
||||
throw;
|
||||
}
|
||||
|
||||
/// Destroys every event held by system::group.
ircd::prof::init::~init() noexcept
{
	system::group.clear();
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// prof/instructions.h
|
||||
//
|
||||
|
||||
/// Creates a userspace-only hardware instruction counter in this object's
/// group, then resets and starts the group.
/// Throws prof::error when the counter cannot be created.
ircd::prof::instructions::instructions()
{
	auto *const counter
	{
		create(this->group, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, true, false)
	};

	if(!counter)
		throw error
		{
			"Cannot sample instruction counter."
		};

	reset(this->group);
	start(this->group);
}
|
||||
|
||||
/// Nothing to do explicitly; members are destroyed implicitly after this body.
ircd::prof::instructions::~instructions() noexcept
{
}
|
||||
|
||||
const uint64_t &
|
||||
IRCD_PROF_ALWAYS_OPTIMIZE
|
||||
ircd::prof::instructions::sample()
|
||||
{
|
||||
retired = prof::leader(group).rdpmc();
|
||||
return retired;
|
||||
}
|
||||
|
||||
const uint64_t &
|
||||
ircd::prof::instructions::at()
|
||||
const
|
||||
{
|
||||
return retired;
|
||||
}
|
||||
|
||||
//
|
||||
// time_*() suite
|
||||
//
|
||||
|
||||
uint64_t
|
||||
ircd::prof::time_thrd()
|
||||
{
|
||||
struct ::timespec tv;
|
||||
syscall(::clock_gettime, CLOCK_THREAD_CPUTIME_ID, &tv);
|
||||
return ulong(tv.tv_sec) * 1000000000UL + tv.tv_nsec;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
ircd::prof::time_proc()
|
||||
{
|
||||
struct ::timespec tv;
|
||||
syscall(::clock_gettime, CLOCK_PROCESS_CPUTIME_ID, &tv);
|
||||
return ulong(tv.tv_sec) * 1000000000UL + tv.tv_nsec;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// prof::system
|
||||
//
|
||||
|
||||
// Storage for the static event group sampled by prof::system; populated by
// init::init() and cleared by init::~init().
decltype(ircd::prof::system::group) ircd::prof::system::group;
|
||||
|
||||
/// Element-wise difference: a copy of `a` with `b` subtracted via operator-=.
ircd::prof::system
ircd::prof::operator-(const system &a,
                      const system &b)
{
	auto difference(a);
	difference -= b;
	return difference;
}
|
||||
|
||||
/// Element-wise sum: a copy of `a` with `b` added via operator+=.
ircd::prof::system
ircd::prof::operator+(const system &a,
                      const system &b)
{
	auto sum(a);
	sum += b;
	return sum;
}
|
||||
|
||||
/// Subtracts every element of `b` from the corresponding element of `a`,
/// in place. Iteration bounds are taken from `a`.
ircd::prof::system &
ircd::prof::operator-=(system &a,
                       const system &b)
{
	for(size_t outer(0); outer < a.size(); ++outer)
	{
		for(size_t inner(0); inner < a[outer].size(); ++inner)
		{
			a[outer][inner] -= b[outer][inner];
		}
	}

	return a;
}
|
||||
|
||||
/// Adds every element of `b` to the corresponding element of `a`, in place.
/// Iteration bounds are taken from `a`.
ircd::prof::system &
ircd::prof::operator+=(system &a,
                       const system &b)
{
	for(size_t outer(0); outer < a.size(); ++outer)
	{
		for(size_t inner(0); inner < a[outer].size(); ++inner)
		{
			a[outer][inner] += b[outer][inner];
		}
	}

	return a;
}
|
||||
|
||||
/// Reads the system group's counters from the leader's descriptor and
/// stores each reported value into `s`, indexed first by counter and then
/// by privilege level. Does not stop or start the group. Returns `s`.
///
/// NOTE(review): declared noexcept while calling the syscall wrapper and
/// the bounds-checked at(); presumably either may throw — confirm that
/// terminating in that case is intended.
ircd::prof::system &
ircd::prof::hotsample(system &s)
noexcept
{
	// Per-thread scratch space for the read(2) result.
	thread_local char buf[1024];

	event &lead(prof::leader(system::group));
	const auto len
	{
		syscall(::read, int(lead.fd), buf, sizeof(buf))
	};

	const const_buffer result
	{
		buf, size_t(len)
	};

	const auto store{[&s]
	(const type &t, const uint64_t &val)
	{
		s.at(size_t(t.counter)).at(size_t(t.dpl)) = val;
	}};

	for_each(result, store);
	return s;
}
|
||||
|
||||
void
|
||||
ircd::prof::for_each(const const_buffer &buf,
|
||||
const read_closure &closure)
|
||||
{
|
||||
struct head
|
||||
{
|
||||
uint64_t nr, te, tr;
|
||||
}
|
||||
const *const &head
|
||||
{
|
||||
reinterpret_cast<const struct head *>(data(buf))
|
||||
};
|
||||
|
||||
struct body
|
||||
{
|
||||
uint64_t val, id;
|
||||
}
|
||||
const *const &body
|
||||
{
|
||||
reinterpret_cast<const struct body *>(data(buf) + sizeof(struct head))
|
||||
};
|
||||
|
||||
// Start with the pseudo-results; these should always be the same for
|
||||
// non-hw profiling, so the DPL is meaningless.
|
||||
closure(type{dpl::KERNEL, uint8_t(-1)}, head->te);
|
||||
closure(type{dpl::USER, uint8_t(-1)}, head->tr);
|
||||
|
||||
// Iterate the result list
|
||||
for(size_t i(0); i < head->nr; ++i)
|
||||
for(auto it(begin(event::list)); it != end(event::list); ++it)
|
||||
if((*it)->id == body[i].id)
|
||||
return closure(type(**it), body[i].val);
|
||||
}
|
||||
|
||||
/// Sampling constructor: stops the group, reads its counters into this
/// object with hotsample(), then starts the group again.
ircd::prof::system::system(sample_t) noexcept
{
	stop(group);
	hotsample(*this);
	start(group);
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// prof::event
|
||||
//
|
||||
|
||||
ircd::prof::event::event(const int &group,
|
||||
const uint32_t &type,
|
||||
const uint64_t &config,
|
||||
const bool &user,
|
||||
const bool &kernel,
|
||||
const bool &use_map)
|
||||
:attr{[&]
|
||||
{
|
||||
struct ::perf_event_attr ret {0};
|
||||
ret.size = sizeof(ret);
|
||||
|
||||
ret.type = type;
|
||||
ret.config = config;
|
||||
ret.exclude_user = !user;
|
||||
ret.exclude_kernel = !kernel;
|
||||
|
||||
ret.read_format |= PERF_FORMAT_GROUP;
|
||||
ret.read_format |= PERF_FORMAT_ID;
|
||||
ret.read_format |= PERF_FORMAT_TOTAL_TIME_ENABLED;
|
||||
ret.read_format |= PERF_FORMAT_TOTAL_TIME_RUNNING;
|
||||
|
||||
ret.exclude_idle = true;
|
||||
ret.exclude_host = false;
|
||||
ret.exclude_hv = true;
|
||||
ret.exclude_guest = true;
|
||||
ret.exclude_callchain_user = true;
|
||||
ret.exclude_callchain_kernel = true;
|
||||
|
||||
ret.disabled = true;
|
||||
return ret;
|
||||
}()}
|
||||
,fd{[this, &group]
|
||||
{
|
||||
ulong flags(0);
|
||||
flags |= PERF_FLAG_FD_CLOEXEC;
|
||||
|
||||
const int cpu(-1);
|
||||
const pid_t pid(0);
|
||||
return int(syscall<SYS_perf_event_open>(&attr, pid, cpu, group, flags));
|
||||
}()}
|
||||
,id{[this]
|
||||
{
|
||||
uint64_t ret;
|
||||
syscall(::ioctl, int(fd), PERF_EVENT_IOC_ID, &ret);
|
||||
return ret;
|
||||
}()}
|
||||
,map_size
|
||||
{
|
||||
use_map && type == PERF_TYPE_HARDWARE?
|
||||
size_t(1UL + 0UL) * info::page_size:
|
||||
0UL
|
||||
}
|
||||
,map{[this]
|
||||
{
|
||||
int prot(0);
|
||||
prot |= PROT_READ;
|
||||
prot |= PROT_WRITE;
|
||||
|
||||
int flags(0);
|
||||
flags |= MAP_SHARED;
|
||||
|
||||
void *const ret
|
||||
{
|
||||
map_size?
|
||||
::mmap(nullptr, map_size, prot, flags, int(this->fd), 0):
|
||||
nullptr
|
||||
};
|
||||
|
||||
if(ret == (void *)-1)
|
||||
throw std::system_error
|
||||
{
|
||||
errno, std::system_category()
|
||||
};
|
||||
|
||||
if(map_size && ret == nullptr)
|
||||
throw error
|
||||
{
|
||||
"mmap(2) failed on event (fd:%d)", int(fd)
|
||||
};
|
||||
|
||||
return reinterpret_cast<char *>(ret);
|
||||
}()}
|
||||
,head
|
||||
{
|
||||
map?
|
||||
reinterpret_cast<::perf_event_mmap_page *>(map):
|
||||
nullptr
|
||||
}
|
||||
,body
|
||||
{
|
||||
head?
|
||||
map + head->data_offset:
|
||||
nullptr,
|
||||
head?
|
||||
head->data_size:
|
||||
0UL
|
||||
}
|
||||
{
|
||||
assert(size(body) % info::page_size == 0);
|
||||
assert(map_size % info::page_size == 0);
|
||||
}
|
||||
|
||||
/// Unmaps the header page if one was mapped.
ircd::prof::event::~event() noexcept
{
	// A mapping exists if and only if it has a non-zero size.
	assert(bool(map) == bool(map_size));

	if(!map)
		return;

	syscall(::munmap, map, map_size);
}
|
||||
|
||||
inline void
|
||||
IRCD_PROF_ALWAYS_OPTIMIZE
|
||||
ircd::prof::event::disable(const long &arg)
|
||||
{
|
||||
::ioctl(int(fd), PERF_EVENT_IOC_DISABLE, arg);
|
||||
}
|
||||
|
||||
inline void
|
||||
IRCD_PROF_ALWAYS_OPTIMIZE
|
||||
ircd::prof::event::enable(const long &arg)
|
||||
{
|
||||
const int &fd(this->fd);
|
||||
__builtin_ia32_mfence();
|
||||
__builtin_ia32_lfence();
|
||||
::ioctl(fd, PERF_EVENT_IOC_ENABLE, arg);
|
||||
}
|
||||
|
||||
void
|
||||
ircd::prof::event::reset(const long &arg)
|
||||
{
|
||||
ioctl(PERF_EVENT_IOC_RESET, arg);
|
||||
}
|
||||
|
||||
long
|
||||
ircd::prof::event::ioctl(const ulong &req,
|
||||
const long &arg)
|
||||
{
|
||||
return syscall(::ioctl, int(fd), req, arg);
|
||||
}
|
||||
|
||||
inline uint64_t
|
||||
IRCD_PROF_ALWAYS_OPTIMIZE
|
||||
ircd::prof::event::rdpmc()
|
||||
const
|
||||
{
|
||||
assert(head->cap_user_time);
|
||||
assert(head->cap_user_rdpmc);
|
||||
|
||||
uint64_t ret;
|
||||
uint32_t seq; do
|
||||
{
|
||||
seq = head->lock;
|
||||
__sync_synchronize();
|
||||
//assert(head->time_enabled == head->time_running);
|
||||
ret = head->offset;
|
||||
ret += head->index? x86::rdpmc(head->index - 1) : 0UL;
|
||||
__sync_synchronize();
|
||||
}
|
||||
while(head->lock != seq);
|
||||
return ret;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// prof::type
|
||||
//
|
||||
|
||||
ircd::prof::type::type(const enum dpl &dpl,
|
||||
const uint8_t &type_id,
|
||||
const uint8_t &counter,
|
||||
const uint8_t &cacheop,
|
||||
const uint8_t &cacheres)
|
||||
:dpl{dpl}
|
||||
,type_id{type_id}
|
||||
,counter{counter}
|
||||
,cacheop{cacheop}
|
||||
,cacheres{cacheres}
|
||||
{
|
||||
}
|
||||
|
||||
ircd::prof::type::type(const event &event)
|
||||
:dpl
|
||||
{
|
||||
event.attr.exclude_kernel? dpl::USER : dpl::KERNEL
|
||||
}
|
||||
,type_id
|
||||
{
|
||||
uint8_t(event.attr.type)
|
||||
}
|
||||
,counter
|
||||
{
|
||||
uint8_t(event.attr.config)
|
||||
}
|
||||
,cacheop
|
||||
{
|
||||
uint8_t(event.attr.config >> 8)
|
||||
}
|
||||
,cacheres
|
||||
{
|
||||
uint8_t(event.attr.config >> 16)
|
||||
}
|
||||
{
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// internal
|
||||
//
|
||||
|
||||
std::ostream &
|
||||
ircd::prof::debug(std::ostream &s,
|
||||
const ::perf_event_mmap_page &head)
|
||||
{
|
||||
s << "version: " << head.version << std::endl;
|
||||
s << "compat: " << head.compat_version << std::endl;
|
||||
s << "lock: " << head.lock << std::endl;
|
||||
s << "index: " << head.index << std::endl;
|
||||
s << "offset: " << head.offset << std::endl;
|
||||
s << "time_enabled: " << head.time_enabled << std::endl;
|
||||
s << "time_running: " << head.time_running << std::endl;
|
||||
s << "cap_user_rdpmc: " << head.cap_user_rdpmc << std::endl;
|
||||
s << "cap_user_time: " << head.cap_user_time << std::endl;
|
||||
s << "cap_user_time_zero: " << head.cap_user_time_zero << std::endl;
|
||||
s << "pmc_width: " << head.pmc_width << std::endl;
|
||||
s << "time_shift: " << head.time_shift << std::endl;
|
||||
s << "time_mult: " << head.time_mult << std::endl;
|
||||
s << "time_offset: " << head.time_offset << std::endl;
|
||||
s << "data_head: " << head.data_head << std::endl;
|
||||
s << "data_tail: " << head.data_tail << std::endl;
|
||||
s << "data_offset: " << head.data_offset << std::endl;
|
||||
s << "data_size: " << head.data_size << std::endl;
|
||||
s << "aux_head: " << head.aux_head << std::endl;
|
||||
s << "aux_tail: " << head.aux_tail << std::endl;
|
||||
s << "aux_offset: " << head.aux_offset << std::endl;
|
||||
s << "aux_size: " << head.aux_size << std::endl;
|
||||
|
||||
return s;
|
||||
}
|
Loading…
Reference in a new issue