0
0
Fork 0
mirror of https://github.com/matrix-construct/construct synced 2024-11-30 10:42:47 +01:00
construct/ircd/prof_linux.cc

873 lines
18 KiB
C++
Raw Normal View History

// Matrix Construct
//
// Copyright (C) Matrix Construct Developers, Authors & Contributors
// Copyright (C) 2016-2019 Jason Volk <jason@zemos.net>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice is present in all copies. The
// full license for this software is available in the LICENSE file.
// Compile-time guard: the build is rejected unless __linux__ evaluates to a
// true value, since everything below relies on Linux-specific interfaces.
static_assert
(
__linux__,
"This unit is only compiled for linux targets."
);
#include <RB_INC_SYS_SYSCALL_H
#include <RB_INC_SYS_IOCTL_H
#include <RB_INC_SYS_MMAN_H
#include <RB_INC_SYS_RESOURCE_H
#include <linux/perf_event.h>
// IRCD_PROF_ALWAYS_OPTIMIZE decorates several of the profiling functions in
// this unit. When not compiling with clang it expands to the optimize("s")
// and flatten function attributes; under clang it expands to nothing.
#ifndef __clang__
#define IRCD_PROF_ALWAYS_OPTIMIZE __attribute__((optimize("s"), flatten))
#else
#define IRCD_PROF_ALWAYS_OPTIMIZE
#endif
// Forward declarations of the helpers defined later in this unit.
namespace ircd::prof
{
// Streams the fields of a perf mmap control page, one per line.
std::ostream &debug(std::ostream &, const ::perf_event_mmap_page &);
// Constructs an event and appends it to the group; the definition returns
// nullptr (after logging) when construction throws a std::exception.
template<class... args> event *
create(group &,
const uint32_t &,
const uint64_t &,
args&&...);
// Front event of the group; the reference overload asserts it exists, the
// pointer overload yields nullptr for a null or empty group.
static event &leader(group &);
static event *leader(group *const &);
}
// One perf event: owns the descriptor obtained from perf_event_open and,
// optionally, a mapping of that descriptor. Every instance is tracked by
// instance_list<event>.
struct ircd::prof::event
:instance_list<event>
{
// Attributes passed to the perf_event_open syscall by the constructor.
perf_event_attr attr;
// Descriptor returned by the perf_event_open syscall.
fs::fd fd;
// Identifier obtained through the PERF_EVENT_IOC_ID ioctl.
uint64_t id {0};
// Size in bytes of the mapping; zero when nothing is mapped.
size_t map_size {0};
// Base address of the mapping; nullptr when nothing is mapped.
char *map {nullptr};
// The mapping viewed as the perf control page; nullptr when unmapped.
perf_event_mmap_page *head {nullptr};
// Region at head->data_offset of head->data_size bytes; empty when unmapped.
const_buffer body;
// Reads the counter in userspace through the mapped control page.
uint64_t rdpmc() const;
// Issues an ioctl request on fd through the syscall() wrapper.
long ioctl(const ulong &req, const long &arg = 0);
// PERF_EVENT_IOC_RESET / PERF_EVENT_IOC_ENABLE / PERF_EVENT_IOC_DISABLE;
// the argument is forwarded as the ioctl argument.
void reset(const long & = 0);
void enable(const long & = 0);
void disable(const long & = 0);
// group is passed to perf_event_open as the group descriptor argument;
// user and kernel select which privilege levels are counted; use_map
// requests the mapping (only honoured for PERF_TYPE_HARDWARE events).
event(const int &group,
const uint32_t &type,
const uint64_t &config,
const bool &user,
const bool &kernel,
const bool &use_map = true);
// Unmaps the mapping if one exists.
~event() noexcept;
};
// Definitions of the static members of instance_list<event>: the allocator
// is value-initialized and the list is constructed with that allocator.
// The allocator is defined first, ahead of the list that takes it.
template<>
decltype(ircd::util::instance_list<ircd::prof::event>::allocator)
ircd::util::instance_list<ircd::prof::event>::allocator
{};
template<>
decltype(ircd::util::instance_list<ircd::prof::event>::list)
ircd::util::instance_list<ircd::prof::event>::list
{
allocator
};
//
// prof
//
/// Issue a reset on the leader of the group with PERF_IOC_FLAG_GROUP so the
/// request is applied group-wide.
void
ircd::prof::reset(group &group)
{
	auto &lead
	{
		leader(group)
	};

	lead.reset(PERF_IOC_FLAG_GROUP);
}
/// Enable counting on the leader of the group with PERF_IOC_FLAG_GROUP so the
/// request is applied group-wide.
void
IRCD_PROF_ALWAYS_OPTIMIZE
ircd::prof::start(group &group)
{
	auto &lead
	{
		leader(group)
	};

	lead.enable(PERF_IOC_FLAG_GROUP);
}
/// Disable counting on the leader of the group with PERF_IOC_FLAG_GROUP so
/// the request is applied group-wide.
///
/// The leader is obtained through leader(group), which asserts that the
/// group is non-empty and that its front element is set *before* it is
/// dereferenced. Checking only after the dereference could never catch an
/// empty group.
void
IRCD_PROF_ALWAYS_OPTIMIZE
ircd::prof::stop(group &group)
{
	leader(group).disable(PERF_IOC_FLAG_GROUP);
}
/// Reference to the first event of the group. The group must be non-empty
/// and its front element must be set; both are asserted.
ircd::prof::event &
IRCD_PROF_ALWAYS_OPTIMIZE
ircd::prof::leader(group &group)
{
	assert(!group.empty() && group.front());
	auto &first
	{
		group.front()
	};

	return *first;
}
/// Pointer to the first event of the group, or nullptr when the group
/// pointer is null or the group is empty.
ircd::prof::event *
ircd::prof::leader(group *const &group)
{
	if(!group || group->empty())
		return nullptr;

	return group->front().get();
}
/// Construct a new event and append it to the group. When the group already
/// has a leader, the leader's descriptor is handed to the new event as its
/// group descriptor; otherwise -1 is passed. Returns the appended event, or
/// nullptr after logging when a std::exception was thrown.
template<class... args>
ircd::prof::event *
ircd::prof::create(group &group,
                   const uint32_t &type,
                   const uint64_t &config,
                   args&&... a)
try
{
	const int leader_fd
	{
		leader(&group)? leader(group).fd : -1
	};

	auto created
	{
		std::make_unique<event>(leader_fd, type, config, std::forward<args>(a)...)
	};

	group.emplace_back(std::move(created));
	return group.back().get();
}
catch(const std::exception &e)
{
	log::dwarning
	{
		"Failed to create event type:%u config:%lu :%s",
		type,
		config,
		e.what()
	};

	return nullptr;
}
///////////////////////////////////////////////////////////////////////////////
//
// prof/psi.h
//
/// True when the running kernel version is 4.20 or later, compared on the
/// first two components of info::kernel_version.
decltype(ircd::prof::psi::supported)
ircd::prof::psi::supported
{
	info::kernel_version[0] > 4 ||
	(info::kernel_version[0] == 4 && info::kernel_version[1] >= 20)
};
// Paths of the pressure files. The order (cpu, memory, io) matters: wait()
// indexes this array and maps index 0, 1, 2 to cpu, mem, io respectively.
decltype(ircd::prof::psi::path)
ircd::prof::psi::path
{
"/proc/pressure/cpu",
"/proc/pressure/memory",
"/proc/pressure/io",
};
// The file objects below are each constructed from the name of their
// pressure file, i.e. the final component of the corresponding path above.
decltype(ircd::prof::psi::cpu)
ircd::prof::psi::cpu
{
"cpu"
};
decltype(ircd::prof::psi::mem)
ircd::prof::psi::mem
{
"memory"
};
decltype(ircd::prof::psi::io)
ircd::prof::psi::io
{
"io"
};
//
// prof::psi::metric::refresh
//
// Writes the given triggers to their pressure files and waits until
// fs::select() reports one of the opened descriptors; returns the cpu, mem
// or io file object matching the reported index. When no triggers are given
// all three files are opened and nothing is written. A trigger naming an
// unknown file raises an error. Failures are logged and rethrown, except
// ctx::interrupted which is rethrown without logging.
ircd::prof::psi::file &
ircd::prof::psi::wait(const vector_view<const trigger> &cmd)
try
{
static const size_t max{3};
// trig_idx[n] holds the index into cmd of the trigger for path[n];
// size_t(-1) marks a file for which no trigger was given.
size_t trig_num {0}, trig_idx[max]
{
size_t(-1),
size_t(-1),
size_t(-1),
};
// Associate all of the trigger inputs (cmd) with one of the files; the
// cmds can be arranged any way and may not be for all files or any.
for(size_t i(0); i < cmd.size(); ++i)
{
const auto it
{
std::find_if(begin(path), end(path), [&cmd, &i]
(const auto &name)
{
return lstrip(name, "/proc/pressure/") == cmd[i].file.name;
})
};
const auto pos
{
std::distance(begin(path), it)
};
// find_if yielding end(path) gives pos == max: no such pressure file.
if(unlikely(size_t(pos) >= max))
throw error
{
"%s does not exist",
cmd[i].file.name,
};
trig_idx[pos] = i;
trig_num++;
}
const fs::fd::opts opts
{
std::ios::in | std::ios::out
};
// Open the fd's; if triggers were given we don't open files that were
// not included in the cmd vector; otherwise we open all files.
// NOTE(review): trig_idx[n] < max is used as the "has a trigger" test,
// which presumably assumes cmd never holds more than max entries -- confirm.
const fs::fd fd[max]
{
!trig_num || trig_idx[0] < max?
fs::fd{path[0], opts}:
fs::fd{},
!trig_num || trig_idx[1] < max?
fs::fd{path[1], opts}:
fs::fd{},
!trig_num || trig_idx[2] < max?
fs::fd{path[2], opts}:
fs::fd{},
};
// Write all triggers to their respective file
for(size_t i(0); i < max; ++i)
{
// Skip files for which no trigger was supplied.
if(trig_idx[i] >= max)
continue;
const auto &trig(cmd[trig_idx[i]]); try
{
// psi_write() in the kernel wants a write length of one greater
// than the length of the string, but it places a \0 in its own
// buffer unconditionally. This is noteworthy because our string
// may not be null terminated and this length requirement smells.
assert(trig.file.name == lstrip(path[i], "/proc/pressure/"));
syscall(::write, fd[i], trig.string.c_str(), size(trig.string) + 1);
}
catch(const ctx::interrupted &)
{
throw;
}
catch(const std::exception &e)
{
log::error
{
"Failed to set pressure stall trigger [%s] on /proc/pressure/%s :%s",
trig.string,
trig.file.name,
e.what(),
};
throw;
}
}
// Yield ircd::ctx until fd[n] has a result.
const size_t n
{
fs::select(fd)
};
// Map the index of the ready descriptor back to its file object; the
// order matches the path array.
switch(n)
{
case 0: return cpu;
case 1: return mem;
case 2: return io;
default:
always_assert(false);
__builtin_unreachable();
}
}
catch(const ctx::interrupted &)
{
throw;
}
catch(const std::exception &e)
{
log::error
{
"Failed to poll pressure stall information :%s",
e.what(),
};
throw;
}
// Reads the pressure file named by file.name under /proc/pressure with a
// single read and parses its "some" and "full" lines into file.some and
// file.full, then stamps file.sampled with the current time. Returns false
// when psi is not supported, when !file.name, or when a std::exception was
// caught (which is logged); returns true otherwise.
bool
ircd::prof::psi::refresh(file &file)
noexcept try
{
if(!supported)
return false;
if(unlikely(!file.name))
return false;
// Scratch buffer for composing the path; one instance per thread.
thread_local unique_mutable_buffer path_buf
{
fs::PATH_MAX_LEN
};
const auto &path
{
fs::path(path_buf, vector_view<const string_view>
{
"/proc/pressure"_sv, file.name
})
};
// Copy value into userspace
char buf[256];
fs::read_opts opts;
opts.aio = false; // can't read /proc through AIO
opts.all = false; // don't need posix read-loop; make one read(2) only.
const auto &result
{
fs::read(path, buf, opts)
};
tokens(result, '\n', [&file] // Read each line
(const string_view &line)
{
const auto &[type, vals]
{
split(line, ' ')
};
// The first token tells us what the metric is; we have allocated
// results for the following
if(type != "full" && type != "some")
return;
auto &metric
{
type == "full"?
file.full:
file.some
};
// Index of the next avg slot to fill for this line.
size_t i(0);
tokens(vals, ' ', [&file, &metric, &i] // Read each key=value pair
(const string_view &key_val)
{
const auto &[key, val]
{
split(key_val, '=')
};
if(key == "total")
{
// relative is the growth of the total since the previous refresh;
// window is the time elapsed since file.sampled; pct is their
// ratio scaled to a percentage (zero when the window is zero).
const auto total(lex_cast<microseconds>(val));
metric.stall.relative = total - metric.stall.total;
metric.stall.window = duration_cast<microseconds>(now<system_point>() - file.sampled);
metric.stall.pct = metric.stall.window.count()?
metric.stall.relative.count() / double(metric.stall.window.count()):
0.0;
metric.stall.pct *= 100;
metric.stall.total = total;
return;
}
else if(startswith(key, "avg") && i < metric.avg.size())
{
// The digits following "avg" in the key are parsed as the averaging
// window in seconds; the value is parsed as a float percentage.
metric.avg.at(i).window = lex_cast<seconds>(lstrip(key, "avg"));
metric.avg.at(i).pct = lex_cast<float>(val);
++i;
}
});
});
// Updated only after parsing so the next refresh measures from here.
file.sampled = ircd::now<system_point>();
return true;
}
catch(const std::exception &e)
{
log::error
{
"Failed to refresh pressure stall information '%s' :%s",
file.name,
e.what(),
};
return false;
}
///////////////////////////////////////////////////////////////////////////////
//
// prof/instructions.h
//
/// Create a hardware instruction counter event in this object's group
/// (user enabled, kernel disabled), then reset and start the group.
/// Throws when the event could not be created.
ircd::prof::instructions::instructions()
{
	const auto *const counter
	{
		create(this->group, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, true, false)
	};

	if(!counter)
		throw error
		{
			"Cannot sample instruction counter."
		};

	reset(this->group);
	start(this->group);
}
// Empty destructor body; nothing beyond member destruction happens here.
ircd::prof::instructions::~instructions()
noexcept
{
}
/// Read the counter of the group leader via rdpmc(), store it in `retired`
/// and return a reference to the stored value.
const uint64_t &
IRCD_PROF_ALWAYS_OPTIMIZE
ircd::prof::instructions::sample()
{
	auto &lead
	{
		prof::leader(group)
	};

	retired = lead.rdpmc();
	return retired;
}
// Returns the value stored by the most recent sample() without reading the
// counter again.
const uint64_t &
ircd::prof::instructions::at()
const
{
return retired;
}
//
// time_*() suite
//
uint64_t
ircd::prof::time_thrd()
{
struct ::timespec tv;
syscall(::clock_gettime, CLOCK_THREAD_CPUTIME_ID, &tv);
return ulong(tv.tv_sec) * 1000000000UL + tv.tv_nsec;
}
uint64_t
ircd::prof::time_proc()
{
struct ::timespec tv;
syscall(::clock_gettime, CLOCK_PROCESS_CPUTIME_ID, &tv);
return ulong(tv.tv_sec) * 1000000000UL + tv.tv_nsec;
}
///////////////////////////////////////////////////////////////////////////////
//
// prof::system
//
// Definition of the static event group shared by all system samples; it is
// the group that hotsample() reads and that system(sample_t) stops/starts.
decltype(ircd::prof::system::group)
ircd::prof::system::group;
/// Element-wise difference: a copy of `a` with `b` subtracted via operator-=.
ircd::prof::system
ircd::prof::operator-(const system &a,
                      const system &b)
{
	system difference(a);
	difference -= b;
	return difference;
}
/// Element-wise sum: a copy of `a` with `b` added via operator+=.
ircd::prof::system
ircd::prof::operator+(const system &a,
                      const system &b)
{
	system sum(a);
	sum += b;
	return sum;
}
/// Subtract every element of `b` from the element of `a` at the same
/// position; returns `a`.
ircd::prof::system &
ircd::prof::operator-=(system &a,
                       const system &b)
{
	for(size_t row(0); row < a.size(); ++row)
	{
		auto &dst(a[row]);
		const auto &src(b[row]);
		for(size_t col(0); col < dst.size(); ++col)
			dst[col] -= src[col];
	}

	return a;
}
/// Add every element of `b` to the element of `a` at the same position;
/// returns `a`.
ircd::prof::system &
ircd::prof::operator+=(system &a,
                       const system &b)
{
	for(size_t row(0); row < a.size(); ++row)
	{
		auto &dst(a[row]);
		const auto &src(b[row]);
		for(size_t col(0); col < dst.size(); ++col)
			dst[col] += src[col];
	}

	return a;
}
/// Read the leader of system::group into a thread-local buffer and store
/// each decoded value into `s`, indexed first by the result's counter and
/// then by its dpl. Returns `s`.
ircd::prof::system &
ircd::prof::hotsample(system &s)
noexcept
{
	thread_local char buf[1024];

	auto &lead
	{
		prof::leader(system::group)
	};

	const auto len
	{
		syscall(::read, int(lead.fd), buf, sizeof(buf))
	};

	const const_buffer result
	{
		buf, size_t(len)
	};

	for_each(result, [&s]
	(const type &type, const uint64_t &val)
	{
		s.at(size_t(type.counter)).at(size_t(type.dpl)) = val;
	});

	return s;
}
void
ircd::prof::for_each(const const_buffer &buf,
const read_closure &closure)
{
struct head
{
uint64_t nr, te, tr;
}
const *const &head
{
reinterpret_cast<const struct head *>(data(buf))
};
struct body
{
uint64_t val, id;
}
const *const &body
{
reinterpret_cast<const struct body *>(data(buf) + sizeof(struct head))
};
// Start with the pseudo-results; these should always be the same for
// non-hw profiling, so the DPL is meaningless.
closure(type{dpl::KERNEL, uint8_t(-1)}, head->te);
closure(type{dpl::USER, uint8_t(-1)}, head->tr);
// Iterate the result list
for(size_t i(0); i < head->nr; ++i)
for(auto it(begin(event::list)); it != end(event::list); ++it)
if((*it)->id == body[i].id)
return closure(type(**it), body[i].val);
}
// Sampling constructor: stops the shared group, reads its values into this
// object with hotsample(), then starts the group again.
ircd::prof::system::system(sample_t)
noexcept
{
stop(group);
hotsample(*this);
start(group);
}
// Empty destructor body; nothing beyond member destruction happens here.
ircd::prof::system::~system()
noexcept
{
}
/*
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, true, false);
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, false, true);
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK, true, false);
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK, false, true);
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN, true, false);
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN, false, true);
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ, true, false);
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ, false, true);
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, true, false);
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, false, true);
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, true, false);
create(system::group, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, false, true);
system::group.clear();
*/
///////////////////////////////////////////////////////////////////////////////
//
// prof::event
//
// Opens a perf event and optionally maps it. The members are initialized in
// sequence by the lambdas below and each step uses the results of earlier
// ones: attr -> fd (perf_event_open) -> id (PERF_EVENT_IOC_ID) -> map_size
// -> map (mmap) -> head -> body. The ircd syscall() wrapper used here
// presumably throws on failure -- TODO confirm; a failed mmap throws
// explicitly below.
ircd::prof::event::event(const int &group,
const uint32_t &type,
const uint64_t &config,
const bool &user,
const bool &kernel,
const bool &use_map)
:attr{[&]
{
struct ::perf_event_attr ret {0};
ret.size = sizeof(ret);
ret.type = type;
ret.config = config;
// The exclude bits are the inverse of the requested privilege levels.
ret.exclude_user = !user;
ret.exclude_kernel = !kernel;
// Reads return all group members with their ids plus both time totals;
// this is the layout decoded by for_each().
ret.read_format |= PERF_FORMAT_GROUP;
ret.read_format |= PERF_FORMAT_ID;
ret.read_format |= PERF_FORMAT_TOTAL_TIME_ENABLED;
ret.read_format |= PERF_FORMAT_TOTAL_TIME_RUNNING;
ret.exclude_idle = true;
ret.exclude_host = false;
ret.exclude_hv = true;
ret.exclude_guest = true;
ret.exclude_callchain_user = true;
ret.exclude_callchain_kernel = true;
// The event is created disabled; counting begins on an explicit enable.
ret.disabled = true;
return ret;
}()}
,fd{[this, &group]
{
ulong flags(0);
flags |= PERF_FLAG_FD_CLOEXEC;
// cpu -1 and pid 0 are passed through to perf_event_open unchanged.
const int cpu(-1);
const pid_t pid(0);
return int(syscall<SYS_perf_event_open>(&attr, pid, cpu, group, flags));
}()}
,id{[this]
{
uint64_t ret;
syscall(::ioctl, int(fd), PERF_EVENT_IOC_ID, &ret);
return ret;
}()}
,map_size
{
// Only hardware events are mapped, and only when requested: one page.
use_map && type == PERF_TYPE_HARDWARE?
size_t(1UL + 0UL) * info::page_size:
0UL
}
,map{[this]
{
int prot(0);
prot |= PROT_READ;
prot |= PROT_WRITE;
int flags(0);
flags |= MAP_SHARED;
void *const ret
{
map_size?
::mmap(nullptr, map_size, prot, flags, int(this->fd), 0):
nullptr
};
// (void *)-1 is the failure value tested for here; errno is reported.
if(ret == (void *)-1)
throw std::system_error
{
errno, std::system_category()
};
if(map_size && ret == nullptr)
throw error
{
"mmap(2) failed on event (fd:%d)", int(fd)
};
return reinterpret_cast<char *>(ret);
}()}
,head
{
map?
reinterpret_cast<::perf_event_mmap_page *>(map):
nullptr
}
,body
{
// Data region as described by the control page; empty when unmapped.
head?
map + head->data_offset:
nullptr,
head?
head->data_size:
0UL
}
{
assert(size(body) % info::page_size == 0);
assert(map_size % info::page_size == 0);
}
/// Release the mapping, if any. `map` and `map_size` must either both be
/// set or both be unset; this is asserted.
ircd::prof::event::~event()
noexcept
{
	assert(!map || map_size);
	assert(!map_size || map);

	if(!map)
		return;

	syscall(::munmap, map, map_size);
}
/// Issue PERF_EVENT_IOC_DISABLE on this event's descriptor with the given
/// argument. The ioctl result is not examined.
inline void
IRCD_PROF_ALWAYS_OPTIMIZE
ircd::prof::event::disable(const long &arg)
{
	const int &fd(this->fd);
	::ioctl(fd, PERF_EVENT_IOC_DISABLE, arg);
}
// Issues PERF_EVENT_IOC_ENABLE on this event's descriptor with the given
// argument. An mfence followed by an lfence is executed immediately before
// the ioctl; the ioctl result is not examined.
inline void
IRCD_PROF_ALWAYS_OPTIMIZE
ircd::prof::event::enable(const long &arg)
{
// Obtain the descriptor before the fences so only the ioctl follows them.
const int &fd(this->fd);
__builtin_ia32_mfence();
__builtin_ia32_lfence();
::ioctl(fd, PERF_EVENT_IOC_ENABLE, arg);
}
void
ircd::prof::event::reset(const long &arg)
{
ioctl(PERF_EVENT_IOC_RESET, arg);
}
/// Issue the ioctl request `req` with argument `arg` on this event's
/// descriptor via the syscall() wrapper and return its result.
long
ircd::prof::event::ioctl(const ulong &req,
                         const long &arg)
{
	const auto ret
	{
		syscall(::ioctl, int(fd), req, arg)
	};

	return ret;
}
// Reads the current counter value in userspace through the mapped control
// page: head->offset plus, when head->index is non-zero, the value of the
// counter numbered index - 1. The read is repeated until head->lock is
// observed unchanged across it.
// NOTE(review): head is dereferenced unconditionally, so this presumably
// requires an event constructed with a mapping -- confirm against callers.
inline uint64_t
IRCD_PROF_ALWAYS_OPTIMIZE
ircd::prof::event::rdpmc()
const
{
assert(head->cap_user_time);
assert(head->cap_user_rdpmc);
uint64_t ret;
uint32_t seq; do
{
// Snapshot the sequence value, then fence so the reads below are not
// reordered ahead of it.
seq = head->lock;
__sync_synchronize();
//assert(head->time_enabled == head->time_running);
ret = head->offset;
ret += head->index? x86::rdpmc(head->index - 1) : 0UL;
// Fence again before re-checking the sequence value.
__sync_synchronize();
}
while(head->lock != seq);
return ret;
}
///////////////////////////////////////////////////////////////////////////////
//
// prof::type
//
// Memberwise constructor: each argument is stored in the member of the same
// name; the body is empty.
ircd::prof::type::type(const enum dpl &dpl,
const uint8_t &type_id,
const uint8_t &counter,
const uint8_t &cacheop,
const uint8_t &cacheres)
:dpl{dpl}
,type_id{type_id}
,counter{counter}
,cacheop{cacheop}
,cacheres{cacheres}
{
}
// Derives a type from an event's attributes: dpl is USER when the event
// excludes the kernel and KERNEL otherwise; type_id is attr.type truncated
// to 8 bits; counter, cacheop and cacheres are the lowest three bytes of
// attr.config, in that order.
ircd::prof::type::type(const event &event)
:dpl
{
event.attr.exclude_kernel? dpl::USER : dpl::KERNEL
}
,type_id
{
uint8_t(event.attr.type)
}
,counter
{
uint8_t(event.attr.config)
}
,cacheop
{
uint8_t(event.attr.config >> 8)
}
,cacheres
{
uint8_t(event.attr.config >> 16)
}
{
}
///////////////////////////////////////////////////////////////////////////////
//
// internal
//
/// Write the fields of a perf mmap control page to the stream, one
/// "label: value" pair per line, each line ended with std::endl. Returns
/// the stream.
std::ostream &
ircd::prof::debug(std::ostream &s,
                  const ::perf_event_mmap_page &head)
{
	// Emits a single labelled line; values are taken by copy so bit-field
	// members can be passed as well.
	const auto line{[&s]
	(const char *const label, const auto value)
	{
		s << label << value << std::endl;
	}};

	line("version: ", head.version);
	line("compat: ", head.compat_version);
	line("lock: ", head.lock);
	line("index: ", head.index);
	line("offset: ", head.offset);
	line("time_enabled: ", head.time_enabled);
	line("time_running: ", head.time_running);
	line("cap_user_rdpmc: ", head.cap_user_rdpmc);
	line("cap_user_time: ", head.cap_user_time);
	line("cap_user_time_zero: ", head.cap_user_time_zero);
	line("pmc_width: ", head.pmc_width);
	line("time_shift: ", head.time_shift);
	line("time_mult: ", head.time_mult);
	line("time_offset: ", head.time_offset);
	line("data_head: ", head.data_head);
	line("data_tail: ", head.data_tail);
	line("data_offset: ", head.data_offset);
	line("data_size: ", head.data_size);
	line("aux_head: ", head.aux_head);
	line("aux_tail: ", head.aux_tail);
	line("aux_offset: ", head.aux_offset);
	line("aux_size: ", head.aux_size);
	return s;
}