Fetches control group resource information (#10402)

Adds control group data to status API and kbnServer.metrics
This commit is contained in:
Tyler Smalley 2017-04-11 10:20:30 -07:00 committed by GitHub
parent 38f6cc876c
commit 006fae00f7
8 changed files with 513 additions and 33 deletions

View file

@ -69,6 +69,8 @@ information and all requests.
The minimum value is 100.
`status.allowAnonymous`:: *Default: false* If authentication is enabled, setting this to `true` allows
unauthenticated users to access the Kibana server status API and status page.
`cpu.cgroup.path.override`:: Override for the cgroup cpu path when it is mounted in a manner that is inconsistent with `/proc/self/cgroup`.
`cpuacct.cgroup.path.override`:: Override for the cgroup cpuacct path when it is mounted in a manner that is inconsistent with `/proc/self/cgroup`.
`console.enabled`:: *Default: true* Set to false to disable Console. Toggling this will cause the server to regenerate assets on the next startup, which may cause a delay before pages start being served.
`elasticsearch.tribe.url:`:: Optional URL of the Elasticsearch tribe instance to use for all your

View file

@ -257,6 +257,7 @@
"makelogs": "3.2.3",
"marked-text-renderer": "0.1.0",
"mocha": "2.5.3",
"mock-fs": "4.0.0",
"murmurhash3js": "3.0.1",
"ncp": "2.0.0",
"nock": "8.0.0",

View file

@ -27,6 +27,21 @@ module.exports = () => Joi.object({
exclusive: Joi.boolean().default(false)
}).default(),
cpu: Joi.object({
cgroup: Joi.object({
path: Joi.object({
override: Joi.string().default()
})
})
}),
cpuacct: Joi.object({
cgroup: Joi.object({
path: Joi.object({
override: Joi.string().default()
})
})
}),
server: Joi.object({
uuid: Joi.string().guid().default(),

View file

@ -0,0 +1,170 @@
import expect from 'expect.js';
import mockFs from 'mock-fs';
import { cGroups as cGroupsFsStub } from './fs_stubs';
import { getAllStats, readControlGroups, readCPUStat } from '../cgroup';
/**
 * Tests for the control group readers exported by ../cgroup.
 *
 * Each test replaces the filesystem with an in-memory one (mock-fs) holding
 * only the files named in the test, then checks what the readers return.
 */
describe('Control Group', function () {
  const fsStub = cGroupsFsStub();

  // What fsStub.cpuStatContents parses to.
  const parsedCpuStat = () => ({
    number_of_elapsed_periods: 0,
    number_of_times_throttled: 10,
    time_throttled_nanos: 20
  });

  // What readCPUStat reports when the cpu.stat file does not exist.
  const defaultCpuStat = () => ({
    number_of_elapsed_periods: -1,
    number_of_times_throttled: -1,
    time_throttled_nanos: -1
  });

  // Restore the real filesystem after every test, including failed ones.
  afterEach(() => {
    mockFs.restore();
  });

  describe('readControlGroups', () => {
    it('parses the file', async () => {
      mockFs({ '/proc/self/cgroup': fsStub.cGroupContents });
      const cGroup = await readControlGroups();

      expect(cGroup).to.eql({
        freezer: '/',
        net_cls: '/',
        net_prio: '/',
        pids: '/',
        blkio: '/',
        memory: '/',
        devices: '/user.slice',
        hugetlb: '/',
        perf_event: '/',
        cpu: `/${fsStub.hierarchy}`,
        cpuacct: `/${fsStub.hierarchy}`,
        cpuset: `/${fsStub.hierarchy}`,
        'name=systemd': '/user.slice/user-1000.slice/session-2359.scope'
      });
    });
  });

  describe('readCPUStat', () => {
    it('parses the file', async () => {
      mockFs({ '/sys/fs/cgroup/cpu/fakeGroup/cpu.stat': fsStub.cpuStatContents });
      const cpuStat = await readCPUStat('fakeGroup');

      expect(cpuStat).to.eql(parsedCpuStat());
    });

    it('returns default stats for missing file', async () => {
      mockFs();
      const cpuStat = await readCPUStat('fakeGroup');

      expect(cpuStat).to.eql(defaultCpuStat());
    });
  });

  describe('getAllStats', () => {
    it('can override the cpu group path', async () => {
      mockFs({
        '/proc/self/cgroup': fsStub.cGroupContents,
        [`${fsStub.cpuAcctDir}/cpuacct.usage`]: '357753491408',
        '/sys/fs/cgroup/cpu/docker/cpu.cfs_period_us': '100000',
        '/sys/fs/cgroup/cpu/docker/cpu.cfs_quota_us': '5000',
        '/sys/fs/cgroup/cpu/docker/cpu.stat': fsStub.cpuStatContents,
      });

      const stats = await getAllStats({ cpuPath: '/docker' });

      expect(stats).to.eql({
        cpuacct: {
          control_group: `/${fsStub.hierarchy}`,
          usage_nanos: 357753491408,
        },
        cpu: {
          control_group: '/docker',
          cfs_period_micros: 100000,
          cfs_quota_micros: 5000,
          stat: parsedCpuStat()
        }
      });
    });

    it('can override the cpuacct group path', async () => {
      mockFs({
        '/proc/self/cgroup': fsStub.cGroupContents,
        '/sys/fs/cgroup/cpuacct/docker/cpuacct.usage': '357753491408',
        [`${fsStub.cpuDir}/cpu.cfs_period_us`]: '100000',
        [`${fsStub.cpuDir}/cpu.cfs_quota_us`]: '5000',
        [`${fsStub.cpuDir}/cpu.stat`]: fsStub.cpuStatContents,
      });

      const stats = await getAllStats({ cpuAcctPath: '/docker' });

      expect(stats).to.eql({
        cpuacct: {
          control_group: '/docker',
          usage_nanos: 357753491408,
        },
        cpu: {
          control_group: `/${fsStub.hierarchy}`,
          cfs_period_micros: 100000,
          cfs_quota_micros: 5000,
          stat: parsedCpuStat()
        }
      });
    });

    it('extracts control group stats', async () => {
      mockFs(fsStub.files);
      const stats = await getAllStats();

      expect(stats).to.eql({
        cpuacct: {
          control_group: `/${fsStub.hierarchy}`,
          usage_nanos: 357753491408,
        },
        cpu: {
          control_group: `/${fsStub.hierarchy}`,
          cfs_period_micros: 100000,
          cfs_quota_micros: 5000,
          stat: parsedCpuStat()
        }
      });
    });

    // NOTE: expect.js has no chai-style `.to.be.null` getter; that expression is
    // a plain property read and asserts nothing. `.to.be(null)` is the real check.
    it('returns null when all files are missing', async () => {
      mockFs({});
      const stats = await getAllStats();

      expect(stats).to.be(null);
    });

    it('returns null if CPU accounting files are missing', async () => {
      mockFs({
        '/proc/self/cgroup': fsStub.cGroupContents,
        [`${fsStub.cpuDir}/cpu.stat`]: fsStub.cpuStatContents
      });

      const stats = await getAllStats();

      expect(stats).to.be(null);
    });

    // readCPUStat resolves with its -1 defaults when cpu.stat is absent, so the
    // remaining files are still reported rather than the whole result being null.
    it('returns default cpu stat values if cpuStat file is missing', async () => {
      mockFs({
        '/proc/self/cgroup': fsStub.cGroupContents,
        [`${fsStub.cpuAcctDir}/cpuacct.usage`]: '357753491408',
        [`${fsStub.cpuDir}/cpu.cfs_period_us`]: '100000',
        [`${fsStub.cpuDir}/cpu.cfs_quota_us`]: '5000'
      });

      const stats = await getAllStats();

      expect(stats).to.eql({
        cpuacct: {
          control_group: `/${fsStub.hierarchy}`,
          usage_nanos: 357753491408,
        },
        cpu: {
          control_group: `/${fsStub.hierarchy}`,
          cfs_period_micros: 100000,
          cfs_quota_micros: 5000,
          stat: defaultCpuStat()
        }
      });
    });
  });
});

View file

@ -0,0 +1,42 @@
/**
 * Builds the file contents and paths of a fake cgroup filesystem for tests.
 *
 * @param {string} [hierarchy] - control group name listed for the cpu, cpuacct
 *   and cpuset controllers; a random, non-empty name is generated when omitted
 *   or empty
 * @returns {object} `hierarchy`, the raw `cGroupContents` and `cpuStatContents`
 *   strings, the `cpuAcctDir` and `cpuDir` directories, and `files`, a map of
 *   absolute path -> file contents
 */
export function cGroups(hierarchy) {
  if (!hierarchy) {
    // Drop only the leading "0." of the base-36 string. Cutting at a fixed
    // later offset yields '' for short values (0.5 -> "0.i"), which would
    // produce an empty group name; the literal fallback covers an exact 0.
    hierarchy = Math.random().toString(36).slice(2, 10) || 'fakegroup';
  }

  const cpuAcctDir = `/sys/fs/cgroup/cpuacct/${hierarchy}`;
  const cpuDir = `/sys/fs/cgroup/cpu/${hierarchy}`;

  const cGroupContents = [
    '10:freezer:/',
    '9:net_cls,net_prio:/',
    '8:pids:/',
    '7:blkio:/',
    '6:memory:/',
    '5:devices:/user.slice',
    '4:hugetlb:/',
    '3:perf_event:/',
    `2:cpu,cpuacct,cpuset:/${hierarchy}`,
    '1:name=systemd:/user.slice/user-1000.slice/session-2359.scope'
  ].join('\n');

  const cpuStatContents = [
    'nr_periods 0',
    'nr_throttled 10',
    'throttled_time 20'
  ].join('\n');

  return {
    hierarchy,
    cGroupContents,
    cpuStatContents,
    cpuAcctDir,
    cpuDir,
    files: {
      '/proc/self/cgroup': cGroupContents,
      [`${cpuAcctDir}/cpuacct.usage`]: '357753491408',
      [`${cpuDir}/cpu.cfs_period_us`]: '100000',
      [`${cpuDir}/cpu.cfs_quota_us`]: '5000',
      [`${cpuDir}/cpu.stat`]: cpuStatContents,
    }
  };
}

View file

@ -1,5 +1,8 @@
import _ from 'lodash';
import expect from 'expect.js';
import sinon from 'sinon';
import mockFs from 'mock-fs';
import { cGroups as cGroupsFsStub } from './fs_stubs';
import { getMetrics } from '../metrics';
@ -20,7 +23,8 @@ describe('Metrics', function () {
'psdelay': 1.6091690063476562,
'host': '123'
};
const config = {
const sampleConfig = {
ops: {
interval: 5000
},
@ -29,28 +33,104 @@ describe('Metrics', function () {
}
};
let metrics;
beforeEach(() => {
metrics = getMetrics({
event: _.cloneDeep(mockOps),
config: {
get: path => _.get(config, path)
}
// Verifies that getMetrics attaches control group stats under `os.cgroup` when
// the cgroup files are readable, with and without the configured path overrides.
describe('with cgroups', () => {
  // Runs getMetrics against a mocked filesystem. The real filesystem is
  // restored in `finally` so a rejected getMetrics cannot leave the mock in
  // place for the tests that follow.
  async function getMetricsWithFiles(files, settings) {
    const event = _.cloneDeep(mockOps);
    const config = { get: path => _.get(settings, path) };
    const kbnServer = { log: sinon.mock() };

    mockFs(files);
    try {
      return await getMetrics(event, config, kbnServer);
    } finally {
      mockFs.restore();
    }
  }

  it('should provide cgroups', async () => {
    const fsStub = cGroupsFsStub();

    const metrics = await getMetricsWithFiles(fsStub.files, sampleConfig);

    expect(_.get(metrics, 'os.cgroup')).to.eql({
      cpuacct: {
        control_group: `/${fsStub.hierarchy}`,
        usage_nanos: 357753491408,
      },
      cpu: {
        control_group: `/${fsStub.hierarchy}`,
        cfs_period_micros: 100000,
        cfs_quota_micros: 5000,
        stat: {
          number_of_elapsed_periods: 0,
          number_of_times_throttled: 10,
          time_throttled_nanos: 20
        }
      }
    });
  });

  it('can override cgroup path', async () => {
    const fsStub = cGroupsFsStub('foo');

    // Merge into a fresh object: assigning straight onto sampleConfig would
    // leak the overrides into every other test that reads it.
    const configOverride = Object.assign({}, sampleConfig, {
      cpu: {
        cgroup: {
          path: {
            override: '/foo'
          }
        }
      },
      cpuacct: {
        cgroup: {
          path: {
            override: '/foo'
          }
        }
      },
    });

    const metrics = await getMetricsWithFiles(fsStub.files, configOverride);

    expect(_.get(metrics, 'os.cgroup')).to.eql({
      cpuacct: {
        control_group: `/foo`,
        usage_nanos: 357753491408,
      },
      cpu: {
        control_group: `/foo`,
        cfs_period_micros: 100000,
        cfs_quota_micros: 5000,
        stat: {
          number_of_elapsed_periods: 0,
          number_of_times_throttled: 10,
          time_throttled_nanos: 20
        }
      }
    });
  });
});
it('should snake case the request object', () => {
expect(metrics.requests.status_codes).not.to.be(undefined);
expect(metrics.requests.statusCodes).to.be(undefined);
});
describe('without cgroups', () => {
let metrics;
beforeEach(async () => {
const event = _.cloneDeep(mockOps);
const config = { get: path => _.get(sampleConfig, path) };
const kbnServer = { log: sinon.mock() };
it('should provide defined metrics', () => {
(function checkMetrics(currentMetric) {
_.forOwn(currentMetric, value => {
if (typeof value === 'object') return checkMetrics(value);
expect(currentMetric).not.to.be(undefined);
});
metrics = await getMetrics(event, config, kbnServer);
});
}(metrics));
it('should snake case the request object', () => {
expect(metrics.requests.status_codes).not.to.be(undefined);
expect(metrics.requests.statusCodes).to.be(undefined);
});
it('should provide defined metrics', () => {
(function checkMetrics(currentMetric) {
_.forOwn(currentMetric, value => {
if (typeof value === 'object') return checkMetrics(value);
expect(currentMetric).not.to.be(undefined);
});
}(metrics));
});
});
});

138
src/server/status/cgroup.js Normal file
View file

@ -0,0 +1,138 @@
import fs from 'fs';
import { promisify } from 'bluebird';
import { join as joinPath } from 'path';
// Logic from elasticsearch/core/src/main/java/org/elasticsearch/monitor/os/OsProbe.java

// Matches one line of the control group file: "<digits>:<controller list>:<path starting with />".
// Capture 1 is the controller list, capture 2 is the control group path.
const CONTROL_GROUP_RE = new RegExp('\\d+:([^:]+):(/.*)');
// NOTE: despite the _RE suffix this is a plain string, not a RegExp. It is the
// separator used to split capture 1 into individual controller names.
const CONTROLLER_SEPERATOR_RE = ',';

// File listing the control groups of the current process.
const PROC_SELF_CGROUP_FILE = '/proc/self/cgroup';
// Base directories under which a control group path is resolved for each controller.
const PROC_CGROUP_CPU_DIR = '/sys/fs/cgroup/cpu';
const PROC_CGROUP_CPUACCT_DIR = '/sys/fs/cgroup/cpuacct';

// Controller name and usage file for CPU accounting (reported as `usage_nanos`).
const GROUP_CPUACCT = 'cpuacct';
const CPUACCT_USAGE_FILE = 'cpuacct.usage';

// Controller name and the per-group files read for the cpu controller
// (reported as `cfs_period_micros`, `cfs_quota_micros` and `stat`).
const GROUP_CPU = 'cpu';
const CPU_FS_PERIOD_US_FILE = 'cpu.cfs_period_us';
const CPU_FS_QUOTA_US_FILE = 'cpu.cfs_quota_us';
const CPU_STATS_FILE = 'cpu.stat';

// Promise-returning variant of fs.readFile used by every reader in this module.
const readFile = promisify(fs.readFile);
/**
 * Reads PROC_SELF_CGROUP_FILE and maps every controller named in it to its
 * control group path.
 *
 * Lines that do not match CONTROL_GROUP_RE are ignored. A line listing several
 * controllers produces one entry per controller, all sharing the same path.
 *
 * @returns {Promise<Object>} controller name -> control group path; rejects
 *   with the read error when the file cannot be read
 */
export function readControlGroups() {
  return readFile(PROC_SELF_CGROUP_FILE).then(contents => {
    const pathByController = {};

    for (const row of contents.toString().split(/\n/)) {
      const parsed = row.match(CONTROL_GROUP_RE);
      if (parsed === null) {
        continue;
      }

      const [, controllerList, groupPath] = parsed;
      for (const controllerName of controllerList.split(CONTROLLER_SEPERATOR_RE)) {
        pathByController[controllerName] = groupPath;
      }
    }

    return pathByController;
  });
}
/**
 * Reads a file and parses its contents as a base-10 integer.
 *
 * @param {string} path - file to read
 * @returns {Promise<number>} the parsed integer (NaN when the contents do not
 *   start with a number); rejects with the read error when the read fails
 */
function fileContentsToInteger(path) {
  const toInteger = contents => parseInt(contents.toString(), 10);
  return readFile(path).then(toInteger);
}
/**
 * Reads the CPU accounting usage file of a control group as an integer.
 *
 * @param {string} controlGroup - control group path, resolved under PROC_CGROUP_CPUACCT_DIR
 * @returns {Promise<number>} integer contents of the group's CPUACCT_USAGE_FILE
 */
function readCPUAcctUsage(controlGroup) {
  const usageFile = joinPath(PROC_CGROUP_CPUACCT_DIR, controlGroup, CPUACCT_USAGE_FILE);
  return fileContentsToInteger(usageFile);
}
/**
 * Reads the CFS period file of a control group as an integer.
 *
 * @param {string} controlGroup - control group path, resolved under PROC_CGROUP_CPU_DIR
 * @returns {Promise<number>} integer contents of the group's CPU_FS_PERIOD_US_FILE
 */
function readCPUFsPeriod(controlGroup) {
  const periodFile = joinPath(PROC_CGROUP_CPU_DIR, controlGroup, CPU_FS_PERIOD_US_FILE);
  return fileContentsToInteger(periodFile);
}
/**
 * Reads the CFS quota file of a control group as an integer.
 *
 * @param {string} controlGroup - control group path, resolved under PROC_CGROUP_CPU_DIR
 * @returns {Promise<number>} integer contents of the group's CPU_FS_QUOTA_US_FILE
 */
function readCPUFsQuota(controlGroup) {
  const quotaFile = joinPath(PROC_CGROUP_CPU_DIR, controlGroup, CPU_FS_QUOTA_US_FILE);
  return fileContentsToInteger(quotaFile);
}
/**
 * Reads and parses the CPU_STATS_FILE of a control group.
 *
 * Each line is split on whitespace; the `nr_periods`, `nr_throttled` and
 * `throttled_time` fields are parsed as base-10 integers, every other line is
 * ignored. Fields that never appear keep their initial value of -1.
 *
 * @param {string} controlGroup - control group path, resolved under PROC_CGROUP_CPU_DIR
 * @returns {Promise<Object>} `number_of_elapsed_periods`,
 *   `number_of_times_throttled` and `time_throttled_nanos`; all -1 when the
 *   file does not exist (ENOENT). Any other error rejects the promise.
 */
export async function readCPUStat(controlGroup) {
  const stat = {
    number_of_elapsed_periods: -1,
    number_of_times_throttled: -1,
    time_throttled_nanos: -1
  };

  // Field name in the stat file -> key it is reported under.
  const statKeyByField = new Map([
    ['nr_periods', 'number_of_elapsed_periods'],
    ['nr_throttled', 'number_of_times_throttled'],
    ['throttled_time', 'time_throttled_nanos']
  ]);

  try {
    const statFile = joinPath(PROC_CGROUP_CPU_DIR, controlGroup, CPU_STATS_FILE);
    const contents = await readFile(statFile);

    for (const row of contents.toString().split(/\n/)) {
      const [field, rawValue] = row.split(/\s+/);
      if (statKeyByField.has(field)) {
        stat[statKeyByField.get(field)] = parseInt(rawValue, 10);
      }
    }

    return stat;
  } catch (err) {
    // A missing stat file is not an error: report the untouched defaults.
    if (err.code === 'ENOENT') {
      return stat;
    }
    throw err;
  }
}
/**
 * Collects the cpu and cpuacct control group statistics of the current process.
 *
 * The control group of each controller is taken from readControlGroups()
 * unless an override is supplied.
 *
 * @param {Object} [options]
 * @param {string} [options.cpuPath] - control group path to use for the cpu
 *   controller instead of the one read from the control group file
 * @param {string} [options.cpuAcctPath] - control group path to use for the
 *   cpuacct controller instead of the one read from the control group file
 * @returns {Promise<Object|null>} `{ cpuacct: { control_group, usage_nanos },
 *   cpu: { control_group, cfs_period_micros, cfs_quota_micros, stat } }`, or
 *   null when the statistics are unavailable: a required file does not exist
 *   (ENOENT), or no control group path is known for one of the controllers.
 *   Any other error rejects the promise.
 */
export async function getAllStats(options = {}) {
  try {
    const groups = await readControlGroups();
    const cpuPath = options.cpuPath || groups[GROUP_CPU];
    const cpuAcctPath = options.cpuAcctPath || groups[GROUP_CPUACCT];

    // The control group file may not list these controllers at all. Without a
    // path there is nothing to read, and building a file path from undefined
    // would throw a TypeError instead of reporting "unavailable".
    if (!cpuPath || !cpuAcctPath) {
      return null;
    }

    const [cpuAcctUsage, cpuFsPeriod, cpuFsQuota, cpuStat] = await Promise.all([
      readCPUAcctUsage(cpuAcctPath),
      readCPUFsPeriod(cpuPath),
      readCPUFsQuota(cpuPath),
      readCPUStat(cpuPath)
    ]);

    return {
      cpuacct: {
        control_group: cpuAcctPath,
        usage_nanos: cpuAcctUsage
      },
      cpu: {
        control_group: cpuPath,
        cfs_period_micros: cpuFsPeriod,
        cfs_quota_micros: cpuFsQuota,
        stat: cpuStat
      }
    };
  } catch (err) {
    // Missing files mean control groups are not in use here.
    if (err.code === 'ENOENT') {
      return null;
    }
    throw err;
  }
}

View file

@ -1,39 +1,71 @@
import _ from 'lodash';
import { get, set, isObject } from 'lodash';
import { keysToSnakeCaseShallow } from '../../utils/case_conversion';
import { getAllStats as cGroupStats } from './cgroup';
let cGroupStatsAvailable = true;
export function collectMetrics(kbnServer, server, config) {
server.plugins['even-better'].monitor.on('ops', function (event) {
kbnServer.metrics = getMetrics({ event, config });
server.plugins['even-better'].monitor.on('ops', event => {
getMetrics(event, config, server).then(data => { kbnServer.metrics = data; });
});
}
export function getMetrics({ event, config }) {
export async function getMetrics(event, config, server) {
const port = config.get('server.port');
const timestamp = new Date().toISOString();
return {
const cgroup = await cGroupStatsIfAvailable();
const metrics = {
last_updated: timestamp,
collection_interval_in_millis: config.get('ops.interval'),
uptime_in_millis: process.uptime() * 1000,
process: {
mem: {
heap_max_in_bytes: _.get(event, 'psmem.heapTotal'),
heap_used_in_bytes: _.get(event, 'psmem.heapUsed')
heap_max_in_bytes: get(event, 'psmem.heapTotal'),
heap_used_in_bytes: get(event, 'psmem.heapUsed')
}
},
os: {
cpu: {
load_average: {
'1m': _.get(event, 'osload.0'),
'5m': _.get(event, 'osload.1'),
'15m': _.get(event, 'osload.1')
'1m': get(event, 'osload.0'),
'5m': get(event, 'osload.1'),
'15m': get(event, 'osload.1')
}
}
},
response_times: {
avg_in_millis: _.get(event, ['responseTimes', port, 'avg']),
max_in_millis: _.get(event, ['responseTimes', port, 'max'])
avg_in_millis: get(event, ['responseTimes', port, 'avg']),
max_in_millis: get(event, ['responseTimes', port, 'max'])
},
requests: keysToSnakeCaseShallow(_.get(event, ['requests', port])),
concurrent_connections: _.get(event, ['concurrents', port])
requests: keysToSnakeCaseShallow(get(event, ['requests', port])),
concurrent_connections: get(event, ['concurrents', port])
};
/**
 * Fetches control group stats unless an earlier attempt found none.
 *
 * Resolves with the stats object when cGroupStats yields one. When it yields
 * anything else, cGroupStatsAvailable is switched off so later calls return
 * immediately. When reading the overrides or collecting the stats throws, the
 * error is logged and the flag is left untouched.
 *
 * @returns {Promise<Object|undefined>} the stats object, otherwise undefined
 */
async function cGroupStatsIfAvailable() {
  if (cGroupStatsAvailable) {
    try {
      const overrides = {
        cpuPath: config.get('cpu.cgroup.path.override'),
        cpuAcctPath: config.get('cpuacct.cgroup.path.override')
      };
      const stats = await cGroupStats(overrides);

      if (!isObject(stats)) {
        // Nothing usable came back: stop asking on subsequent collections.
        cGroupStatsAvailable = false;
        return;
      }

      return stats;
    } catch (err) {
      server.log(['error', 'metrics', 'cgroup'], err);
    }
  }
}
if (isObject(cgroup)) {
set(metrics, 'os.cgroup', cgroup);
}
return metrics;
}