Introduces manual Integration Test for stress testing Task Manager (#46214)

Introduces a disabled (enabled manually when needed) Integration Test for stress testing Task Manager.

This is used to generate baseline stats for Task Manager performance, which can then be rerun to compare performance-oriented changes and measure their impact.
This commit is contained in:
Gidi Meir Morris 2019-09-23 13:55:03 -07:00 committed by GitHub
parent 24e9bc33d7
commit 82fb767804
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 324 additions and 0 deletions

View file

@ -0,0 +1,53 @@
# Task Manager Performance Integration Test
This test provides a test framework for spawning multiple concurrent tasks in
Task Manager and measuring how well Task Manager performs in claiming, processing
and finalising these tasks.
We keep this test disabled as it is used on an ad hoc basis and we feel it is
worth keeping around for future use, rather than being rewritten time and time again.
## How To Run The Tests
### Setup
In the `./test_suites/task_manager/task_manager_perf_integration.ts` file you see the following configuration:
```json
{ tasksToSpawn: 10, durationInSeconds: 60 }
```
`tasksToSpawn` tells the test runner how many tasks to spawn. Each task has a 1s interval so it will try to rerun it every second.
`durationInSeconds` tells the test runner how many seconds you'd like the test to run for.
### Running
1. Enable the test in `./test_suites/task_manager/index.ts` by removing the `.skip` from the `describe.skip`.
1. Run the test server from within the `x-pack` folder: `node scripts/functional_tests_server.js --config=test/plugin_api_perf/config.js`
1. Run the test runner from within the `x-pack` folder: `node scripts/functional_test_runner.js --config=test/plugin_api_perf/config.js`
## The Results
After the test runs you should get the following output:
```
└-: task_manager_perf
└-> "before all" hook
└-: stressing task manager
└-> "before all" hook
└-> should run 10 tasks every second for a minute
└-> "before each" hook: global before each
└-> "before each" hook
│ debg Stress Test Result:
│ debg Average number of tasks executed per second: 4.846153846153846
│ debg Average time it took from the moment a task's scheduled time was reached, until Task Manager picked it up: 8220.473076923077
└- ✓ pass (1.0m) "task_manager_perf stressing task manager should run 10 tasks every second for a minute"
└-> "after all" hook
└-> "after all" hook
1 passing (1.0m)
```
If you look at the debug output you'll see a summary of how the test went:
You'll see the average number of tasks executed per second, over a period of each 5 second window (meaning we calculate the running average based on a sliding window of 5 seconds).
You'll also see the average time it takes from the moment a task's scheduled time was reached, until Task Manager picked it up for execution.

View file

@ -0,0 +1,50 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
import path from 'path';
import fs from 'fs';
import { services } from './services';
/**
 * FTR config for the Task Manager perf tests. Builds on the api_integration
 * config, loading the task_manager test plugin plus every plugin directory
 * found under ./plugins.
 */
export default async function ({ readConfigFile }) {
  const baseConfig = await readConfigFile(require.resolve('../api_integration/config'));

  // Every directory under ./plugins is treated as a plugin to load.
  const pluginDirs = fs
    .readdirSync(path.resolve(__dirname, 'plugins'))
    .filter(entry => fs.statSync(path.resolve(__dirname, 'plugins', entry)).isDirectory());

  return {
    testFiles: [require.resolve('./test_suites/task_manager')],
    services,
    servers: baseConfig.get('servers'),
    esTestCluster: baseConfig.get('esTestCluster'),
    apps: baseConfig.get('apps'),
    esArchiver: {
      directory: path.resolve(__dirname, '../functional/es_archives'),
    },
    screenshots: baseConfig.get('screenshots'),
    junit: {
      reportName: 'Plugin Functional Tests',
    },
    kbnTestServer: {
      ...baseConfig.get('kbnTestServer'),
      serverArgs: [
        ...baseConfig.get('kbnTestServer.serverArgs'),
        // The perf task type itself lives in the plugin_api_integration fixture.
        `--plugin-path=${path.resolve(
          __dirname,
          '..',
          'plugin_api_integration',
          'plugins',
          'task_manager'
        )}`,
        ...pluginDirs.map(
          dir => `--plugin-path=${path.resolve(__dirname, 'plugins', dir)}`
        ),
      ],
    },
  };
}

View file

@ -0,0 +1,11 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
import { GenericFtrProviderContext } from '@kbn/test/types/ftr';
import { services } from './services';
export type FtrProviderContext = GenericFtrProviderContext<typeof services, {}>;

View file

@ -0,0 +1,91 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
import { initRoutes } from './init_routes';
/**
 * Arithmetic mean of a list of numbers.
 * Note: an empty list yields NaN (0 / 0), which callers guard against.
 */
function avg(items) {
  let total = 0;
  for (const value of items) {
    total += value;
  }
  return total / items.length;
}
// Kibana legacy-platform plugin that registers a `performanceTestTask` task
// type and aggregates throughput / lead-time statistics for the perf suite.
export default function TaskManagerPerformanceAPI(kibana) {
  return new kibana.Plugin({
    name: 'perfTask',
    require: ['elasticsearch', 'task_manager'],
    config(Joi) {
      return Joi.object({
        enabled: Joi.boolean().default(true),
      }).default();
    },
    init(server) {
      const taskManager = server.plugins.task_manager;
      // Shared mutable stats object; also handed to the HTTP route (see
      // initRoutes below) so the test can read the running averages.
      const performanceState = {
        runningAverageTasks: 0,
        averagesTaken: [],
        runningAverageLeadTime: -1,
        averagesTakenLeadTime: [],
        leadTimeQueue: [],
      };
      // Every 5s, drain the lead-time queue and fold this window's stats into
      // the running averages. NOTE(review): this interval is never cleared,
      // so it lives for the lifetime of the server process.
      setInterval(() => {
        const tasks = performanceState.leadTimeQueue.length;
        console.log(`I have processed ${tasks} tasks in the past 5s`);
        if (tasks > 0) {
          // Mean lead time across the tasks observed in this 5s window.
          const latestAverage = avg(performanceState.leadTimeQueue.splice(0, tasks));
          performanceState.averagesTakenLeadTime.push(latestAverage);
          performanceState.averagesTaken.push(tasks);
          if (performanceState.averagesTakenLeadTime.length > 1) {
            // Running average over all 5s windows seen so far.
            performanceState.runningAverageLeadTime = avg(performanceState.averagesTakenLeadTime);
            performanceState.runningAverageTasks = avg(performanceState.averagesTaken);
          } else {
            // First window: the window values are the running averages.
            performanceState.runningAverageLeadTime = latestAverage;
            performanceState.runningAverageTasks = tasks;
          }
        }
      }, 5000);
      taskManager.registerTaskDefinitions({
        performanceTestTask: {
          title: 'Perf Test Task',
          description: 'A task for stress testing task_manager.',
          timeout: '1m',
          createTaskRunner: ({ taskInstance }) => {
            return {
              async run() {
                const { state } = taskInstance;
                // Lead time = how long after its scheduled runAt the task
                // actually ran. NOTE(review): relies on implicit Date -> number
                // coercion of taskInstance.runAt — confirm runAt is a Date.
                const leadTime = Date.now() - taskInstance.runAt;
                performanceState.leadTimeQueue.push(leadTime);
                // Reschedule the task to run again one second from now.
                return {
                  state,
                  runAt: millisecondsFromNow(1000),
                };
              },
            };
          },
        },
      });
      initRoutes(server, performanceState);
    },
  });
}
/**
 * Builds a Date `ms` milliseconds in the future.
 * Returns undefined when `ms` is falsy (including 0).
 */
function millisecondsFromNow(ms) {
  if (!ms) {
    return;
  }
  return new Date(Date.now() + ms);
}

View file

@ -0,0 +1,48 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
import Joi from 'joi';
// Scope under which all perf-test tasks are scheduled.
const scope = 'perf-testing';

/**
 * Registers the perf-test HTTP API on the Kibana server.
 *
 * POST /api/perf_tasks spawns `tasksToSpawn` instances of the
 * `performanceTestTask` task type, lets them run for `durationInSeconds`,
 * and then responds with the accumulated `performanceState` statistics.
 */
export function initRoutes(server, performanceState) {
  const taskManager = server.plugins.task_manager;
  server.route({
    path: '/api/perf_tasks',
    method: 'POST',
    config: {
      validate: {
        payload: Joi.object({
          tasksToSpawn: Joi.number().required(),
          durationInSeconds: Joi.number().required(),
        }),
      },
    },
    async handler(request) {
      const { tasksToSpawn, durationInSeconds } = request.payload;

      // Schedule all tasks in parallel. The original sequential awaited loop
      // collected the scheduled tasks into an array that was never used.
      await Promise.all(
        Array.from({ length: tasksToSpawn }, (_, taskIndex) =>
          taskManager.schedule(
            {
              taskType: 'performanceTestTask',
              params: { taskIndex },
              scope: [scope],
            },
            { request }
          )
        )
      );

      // Let the spawned tasks run for the requested duration, then report
      // the stats gathered by the plugin's 5s aggregation interval.
      return new Promise(resolve => {
        setTimeout(() => {
          resolve(performanceState);
        }, durationInSeconds * 1000);
      });
    },
  });
}

View file

@ -0,0 +1,12 @@
{
"name": "perf_task_plugin",
"version": "1.0.0",
"kibana": {
"version": "kibana",
"templateVersion": "1.0.0"
},
"license": "Apache-2.0",
"dependencies": {
"joi": "^13.5.2"
}
}

View file

@ -0,0 +1,7 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
export { services } from '../api_integration/services';

View file

@ -0,0 +1,20 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
export default function(context: { loadTestFile: (file: string) => void }) {
  /*
   * Framework for spawning multiple concurrent tasks in Task Manager and
   * measuring how well it claims, processes and finalises them.
   *
   * Deliberately skipped: it is only run manually on an ad hoc basis, but is
   * worth keeping around rather than being rewritten each time it is needed.
   */
  describe.skip('task_manager_perf', function taskManagerSuite() {
    this.tags('ciGroup2');
    context.loadTestFile(require.resolve('./task_manager_perf_integration'));
  });
}

View file

@ -0,0 +1,32 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
import expect from '@kbn/expect';
export default function({ getService }: { getService: (service: string) => any }) {
  const log = getService('log');
  const supertest = getService('supertest');

  describe('stressing task manager', () => {
    it('should run 10 tasks every second for a minute', async () => {
      // Kick off the stress run and wait for the server to report its stats.
      const response = await supertest
        .post('/api/perf_tasks')
        .set('kbn-xsrf', 'xxx')
        .send({ tasksToSpawn: 10, durationInSeconds: 60 })
        .expect(200);
      const { runningAverageTasks, runningAverageLeadTime } = response.body;

      log.debug(`Stress Test Result:`);
      log.debug(`Average number of tasks executed per second: ${runningAverageTasks}`);
      log.debug(
        `Average time it took from the moment a task's scheduled time was reached, until Task Manager picked it up: ${runningAverageLeadTime}`
      );

      expect(runningAverageTasks).to.be.greaterThan(0);
      expect(runningAverageLeadTime).to.be.greaterThan(0);
    });
  });
}