[task-manager] Improves messaging on errors due to inline scripts being disabled (#49860)

This PR detects when claiming tasks fails due to inline scripts being disabled in Elasticsearch and improves the message reported in the Kibana log.
This commit is contained in:
Gidi Meir Morris 2019-11-01 15:30:04 +00:00 committed by GitHub
parent f479f1e925
commit 128948c2a7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 279 additions and 12 deletions

View file

@ -0,0 +1,167 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
import { identifyEsError, ESErrorCausedBy } from './identify_es_error';
describe('identifyEsError', () => {
  // Shared fixtures: two root causes and a two-level `caused_by` chain.
  const rootCause: ESErrorCausedBy = {
    type: 'illegal_argument_exception',
    reason: 'root cause',
  };
  const deepRootCause: ESErrorCausedBy = {
    type: 'illegal_argument_exception',
    reason: 'deep root cause',
  };
  const causedByChain: ESErrorCausedBy = {
    type: 'illegal_argument_exception',
    reason: 'first caused by',
    caused_by: {
      type: 'illegal_argument_exception',
      reason: 'second caused by',
    },
  };

  // Builds a fake ES error from the given causes and runs it through identifyEsError.
  const identifiedReasons = (rootCauses: ESErrorCausedBy[], causedBy: ESErrorCausedBy) =>
    identifyEsError(generateESErrorWithResponse(rootCauses, causedBy));

  test('extracts messages from root cause', () => {
    const reasons = identifiedReasons([rootCause], {});
    expect(reasons).toContain('root cause');
  });

  test('extracts messages from deep root cause', () => {
    const reasons = identifiedReasons([rootCause, deepRootCause], {});
    expect(reasons).toContain('deep root cause');
  });

  test('extracts messages from first caused by', () => {
    const reasons = identifiedReasons([rootCause, deepRootCause], causedByChain);
    expect(reasons).toContain('first caused by');
  });

  test('extracts messages from deep caused by', () => {
    const reasons = identifiedReasons([rootCause, deepRootCause], causedByChain);
    expect(reasons).toContain('second caused by');
  });

  test('extracts all messages in error', () => {
    const reasons = identifiedReasons([rootCause, deepRootCause], causedByChain);
    expect(reasons).toMatchInlineSnapshot(`
      Array [
        "first caused by",
        "second caused by",
        "root cause",
        "deep root cause",
      ]
    `);
  });
});
/**
 * Test helper: builds an `Error` decorated with the extra fields this suite
 * expects on an Elasticsearch client failure (`msg`, `path`, `query`, `body`,
 * `statusCode` and a JSON-encoded `response`).
 *
 * @param rootCause entries placed under `error.root_cause` in the response
 * @param causeBy object placed under `error.caused_by` in the response
 * @returns the decorated error; `response` is a JSON string
 */
function generateESErrorWithResponse(
  rootCause: ESErrorCausedBy[] = [],
  causeBy: ESErrorCausedBy = {}
) {
  const failedShard = {
    shard: 0,
    index: '.kibana_task_manager_1',
    node: '24A4QbjHSK6prvtopAKLKw',
    reason: {
      type: 'illegal_argument_exception',
      reason: 'cannot execute [inline] scripts',
    },
  };

  // Key order is kept stable because the object is serialised to a string below.
  const responseBody = {
    error: {
      root_cause: rootCause,
      type: 'search_phase_execution_exception',
      reason: 'all shards failed',
      phase: 'query',
      grouped: true,
      failed_shards: [failedShard],
      caused_by: causeBy,
    },
    status: 400,
  };

  const esError = new Error();
  return Object.assign(esError, {
    msg: '[illegal_argument_exception] cannot execute [inline] scripts',
    path: '/.kibana_task_manager/_update_by_query',
    query: {},
    body: '{"query":{}}',
    statusCode: 400,
    response: JSON.stringify(responseBody),
  });
}

View file

@ -0,0 +1,59 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
/**
 * A single cause entry of an Elasticsearch error response, as read by this
 * module. Entries nest: `caused_by` may hold a further cause of the same shape.
 * All fields are optional, so an empty object is a valid (empty) cause.
 */
export interface ESErrorCausedBy {
// Exception type name, e.g. 'illegal_argument_exception'
type?: string;
// Human-readable message; this is the value collected by identifyEsError
reason?: string;
// The next, more deeply nested cause, if there is one
caused_by?: ESErrorCausedBy;
}
/**
 * The parts of the `error` object of an Elasticsearch error response that this
 * module reads. Any other fields present in the response are ignored.
 */
export interface ESError {
// List of causes; each entry may carry its own nested `caused_by` chain
root_cause?: ESErrorCausedBy[];
// Head of the error's own `caused_by` chain
caused_by?: ESErrorCausedBy;
}
/**
 * Walks a `caused_by` chain from the given cause inwards and appends every
 * non-empty `reason` it meets to `accumulator`, outermost first.
 *
 * @param causedBy first link of the chain; an omitted value is treated as an empty cause
 * @param accumulator array the reasons are pushed onto (mutated in place)
 * @returns the same `accumulator` instance that was passed in (or a new array if none was)
 */
function extractCausedByChain(
  causedBy: ESErrorCausedBy = {},
  accumulator: string[] = []
): string[] {
  let link = causedBy;
  for (;;) {
    const { reason, caused_by: next } = link;
    if (reason) {
      accumulator.push(reason);
    }
    if (!next) {
      // End of the chain: nothing nested any deeper.
      return accumulator;
    }
    link = next;
  }
}
/**
 * Collects the human-readable `reason` messages carried by an error thrown by
 * the ES JS client.
 *
 * `err.response` is expected to be the JSON-encoded response body. Reasons from
 * the error's `caused_by` chain come first, followed by the reasons of every
 * `root_cause` entry (each including its own `caused_by` chain).
 *
 * This function is called from `catch` handlers, so it must never throw itself:
 * a missing, non-string or unparsable `response`, or a response without a
 * usable `error` object, yields an empty array instead of masking the error
 * that is being inspected.
 *
 * @param err Object Error thrown by ES JS client
 * @returns the reasons found in the error, or an empty array if none could be identified
 */
export function identifyEsError(err: { response: string }): string[] {
  // `err` comes straight from a `catch` clause, so do not trust its shape.
  const response: unknown = err ? err.response : undefined;
  if (typeof response !== 'string' || response === '') {
    return [];
  }

  let parsed: { error?: ESError } | null;
  try {
    parsed = JSON.parse(response);
  } catch (parseError) {
    // Not JSON (e.g. a plain-text or HTML body): there is nothing to identify.
    return [];
  }

  // `JSON.parse` may legitimately return `null` or a primitive.
  const error = parsed ? parsed.error : undefined;
  if (!error) {
    return [];
  }

  const { root_cause: rootCause, caused_by: causedBy } = error;
  const rootCauses = Array.isArray(rootCause) ? rootCause : [];
  return [
    // A `null` chain head is treated like a missing one.
    ...extractCausedByChain(causedBy || undefined),
    ...rootCauses.reduce(
      // Skip `null` entries so a malformed list cannot make this throw.
      (acc: string[], innerRootCause) =>
        innerRootCause ? extractCausedByChain(innerRootCause, acc) : acc,
      []
    ),
  ];
}

View file

@ -214,5 +214,34 @@ describe('TaskManager', () => {
expect(claim).not.toHaveBeenCalled();
});
/**
 * This handles the case in which Elasticsearch has inline scripts disabled.
 * This is achieved by setting `script.allowed_types` to `none` in Elasticsearch.
 */
test('handles failure due to inline scripts being disabled', async () => {
  const logger = mockLogger();
  // Simulates the error seen when the claim query is rejected because inline scripts are off.
  const claim = jest.fn(() => {
    throw Object.assign(new Error(), {
      msg: '[illegal_argument_exception] cannot execute [inline] scripts',
      path: '/.kibana_task_manager/_update_by_query',
      query: {
        ignore_unavailable: true,
        refresh: true,
        max_docs: 200,
        conflicts: 'proceed',
      },
      body:
        '{"query":{"bool":{"must":[{"term":{"type":"task"}},{"bool":{"must":[{"bool":{"should":[{"bool":{"must":[{"term":{"task.status":"idle"}},{"range":{"task.runAt":{"lte":"now"}}}]}},{"bool":{"must":[{"bool":{"should":[{"term":{"task.status":"running"}},{"term":{"task.status":"claiming"}}]}},{"range":{"task.retryAt":{"lte":"now"}}}]}}]}},{"bool":{"should":[{"exists":{"field":"task.interval"}},{"bool":{"must":[{"term":{"task.taskType":"vis_telemetry"}},{"range":{"task.attempts":{"lt":3}}}]}},{"bool":{"must":[{"term":{"task.taskType":"lens_telemetry"}},{"range":{"task.attempts":{"lt":3}}}]}},{"bool":{"must":[{"term":{"task.taskType":"actions:.server-log"}},{"range":{"task.attempts":{"lt":1}}}]}},{"bool":{"must":[{"term":{"task.taskType":"actions:.slack"}},{"range":{"task.attempts":{"lt":1}}}]}},{"bool":{"must":[{"term":{"task.taskType":"actions:.email"}},{"range":{"task.attempts":{"lt":1}}}]}},{"bool":{"must":[{"term":{"task.taskType":"actions:.index"}},{"range":{"task.attempts":{"lt":1}}}]}},{"bool":{"must":[{"term":{"task.taskType":"actions:.pagerduty"}},{"range":{"task.attempts":{"lt":1}}}]}},{"bool":{"must":[{"term":{"task.taskType":"actions:.webhook"}},{"range":{"task.attempts":{"lt":1}}}]}}]}}]}}]}},"sort":{"_script":{"type":"number","order":"asc","script":{"lang":"expression","source":"doc[\'task.retryAt\'].value || doc[\'task.runAt\'].value"}}},"seq_no_primary_term":true,"script":{"source":"ctx._source.task.ownerId=params.ownerId; ctx._source.task.status=params.status; ctx._source.task.retryAt=params.retryAt;","lang":"painless","params":{"ownerId":"kibana:5b2de169-2785-441b-ae8c-186a1936b17d","retryAt":"2019-10-31T13:35:43.579Z","status":"claiming"}}}',
      statusCode: 400,
      response:
        '{"error":{"root_cause":[{"type":"illegal_argument_exception","reason":"cannot execute [inline] scripts"}],"type":"search_phase_execution_exception","reason":"all shards failed","phase":"query","grouped":true,"failed_shards":[{"shard":0,"index":".kibana_task_manager_1","node":"24A4QbjHSK6prvtopAKLKw","reason":{"type":"illegal_argument_exception","reason":"cannot execute [inline] scripts"}}],"caused_by":{"type":"illegal_argument_exception","reason":"cannot execute [inline] scripts","caused_by":{"type":"illegal_argument_exception","reason":"cannot execute [inline] scripts"}}},"status":400}',
    });
  });

  // claimAvailableTasks is async: await it so the assertion does not depend on
  // the error being handled before the first suspension, and so an unexpected
  // rejection fails this test instead of surfacing as an unhandled rejection.
  await claimAvailableTasks(claim, 10, logger);

  sinon.assert.calledWithMatch(logger.warn, /inline scripts/);
});
});
});

View file

@ -27,6 +27,7 @@ import {
OwnershipClaimingOpts,
ClaimOwnershipResult,
} from './task_store';
import { identifyEsError } from './lib/identify_es_error';
export interface TaskManagerOpts {
logger: Logger;
@ -259,20 +260,31 @@ export async function claimAvailableTasks(
logger: Logger
) {
if (availableWorkers > 0) {
const { docs, claimedTasks } = await claim({
size: availableWorkers,
claimOwnershipUntil: intervalFromNow('30s')!,
});
try {
const { docs, claimedTasks } = await claim({
size: availableWorkers,
claimOwnershipUntil: intervalFromNow('30s')!,
});
if (docs.length !== claimedTasks) {
logger.warn(
`[Task Ownership error]: (${claimedTasks}) tasks were claimed by Kibana, but (${docs.length}) tasks were fetched`
);
if (docs.length !== claimedTasks) {
logger.warn(
`[Task Ownership error]: (${claimedTasks}) tasks were claimed by Kibana, but (${docs.length}) tasks were fetched`
);
}
return docs;
} catch (ex) {
if (identifyEsError(ex).includes('cannot execute [inline] scripts')) {
logger.warn(
`Task Manager cannot operate when inline scripts are disabled in Elasticsearch`
);
} else {
throw ex;
}
}
return docs;
} else {
logger.info(
`[Task Ownership]: Task Manager has skipped Claiming Ownership of available tasks at it has ran out Available Workers. If this happens often, consider adjusting the "xpack.task_manager.max_workers" configuration.`
);
}
logger.info(
`[Task Ownership]: Task Manager has skipped Claiming Ownership of available tasks at it has ran out Available Workers. If this happens often, consider adjusting the "xpack.task_manager.max_workers" configuration.`
);
return [];
}