[Task Manager] Fixed the behavior of the claiming tasks function failing, when inline scripts are disabled. (#94870)

* [Task Manager] Fixed the behavior of the claiming tasks function failing, when inline scripts are disabled.

* added docs

* fixed test

* added tests

* fixed due to comments

* Fixed docs due to comments

* extended TM configuration changes message with the possible errors description
This commit is contained in:
Yuliia Naumenko 2021-03-26 10:46:33 -07:00 committed by GitHub
parent 8101419cb9
commit e31ef5235e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 185 additions and 53 deletions

View file

@ -706,3 +706,21 @@ These rough calculations give you a lower bound to the required throughput, whic
Given these inferred attributes, it would be safe to assume that a single {kib} instance with default settings **would not** provide the required throughput. It is possible that scaling horizontally by adding a couple more {kib} instances will.
For details on scaling Task Manager, see <<task-manager-scaling-guidance>>.
[float]
[[task-manager-cannot-operate-when-inline-scripts-are-disabled]]
==== Inline scripts are disabled in {es}
*Problem*:
Tasks are not running, and the server logs contain the following error message:
[source, txt]
--------------------------------------------------
[warning][plugins][taskManager] Task Manager cannot operate when inline scripts are disabled in Elasticsearch
--------------------------------------------------
*Solution*:
Inline scripts are a hard requirement for Task Manager to function.
To enable inline scripting, see the {es} documentation for {ref}/modules-scripting-security.html#allowed-script-types-setting[configuring the allowed script types setting].

View file

@ -6,7 +6,10 @@
*/
import sinon from 'sinon';
import { savedObjectsRepositoryMock } from '../../../../../src/core/server/mocks';
import {
elasticsearchServiceMock,
savedObjectsRepositoryMock,
} from '../../../../../src/core/server/mocks';
import { SavedObjectsErrorHelpers, Logger } from '../../../../../src/core/server';
import { ADJUST_THROUGHPUT_INTERVAL } from '../lib/create_managed_configuration';
import { TaskManagerPlugin, TaskManagerStartContract } from '../plugin';
@ -19,6 +22,7 @@ describe('managed configuration', () => {
let clock: sinon.SinonFakeTimers;
const savedObjectsClient = savedObjectsRepositoryMock.create();
const esStart = elasticsearchServiceMock.createStart();
beforeEach(async () => {
jest.resetAllMocks();
@ -55,6 +59,7 @@ describe('managed configuration', () => {
});
const coreStart = coreMock.createStart();
coreStart.elasticsearch = esStart;
coreStart.savedObjects.createInternalRepository.mockReturnValue(savedObjectsClient);
taskManagerStart = await taskManager.start(coreStart);
@ -81,10 +86,10 @@ describe('managed configuration', () => {
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
expect(logger.warn).toHaveBeenCalledWith(
'Max workers configuration is temporarily reduced after Elasticsearch returned 1 "too many request" error(s).'
'Max workers configuration is temporarily reduced after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
);
expect(logger.debug).toHaveBeenCalledWith(
'Max workers configuration changing from 10 to 8 after seeing 1 error(s)'
'Max workers configuration changing from 10 to 8 after seeing 1 "too many request" and/or "execute [inline] script" error(s)'
);
expect(logger.debug).toHaveBeenCalledWith('Task pool now using 10 as the max worker value');
});
@ -105,10 +110,57 @@ describe('managed configuration', () => {
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
expect(logger.warn).toHaveBeenCalledWith(
'Poll interval configuration is temporarily increased after Elasticsearch returned 1 "too many request" error(s).'
'Poll interval configuration is temporarily increased after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
);
expect(logger.debug).toHaveBeenCalledWith(
'Poll interval configuration changing from 3000 to 3600 after seeing 1 error(s)'
'Poll interval configuration changing from 3000 to 3600 after seeing 1 "too many request" and/or "execute [inline] script" error(s)'
);
expect(logger.debug).toHaveBeenCalledWith('Task poller now using interval of 3600ms');
});
// Verifies that the max workers reduction is logged after a search against the
// mocked Elasticsearch client is rejected with an inline-script error.
test('should lower max workers when Elasticsearch returns "cannot execute [inline] scripts" error', async () => {
// Make the next search on the mocked internal-user client reject once.
// NOTE(review): the message contains a stray `"` before ` error`; the inline
// snapshot below matches it verbatim, so both must change together if it is fixed.
esStart
.createClient('taskManager')
.asInternalUser.search.mockRejectedValueOnce(
elasticsearchServiceMock.createErrorTransportRequestPromise(
new Error('cannot execute [inline] scripts" error')
)
);
// The rejection is expected to surface to the caller of fetch().
await expect(taskManagerStart.fetch({})).rejects.toThrowErrorMatchingInlineSnapshot(
`"cannot execute [inline] scripts" error"`
);
// Advance the fake clock by one adjustment interval before checking the logs.
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
expect(logger.warn).toHaveBeenCalledWith(
'Max workers configuration is temporarily reduced after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
);
expect(logger.debug).toHaveBeenCalledWith(
'Max workers configuration changing from 10 to 8 after seeing 1 "too many request" and/or "execute [inline] script" error(s)'
);
// NOTE(review): this asserts the message with the starting value (10), as the
// sibling "too many request" test does — confirm this is the intended log line.
expect(logger.debug).toHaveBeenCalledWith('Task pool now using 10 as the max worker value');
});
// Verifies that the poll interval increase is logged after a search against the
// mocked Elasticsearch client is rejected with an inline-script error.
test('should increase poll interval when Elasticsearch returns "cannot execute [inline] scripts" error', async () => {
// Make the next search on the mocked internal-user client reject once.
// NOTE(review): the message contains a stray `"` before ` error`; the inline
// snapshot below matches it verbatim, so both must change together if it is fixed.
esStart
.createClient('taskManager')
.asInternalUser.search.mockRejectedValueOnce(
elasticsearchServiceMock.createErrorTransportRequestPromise(
new Error('cannot execute [inline] scripts" error')
)
);
// The rejection is expected to surface to the caller of fetch().
await expect(taskManagerStart.fetch({})).rejects.toThrowErrorMatchingInlineSnapshot(
`"cannot execute [inline] scripts" error"`
);
// Advance the fake clock by one adjustment interval before checking the logs.
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
expect(logger.warn).toHaveBeenCalledWith(
'Poll interval configuration is temporarily increased after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
);
expect(logger.debug).toHaveBeenCalledWith(
'Poll interval configuration changing from 3000 to 3600 after seeing 1 "too many request" and/or "execute [inline] script" error(s)'
);
expect(logger.debug).toHaveBeenCalledWith('Task poller now using interval of 3600ms');
});

View file

@ -96,7 +96,7 @@ describe('createManagedConfiguration()', () => {
errors$.next(SavedObjectsErrorHelpers.createTooManyRequestsError('a', 'b'));
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
expect(logger.warn).toHaveBeenCalledWith(
'Max workers configuration is temporarily reduced after Elasticsearch returned 1 "too many request" error(s).'
'Max workers configuration is temporarily reduced after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
);
});
@ -180,7 +180,7 @@ describe('createManagedConfiguration()', () => {
errors$.next(SavedObjectsErrorHelpers.createTooManyRequestsError('a', 'b'));
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
expect(logger.warn).toHaveBeenCalledWith(
'Poll interval configuration is temporarily increased after Elasticsearch returned 1 "too many request" error(s).'
'Poll interval configuration is temporarily increased after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
);
});

View file

@ -9,6 +9,7 @@ import { interval, merge, of, Observable } from 'rxjs';
import { filter, mergeScan, map, scan, distinctUntilChanged, startWith } from 'rxjs/operators';
import { SavedObjectsErrorHelpers } from '../../../../../src/core/server';
import { Logger } from '../../../../../src/core/server';
import { isEsCannotExecuteScriptError } from './identify_es_error';
const FLUSH_MARKER = Symbol('flush');
export const ADJUST_THROUGHPUT_INTERVAL = 10 * 1000;
@ -76,11 +77,11 @@ function createMaxWorkersScan(logger: Logger, startingMaxWorkers: number) {
}
if (newMaxWorkers !== previousMaxWorkers) {
logger.debug(
`Max workers configuration changing from ${previousMaxWorkers} to ${newMaxWorkers} after seeing ${errorCount} error(s)`
`Max workers configuration changing from ${previousMaxWorkers} to ${newMaxWorkers} after seeing ${errorCount} "too many request" and/or "execute [inline] script" error(s)`
);
if (previousMaxWorkers === startingMaxWorkers) {
logger.warn(
`Max workers configuration is temporarily reduced after Elasticsearch returned ${errorCount} "too many request" error(s).`
`Max workers configuration is temporarily reduced after Elasticsearch returned ${errorCount} "too many request" and/or "execute [inline] script" error(s).`
);
}
}
@ -105,11 +106,11 @@ function createPollIntervalScan(logger: Logger, startingPollInterval: number) {
}
if (newPollInterval !== previousPollInterval) {
logger.debug(
`Poll interval configuration changing from ${previousPollInterval} to ${newPollInterval} after seeing ${errorCount} error(s)`
`Poll interval configuration changing from ${previousPollInterval} to ${newPollInterval} after seeing ${errorCount} "too many request" and/or "execute [inline] script" error(s)`
);
if (previousPollInterval === startingPollInterval) {
logger.warn(
`Poll interval configuration is temporarily increased after Elasticsearch returned ${errorCount} "too many request" error(s).`
`Poll interval configuration is temporarily increased after Elasticsearch returned ${errorCount} "too many request" and/or "execute [inline] script" error(s).`
);
}
}
@ -121,7 +122,11 @@ function countErrors(errors$: Observable<Error>, countInterval: number): Observa
return merge(
// Flush error count at fixed interval
interval(countInterval).pipe(map(() => FLUSH_MARKER)),
errors$.pipe(filter((e) => SavedObjectsErrorHelpers.isTooManyRequestsError(e)))
errors$.pipe(
filter(
(e) => SavedObjectsErrorHelpers.isTooManyRequestsError(e) || isEsCannotExecuteScriptError(e)
)
)
).pipe(
// When tag is "flush", reset the error counter
// Otherwise increment the error counter

View file

@ -137,33 +137,32 @@ function generateESErrorWithResponse(
rootCause: ESErrorCausedBy[] = [],
causeBy: ESErrorCausedBy = {}
) {
return Object.assign(new Error(), {
msg: '[illegal_argument_exception] cannot execute [inline] scripts',
path: '/.kibana_task_manager/_update_by_query',
query: {},
body: '{"query":{}}',
statusCode: 400,
response: JSON.stringify({
error: {
root_cause: rootCause,
type: 'search_phase_execution_exception',
reason: 'all shards failed',
phase: 'query',
grouped: true,
failed_shards: [
{
shard: 0,
index: '.kibana_task_manager_1',
node: '24A4QbjHSK6prvtopAKLKw',
reason: {
type: 'illegal_argument_exception',
reason: 'cannot execute [inline] scripts',
return {
name: 'ResponseError',
meta: {
body: {
error: {
root_cause: rootCause,
type: 'search_phase_execution_exception',
reason: 'all shards failed',
phase: 'query',
grouped: true,
failed_shards: [
{
shard: 0,
index: '.kibana_task_manager_8.0.0_001',
node: 'GJ7ekIWTT56-h-aC6Y89Gw',
reason: {
type: 'illegal_argument_exception',
reason: 'cannot execute [inline] scripts',
},
},
},
],
caused_by: causeBy,
],
caused_by: causeBy,
},
status: 400,
},
status: 400,
}),
});
statusCode: 400,
},
};
}

View file

@ -16,13 +16,27 @@ export interface ESError {
caused_by?: ESErrorCausedBy;
}
/**
 * Body of an error response, as read from `meta.body` by `identifyEsError`.
 * Both fields are optional, so consumers must guard against their absence.
 */
export interface ESErrorBody {
// Error details (root causes and caused-by chain) inspected by `identifyEsError`
error?: ESError;
// presumably the HTTP status reported in the body (test fixtures use 400) — TODO confirm
status?: number;
}
/**
 * The `meta` property of an error thrown by the ES JS client, limited to the
 * fields this module reads.
 */
export interface ESErrorMeta {
// Response body; `identifyEsError` reads its `error` field
body?: ESErrorBody;
// presumably the HTTP status code of the response (test fixtures use 400) — TODO confirm
statusCode?: number;
}
/**
 * Minimal shape of an error thrown by the ES JS client, as accepted by
 * `identifyEsError`. Every field is optional; `identifyEsError` returns an
 * empty list when `meta` is missing.
 */
export interface ElasticsearchResponseError {
// Error name; test fixtures use 'ResponseError'
name?: string;
// Response metadata holding the error body
meta?: ESErrorMeta;
}
function extractCausedByChain(
causedBy: ESErrorCausedBy = {},
accumulator: string[] = []
): string[] {
const { reason, caused_by: innerCausedBy } = causedBy;
if (reason) {
if (reason && !accumulator.includes(reason)) {
accumulator.push(reason);
}
@ -39,11 +53,15 @@ function extractCausedByChain(
* @param err Object Error thrown by ES JS client
* @return ES error cause
*/
export function identifyEsError(err: { response: string }) {
const { response } = err;
export function identifyEsError(err: ElasticsearchResponseError) {
if (!err.meta) {
return [];
}
const {
meta: { body: response },
} = err;
if (response) {
const { error } = JSON.parse(response) as { error?: ESError };
const { error } = response;
if (error) {
const { root_cause: rootCause = [], caused_by: causedBy } = error;
@ -58,3 +76,7 @@ export function identifyEsError(err: { response: string }) {
}
return [];
}
/**
 * Tells whether an error thrown by the ES JS client reports that inline
 * scripts cannot be executed.
 *
 * @param err Error object thrown by the ES JS client
 * @returns true when the reasons identified by `identifyEsError` for `err`
 * include 'cannot execute [inline] scripts'
 */
export function isEsCannotExecuteScriptError(err: ElasticsearchResponseError): boolean {
  const reasons = identifyEsError(err);
  return reasons.includes('cannot execute [inline] scripts');
}

View file

@ -19,6 +19,7 @@ import { TaskClaiming, ClaimOwnershipResult } from './queries/task_claiming';
import type { TaskClaiming as TaskClaimingClass } from './queries/task_claiming';
import { asOk, Err, isErr, isOk, Result } from './lib/result_type';
import { FillPoolResult } from './lib/fill_pool';
import { ElasticsearchResponseError } from './lib/identify_es_error';
let mockTaskClaiming = taskClaimingMock.create({});
jest.mock('./queries/task_claiming', () => {
@ -204,12 +205,46 @@ describe('TaskPollingLifecycle', () => {
taskClaiming.claimAvailableTasksIfCapacityIsAvailable.mockImplementation(
() =>
new Observable<Result<ClaimOwnershipResult, FillPoolResult>>((observer) => {
observer.error(
Object.assign(new Error(), {
response:
'{"error":{"root_cause":[{"type":"illegal_argument_exception","reason":"cannot execute [inline] scripts"}],"type":"search_phase_execution_exception","reason":"all shards failed","phase":"query","grouped":true,"failed_shards":[{"shard":0,"index":".kibana_task_manager_1","node":"24A4QbjHSK6prvtopAKLKw","reason":{"type":"illegal_argument_exception","reason":"cannot execute [inline] scripts"}}],"caused_by":{"type":"illegal_argument_exception","reason":"cannot execute [inline] scripts","caused_by":{"type":"illegal_argument_exception","reason":"cannot execute [inline] scripts"}}},"status":400}',
})
);
observer.error({
name: 'ResponseError',
meta: {
body: {
error: {
root_cause: [
{
type: 'illegal_argument_exception',
reason: 'cannot execute [inline] scripts',
},
],
type: 'search_phase_execution_exception',
reason: 'all shards failed',
phase: 'query',
grouped: true,
failed_shards: [
{
shard: 0,
index: '.kibana_task_manager_1',
node: '24A4QbjHSK6prvtopAKLKw',
reason: {
type: 'illegal_argument_exception',
reason: 'cannot execute [inline] scripts',
},
},
],
caused_by: {
type: 'illegal_argument_exception',
reason: 'cannot execute [inline] scripts',
caused_by: {
type: 'illegal_argument_exception',
reason: 'cannot execute [inline] scripts',
},
},
},
status: 400,
},
},
statusCode: 400,
} as ElasticsearchResponseError);
})
);

View file

@ -39,7 +39,7 @@ import {
import { TaskPool } from './task_pool';
import { TaskManagerRunner, TaskRunner } from './task_running';
import { TaskStore } from './task_store';
import { identifyEsError } from './lib/identify_es_error';
import { identifyEsError, isEsCannotExecuteScriptError } from './lib/identify_es_error';
import { BufferedTaskStore } from './buffered_task_store';
import { TaskTypeDictionary } from './task_type_dictionary';
import { delayOnClaimConflicts } from './polling';
@ -299,15 +299,16 @@ export function claimAvailableTasks(
// we can identify the reason
// if we can - we emit a FillPoolResult error rather than erroring out the wrapping Observable
// returned by `claimAvailableTasks`
if (identifyEsError(ex).includes('cannot execute [inline] scripts')) {
if (isEsCannotExecuteScriptError(ex)) {
logger.warn(
`Task Manager cannot operate when inline scripts are disabled in Elasticsearch`
);
observer.next(asErr(FillPoolResult.Failed));
observer.complete();
} else {
const esError = identifyEsError(ex);
// as we couldn't identify the reason - we'll error out the wrapping Observable too
observer.error(ex);
observer.error(esError.length > 0 ? esError : ex);
}
},
() => {