[Task Manager] Fixed the behavior of the claiming tasks function failing, when inline scripts are disabled. (#94870)
* [Task Manager] Fixed the behavior of the claiming tasks function failing, when inline scripts are disabled. * added docs * fixed test * added tests * fixed due to comments * Fixed docs due to comments * extended TM configuration changes message with the possible errors description
This commit is contained in:
parent
8101419cb9
commit
e31ef5235e
|
@ -706,3 +706,21 @@ These rough calculations give you a lower bound to the required throughput, whic
|
|||
Given these inferred attributes, it would be safe to assume that a single {kib} instance with default settings **would not** provide the required throughput. It is possible that scaling horizontally by adding a couple more {kib} instances will.
|
||||
|
||||
For details on scaling Task Manager, see <<task-manager-scaling-guidance>>.
|
||||
|
||||
[float]
|
||||
[[task-manager-cannot-operate-when-inline-scripts-are-disabled]]
|
||||
==== Inline scripts are disabled in {es}
|
||||
|
||||
*Problem*:
|
||||
|
||||
Tasks are not running, and the server logs contain the following error message:
|
||||
|
||||
[source, txt]
|
||||
--------------------------------------------------
|
||||
[warning][plugins][taskManager] Task Manager cannot operate when inline scripts are disabled in {es}
|
||||
--------------------------------------------------
|
||||
|
||||
*Solution*:
|
||||
|
||||
Inline scripts are a hard requirement for Task Manager to function.
|
||||
To enable inline scripting, see the Elasticsearch documentation for {ref}/modules-scripting-security.html#allowed-script-types-setting[configuring the allowed script types setting].
|
||||
|
|
|
@ -6,7 +6,10 @@
|
|||
*/
|
||||
|
||||
import sinon from 'sinon';
|
||||
import { savedObjectsRepositoryMock } from '../../../../../src/core/server/mocks';
|
||||
import {
|
||||
elasticsearchServiceMock,
|
||||
savedObjectsRepositoryMock,
|
||||
} from '../../../../../src/core/server/mocks';
|
||||
import { SavedObjectsErrorHelpers, Logger } from '../../../../../src/core/server';
|
||||
import { ADJUST_THROUGHPUT_INTERVAL } from '../lib/create_managed_configuration';
|
||||
import { TaskManagerPlugin, TaskManagerStartContract } from '../plugin';
|
||||
|
@ -19,6 +22,7 @@ describe('managed configuration', () => {
|
|||
|
||||
let clock: sinon.SinonFakeTimers;
|
||||
const savedObjectsClient = savedObjectsRepositoryMock.create();
|
||||
const esStart = elasticsearchServiceMock.createStart();
|
||||
|
||||
beforeEach(async () => {
|
||||
jest.resetAllMocks();
|
||||
|
@ -55,6 +59,7 @@ describe('managed configuration', () => {
|
|||
});
|
||||
|
||||
const coreStart = coreMock.createStart();
|
||||
coreStart.elasticsearch = esStart;
|
||||
coreStart.savedObjects.createInternalRepository.mockReturnValue(savedObjectsClient);
|
||||
taskManagerStart = await taskManager.start(coreStart);
|
||||
|
||||
|
@ -81,10 +86,10 @@ describe('managed configuration', () => {
|
|||
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
|
||||
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
'Max workers configuration is temporarily reduced after Elasticsearch returned 1 "too many request" error(s).'
|
||||
'Max workers configuration is temporarily reduced after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
'Max workers configuration changing from 10 to 8 after seeing 1 error(s)'
|
||||
'Max workers configuration changing from 10 to 8 after seeing 1 "too many request" and/or "execute [inline] script" error(s)'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith('Task pool now using 10 as the max worker value');
|
||||
});
|
||||
|
@ -105,10 +110,57 @@ describe('managed configuration', () => {
|
|||
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
|
||||
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
'Poll interval configuration is temporarily increased after Elasticsearch returned 1 "too many request" error(s).'
|
||||
'Poll interval configuration is temporarily increased after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
'Poll interval configuration changing from 3000 to 3600 after seeing 1 error(s)'
|
||||
'Poll interval configuration changing from 3000 to 3600 after seeing 1 "too many request" and/or "execute [inline] script" error(s)'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith('Task poller now using interval of 3600ms');
|
||||
});
|
||||
|
||||
test('should lower max workers when Elasticsearch returns "cannot execute [inline] scripts" error', async () => {
|
||||
esStart
|
||||
.createClient('taskManager')
|
||||
.asInternalUser.search.mockRejectedValueOnce(
|
||||
elasticsearchServiceMock.createErrorTransportRequestPromise(
|
||||
new Error('cannot execute [inline] scripts" error')
|
||||
)
|
||||
);
|
||||
|
||||
await expect(taskManagerStart.fetch({})).rejects.toThrowErrorMatchingInlineSnapshot(
|
||||
`"cannot execute [inline] scripts" error"`
|
||||
);
|
||||
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
|
||||
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
'Max workers configuration is temporarily reduced after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
'Max workers configuration changing from 10 to 8 after seeing 1 "too many request" and/or "execute [inline] script" error(s)'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith('Task pool now using 10 as the max worker value');
|
||||
});
|
||||
|
||||
test('should increase poll interval when Elasticsearch returns "cannot execute [inline] scripts" error', async () => {
|
||||
esStart
|
||||
.createClient('taskManager')
|
||||
.asInternalUser.search.mockRejectedValueOnce(
|
||||
elasticsearchServiceMock.createErrorTransportRequestPromise(
|
||||
new Error('cannot execute [inline] scripts" error')
|
||||
)
|
||||
);
|
||||
|
||||
await expect(taskManagerStart.fetch({})).rejects.toThrowErrorMatchingInlineSnapshot(
|
||||
`"cannot execute [inline] scripts" error"`
|
||||
);
|
||||
|
||||
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
|
||||
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
'Poll interval configuration is temporarily increased after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
'Poll interval configuration changing from 3000 to 3600 after seeing 1 "too many request" and/or "execute [inline] script" error(s)'
|
||||
);
|
||||
expect(logger.debug).toHaveBeenCalledWith('Task poller now using interval of 3600ms');
|
||||
});
|
||||
|
|
|
@ -96,7 +96,7 @@ describe('createManagedConfiguration()', () => {
|
|||
errors$.next(SavedObjectsErrorHelpers.createTooManyRequestsError('a', 'b'));
|
||||
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
'Max workers configuration is temporarily reduced after Elasticsearch returned 1 "too many request" error(s).'
|
||||
'Max workers configuration is temporarily reduced after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
|
||||
);
|
||||
});
|
||||
|
||||
|
@ -180,7 +180,7 @@ describe('createManagedConfiguration()', () => {
|
|||
errors$.next(SavedObjectsErrorHelpers.createTooManyRequestsError('a', 'b'));
|
||||
clock.tick(ADJUST_THROUGHPUT_INTERVAL);
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
'Poll interval configuration is temporarily increased after Elasticsearch returned 1 "too many request" error(s).'
|
||||
'Poll interval configuration is temporarily increased after Elasticsearch returned 1 "too many request" and/or "execute [inline] script" error(s).'
|
||||
);
|
||||
});
|
||||
|
||||
|
|
|
@ -9,6 +9,7 @@ import { interval, merge, of, Observable } from 'rxjs';
|
|||
import { filter, mergeScan, map, scan, distinctUntilChanged, startWith } from 'rxjs/operators';
|
||||
import { SavedObjectsErrorHelpers } from '../../../../../src/core/server';
|
||||
import { Logger } from '../../../../../src/core/server';
|
||||
import { isEsCannotExecuteScriptError } from './identify_es_error';
|
||||
|
||||
const FLUSH_MARKER = Symbol('flush');
|
||||
export const ADJUST_THROUGHPUT_INTERVAL = 10 * 1000;
|
||||
|
@ -76,11 +77,11 @@ function createMaxWorkersScan(logger: Logger, startingMaxWorkers: number) {
|
|||
}
|
||||
if (newMaxWorkers !== previousMaxWorkers) {
|
||||
logger.debug(
|
||||
`Max workers configuration changing from ${previousMaxWorkers} to ${newMaxWorkers} after seeing ${errorCount} error(s)`
|
||||
`Max workers configuration changing from ${previousMaxWorkers} to ${newMaxWorkers} after seeing ${errorCount} "too many request" and/or "execute [inline] script" error(s)`
|
||||
);
|
||||
if (previousMaxWorkers === startingMaxWorkers) {
|
||||
logger.warn(
|
||||
`Max workers configuration is temporarily reduced after Elasticsearch returned ${errorCount} "too many request" error(s).`
|
||||
`Max workers configuration is temporarily reduced after Elasticsearch returned ${errorCount} "too many request" and/or "execute [inline] script" error(s).`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@ -105,11 +106,11 @@ function createPollIntervalScan(logger: Logger, startingPollInterval: number) {
|
|||
}
|
||||
if (newPollInterval !== previousPollInterval) {
|
||||
logger.debug(
|
||||
`Poll interval configuration changing from ${previousPollInterval} to ${newPollInterval} after seeing ${errorCount} error(s)`
|
||||
`Poll interval configuration changing from ${previousPollInterval} to ${newPollInterval} after seeing ${errorCount} "too many request" and/or "execute [inline] script" error(s)`
|
||||
);
|
||||
if (previousPollInterval === startingPollInterval) {
|
||||
logger.warn(
|
||||
`Poll interval configuration is temporarily increased after Elasticsearch returned ${errorCount} "too many request" error(s).`
|
||||
`Poll interval configuration is temporarily increased after Elasticsearch returned ${errorCount} "too many request" and/or "execute [inline] script" error(s).`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@ -121,7 +122,11 @@ function countErrors(errors$: Observable<Error>, countInterval: number): Observa
|
|||
return merge(
|
||||
// Flush error count at fixed interval
|
||||
interval(countInterval).pipe(map(() => FLUSH_MARKER)),
|
||||
errors$.pipe(filter((e) => SavedObjectsErrorHelpers.isTooManyRequestsError(e)))
|
||||
errors$.pipe(
|
||||
filter(
|
||||
(e) => SavedObjectsErrorHelpers.isTooManyRequestsError(e) || isEsCannotExecuteScriptError(e)
|
||||
)
|
||||
)
|
||||
).pipe(
|
||||
// When tag is "flush", reset the error counter
|
||||
// Otherwise increment the error counter
|
||||
|
|
|
@ -137,33 +137,32 @@ function generateESErrorWithResponse(
|
|||
rootCause: ESErrorCausedBy[] = [],
|
||||
causeBy: ESErrorCausedBy = {}
|
||||
) {
|
||||
return Object.assign(new Error(), {
|
||||
msg: '[illegal_argument_exception] cannot execute [inline] scripts',
|
||||
path: '/.kibana_task_manager/_update_by_query',
|
||||
query: {},
|
||||
body: '{"query":{}}',
|
||||
statusCode: 400,
|
||||
response: JSON.stringify({
|
||||
error: {
|
||||
root_cause: rootCause,
|
||||
type: 'search_phase_execution_exception',
|
||||
reason: 'all shards failed',
|
||||
phase: 'query',
|
||||
grouped: true,
|
||||
failed_shards: [
|
||||
{
|
||||
shard: 0,
|
||||
index: '.kibana_task_manager_1',
|
||||
node: '24A4QbjHSK6prvtopAKLKw',
|
||||
reason: {
|
||||
type: 'illegal_argument_exception',
|
||||
reason: 'cannot execute [inline] scripts',
|
||||
return {
|
||||
name: 'ResponseError',
|
||||
meta: {
|
||||
body: {
|
||||
error: {
|
||||
root_cause: rootCause,
|
||||
type: 'search_phase_execution_exception',
|
||||
reason: 'all shards failed',
|
||||
phase: 'query',
|
||||
grouped: true,
|
||||
failed_shards: [
|
||||
{
|
||||
shard: 0,
|
||||
index: '.kibana_task_manager_8.0.0_001',
|
||||
node: 'GJ7ekIWTT56-h-aC6Y89Gw',
|
||||
reason: {
|
||||
type: 'illegal_argument_exception',
|
||||
reason: 'cannot execute [inline] scripts',
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
caused_by: causeBy,
|
||||
],
|
||||
caused_by: causeBy,
|
||||
},
|
||||
status: 400,
|
||||
},
|
||||
status: 400,
|
||||
}),
|
||||
});
|
||||
statusCode: 400,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
|
|
@ -16,13 +16,27 @@ export interface ESError {
|
|||
caused_by?: ESErrorCausedBy;
|
||||
}
|
||||
|
||||
export interface ESErrorBody {
|
||||
error?: ESError;
|
||||
status?: number;
|
||||
}
|
||||
|
||||
export interface ESErrorMeta {
|
||||
body?: ESErrorBody;
|
||||
statusCode?: number;
|
||||
}
|
||||
export interface ElasticsearchResponseError {
|
||||
name?: string;
|
||||
meta?: ESErrorMeta;
|
||||
}
|
||||
|
||||
function extractCausedByChain(
|
||||
causedBy: ESErrorCausedBy = {},
|
||||
accumulator: string[] = []
|
||||
): string[] {
|
||||
const { reason, caused_by: innerCausedBy } = causedBy;
|
||||
|
||||
if (reason) {
|
||||
if (reason && !accumulator.includes(reason)) {
|
||||
accumulator.push(reason);
|
||||
}
|
||||
|
||||
|
@ -39,11 +53,15 @@ function extractCausedByChain(
|
|||
* @param err Object Error thrown by ES JS client
|
||||
* @return ES error cause
|
||||
*/
|
||||
export function identifyEsError(err: { response: string }) {
|
||||
const { response } = err;
|
||||
|
||||
export function identifyEsError(err: ElasticsearchResponseError) {
|
||||
if (!err.meta) {
|
||||
return [];
|
||||
}
|
||||
const {
|
||||
meta: { body: response },
|
||||
} = err;
|
||||
if (response) {
|
||||
const { error } = JSON.parse(response) as { error?: ESError };
|
||||
const { error } = response;
|
||||
if (error) {
|
||||
const { root_cause: rootCause = [], caused_by: causedBy } = error;
|
||||
|
||||
|
@ -58,3 +76,7 @@ export function identifyEsError(err: { response: string }) {
|
|||
}
|
||||
return [];
|
||||
}
|
||||
|
||||
export function isEsCannotExecuteScriptError(err: ElasticsearchResponseError): boolean {
|
||||
return identifyEsError(err).includes('cannot execute [inline] scripts');
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@ import { TaskClaiming, ClaimOwnershipResult } from './queries/task_claiming';
|
|||
import type { TaskClaiming as TaskClaimingClass } from './queries/task_claiming';
|
||||
import { asOk, Err, isErr, isOk, Result } from './lib/result_type';
|
||||
import { FillPoolResult } from './lib/fill_pool';
|
||||
import { ElasticsearchResponseError } from './lib/identify_es_error';
|
||||
|
||||
let mockTaskClaiming = taskClaimingMock.create({});
|
||||
jest.mock('./queries/task_claiming', () => {
|
||||
|
@ -204,12 +205,46 @@ describe('TaskPollingLifecycle', () => {
|
|||
taskClaiming.claimAvailableTasksIfCapacityIsAvailable.mockImplementation(
|
||||
() =>
|
||||
new Observable<Result<ClaimOwnershipResult, FillPoolResult>>((observer) => {
|
||||
observer.error(
|
||||
Object.assign(new Error(), {
|
||||
response:
|
||||
'{"error":{"root_cause":[{"type":"illegal_argument_exception","reason":"cannot execute [inline] scripts"}],"type":"search_phase_execution_exception","reason":"all shards failed","phase":"query","grouped":true,"failed_shards":[{"shard":0,"index":".kibana_task_manager_1","node":"24A4QbjHSK6prvtopAKLKw","reason":{"type":"illegal_argument_exception","reason":"cannot execute [inline] scripts"}}],"caused_by":{"type":"illegal_argument_exception","reason":"cannot execute [inline] scripts","caused_by":{"type":"illegal_argument_exception","reason":"cannot execute [inline] scripts"}}},"status":400}',
|
||||
})
|
||||
);
|
||||
observer.error({
|
||||
name: 'ResponseError',
|
||||
meta: {
|
||||
body: {
|
||||
error: {
|
||||
root_cause: [
|
||||
{
|
||||
type: 'illegal_argument_exception',
|
||||
reason: 'cannot execute [inline] scripts',
|
||||
},
|
||||
],
|
||||
type: 'search_phase_execution_exception',
|
||||
reason: 'all shards failed',
|
||||
phase: 'query',
|
||||
grouped: true,
|
||||
failed_shards: [
|
||||
{
|
||||
shard: 0,
|
||||
index: '.kibana_task_manager_1',
|
||||
node: '24A4QbjHSK6prvtopAKLKw',
|
||||
reason: {
|
||||
type: 'illegal_argument_exception',
|
||||
reason: 'cannot execute [inline] scripts',
|
||||
},
|
||||
},
|
||||
],
|
||||
caused_by: {
|
||||
type: 'illegal_argument_exception',
|
||||
reason: 'cannot execute [inline] scripts',
|
||||
caused_by: {
|
||||
type: 'illegal_argument_exception',
|
||||
reason: 'cannot execute [inline] scripts',
|
||||
},
|
||||
},
|
||||
},
|
||||
status: 400,
|
||||
},
|
||||
},
|
||||
statusCode: 400,
|
||||
} as ElasticsearchResponseError);
|
||||
})
|
||||
);
|
||||
|
||||
|
|
|
@ -39,7 +39,7 @@ import {
|
|||
import { TaskPool } from './task_pool';
|
||||
import { TaskManagerRunner, TaskRunner } from './task_running';
|
||||
import { TaskStore } from './task_store';
|
||||
import { identifyEsError } from './lib/identify_es_error';
|
||||
import { identifyEsError, isEsCannotExecuteScriptError } from './lib/identify_es_error';
|
||||
import { BufferedTaskStore } from './buffered_task_store';
|
||||
import { TaskTypeDictionary } from './task_type_dictionary';
|
||||
import { delayOnClaimConflicts } from './polling';
|
||||
|
@ -299,15 +299,16 @@ export function claimAvailableTasks(
|
|||
// we can identify the reason
|
||||
// if we can - we emit an FillPoolResult error rather than erroring out the wrapping Observable
|
||||
// returned by `claimAvailableTasks`
|
||||
if (identifyEsError(ex).includes('cannot execute [inline] scripts')) {
|
||||
if (isEsCannotExecuteScriptError(ex)) {
|
||||
logger.warn(
|
||||
`Task Manager cannot operate when inline scripts are disabled in Elasticsearch`
|
||||
);
|
||||
observer.next(asErr(FillPoolResult.Failed));
|
||||
observer.complete();
|
||||
} else {
|
||||
const esError = identifyEsError(ex);
|
||||
// as we couldn't identify the reason - we'll error out the wrapping Observable too
|
||||
observer.error(ex);
|
||||
observer.error(esError.length > 0 ? esError : ex);
|
||||
}
|
||||
},
|
||||
() => {
|
||||
|
|
Loading…
Reference in a new issue