[Alerting] Change execution of alerts from async to sync (#97311)

* added ability to run ephemeral tasks

* fixed typing

* added typing on plugin

* WIP

* Fix type issues

* Hook up the ephemeral task into the task runner for actions

* Tasks can now run independently of one another

* Use deferred language

* Refactor taskParams slightly

* Use Promise.all

* Remove deferred logic

* Add config options to limit the amount of tasks executing at once

* Add ephemeral task monitoring

* WIP

* Add single test so far

* Ensure we log after actions have executed

* Remove confusing * 1

* Add logic to ensure we fallback to default enqueueing if the total actions is above the config

* Add additional test

* Fix tests a bit, ensure we log the alerting:actions-execute right away and the tests should listen for alerts:execute

* Better tests

* If the queue is at capacity, attempt to execute the ephemeral task as a regular action

* Ensure we run ephemeral tasks before to avoid them getting stuck in the queue

* Do not handle the promise anymore

* Remove unnecessary code

* Properly handle errors from ephemeral task lifecycle

* moved actions domain out of alerting and into actions plugin

* Remove some tests

* Fix TS and test issues

* Fix type issues

* Fix more type issues

* Fix more type issues

* Fix jest tests

* Fix more jest tests

* Off by default

* Fix jest tests

* Update config for this suite too

* Start of telemetry code

* Fix types and add missing files

* Fix telemetry schema

* Fix types

* Fix more types

* moved load event emission to pollingcycle and added health stats on Ephemeral tasks

* Add more telemetry data based on new health metrics for the ephemeral queue

* Fix tests and types

* Add separate request capacity for ephemeral queue

* Fix telemetry schema and add tests for usage collection

* track polled tasks by persistence and use in capacity estimation instead of executions

* fixed typing

* Bump default capacity

* added delay metric to ephemeral stats

* Fix bad merge

* Fix tests

* Fix tests

* Fix types

* Skip failing tests

* Exclude ephemeral stats from capacity estimation tests

* PR feedback

* More PR feedback

* PR feedback

* Fix merge conflict

* Try fixing CI

* Fix broken lock file from merge

* Match master

* Add this back

* PR feedback

* Change to queue and add test

* Disable ephemeral queue in tests

* Updated desc

* Comment out ephemeral-specific tests that require the entire test suite to support ephemeral tasks

* Add clarifying comment

Co-authored-by: Gidi Meir Morris <github@gidi.io>
Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
Chris Roberson 2021-07-20 13:24:24 -04:00 committed by GitHub
parent 710c17fab6
commit 1f798aac3f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
78 changed files with 4499 additions and 832 deletions

View file

@ -207,4 +207,10 @@ Use `full` to perform hostname verification, `certificate` to skip hostname veri
[[alert-settings]]
==== Alerting settings
You do not need to configure any additional settings to use alerting in {kib}.
[cols="2*<"]
|===
| `xpack.alerting.maxEphemeralActionsPerAlert`
| Sets the maximum number of actions per alert that will be executed ephemerally. To use this, first enable ephemeral tasks in task manager with <<task-manager-settings,`xpack.task_manager.ephemeral_tasks.enabled`>>. Defaults to 10.
|===

View file

@ -37,6 +37,14 @@ Task Manager runs background tasks by polling for work on an interval. You can
`monitored_stats_health_verbose_log.`
`warn_delayed_task_start_in_seconds`
| The number of seconds a task is allowed to be delayed before a warning is printed to the server log. Defaults to 60.
| `xpack.task_manager.ephemeral_tasks.enabled`
| Enables an experimental feature that executes a limited (and configurable) number of actions in the same task as the alert which triggered them.
Running actions this way reduces the latency between an action being triggered and it being run, but these action tasks are not persisted as SavedObjects.
Because they are not persisted, there is a risk that these action tasks won't be run at all if the Kibana instance running them exits unexpectedly. Defaults to false.
| `xpack.task_manager.ephemeral_tasks.request_capacity`
| Sets the size of the ephemeral queue defined above. Defaults to 10.
|===
[float]

View file

@ -21,6 +21,7 @@ const createActionsClientMock = () => {
getBulk: jest.fn(),
execute: jest.fn(),
enqueueExecution: jest.fn(),
ephemeralEnqueuedExecution: jest.fn(),
listTypes: jest.fn(),
isActionTypeEnabled: jest.fn(),
};

View file

@ -44,6 +44,7 @@ const scopedClusterClient = elasticsearchServiceMock.createScopedClusterClient()
const actionExecutor = actionExecutorMock.create();
const authorization = actionsAuthorizationMock.create();
const executionEnqueuer = jest.fn();
const ephemeralExecutionEnqueuer = jest.fn();
const request = httpServerMock.createKibanaRequest();
const auditLogger = auditServiceMock.create().asScoped(request);
@ -77,6 +78,7 @@ beforeEach(() => {
preconfiguredActions: [],
actionExecutor,
executionEnqueuer,
ephemeralExecutionEnqueuer,
request,
authorization: (authorization as unknown) as ActionsAuthorization,
auditLogger,
@ -453,6 +455,7 @@ describe('create()', () => {
preconfiguredActions: [],
actionExecutor,
executionEnqueuer,
ephemeralExecutionEnqueuer,
request,
authorization: (authorization as unknown) as ActionsAuthorization,
});
@ -553,6 +556,7 @@ describe('get()', () => {
defaultKibanaIndex,
actionExecutor,
executionEnqueuer,
ephemeralExecutionEnqueuer,
request,
authorization: (authorization as unknown) as ActionsAuthorization,
preconfiguredActions: [
@ -608,6 +612,7 @@ describe('get()', () => {
defaultKibanaIndex,
actionExecutor,
executionEnqueuer,
ephemeralExecutionEnqueuer,
request,
authorization: (authorization as unknown) as ActionsAuthorization,
preconfiguredActions: [
@ -724,6 +729,7 @@ describe('get()', () => {
defaultKibanaIndex,
actionExecutor,
executionEnqueuer,
ephemeralExecutionEnqueuer,
request,
authorization: (authorization as unknown) as ActionsAuthorization,
preconfiguredActions: [
@ -793,6 +799,7 @@ describe('getAll()', () => {
defaultKibanaIndex,
actionExecutor,
executionEnqueuer,
ephemeralExecutionEnqueuer,
request,
authorization: (authorization as unknown) as ActionsAuthorization,
preconfiguredActions: [
@ -930,6 +937,7 @@ describe('getAll()', () => {
defaultKibanaIndex,
actionExecutor,
executionEnqueuer,
ephemeralExecutionEnqueuer,
request,
authorization: (authorization as unknown) as ActionsAuthorization,
preconfiguredActions: [
@ -1005,6 +1013,7 @@ describe('getBulk()', () => {
defaultKibanaIndex,
actionExecutor,
executionEnqueuer,
ephemeralExecutionEnqueuer,
request,
authorization: (authorization as unknown) as ActionsAuthorization,
preconfiguredActions: [
@ -1136,6 +1145,7 @@ describe('getBulk()', () => {
defaultKibanaIndex,
actionExecutor,
executionEnqueuer,
ephemeralExecutionEnqueuer,
request,
authorization: (authorization as unknown) as ActionsAuthorization,
preconfiguredActions: [

View file

@ -41,6 +41,7 @@ import {
AuthorizationMode,
} from './authorization/get_authorization_mode_by_source';
import { connectorAuditEvent, ConnectorAuditAction } from './lib/audit_events';
import { RunNowResult } from '../../task_manager/server';
// We are assuming there won't be many actions. This is why we will load
// all the actions in advance and assume the total count to not go over 10000.
@ -68,7 +69,8 @@ interface ConstructorOptions {
unsecuredSavedObjectsClient: SavedObjectsClientContract;
preconfiguredActions: PreConfiguredAction[];
actionExecutor: ActionExecutorContract;
executionEnqueuer: ExecutionEnqueuer;
executionEnqueuer: ExecutionEnqueuer<void>;
ephemeralExecutionEnqueuer: ExecutionEnqueuer<RunNowResult>;
request: KibanaRequest;
authorization: ActionsAuthorization;
auditLogger?: AuditLogger;
@ -88,7 +90,8 @@ export class ActionsClient {
private readonly actionExecutor: ActionExecutorContract;
private readonly request: KibanaRequest;
private readonly authorization: ActionsAuthorization;
private readonly executionEnqueuer: ExecutionEnqueuer;
private readonly executionEnqueuer: ExecutionEnqueuer<void>;
private readonly ephemeralExecutionEnqueuer: ExecutionEnqueuer<RunNowResult>;
private readonly auditLogger?: AuditLogger;
constructor({
@ -99,6 +102,7 @@ export class ActionsClient {
preconfiguredActions,
actionExecutor,
executionEnqueuer,
ephemeralExecutionEnqueuer,
request,
authorization,
auditLogger,
@ -110,6 +114,7 @@ export class ActionsClient {
this.preconfiguredActions = preconfiguredActions;
this.actionExecutor = actionExecutor;
this.executionEnqueuer = executionEnqueuer;
this.ephemeralExecutionEnqueuer = ephemeralExecutionEnqueuer;
this.request = request;
this.authorization = authorization;
this.auditLogger = auditLogger;
@ -497,6 +502,17 @@ export class ActionsClient {
return this.executionEnqueuer(this.unsecuredSavedObjectsClient, options);
}
/**
 * Hands an action execution to the ephemeral execution enqueuer.
 *
 * When the authorization mode resolved for the execution `source` is RBAC,
 * the caller must be authorized for the `execute` operation first; a failed
 * authorization check rejects before anything is enqueued.
 *
 * @param options the execution to enqueue (action id, params, source, ...)
 * @returns whatever the ephemeral enqueuer resolves with
 */
public async ephemeralEnqueuedExecution(options: EnqueueExecutionOptions): Promise<RunNowResult> {
  const authorizationMode = await getAuthorizationModeBySource(
    this.unsecuredSavedObjectsClient,
    options.source
  );
  const mustAuthorize = authorizationMode === AuthorizationMode.RBAC;
  if (mustAuthorize) {
    await this.authorization.ensureAuthorized('execute');
  }

  return this.ephemeralExecutionEnqueuer(this.unsecuredSavedObjectsClient, options);
}
public async listTypes(): Promise<ActionType[]> {
return this.actionTypeRegistry.list();
}

View file

@ -6,8 +6,13 @@
*/
import { SavedObjectsClientContract } from '../../../../src/core/server';
import { TaskManagerStartContract } from '../../task_manager/server';
import { RawAction, ActionTypeRegistryContract, PreConfiguredAction } from './types';
import { RunNowResult, TaskManagerStartContract } from '../../task_manager/server';
import {
RawAction,
ActionTypeRegistryContract,
PreConfiguredAction,
ActionTaskExecutorParams,
} from './types';
import { ACTION_TASK_PARAMS_SAVED_OBJECT_TYPE } from './constants/saved_objects';
import { ExecuteOptions as ActionExecutorOptions } from './lib/action_executor';
import { isSavedObjectExecutionSource } from './lib';
@ -27,17 +32,17 @@ export interface ExecuteOptions extends Pick<ActionExecutorOptions, 'params' | '
relatedSavedObjects?: RelatedSavedObjects;
}
export type ExecutionEnqueuer = (
export type ExecutionEnqueuer<T> = (
unsecuredSavedObjectsClient: SavedObjectsClientContract,
options: ExecuteOptions
) => Promise<void>;
) => Promise<T>;
export function createExecutionEnqueuerFunction({
taskManager,
actionTypeRegistry,
isESOCanEncrypt,
preconfiguredActions,
}: CreateExecuteFunctionOptions) {
}: CreateExecuteFunctionOptions): ExecutionEnqueuer<void> {
return async function execute(
unsecuredSavedObjectsClient: SavedObjectsClientContract,
{ id, params, spaceId, source, apiKey, relatedSavedObjects }: ExecuteOptions
@ -48,18 +53,10 @@ export function createExecutionEnqueuerFunction({
);
}
const { actionTypeId, name, isMissingSecrets } = await getAction(
unsecuredSavedObjectsClient,
preconfiguredActions,
id
);
if (isMissingSecrets) {
throw new Error(
`Unable to execute action because no secrets are defined for the "${name}" connector.`
);
}
const action = await getAction(unsecuredSavedObjectsClient, preconfiguredActions, id);
validateCanActionBeUsed(action);
const { actionTypeId } = action;
if (!actionTypeRegistry.isActionExecutable(id, actionTypeId, { notifyUsage: true })) {
actionTypeRegistry.ensureActionTypeEnabled(actionTypeId);
}
@ -76,7 +73,7 @@ export function createExecutionEnqueuerFunction({
);
await taskManager.schedule({
taskType: `actions:${actionTypeId}`,
taskType: `actions:${action.actionTypeId}`,
params: {
spaceId,
actionTaskParamsId: actionTaskParamsRecord.id,
@ -87,6 +84,53 @@ export function createExecutionEnqueuerFunction({
};
}
/**
 * Creates an enqueuer that runs an action through Task Manager's
 * `ephemeralRunNow`.
 *
 * The returned function loads the action, rejects it when its secrets are
 * missing, makes sure its action type is enabled, and then submits a task
 * whose params embed the action task parameters directly (action id, params,
 * optional API key and the references derived from the execution source).
 *
 * @returns a function resolving with the result of `taskManager.ephemeralRunNow`
 */
export function createEphemeralExecutionEnqueuerFunction({
  taskManager,
  actionTypeRegistry,
  preconfiguredActions,
}: CreateExecuteFunctionOptions): ExecutionEnqueuer<RunNowResult> {
  return async function execute(
    unsecuredSavedObjectsClient: SavedObjectsClientContract,
    { id, params, spaceId, source, apiKey }: ExecuteOptions
  ): Promise<RunNowResult> {
    const action = await getAction(unsecuredSavedObjectsClient, preconfiguredActions, id);
    validateCanActionBeUsed(action);

    const actionTypeId = action.actionTypeId;
    const isExecutable = actionTypeRegistry.isActionExecutable(id, actionTypeId, {
      notifyUsage: true,
    });
    if (!isExecutable) {
      // Throws when the action type is disabled.
      actionTypeRegistry.ensureActionTypeEnabled(actionTypeId);
    }

    const executorParams: ActionTaskExecutorParams = {
      spaceId,
      taskParams: {
        actionId: id,
        // Saved Objects won't allow us to enforce unknown rather than any
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        params: params as Record<string, any>,
        // Only include the key when one was supplied
        ...(apiKey ? { apiKey } : {}),
      },
      ...executionSourceAsSavedObjectReferences(source),
    };

    const ephemeralTask = {
      taskType: `actions:${actionTypeId}`,
      params: executorParams,
      state: {},
      scope: ['actions'],
    };
    return taskManager.ephemeralRunNow(ephemeralTask);
  };
}
/**
 * Guards against executing a connector whose secrets are missing.
 *
 * @param action the action (preconfigured or loaded from a saved object) to check
 * @throws Error naming the connector when `isMissingSecrets` is truthy
 */
function validateCanActionBeUsed(action: PreConfiguredAction | RawAction) {
  if (!action.isMissingSecrets) {
    return;
  }

  throw new Error(
    `Unable to execute action because no secrets are defined for the "${action.name}" connector.`
  );
}
function executionSourceAsSavedObjectReferences(executionSource: ActionExecutorOptions['source']) {
return isSavedObjectExecutionSource(executionSource)
? {

View file

@ -48,6 +48,7 @@ export interface TaskInfo {
export interface ExecuteOptions<Source = unknown> {
actionId: string;
isEphemeral?: boolean;
request: KibanaRequest;
params: Record<string, unknown>;
source?: ActionExecutionSource<Source>;
@ -79,6 +80,7 @@ export class ActionExecutor {
params,
request,
source,
isEphemeral,
taskInfo,
relatedSavedObjects,
}: ExecuteOptions): Promise<ActionTypeExecutorResult<unknown>> {
@ -207,6 +209,7 @@ export class ActionExecutor {
params: validatedParams,
config: validatedConfig,
secrets: validatedSecrets,
isEphemeral,
});
} catch (err) {
rawResult = {

View file

@ -125,6 +125,7 @@ test('executes the task by calling the executor with proper parameters', async (
expect(mockedActionExecutor.execute).toHaveBeenCalledWith({
actionId: '2',
isEphemeral: false,
params: { baz: true },
relatedSavedObjects: [],
request: expect.objectContaining({
@ -250,6 +251,7 @@ test('uses API key when provided', async () => {
expect(mockedActionExecutor.execute).toHaveBeenCalledWith({
actionId: '2',
isEphemeral: false,
params: { baz: true },
relatedSavedObjects: [],
request: expect.objectContaining({
@ -293,6 +295,7 @@ test('uses relatedSavedObjects when provided', async () => {
expect(mockedActionExecutor.execute).toHaveBeenCalledWith({
actionId: '2',
isEphemeral: false,
params: { baz: true },
relatedSavedObjects: [
{
@ -334,14 +337,15 @@ test('sanitizes invalid relatedSavedObjects when provided', async () => {
await taskRunner.run();
expect(mockedActionExecutor.execute).toHaveBeenCalledWith({
actionId: '2',
isEphemeral: false,
params: { baz: true },
relatedSavedObjects: [],
request: expect.objectContaining({
headers: {
// base64 encoded "123:abc"
authorization: 'ApiKey MTIzOmFiYw==',
},
}),
relatedSavedObjects: [],
taskInfo: {
scheduled: new Date(),
},
@ -369,6 +373,7 @@ test(`doesn't use API key when not provided`, async () => {
expect(mockedActionExecutor.execute).toHaveBeenCalledWith({
actionId: '2',
isEphemeral: false,
params: { baz: true },
relatedSavedObjects: [],
request: expect.objectContaining({

View file

@ -16,6 +16,7 @@ import {
KibanaRequest,
SavedObjectReference,
IBasePath,
SavedObject,
} from '../../../../../src/core/server';
import { ActionExecutorContract } from './action_executor';
import { ExecutorError } from './executor_error';
@ -27,6 +28,8 @@ import {
ActionTypeRegistryContract,
SpaceIdToNamespaceFunction,
ActionTypeExecutorResult,
ActionTaskExecutorParams,
isPersistedActionTask,
} from '../types';
import { ACTION_TASK_PARAMS_SAVED_OBJECT_TYPE } from '../constants/saved_objects';
import { asSavedObjectExecutionSource } from './action_execution_source';
@ -78,16 +81,16 @@ export class TaskRunnerFactory {
return {
async run() {
const { spaceId, actionTaskParamsId } = taskInstance.params as Record<string, string>;
const namespace = spaceIdToNamespace(spaceId);
const actionTaskExecutorParams = taskInstance.params as ActionTaskExecutorParams;
const { spaceId } = actionTaskExecutorParams;
const {
attributes: { actionId, params, apiKey, relatedSavedObjects },
references,
} = await encryptedSavedObjectsClient.getDecryptedAsInternalUser<ActionTaskParams>(
ACTION_TASK_PARAMS_SAVED_OBJECT_TYPE,
actionTaskParamsId,
{ namespace }
} = await getActionTaskParams(
actionTaskExecutorParams,
encryptedSavedObjectsClient,
spaceIdToNamespace
);
const requestHeaders: Record<string, string> = {};
@ -119,7 +122,8 @@ export class TaskRunnerFactory {
try {
executorResult = await actionExecutor.execute({
params,
actionId,
actionId: actionId as string,
isEphemeral: !isPersistedActionTask(actionTaskExecutorParams),
request: fakeRequest,
...getSourceFromReferences(references),
taskInfo,
@ -144,26 +148,46 @@ export class TaskRunnerFactory {
}
// Cleanup action_task_params object now that we're done with it
try {
// If the request has reached this far we can assume the user is allowed to run clean up
// We would idealy secure every operation but in order to support clean up of legacy alerts
// we allow this operation in an unsecured manner
// Once support for legacy alert RBAC is dropped, this can be secured
await getUnsecuredSavedObjectsClient(fakeRequest).delete(
ACTION_TASK_PARAMS_SAVED_OBJECT_TYPE,
actionTaskParamsId
);
} catch (e) {
// Log error only, we shouldn't fail the task because of an error here (if ever there's retry logic)
logger.error(
`Failed to cleanup ${ACTION_TASK_PARAMS_SAVED_OBJECT_TYPE} object [id="${actionTaskParamsId}"]: ${e.message}`
);
if (isPersistedActionTask(actionTaskExecutorParams)) {
try {
// If the request has reached this far we can assume the user is allowed to run clean up
// We would idealy secure every operation but in order to support clean up of legacy alerts
// we allow this operation in an unsecured manner
// Once support for legacy alert RBAC is dropped, this can be secured
await getUnsecuredSavedObjectsClient(fakeRequest).delete(
ACTION_TASK_PARAMS_SAVED_OBJECT_TYPE,
actionTaskExecutorParams.actionTaskParamsId
);
} catch (e) {
// Log error only, we shouldn't fail the task because of an error here (if ever there's retry logic)
logger.error(
`Failed to cleanup ${ACTION_TASK_PARAMS_SAVED_OBJECT_TYPE} object [id="${actionTaskExecutorParams.actionTaskParamsId}"]: ${e.message}`
);
}
}
},
};
}
}
/**
 * Resolves the action task parameters for a task, wherever they live.
 *
 * - Persisted tasks only carry an `actionTaskParamsId`; the parameters are
 *   fetched (decrypted, as the internal user) from the corresponding saved
 *   object in the namespace derived from the task's space id.
 * - Other (ephemeral) tasks carry the parameters inline, so they are wrapped
 *   in a saved-object-like shape without any lookup.
 *
 * @returns the attributes and references of the action task params
 */
async function getActionTaskParams(
  executorParams: ActionTaskExecutorParams,
  encryptedSavedObjectsClient: EncryptedSavedObjectsClient,
  spaceIdToNamespace: SpaceIdToNamespaceFunction
): Promise<Omit<SavedObject<ActionTaskParams>, 'id' | 'type'>> {
  const namespace = spaceIdToNamespace(executorParams.spaceId);

  if (!isPersistedActionTask(executorParams)) {
    // Inline params: nothing to fetch, default the references to an empty list
    return {
      attributes: executorParams.taskParams,
      references: executorParams.references ?? [],
    };
  }

  return encryptedSavedObjectsClient.getDecryptedAsInternalUser<ActionTaskParams>(
    ACTION_TASK_PARAMS_SAVED_OBJECT_TYPE,
    executorParams.actionTaskParamsId,
    { namespace }
  );
}
function getSourceFromReferences(references: SavedObjectReference[]) {
return pipe(
fromNullable(references.find((ref) => ref.name === 'source')),

View file

@ -38,7 +38,10 @@ import { ActionsConfig, getValidatedConfig } from './config';
import { resolveCustomHosts } from './lib/custom_host_settings';
import { ActionsClient } from './actions_client';
import { ActionTypeRegistry } from './action_type_registry';
import { createExecutionEnqueuerFunction } from './create_execute_function';
import {
createExecutionEnqueuerFunction,
createEphemeralExecutionEnqueuerFunction,
} from './create_execute_function';
import { registerBuiltInActionTypes } from './builtin_action_types';
import { registerActionsUsageCollector } from './usage';
import {
@ -332,6 +335,12 @@ export class ActionsPlugin implements Plugin<PluginSetupContract, PluginStartCon
await getAuthorizationModeBySource(unsecuredSavedObjectsClient, authorizationContext)
),
actionExecutor: actionExecutor!,
ephemeralExecutionEnqueuer: createEphemeralExecutionEnqueuerFunction({
taskManager: plugins.taskManager,
actionTypeRegistry: actionTypeRegistry!,
isESOCanEncrypt: isESOCanEncrypt!,
preconfiguredActions,
}),
executionEnqueuer: createExecutionEnqueuerFunction({
taskManager: plugins.taskManager,
actionTypeRegistry: actionTypeRegistry!,
@ -492,6 +501,12 @@ export class ActionsPlugin implements Plugin<PluginSetupContract, PluginStartCon
request,
authorization: instantiateAuthorization(request),
actionExecutor: actionExecutor!,
ephemeralExecutionEnqueuer: createEphemeralExecutionEnqueuerFunction({
taskManager,
actionTypeRegistry: actionTypeRegistry!,
isESOCanEncrypt: isESOCanEncrypt!,
preconfiguredActions,
}),
executionEnqueuer: createExecutionEnqueuerFunction({
taskManager,
actionTypeRegistry: actionTypeRegistry!,

View file

@ -16,6 +16,7 @@ import {
SavedObjectAttributes,
ElasticsearchClient,
RequestHandlerContext,
SavedObjectReference,
} from '../../../../src/core/server';
import { ActionTypeExecutorResult } from '../common';
export { ActionTypeExecutorResult } from '../common';
@ -57,6 +58,7 @@ export interface ActionTypeExecutorOptions<Config, Secrets, Params> {
config: Config;
secrets: Secrets;
params: Params;
isEphemeral?: boolean;
}
export interface ActionResult<Config extends ActionTypeConfig = ActionTypeConfig> {
@ -132,10 +134,25 @@ export interface ActionTaskParams extends SavedObjectAttributes {
apiKey?: string;
}
export interface ActionTaskExecutorParams {
interface PersistedActionTaskExecutorParams {
spaceId: string;
actionTaskParamsId: string;
}
interface EphemeralActionTaskExecutorParams {
spaceId: string;
taskParams: ActionTaskParams;
references?: SavedObjectReference[];
}
export type ActionTaskExecutorParams =
| PersistedActionTaskExecutorParams
| EphemeralActionTaskExecutorParams;
/**
 * Type guard telling persisted action tasks apart from the other member of
 * the `ActionTaskExecutorParams` union.
 *
 * @param actionTask the executor params to inspect
 * @returns `true` when `actionTaskParamsId` is present and is a string
 */
export function isPersistedActionTask(
  actionTask: ActionTaskExecutorParams
): actionTask is PersistedActionTaskExecutorParams {
  const { actionTaskParamsId } = actionTask as PersistedActionTaskExecutorParams;
  return typeof actionTaskParamsId === 'string';
}
export interface ProxySettings {
proxyUrl: string;

View file

@ -19,6 +19,7 @@ describe('config validation', () => {
"interval": "5m",
"removalDelay": "1h",
},
"maxEphemeralActionsPerAlert": 10,
}
`);
});

View file

@ -8,6 +8,7 @@
import { schema, TypeOf } from '@kbn/config-schema';
import { validateDurationSchema } from './lib';
export const DEFAULT_MAX_EPHEMERAL_ACTIONS_PER_ALERT = 10;
export const configSchema = schema.object({
healthCheck: schema.object({
interval: schema.string({ validate: validateDurationSchema, defaultValue: '60m' }),
@ -16,6 +17,9 @@ export const configSchema = schema.object({
interval: schema.string({ validate: validateDurationSchema, defaultValue: '5m' }),
removalDelay: schema.string({ validate: validateDurationSchema, defaultValue: '1h' }),
}),
maxEphemeralActionsPerAlert: schema.number({
defaultValue: DEFAULT_MAX_EPHEMERAL_ACTIONS_PER_ALERT,
}),
});
export type AlertsConfig = TypeOf<typeof configSchema>;

View file

@ -71,6 +71,7 @@ describe('getHealthServiceStatusWithRetryAndErrorHandling', () => {
interval: '5m',
removalDelay: '1h',
},
maxEphemeralActionsPerAlert: 100,
}),
pollInterval
).subscribe();
@ -104,6 +105,7 @@ describe('getHealthServiceStatusWithRetryAndErrorHandling', () => {
interval: '5m',
removalDelay: '1h',
},
maxEphemeralActionsPerAlert: 100,
}),
pollInterval,
retryDelay
@ -148,6 +150,7 @@ describe('getHealthServiceStatusWithRetryAndErrorHandling', () => {
interval: '5m',
removalDelay: '1h',
},
maxEphemeralActionsPerAlert: 100,
})
).toPromise();
@ -178,6 +181,7 @@ describe('getHealthServiceStatusWithRetryAndErrorHandling', () => {
interval: '5m',
removalDelay: '1h',
},
maxEphemeralActionsPerAlert: 100,
})
).toPromise();
@ -208,6 +212,7 @@ describe('getHealthServiceStatusWithRetryAndErrorHandling', () => {
interval: '5m',
removalDelay: '1h',
},
maxEphemeralActionsPerAlert: 100,
})
).toPromise();
@ -235,6 +240,7 @@ describe('getHealthServiceStatusWithRetryAndErrorHandling', () => {
interval: '5m',
removalDelay: '1h',
},
maxEphemeralActionsPerAlert: 100,
}),
retryDelay
).subscribe((status) => {
@ -265,6 +271,7 @@ describe('getHealthServiceStatusWithRetryAndErrorHandling', () => {
interval: '5m',
removalDelay: '1h',
},
maxEphemeralActionsPerAlert: 100,
}),
retryDelay
).subscribe((status) => {
@ -301,6 +308,7 @@ describe('getHealthServiceStatusWithRetryAndErrorHandling', () => {
interval: '5m',
removalDelay: '1h',
},
maxEphemeralActionsPerAlert: 100,
})
).toPromise();

View file

@ -29,6 +29,7 @@ export type {
AlertInstanceContext,
AlertingApiRequestHandlerContext,
} from './types';
export { DEFAULT_MAX_EPHEMERAL_ACTIONS_PER_ALERT } from './config';
export { PluginSetupContract, PluginStartContract } from './plugin';
export { FindResult } from './alerts_client';
export { PublicAlertInstance as AlertInstance } from './alert_instance';

View file

@ -36,6 +36,7 @@ describe('Alerting Plugin', () => {
interval: '5m',
removalDelay: '1h',
},
maxEphemeralActionsPerAlert: 10,
});
plugin = new AlertingPlugin(context);
@ -122,6 +123,7 @@ describe('Alerting Plugin', () => {
interval: '5m',
removalDelay: '1h',
},
maxEphemeralActionsPerAlert: 10,
});
const plugin = new AlertingPlugin(context);
@ -161,6 +163,7 @@ describe('Alerting Plugin', () => {
interval: '5m',
removalDelay: '1h',
},
maxEphemeralActionsPerAlert: 10,
});
const plugin = new AlertingPlugin(context);
@ -214,6 +217,7 @@ describe('Alerting Plugin', () => {
interval: '5m',
removalDelay: '1h',
},
maxEphemeralActionsPerAlert: 100,
});
const plugin = new AlertingPlugin(context);

View file

@ -376,6 +376,8 @@ export class AlertingPlugin {
internalSavedObjectsRepository: core.savedObjects.createInternalRepository(['alert']),
alertTypeRegistry: this.alertTypeRegistry!,
kibanaBaseUrl: this.kibanaBaseUrl,
supportsEphemeralTasks: plugins.taskManager.supportsEphemeralTasks(),
maxEphemeralActionsPerAlert: this.config.then((config) => config.maxEphemeralActionsPerAlert),
});
this.eventLogService!.registerSavedObjectProvider('alert', (request) => {

View file

@ -96,6 +96,8 @@ const createExecutionHandlerParams: jest.Mocked<
contextVal: 'My other {{context.value}} goes here',
stateVal: 'My other {{state.value}} goes here',
},
supportsEphemeralTasks: false,
maxEphemeralActionsPerAlert: Promise.resolve(10),
};
beforeEach(() => {

View file

@ -4,12 +4,11 @@
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { Logger, KibanaRequest } from '../../../../../src/core/server';
import { transformActionParams } from './transform_action_params';
import {
PluginStartContract as ActionsPluginStartContract,
asSavedObjectExecutionSource,
PluginStartContract as ActionsPluginStartContract,
} from '../../../actions/server';
import { IEventLogger, IEvent, SAVED_OBJECT_REL_PRIMARY } from '../../../event_log/server';
import { EVENT_LOG_ACTIONS } from '../plugin';
@ -23,6 +22,7 @@ import {
RawAlert,
} from '../types';
import { NormalizedAlertType } from '../alert_type_registry';
import { isEphemeralTaskRejectedDueToCapacityError } from '../../../task_manager/server';
export interface CreateExecutionHandlerOptions<
Params extends AlertTypeParams,
@ -52,6 +52,8 @@ export interface CreateExecutionHandlerOptions<
eventLogger: IEventLogger;
request: KibanaRequest;
alertParams: AlertTypeParams;
supportsEphemeralTasks: boolean;
maxEphemeralActionsPerAlert: Promise<number>;
}
interface ExecutionHandlerOptions<ActionGroupIds extends string> {
@ -87,6 +89,8 @@ export function createExecutionHandler<
eventLogger,
request,
alertParams,
supportsEphemeralTasks,
maxEphemeralActionsPerAlert,
}: CreateExecutionHandlerOptions<
Params,
State,
@ -147,6 +151,8 @@ export function createExecutionHandler<
const alertLabel = `${alertType.id}:${alertId}: '${alertName}'`;
const actionsClient = await actionsPlugin.getActionsClientWithRequest(request);
let ephemeralActionsToSchedule = await maxEphemeralActionsPerAlert;
for (const action of actions) {
if (
!actionsPlugin.isActionExecutable(action.id, action.actionTypeId, { notifyUsage: true })
@ -159,10 +165,7 @@ export function createExecutionHandler<
const namespace = spaceId === 'default' ? {} : { namespace: spaceId };
// TODO would be nice to add the action name here, but it's not available
const actionLabel = `${action.actionTypeId}:${action.id}`;
const actionsClient = await actionsPlugin.getActionsClientWithRequest(request);
await actionsClient.enqueueExecution({
const enqueueOptions = {
id: action.id,
params: action.params,
spaceId,
@ -179,7 +182,20 @@ export function createExecutionHandler<
typeId: alertType.id,
},
],
});
};
// TODO would be nice to add the action name here, but it's not available
const actionLabel = `${action.actionTypeId}:${action.id}`;
if (supportsEphemeralTasks && ephemeralActionsToSchedule > 0) {
ephemeralActionsToSchedule--;
actionsClient.ephemeralEnqueuedExecution(enqueueOptions).catch(async (err) => {
if (isEphemeralTaskRejectedDueToCapacityError(err)) {
await actionsClient.enqueueExecution(enqueueOptions);
}
});
} else {
await actionsClient.enqueueExecution(enqueueOptions);
}
const event: IEvent = {
event: {

File diff suppressed because it is too large Load diff

View file

@ -190,6 +190,8 @@ export class TaskRunner<
eventLogger: this.context.eventLogger,
request: this.getFakeKibanaRequest(spaceId, apiKey),
alertParams,
supportsEphemeralTasks: this.context.supportsEphemeralTasks,
maxEphemeralActionsPerAlert: this.context.maxEphemeralActionsPerAlert,
});
}

View file

@ -79,6 +79,8 @@ describe('Task Runner Factory', () => {
internalSavedObjectsRepository: savedObjectsRepositoryMock.create(),
alertTypeRegistry: alertTypeRegistryMock.create(),
kibanaBaseUrl: 'https://localhost:5601',
supportsEphemeralTasks: true,
maxEphemeralActionsPerAlert: new Promise((resolve) => resolve(10)),
};
beforeEach(() => {

View file

@ -41,6 +41,8 @@ export interface TaskRunnerContext {
internalSavedObjectsRepository: ISavedObjectsRepository;
alertTypeRegistry: AlertTypeRegistry;
kibanaBaseUrl: string | undefined;
supportsEphemeralTasks: boolean;
maxEphemeralActionsPerAlert: Promise<number>;
}
export class TaskRunnerFactory {

View file

@ -4,5 +4,6 @@
"version": "8.0.0",
"kibanaVersion": "kibana",
"configPath": ["xpack", "task_manager"],
"optionalPlugins": ["usageCollection"],
"ui": false
}

View file

@ -13,6 +13,10 @@ describe('config validation', () => {
expect(configSchema.validate(config)).toMatchInlineSnapshot(`
Object {
"enabled": true,
"ephemeral_tasks": Object {
"enabled": false,
"request_capacity": 10,
},
"index": ".kibana_task_manager",
"max_attempts": 3,
"max_poll_inactivity_cycles": 10,
@ -65,6 +69,10 @@ describe('config validation', () => {
expect(configSchema.validate(config)).toMatchInlineSnapshot(`
Object {
"enabled": true,
"ephemeral_tasks": Object {
"enabled": false,
"request_capacity": 10,
},
"index": ".kibana_task_manager",
"max_attempts": 3,
"max_poll_inactivity_cycles": 10,
@ -104,6 +112,10 @@ describe('config validation', () => {
expect(configSchema.validate(config)).toMatchInlineSnapshot(`
Object {
"enabled": true,
"ephemeral_tasks": Object {
"enabled": false,
"request_capacity": 10,
},
"index": ".kibana_task_manager",
"max_attempts": 3,
"max_poll_inactivity_cycles": 10,

View file

@ -12,6 +12,7 @@ export const DEFAULT_MAX_WORKERS = 10;
export const DEFAULT_POLL_INTERVAL = 3000;
export const DEFAULT_MAX_POLL_INACTIVITY_CYCLES = 10;
export const DEFAULT_VERSION_CONFLICT_THRESHOLD = 80;
export const DEFAULT_MAX_EPHEMERAL_REQUEST_CAPACITY = MAX_WORKERS_LIMIT;
// Monitoring Constants
// ===================
@ -117,6 +118,16 @@ export const configSchema = schema.object(
defaultValue: DEFAULT_MONITORING_STATS_WARN_DELAYED_TASK_START_IN_SECONDS,
}),
}),
ephemeral_tasks: schema.object({
enabled: schema.boolean({ defaultValue: false }),
/* How many requests can Task Manager buffer before it rejects new requests. */
request_capacity: schema.number({
// a nice round contrived number, feel free to change as we learn how it behaves
defaultValue: 10,
min: 1,
max: DEFAULT_MAX_EPHEMERAL_REQUEST_CAPACITY,
}),
}),
},
{
validate: (config) => {

View file

@ -0,0 +1,24 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { EphemeralTaskLifecycle } from './ephemeral_task_lifecycle';
import { TaskLifecycleEvent } from './polling_lifecycle';
import { of, Observable } from 'rxjs';
/**
 * Jest test double for `EphemeralTaskLifecycle`.
 */
export const ephemeralTaskLifecycleMock = {
  /**
   * Builds a mocked lifecycle exposing `attemptToRun`, `events` and `queuedTasks`.
   *
   * @param opts Optional overrides; the whole object may be omitted.
   * @param opts.events$ Stream returned by the `events` getter. When absent, an
   * observable that completes without emitting (`of()`) is returned instead.
   * @param opts.getQueuedTasks Callback evaluated on every read of `queuedTasks`.
   * When absent, `queuedTasks` reads as `0`.
   * @returns The stub, cast to `jest.Mocked<EphemeralTaskLifecycle>`.
   */
  create(
    opts: { events$?: Observable<TaskLifecycleEvent>; getQueuedTasks?: () => number } = {}
  ) {
    return ({
      attemptToRun: jest.fn(),
      get events() {
        return opts.events$ ?? of();
      },
      get queuedTasks() {
        // Evaluated lazily so a test can change what its callback returns between reads.
        return opts.getQueuedTasks ? opts.getQueuedTasks() : 0;
      },
    } as unknown) as jest.Mocked<EphemeralTaskLifecycle>;
  },
};

View file

@ -0,0 +1,396 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import _ from 'lodash';
import { Subject } from 'rxjs';
import { TaskLifecycleEvent } from './polling_lifecycle';
import { createInitialMiddleware } from './lib/middleware';
import { TaskTypeDictionary } from './task_type_dictionary';
import { mockLogger } from './test_utils';
import { asErr, asOk } from './lib/result_type';
import { FillPoolResult } from './lib/fill_pool';
import { EphemeralTaskLifecycle, EphemeralTaskLifecycleOpts } from './ephemeral_task_lifecycle';
import { ConcreteTaskInstance, TaskStatus } from './task';
import uuid from 'uuid';
import { asTaskPollingCycleEvent, asTaskRunEvent, TaskPersistence } from './task_events';
import { TaskRunResult } from './task_running';
import { TaskPoolRunResult } from './task_pool';
import { TaskPoolMock } from './task_pool.mock';
// Unit tests for EphemeralTaskLifecycle: queueing of ephemeral task requests and draining
// of that queue into the task pool in response to lifecycle events.
describe('EphemeralTaskLifecycle', () => {
/**
 * Builds the constructor options for an EphemeralTaskLifecycle backed by mocks.
 *
 * `config` keys are shallow-spread over the defaults below, so a caller overriding
 * `ephemeral_tasks` must pass the complete object. Any other option can be replaced
 * through the remaining properties of the argument.
 *
 * Returns the options together with the handles the tests drive: the pool mock and its
 * capacity callback, the lifecycle event Subject and the availability Subject.
 * A `foo` task type is registered and `pool.run` resolves with RunningAllClaimedTasks.
 */
function initTaskLifecycleParams({
config,
...optOverrides
}: {
config?: Partial<EphemeralTaskLifecycleOpts['config']>;
} & Partial<Omit<EphemeralTaskLifecycleOpts, 'config'>> = {}) {
const taskManagerLogger = mockLogger();
// handed to TaskPoolMock.create; tests set its return value to describe available workers
const poolCapacity = jest.fn();
const pool = TaskPoolMock.create(poolCapacity);
// tests push polling-cycle / task-run events through this Subject
const lifecycleEvent$ = new Subject<TaskLifecycleEvent>();
const elasticsearchAndSOAvailability$ = new Subject<boolean>();
const opts: EphemeralTaskLifecycleOpts = {
logger: taskManagerLogger,
definitions: new TaskTypeDictionary(taskManagerLogger),
config: {
enabled: true,
max_workers: 10,
index: 'foo',
max_attempts: 9,
poll_interval: 6000000,
version_conflict_threshold: 80,
max_poll_inactivity_cycles: 10,
request_capacity: 1000,
monitored_aggregated_stats_refresh_rate: 5000,
monitored_stats_required_freshness: 5000,
monitored_stats_running_average_window: 50,
monitored_stats_health_verbose_log: {
enabled: true,
warn_delayed_task_start_in_seconds: 60,
},
monitored_task_execution_thresholds: {
default: {
error_threshold: 90,
warn_threshold: 80,
},
custom: {},
},
ephemeral_tasks: {
enabled: true,
request_capacity: 10,
},
// caller-supplied keys replace the defaults above (shallow, not deep, merge)
...config,
},
elasticsearchAndSOAvailability$,
pool,
lifecycleEvent: lifecycleEvent$,
middleware: createInitialMiddleware(),
...optOverrides,
};
// `foo` is the task type produced by mockTask() unless overridden
opts.definitions.registerTaskDefinitions({
foo: {
title: 'foo',
createTaskRunner: jest.fn(),
},
});
pool.run.mockResolvedValue(Promise.resolve(TaskPoolRunResult.RunningAllClaimedTasks));
return { poolCapacity, lifecycleEvent$, pool, elasticsearchAndSOAvailability$, opts };
}
describe('constructor', () => {
// with ephemeral tasks disabled, every request is handed back as an Err
test('avoids unnecesery subscription if ephemeral tasks are disabled', () => {
const { opts } = initTaskLifecycleParams({
config: {
ephemeral_tasks: {
enabled: false,
request_capacity: 10,
},
},
});
const ephemeralTaskLifecycle = new EphemeralTaskLifecycle(opts);
const task = mockTask();
expect(ephemeralTaskLifecycle.attemptToRun(task)).toMatchObject(asErr(task));
});
test('queues up tasks when ephemeral tasks are enabled', () => {
const { opts } = initTaskLifecycleParams();
const ephemeralTaskLifecycle = new EphemeralTaskLifecycle(opts);
const task = mockTask();
expect(ephemeralTaskLifecycle.attemptToRun(task)).toMatchObject(asOk(task));
});
// request_capacity is 2, so the third request must be rejected
test('rejects tasks when ephemeral tasks are enabled and queue is full', () => {
const { opts } = initTaskLifecycleParams({
config: { ephemeral_tasks: { enabled: true, request_capacity: 2 } },
});
const ephemeralTaskLifecycle = new EphemeralTaskLifecycle(opts);
const task = mockTask();
expect(ephemeralTaskLifecycle.attemptToRun(task)).toMatchObject(asOk(task));
const task2 = mockTask();
expect(ephemeralTaskLifecycle.attemptToRun(task2)).toMatchObject(asOk(task2));
const rejectedTask = mockTask();
expect(ephemeralTaskLifecycle.attemptToRun(rejectedTask)).toMatchObject(asErr(rejectedTask));
});
test('pulls tasks off queue when a polling cycle completes', () => {
const { pool, poolCapacity, opts, lifecycleEvent$ } = initTaskLifecycleParams();
const ephemeralTaskLifecycle = new EphemeralTaskLifecycle(opts);
const task = mockTask({ id: `my-phemeral-task` });
expect(ephemeralTaskLifecycle.attemptToRun(task)).toMatchObject(asOk(task));
poolCapacity.mockReturnValue({
availableWorkers: 10,
});
// a completed polling cycle is one of the two triggers for draining the queue
lifecycleEvent$.next(
asTaskPollingCycleEvent(asOk({ result: FillPoolResult.NoTasksClaimed }))
);
expect(pool.run).toHaveBeenCalledTimes(1);
// first argument of the first pool.run call: the runners created for the drained tasks
const taskRunners = pool.run.mock.calls[0][0];
expect(taskRunners).toHaveLength(1);
expect(`${taskRunners[0]}`).toMatchInlineSnapshot(`"foo \\"my-phemeral-task\\" (Ephemeral)"`);
});
test('pulls tasks off queue when a task run completes', () => {
const { pool, poolCapacity, opts, lifecycleEvent$ } = initTaskLifecycleParams();
const ephemeralTaskLifecycle = new EphemeralTaskLifecycle(opts);
const task = mockTask({ id: `my-phemeral-task` });
expect(ephemeralTaskLifecycle.attemptToRun(task)).toMatchObject(asOk(task));
poolCapacity.mockReturnValue({
availableWorkers: 10,
});
// a completed task run (of some other task) is the second trigger for draining the queue
lifecycleEvent$.next(
asTaskRunEvent(
uuid.v4(),
asOk({
task: mockTask(),
result: TaskRunResult.Success,
persistence: TaskPersistence.Ephemeral,
})
)
);
expect(pool.run).toHaveBeenCalledTimes(1);
const taskRunners = pool.run.mock.calls[0][0];
expect(taskRunners).toHaveLength(1);
expect(`${taskRunners[0]}`).toMatchInlineSnapshot(`"foo \\"my-phemeral-task\\" (Ephemeral)"`);
});
test('pulls as many tasks off queue as it has capacity for', () => {
const { pool, poolCapacity, opts, lifecycleEvent$ } = initTaskLifecycleParams();
const ephemeralTaskLifecycle = new EphemeralTaskLifecycle(opts);
const tasks = [mockTask(), mockTask(), mockTask()];
expect(ephemeralTaskLifecycle.attemptToRun(tasks[0])).toMatchObject(asOk(tasks[0]));
expect(ephemeralTaskLifecycle.attemptToRun(tasks[1])).toMatchObject(asOk(tasks[1]));
expect(ephemeralTaskLifecycle.attemptToRun(tasks[2])).toMatchObject(asOk(tasks[2]));
// three tasks are queued but only two workers are free
poolCapacity.mockReturnValue({
availableWorkers: 2,
});
lifecycleEvent$.next(
asTaskPollingCycleEvent(asOk({ result: FillPoolResult.NoTasksClaimed }))
);
expect(pool.run).toHaveBeenCalledTimes(1);
const taskRunners = pool.run.mock.calls[0][0];
expect(taskRunners).toHaveLength(2);
// the two tasks that were queued first are the ones handed to the pool, in order
expect(`${taskRunners[0]}`).toEqual(`foo "${tasks[0].id}" (Ephemeral)`);
expect(`${taskRunners[1]}`).toEqual(`foo "${tasks[1].id}" (Ephemeral)`);
});
test('pulls only as many tasks of the same type as is allowed by maxConcurrency', () => {
const { pool, poolCapacity, opts, lifecycleEvent$ } = initTaskLifecycleParams();
opts.definitions.registerTaskDefinitions({
report: {
title: 'report',
maxConcurrency: 1,
createTaskRunner: jest.fn(),
},
});
const ephemeralTaskLifecycle = new EphemeralTaskLifecycle(opts);
const firstLimitedTask = mockTask({ taskType: 'report' });
const secondLimitedTask = mockTask({ taskType: 'report' });
// both are queued
expect(ephemeralTaskLifecycle.attemptToRun(firstLimitedTask)).toMatchObject(
asOk(firstLimitedTask)
);
expect(ephemeralTaskLifecycle.attemptToRun(secondLimitedTask)).toMatchObject(
asOk(secondLimitedTask)
);
// pool has capacity for both
poolCapacity.mockReturnValue({
availableWorkers: 10,
});
// no `report` task is currently occupying a worker
pool.getOccupiedWorkersByType.mockReturnValue(0);
lifecycleEvent$.next(
asTaskPollingCycleEvent(asOk({ result: FillPoolResult.NoTasksClaimed }))
);
expect(pool.run).toHaveBeenCalledTimes(1);
const taskRunners = pool.run.mock.calls[0][0];
// maxConcurrency: 1 lets only the first `report` task through
expect(taskRunners).toHaveLength(1);
expect(`${taskRunners[0]}`).toEqual(`report "${firstLimitedTask.id}" (Ephemeral)`);
});
test('when pulling tasks from the queue, it takes into account the maxConcurrency of tasks that are already in the pool', () => {
const { pool, poolCapacity, opts, lifecycleEvent$ } = initTaskLifecycleParams();
opts.definitions.registerTaskDefinitions({
report: {
title: 'report',
maxConcurrency: 1,
createTaskRunner: jest.fn(),
},
});
const ephemeralTaskLifecycle = new EphemeralTaskLifecycle(opts);
const firstLimitedTask = mockTask({ taskType: 'report' });
const secondLimitedTask = mockTask({ taskType: 'report' });
// both are queued
expect(ephemeralTaskLifecycle.attemptToRun(firstLimitedTask)).toMatchObject(
asOk(firstLimitedTask)
);
expect(ephemeralTaskLifecycle.attemptToRun(secondLimitedTask)).toMatchObject(
asOk(secondLimitedTask)
);
// pool has capacity in general
poolCapacity.mockReturnValue({
availableWorkers: 2,
});
// but when we ask how many workers are occupied by type, one worker is already occupied by that type
pool.getOccupiedWorkersByType.mockReturnValue(1);
lifecycleEvent$.next(
asTaskPollingCycleEvent(asOk({ result: FillPoolResult.NoTasksClaimed }))
);
expect(pool.run).toHaveBeenCalledTimes(0);
// now we release the worker in the pool and cause another cycle in the ephemeral queue
pool.getOccupiedWorkersByType.mockReturnValue(0);
lifecycleEvent$.next(
asTaskPollingCycleEvent(asOk({ result: FillPoolResult.NoTasksClaimed }))
);
expect(pool.run).toHaveBeenCalledTimes(1);
const taskRunners = pool.run.mock.calls[0][0];
expect(taskRunners).toHaveLength(1);
expect(`${taskRunners[0]}`).toEqual(`report "${firstLimitedTask.id}" (Ephemeral)`);
});
});
test('pulls tasks with both maxConcurrency and unlimited concurrency', () => {
const { pool, poolCapacity, opts, lifecycleEvent$ } = initTaskLifecycleParams();
opts.definitions.registerTaskDefinitions({
report: {
title: 'report',
maxConcurrency: 1,
createTaskRunner: jest.fn(),
},
});
const ephemeralTaskLifecycle = new EphemeralTaskLifecycle(opts);
// queue order: foo, report, foo, report, foo
const fooTasks = [mockTask(), mockTask(), mockTask()];
expect(ephemeralTaskLifecycle.attemptToRun(fooTasks[0])).toMatchObject(asOk(fooTasks[0]));
const firstLimitedTask = mockTask({ taskType: 'report' });
expect(ephemeralTaskLifecycle.attemptToRun(firstLimitedTask)).toMatchObject(
asOk(firstLimitedTask)
);
expect(ephemeralTaskLifecycle.attemptToRun(fooTasks[1])).toMatchObject(asOk(fooTasks[1]));
const secondLimitedTask = mockTask({ taskType: 'report' });
expect(ephemeralTaskLifecycle.attemptToRun(secondLimitedTask)).toMatchObject(
asOk(secondLimitedTask)
);
expect(ephemeralTaskLifecycle.attemptToRun(fooTasks[2])).toMatchObject(asOk(fooTasks[2]));
// pool has capacity for all
poolCapacity.mockReturnValue({
availableWorkers: 10,
});
pool.getOccupiedWorkersByType.mockReturnValue(0);
lifecycleEvent$.next(asTaskPollingCycleEvent(asOk({ result: FillPoolResult.NoTasksClaimed })));
expect(pool.run).toHaveBeenCalledTimes(1);
const taskRunners = pool.run.mock.calls[0][0];
// five tasks were queued; the second `report` task is held back by maxConcurrency: 1
expect(taskRunners).toHaveLength(4);
const asStrings = taskRunners.map((taskRunner) => `${taskRunner}`);
expect(asStrings).toContain(`foo "${fooTasks[0].id}" (Ephemeral)`);
expect(asStrings).toContain(`report "${firstLimitedTask.id}" (Ephemeral)`);
expect(asStrings).toContain(`foo "${fooTasks[1].id}" (Ephemeral)`);
expect(asStrings).toContain(`foo "${fooTasks[2].id}" (Ephemeral)`);
});
test('properly removes from the queue after pulled', () => {
const { poolCapacity, opts, lifecycleEvent$ } = initTaskLifecycleParams();
const ephemeralTaskLifecycle = new EphemeralTaskLifecycle(opts);
const tasks = [mockTask(), mockTask(), mockTask()];
expect(ephemeralTaskLifecycle.attemptToRun(tasks[0])).toMatchObject(asOk(tasks[0]));
expect(ephemeralTaskLifecycle.attemptToRun(tasks[1])).toMatchObject(asOk(tasks[1]));
expect(ephemeralTaskLifecycle.attemptToRun(tasks[2])).toMatchObject(asOk(tasks[2]));
expect(ephemeralTaskLifecycle.queuedTasks).toBe(3);
// with a single free worker, each polling cycle is expected to drain exactly one task
poolCapacity.mockReturnValue({
availableWorkers: 1,
});
lifecycleEvent$.next(asTaskPollingCycleEvent(asOk({ result: FillPoolResult.NoTasksClaimed })));
expect(ephemeralTaskLifecycle.queuedTasks).toBe(2);
poolCapacity.mockReturnValue({
availableWorkers: 1,
});
lifecycleEvent$.next(asTaskPollingCycleEvent(asOk({ result: FillPoolResult.NoTasksClaimed })));
expect(ephemeralTaskLifecycle.queuedTasks).toBe(1);
poolCapacity.mockReturnValue({
availableWorkers: 1,
});
lifecycleEvent$.next(asTaskPollingCycleEvent(asOk({ result: FillPoolResult.NoTasksClaimed })));
expect(ephemeralTaskLifecycle.queuedTasks).toBe(0);
});
});
/**
 * Creates a task instance for the tests: an idle `foo` task with a random id.
 *
 * @param overrides Fields that replace the corresponding defaults.
 * @returns The defaults with `overrides` applied on top.
 */
function mockTask(overrides: Partial<ConcreteTaskInstance> = {}): ConcreteTaskInstance {
  const defaults: ConcreteTaskInstance = {
    id: uuid.v4(),
    runAt: new Date(),
    taskType: 'foo',
    schedule: undefined,
    attempts: 0,
    status: TaskStatus.Idle,
    params: { hello: 'world' },
    state: { baby: 'Henhen' },
    user: 'jimbo',
    scope: ['reporting'],
    ownerId: '',
    startedAt: null,
    retryAt: null,
    scheduledAt: new Date(),
  };
  return { ...defaults, ...overrides };
}

View file

@ -0,0 +1,205 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { Subject, Observable, Subscription } from 'rxjs';
import { filter } from 'rxjs/operators';
import { Logger } from '../../../../src/core/server';
import { Result, asErr, asOk } from './lib/result_type';
import { TaskManagerConfig } from './config';
import { asTaskManagerStatEvent, isTaskRunEvent, isTaskPollingCycleEvent } from './task_events';
import { Middleware } from './lib/middleware';
import { EphemeralTaskInstance } from './task';
import { TaskTypeDictionary } from './task_type_dictionary';
import { TaskLifecycleEvent } from './polling_lifecycle';
import { EphemeralTaskManagerRunner } from './task_running/ephemeral_task_runner';
import { TaskPool } from './task_pool';
/**
 * Constructor options for EphemeralTaskLifecycle.
 */
export interface EphemeralTaskLifecycleOpts {
/** Receives debug messages about pool run results and is passed on to every task runner. */
logger: Logger;
/** Registered task types; consulted for a type's `maxConcurrency` and passed to every task runner. */
definitions: TaskTypeDictionary;
/** Task Manager configuration; `ephemeral_tasks.enabled` and `ephemeral_tasks.request_capacity` are read. */
config: TaskManagerConfig;
/** Supplies the `beforeRun` and `beforeMarkRunning` hooks given to every task runner. */
middleware: Middleware;
/** NOTE(review): not read by the EphemeralTaskLifecycle constructor in this file; confirm whether it is still needed. */
elasticsearchAndSOAvailability$: Observable<boolean>;
/** Pool that runs the tasks; its available workers bound how many queued tasks are pulled at once. */
pool: TaskPool;
/** Event stream whose polling-cycle and task-run events trigger an attempt to drain the queue. */
lifecycleEvent: Observable<TaskLifecycleEvent>;
}
/** An ephemeral task as submitted by a caller; `startedAt` is set when its runner is created. */
export type EphemeralTaskInstanceRequest = Omit<EphemeralTaskInstance, 'startedAt'>;
/** An ephemeral task request together with the time (epoch ms) at which it was queued. */
interface QueuedEphemeralTask {
  task: EphemeralTaskInstanceRequest;
  enqueuedAt: number;
}

/**
 * Buffers requests to run ephemeral tasks in a bounded in-memory queue and hands them to
 * the task pool whenever a polling cycle or a task run completes and the pool has free
 * workers, honouring each task type's `maxConcurrency`.
 */
export class EphemeralTaskLifecycle {
  private definitions: TaskTypeDictionary;
  private pool: TaskPool;
  private lifecycleEvent: Observable<TaskLifecycleEvent>;
  // all task related events (task claimed, task marked as running, etc.) are emitted through events$
  private events$ = new Subject<TaskLifecycleEvent>();
  // requests waiting for a free worker, oldest first
  private ephemeralTaskQueue: QueuedEphemeralTask[] = [];
  private logger: Logger;
  private config: TaskManagerConfig;
  private middleware: Middleware;
  // stays Subscription.EMPTY unless ephemeral tasks are enabled
  private lifecycleSubscription: Subscription = Subscription.EMPTY;

  constructor({
    logger,
    middleware,
    definitions,
    pool,
    lifecycleEvent,
    config,
  }: EphemeralTaskLifecycleOpts) {
    this.logger = logger;
    this.middleware = middleware;
    this.definitions = definitions;
    this.pool = pool;
    this.lifecycleEvent = lifecycleEvent;
    this.config = config;

    if (this.enabled) {
      this.lifecycleSubscription = this.lifecycleEvent
        .pipe(
          filter((e) => {
            const hasPollingCycleCompleted = isTaskPollingCycleEvent(e);
            if (hasPollingCycleCompleted) {
              // report the queue size once per polling cycle
              this.emitEvent(
                asTaskManagerStatEvent('queuedEphemeralTasks', asOk(this.queuedTasks))
              );
            }
            return (
              // when a polling cycle or a task run have just completed
              (hasPollingCycleCompleted || isTaskRunEvent(e)) &&
              // we want to know when the queue has ephemeral task run requests
              this.queuedTasks > 0 &&
              this.getCapacity() > 0
            );
          })
        )
        .subscribe(() => {
          try {
            this.runQueuedTasksWithinCapacity();
          } catch (error) {
            // Handle failures here: a throw escaping the subscriber would otherwise go
            // unreported or tear down the subscription that drains the queue.
            this.logger.error(`Failed to run queued ephemeral tasks: ${error}`);
          }
        });
    }
  }

  /** Whether ephemeral tasks are enabled in the configuration. */
  public get enabled(): boolean {
    return this.config.ephemeral_tasks.enabled;
  }

  /** Stream of events emitted by this lifecycle and by the task runners it creates. */
  public get events(): Observable<TaskLifecycleEvent> {
    return this.events$;
  }

  /**
   * Number of tasks that could be started right now.
   *
   * @param taskType When given and the type defines a `maxConcurrency`, the result is
   * additionally limited by the concurrency still unused for that type (never below 0).
   * @returns The pool's available workers, possibly reduced by the per-type limit.
   */
  private getCapacity = (taskType?: string): number => {
    if (!taskType) {
      return this.pool.availableWorkers;
    }
    const maxConcurrency = this.definitions.get(taskType)?.maxConcurrency;
    if (!maxConcurrency) {
      return this.pool.availableWorkers;
    }
    return Math.max(
      Math.min(
        this.pool.availableWorkers,
        maxConcurrency - this.pool.getOccupiedWorkersByType(taskType)
      ),
      0
    );
  };

  private emitEvent = (event: TaskLifecycleEvent) => {
    this.events$.next(event);
  };

  /**
   * Requests that a task be run ephemerally.
   *
   * @param task The task to queue.
   * @returns `Ok(task)` when the task was queued; `Err(task)` when the lifecycle
   * subscription is closed or the queue already holds
   * `ephemeral_tasks.request_capacity` tasks.
   */
  public attemptToRun(task: EphemeralTaskInstanceRequest) {
    if (this.lifecycleSubscription.closed) {
      return asErr(task);
    }
    return pushIntoSetWithTimestamp(
      this.ephemeralTaskQueue,
      this.config.ephemeral_tasks.request_capacity,
      task
    );
  }

  /** Number of tasks currently waiting in the queue. */
  public get queuedTasks() {
    return this.ephemeralTaskQueue.length;
  }

  /**
   * Removes from the queue, in queue order, as many tasks as the pool can take right now
   * and returns them. Tasks whose type has no capacity left stay queued in their
   * original order.
   */
  private dequeueTasksWithinCapacity(): QueuedEphemeralTask[] {
    let overallCapacity = this.getCapacity();
    // remaining capacity per task type, looked up once per type and then counted down
    const capacityByType = new Map<string, number>();
    const selected: QueuedEphemeralTask[] = [];
    const remaining: QueuedEphemeralTask[] = [];

    for (const queued of this.ephemeralTaskQueue) {
      const { taskType } = queued.task;
      if (overallCapacity > 0) {
        const capacityForType = capacityByType.get(taskType) ?? this.getCapacity(taskType);
        if (capacityForType > 0) {
          overallCapacity--;
          capacityByType.set(taskType, capacityForType - 1);
          selected.push(queued);
          continue;
        }
        capacityByType.set(taskType, capacityForType);
      }
      remaining.push(queued);
    }

    this.ephemeralTaskQueue = remaining;
    return selected;
  }

  /**
   * Pulls the tasks that fit into the pool off the queue, reports how long each one
   * waited, and submits their runners to the pool. The pool's result is only logged.
   */
  private runQueuedTasksWithinCapacity(): void {
    const tasksWithinCapacity = this.dequeueTasksWithinCapacity().map(
      ({ task, enqueuedAt }) => {
        this.emitEvent(
          asTaskManagerStatEvent('ephemeralTaskDelay', asOk(Date.now() - enqueuedAt))
        );
        return this.createTaskRunnerForTask(task);
      }
    );

    if (tasksWithinCapacity.length) {
      this.pool
        .run(tasksWithinCapacity)
        .then((successTaskPoolRunResult) => {
          this.logger.debug(
            `Successful ephemeral task lifecycle resulted in: ${successTaskPoolRunResult}`
          );
        })
        .catch((error) => {
          this.logger.debug(`Failed ephemeral task lifecycle resulted in: ${error}`);
        });
    }
  }

  /** Wraps a queued request in a runner, stamping `startedAt` with the current time. */
  private createTaskRunnerForTask = (
    instance: EphemeralTaskInstanceRequest
  ): EphemeralTaskManagerRunner => {
    return new EphemeralTaskManagerRunner({
      logger: this.logger,
      instance: {
        ...instance,
        startedAt: new Date(),
      },
      definitions: this.definitions,
      beforeRun: this.middleware.beforeRun,
      beforeMarkRunning: this.middleware.beforeMarkRunning,
      onTaskEvent: this.emitEvent,
    });
  };
}
/**
 * Appends a task to a bounded queue, recording the time at which it was added.
 *
 * @param set The queue to append to; it is mutated in place.
 * @param maxCapacity Queue length at which further tasks are refused.
 * @param task The task to enqueue.
 * @returns `Ok(task)` when the task was appended, `Err(task)` when the queue was already
 * at `maxCapacity` and was left untouched.
 */
function pushIntoSetWithTimestamp(
  set: Array<{
    task: EphemeralTaskInstanceRequest;
    enqueuedAt: number;
  }>,
  maxCapacity: number,
  task: EphemeralTaskInstanceRequest
): Result<EphemeralTaskInstanceRequest, EphemeralTaskInstanceRequest> {
  const isFull = set.length >= maxCapacity;
  if (!isFull) {
    set.push({ task, enqueuedAt: Date.now() });
  }
  return isFull ? asErr(task) : asOk(task);
}

View file

@ -15,13 +15,19 @@ export const plugin = (initContext: PluginInitializerContext) => new TaskManager
export {
TaskInstance,
ConcreteTaskInstance,
EphemeralTask,
TaskRunCreatorFunction,
TaskStatus,
RunContext,
} from './task';
export { asInterval } from './lib/intervals';
export { isUnrecoverableError, throwUnrecoverableError } from './task_running';
export {
isUnrecoverableError,
throwUnrecoverableError,
isEphemeralTaskRejectedDueToCapacityError,
} from './task_running';
export { RunNowResult } from './task_scheduling';
export {
TaskManagerPlugin as TaskManager,

View file

@ -51,11 +51,17 @@ describe('managed configuration', () => {
},
custom: {},
},
ephemeral_tasks: {
enabled: true,
request_capacity: 10,
},
});
logger = context.logger.get('taskManager');
const taskManager = new TaskManagerPlugin(context);
(await taskManager.setup(coreMock.createSetup())).registerTaskDefinitions({
(
await taskManager.setup(coreMock.createSetup(), { usageCollection: undefined })
).registerTaskDefinitions({
foo: {
title: 'Foo',
createTaskRunner: jest.fn(),

View file

@ -8,10 +8,10 @@ import { merge } from 'lodash';
import { loggingSystemMock } from 'src/core/server/mocks';
import { configSchema, TaskManagerConfig } from '../config';
import { HealthStatus } from '../monitoring';
import { TaskPersistence } from '../monitoring/task_run_statistics';
import { MonitoredHealth } from '../routes/health';
import { logHealthMetrics, resetLastLogLevel } from './log_health_metrics';
import { Logger } from '../../../../../src/core/server';
import { TaskPersistence } from '../task_events';
jest.mock('./calculate_health_status', () => ({
calculateHealthStatus: jest.fn(),

View file

@ -23,8 +23,10 @@ const createStartMock = () => {
remove: jest.fn(),
schedule: jest.fn(),
runNow: jest.fn(),
ephemeralRunNow: jest.fn(),
ensureScheduled: jest.fn(),
removeIfExists: jest.fn(),
supportsEphemeralTasks: jest.fn(),
};
return mock;
};

View file

@ -835,6 +835,30 @@ function mockStats(
runtime: Partial<Required<RawMonitoringStats['stats']>['runtime']['value']> = {}
): CapacityEstimationParams {
return {
ephemeral: {
status: HealthStatus.OK,
timestamp: new Date().toISOString(),
value: {
load: {
p50: 4,
p90: 6,
p95: 6,
p99: 6,
},
executionsPerCycle: {
p50: 4,
p90: 6,
p95: 6,
p99: 6,
},
queuedTasks: {
p50: 4,
p90: 6,
p95: 6,
p99: 6,
},
},
},
configuration: {
status: HealthStatus.OK,
timestamp: new Date().toISOString(),

View file

@ -100,6 +100,7 @@ export function estimateCapacity(
percentageOfExecutionsUsedByRecurringTasks + percentageOfExecutionsUsedByNonRecurringTasks
)
);
/**
* On average, how much of this kibana's capacity has been historically used to execute
* non-recurring and ephemeral tasks
@ -147,7 +148,7 @@ export function estimateCapacity(
*/
const minRequiredKibanaInstances = Math.ceil(
hasTooLittleCapacityToEstimateRequiredNonRecurringCapacity
? /*
? /*
if load is at 100% or there's no capacity for recurring tasks at the moment, then it's really difficult for us to assess how
much capacity is needed for non-recurring tasks at normal times. This might be representative, but it might
also be a spike and we have no way of knowing that. We'll recommend people scale up by 20% and go from there. */
@ -182,7 +183,6 @@ export function estimateCapacity(
const assumedRequiredThroughputPerMinutePerKibana =
averageCapacityUsedByNonRecurringAndEphemeralTasksPerKibana +
averageRecurringRequiredPerMinute / assumedKibanaInstances;
return {
status:
assumedRequiredThroughputPerMinutePerKibana < capacityPerMinutePerKibana

View file

@ -35,6 +35,10 @@ describe('Configuration Statistics Aggregator', () => {
},
custom: {},
},
ephemeral_tasks: {
enabled: true,
request_capacity: 10,
},
};
const managedConfig = {

View file

@ -0,0 +1,384 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import uuid from 'uuid';
import { Subject, Observable } from 'rxjs';
import stats from 'stats-lite';
import { take, bufferCount, skip, map } from 'rxjs/operators';
import { ConcreteTaskInstance, TaskStatus } from '../task';
import {
asTaskRunEvent,
TaskTiming,
asTaskManagerStatEvent,
TaskPersistence,
} from '../task_events';
import { asOk } from '../lib/result_type';
import { TaskLifecycleEvent } from '../polling_lifecycle';
import { TaskRunResult } from '../task_running';
import {
createEphemeralTaskAggregator,
summarizeEphemeralStat,
SummarizedEphemeralTaskStat,
EphemeralTaskStat,
} from './ephemeral_task_statistics';
import { AggregatedStat } from './runtime_statistics_aggregator';
import { ephemeralTaskLifecycleMock } from '../ephemeral_task_lifecycle.mock';
import { times, takeRight, take as takeLeft } from 'lodash';
// Tests for the ephemeral task aggregator. Each test pushes events through a Subject that
// backs a mocked EphemeralTaskLifecycle and compares the summarized percentiles against the
// expected running window (window size 5).
describe('Ephemeral Task Statistics', () => {
test('returns the average size of the ephemeral queue', async () => {
// queue sizes reported by successive 'queuedEphemeralTasks' events
const queueSize = [2, 6, 10, 10, 10, 6, 2, 0, 0];
const events$ = new Subject<TaskLifecycleEvent>();
const getQueuedTasks = jest.fn();
const ephemeralTaskLifecycle = ephemeralTaskLifecycleMock.create({
events$: events$ as Observable<TaskLifecycleEvent>,
getQueuedTasks,
});
const runningAverageWindowSize = 5;
// the third argument is the max workers value (it is named `maxWorkers` in the load test below)
const ephemeralTaskAggregator = createEphemeralTaskAggregator(
ephemeralTaskLifecycle,
runningAverageWindowSize,
10
);
// asserts that the queuedTasks percentiles equal those of the given window of values
function expectWindowEqualsUpdate(
taskStat: AggregatedStat<SummarizedEphemeralTaskStat>,
window: number[]
) {
expect(taskStat.value.queuedTasks).toMatchObject({
p50: stats.percentile(window, 0.5),
p90: stats.percentile(window, 0.9),
p95: stats.percentile(window, 0.95),
p99: stats.percentile(window, 0.99),
});
}
return new Promise<void>((resolve) => {
ephemeralTaskAggregator
.pipe(
// skip initial stat which is just initialized data which
// ensures we don't stall on combineLatest
skip(1),
// Use 'summarizeEphemeralStat' to receive summarize stats
map(({ key, value }: AggregatedStat<EphemeralTaskStat>) => ({
key,
value: summarizeEphemeralStat(value).value,
})),
// collect one stat per emitted queue size and deliver them as a single array
take(queueSize.length),
bufferCount(queueSize.length)
)
.subscribe((taskStats: Array<AggregatedStat<SummarizedEphemeralTaskStat>>) => {
expectWindowEqualsUpdate(taskStats[0], queueSize.slice(0, 1));
expectWindowEqualsUpdate(taskStats[1], queueSize.slice(0, 2));
expectWindowEqualsUpdate(taskStats[2], queueSize.slice(0, 3));
expectWindowEqualsUpdate(taskStats[3], queueSize.slice(0, 4));
expectWindowEqualsUpdate(taskStats[4], queueSize.slice(0, 5));
// from the 6th value, begin to drop old values as our window is 5
expectWindowEqualsUpdate(taskStats[5], queueSize.slice(1, 6));
expectWindowEqualsUpdate(taskStats[6], queueSize.slice(2, 7));
expectWindowEqualsUpdate(taskStats[7], queueSize.slice(3, 8));
// NOTE(review): nine stats are collected but taskStats[8] is never asserted;
// presumably it should match queueSize.slice(4, 9) - confirm and add the check.
resolve();
});
// events are emitted only after the subscription above is in place
for (const size of queueSize) {
events$.next(asTaskManagerStatEvent('queuedEphemeralTasks', asOk(size)));
}
});
});
test('returns the average number of ephemeral tasks executed per polling cycle', async () => {
const tasksQueueSize = [5, 2, 5, 0];
const executionsPerCycle = [5, 0, 5];
// we expect one event per "task queue size event", and we simulate
// tasks being drained after each one of these events, so we expect
// the first cycle to show zero drained tasks
const expectedTasksDrainedEvents = [0, ...executionsPerCycle];
const events$ = new Subject<TaskLifecycleEvent>();
const getQueuedTasks = jest.fn();
const ephemeralTaskLifecycle = ephemeralTaskLifecycleMock.create({
events$: events$ as Observable<TaskLifecycleEvent>,
getQueuedTasks,
});
const runningAverageWindowSize = 5;
const ephemeralTaskAggregator = createEphemeralTaskAggregator(
ephemeralTaskLifecycle,
runningAverageWindowSize,
10
);
// asserts that the executionsPerCycle percentiles equal those of the given window of values
function expectWindowEqualsUpdate(
taskStat: AggregatedStat<SummarizedEphemeralTaskStat>,
window: number[]
) {
expect(taskStat.value.executionsPerCycle).toMatchObject({
p50: stats.percentile(window, 0.5),
p90: stats.percentile(window, 0.9),
p95: stats.percentile(window, 0.95),
p99: stats.percentile(window, 0.99),
});
}
return new Promise<void>((resolve) => {
ephemeralTaskAggregator
.pipe(
// skip initial stat which is just initialized data which
// ensures we don't stall on combineLatest
skip(1),
// Use 'summarizeEphemeralStat' to receive summarize stats
map(({ key, value }: AggregatedStat<EphemeralTaskStat>) => ({
key,
value: summarizeEphemeralStat(value).value,
})),
take(tasksQueueSize.length),
bufferCount(tasksQueueSize.length)
)
.subscribe((taskStats: Array<AggregatedStat<SummarizedEphemeralTaskStat>>) => {
taskStats.forEach((taskStat, index) => {
// the first (index + 1) expected values, trimmed to the last `runningAverageWindowSize` of them
expectWindowEqualsUpdate(
taskStat,
takeRight(takeLeft(expectedTasksDrainedEvents, index + 1), runningAverageWindowSize)
);
});
resolve();
});
// each cycle: one queue size event followed by that cycle's task run events
for (const tasksDrainedInCycle of executionsPerCycle) {
events$.next(
asTaskManagerStatEvent('queuedEphemeralTasks', asOk(tasksQueueSize.shift() ?? 0))
);
times(tasksDrainedInCycle, () => {
events$.next(mockTaskRunEvent());
});
}
// a final queue size event follows the runs of the last cycle
events$.next(
asTaskManagerStatEvent('queuedEphemeralTasks', asOk(tasksQueueSize.shift() ?? 0))
);
});
});
test('returns the average load added per polling cycle cycle by ephemeral tasks', async () => {
// expectedLoad is tasksExecuted expressed as a percentage of maxWorkers (10)
const tasksExecuted = [0, 5, 10, 10, 10, 5, 5, 0, 0, 0, 0, 0];
const expectedLoad = [0, 50, 100, 100, 100, 50, 50, 0, 0, 0, 0, 0];
const events$ = new Subject<TaskLifecycleEvent>();
const getQueuedTasks = jest.fn();
const ephemeralTaskLifecycle = ephemeralTaskLifecycleMock.create({
events$: events$ as Observable<TaskLifecycleEvent>,
getQueuedTasks,
});
const runningAverageWindowSize = 5;
const maxWorkers = 10;
const ephemeralTaskAggregator = createEphemeralTaskAggregator(
ephemeralTaskLifecycle,
runningAverageWindowSize,
maxWorkers
);
// asserts that the load percentiles equal those of the given window of values
function expectWindowEqualsUpdate(
taskStat: AggregatedStat<SummarizedEphemeralTaskStat>,
window: number[]
) {
expect(taskStat.value.load).toMatchObject({
p50: stats.percentile(window, 0.5),
p90: stats.percentile(window, 0.9),
p95: stats.percentile(window, 0.95),
p99: stats.percentile(window, 0.99),
});
}
return new Promise<void>((resolve) => {
ephemeralTaskAggregator
.pipe(
// skip initial stat which is just initialized data which
// ensures we don't stall on combineLatest
skip(1),
// Use 'summarizeEphemeralStat' to receive summarize stats
map(({ key, value }: AggregatedStat<EphemeralTaskStat>) => ({
key,
value: summarizeEphemeralStat(value).value,
})),
take(tasksExecuted.length),
bufferCount(tasksExecuted.length)
)
.subscribe((taskStats: Array<AggregatedStat<SummarizedEphemeralTaskStat>>) => {
taskStats.forEach((taskStat, index) => {
expectWindowEqualsUpdate(
taskStat,
takeRight(takeLeft(expectedLoad, index + 1), runningAverageWindowSize)
);
});
resolve();
});
// each cycle: that cycle's task run events followed by one queue size event
for (const tasksExecutedInCycle of tasksExecuted) {
times(tasksExecutedInCycle, () => {
events$.next(mockTaskRunEvent());
});
events$.next(asTaskManagerStatEvent('queuedEphemeralTasks', asOk(0)));
}
});
});
});
// Same scenario as the plain load test, but some cycles execute more tasks than
// maxWorkers (10), so the expected load goes above 100.
test('returns the average load added per polling cycle cycle by ephemeral tasks when load exceeds max workers', async () => {
// expectedLoad is tasksExecuted expressed as a percentage of maxWorkers (10)
const tasksExecuted = [0, 5, 10, 20, 15, 10, 5, 0, 0, 0, 0, 0];
const expectedLoad = [0, 50, 100, 200, 150, 100, 50, 0, 0, 0, 0, 0];
const events$ = new Subject<TaskLifecycleEvent>();
const getQueuedTasks = jest.fn();
const ephemeralTaskLifecycle = ephemeralTaskLifecycleMock.create({
events$: events$ as Observable<TaskLifecycleEvent>,
getQueuedTasks,
});
const runningAverageWindowSize = 5;
const maxWorkers = 10;
const ephemeralTaskAggregator = createEphemeralTaskAggregator(
ephemeralTaskLifecycle,
runningAverageWindowSize,
maxWorkers
);
// asserts that the load percentiles equal those of the given window of values
function expectWindowEqualsUpdate(
taskStat: AggregatedStat<SummarizedEphemeralTaskStat>,
window: number[]
) {
expect(taskStat.value.load).toMatchObject({
p50: stats.percentile(window, 0.5),
p90: stats.percentile(window, 0.9),
p95: stats.percentile(window, 0.95),
p99: stats.percentile(window, 0.99),
});
}
return new Promise<void>((resolve) => {
ephemeralTaskAggregator
.pipe(
// skip initial stat which is just initialized data which
// ensures we don't stall on combineLatest
skip(1),
// Use 'summarizeEphemeralStat' to receive summarize stats
map(({ key, value }: AggregatedStat<EphemeralTaskStat>) => ({
key,
value: summarizeEphemeralStat(value).value,
})),
take(tasksExecuted.length),
bufferCount(tasksExecuted.length)
)
.subscribe((taskStats: Array<AggregatedStat<SummarizedEphemeralTaskStat>>) => {
taskStats.forEach((taskStat, index) => {
// the first (index + 1) expected values, trimmed to the last `runningAverageWindowSize` of them
expectWindowEqualsUpdate(
taskStat,
takeRight(takeLeft(expectedLoad, index + 1), runningAverageWindowSize)
);
});
resolve();
});
// each cycle: that cycle's task run events followed by one queue size event
for (const tasksExecutedInCycle of tasksExecuted) {
times(tasksExecutedInCycle, () => {
events$.next(mockTaskRunEvent());
});
events$.next(asTaskManagerStatEvent('queuedEphemeralTasks', asOk(0)));
}
});
});
// Feeds 'ephemeralTaskDelay' events into the aggregator and checks the `delay`
// percentiles against a running window of the emitted values.
test('returns the average delay experienced by tasks in the ephemeral queue', async () => {
// delays carried by successive 'ephemeralTaskDelay' events
const taskDelays = [100, 150, 500, 100, 100, 200, 2000, 10000, 20000, 100];
const events$ = new Subject<TaskLifecycleEvent>();
const getQueuedTasks = jest.fn();
const ephemeralTaskLifecycle = ephemeralTaskLifecycleMock.create({
events$: events$ as Observable<TaskLifecycleEvent>,
getQueuedTasks,
});
const runningAverageWindowSize = 5;
const ephemeralTaskAggregator = createEphemeralTaskAggregator(
ephemeralTaskLifecycle,
runningAverageWindowSize,
10
);
// asserts that the delay percentiles equal those of the given window of values
function expectWindowEqualsUpdate(
taskStat: AggregatedStat<SummarizedEphemeralTaskStat>,
window: number[]
) {
expect(taskStat.value.delay).toMatchObject({
p50: stats.percentile(window, 0.5),
p90: stats.percentile(window, 0.9),
p95: stats.percentile(window, 0.95),
p99: stats.percentile(window, 0.99),
});
}
return new Promise<void>((resolve) => {
ephemeralTaskAggregator
.pipe(
// skip initial stat which is just initialized data which
// ensures we don't stall on combineLatest
skip(1),
// Use 'summarizeEphemeralStat' to receive summarize stats
map(({ key, value }: AggregatedStat<EphemeralTaskStat>) => ({
key,
value: summarizeEphemeralStat(value).value,
})),
take(taskDelays.length),
bufferCount(taskDelays.length)
)
.subscribe((taskStats: Array<AggregatedStat<SummarizedEphemeralTaskStat>>) => {
taskStats.forEach((taskStat, index) => {
// the first (index + 1) delays, trimmed to the last `runningAverageWindowSize` of them
expectWindowEqualsUpdate(
taskStat,
takeRight(takeLeft(taskDelays, index + 1), runningAverageWindowSize)
);
});
resolve();
});
// events are emitted only after the subscription above is in place
for (const delay of taskDelays) {
events$.next(asTaskManagerStatEvent('ephemeralTaskDelay', asOk(delay)));
}
});
});
/**
 * Creates a successful (`Ok`) task run event for a freshly mocked task instance.
 * The run is always reported with `TaskPersistence.Recurring`; timing defaults to
 * a zero-length run and the result defaults to `TaskRunResult.Success`.
 */
const mockTaskRunEvent = (
  overrides: Partial<ConcreteTaskInstance> = {},
  timing: TaskTiming = { start: 0, stop: 0 },
  result: TaskRunResult = TaskRunResult.Success
) => {
  const instance = mockTaskInstance(overrides);
  return asTaskRunEvent(
    instance.id,
    asOk({ task: instance, persistence: TaskPersistence.Recurring, result }),
    timing
  );
};
/**
 * Builds a running `alerting:test` task instance with a random id.
 * Any field supplied in `overrides` replaces the corresponding default.
 */
const mockTaskInstance = (overrides: Partial<ConcreteTaskInstance> = {}): ConcreteTaskInstance => {
  const fiveMinutesInMs = 5 * 60 * 1000;
  const defaults: ConcreteTaskInstance = {
    id: uuid.v4(),
    attempts: 0,
    status: TaskStatus.Running,
    version: '123',
    runAt: new Date(),
    scheduledAt: new Date(),
    startedAt: new Date(),
    retryAt: new Date(Date.now() + fiveMinutesInMs),
    state: {},
    taskType: 'alerting:test',
    params: {
      alertId: '1',
    },
    ownerId: null,
  };
  // overrides are applied last so they win over the defaults
  return { ...defaults, ...overrides };
};

View file

@ -0,0 +1,128 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { map, filter, startWith, buffer, share } from 'rxjs/operators';
import { JsonObject } from '@kbn/common-utils';
import { combineLatest, Observable, zip } from 'rxjs';
import { isOk, Ok } from '../lib/result_type';
import { AggregatedStat, AggregatedStatProvider } from './runtime_statistics_aggregator';
import { EphemeralTaskLifecycle } from '../ephemeral_task_lifecycle';
import { TaskLifecycleEvent } from '../polling_lifecycle';
import { isTaskRunEvent, isTaskManagerStatEvent } from '../task_events';
import {
AveragedStat,
calculateRunningAverage,
createRunningAveragedStat,
} from './task_run_calcultors';
import { HealthStatus } from './monitoring_stats_stream';
/**
 * Raw ephemeral task statistics emitted by `createEphemeralTaskAggregator`.
 * Each field holds the values returned by a running-average queue created with
 * `createRunningAveragedStat`.
 */
export interface EphemeralTaskStat extends JsonObject {
// queue sizes reported by `queuedEphemeralTasks` stat events
queuedTasks: number[];
// number of task run events observed between consecutive queue size events
executionsPerCycle: number[];
// executions per cycle expressed as a percentage of max workers (see `calculateWorkerLoad`)
load: number[];
// values reported by `ephemeralTaskDelay` stat events
delay: number[];
}
/**
 * Summarized form of `EphemeralTaskStat`, in which each series is reduced to an
 * `AveragedStat` by `summarizeEphemeralStat`.
 *
 * NOTE(review): `summarizeEphemeralStat` also returns a `delay` entry that is not
 * declared here; presumably it is only admitted through the `JsonObject` index
 * signature. Consider declaring `delay: AveragedStat` once every producer of this
 * type is confirmed to set it.
 */
export interface SummarizedEphemeralTaskStat extends JsonObject {
queuedTasks: AveragedStat;
executionsPerCycle: AveragedStat;
load: AveragedStat;
}
/**
 * Builds the stream of ephemeral task statistics, emitted under the `ephemeral` key.
 *
 * Two sources are combined:
 *  - per-cycle stats (queue size, executions and load), updated whenever a
 *    `queuedEphemeralTasks` stat event is paired with the count of task runs
 *    seen since the previous one
 *  - delay stats, updated on every `ephemeralTaskDelay` stat event
 *
 * Both sources are seeded with empty values so the combined stream emits right away.
 *
 * @param ephemeralTaskLifecycle source of the task lifecycle events
 * @param runningAverageWindowSize window size passed to each running-average queue
 * @param maxWorkers worker count used to express executions as a load percentage
 */
export function createEphemeralTaskAggregator(
  ephemeralTaskLifecycle: EphemeralTaskLifecycle,
  runningAverageWindowSize: number,
  maxWorkers: number
): AggregatedStatProvider<EphemeralTaskStat> {
  // matches successful (`Ok`) Task Manager stat events carrying the given stat id
  const isOkStatEvent = (statId: string) => (taskEvent: TaskLifecycleEvent) =>
    isTaskManagerStatEvent(taskEvent) &&
    taskEvent.id === statId &&
    isOk<number, never>(taskEvent.event);

  // unwraps the numeric payload of an event that passed `isOkStatEvent`
  const statValueOf = (taskEvent: TaskLifecycleEvent): number =>
    ((taskEvent.event as unknown) as Ok<number>).value;

  const runEvents$ = ephemeralTaskLifecycle.events.pipe(
    filter((taskEvent: TaskLifecycleEvent) => isTaskRunEvent(taskEvent))
  );

  const queueSizes$: Observable<number> = ephemeralTaskLifecycle.events.pipe(
    filter(isOkStatEvent('queuedEphemeralTasks')),
    map<TaskLifecycleEvent, number>(statValueOf),
    // this stream is consumed twice below (as the buffer notifier and in the zip),
    // so it is shared rather than subscribed to the source once per consumer
    share()
  );

  const trackExecutionsPerCycle = createRunningAveragedStat<number>(runningAverageWindowSize);
  const trackQueuedTasks = createRunningAveragedStat<number>(runningAverageWindowSize);
  const trackLoad = createRunningAveragedStat<number>(runningAverageWindowSize);

  // count the task runs seen between two consecutive queue size events
  const executionsSinceLastQueueSize$ = runEvents$.pipe(
    buffer(queueSizes$),
    map((taskEvents: TaskLifecycleEvent[]) => taskEvents.length)
  );

  const pollingCycleStats$ = zip(executionsSinceLastQueueSize$, queueSizes$).pipe(
    map(([executions, queueSize]) => ({
      queuedTasks: trackQueuedTasks(queueSize),
      executionsPerCycle: trackExecutionsPerCycle(executions),
      load: trackLoad(calculateWorkerLoad(maxWorkers, executions)),
    })),
    startWith({
      queuedTasks: [],
      executionsPerCycle: [],
      load: [],
    })
  );

  const trackDelay = createRunningAveragedStat<number>(runningAverageWindowSize);
  const delays$: Observable<number[]> = ephemeralTaskLifecycle.events.pipe(
    filter(isOkStatEvent('ephemeralTaskDelay')),
    map<TaskLifecycleEvent, number[]>((taskEvent: TaskLifecycleEvent) =>
      trackDelay(statValueOf(taskEvent))
    ),
    startWith([])
  );

  return combineLatest([pollingCycleStats$, delays$]).pipe(
    map(
      ([stats, delay]: [Omit<EphemeralTaskStat, 'delay'>, EphemeralTaskStat['delay']]) =>
        ({
          key: 'ephemeral',
          value: { ...stats, delay },
        } as AggregatedStat<EphemeralTaskStat>)
    )
  );
}
/**
 * Expresses the number of executed tasks as a percentage of the available
 * workers, rounded to the nearest integer. The result may exceed 100.
 */
function calculateWorkerLoad(maxWorkers: number, tasksExecuted: number) {
  const loadPercentage = (tasksExecuted * 100) / maxWorkers;
  return Math.round(loadPercentage);
}
/**
 * Reduces each raw ephemeral stat series to its running average.
 * An empty series is summarized as if it contained a single `0`.
 * The reported status is always `HealthStatus.OK`.
 */
export function summarizeEphemeralStat({
  queuedTasks,
  executionsPerCycle,
  load,
  delay,
}: EphemeralTaskStat): { value: SummarizedEphemeralTaskStat; status: HealthStatus } {
  // substitute `[0]` for an empty series before averaging
  const averageOf = (series: number[]) =>
    calculateRunningAverage(series.length ? series : [0]);

  const value = {
    queuedTasks: averageOf(queuedTasks),
    load: averageOf(load),
    executionsPerCycle: averageOf(executionsPerCycle),
    delay: averageOf(delay),
  };
  return { value, status: HealthStatus.OK };
}

View file

@ -16,6 +16,7 @@ import {
import { TaskStore } from '../task_store';
import { TaskPollingLifecycle } from '../polling_lifecycle';
import { ManagedConfiguration } from '../lib/create_managed_configuration';
import { EphemeralTaskLifecycle } from '../ephemeral_task_lifecycle';
export {
MonitoringStats,
@ -28,6 +29,7 @@ export {
export function createMonitoringStats(
taskPollingLifecycle: TaskPollingLifecycle,
ephemeralTaskLifecycle: EphemeralTaskLifecycle,
taskStore: TaskStore,
elasticsearchAndSOAvailability$: Observable<boolean>,
config: TaskManagerConfig,
@ -37,6 +39,7 @@ export function createMonitoringStats(
return createMonitoringStatsStream(
createAggregators(
taskPollingLifecycle,
ephemeralTaskLifecycle,
taskStore,
elasticsearchAndSOAvailability$,
config,

View file

@ -39,6 +39,10 @@ describe('createMonitoringStatsStream', () => {
},
custom: {},
},
ephemeral_tasks: {
enabled: true,
request_capacity: 10,
},
};
it('returns the initial config used to configure Task Manager', async () => {

View file

@ -18,6 +18,12 @@ import {
SummarizedWorkloadStat,
WorkloadStat,
} from './workload_statistics';
import {
EphemeralTaskStat,
createEphemeralTaskAggregator,
SummarizedEphemeralTaskStat,
summarizeEphemeralStat,
} from './ephemeral_task_statistics';
import {
createTaskRunAggregator,
summarizeTaskRunStat,
@ -28,6 +34,7 @@ import { ConfigStat, createConfigurationAggregator } from './configuration_stati
import { TaskManagerConfig } from '../config';
import { AggregatedStatProvider } from './runtime_statistics_aggregator';
import { ManagedConfiguration } from '../lib/create_managed_configuration';
import { EphemeralTaskLifecycle } from '../ephemeral_task_lifecycle';
import { CapacityEstimationStat, withCapacityEstimate } from './capacity_estimation';
export { AggregatedStatProvider, AggregatedStat } from './runtime_statistics_aggregator';
@ -38,6 +45,7 @@ export interface MonitoringStats {
configuration?: MonitoredStat<ConfigStat>;
workload?: MonitoredStat<WorkloadStat>;
runtime?: MonitoredStat<TaskRunStat>;
ephemeral?: MonitoredStat<EphemeralTaskStat>;
};
}
@ -61,19 +69,21 @@ export interface RawMonitoringStats {
configuration?: RawMonitoredStat<ConfigStat>;
workload?: RawMonitoredStat<SummarizedWorkloadStat>;
runtime?: RawMonitoredStat<SummarizedTaskRunStat>;
ephemeral?: RawMonitoredStat<SummarizedEphemeralTaskStat>;
capacity_estimation?: RawMonitoredStat<CapacityEstimationStat>;
};
}
export function createAggregators(
taskPollingLifecycle: TaskPollingLifecycle,
ephemeralTaskLifecycle: EphemeralTaskLifecycle,
taskStore: TaskStore,
elasticsearchAndSOAvailability$: Observable<boolean>,
config: TaskManagerConfig,
managedConfig: ManagedConfiguration,
logger: Logger
): AggregatedStatProvider {
return merge(
const aggregators: AggregatedStatProvider[] = [
createConfigurationAggregator(config, managedConfig),
createTaskRunAggregator(taskPollingLifecycle, config.monitored_stats_running_average_window),
createWorkloadAggregator(
@ -82,8 +92,18 @@ export function createAggregators(
config.monitored_aggregated_stats_refresh_rate,
config.poll_interval,
logger
)
);
),
];
if (ephemeralTaskLifecycle.enabled) {
aggregators.push(
createEphemeralTaskAggregator(
ephemeralTaskLifecycle,
config.monitored_stats_running_average_window,
config.max_workers
)
);
}
return merge(...aggregators);
}
export function createMonitoringStatsStream(
@ -119,7 +139,7 @@ export function summarizeMonitoringStats(
{
// eslint-disable-next-line @typescript-eslint/naming-convention
last_update,
stats: { runtime, workload, configuration },
stats: { runtime, workload, configuration, ephemeral },
}: MonitoringStats,
config: TaskManagerConfig
): RawMonitoringStats {
@ -148,6 +168,14 @@ export function summarizeMonitoringStats(
},
}
: {}),
...(ephemeral
? {
ephemeral: {
timestamp: ephemeral.timestamp,
...summarizeEphemeralStat(ephemeral.value),
},
}
: {}),
});
return {

View file

@ -17,6 +17,8 @@ import {
asTaskPollingCycleEvent,
TaskTiming,
asTaskManagerStatEvent,
TaskPersistence,
asTaskClaimEvent,
} from '../task_events';
import { asOk } from '../lib/result_type';
import { TaskLifecycleEvent } from '../polling_lifecycle';
@ -400,6 +402,44 @@ describe('Task Run Statistics', () => {
runningAverageWindowSize
);
const taskEvents = [
mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success),
mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success),
mockTaskRunEvent(
{ schedule: { interval: '3s' } },
{ start: 0, stop: 0 },
TaskRunResult.Success
),
mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Failed),
mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Failed),
mockTaskRunEvent(
{ schedule: { interval: '3s' } },
{ start: 0, stop: 0 },
TaskRunResult.Failed
),
mockTaskRunEvent(
{ schedule: { interval: '3s' } },
{ start: 0, stop: 0 },
TaskRunResult.RetryScheduled
),
mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.RetryScheduled),
mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success),
mockTaskRunEvent(
{ schedule: { interval: '3s' } },
{ start: 0, stop: 0 },
TaskRunResult.Success
),
mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success, TaskPersistence.Ephemeral),
mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success, TaskPersistence.Ephemeral),
mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success),
mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success, TaskPersistence.Ephemeral),
mockTaskRunEvent(
{ schedule: { interval: '3s' } },
{ start: 0, stop: 0 },
TaskRunResult.Success
),
];
return new Promise<void>((resolve, reject) => {
taskRunAggregator
.pipe(
@ -409,22 +449,10 @@ describe('Task Run Statistics', () => {
// Use 'summarizeTaskRunStat' to receive summarize stats
map(({ key, value }: AggregatedStat<TaskRunStat>) => ({
key,
value: summarizeTaskRunStat(
value,
getTaskManagerConfig({
monitored_task_execution_thresholds: {
custom: {
'alerting:test': {
error_threshold: 59,
warn_threshold: 39,
},
},
},
})
).value,
value: summarizeTaskRunStat(value, getTaskManagerConfig({})).value,
})),
take(10),
bufferCount(10)
take(taskEvents.length),
bufferCount(taskEvents.length)
)
.subscribe((taskStats: Array<AggregatedStat<SummarizedTaskRunStat>>) => {
try {
@ -485,6 +513,31 @@ describe('Task Run Statistics', () => {
"non_recurring": 40,
"recurring": 60,
},
Object {
"ephemeral": 20,
"non_recurring": 40,
"recurring": 40,
},
Object {
"ephemeral": 40,
"non_recurring": 40,
"recurring": 20,
},
Object {
"ephemeral": 40,
"non_recurring": 40,
"recurring": 20,
},
Object {
"ephemeral": 60,
"non_recurring": 20,
"recurring": 20,
},
Object {
"ephemeral": 60,
"non_recurring": 20,
"recurring": 20,
},
]
`);
resolve();
@ -493,40 +546,142 @@ describe('Task Run Statistics', () => {
}
});
events$.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success));
events$.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success));
events$.next(
mockTaskRunEvent(
{ schedule: { interval: '3s' } },
{ start: 0, stop: 0 },
TaskRunResult.Success
taskEvents.forEach((event) => events$.next(event));
});
});
test('frequency of polled tasks by their persistence', async () => {
const events$ = new Subject<TaskLifecycleEvent>();
const taskPollingLifecycle = taskPollingLifecycleMock.create({
events$: events$ as Observable<TaskLifecycleEvent>,
});
const runningAverageWindowSize = 5;
const taskRunAggregator = createTaskRunAggregator(
taskPollingLifecycle,
runningAverageWindowSize
);
const taskEvents = [
mockTaskPollingEvent({}),
mockTaskPollingEvent({}),
mockTaskPollingEvent({ schedule: { interval: '3s' } }),
mockTaskPollingEvent({}),
mockTaskPollingEvent({}),
mockTaskPollingEvent({ schedule: { interval: '3s' } }),
mockTaskPollingEvent({ schedule: { interval: '3s' } }),
mockTaskPollingEvent({}),
mockTaskPollingEvent({}),
mockTaskPollingEvent({ schedule: { interval: '3s' } }),
mockTaskPollingEvent({}),
mockTaskPollingEvent({}),
mockTaskPollingEvent({}),
mockTaskPollingEvent({}),
mockTaskPollingEvent({ schedule: { interval: '3s' } }),
];
return new Promise<void>((resolve, reject) => {
taskRunAggregator
.pipe(
// skip initial stat which is just initialized data which
// ensures we don't stall on combineLatest
skip(1),
// Use 'summarizeTaskRunStat' to receive summarize stats
map(({ key, value }: AggregatedStat<TaskRunStat>) => ({
key,
value: summarizeTaskRunStat(value, getTaskManagerConfig({})).value,
})),
take(taskEvents.length),
bufferCount(taskEvents.length)
)
);
events$.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Failed));
events$.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Failed));
.subscribe((taskStats: Array<AggregatedStat<SummarizedTaskRunStat>>) => {
try {
/**
* At any given time we only keep track of the last X Polling Results
* In the tests this is configured to a window size of 5
*/
expect(taskStats.map((taskStat) => taskStat.value.polling.persistence))
.toMatchInlineSnapshot(`
Array [
Object {
"non_recurring": 0,
"recurring": 0,
},
Object {
"non_recurring": 100,
"recurring": 0,
},
Object {
"non_recurring": 100,
"recurring": 0,
},
Object {
"non_recurring": 67,
"recurring": 33,
},
Object {
"non_recurring": 75,
"recurring": 25,
},
Object {
"non_recurring": 80,
"recurring": 20,
},
Object {
"non_recurring": 60,
"recurring": 40,
},
Object {
"non_recurring": 40,
"recurring": 60,
},
Object {
"non_recurring": 60,
"recurring": 40,
},
Object {
"non_recurring": 60,
"recurring": 40,
},
Object {
"non_recurring": 40,
"recurring": 60,
},
Object {
"non_recurring": 60,
"recurring": 40,
},
Object {
"non_recurring": 80,
"recurring": 20,
},
Object {
"non_recurring": 80,
"recurring": 20,
},
Object {
"non_recurring": 80,
"recurring": 20,
},
]
`);
resolve();
} catch (e) {
reject(e);
}
});
const timing = {
start: 0,
stop: 0,
};
events$.next(
mockTaskRunEvent(
{ schedule: { interval: '3s' } },
{ start: 0, stop: 0 },
TaskRunResult.Failed
)
);
events$.next(
mockTaskRunEvent(
{ schedule: { interval: '3s' } },
{ start: 0, stop: 0 },
TaskRunResult.RetryScheduled
)
);
events$.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.RetryScheduled));
events$.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success));
events$.next(
mockTaskRunEvent(
{ schedule: { interval: '3s' } },
{ start: 0, stop: 0 },
TaskRunResult.Success
)
asTaskPollingCycleEvent(asOk({ result: FillPoolResult.NoTasksClaimed, timing }))
);
events$.next(asTaskManagerStatEvent('pollingDelay', asOk(0)));
events$.next(asTaskManagerStatEvent('claimDuration', asOk(10)));
taskEvents.forEach((event) => events$.next(event));
});
});
@ -713,10 +868,25 @@ function runAtMillisecondsAgo(ms: number): Date {
const mockTaskRunEvent = (
overrides: Partial<ConcreteTaskInstance> = {},
timing: TaskTiming,
result: TaskRunResult = TaskRunResult.Success
result: TaskRunResult = TaskRunResult.Success,
persistence?: TaskPersistence
) => {
const task = mockTaskInstance(overrides);
return asTaskRunEvent(task.id, asOk({ task, result }), timing);
return asTaskRunEvent(
task.id,
asOk({
task,
persistence:
persistence ?? (task.schedule ? TaskPersistence.Recurring : TaskPersistence.NonRecurring),
result,
}),
timing
);
};
/**
 * Creates a successful (`Ok`) task claim event for a freshly mocked task instance.
 */
const mockTaskPollingEvent = (overrides: Partial<ConcreteTaskInstance> = {}) => {
  const claimedTask = mockTaskInstance(overrides);
  const { id } = claimedTask;
  return asTaskClaimEvent(id, asOk(claimedTask));
};
const mockTaskInstance = (overrides: Partial<ConcreteTaskInstance> = {}): ConcreteTaskInstance => ({

View file

@ -20,6 +20,9 @@ import {
TaskTiming,
isTaskManagerStatEvent,
TaskManagerStat,
TaskPersistence,
TaskClaim,
isTaskClaimEvent,
} from '../task_events';
import { isOk, Ok, unwrap } from '../lib/result_type';
import { ConcreteTaskInstance } from '../task';
@ -36,24 +39,17 @@ import { HealthStatus } from './monitoring_stats_stream';
import { TaskPollingLifecycle } from '../polling_lifecycle';
import { TaskExecutionFailureThreshold, TaskManagerConfig } from '../config';
export enum TaskPersistence {
Recurring = 'recurring',
NonRecurring = 'non_recurring',
Ephemeral = 'ephemeral',
}
function persistenceOf(task: ConcreteTaskInstance) {
return task.schedule ? TaskPersistence.Recurring : TaskPersistence.NonRecurring;
}
interface FillPoolStat extends JsonObject {
last_successful_poll: string;
last_polling_delay: string;
duration: number[];
claim_duration: number[];
claim_conflicts: number[];
claim_mismatches: number[];
result_frequency_percent_as_number: FillPoolResult[];
persistence: TaskPersistence[];
}
interface OptionalFillPoolStat extends JsonObject {
last_successful_poll: string;
last_polling_delay: string;
}
interface ExecutionStat extends JsonObject {
@ -68,8 +64,7 @@ export interface TaskRunStat extends JsonObject {
drift_by_type: Record<string, number[]>;
load: number[];
execution: ExecutionStat;
polling: Omit<FillPoolStat, 'last_successful_poll' | 'last_polling_delay'> &
Pick<Partial<FillPoolStat>, 'last_successful_poll' | 'last_polling_delay'>;
polling: FillPoolStat & Partial<OptionalFillPoolStat>;
}
interface FillPoolRawStat extends JsonObject {
@ -83,6 +78,7 @@ interface FillPoolRawStat extends JsonObject {
[FillPoolResult.RunningAtCapacity]: number;
[FillPoolResult.PoolFilled]: number;
};
persistence: TaskPersistenceTypes;
}
interface ResultFrequency extends JsonObject {
@ -126,8 +122,10 @@ export function createTaskRunAggregator(
> = taskPollingLifecycle.events.pipe(
filter((taskEvent: TaskLifecycleEvent) => isTaskRunEvent(taskEvent) && hasTiming(taskEvent)),
map((taskEvent: TaskLifecycleEvent) => {
const { task, result }: RanTask | ErroredTask = unwrap((taskEvent as TaskRun).event);
return taskRunEventToStat(task, taskEvent.timing!, result);
const { task, result, persistence }: RanTask | ErroredTask = unwrap(
(taskEvent as TaskRun).event
);
return taskRunEventToStat(task, persistence, taskEvent.timing!, result);
})
);
@ -153,6 +151,9 @@ export function createTaskRunAggregator(
const claimDurationQueue = createRunningAveragedStat<number>(runningAverageWindowSize);
const claimConflictsQueue = createRunningAveragedStat<number>(runningAverageWindowSize);
const claimMismatchesQueue = createRunningAveragedStat<number>(runningAverageWindowSize);
const polledTasksByPersistenceQueue = createRunningAveragedStat<TaskPersistence>(
runningAverageWindowSize
);
const taskPollingEvents$: Observable<Pick<TaskRunStat, 'polling'>> = combineLatest([
// get latest polling stats
taskPollingLifecycle.events.pipe(
@ -194,6 +195,22 @@ export function createTaskRunAggregator(
),
map(() => new Date().toISOString())
),
// get the average ratio of polled tasks by their persistency
taskPollingLifecycle.events.pipe(
filter(
(taskEvent: TaskLifecycleEvent) => isTaskClaimEvent(taskEvent) && isOk(taskEvent.event)
),
map((taskClaimEvent) => {
const claimedTask = ((taskClaimEvent as TaskClaim).event as Ok<ConcreteTaskInstance>).value;
return polledTasksByPersistenceQueue(
claimedTask.schedule ? TaskPersistence.Recurring : TaskPersistence.NonRecurring
);
}),
// unlike the other streams that emit once TM polls, this will only emit when a task is actually
// claimed, so to make sure `combineLatest` doesn't stall until a task is actually emitted we seed
// the stream with an empty queue
startWith([])
),
// get duration of task claim stage in polling
taskPollingLifecycle.events.pipe(
filter(
@ -204,16 +221,15 @@ export function createTaskRunAggregator(
),
map((claimDurationEvent) => {
const duration = ((claimDurationEvent as TaskManagerStat).event as Ok<number>).value;
return {
claimDuration: duration ? claimDurationQueue(duration) : claimDurationQueue(),
};
return duration ? claimDurationQueue(duration) : claimDurationQueue();
})
),
]).pipe(
map(([{ polling }, pollingDelay, { claimDuration }]) => ({
map(([{ polling }, pollingDelay, persistence, claimDuration]) => ({
polling: {
last_polling_delay: pollingDelay,
claim_duration: claimDuration,
persistence,
...polling,
},
}))
@ -245,13 +261,14 @@ export function createTaskRunAggregator(
claim_conflicts: [],
claim_mismatches: [],
result_frequency_percent_as_number: [],
persistence: [],
},
})
),
]).pipe(
map(
([taskRun, load, polling]: [
Pick<TaskRunStat, 'drift' | 'execution'>,
Pick<TaskRunStat, 'drift' | 'drift_by_type' | 'execution'>,
Pick<TaskRunStat, 'load'>,
Pick<TaskRunStat, 'polling'>
]) => {
@ -285,12 +302,12 @@ function createTaskRunEventToStat(runningAverageWindowSize: number) {
);
return (
task: ConcreteTaskInstance,
persistence: TaskPersistence,
timing: TaskTiming,
result: TaskRunResult
): Pick<TaskRunStat, 'drift' | 'drift_by_type' | 'execution'> => {
const drift = timing!.start - task.runAt.getTime();
const duration = timing!.stop - timing!.start;
const persistence = persistenceOf(task);
return {
drift: driftQueue(drift),
drift_by_type: driftByTaskQueue(task.taskType, drift),
@ -318,11 +335,6 @@ const DEFAULT_POLLING_FREQUENCIES = {
[FillPoolResult.RunningAtCapacity]: 0,
[FillPoolResult.PoolFilled]: 0,
};
const DEFAULT_PERSISTENCE_FREQUENCIES = {
[TaskPersistence.Recurring]: 0,
[TaskPersistence.NonRecurring]: 0,
[TaskPersistence.Ephemeral]: 0,
};
export function summarizeTaskRunStat(
{
@ -337,6 +349,7 @@ export function summarizeTaskRunStat(
result_frequency_percent_as_number: pollingResultFrequency,
claim_conflicts: claimConflicts,
claim_mismatches: claimMismatches,
persistence: pollingPersistence,
},
drift,
// eslint-disable-next-line @typescript-eslint/naming-convention
@ -366,6 +379,11 @@ export function summarizeTaskRunStat(
...DEFAULT_POLLING_FREQUENCIES,
...calculateFrequency<FillPoolResult>(pollingResultFrequency as FillPoolResult[]),
},
persistence: {
[TaskPersistence.Recurring]: 0,
[TaskPersistence.NonRecurring]: 0,
...calculateFrequency<TaskPersistence>(pollingPersistence as TaskPersistence[]),
},
},
drift: calculateRunningAverage(drift),
drift_by_type: mapValues(drift_by_type, (typedDrift) => calculateRunningAverage(typedDrift)),
@ -376,7 +394,9 @@ export function summarizeTaskRunStat(
calculateRunningAverage(typedDurations)
),
persistence: {
...DEFAULT_PERSISTENCE_FREQUENCIES,
[TaskPersistence.Recurring]: 0,
[TaskPersistence.NonRecurring]: 0,
[TaskPersistence.Ephemeral]: 0,
...calculateFrequency<TaskPersistence>(persistence),
},
result_frequency_percent_as_number: mapValues(

View file

@ -38,12 +38,18 @@ describe('TaskManagerPlugin', () => {
},
custom: {},
},
ephemeral_tasks: {
enabled: false,
request_capacity: 10,
},
});
pluginInitializerContext.env.instanceUuid = '';
const taskManagerPlugin = new TaskManagerPlugin(pluginInitializerContext);
expect(() => taskManagerPlugin.setup(coreMock.createSetup())).toThrow(
expect(() =>
taskManagerPlugin.setup(coreMock.createSetup(), { usageCollection: undefined })
).toThrow(
new Error(`TaskManager is unable to start as Kibana has no valid UUID assigned to it.`)
);
});
@ -72,11 +78,17 @@ describe('TaskManagerPlugin', () => {
},
custom: {},
},
ephemeral_tasks: {
enabled: true,
request_capacity: 10,
},
});
const taskManagerPlugin = new TaskManagerPlugin(pluginInitializerContext);
const setupApi = await taskManagerPlugin.setup(coreMock.createSetup());
const setupApi = await taskManagerPlugin.setup(coreMock.createSetup(), {
usageCollection: undefined,
});
// we only start a poller if we have task types that we support and we track
// phases (moving from Setup to Start) based on whether the poller is working

View file

@ -7,6 +7,7 @@
import { combineLatest, Observable, Subject } from 'rxjs';
import { map, distinctUntilChanged } from 'rxjs/operators';
import { UsageCollectionSetup } from 'src/plugins/usage_collection/server';
import {
PluginInitializerContext,
Plugin,
@ -27,6 +28,9 @@ import { createManagedConfiguration } from './lib/create_managed_configuration';
import { TaskScheduling } from './task_scheduling';
import { healthRoute } from './routes';
import { createMonitoringStats, MonitoringStats } from './monitoring';
import { EphemeralTaskLifecycle } from './ephemeral_task_lifecycle';
import { EphemeralTask } from './task';
import { registerTaskManagerUsageCollector } from './usage';
export type TaskManagerSetupContract = {
/**
@ -38,15 +42,16 @@ export type TaskManagerSetupContract = {
export type TaskManagerStartContract = Pick<
TaskScheduling,
'schedule' | 'runNow' | 'ensureScheduled'
'schedule' | 'runNow' | 'ephemeralRunNow' | 'ensureScheduled'
> &
Pick<TaskStore, 'fetch' | 'get' | 'remove'> & {
removeIfExists: TaskStore['remove'];
};
} & { supportsEphemeralTasks: () => boolean };
export class TaskManagerPlugin
implements Plugin<TaskManagerSetupContract, TaskManagerStartContract> {
private taskPollingLifecycle?: TaskPollingLifecycle;
private ephemeralTaskLifecycle?: EphemeralTaskLifecycle;
private taskManagerId?: string;
private config: TaskManagerConfig;
private logger: Logger;
@ -62,7 +67,10 @@ export class TaskManagerPlugin
this.definitions = new TaskTypeDictionary(this.logger);
}
public setup(core: CoreSetup): TaskManagerSetupContract {
public setup(
core: CoreSetup,
plugins: { usageCollection?: UsageCollectionSetup }
): TaskManagerSetupContract {
this.elasticsearchAndSOAvailability$ = getElasticsearchAndSOAvailability(core.status.core$);
setupSavedObjects(core.savedObjects, this.config);
@ -79,7 +87,7 @@ export class TaskManagerPlugin
// Routes
const router = core.http.createRouter();
const serviceStatus$ = healthRoute(
const { serviceStatus$, monitoredHealth$ } = healthRoute(
router,
this.monitoringStats$,
this.logger,
@ -95,6 +103,16 @@ export class TaskManagerPlugin
)
);
const usageCollection = plugins.usageCollection;
if (usageCollection) {
registerTaskManagerUsageCollector(
usageCollection,
monitoredHealth$,
this.config.ephemeral_tasks.enabled,
this.config.ephemeral_tasks.request_capacity
);
}
return {
index: this.config.index,
addMiddleware: (middleware: Middleware) => {
@ -138,8 +156,19 @@ export class TaskManagerPlugin
...managedConfiguration,
});
this.ephemeralTaskLifecycle = new EphemeralTaskLifecycle({
config: this.config!,
definitions: this.definitions,
logger: this.logger,
middleware: this.middleware,
elasticsearchAndSOAvailability$: this.elasticsearchAndSOAvailability$!,
pool: this.taskPollingLifecycle.pool,
lifecycleEvent: this.taskPollingLifecycle.events,
});
createMonitoringStats(
this.taskPollingLifecycle,
this.ephemeralTaskLifecycle,
taskStore,
this.elasticsearchAndSOAvailability$!,
this.config!,
@ -152,7 +181,9 @@ export class TaskManagerPlugin
taskStore,
middleware: this.middleware,
taskPollingLifecycle: this.taskPollingLifecycle,
ephemeralTaskLifecycle: this.ephemeralTaskLifecycle,
definitions: this.definitions,
taskManagerId: taskStore.taskManagerId,
});
return {
@ -163,6 +194,8 @@ export class TaskManagerPlugin
schedule: (...args) => taskScheduling.schedule(...args),
ensureScheduled: (...args) => taskScheduling.ensureScheduled(...args),
runNow: (...args) => taskScheduling.runNow(...args),
ephemeralRunNow: (task: EphemeralTask) => taskScheduling.ephemeralRunNow(task),
supportsEphemeralTasks: () => this.config.ephemeral_tasks.enabled,
};
}

View file

@ -58,6 +58,10 @@ describe('TaskPollingLifecycle', () => {
},
custom: {},
},
ephemeral_tasks: {
enabled: true,
request_capacity: 10,
},
},
taskStore: mockTaskStore,
logger: taskManagerLogger,

View file

@ -25,6 +25,7 @@ import {
asTaskPollingCycleEvent,
TaskManagerStat,
asTaskManagerStatEvent,
EphemeralTaskRejectedDueToCapacity,
} from './task_events';
import { fillPool, FillPoolResult, TimedFillPoolResult } from './lib/fill_pool';
import { Middleware } from './lib/middleware';
@ -60,7 +61,8 @@ export type TaskLifecycleEvent =
| TaskClaim
| TaskRunRequest
| TaskPollingCycle
| TaskManagerStat;
| TaskManagerStat
| EphemeralTaskRejectedDueToCapacity;
/**
* The public interface into the task manager system.
@ -73,7 +75,7 @@ export class TaskPollingLifecycle {
private bufferedStore: BufferedTaskStore;
private logger: Logger;
private pool: TaskPool;
public pool: TaskPool;
// all task related events (task claimed, task marked as running, etc.) are emitted through events$
private events$ = new Subject<TaskLifecycleEvent>();
// all on-demand requests we wish to pipe into the poller
@ -160,7 +162,15 @@ export class TaskPollingLifecycle {
pollInterval$: pollIntervalConfiguration$,
pollIntervalDelay$,
bufferCapacity: config.request_capacity,
getCapacity: () => this.pool.availableWorkers,
getCapacity: () => {
const capacity = this.pool.availableWorkers;
if (!capacity) {
// if there isn't capacity, emit a load event so that we can expose how often
// high load causes the poller to skip work (work isn't called when there is no capacity)
this.emitEvent(asTaskManagerStatEvent('load', asOk(this.pool.workerLoad)));
}
return capacity;
},
pollRequests$: this.claimRequests$,
work: this.pollForWork,
// Time out the `work` phase if it takes longer than a certain number of polling cycles
@ -227,8 +237,8 @@ export class TaskPollingLifecycle {
private pollForWork = async (...tasksToClaim: string[]): Promise<TimedFillPoolResult> => {
return fillPool(
// claim available tasks
() =>
claimAvailableTasks(
() => {
return claimAvailableTasks(
tasksToClaim.splice(0, this.pool.availableWorkers),
this.taskClaiming,
this.logger
@ -242,11 +252,18 @@ export class TaskPollingLifecycle {
}
})
)
),
);
},
// wrap each task in a Task Runner
this.createTaskRunnerForTask,
// place tasks in the Task Pool
async (tasks: TaskRunner[]) => await this.pool.run(tasks)
async (tasks: TaskRunner[]) => {
const result = await this.pool.run(tasks);
// Emit the load after fetching tasks, giving us a good metric for evaluating how
// busy Task manager tends to be in this Kibana instance
this.emitEvent(asTaskManagerStatEvent('load', asOk(this.pool.workerLoad)));
return result;
}
);
};

View file

@ -11,7 +11,7 @@
import apm from 'elastic-apm-node';
import { Subject, Observable, from, of } from 'rxjs';
import { map, mergeScan } from 'rxjs/operators';
import { difference, partition, groupBy, mapValues, countBy, pick } from 'lodash';
import { difference, partition, groupBy, mapValues, countBy, pick, isPlainObject } from 'lodash';
import { some, none } from 'fp-ts/lib/Option';
import { Logger } from '../../../../../src/core/server';
@ -87,6 +87,9 @@ export interface ClaimOwnershipResult {
docs: ConcreteTaskInstance[];
timing?: TaskTiming;
}
/**
 * Type guard that checks whether an arbitrary value has the shape of a
 * ClaimOwnershipResult: a plain-object `stats` property and an array `docs` property.
 *
 * The value is `unknown`, so it may be `null` or `undefined`; optional chaining makes
 * the guard return `false` for those instead of throwing a TypeError on property access.
 *
 * @param result - the value to inspect
 * @returns true when `result.stats` is a plain object and `result.docs` is an array
 */
export const isClaimOwnershipResult = (result: unknown): result is ClaimOwnershipResult =>
  isPlainObject((result as ClaimOwnershipResult | null | undefined)?.stats) &&
  Array.isArray((result as ClaimOwnershipResult).docs);
enum BatchConcurrency {
Unlimited,

View file

@ -23,6 +23,7 @@ import {
import { ServiceStatusLevels } from 'src/core/server';
import { configSchema, TaskManagerConfig } from '../config';
import { calculateHealthStatusMock } from '../lib/calculate_health_status.mock';
import { FillPoolResult } from '../lib/fill_pool';
jest.mock('../lib/log_health_metrics', () => ({
logHealthMetrics: jest.fn(),
@ -106,6 +107,7 @@ describe('healthRoute', () => {
const warnRuntimeStat = mockHealthStats();
const warnConfigurationStat = mockHealthStats();
const warnWorkloadStat = mockHealthStats();
const warnEphemeralStat = mockHealthStats();
const stats$ = new Subject<MonitoringStats>();
@ -130,8 +132,10 @@ describe('healthRoute', () => {
stats$.next(warnConfigurationStat);
await sleep(1001);
stats$.next(warnWorkloadStat);
await sleep(1001);
stats$.next(warnEphemeralStat);
expect(logHealthMetrics).toBeCalledTimes(3);
expect(logHealthMetrics).toBeCalledTimes(4);
expect(logHealthMetrics.mock.calls[0][0]).toMatchObject({
id,
timestamp: expect.any(String),
@ -156,6 +160,14 @@ describe('healthRoute', () => {
summarizeMonitoringStats(warnWorkloadStat, getTaskManagerConfig({}))
),
});
expect(logHealthMetrics.mock.calls[2][0]).toMatchObject({
id,
timestamp: expect.any(String),
status: expect.any(String),
...ignoreCapacityEstimation(
summarizeMonitoringStats(warnEphemeralStat, getTaskManagerConfig({}))
),
});
});
it(`logs at an error level if the status is error`, async () => {
@ -168,6 +180,7 @@ describe('healthRoute', () => {
const errorRuntimeStat = mockHealthStats();
const errorConfigurationStat = mockHealthStats();
const errorWorkloadStat = mockHealthStats();
const errorEphemeralStat = mockHealthStats();
const stats$ = new Subject<MonitoringStats>();
@ -192,8 +205,10 @@ describe('healthRoute', () => {
stats$.next(errorConfigurationStat);
await sleep(1001);
stats$.next(errorWorkloadStat);
await sleep(1001);
stats$.next(errorEphemeralStat);
expect(logHealthMetrics).toBeCalledTimes(3);
expect(logHealthMetrics).toBeCalledTimes(4);
expect(logHealthMetrics.mock.calls[0][0]).toMatchObject({
id,
timestamp: expect.any(String),
@ -218,6 +233,14 @@ describe('healthRoute', () => {
summarizeMonitoringStats(errorWorkloadStat, getTaskManagerConfig({}))
),
});
expect(logHealthMetrics.mock.calls[2][0]).toMatchObject({
id,
timestamp: expect.any(String),
status: expect.any(String),
...ignoreCapacityEstimation(
summarizeMonitoringStats(errorEphemeralStat, getTaskManagerConfig({}))
),
});
});
it('returns a error status if the overall stats have not been updated within the required hot freshness', async () => {
@ -225,7 +248,7 @@ describe('healthRoute', () => {
const stats$ = new Subject<MonitoringStats>();
const serviceStatus$ = healthRoute(
const { serviceStatus$ } = healthRoute(
router,
stats$,
loggingSystemMock.create().get(),
@ -264,6 +287,9 @@ describe('healthRoute', () => {
workload: {
timestamp: expect.any(String),
},
ephemeral: {
timestamp: expect.any(String),
},
runtime: {
timestamp: expect.any(String),
value: {
@ -335,6 +361,9 @@ describe('healthRoute', () => {
workload: {
timestamp: expect.any(String),
},
ephemeral: {
timestamp: expect.any(String),
},
runtime: {
timestamp: expect.any(String),
value: {
@ -403,6 +432,9 @@ describe('healthRoute', () => {
workload: {
timestamp: expect.any(String),
},
ephemeral: {
timestamp: expect.any(String),
},
runtime: {
timestamp: expect.any(String),
value: {
@ -488,14 +520,25 @@ function mockHealthStats(overrides = {}) {
duration: [500, 400, 3000],
claim_conflicts: [0, 100, 75],
claim_mismatches: [0, 100, 75],
claim_duration: [0, 100, 75],
result_frequency_percent_as_number: [
'NoTasksClaimed',
'NoTasksClaimed',
'NoTasksClaimed',
FillPoolResult.NoTasksClaimed,
FillPoolResult.NoTasksClaimed,
FillPoolResult.NoTasksClaimed,
],
persistence: [],
},
},
},
ephemeral: {
timestamp: new Date().toISOString(),
value: {
load: [],
executionsPerCycle: [],
queuedTasks: [],
delay: [],
},
},
},
};
return (merge(stub, overrides) as unknown) as MonitoringStats;

View file

@ -53,7 +53,10 @@ export function healthRoute(
logger: Logger,
taskManagerId: string,
config: TaskManagerConfig
): Observable<TaskManagerServiceStatus> {
): {
serviceStatus$: Observable<TaskManagerServiceStatus>;
monitoredHealth$: Observable<MonitoredHealth>;
} {
// if "hot" health stats are any more stale than monitored_stats_required_freshness (pollInterval +1s buffer by default)
// consider the system unhealthy
const requiredHotStatsFreshness: number = config.monitored_stats_required_freshness;
@ -67,6 +70,7 @@ export function healthRoute(
}
const serviceStatus$: Subject<TaskManagerServiceStatus> = new Subject<TaskManagerServiceStatus>();
const monitoredHealth$: Subject<MonitoredHealth> = new Subject<MonitoredHealth>();
/* keep track of last health summary, as we'll return that to the next call to _health */
let lastMonitoredStats: MonitoringStats | null = null;
@ -84,6 +88,7 @@ export function healthRoute(
)
.subscribe(([monitoredHealth, serviceStatus]) => {
serviceStatus$.next(serviceStatus);
monitoredHealth$.next(monitoredHealth);
logHealthMetrics(monitoredHealth, logger, config);
});
@ -104,7 +109,7 @@ export function healthRoute(
});
}
);
return serviceStatus$;
return { serviceStatus$, monitoredHealth$ };
}
export function withServiceStatus(

View file

@ -363,6 +363,13 @@ export interface ConcreteTaskInstance extends TaskInstance {
ownerId: string | null;
}
/**
 * The minimal description of an ephemeral task: only the `taskType`, `params`, `state`
 * and `scope` fields of a ConcreteTaskInstance. Note that it does not include an `id`.
 */
export type EphemeralTask = Pick<ConcreteTaskInstance, 'taskType' | 'params' | 'state' | 'scope'>;
/**
 * An EphemeralTask combined with the `id`, timing (`scheduledAt`, `startedAt`, `runAt`),
 * `status` and `ownerId` fields of a ConcreteTaskInstance.
 */
export type EphemeralTaskInstance = EphemeralTask &
Pick<ConcreteTaskInstance, 'id' | 'scheduledAt' | 'startedAt' | 'runAt' | 'status' | 'ownerId'>;
export type SerializedConcreteTaskInstance = Omit<
ConcreteTaskInstance,
'state' | 'params' | 'scheduledAt' | 'startedAt' | 'retryAt' | 'runAt'

View file

@ -13,6 +13,13 @@ import { Result, Err } from './lib/result_type';
import { ClaimAndFillPoolResult } from './lib/fill_pool';
import { PollingError } from './polling';
import { TaskRunResult } from './task_running';
import { EphemeralTaskInstanceRequest } from './ephemeral_task_lifecycle';
/**
 * Classifies how a task that has been run is persisted. The value is attached to
 * task run events through the `persistence` field of RanTask.
 */
export enum TaskPersistence {
// the task has a schedule
Recurring = 'recurring',
// the task has no schedule
NonRecurring = 'non_recurring',
// the task was run by the ephemeral task runner
Ephemeral = 'ephemeral',
}
export enum TaskEventType {
TASK_CLAIM = 'TASK_CLAIM',
@ -21,6 +28,7 @@ export enum TaskEventType {
TASK_RUN_REQUEST = 'TASK_RUN_REQUEST',
TASK_POLLING_CYCLE = 'TASK_POLLING_CYCLE',
TASK_MANAGER_STAT = 'TASK_MANAGER_STAT',
EPHEMERAL_TASK_DELAYED_DUE_TO_CAPACITY = 'EPHEMERAL_TASK_DELAYED_DUE_TO_CAPACITY',
}
export enum TaskClaimErrorType {
@ -48,6 +56,7 @@ export interface TaskEvent<OkResult, ErrorResult, ID = string> {
}
/**
 * Payload describing a task that has been run; it is the Ok result of a TaskRun event
 * and the base of ErroredTask.
 */
export interface RanTask {
// the task instance that was run
task: ConcreteTaskInstance;
// whether the task is recurring, non-recurring or ephemeral
persistence: TaskPersistence;
// the outcome of the run
result: TaskRunResult;
}
export type ErroredTask = RanTask & {
@ -62,9 +71,15 @@ export type TaskMarkRunning = TaskEvent<ConcreteTaskInstance, Error>;
export type TaskRun = TaskEvent<RanTask, ErroredTask>;
export type TaskClaim = TaskEvent<ConcreteTaskInstance, ClaimTaskErr>;
export type TaskRunRequest = TaskEvent<ConcreteTaskInstance, Error>;
export type EphemeralTaskRejectedDueToCapacity = TaskEvent<EphemeralTaskInstanceRequest, Error>;
export type TaskPollingCycle<T = string> = TaskEvent<ClaimAndFillPoolResult, PollingError<T>>;
export type TaskManagerStats = 'load' | 'pollingDelay' | 'claimDuration';
export type TaskManagerStats =
| 'load'
| 'pollingDelay'
| 'claimDuration'
| 'queuedEphemeralTasks'
| 'ephemeralTaskDelay';
export type TaskManagerStat = TaskEvent<number, never, TaskManagerStats>;
export type OkResultOf<EventType> = EventType extends TaskEvent<infer OkResult, infer ErrorResult>
@ -149,6 +164,19 @@ export function asTaskManagerStatEvent(
};
}
/**
 * Wraps a result in a task event of type EPHEMERAL_TASK_DELAYED_DUE_TO_CAPACITY.
 *
 * @param id - the id of the task the event relates to
 * @param event - the Ok/Err result carried by the event
 * @param timing - optional timing information to attach to the event
 * @returns the assembled event object
 */
export function asEphemeralTaskRejectedDueToCapacityEvent(
  id: string,
  event: Result<EphemeralTaskInstanceRequest, Error>,
  timing?: TaskTiming
): EphemeralTaskRejectedDueToCapacity {
  const type = TaskEventType.EPHEMERAL_TASK_DELAYED_DUE_TO_CAPACITY;
  return { id, type, event, timing };
}
export function isTaskMarkRunningEvent(
taskEvent: TaskEvent<unknown, unknown>
): taskEvent is TaskMarkRunning {
@ -175,3 +203,8 @@ export function isTaskManagerStatEvent(
): taskEvent is TaskManagerStat {
return taskEvent.type === TaskEventType.TASK_MANAGER_STAT;
}
/**
 * Type guard identifying events whose type is EPHEMERAL_TASK_DELAYED_DUE_TO_CAPACITY.
 *
 * @param taskEvent - any task event
 * @returns true when the event's `type` matches
 */
export function isEphemeralTaskRejectedDueToCapacityEvent(
  taskEvent: TaskEvent<unknown, unknown>
): taskEvent is EphemeralTaskRejectedDueToCapacity {
  const { type: eventType } = taskEvent;
  return eventType === TaskEventType.EPHEMERAL_TASK_DELAYED_DUE_TO_CAPACITY;
}

View file

@ -0,0 +1,48 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { TaskPool } from './task_pool';
/**
 * Default capacity provider for the TaskPool mock: an idle pool with 10 workers,
 * all of them available. A fresh object is returned on every call.
 */
const defaultGetCapacityOverride = (): Partial<{
  load: number;
  occupiedWorkers: number;
  workerLoad: number;
  max: number;
  availableWorkers: number;
}> => {
  return {
    load: 0,
    occupiedWorkers: 0,
    workerLoad: 0,
    max: 10,
    availableWorkers: 10,
  };
};
/**
 * Builds a jest mock of TaskPool.
 *
 * The capacity properties are getters that call `getCapacityOverride` on every access,
 * so a test can change the reported capacity after the mock has been created. Any metric
 * missing from the override falls back to 0, except `max` and `availableWorkers`, which
 * fall back to 10.
 *
 * @param getCapacityOverride - provider of the capacity metrics to report
 * @returns an object cast to jest.Mocked<TaskPool>
 */
const createTaskPoolMock = (getCapacityOverride = defaultGetCapacityOverride) => {
  // Reads a single metric from a fresh override snapshot, using the fallback when it is absent
  const metric = (
    name: keyof ReturnType<typeof getCapacityOverride>,
    fallback: number
  ): number => getCapacityOverride()[name] ?? fallback;

  const pool = {
    get load() {
      return metric('load', 0);
    },
    get occupiedWorkers() {
      return metric('occupiedWorkers', 0);
    },
    get workerLoad() {
      return metric('workerLoad', 0);
    },
    get max() {
      return metric('max', 10);
    },
    get availableWorkers() {
      return metric('availableWorkers', 10);
    },
    getOccupiedWorkersByType: jest.fn(),
    run: jest.fn(),
    cancelRunningTasks: jest.fn(),
  };

  return (pool as unknown) as jest.Mocked<TaskPool>;
};
/**
 * Public entry point of this mock module: `TaskPoolMock.create(getCapacityOverride?)`
 * returns a new mocked TaskPool.
 */
export const TaskPoolMock = {
create: createTaskPoolMock,
};

View file

@ -16,8 +16,7 @@ import { padStart } from 'lodash';
import { Logger } from '../../../../src/core/server';
import { TaskRunner } from './task_running';
import { isTaskSavedObjectNotFoundError } from './lib/is_task_not_found_error';
import { TaskManagerStat, asTaskManagerStatEvent } from './task_events';
import { asOk } from './lib/result_type';
import { TaskManagerStat } from './task_events';
interface Opts {
maxWorkers$: Observable<number>;
@ -84,10 +83,6 @@ export class TaskPool {
* Gets how many workers are currently available.
*/
public get availableWorkers() {
// emit load whenever we check how many available workers there are
// this should happen less often than the actual changes to the worker queue
// so is lighter than emitting the load every time we add/remove a task from the queue
this.load$.next(asTaskManagerStatEvent('load', asOk(this.workerLoad)));
// cancel expired task whenever a call is made to check for capacity
// this ensures that we don't end up with a queue of hung tasks causing both
// the poller and the pool from hanging due to lack of capacity
@ -174,7 +169,9 @@ export class TaskPool {
this.logger.warn(errorLogLine);
}
})
.then(() => this.tasksInPool.delete(taskRunner.id));
.then(() => {
this.tasksInPool.delete(taskRunner.id);
});
}
private handleFailureOfMarkAsRunning(task: TaskRunner, err: Error) {

View file

@ -0,0 +1,337 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
/*
* This module contains the core logic for running an individual task.
* It handles the full lifecycle of a task run, including error handling,
* rescheduling, middleware application, etc.
*/
import apm from 'elastic-apm-node';
import { withSpan } from '@kbn/apm-utils';
import { identity } from 'lodash';
import { Logger } from '../../../../../src/core/server';
import { Middleware } from '../lib/middleware';
import { asOk, asErr, eitherAsync, Result } from '../lib/result_type';
import {
TaskRun,
TaskMarkRunning,
asTaskRunEvent,
asTaskMarkRunningEvent,
startTaskTimer,
TaskTiming,
TaskPersistence,
} from '../task_events';
import { intervalFromDate } from '../lib/intervals';
import {
CancellableTask,
ConcreteTaskInstance,
isFailedRunResult,
SuccessfulRunResult,
FailedRunResult,
TaskStatus,
EphemeralTaskInstance,
} from '../task';
import { TaskTypeDictionary } from '../task_type_dictionary';
import {
asPending,
asReadyToRun,
EMPTY_RUN_RESULT,
isPending,
isReadyToRun,
TaskRunner,
TaskRunningInstance,
TaskRunResult,
} from './task_runner';
/**
 * Construction options for EphemeralTaskManagerRunner.
 */
type Opts = {
// logger used for the runner's debug and error output
logger: Logger;
// dictionary from which the task's definition is looked up by task type
definitions: TaskTypeDictionary;
// the ephemeral task instance this runner will run
instance: EphemeralTaskInstance;
// optional listener for mark-running and run events
onTaskEvent?: (event: TaskRun | TaskMarkRunning) => void;
// middleware hooks applied before the task is marked as running and before it is run
} & Pick<Middleware, 'beforeRun' | 'beforeMarkRunning'>;
// Ephemeral tasks cannot be rescheduled or scheduled to run again in the future, so the
// run results handled by the ephemeral runner omit the `runAt` and `schedule` fields.
type EphemeralSuccessfulRunResult = Omit<SuccessfulRunResult, 'runAt' | 'schedule'>;
type EphemeralFailedRunResult = Omit<FailedRunResult, 'runAt' | 'schedule'>;
/**
 * Runs a single ephemeral task.
 *
 * The runner starts in the Pending stage, moves to ReadyToRun when
 * `markTaskAsRunning` succeeds, and can then be executed with `run`. Results are
 * not rescheduled; they are only reported through `onTaskEvent` with
 * `TaskPersistence.Ephemeral`.
 *
 * NOTE(review): nothing in this class moves the instance to the Ran stage after
 * `run` completes, so the "has already been ran" error branches are not reached
 * from code in this class — confirm whether that is intended.
 *
 * @export
 * @class EphemeralTaskManagerRunner
 * @implements {TaskRunner}
 */
export class EphemeralTaskManagerRunner implements TaskRunner {
  private task?: CancellableTask;
  private instance: TaskRunningInstance;
  private definitions: TaskTypeDictionary;
  private logger: Logger;
  private beforeRun: Middleware['beforeRun'];
  private beforeMarkRunning: Middleware['beforeMarkRunning'];
  private onTaskEvent: (event: TaskRun | TaskMarkRunning) => void;

  /**
   * Creates an instance of EphemeralTaskManagerRunner.
   *
   * The given instance is sanitized (missing `params`/`state` become `{}`), converted
   * to a concrete instance (`attempts: 0`, `retryAt: null`) and wrapped as Pending.
   *
   * @param {Opts} opts
   * @prop {Logger} logger - The task manager logger
   * @prop {TaskTypeDictionary} definitions - The dictionary the task's definition is read from
   * @prop {EphemeralTaskInstance} instance - The record describing this particular task instance
   * @prop {BeforeRunFunction} beforeRun - A function that adjusts the run context prior to running the task
   * @prop {BeforeMarkRunningFunction} beforeMarkRunning - A function applied prior to marking the task as running
   * @prop {Function} onTaskEvent - Optional listener for mark-running and run events (defaults to `identity`)
   * @memberof EphemeralTaskManagerRunner
   */
  constructor({
    instance,
    definitions,
    logger,
    beforeRun,
    beforeMarkRunning,
    onTaskEvent = identity,
  }: Opts) {
    this.instance = asPending(asConcreteInstance(sanitizeInstance(instance)));
    this.definitions = definitions;
    this.logger = logger;
    this.beforeRun = beforeRun;
    this.beforeMarkRunning = beforeMarkRunning;
    this.onTaskEvent = onTaskEvent;
  }

  /**
   * Gets the id of this task instance.
   */
  public get id() {
    return this.instance.task.id;
  }

  /**
   * Gets the task type of this task instance.
   */
  public get taskType() {
    return this.instance.task.taskType;
  }

  /**
   * Get the stage this TaskRunner is at
   */
  public get stage() {
    return this.instance.stage;
  }

  /**
   * Gets the task definition from the dictionary.
   */
  public get definition() {
    return this.definitions.get(this.taskType);
  }

  /**
   * Gets the time at which this task will expire.
   */
  public get expiration() {
    return intervalFromDate(
      // if the task is ready to run, use its startedAt, otherwise use the timestamp at
      // which the runner last changed stage
      // this allows us to catch tasks that remain in Pending/Finalizing without being
      // cleaned up
      isReadyToRun(this.instance) ? this.instance.task.startedAt : this.instance.timestamp,
      this.definition.timeout
    )!;
  }

  /**
   * Gets the time at which the current task run started.
   */
  public get startedAt() {
    return this.instance.task.startedAt;
  }

  /**
   * Gets whether or not this task has run longer than its expiration setting allows.
   */
  public get isExpired() {
    return this.expiration < new Date();
  }

  /**
   * Always true: identifies this runner as an ephemeral one.
   */
  public get isEphemeral() {
    return true;
  }

  /**
   * Returns a log-friendly representation of this task.
   */
  public toString() {
    return `${this.taskType} "${this.id}" (Ephemeral)`;
  }

  /**
   * Runs the task and reports its outcome through `onTaskEvent`.
   *
   * Errors thrown while creating or running the task are logged and converted into
   * an Err result. An error thrown by the `beforeRun` middleware is rethrown to the
   * caller, after the APM transaction has been ended.
   *
   * NOTE: the task timer is started after `beforeRun` resolves, so the time spent in
   * the middleware is not part of the reported task timing.
   *
   * @returns {Promise<Result<SuccessfulRunResult, FailedRunResult>>}
   * @throws {Error} when the runner is not in the ReadyToRun stage
   */
  public async run(): Promise<Result<SuccessfulRunResult, FailedRunResult>> {
    if (!isReadyToRun(this.instance)) {
      throw new Error(
        `Running ephemeral task ${this} failed as it ${
          isPending(this.instance) ? `isn't ready to be ran` : `has already been ran`
        }`
      );
    }
    this.logger.debug(`Running ephemeral task ${this}`);

    const apmTrans = apm.startTransaction(this.taskType, 'taskManager ephemeral run', {
      childOf: this.instance.task.traceparent,
    });

    const taskInstance = asConcreteInstance(this.instance.task);
    // The transaction is already open at this point, so make sure it is closed if the
    // middleware rejects; otherwise it would never be ended.
    const modifiedContext = await (async () => {
      try {
        return await this.beforeRun({ taskInstance });
      } catch (err) {
        if (apmTrans) apmTrans.end('failure');
        throw err;
      }
    })();

    const stopTaskTimer = startTaskTimer();

    try {
      this.task = this.definition.createTaskRunner(modifiedContext);
      const result = await withSpan({ name: 'ephemeral run', type: 'task manager' }, () =>
        this.task!.run()
      );
      const validatedResult = this.validateResult(result);
      const processedResult = await withSpan(
        { name: 'process ephemeral result', type: 'task manager' },
        () => this.processResult(validatedResult, stopTaskTimer())
      );
      if (apmTrans) apmTrans.end('success');
      return processedResult;
    } catch (err) {
      this.logger.error(`Task ${this} failed: ${err}`);
      // in error scenario, we can not get the RunResult
      const processedResult = await withSpan(
        { name: 'process ephemeral result', type: 'task manager' },
        () =>
          this.processResult(
            asErr({ error: err, state: modifiedContext.taskInstance.state }),
            stopTaskTimer()
          )
      );
      if (apmTrans) apmTrans.end('failure');
      return processedResult;
    }
  }

  /**
   * Moves the runner from Pending to ReadyToRun.
   *
   * Applies the `beforeMarkRunning` middleware, then records the task in memory as
   * running (status Running, `startedAt` set to now, `attempts` incremented,
   * `retryAt` cleared) and emits a mark-running event. This method does not write
   * to any store itself.
   *
   * @returns {Promise<boolean>} true on success; false when the middleware throws
   * (an Err mark-running event is emitted in that case)
   * @throws {Error} when the runner is not in the Pending stage
   */
  public async markTaskAsRunning(): Promise<boolean> {
    if (!isPending(this.instance)) {
      throw new Error(
        `Marking ephemeral task ${this} as running has failed as it ${
          isReadyToRun(this.instance) ? `is already running` : `has already been ran`
        }`
      );
    }

    const apmTrans = apm.startTransaction('taskManager', 'taskManager markTaskAsRunning');
    const now = new Date();

    try {
      const { taskInstance } = await this.beforeMarkRunning({
        taskInstance: asConcreteInstance(this.instance.task),
      });

      this.instance = asReadyToRun({
        ...taskInstance,
        status: TaskStatus.Running,
        startedAt: now,
        attempts: taskInstance.attempts + 1,
        retryAt: null,
      });

      if (apmTrans) apmTrans.end('success');
      this.onTaskEvent(asTaskMarkRunningEvent(this.id, asOk(this.instance.task)));
      return true;
    } catch (error) {
      if (apmTrans) apmTrans.end('failure');
      this.onTaskEvent(asTaskMarkRunningEvent(this.id, asErr(error)));
    }
    return false;
  }

  /**
   * Attempts to cancel the task. When the underlying task exposes `cancel`, the
   * reference to it is dropped and its `cancel` is invoked; otherwise a debug
   * message is logged.
   *
   * @returns {Promise<void>}
   */
  public async cancel() {
    const { task } = this;
    if (task?.cancel) {
      this.task = undefined;
      return task.cancel();
    }

    this.logger.debug(`The ephemral task ${this} is not cancellable.`);
  }

  /**
   * Normalizes the raw value returned by the task into a Result: failed run results
   * become Err, anything else becomes Ok (EMPTY_RUN_RESULT when nothing was returned).
   */
  private validateResult(
    result?: SuccessfulRunResult | FailedRunResult | void
  ): Result<EphemeralSuccessfulRunResult, EphemeralFailedRunResult> {
    return isFailedRunResult(result)
      ? asErr({ ...result, error: result.error })
      : asOk(result || EMPTY_RUN_RESULT);
  }

  /**
   * Emits a run event describing the outcome (Success or Failed, always with
   * `TaskPersistence.Ephemeral`) and returns the result unchanged.
   */
  private async processResult(
    result: Result<EphemeralSuccessfulRunResult, EphemeralFailedRunResult>,
    taskTiming: TaskTiming
  ): Promise<Result<SuccessfulRunResult, FailedRunResult>> {
    await eitherAsync(
      result,
      async ({ state }: EphemeralSuccessfulRunResult) => {
        this.onTaskEvent(
          asTaskRunEvent(
            this.id,
            asOk({
              task: { ...this.instance.task, state },
              persistence: TaskPersistence.Ephemeral,
              result: TaskRunResult.Success,
            }),
            taskTiming
          )
        );
      },
      async ({ error, state }: EphemeralFailedRunResult) => {
        this.onTaskEvent(
          asTaskRunEvent(
            this.id,
            asErr({
              task: { ...this.instance.task, state },
              persistence: TaskPersistence.Ephemeral,
              result: TaskRunResult.Failed,
              error,
            }),
            taskTiming
          )
        );
      }
    );
    return result;
  }
}
/**
 * Returns a copy of the instance in which falsy `params` and `state` values are
 * replaced by empty objects.
 */
function sanitizeInstance(instance: EphemeralTaskInstance): EphemeralTaskInstance {
  const params = instance.params || {};
  const state = instance.state || {};
  return { ...instance, params, state };
}
/**
 * Converts an ephemeral task instance into a ConcreteTaskInstance by copying it and
 * setting `attempts` to 0 and `retryAt` to null (overriding any values already present).
 */
function asConcreteInstance(instance: EphemeralTaskInstance): ConcreteTaskInstance {
  const concreteInstance: ConcreteTaskInstance = { ...instance, attempts: 0, retryAt: null };
  return concreteInstance;
}

View file

@ -4,6 +4,7 @@
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { EphemeralTask } from '../task';
// Unrecoverable
const CODE_UNRECOVERABLE = 'TaskManager/unrecoverable';
@ -14,6 +15,19 @@ export interface DecoratedError extends Error {
[code]?: string;
}
/**
 * Error that carries the ephemeral task it relates to. Going by its name, it signals
 * that the task was rejected due to capacity.
 */
export class EphemeralTaskRejectedDueToCapacityError extends Error {
  // the task passed to the constructor
  private readonly _task: EphemeralTask;

  /**
   * @param message - the error message
   * @param task - the ephemeral task this error relates to
   */
  constructor(message: string, task: EphemeralTask) {
    super(message);
    this._task = task;
  }

  /**
   * The ephemeral task this error was created with.
   */
  public get task(): EphemeralTask {
    return this._task;
  }
}
/**
 * Type guard: true when the value is truthy and has a truthy value under the `code` key.
 */
function isTaskManagerError(error: unknown): error is DecoratedError {
  if (!error) {
    return false;
  }
  return Boolean((error as DecoratedError)[code]);
}
@ -26,3 +40,9 @@ export function throwUnrecoverableError(error: Error) {
(error as DecoratedError)[code] = CODE_UNRECOVERABLE;
throw error;
}
/**
 * Type guard for EphemeralTaskRejectedDueToCapacityError.
 *
 * Declared as a type predicate so that callers can access `error.task` after the
 * check. `instanceof` already yields `false` for `null`/`undefined`, so no separate
 * truthiness check is needed.
 *
 * @param error - the error to inspect
 * @returns true when `error` is an instance of EphemeralTaskRejectedDueToCapacityError
 */
export function isEphemeralTaskRejectedDueToCapacityError(
  error: Error | EphemeralTaskRejectedDueToCapacityError
): error is EphemeralTaskRejectedDueToCapacityError {
  return error instanceof EphemeralTaskRejectedDueToCapacityError;
}

View file

@ -10,7 +10,13 @@ import sinon from 'sinon';
import { secondsFromNow } from '../lib/intervals';
import { asOk, asErr } from '../lib/result_type';
import { TaskManagerRunner, TaskRunningStage, TaskRunResult } from '../task_running';
import { TaskEvent, asTaskRunEvent, asTaskMarkRunningEvent, TaskRun } from '../task_events';
import {
TaskEvent,
asTaskRunEvent,
asTaskMarkRunningEvent,
TaskRun,
TaskPersistence,
} from '../task_events';
import { ConcreteTaskInstance, TaskStatus } from '../task';
import { SavedObjectsErrorHelpers } from '../../../../../src/core/server';
import moment from 'moment';
@ -854,7 +860,12 @@ describe('TaskManagerRunner', () => {
const onTaskEvent = jest.fn();
const { runner, store, instance: originalInstance } = await readyToRunStageSetup({
onTaskEvent,
instance: { id, status: TaskStatus.Running, startedAt: new Date() },
instance: {
id,
schedule: { interval: '20m' },
status: TaskStatus.Running,
startedAt: new Date(),
},
definitions: {
bar: {
title: 'Bar!',
@ -878,6 +889,7 @@ describe('TaskManagerRunner', () => {
id,
asErr({
error,
persistence: TaskPersistence.Recurring,
task: originalInstance,
result: TaskRunResult.Failed,
})
@ -1209,7 +1221,16 @@ describe('TaskManagerRunner', () => {
await runner.run();
expect(onTaskEvent).toHaveBeenCalledWith(
withAnyTiming(asTaskRunEvent(id, asOk({ task: instance, result: TaskRunResult.Success })))
withAnyTiming(
asTaskRunEvent(
id,
asOk({
task: instance,
persistence: TaskPersistence.NonRecurring,
result: TaskRunResult.Success,
})
)
)
);
});
@ -1238,7 +1259,16 @@ describe('TaskManagerRunner', () => {
await runner.run();
expect(onTaskEvent).toHaveBeenCalledWith(
withAnyTiming(asTaskRunEvent(id, asOk({ task: instance, result: TaskRunResult.Success })))
withAnyTiming(
asTaskRunEvent(
id,
asOk({
task: instance,
persistence: TaskPersistence.Recurring,
result: TaskRunResult.Success,
})
)
)
);
});
@ -1268,7 +1298,12 @@ describe('TaskManagerRunner', () => {
withAnyTiming(
asTaskRunEvent(
id,
asErr({ error, task: instance, result: TaskRunResult.RetryScheduled })
asErr({
error,
task: instance,
persistence: TaskPersistence.NonRecurring,
result: TaskRunResult.RetryScheduled,
})
)
)
);
@ -1304,7 +1339,12 @@ describe('TaskManagerRunner', () => {
withAnyTiming(
asTaskRunEvent(
id,
asErr({ error, task: instance, result: TaskRunResult.RetryScheduled })
asErr({
error,
task: instance,
persistence: TaskPersistence.Recurring,
result: TaskRunResult.RetryScheduled,
})
)
)
);
@ -1346,6 +1386,7 @@ describe('TaskManagerRunner', () => {
asErr({
error,
task: originalInstance,
persistence: TaskPersistence.NonRecurring,
result: TaskRunResult.Failed,
})
)

View file

@ -36,6 +36,7 @@ import {
asTaskMarkRunningEvent,
startTaskTimer,
TaskTiming,
TaskPersistence,
} from '../task_events';
import { intervalFromDate, maxIntervalFromDate } from '../lib/intervals';
import {
@ -53,7 +54,7 @@ import { TaskTypeDictionary } from '../task_type_dictionary';
import { isUnrecoverableError } from './errors';
const defaultBackoffPerFailure = 5 * 60 * 1000;
const EMPTY_RUN_RESULT: SuccessfulRunResult = { state: {} };
export const EMPTY_RUN_RESULT: SuccessfulRunResult = { state: {} };
export interface TaskRunner {
isExpired: boolean;
@ -65,6 +66,7 @@ export interface TaskRunner {
run: () => Promise<Result<SuccessfulRunResult, FailedRunResult>>;
id: string;
stage: string;
isEphemeral?: boolean;
toString: () => string;
}
@ -105,14 +107,17 @@ export enum TaskRunResult {
}
// A ConcreteTaskInstance which we *know* has a `startedAt` Date on it
type ConcreteTaskInstanceWithStartedAt = ConcreteTaskInstance & { startedAt: Date };
export type ConcreteTaskInstanceWithStartedAt = ConcreteTaskInstance & { startedAt: Date };
// The three possible stages for a Task Runner - Pending -> ReadyToRun -> Ran
type PendingTask = TaskRunning<TaskRunningStage.PENDING, ConcreteTaskInstance>;
type ReadyToRunTask = TaskRunning<TaskRunningStage.READY_TO_RUN, ConcreteTaskInstanceWithStartedAt>;
type RanTask = TaskRunning<TaskRunningStage.RAN, ConcreteTaskInstance>;
export type PendingTask = TaskRunning<TaskRunningStage.PENDING, ConcreteTaskInstance>;
export type ReadyToRunTask = TaskRunning<
TaskRunningStage.READY_TO_RUN,
ConcreteTaskInstanceWithStartedAt
>;
export type RanTask = TaskRunning<TaskRunningStage.RAN, ConcreteTaskInstance>;
type TaskRunningInstance = PendingTask | ReadyToRunTask | RanTask;
export type TaskRunningInstance = PendingTask | ReadyToRunTask | RanTask;
/**
* Runs a background task, ensures that errors are properly handled,
@ -528,6 +533,10 @@ export class TaskManagerRunner implements TaskRunner {
this.id,
asOk({
task,
persistence:
schedule || task.schedule
? TaskPersistence.Recurring
: TaskPersistence.NonRecurring,
result: await (runAt || schedule || task.schedule
? this.processResultForRecurringTask(result)
: this.processResultWhenDone()),
@ -540,7 +549,12 @@ export class TaskManagerRunner implements TaskRunner {
this.onTaskEvent(
asTaskRunEvent(
this.id,
asErr({ task, result: await this.processResultForRecurringTask(result), error }),
asErr({
task,
persistence: task.schedule ? TaskPersistence.Recurring : TaskPersistence.NonRecurring,
result: await this.processResultForRecurringTask(result),
error,
}),
taskTiming
)
);
@ -602,20 +616,20 @@ function performanceStopMarkingTaskAsRunning() {
// in a specific place in the code might be
type InstanceOf<S extends TaskRunningStage, T> = T extends TaskRunning<S, infer I> ? I : never;
function isPending(taskRunning: TaskRunningInstance): taskRunning is PendingTask {
export function isPending(taskRunning: TaskRunningInstance): taskRunning is PendingTask {
return taskRunning.stage === TaskRunningStage.PENDING;
}
function asPending(task: InstanceOf<TaskRunningStage.PENDING, PendingTask>): PendingTask {
export function asPending(task: InstanceOf<TaskRunningStage.PENDING, PendingTask>): PendingTask {
return {
timestamp: new Date(),
stage: TaskRunningStage.PENDING,
task,
};
}
function isReadyToRun(taskRunning: TaskRunningInstance): taskRunning is ReadyToRunTask {
export function isReadyToRun(taskRunning: TaskRunningInstance): taskRunning is ReadyToRunTask {
return taskRunning.stage === TaskRunningStage.READY_TO_RUN;
}
function asReadyToRun(
export function asReadyToRun(
task: InstanceOf<TaskRunningStage.READY_TO_RUN, ReadyToRunTask>
): ReadyToRunTask {
return {
@ -624,7 +638,7 @@ function asReadyToRun(
task,
};
}
function asRan(task: InstanceOf<TaskRunningStage.RAN, RanTask>): RanTask {
export function asRan(task: InstanceOf<TaskRunningStage.RAN, RanTask>): RanTask {
return {
timestamp: new Date(),
stage: TaskRunningStage.RAN,

View file

@ -12,6 +12,7 @@ const createTaskSchedulingMock = () => {
ensureScheduled: jest.fn(),
schedule: jest.fn(),
runNow: jest.fn(),
ephemeralRunNow: jest.fn(),
} as unknown) as jest.Mocked<TaskScheduling>;
};

View file

@ -15,6 +15,7 @@ import {
asTaskClaimEvent,
asTaskRunRequestEvent,
TaskClaimErrorType,
TaskPersistence,
} from './task_events';
import { TaskLifecycleEvent } from './polling_lifecycle';
import { taskPollingLifecycleMock } from './polling_lifecycle.mock';
@ -26,6 +27,11 @@ import { taskStoreMock } from './task_store.mock';
import { TaskRunResult } from './task_running';
import { mockLogger } from './test_utils';
import { TaskTypeDictionary } from './task_type_dictionary';
import { ephemeralTaskLifecycleMock } from './ephemeral_task_lifecycle.mock';
jest.mock('uuid', () => ({
v4: () => 'v4uuid',
}));
jest.mock('elastic-apm-node', () => ({
currentTraceparent: 'parent',
@ -41,6 +47,8 @@ describe('TaskScheduling', () => {
logger: mockLogger(),
middleware: createInitialMiddleware(),
definitions,
ephemeralTaskLifecycle: ephemeralTaskLifecycleMock.create({}),
taskManagerId: '',
};
definitions.registerTaskDefinitions({
@ -137,7 +145,12 @@ describe('TaskScheduling', () => {
const result = taskScheduling.runNow(id);
const task = mockTask({ id });
events$.next(asTaskRunEvent(id, asOk({ task, result: TaskRunResult.Success })));
events$.next(
asTaskRunEvent(
id,
asOk({ task, result: TaskRunResult.Success, persistence: TaskPersistence.Recurring })
)
);
return expect(result).resolves.toEqual({ id });
});
@ -163,6 +176,7 @@ describe('TaskScheduling', () => {
task,
error: new Error('some thing gone wrong'),
result: TaskRunResult.Failed,
persistence: TaskPersistence.Recurring,
})
)
);
@ -393,7 +407,14 @@ describe('TaskScheduling', () => {
events$.next(asTaskClaimEvent(id, asOk(task)));
events$.next(asTaskClaimEvent(differentTask, asOk(otherTask)));
events$.next(
asTaskRunEvent(differentTask, asOk({ task: otherTask, result: TaskRunResult.Success }))
asTaskRunEvent(
differentTask,
asOk({
task: otherTask,
result: TaskRunResult.Success,
persistence: TaskPersistence.Recurring,
})
)
);
events$.next(
@ -403,6 +424,7 @@ describe('TaskScheduling', () => {
task,
error: new Error('some thing gone wrong'),
result: TaskRunResult.Failed,
persistence: TaskPersistence.Recurring,
})
)
);
@ -411,6 +433,97 @@ describe('TaskScheduling', () => {
`[Error: Failed to run task "01ddff11-e88a-4d13-bc4e-256164e755e2": Error: some thing gone wrong]`
);
});
// Happy path of ephemeralRunNow: the mocked lifecycle accepts the task and a successful
// run event is emitted for the id 'v4uuid' (the value produced by the mocked uuid module).
test('runs a task ephemerally', async () => {
const ephemeralEvents$ = new Subject<TaskLifecycleEvent>();
const ephemeralTask = mockTask({
state: {
foo: 'bar',
},
});
const customEphemeralTaskLifecycleMock = ephemeralTaskLifecycleMock.create({
events$: ephemeralEvents$,
});
// attemptToRun reports success by echoing the task back as an Ok result
customEphemeralTaskLifecycleMock.attemptToRun.mockImplementation((value) => {
return {
tag: 'ok',
value,
};
});
const middleware = createInitialMiddleware();
// beforeSave passes the task through unchanged
middleware.beforeSave = jest.fn().mockImplementation(async () => {
return { taskInstance: ephemeralTask };
});
const taskScheduling = new TaskScheduling({
...taskSchedulingOpts,
middleware,
ephemeralTaskLifecycle: customEphemeralTaskLifecycleMock,
});
const result = taskScheduling.ephemeralRunNow(ephemeralTask);
// simulate the ephemeral lifecycle reporting that the task ran successfully
ephemeralEvents$.next(
asTaskRunEvent(
'v4uuid',
asOk({
task: {
...ephemeralTask,
id: 'v4uuid',
},
result: TaskRunResult.Success,
persistence: TaskPersistence.Ephemeral,
})
)
);
// NOTE(review): this assertion is neither awaited nor returned, so the test can finish
// before it is evaluated. Before adding `await`, confirm that ephemeralRunNow has
// subscribed to the events stream by the time of the synchronous `next` call above;
// otherwise the promise may never settle and the test would time out.
expect(result).resolves.toEqual({ id: 'v4uuid', state: { foo: 'bar' } });
});
// Rejection path of ephemeralRunNow: the mocked lifecycle answers attemptToRun with an
// Err result, and the returned promise is expected to reject.
test('rejects ephemeral task if lifecycle returns an error', async () => {
const ephemeralEvents$ = new Subject<TaskLifecycleEvent>();
const ephemeralTask = mockTask({
state: {
foo: 'bar',
},
});
const customEphemeralTaskLifecycleMock = ephemeralTaskLifecycleMock.create({
events$: ephemeralEvents$,
});
// attemptToRun reports failure by wrapping the task in an Err result
customEphemeralTaskLifecycleMock.attemptToRun.mockImplementation((value) => {
return asErr(value);
});
const middleware = createInitialMiddleware();
// beforeSave passes the task through unchanged
middleware.beforeSave = jest.fn().mockImplementation(async () => {
return { taskInstance: ephemeralTask };
});
const taskScheduling = new TaskScheduling({
...taskSchedulingOpts,
middleware,
ephemeralTaskLifecycle: customEphemeralTaskLifecycleMock,
});
const result = taskScheduling.ephemeralRunNow(ephemeralTask);
// a run event with a Failed result is emitted for the same id
ephemeralEvents$.next(
asTaskRunEvent(
'v4uuid',
asOk({
task: {
...ephemeralTask,
id: 'v4uuid',
},
result: TaskRunResult.Failed,
persistence: TaskPersistence.Ephemeral,
})
)
);
// NOTE(review): this assertion is neither awaited nor returned, so the test can finish
// before it is evaluated. Confirm that the promise settles before adding `await`.
expect(result).rejects.toMatchInlineSnapshot(
`[Error: Ephemeral Task of type foo was rejected]`
);
});
});
});

View file

@ -5,14 +5,17 @@
* 2.0.
*/
import { filter } from 'rxjs/operators';
import { filter, take } from 'rxjs/operators';
import { pipe } from 'fp-ts/lib/pipeable';
import { Option, map as mapOptional, getOrElse, isSome } from 'fp-ts/lib/Option';
import uuid from 'uuid';
import { pick } from 'lodash';
import { merge, Subject } from 'rxjs';
import agent from 'elastic-apm-node';
import { Logger } from '../../../../src/core/server';
import { asOk, either, map, mapErr, promiseResult } from './lib/result_type';
import { asOk, either, map, mapErr, promiseResult, isErr } from './lib/result_type';
import {
isTaskRunEvent,
isTaskClaimEvent,
@ -32,11 +35,14 @@ import {
TaskLifecycle,
TaskLifecycleResult,
TaskStatus,
EphemeralTask,
} from './task';
import { TaskStore } from './task_store';
import { ensureDeprecatedFieldsAreCorrected } from './lib/correct_deprecated_fields';
import { TaskLifecycleEvent, TaskPollingLifecycle } from './polling_lifecycle';
import { TaskTypeDictionary } from './task_type_dictionary';
import { EphemeralTaskLifecycle } from './ephemeral_task_lifecycle';
import { EphemeralTaskRejectedDueToCapacityError } from './task_running';
const VERSION_CONFLICT_STATUS = 409;
@ -44,20 +50,25 @@ export interface TaskSchedulingOpts {
logger: Logger;
taskStore: TaskStore;
taskPollingLifecycle: TaskPollingLifecycle;
ephemeralTaskLifecycle: EphemeralTaskLifecycle;
middleware: Middleware;
definitions: TaskTypeDictionary;
taskManagerId: string;
}
interface RunNowResult {
id: string;
export interface RunNowResult {
id: ConcreteTaskInstance['id'];
state?: ConcreteTaskInstance['state'];
}
export class TaskScheduling {
private store: TaskStore;
private taskPollingLifecycle: TaskPollingLifecycle;
private ephemeralTaskLifecycle: EphemeralTaskLifecycle;
private logger: Logger;
private middleware: Middleware;
private definitions: TaskTypeDictionary;
private taskManagerId: string;
/**
* Initializes the task manager, preventing any further addition of middleware,
@ -68,8 +79,10 @@ export class TaskScheduling {
this.logger = opts.logger;
this.middleware = opts.middleware;
this.taskPollingLifecycle = opts.taskPollingLifecycle;
this.ephemeralTaskLifecycle = opts.ephemeralTaskLifecycle;
this.store = opts.taskStore;
this.definitions = opts.definitions;
this.taskManagerId = opts.taskManagerId;
}
/**
@ -100,11 +113,67 @@ export class TaskScheduling {
*/
public async runNow(taskId: string): Promise<RunNowResult> {
return new Promise(async (resolve, reject) => {
this.awaitTaskRunResult(taskId).then(resolve).catch(reject);
this.awaitTaskRunResult(taskId)
// don't expose state on runNow
.then(({ id }) => resolve({ id }))
.catch(reject);
this.taskPollingLifecycle.attemptToRun(taskId);
});
}
/**
 * Run an ad-hoc task in memory without persisting it into ES or distributing the load across the cluster.
 *
 * @param task - The ephemeral task being queued.
 * @param options - Extra properties passed to the `beforeSave` middleware alongside the task.
 * @returns The id and resulting state of the task once a run event for it has been observed.
 * @throws EphemeralTaskRejectedDueToCapacityError (as a rejection) when the ephemeral lifecycle
 * returns an error result for the task; any error thrown by `attemptToRun` is also surfaced as a rejection.
 */
public async ephemeralRunNow(
  task: EphemeralTask,
  options?: Record<string, unknown>
): Promise<RunNowResult> {
  const id = uuid.v4();
  const { taskInstance: modifiedTask } = await this.middleware.beforeSave({
    ...options,
    taskInstance: task,
  });
  // The executor is deliberately synchronous: with an async executor, anything thrown while
  // queueing the task would be swallowed and the returned promise would never settle.
  return new Promise((resolve, reject) => {
    // Ordering matters here. awaitTaskRunResult settles once a run event with this id is emitted,
    // but the task only enters the queue when ephemeralTaskLifecycle.attemptToRun is called below.
    // attemptToRun may queue AND process the task before it returns, so the listener has to be
    // registered first; subscribing afterwards could miss the event and hang forever.
    // We intentionally do not await awaitTaskRunResult here, so queueing proceeds immediately.
    const { cancel, resolveOnCancel } = cancellablePromise();
    this.awaitTaskRunResult(id, resolveOnCancel).then(resolve, reject);
    try {
      const attemptToRunResult = this.ephemeralTaskLifecycle.attemptToRun({
        id,
        scheduledAt: new Date(),
        runAt: new Date(),
        status: TaskStatus.Idle,
        ownerId: this.taskManagerId,
        ...modifiedTask,
      });
      if (isErr(attemptToRunResult)) {
        // The task was not accepted: stop listening for its events and report the rejection.
        cancel();
        reject(
          new EphemeralTaskRejectedDueToCapacityError(
            `Ephemeral Task of type ${task.taskType} was rejected`,
            task
          )
        );
      }
    } catch (err) {
      // Queueing blew up: release the event subscription and surface the failure to the caller.
      cancel();
      reject(err);
    }
  });
}
/**
* Schedules a task with an Id
*
@ -125,10 +194,13 @@ export class TaskScheduling {
}
}
private async awaitTaskRunResult(taskId: string): Promise<RunNowResult> {
private awaitTaskRunResult(taskId: string, cancel?: Promise<void>): Promise<RunNowResult> {
return new Promise((resolve, reject) => {
const subscription = this.taskPollingLifecycle.events
// listen for all events related to the current task
// listen for all events related to the current task
const subscription = merge(
this.taskPollingLifecycle.events,
this.ephemeralTaskLifecycle.events
)
.pipe(filter(({ id }: TaskLifecycleEvent) => id === taskId))
.subscribe((taskEvent: TaskLifecycleEvent) => {
if (isTaskClaimEvent(taskEvent)) {
@ -161,7 +233,7 @@ export class TaskScheduling {
// resolve if the task has run sucessfully
if (isTaskRunEvent(taskEvent)) {
subscription.unsubscribe();
resolve({ id: (taskInstance as RanTask).task.id });
resolve(pick((taskInstance as RanTask).task, ['id', 'state']));
}
},
async (errorResult: ErrResultOf<TaskLifecycleEvent>) => {
@ -182,6 +254,12 @@ export class TaskScheduling {
);
}
});
if (cancel) {
cancel.then(() => {
subscription.unsubscribe();
});
}
});
}
@ -216,3 +294,14 @@ export class TaskScheduling {
);
}
}
/**
 * Creates a one-shot cancellation signal.
 * Calling `cancel` makes the `resolveOnCancel` promise resolve (with no value).
 */
const cancellablePromise = () => {
  const cancelSignal$ = new Subject<boolean>();
  // Only the first signal matters; later calls to `cancel` have no further effect on the promise.
  const firstSignal = cancelSignal$.pipe(take(1)).toPromise();
  const resolveOnCancel = firstSignal.then(() => {});
  const cancel = () => cancelSignal$.next(true);
  return { cancel, resolveOnCancel };
};

View file

@ -0,0 +1,8 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
export { registerTaskManagerUsageCollector } from './task_manager_usage_collector';

View file

@ -0,0 +1,172 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { Subject } from 'rxjs';
import { merge } from 'lodash';
import { loggingSystemMock } from 'src/core/server/mocks';
import {
Collector,
createCollectorFetchContextWithKibanaMock,
createUsageCollectionSetupMock,
} from 'src/plugins/usage_collection/server/mocks';
import { HealthStatus } from '../monitoring';
import { MonitoredHealth } from '../routes/health';
import { TaskPersistence } from '../task_events';
import { registerTaskManagerUsageCollector } from './task_manager_usage_collector';
import { sleep } from '../test_utils';
describe('registerTaskManagerUsageCollector', () => {
  // Captured from the makeUsageCollector mock so the test can call fetch on a real Collector.
  let collector: Collector<unknown>;
  const logger = loggingSystemMock.createLogger();

  it('should report telemetry on the ephemeral queue', async () => {
    const health$ = new Subject<MonitoredHealth>();
    const usageCollection = createUsageCollectionSetupMock();
    const fetchContext = createCollectorFetchContextWithKibanaMock();
    usageCollection.makeUsageCollector.mockImplementation((config) => {
      collector = new Collector(logger, config);
      return createUsageCollectionSetupMock().makeUsageCollector(config);
    });

    registerTaskManagerUsageCollector(usageCollection, health$, true, 10);

    const mockHealth = getMockMonitoredHealth();
    health$.next(mockHealth);
    await sleep(1001);

    expect(usageCollection.makeUsageCollector).toBeCalled();

    const ephemeral = mockHealth.stats.ephemeral;
    const telemetry = await collector.fetch(fetchContext);
    expect(telemetry).toMatchObject({
      ephemeral_tasks_enabled: true,
      ephemeral_request_capacity: 10,
      ephemeral_stats: {
        status: ephemeral?.status,
        load: ephemeral?.value.load,
        executions_per_cycle: ephemeral?.value.executionsPerCycle,
        queued_tasks: ephemeral?.value.queuedTasks,
      },
    });
  });
});
/**
 * Builds a MonitoredHealth fixture with every stats section populated,
 * deep-merging `overrides` on top of the defaults.
 */
function getMockMonitoredHealth(overrides = {}): MonitoredHealth {
  // Small builders for the shapes that repeat throughout the fixture.
  const isoNow = () => new Date().toISOString();
  const percentiles = (p50: number, p90: number, p95: number, p99: number) => ({
    p50,
    p90,
    p95,
    p99,
  });
  const idleTaskType = (count: number) => ({ count, status: { idle: count } });

  const baseHealth: MonitoredHealth = {
    id: '1',
    status: HealthStatus.OK,
    timestamp: isoNow(),
    last_update: isoNow(),
    stats: {
      configuration: {
        timestamp: isoNow(),
        status: HealthStatus.OK,
        value: {
          max_workers: 10,
          poll_interval: 3000,
          max_poll_inactivity_cycles: 10,
          request_capacity: 1000,
          monitored_aggregated_stats_refresh_rate: 5000,
          monitored_stats_running_average_window: 50,
          monitored_task_execution_thresholds: {
            default: {
              error_threshold: 90,
              warn_threshold: 80,
            },
            custom: {},
          },
        },
      },
      workload: {
        timestamp: isoNow(),
        status: HealthStatus.OK,
        value: {
          count: 4,
          task_types: {
            actions_telemetry: idleTaskType(2),
            alerting_telemetry: idleTaskType(1),
            session_cleanup: idleTaskType(1),
          },
          schedule: [],
          overdue: 0,
          overdue_non_recurring: 0,
          estimatedScheduleDensity: [],
          non_recurring: 20,
          owner_ids: 2,
          estimated_schedule_density: [],
          capacity_requirements: {
            per_minute: 150,
            per_hour: 360,
            per_day: 820,
          },
        },
      },
      ephemeral: {
        status: HealthStatus.OK,
        timestamp: isoNow(),
        value: {
          load: percentiles(4, 6, 6, 6),
          executionsPerCycle: percentiles(4, 6, 6, 6),
          queuedTasks: percentiles(4, 6, 6, 6),
        },
      },
      runtime: {
        timestamp: isoNow(),
        status: HealthStatus.OK,
        value: {
          drift: percentiles(1000, 2000, 2500, 3000),
          drift_by_type: {},
          load: percentiles(1000, 2000, 2500, 3000),
          execution: {
            duration: {},
            duration_by_persistence: {},
            persistence: {
              [TaskPersistence.Recurring]: 10,
              [TaskPersistence.NonRecurring]: 10,
              [TaskPersistence.Ephemeral]: 10,
            },
            result_frequency_percent_as_number: {},
          },
          polling: {
            last_successful_poll: isoNow(),
            duration: [500, 400, 3000],
            claim_conflicts: [0, 100, 75],
            claim_mismatches: [0, 100, 75],
            result_frequency_percent_as_number: [
              'NoTasksClaimed',
              'NoTasksClaimed',
              'NoTasksClaimed',
            ],
          },
        },
      },
    },
  };
  return (merge(baseHealth, overrides) as unknown) as MonitoredHealth;
}

View file

@ -0,0 +1,96 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { Observable } from 'rxjs';
import { UsageCollectionSetup } from 'src/plugins/usage_collection/server';
import { MonitoredHealth } from '../routes/health';
import { TaskManagerUsage } from './types';
/**
 * Creates the `task_manager` usage collector.
 *
 * The collector remembers the most recent value emitted by `monitoringStats$` and reports the
 * ephemeral queue statistics from it, falling back to '' / 0 when nothing is available.
 *
 * @param usageCollection - Usage collection setup contract used to build the collector.
 * @param monitoringStats$ - Stream of monitored health snapshots.
 * @param ephemeralTasksEnabled - Reported as-is under `ephemeral_tasks_enabled`.
 * @param ephemeralRequestCapacity - Reported as-is under `ephemeral_request_capacity`.
 * @returns The collector built by `usageCollection.makeUsageCollector`.
 */
export function createTaskManagerUsageCollector(
  usageCollection: UsageCollectionSetup,
  monitoringStats$: Observable<MonitoredHealth>,
  ephemeralTasksEnabled: boolean,
  ephemeralRequestCapacity: number
) {
  let latestHealth: MonitoredHealth | null = null;
  monitoringStats$.subscribe((health) => {
    latestHealth = health;
  });

  // Reads the percentile summary of one ephemeral metric from the latest snapshot,
  // defaulting every percentile to 0 when no ephemeral stats have been reported.
  const percentilesOf = (metric: 'queuedTasks' | 'load' | 'executionsPerCycle') => {
    const ephemeral = latestHealth?.stats.ephemeral;
    if (ephemeral == null) {
      return { p50: 0, p90: 0, p95: 0, p99: 0 };
    }
    const { p50, p90, p95, p99 } = ephemeral.value[metric];
    return {
      p50: p50 ?? 0,
      p90: p90 ?? 0,
      p95: p95 ?? 0,
      p99: p99 ?? 0,
    };
  };

  return usageCollection.makeUsageCollector<TaskManagerUsage>({
    type: 'task_manager',
    // Ready as soon as one health snapshot has been received.
    isReady: async () => Boolean(latestHealth),
    fetch: async () => {
      return {
        ephemeral_tasks_enabled: ephemeralTasksEnabled,
        ephemeral_request_capacity: ephemeralRequestCapacity,
        ephemeral_stats: {
          status: latestHealth?.stats.ephemeral?.status ?? '',
          queued_tasks: percentilesOf('queuedTasks'),
          load: percentilesOf('load'),
          executions_per_cycle: percentilesOf('executionsPerCycle'),
        },
      };
    },
    schema: {
      ephemeral_tasks_enabled: { type: 'boolean' },
      ephemeral_request_capacity: { type: 'short' },
      ephemeral_stats: {
        status: { type: 'keyword' },
        queued_tasks: {
          p50: { type: 'long' },
          p90: { type: 'long' },
          p95: { type: 'long' },
          p99: { type: 'long' },
        },
        load: {
          p50: { type: 'long' },
          p90: { type: 'long' },
          p95: { type: 'long' },
          p99: { type: 'long' },
        },
        executions_per_cycle: {
          p50: { type: 'long' },
          p90: { type: 'long' },
          p95: { type: 'long' },
          p99: { type: 'long' },
        },
      },
    },
  });
}
/**
 * Builds the task manager usage collector and registers it with usage collection.
 *
 * @param usageCollection - Usage collection setup contract to register with.
 * @param monitoringStats$ - Stream of monitored health snapshots handed to the collector.
 * @param ephemeralTasksEnabled - Forwarded to the collector.
 * @param ephemeralRequestCapacity - Forwarded to the collector.
 */
export function registerTaskManagerUsageCollector(
  usageCollection: UsageCollectionSetup,
  monitoringStats$: Observable<MonitoredHealth>,
  ephemeralTasksEnabled: boolean,
  ephemeralRequestCapacity: number
) {
  usageCollection.registerCollector(
    createTaskManagerUsageCollector(
      usageCollection,
      monitoringStats$,
      ephemeralTasksEnabled,
      ephemeralRequestCapacity
    )
  );
}

View file

@ -0,0 +1,32 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
/** Percentile summary (p50/p90/p95/p99) of a single metric. */
interface PercentileUsage {
  p50: number;
  p90: number;
  p95: number;
  p99: number;
}

/** Shape of the telemetry payload reported by the task manager usage collector. */
export interface TaskManagerUsage {
  ephemeral_tasks_enabled: boolean;
  ephemeral_request_capacity: number;
  ephemeral_stats: {
    status: string;
    queued_tasks: PercentileUsage;
    load: PercentileUsage;
    executions_per_cycle: PercentileUsage;
  };
}

View file

@ -15,5 +15,6 @@
"references": [
{ "path": "../../../src/core/tsconfig.json" },
{ "path": "../../../src/plugins/kibana_utils/tsconfig.json" },
{ "path": "../../../src/plugins/usage_collection/tsconfig.json" },
]
}

View file

@ -5840,6 +5840,71 @@
}
}
},
"task_manager": {
"properties": {
"ephemeral_tasks_enabled": {
"type": "boolean"
},
"ephemeral_request_capacity": {
"type": "short"
},
"ephemeral_stats": {
"properties": {
"status": {
"type": "keyword"
},
"queued_tasks": {
"properties": {
"p50": {
"type": "long"
},
"p90": {
"type": "long"
},
"p95": {
"type": "long"
},
"p99": {
"type": "long"
}
}
},
"load": {
"properties": {
"p50": {
"type": "long"
},
"p90": {
"type": "long"
},
"p95": {
"type": "long"
},
"p99": {
"type": "long"
}
}
},
"executions_per_cycle": {
"properties": {
"p50": {
"type": "long"
},
"p90": {
"type": "long"
},
"p95": {
"type": "long"
},
"p99": {
"type": "long"
}
}
}
}
}
}
},
"upgrade-assistant-telemetry": {
"properties": {
"features": {

View file

@ -42,6 +42,7 @@ const enabledActionTypes = [
'test.failing',
'test.index-record',
'test.noop',
'test.delayed',
'test.rate-limit',
'test.throw',
];
@ -158,6 +159,7 @@ export function createTestConfig(name: string, options: CreateTestConfigOptions)
...actionsProxyUrl,
...customHostSettings,
'--xpack.eventLog.logEntries=true',
'--xpack.task_manager.ephemeral_tasks.enabled=false',
`--xpack.actions.preconfiguredAlertHistoryEsIndex=${preconfiguredAlertHistoryEsIndex}`,
`--xpack.actions.preconfigured=${JSON.stringify({
'my-slack1': {

View file

@ -34,6 +34,7 @@ export function defineActionTypes(
actions.registerType(noopActionType);
actions.registerType(throwActionType);
actions.registerType(getIndexRecordActionType());
actions.registerType(getDelayedActionType());
actions.registerType(getFailingActionType());
actions.registerType(getRateLimitedActionType());
actions.registerType(getAuthorizationActionType(core));
@ -81,6 +82,40 @@ function getIndexRecordActionType() {
return result;
}
/**
 * Test action type whose executor waits `params.delayInMs` milliseconds
 * (1000 by default) before reporting success.
 */
function getDelayedActionType() {
  const configSchema = schema.object({
    unencrypted: schema.string(),
  });
  const secretsSchema = schema.object({
    encrypted: schema.string(),
  });
  const paramsSchema = schema.object({
    delayInMs: schema.number({ defaultValue: 1000 }),
  });

  const delayedActionType: ActionType<
    TypeOf<typeof configSchema>,
    TypeOf<typeof secretsSchema>,
    TypeOf<typeof paramsSchema>
  > = {
    id: 'test.delayed',
    name: 'Test: Delayed',
    minimumLicenseRequired: 'gold',
    validate: {
      params: paramsSchema,
      config: configSchema,
      secrets: secretsSchema,
    },
    async executor({ params, actionId }) {
      // Hold the execution open for the requested delay, then succeed.
      await new Promise((resolve) => setTimeout(resolve, params.delayInMs));
      return { status: 'ok', actionId };
    },
  };
  return delayedActionType;
}
function getFailingActionType() {
const paramsSchema = schema.object({
index: schema.string(),

View file

@ -52,6 +52,7 @@ export class FixturePlugin implements Plugin<void, void, FixtureSetupDeps, Fixtu
'test.never-firing',
'test.failing',
'test.authorization',
'test.delayed',
'test.validation',
'test.onlyContextVariables',
'test.onlyStateVariables',
@ -75,6 +76,7 @@ export class FixturePlugin implements Plugin<void, void, FixtureSetupDeps, Fixtu
'test.cumulative-firing',
'test.never-firing',
'test.failing',
'test.delayed',
'test.authorization',
'test.validation',
'test.onlyContextVariables',
@ -103,6 +105,7 @@ export class FixturePlugin implements Plugin<void, void, FixtureSetupDeps, Fixtu
'test.never-firing',
'test.failing',
'test.authorization',
'test.delayed',
'test.validation',
'test.onlyContextVariables',
'test.onlyStateVariables',

View file

@ -65,29 +65,39 @@ export class ESTestIndexTool {
}
}
async search(source: string, reference: string) {
return await this.es.search({
async search(source: string, reference?: string) {
const body = reference
? {
query: {
bool: {
must: [
{
term: {
source,
},
},
{
term: {
reference,
},
},
],
},
},
}
: {
query: {
term: {
source,
},
},
};
const params = {
index: this.index,
size: 1000,
body: {
query: {
bool: {
must: [
{
term: {
source,
},
},
{
term: {
reference,
},
},
],
},
},
},
});
body,
};
return await this.es.search(params);
}
async waitForDocs(source: string, reference: string, numDocs: number = 1) {

View file

@ -501,6 +501,19 @@ instanceStateValue: true
})
);
// Enqueue non-ephemerally so that the later code can query properly
const enqueueResponse = await supertest
.post(`${getUrlPrefix(space.id)}/api/alerts_fixture/${createdAction.id}/enqueue_action`)
.set('kbn-xsrf', 'foo')
.send({
params: {
reference,
index: ES_TEST_INDEX_NAME,
retryAt: retryDate.getTime(),
},
});
expect(enqueueResponse.status).to.eql(204);
switch (scenario.id) {
case 'no_kibana_privileges at space1':
case 'global_read at space1':

View file

@ -0,0 +1,129 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import expect from '@kbn/expect';
import { flatten } from 'lodash';
import { Spaces } from '../../scenarios';
import {
getUrlPrefix,
ObjectRemover,
getTestAlertData,
getEventLog,
ESTestIndexTool,
ES_TEST_INDEX_NAME,
} from '../../../common/lib';
import { FtrProviderContext } from '../../../common/ftr_provider_context';
import { IValidatedEvent } from '../../../../../plugins/event_log/server';
import { DEFAULT_MAX_EPHEMERAL_ACTIONS_PER_ALERT } from '../../../../../plugins/alerting/server/config';
// eslint-disable-next-line import/no-default-export
export default function createNotifyWhenTests({ getService }: FtrProviderContext) {
  const supertest = getService('supertest');
  const retry = getService('retry');
  const es = getService('legacyEs');
  const esTestIndexTool = new ESTestIndexTool(es, retry);

  describe('ephemeral', () => {
    const objectRemover = new ObjectRemover(supertest);

    // Each test starts from (and leaves behind) a clean test index.
    beforeEach(async () => {
      await esTestIndexTool.destroy();
      await esTestIndexTool.setup();
    });
    afterEach(async () => await esTestIndexTool.destroy());
    after(async () => {
      await objectRemover.removeAll();
    });

    it('should execute all requests, when some will be ephemeral and some not', async () => {
      // Attach more actions to the rule than the ephemeral per-alert maximum.
      const nonEphemeralTasks = 3;
      const totalActions = DEFAULT_MAX_EPHEMERAL_ACTIONS_PER_ALERT + nonEphemeralTasks;

      const createdActions = await Promise.all(
        Array.from({ length: totalActions }, (_, i) =>
          supertest
            .post(`${getUrlPrefix(Spaces.space1.id)}/api/actions/connector`)
            .set('kbn-xsrf', 'foo')
            .send({
              name: `My action${i}`,
              connector_type_id: 'test.index-record',
              config: {
                unencrypted: `This value shouldn't get encrypted`,
              },
              secrets: {
                encrypted: 'This value should be encrypted',
              },
            })
            .expect(200)
        )
      );
      for (const createdAction of createdActions) {
        objectRemover.add(Spaces.space1.id, createdAction.body.id, 'action', 'actions');
      }

      const pattern = {
        instance: [true, true, true, false, true, true],
      };
      const ruleActions = createdActions.map(({ body }) => ({
        id: body.id,
        group: 'default',
        params: {
          index: ES_TEST_INDEX_NAME,
          reference: '',
          message: 'test message',
        },
      }));
      const alertData = getTestAlertData({
        rule_type_id: 'test.patternFiring',
        params: { pattern },
        schedule: { interval: '1m' },
        throttle: null,
        notify_when: 'onActiveAlert',
        actions: ruleActions,
      });

      const { body: createdAlert } = await supertest
        .post(`${getUrlPrefix(Spaces.space1.id)}/api/alerting/rule`)
        .set('kbn-xsrf', 'foo')
        .send(alertData)
        .expect(200);
      objectRemover.add(Spaces.space1.id, createdAlert.id, 'rule', 'alerting');

      // Wait until every action has logged at least one execute event.
      const eventsPerAction = await Promise.all(
        createdActions.map((createdAction) =>
          retry.try(async () =>
            getEventLog({
              getService,
              spaceId: Spaces.space1.id,
              type: 'action',
              id: createdAction.body.id,
              provider: 'actions',
              actions: new Map([['execute', { gte: 1 }]]),
            })
          )
        )
      );
      const events = flatten(eventsPerAction);

      const executeActionsEvents = getEventsByAction(events, 'execute');
      expect(executeActionsEvents.length).equal(totalActions);

      const searchResult = await esTestIndexTool.search('action:test.index-record');
      expect(searchResult.hits.total.value).equal(totalActions);
    });
  });
}
/** Keeps only the events whose `event.action` equals `action`. */
function getEventsByAction(events: IValidatedEvent[], action: string) {
  const hasAction = (candidate: IValidatedEvent) => candidate?.event?.action === action;
  return events.filter(hasAction);
}

View file

@ -37,6 +37,7 @@ export default function alertingTests({ loadTestFile, getService }: FtrProviderC
loadTestFile(require.resolve('./builtin_alert_types'));
loadTestFile(require.resolve('./mustache_templates.ts'));
loadTestFile(require.resolve('./notify_when'));
loadTestFile(require.resolve('./ephemeral'));
loadTestFile(require.resolve('./event_log_alerts'));
// note that this test will destroy existing spaces

View file

@ -42,6 +42,8 @@ export default async function ({ readConfigFile }: FtrConfigProviderContext) {
'--xpack.eventLog.logEntries=true',
'--xpack.eventLog.indexEntries=true',
'--xpack.task_manager.monitored_aggregated_stats_refresh_rate=5000',
'--xpack.task_manager.ephemeral_tasks.enabled=false',
'--xpack.task_manager.ephemeral_tasks.request_capacity=100',
...plugins.map(
(pluginDir) => `--plugin-path=${path.resolve(__dirname, 'plugins', pluginDir)}`
),

View file

@ -112,6 +112,45 @@ export function initRoutes(
}
);
// Test-only route: runs the posted task ephemerally and responds with its result.
// Failures are reported in a 200 response body ({ task, error }) rather than as an HTTP error.
router.post(
  {
    path: `/api/sample_tasks/ephemeral_run_now`,
    validate: {
      body: schema.object({
        task: schema.object({
          taskType: schema.string(),
          state: schema.recordOf(schema.string(), schema.any()),
          params: schema.recordOf(schema.string(), schema.any()),
        }),
      }),
    },
  },
  async (
    context: RequestHandlerContext,
    req: KibanaRequest<
      any,
      any,
      {
        task: {
          taskType: string;
          params: Record<string, any>;
          state: Record<string, any>;
        };
      },
      any
    >,
    res: KibanaResponseFactory
  ): Promise<IKibanaResponse<any>> => {
    const { task } = req.body;
    try {
      const taskManager = await taskManagerStart;
      const runResult = await taskManager.ephemeralRunNow(task);
      return res.ok({ body: runResult });
    } catch (err) {
      return res.ok({ body: { task, error: `${err}` } });
    }
  }
);
router.post(
{
path: `/api/sample_tasks/ensure_scheduled`,

View file

@ -15,6 +15,7 @@ import {
TaskManagerSetupContract,
TaskManagerStartContract,
ConcreteTaskInstance,
EphemeralTask,
} from '../../../../../plugins/task_manager/server';
import { DEFAULT_MAX_WORKERS } from '../../../../../plugins/task_manager/server/config';
@ -38,6 +39,8 @@ export class SampleTaskManagerFixturePlugin
const taskTestingEvents = new EventEmitter();
taskTestingEvents.setMaxListeners(DEFAULT_MAX_WORKERS * 2);
const tmStart = this.taskManagerStart;
const defaultSampleTaskConfig = {
timeout: '1m',
// This task allows tests to specify its behavior (whether it reschedules itself, whether it errors, etc)
@ -155,6 +158,85 @@ export class SampleTaskManagerFixturePlugin
},
});
// Shared runner for the timed sample tasks: optionally waits `params.delay` ms and
// appends this run's { start, stop } timing to `state.timings`.
const taskWithTiming = {
  createTaskRunner: ({ taskInstance }: { taskInstance: ConcreteTaskInstance }) => ({
    async run() {
      const stopTiming = startTaskTimer();
      const { delay = 0 } = taskInstance.params;
      const { timings = [] } = taskInstance.state;

      if (delay) {
        await new Promise((resolve) => setTimeout(resolve, delay));
      }

      return {
        state: { timings: [...timings, stopTiming()] },
      };
    },
  }),
};
// Sample task types used to exercise timing, concurrency limits and ephemeral execution.
taskManager.registerTaskDefinitions({
  timedTask: {
    title: 'Task With Tracked Timings',
    timeout: '60s',
    description: 'A task that tracks its execution timing.',
    ...taskWithTiming,
  },
  timedTaskWithSingleConcurrency: {
    title: 'Task With Tracked Timings and Single Concurrency',
    maxConcurrency: 1,
    timeout: '60s',
    description:
      'A task that can only have one concurrent instance and tracks its execution timing.',
    ...taskWithTiming,
  },
  timedTaskWithLimitedConcurrency: {
    title: 'Task With Tracked Timings and Limited Concurrency',
    maxConcurrency: 2,
    timeout: '60s',
    description:
      'A task that can only have two concurrent instance and tracks its execution timing.',
    ...taskWithTiming,
  },
  taskWhichExecutesOtherTasksEphemerally: {
    title: 'Task Which Executes Other Tasks Ephemerally',
    description: 'A sample task used to validate how ephemeral tasks are executed.',
    maxAttempts: 1,
    timeout: '60s',
    createTaskRunner: ({ taskInstance }: { taskInstance: ConcreteTaskInstance }) => ({
      async run() {
        const { tasks = [] } = taskInstance.params;
        const tm = await tmStart;

        // Run every requested task ephemerally in parallel, recording either
        // its result or its error so a single failure does not fail the whole batch.
        const runEphemerally = async (task: EphemeralTask) => {
          try {
            const result = await tm.ephemeralRunNow(task);
            return { result };
          } catch (error) {
            return { error };
          }
        };
        const executions = await Promise.all((tasks as EphemeralTask[]).map(runEphemerally));

        return {
          state: { executions },
        };
      },
    }),
  },
});
taskManager.addMiddleware({
async beforeSave({ taskInstance, ...opts }) {
const modifiedInstance = {
@ -213,3 +295,8 @@ const once = function (emitter: EventEmitter, event: string): Promise<Record<str
emitter.once(event, (data) => resolve(data || {}));
});
};
/**
 * Starts a timer and returns a function that, when called, reports the
 * start time together with the current time (both in epoch milliseconds).
 */
function startTaskTimer(): () => { start: number; stop: number } {
  const startedAt = Date.now();
  return function stopTaskTimer() {
    return { start: startedAt, stop: Date.now() };
  };
}

View file

@ -5,7 +5,7 @@
* 2.0.
*/
import _ from 'lodash';
import { random, times } from 'lodash';
import expect from '@kbn/expect';
import type { estypes } from '@elastic/elasticsearch';
import url from 'url';
@ -183,6 +183,20 @@ export default function ({ getService }: FtrProviderContext) {
.then((response) => response.body);
}
// TODO: Add this back in with https://github.com/elastic/kibana/issues/106139
// function runEphemeralTaskNow(task: {
// taskType: string;
// params: Record<string, any>;
// state: Record<string, any>;
// }) {
// return supertest
// .post('/api/sample_tasks/ephemeral_run_now')
// .set('kbn-xsrf', 'xxx')
// .send({ task })
// .expect(200)
// .then((response) => response.body);
// }
function scheduleTaskIfNotExists(task: Partial<ConcreteTaskInstance>) {
return supertest
.post('/api/sample_tasks/ensure_scheduled')
@ -225,7 +239,7 @@ export default function ({ getService }: FtrProviderContext) {
}
it('should support middleware', async () => {
const historyItem = _.random(1, 100);
const historyItem = random(1, 100);
const scheduledTask = await scheduleTask({
taskType: 'sampleTask',
@ -330,8 +344,8 @@ export default function ({ getService }: FtrProviderContext) {
});
it('should reschedule if task returns runAt', async () => {
const nextRunMilliseconds = _.random(60000, 200000);
const count = _.random(1, 20);
const nextRunMilliseconds = random(60000, 200000);
const count = random(1, 20);
const originalTask = await scheduleTask({
taskType: 'sampleTask',
@ -351,7 +365,7 @@ export default function ({ getService }: FtrProviderContext) {
});
it('should reschedule if task has an interval', async () => {
const interval = _.random(5, 200);
const interval = random(5, 200);
const intervalMilliseconds = interval * 60000;
const originalTask = await scheduleTask({
@ -372,7 +386,7 @@ export default function ({ getService }: FtrProviderContext) {
});
it('should support the deprecated interval field', async () => {
const interval = _.random(5, 200);
const interval = random(5, 200);
const intervalMilliseconds = interval * 60000;
const originalTask = await scheduleTask({
@ -471,7 +485,7 @@ export default function ({ getService }: FtrProviderContext) {
// Task Manager to use up its worker capacity
// causing tasks to pile up
await Promise.all(
_.times(DEFAULT_MAX_WORKERS + _.random(1, DEFAULT_MAX_WORKERS), () =>
times(DEFAULT_MAX_WORKERS + random(1, DEFAULT_MAX_WORKERS), () =>
scheduleTask({
taskType: 'sampleTask',
params: {
@ -887,5 +901,246 @@ export default function ({ getService }: FtrProviderContext) {
expect(scheduledTask.attempts).to.be.greaterThan(3);
});
});
// TODO: Add this back in with https://github.com/elastic/kibana/issues/106139
// it('should return the resulting task state when asked to run an ephemeral task now', async () => {
// const ephemeralTask = await runEphemeralTaskNow({
// taskType: 'sampleTask',
// params: {},
// state: {},
// });
// await retry.try(async () => {
// expect(
// (await historyDocs()).filter((taskDoc) => taskDoc._source.taskId === ephemeralTask.id)
// .length
// ).to.eql(1);
// expect(ephemeralTask.state.count).to.eql(1);
// });
// const secondEphemeralTask = await runEphemeralTaskNow({
// taskType: 'sampleTask',
// params: {},
// // pass state from previous ephemeral run as input for the second run
// state: ephemeralTask.state,
// });
// // ensure state is cumulative
// expect(secondEphemeralTask.state.count).to.eql(2);
// await retry.try(async () => {
// // ensure new id is produced for second task execution
// expect(
// (await historyDocs()).filter((taskDoc) => taskDoc._source.taskId === ephemeralTask.id)
// .length
// ).to.eql(1);
// expect(
// (await historyDocs()).filter(
// (taskDoc) => taskDoc._source.taskId === secondEphemeralTask.id
// ).length
// ).to.eql(1);
// });
// });
// TODO: Add this back in with https://github.com/elastic/kibana/issues/106139
// it('Ephemeral task run should only run one instance of a task if its maxConcurrency is 1', async () => {
// const ephemeralTaskWithSingleConcurrency: {
// state: {
// executions: Array<{
// result: {
// id: string;
// state: {
// timings: Array<{
// start: number;
// stop: number;
// }>;
// };
// };
// }>;
// };
// } = await runEphemeralTaskNow({
// taskType: 'taskWhichExecutesOtherTasksEphemerally',
// params: {
// tasks: [
// {
// taskType: 'timedTaskWithSingleConcurrency',
// params: { delay: 1000 },
// state: {},
// },
// {
// taskType: 'timedTaskWithSingleConcurrency',
// params: { delay: 1000 },
// state: {},
// },
// {
// taskType: 'timedTaskWithSingleConcurrency',
// params: { delay: 1000 },
// state: {},
// },
// {
// taskType: 'timedTaskWithSingleConcurrency',
// params: { delay: 1000 },
// state: {},
// },
// ],
// },
// state: {},
// });
// ensureOverlappingTasksDontExceedThreshold(
// ephemeralTaskWithSingleConcurrency.state.executions,
// // make sure no task intersects with any other task (threshold of 0 overlaps)
// 0
// );
// });
// TODO: Add this back in with https://github.com/elastic/kibana/issues/106139
// it('Ephemeral task run should only run as many instances of a task as its maxConcurrency will allow', async () => {
// const ephemeralTaskWithSingleConcurrency: {
// state: {
// executions: Array<{
// result: {
// id: string;
// state: {
// timings: Array<{
// start: number;
// stop: number;
// }>;
// };
// };
// }>;
// };
// } = await runEphemeralTaskNow({
// taskType: 'taskWhichExecutesOtherTasksEphemerally',
// params: {
// tasks: [
// {
// taskType: 'timedTaskWithLimitedConcurrency',
// params: { delay: 100 },
// state: {},
// },
// {
// taskType: 'timedTaskWithLimitedConcurrency',
// params: { delay: 100 },
// state: {},
// },
// {
// taskType: 'timedTaskWithLimitedConcurrency',
// params: { delay: 100 },
// state: {},
// },
// {
// taskType: 'timedTaskWithLimitedConcurrency',
// params: { delay: 100 },
// state: {},
// },
// {
// taskType: 'timedTaskWithLimitedConcurrency',
// params: { delay: 100 },
// state: {},
// },
// {
// taskType: 'timedTaskWithLimitedConcurrency',
// params: { delay: 100 },
// state: {},
// },
// ],
// },
// state: {},
// });
// ensureOverlappingTasksDontExceedThreshold(
// ephemeralTaskWithSingleConcurrency.state.executions,
// // make sure each task intersects with, at most, 1 other task
// 1
// );
// });
// TODO: Add this back in with https://github.com/elastic/kibana/issues/106139
// it('Ephemeral task executions cant exceed the max workers in Task Manager', async () => {
// const ephemeralTaskWithSingleConcurrency: {
// state: {
// executions: Array<{
// result: {
// id: string;
// state: {
// timings: Array<{
// start: number;
// stop: number;
// }>;
// };
// };
// }>;
// };
// } = await runEphemeralTaskNow({
// taskType: 'taskWhichExecutesOtherTasksEphemerally',
// params: {
// tasks: times(20, () => ({
// taskType: 'timedTask',
// params: { delay: 100 },
// state: {},
// })),
// },
// state: {},
// });
// ensureOverlappingTasksDontExceedThreshold(
// ephemeralTaskWithSingleConcurrency.state.executions,
// // make sure each task intersects with, at most, 9 other tasks (as max workers is 10)
// 9
// );
// });
});
// TODO: Add this back in with https://github.com/elastic/kibana/issues/106139
// function ensureOverlappingTasksDontExceedThreshold(
// executions: Array<{
// result: {
// id: string;
// state: {
// timings: Array<{
// start: number;
// stop: number;
// }>;
// };
// };
// }>,
// threshold: number
// ) {
// const executionRanges = executions.map((execution) => ({
// id: execution.result.id,
// range: range(
// // calculate range of milliseconds
// // in which the task was running (that should be good enough)
// execution.result.state.timings[0].start,
// execution.result.state.timings[0].stop
// ),
// }));
// const intersections = new Map<string, string[]>();
// for (const currentExecution of executionRanges) {
// for (const executionToCompareTo of executionRanges) {
// if (currentExecution.id !== executionToCompareTo.id) {
// // find all executions that intersect
// if (intersection(currentExecution.range, executionToCompareTo.range).length) {
// intersections.set(currentExecution.id, [
// ...(intersections.get(currentExecution.id) ?? []),
// executionToCompareTo.id,
// ]);
// }
// }
// }
// }
// const tooManyIntersectingTasks = [...intersections.entries()].find(
// // make sure each task intersects with, at most, `threshold` other tasks
// ([, intersectingTasks]) => intersectingTasks.length > threshold
// );
// if (tooManyIntersectingTasks) {
// throw new Error(
// `Invalid execution found: ${tooManyIntersectingTasks[0]} overlaps with ${tooManyIntersectingTasks[1]}`
// );
// }
// }
}