diff --git a/docs/settings/alert-action-settings.asciidoc b/docs/settings/alert-action-settings.asciidoc index d1d283ca60fb..a523c2cb005a 100644 --- a/docs/settings/alert-action-settings.asciidoc +++ b/docs/settings/alert-action-settings.asciidoc @@ -207,4 +207,10 @@ Use `full` to perform hostname verification, `certificate` to skip hostname veri [[alert-settings]] ==== Alerting settings -You do not need to configure any additional settings to use alerting in {kib}. +[cols="2*<"] +|=== + +| `xpack.alerting.maxEphemeralActionsPerAlert` + | Sets the maximum number of actions per alert that will be executed ephemerally. To use this, first enable ephemeral tasks in Task Manager with <>. + +|=== \ No newline at end of file diff --git a/docs/settings/task-manager-settings.asciidoc b/docs/settings/task-manager-settings.asciidoc index 7f4dbb3a96e6..fa89b7780e47 100644 --- a/docs/settings/task-manager-settings.asciidoc +++ b/docs/settings/task-manager-settings.asciidoc @@ -37,6 +37,14 @@ Task Manager runs background tasks by polling for work on an interval. You can `monitored_stats_health_verbose_log.` `warn_delayed_task_start_in_seconds` | The amount of seconds we allow a task to delay before printing a warning server log. Defaults to 60. + + | `xpack.task_manager.ephemeral_tasks.enabled` + | Enables an experimental feature that executes a limited (and configurable) number of actions in the same task as the alert that triggered them. + These action tasks reduce the latency between an action being triggered and when it runs, but they are not persisted as SavedObjects. + Because they are not persisted, these action tasks may not run at all if the {kib} instance running them exits unexpectedly. Defaults to false. + + | `xpack.task_manager.ephemeral_tasks.request_capacity` + | Sets the size of the ephemeral queue defined above. Defaults to 10. 
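For reference, a minimal `kibana.yml` sketch combining the settings introduced by these two docs changes; the values shown are the documented defaults with the experimental flag switched on, not recommendations:

```yaml
# Enable the experimental ephemeral task path in Task Manager (default: false).
xpack.task_manager.ephemeral_tasks.enabled: true
# Size of the in-memory ephemeral queue (documented default: 10).
xpack.task_manager.ephemeral_tasks.request_capacity: 10
# Maximum number of actions per alert that may be executed ephemerally (documented default: 10).
xpack.alerting.maxEphemeralActionsPerAlert: 10
```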
|=== [float] diff --git a/x-pack/plugins/actions/server/actions_client.mock.ts b/x-pack/plugins/actions/server/actions_client.mock.ts index 3795f0131305..aa766eba92eb 100644 --- a/x-pack/plugins/actions/server/actions_client.mock.ts +++ b/x-pack/plugins/actions/server/actions_client.mock.ts @@ -21,6 +21,7 @@ const createActionsClientMock = () => { getBulk: jest.fn(), execute: jest.fn(), enqueueExecution: jest.fn(), + ephemeralEnqueuedExecution: jest.fn(), listTypes: jest.fn(), isActionTypeEnabled: jest.fn(), }; diff --git a/x-pack/plugins/actions/server/actions_client.test.ts b/x-pack/plugins/actions/server/actions_client.test.ts index 012cd1a58de7..4b600d73ab0b 100644 --- a/x-pack/plugins/actions/server/actions_client.test.ts +++ b/x-pack/plugins/actions/server/actions_client.test.ts @@ -44,6 +44,7 @@ const scopedClusterClient = elasticsearchServiceMock.createScopedClusterClient() const actionExecutor = actionExecutorMock.create(); const authorization = actionsAuthorizationMock.create(); const executionEnqueuer = jest.fn(); +const ephemeralExecutionEnqueuer = jest.fn(); const request = httpServerMock.createKibanaRequest(); const auditLogger = auditServiceMock.create().asScoped(request); @@ -77,6 +78,7 @@ beforeEach(() => { preconfiguredActions: [], actionExecutor, executionEnqueuer, + ephemeralExecutionEnqueuer, request, authorization: (authorization as unknown) as ActionsAuthorization, auditLogger, @@ -453,6 +455,7 @@ describe('create()', () => { preconfiguredActions: [], actionExecutor, executionEnqueuer, + ephemeralExecutionEnqueuer, request, authorization: (authorization as unknown) as ActionsAuthorization, }); @@ -553,6 +556,7 @@ describe('get()', () => { defaultKibanaIndex, actionExecutor, executionEnqueuer, + ephemeralExecutionEnqueuer, request, authorization: (authorization as unknown) as ActionsAuthorization, preconfiguredActions: [ @@ -608,6 +612,7 @@ describe('get()', () => { defaultKibanaIndex, actionExecutor, executionEnqueuer, + ephemeralExecutionEnqueuer, request, authorization: (authorization as unknown) as ActionsAuthorization, preconfiguredActions: [ @@ -724,6 +729,7 @@ describe('get()', () => { defaultKibanaIndex, actionExecutor, executionEnqueuer, + ephemeralExecutionEnqueuer, request, authorization: (authorization as unknown) as ActionsAuthorization, preconfiguredActions: [ @@ -793,6 +799,7 @@ describe('getAll()', () => { defaultKibanaIndex, actionExecutor, executionEnqueuer, + ephemeralExecutionEnqueuer, request, authorization: (authorization as unknown) as ActionsAuthorization, preconfiguredActions: [ @@ -930,6 +937,7 @@ describe('getAll()', () => { defaultKibanaIndex, actionExecutor, executionEnqueuer, + ephemeralExecutionEnqueuer, request, authorization: (authorization as unknown) as ActionsAuthorization, preconfiguredActions: [ @@ -1005,6 +1013,7 @@ describe('getBulk()', () => { defaultKibanaIndex, actionExecutor, executionEnqueuer, + ephemeralExecutionEnqueuer, request, authorization: (authorization as unknown) as ActionsAuthorization, preconfiguredActions: [ @@ -1136,6 +1145,7 @@ describe('getBulk()', () => { defaultKibanaIndex, actionExecutor, executionEnqueuer, + ephemeralExecutionEnqueuer, request, authorization: (authorization as unknown) as ActionsAuthorization, preconfiguredActions: [ diff --git a/x-pack/plugins/actions/server/actions_client.ts b/x-pack/plugins/actions/server/actions_client.ts index f8d13cdafa75..66032a7c411b 100644 --- a/x-pack/plugins/actions/server/actions_client.ts +++ b/x-pack/plugins/actions/server/actions_client.ts @@ -41,6 +41,7 
@@ import { AuthorizationMode, } from './authorization/get_authorization_mode_by_source'; import { connectorAuditEvent, ConnectorAuditAction } from './lib/audit_events'; +import { RunNowResult } from '../../task_manager/server'; // We are assuming there won't be many actions. This is why we will load // all the actions in advance and assume the total count to not go over 10000. @@ -68,7 +69,8 @@ interface ConstructorOptions { unsecuredSavedObjectsClient: SavedObjectsClientContract; preconfiguredActions: PreConfiguredAction[]; actionExecutor: ActionExecutorContract; - executionEnqueuer: ExecutionEnqueuer; + executionEnqueuer: ExecutionEnqueuer; + ephemeralExecutionEnqueuer: ExecutionEnqueuer; request: KibanaRequest; authorization: ActionsAuthorization; auditLogger?: AuditLogger; @@ -88,7 +90,8 @@ export class ActionsClient { private readonly actionExecutor: ActionExecutorContract; private readonly request: KibanaRequest; private readonly authorization: ActionsAuthorization; - private readonly executionEnqueuer: ExecutionEnqueuer; + private readonly executionEnqueuer: ExecutionEnqueuer; + private readonly ephemeralExecutionEnqueuer: ExecutionEnqueuer; private readonly auditLogger?: AuditLogger; constructor({ @@ -99,6 +102,7 @@ export class ActionsClient { preconfiguredActions, actionExecutor, executionEnqueuer, + ephemeralExecutionEnqueuer, request, authorization, auditLogger, @@ -110,6 +114,7 @@ export class ActionsClient { this.preconfiguredActions = preconfiguredActions; this.actionExecutor = actionExecutor; this.executionEnqueuer = executionEnqueuer; + this.ephemeralExecutionEnqueuer = ephemeralExecutionEnqueuer; this.request = request; this.authorization = authorization; this.auditLogger = auditLogger; @@ -497,6 +502,17 @@ export class ActionsClient { return this.executionEnqueuer(this.unsecuredSavedObjectsClient, options); } + public async ephemeralEnqueuedExecution(options: EnqueueExecutionOptions): Promise { + const { source } = options; + if ( + (await getAuthorizationModeBySource(this.unsecuredSavedObjectsClient, source)) === + AuthorizationMode.RBAC + ) { + await this.authorization.ensureAuthorized('execute'); + } + return this.ephemeralExecutionEnqueuer(this.unsecuredSavedObjectsClient, options); + } + public async listTypes(): Promise { return this.actionTypeRegistry.list(); } diff --git a/x-pack/plugins/actions/server/create_execute_function.ts b/x-pack/plugins/actions/server/create_execute_function.ts index 7dcd66c711bd..bcad5f20d9ba 100644 --- a/x-pack/plugins/actions/server/create_execute_function.ts +++ b/x-pack/plugins/actions/server/create_execute_function.ts @@ -6,8 +6,13 @@ */ import { SavedObjectsClientContract } from '../../../../src/core/server'; -import { TaskManagerStartContract } from '../../task_manager/server'; -import { RawAction, ActionTypeRegistryContract, PreConfiguredAction } from './types'; +import { RunNowResult, TaskManagerStartContract } from '../../task_manager/server'; +import { + RawAction, + ActionTypeRegistryContract, + PreConfiguredAction, + ActionTaskExecutorParams, +} from './types'; import { ACTION_TASK_PARAMS_SAVED_OBJECT_TYPE } from './constants/saved_objects'; import { ExecuteOptions as ActionExecutorOptions } from './lib/action_executor'; import { isSavedObjectExecutionSource } from './lib'; @@ -27,17 +32,17 @@ export interface ExecuteOptions extends Pick = ( unsecuredSavedObjectsClient: SavedObjectsClientContract, options: ExecuteOptions -) => Promise; +) => Promise; export function createExecutionEnqueuerFunction({ taskManager, 
actionTypeRegistry, isESOCanEncrypt, preconfiguredActions, -}: CreateExecuteFunctionOptions) { +}: CreateExecuteFunctionOptions): ExecutionEnqueuer { return async function execute( unsecuredSavedObjectsClient: SavedObjectsClientContract, { id, params, spaceId, source, apiKey, relatedSavedObjects }: ExecuteOptions @@ -48,18 +53,10 @@ export function createExecutionEnqueuerFunction({ ); } - const { actionTypeId, name, isMissingSecrets } = await getAction( - unsecuredSavedObjectsClient, - preconfiguredActions, - id - ); - - if (isMissingSecrets) { - throw new Error( - `Unable to execute action because no secrets are defined for the "${name}" connector.` - ); - } + const action = await getAction(unsecuredSavedObjectsClient, preconfiguredActions, id); + validateCanActionBeUsed(action); + const { actionTypeId } = action; if (!actionTypeRegistry.isActionExecutable(id, actionTypeId, { notifyUsage: true })) { actionTypeRegistry.ensureActionTypeEnabled(actionTypeId); } @@ -76,7 +73,7 @@ export function createExecutionEnqueuerFunction({ ); await taskManager.schedule({ - taskType: `actions:${actionTypeId}`, + taskType: `actions:${action.actionTypeId}`, params: { spaceId, actionTaskParamsId: actionTaskParamsRecord.id, @@ -87,6 +84,53 @@ export function createExecutionEnqueuerFunction({ }; } +export function createEphemeralExecutionEnqueuerFunction({ + taskManager, + actionTypeRegistry, + preconfiguredActions, +}: CreateExecuteFunctionOptions): ExecutionEnqueuer { + return async function execute( + unsecuredSavedObjectsClient: SavedObjectsClientContract, + { id, params, spaceId, source, apiKey }: ExecuteOptions + ): Promise { + const action = await getAction(unsecuredSavedObjectsClient, preconfiguredActions, id); + validateCanActionBeUsed(action); + + const { actionTypeId } = action; + if (!actionTypeRegistry.isActionExecutable(id, actionTypeId, { notifyUsage: true })) { + actionTypeRegistry.ensureActionTypeEnabled(actionTypeId); + } + + const taskParams: ActionTaskExecutorParams = { + spaceId, + taskParams: { + actionId: id, + // Saved Objects won't allow us to enforce unknown rather than any + // eslint-disable-next-line @typescript-eslint/no-explicit-any + params: params as Record, + ...(apiKey ? { apiKey } : {}), + }, + ...executionSourceAsSavedObjectReferences(source), + }; + + return taskManager.ephemeralRunNow({ + taskType: `actions:${action.actionTypeId}`, + params: taskParams, + state: {}, + scope: ['actions'], + }); + }; +} + +function validateCanActionBeUsed(action: PreConfiguredAction | RawAction) { + const { name, isMissingSecrets } = action; + if (isMissingSecrets) { + throw new Error( + `Unable to execute action because no secrets are defined for the "${name}" connector.` + ); + } +} + function executionSourceAsSavedObjectReferences(executionSource: ActionExecutorOptions['source']) { return isSavedObjectExecutionSource(executionSource) ? 
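To make the intent of the two enqueuers above concrete, here is a minimal, self-contained sketch (not part of the patch) of how a caller might prefer the ephemeral path and fall back to the persisted queue when the ephemeral capacity is exhausted. The names `ephemeralEnqueuedExecution`, `enqueueExecution`, and `isEphemeralTaskRejectedDueToCapacityError` come from this PR; the wrapper function and its import paths are illustrative assumptions.

```typescript
// Illustrative sketch only -- import paths assume a module living in x-pack/plugins/actions/server.
import { ActionsClient } from './actions_client';
import { ExecuteOptions } from './create_execute_function';
import { isEphemeralTaskRejectedDueToCapacityError } from '../../task_manager/server';

// Hypothetical helper: try the low-latency ephemeral path first and fall back to the
// durable, saved-object-backed task queue if the ephemeral queue rejects for capacity.
async function enqueueWithEphemeralPreference(
  actionsClient: ActionsClient,
  options: ExecuteOptions,
  supportsEphemeralTasks: boolean
): Promise<void> {
  if (!supportsEphemeralTasks) {
    // Persisted path: creates an action_task_params saved object and schedules a task.
    await actionsClient.enqueueExecution(options);
    return;
  }
  try {
    // Ephemeral path: runs without a saved object, so it is lost if the Kibana
    // instance running it exits unexpectedly.
    await actionsClient.ephemeralEnqueuedExecution(options);
  } catch (err) {
    if (isEphemeralTaskRejectedDueToCapacityError(err)) {
      // Ephemeral queue is full -- fall back to the persisted queue.
      await actionsClient.enqueueExecution(options);
    } else {
      throw err;
    }
  }
}
```

The alerting execution handler later in this diff applies the same fallback, but it schedules the ephemeral call without awaiting it and additionally caps how many ephemeral actions one alert may schedule via `maxEphemeralActionsPerAlert`.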
{ diff --git a/x-pack/plugins/actions/server/lib/action_executor.ts b/x-pack/plugins/actions/server/lib/action_executor.ts index 9e62b123951d..5dfe56cff501 100644 --- a/x-pack/plugins/actions/server/lib/action_executor.ts +++ b/x-pack/plugins/actions/server/lib/action_executor.ts @@ -48,6 +48,7 @@ export interface TaskInfo { export interface ExecuteOptions { actionId: string; + isEphemeral?: boolean; request: KibanaRequest; params: Record; source?: ActionExecutionSource; @@ -79,6 +80,7 @@ export class ActionExecutor { params, request, source, + isEphemeral, taskInfo, relatedSavedObjects, }: ExecuteOptions): Promise> { @@ -207,6 +209,7 @@ export class ActionExecutor { params: validatedParams, config: validatedConfig, secrets: validatedSecrets, + isEphemeral, }); } catch (err) { rawResult = { diff --git a/x-pack/plugins/actions/server/lib/task_runner_factory.test.ts b/x-pack/plugins/actions/server/lib/task_runner_factory.test.ts index 495d638951b5..722ba08a2625 100644 --- a/x-pack/plugins/actions/server/lib/task_runner_factory.test.ts +++ b/x-pack/plugins/actions/server/lib/task_runner_factory.test.ts @@ -125,6 +125,7 @@ test('executes the task by calling the executor with proper parameters', async ( expect(mockedActionExecutor.execute).toHaveBeenCalledWith({ actionId: '2', + isEphemeral: false, params: { baz: true }, relatedSavedObjects: [], request: expect.objectContaining({ @@ -250,6 +251,7 @@ test('uses API key when provided', async () => { expect(mockedActionExecutor.execute).toHaveBeenCalledWith({ actionId: '2', + isEphemeral: false, params: { baz: true }, relatedSavedObjects: [], request: expect.objectContaining({ @@ -293,6 +295,7 @@ test('uses relatedSavedObjects when provided', async () => { expect(mockedActionExecutor.execute).toHaveBeenCalledWith({ actionId: '2', + isEphemeral: false, params: { baz: true }, relatedSavedObjects: [ { @@ -334,14 +337,15 @@ test('sanitizes invalid relatedSavedObjects when provided', async () => { await taskRunner.run(); expect(mockedActionExecutor.execute).toHaveBeenCalledWith({ actionId: '2', + isEphemeral: false, params: { baz: true }, - relatedSavedObjects: [], request: expect.objectContaining({ headers: { // base64 encoded "123:abc" authorization: 'ApiKey MTIzOmFiYw==', }, }), + relatedSavedObjects: [], taskInfo: { scheduled: new Date(), }, @@ -369,6 +373,7 @@ test(`doesn't use API key when not provided`, async () => { expect(mockedActionExecutor.execute).toHaveBeenCalledWith({ actionId: '2', + isEphemeral: false, params: { baz: true }, relatedSavedObjects: [], request: expect.objectContaining({ diff --git a/x-pack/plugins/actions/server/lib/task_runner_factory.ts b/x-pack/plugins/actions/server/lib/task_runner_factory.ts index 64169de728f7..2354ea55eded 100644 --- a/x-pack/plugins/actions/server/lib/task_runner_factory.ts +++ b/x-pack/plugins/actions/server/lib/task_runner_factory.ts @@ -16,6 +16,7 @@ import { KibanaRequest, SavedObjectReference, IBasePath, + SavedObject, } from '../../../../../src/core/server'; import { ActionExecutorContract } from './action_executor'; import { ExecutorError } from './executor_error'; @@ -27,6 +28,8 @@ import { ActionTypeRegistryContract, SpaceIdToNamespaceFunction, ActionTypeExecutorResult, + ActionTaskExecutorParams, + isPersistedActionTask, } from '../types'; import { ACTION_TASK_PARAMS_SAVED_OBJECT_TYPE } from '../constants/saved_objects'; import { asSavedObjectExecutionSource } from './action_execution_source'; @@ -78,16 +81,16 @@ export class TaskRunnerFactory { return { async run() { - const { spaceId, 
actionTaskParamsId } = taskInstance.params as Record; - const namespace = spaceIdToNamespace(spaceId); + const actionTaskExecutorParams = taskInstance.params as ActionTaskExecutorParams; + const { spaceId } = actionTaskExecutorParams; const { attributes: { actionId, params, apiKey, relatedSavedObjects }, references, - } = await encryptedSavedObjectsClient.getDecryptedAsInternalUser( - ACTION_TASK_PARAMS_SAVED_OBJECT_TYPE, - actionTaskParamsId, - { namespace } + } = await getActionTaskParams( + actionTaskExecutorParams, + encryptedSavedObjectsClient, + spaceIdToNamespace ); const requestHeaders: Record = {}; @@ -119,7 +122,8 @@ export class TaskRunnerFactory { try { executorResult = await actionExecutor.execute({ params, - actionId, + actionId: actionId as string, + isEphemeral: !isPersistedActionTask(actionTaskExecutorParams), request: fakeRequest, ...getSourceFromReferences(references), taskInfo, @@ -144,26 +148,46 @@ export class TaskRunnerFactory { } // Cleanup action_task_params object now that we're done with it - try { - // If the request has reached this far we can assume the user is allowed to run clean up - // We would idealy secure every operation but in order to support clean up of legacy alerts - // we allow this operation in an unsecured manner - // Once support for legacy alert RBAC is dropped, this can be secured - await getUnsecuredSavedObjectsClient(fakeRequest).delete( - ACTION_TASK_PARAMS_SAVED_OBJECT_TYPE, - actionTaskParamsId - ); - } catch (e) { - // Log error only, we shouldn't fail the task because of an error here (if ever there's retry logic) - logger.error( - `Failed to cleanup ${ACTION_TASK_PARAMS_SAVED_OBJECT_TYPE} object [id="${actionTaskParamsId}"]: ${e.message}` - ); + if (isPersistedActionTask(actionTaskExecutorParams)) { + try { + // If the request has reached this far we can assume the user is allowed to run clean up + // We would idealy secure every operation but in order to support clean up of legacy alerts + // we allow this operation in an unsecured manner + // Once support for legacy alert RBAC is dropped, this can be secured + await getUnsecuredSavedObjectsClient(fakeRequest).delete( + ACTION_TASK_PARAMS_SAVED_OBJECT_TYPE, + actionTaskExecutorParams.actionTaskParamsId + ); + } catch (e) { + // Log error only, we shouldn't fail the task because of an error here (if ever there's retry logic) + logger.error( + `Failed to cleanup ${ACTION_TASK_PARAMS_SAVED_OBJECT_TYPE} object [id="${actionTaskExecutorParams.actionTaskParamsId}"]: ${e.message}` + ); + } } }, }; } } +async function getActionTaskParams( + executorParams: ActionTaskExecutorParams, + encryptedSavedObjectsClient: EncryptedSavedObjectsClient, + spaceIdToNamespace: SpaceIdToNamespaceFunction +): Promise, 'id' | 'type'>> { + const { spaceId } = executorParams; + const namespace = spaceIdToNamespace(spaceId); + if (isPersistedActionTask(executorParams)) { + return encryptedSavedObjectsClient.getDecryptedAsInternalUser( + ACTION_TASK_PARAMS_SAVED_OBJECT_TYPE, + executorParams.actionTaskParamsId, + { namespace } + ); + } else { + return { attributes: executorParams.taskParams, references: executorParams.references ?? 
[] }; + } +} + function getSourceFromReferences(references: SavedObjectReference[]) { return pipe( fromNullable(references.find((ref) => ref.name === 'source')), diff --git a/x-pack/plugins/actions/server/plugin.ts b/x-pack/plugins/actions/server/plugin.ts index 2c5287525c59..2f4b1325e3df 100644 --- a/x-pack/plugins/actions/server/plugin.ts +++ b/x-pack/plugins/actions/server/plugin.ts @@ -38,7 +38,10 @@ import { ActionsConfig, getValidatedConfig } from './config'; import { resolveCustomHosts } from './lib/custom_host_settings'; import { ActionsClient } from './actions_client'; import { ActionTypeRegistry } from './action_type_registry'; -import { createExecutionEnqueuerFunction } from './create_execute_function'; +import { + createExecutionEnqueuerFunction, + createEphemeralExecutionEnqueuerFunction, +} from './create_execute_function'; import { registerBuiltInActionTypes } from './builtin_action_types'; import { registerActionsUsageCollector } from './usage'; import { @@ -332,6 +335,12 @@ export class ActionsPlugin implements Plugin { config: Config; secrets: Secrets; params: Params; + isEphemeral?: boolean; } export interface ActionResult { @@ -132,10 +134,25 @@ export interface ActionTaskParams extends SavedObjectAttributes { apiKey?: string; } -export interface ActionTaskExecutorParams { +interface PersistedActionTaskExecutorParams { spaceId: string; actionTaskParamsId: string; } +interface EphemeralActionTaskExecutorParams { + spaceId: string; + taskParams: ActionTaskParams; + references?: SavedObjectReference[]; +} + +export type ActionTaskExecutorParams = + | PersistedActionTaskExecutorParams + | EphemeralActionTaskExecutorParams; + +export function isPersistedActionTask( + actionTask: ActionTaskExecutorParams +): actionTask is PersistedActionTaskExecutorParams { + return typeof (actionTask as PersistedActionTaskExecutorParams).actionTaskParamsId === 'string'; +} export interface ProxySettings { proxyUrl: string; diff --git a/x-pack/plugins/alerting/server/config.test.ts b/x-pack/plugins/alerting/server/config.test.ts index f7280e05b78f..a1ae77596ccb 100644 --- a/x-pack/plugins/alerting/server/config.test.ts +++ b/x-pack/plugins/alerting/server/config.test.ts @@ -19,6 +19,7 @@ describe('config validation', () => { "interval": "5m", "removalDelay": "1h", }, + "maxEphemeralActionsPerAlert": 10, } `); }); diff --git a/x-pack/plugins/alerting/server/config.ts b/x-pack/plugins/alerting/server/config.ts index e42955b385bf..47ef451ceab9 100644 --- a/x-pack/plugins/alerting/server/config.ts +++ b/x-pack/plugins/alerting/server/config.ts @@ -8,6 +8,7 @@ import { schema, TypeOf } from '@kbn/config-schema'; import { validateDurationSchema } from './lib'; +export const DEFAULT_MAX_EPHEMERAL_ACTIONS_PER_ALERT = 10; export const configSchema = schema.object({ healthCheck: schema.object({ interval: schema.string({ validate: validateDurationSchema, defaultValue: '60m' }), @@ -16,6 +17,9 @@ export const configSchema = schema.object({ interval: schema.string({ validate: validateDurationSchema, defaultValue: '5m' }), removalDelay: schema.string({ validate: validateDurationSchema, defaultValue: '1h' }), }), + maxEphemeralActionsPerAlert: schema.number({ + defaultValue: DEFAULT_MAX_EPHEMERAL_ACTIONS_PER_ALERT, + }), }); export type AlertsConfig = TypeOf; diff --git a/x-pack/plugins/alerting/server/health/get_state.test.ts b/x-pack/plugins/alerting/server/health/get_state.test.ts index 24f3c101b26b..b58a12794188 100644 --- a/x-pack/plugins/alerting/server/health/get_state.test.ts +++ 
b/x-pack/plugins/alerting/server/health/get_state.test.ts @@ -71,6 +71,7 @@ describe('getHealthServiceStatusWithRetryAndErrorHandling', () => { interval: '5m', removalDelay: '1h', }, + maxEphemeralActionsPerAlert: 100, }), pollInterval ).subscribe(); @@ -104,6 +105,7 @@ describe('getHealthServiceStatusWithRetryAndErrorHandling', () => { interval: '5m', removalDelay: '1h', }, + maxEphemeralActionsPerAlert: 100, }), pollInterval, retryDelay @@ -148,6 +150,7 @@ describe('getHealthServiceStatusWithRetryAndErrorHandling', () => { interval: '5m', removalDelay: '1h', }, + maxEphemeralActionsPerAlert: 100, }) ).toPromise(); @@ -178,6 +181,7 @@ describe('getHealthServiceStatusWithRetryAndErrorHandling', () => { interval: '5m', removalDelay: '1h', }, + maxEphemeralActionsPerAlert: 100, }) ).toPromise(); @@ -208,6 +212,7 @@ describe('getHealthServiceStatusWithRetryAndErrorHandling', () => { interval: '5m', removalDelay: '1h', }, + maxEphemeralActionsPerAlert: 100, }) ).toPromise(); @@ -235,6 +240,7 @@ describe('getHealthServiceStatusWithRetryAndErrorHandling', () => { interval: '5m', removalDelay: '1h', }, + maxEphemeralActionsPerAlert: 100, }), retryDelay ).subscribe((status) => { @@ -265,6 +271,7 @@ describe('getHealthServiceStatusWithRetryAndErrorHandling', () => { interval: '5m', removalDelay: '1h', }, + maxEphemeralActionsPerAlert: 100, }), retryDelay ).subscribe((status) => { @@ -301,6 +308,7 @@ describe('getHealthServiceStatusWithRetryAndErrorHandling', () => { interval: '5m', removalDelay: '1h', }, + maxEphemeralActionsPerAlert: 100, }) ).toPromise(); diff --git a/x-pack/plugins/alerting/server/index.ts b/x-pack/plugins/alerting/server/index.ts index 957bd89f52f3..0faaca5d549f 100644 --- a/x-pack/plugins/alerting/server/index.ts +++ b/x-pack/plugins/alerting/server/index.ts @@ -29,6 +29,7 @@ export type { AlertInstanceContext, AlertingApiRequestHandlerContext, } from './types'; +export { DEFAULT_MAX_EPHEMERAL_ACTIONS_PER_ALERT } from './config'; export { PluginSetupContract, PluginStartContract } from './plugin'; export { FindResult } from './alerts_client'; export { PublicAlertInstance as AlertInstance } from './alert_instance'; diff --git a/x-pack/plugins/alerting/server/plugin.test.ts b/x-pack/plugins/alerting/server/plugin.test.ts index 9adc3cc9d656..cfb30b6bd220 100644 --- a/x-pack/plugins/alerting/server/plugin.test.ts +++ b/x-pack/plugins/alerting/server/plugin.test.ts @@ -36,6 +36,7 @@ describe('Alerting Plugin', () => { interval: '5m', removalDelay: '1h', }, + maxEphemeralActionsPerAlert: 10, }); plugin = new AlertingPlugin(context); @@ -122,6 +123,7 @@ describe('Alerting Plugin', () => { interval: '5m', removalDelay: '1h', }, + maxEphemeralActionsPerAlert: 10, }); const plugin = new AlertingPlugin(context); @@ -161,6 +163,7 @@ describe('Alerting Plugin', () => { interval: '5m', removalDelay: '1h', }, + maxEphemeralActionsPerAlert: 10, }); const plugin = new AlertingPlugin(context); @@ -214,6 +217,7 @@ describe('Alerting Plugin', () => { interval: '5m', removalDelay: '1h', }, + maxEphemeralActionsPerAlert: 100, }); const plugin = new AlertingPlugin(context); diff --git a/x-pack/plugins/alerting/server/plugin.ts b/x-pack/plugins/alerting/server/plugin.ts index b906983017ff..8a90a918b4d0 100644 --- a/x-pack/plugins/alerting/server/plugin.ts +++ b/x-pack/plugins/alerting/server/plugin.ts @@ -376,6 +376,8 @@ export class AlertingPlugin { internalSavedObjectsRepository: core.savedObjects.createInternalRepository(['alert']), alertTypeRegistry: this.alertTypeRegistry!, kibanaBaseUrl: 
this.kibanaBaseUrl, + supportsEphemeralTasks: plugins.taskManager.supportsEphemeralTasks(), + maxEphemeralActionsPerAlert: this.config.then((config) => config.maxEphemeralActionsPerAlert), }); this.eventLogService!.registerSavedObjectProvider('alert', (request) => { diff --git a/x-pack/plugins/alerting/server/task_runner/create_execution_handler.test.ts b/x-pack/plugins/alerting/server/task_runner/create_execution_handler.test.ts index b264428b4d6f..e6618d293a39 100644 --- a/x-pack/plugins/alerting/server/task_runner/create_execution_handler.test.ts +++ b/x-pack/plugins/alerting/server/task_runner/create_execution_handler.test.ts @@ -96,6 +96,8 @@ const createExecutionHandlerParams: jest.Mocked< contextVal: 'My other {{context.value}} goes here', stateVal: 'My other {{state.value}} goes here', }, + supportsEphemeralTasks: false, + maxEphemeralActionsPerAlert: Promise.resolve(10), }; beforeEach(() => { diff --git a/x-pack/plugins/alerting/server/task_runner/create_execution_handler.ts b/x-pack/plugins/alerting/server/task_runner/create_execution_handler.ts index 3004ed599128..b1f5b44f529c 100644 --- a/x-pack/plugins/alerting/server/task_runner/create_execution_handler.ts +++ b/x-pack/plugins/alerting/server/task_runner/create_execution_handler.ts @@ -4,12 +4,11 @@ * 2.0; you may not use this file except in compliance with the Elastic License * 2.0. */ - import { Logger, KibanaRequest } from '../../../../../src/core/server'; import { transformActionParams } from './transform_action_params'; import { - PluginStartContract as ActionsPluginStartContract, asSavedObjectExecutionSource, + PluginStartContract as ActionsPluginStartContract, } from '../../../actions/server'; import { IEventLogger, IEvent, SAVED_OBJECT_REL_PRIMARY } from '../../../event_log/server'; import { EVENT_LOG_ACTIONS } from '../plugin'; @@ -23,6 +22,7 @@ import { RawAlert, } from '../types'; import { NormalizedAlertType } from '../alert_type_registry'; +import { isEphemeralTaskRejectedDueToCapacityError } from '../../../task_manager/server'; export interface CreateExecutionHandlerOptions< Params extends AlertTypeParams, @@ -52,6 +52,8 @@ export interface CreateExecutionHandlerOptions< eventLogger: IEventLogger; request: KibanaRequest; alertParams: AlertTypeParams; + supportsEphemeralTasks: boolean; + maxEphemeralActionsPerAlert: Promise; } interface ExecutionHandlerOptions { @@ -87,6 +89,8 @@ export function createExecutionHandler< eventLogger, request, alertParams, + supportsEphemeralTasks, + maxEphemeralActionsPerAlert, }: CreateExecutionHandlerOptions< Params, State, @@ -147,6 +151,8 @@ export function createExecutionHandler< const alertLabel = `${alertType.id}:${alertId}: '${alertName}'`; + const actionsClient = await actionsPlugin.getActionsClientWithRequest(request); + let ephemeralActionsToSchedule = await maxEphemeralActionsPerAlert; for (const action of actions) { if ( !actionsPlugin.isActionExecutable(action.id, action.actionTypeId, { notifyUsage: true }) @@ -159,10 +165,7 @@ export function createExecutionHandler< const namespace = spaceId === 'default' ? 
{} : { namespace: spaceId }; - // TODO would be nice to add the action name here, but it's not available - const actionLabel = `${action.actionTypeId}:${action.id}`; - const actionsClient = await actionsPlugin.getActionsClientWithRequest(request); - await actionsClient.enqueueExecution({ + const enqueueOptions = { id: action.id, params: action.params, spaceId, @@ -179,7 +182,20 @@ export function createExecutionHandler< typeId: alertType.id, }, ], - }); + }; + + // TODO would be nice to add the action name here, but it's not available + const actionLabel = `${action.actionTypeId}:${action.id}`; + if (supportsEphemeralTasks && ephemeralActionsToSchedule > 0) { + ephemeralActionsToSchedule--; + actionsClient.ephemeralEnqueuedExecution(enqueueOptions).catch(async (err) => { + if (isEphemeralTaskRejectedDueToCapacityError(err)) { + await actionsClient.enqueueExecution(enqueueOptions); + } + }); + } else { + await actionsClient.enqueueExecution(enqueueOptions); + } const event: IEvent = { event: { diff --git a/x-pack/plugins/alerting/server/task_runner/task_runner.test.ts b/x-pack/plugins/alerting/server/task_runner/task_runner.test.ts index 4f650975f830..62ca000bc836 100644 --- a/x-pack/plugins/alerting/server/task_runner/task_runner.test.ts +++ b/x-pack/plugins/alerting/server/task_runner/task_runner.test.ts @@ -17,6 +17,7 @@ import { import { ConcreteTaskInstance, isUnrecoverableError, + RunNowResult, TaskStatus, } from '../../../task_manager/server'; import { TaskRunnerContext } from './task_runner_factory'; @@ -37,6 +38,7 @@ import { Alert, RecoveredActionGroup } from '../../common'; import { omit } from 'lodash'; import { UntypedNormalizedAlertType } from '../alert_type_registry'; import { alertTypeRegistryMock } from '../alert_type_registry.mock'; +import { ExecuteOptions } from '../../../actions/server/create_execute_function'; const alertType: jest.Mocked = { id: 'test', @@ -84,10 +86,12 @@ describe('Task Runner', () => { const alertsClient = alertsClientMock.create(); const alertTypeRegistry = alertTypeRegistryMock.create(); - const taskRunnerFactoryInitializerParams: jest.Mocked & { + type TaskRunnerFactoryInitializerParamsType = jest.Mocked & { actionsPlugin: jest.Mocked; eventLogger: jest.Mocked; - } = { + }; + + const taskRunnerFactoryInitializerParams: TaskRunnerFactoryInitializerParamsType = { getServices: jest.fn().mockReturnValue(services), actionsPlugin: actionsMock.createStart(), getAlertsClientWithRequest: jest.fn().mockReturnValue(alertsClient), @@ -99,8 +103,30 @@ describe('Task Runner', () => { internalSavedObjectsRepository: savedObjectsRepositoryMock.create(), alertTypeRegistry, kibanaBaseUrl: 'https://localhost:5601', + supportsEphemeralTasks: false, + maxEphemeralActionsPerAlert: new Promise((resolve) => resolve(10)), }; + function testAgainstEphemeralSupport( + name: string, + fn: ( + params: TaskRunnerFactoryInitializerParamsType, + enqueueFunction: (options: ExecuteOptions) => Promise + ) => jest.ProvidesCallback + ) { + test(name, fn(taskRunnerFactoryInitializerParams, actionsClient.enqueueExecution)); + test( + `${name} (with ephemeral support)`, + fn( + { + ...taskRunnerFactoryInitializerParams, + supportsEphemeralTasks: true, + }, + actionsClient.ephemeralEnqueuedExecution + ) + ); + } + const mockDate = new Date('2019-02-12T21:01:22.479Z'); const mockedAlertTypeSavedObject: Alert = { @@ -314,41 +340,51 @@ describe('Task Runner', () => { ); }); - test('actionsPlugin.execute is called per alert instance that is scheduled', async () => { - 
taskRunnerFactoryInitializerParams.actionsPlugin.isActionTypeEnabled.mockReturnValue(true); - taskRunnerFactoryInitializerParams.actionsPlugin.isActionExecutable.mockReturnValue(true); - alertType.executor.mockImplementation( - async ({ - services: executorServices, - }: AlertExecutorOptions< - AlertTypeParams, - AlertTypeState, - AlertInstanceState, - AlertInstanceContext, - string - >) => { - executorServices - .alertInstanceFactory('1') - .scheduleActionsWithSubGroup('default', 'subDefault'); - } - ); - const taskRunner = new TaskRunner( - alertType, - mockedTaskInstance, - taskRunnerFactoryInitializerParams - ); - alertsClient.get.mockResolvedValue(mockedAlertTypeSavedObject); - encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValue({ - id: '1', - type: 'alert', - attributes: { - apiKey: Buffer.from('123:abc').toString('base64'), - }, - references: [], - }); - await taskRunner.run(); - expect(actionsClient.enqueueExecution).toHaveBeenCalledTimes(1); - expect(actionsClient.enqueueExecution.mock.calls[0]).toMatchInlineSnapshot(` + testAgainstEphemeralSupport( + 'actionsPlugin.execute is called per alert instance that is scheduled', + ( + customTaskRunnerFactoryInitializerParams: TaskRunnerFactoryInitializerParamsType, + enqueueFunction: (options: ExecuteOptions) => Promise + ) => async () => { + customTaskRunnerFactoryInitializerParams.actionsPlugin.isActionTypeEnabled.mockReturnValue( + true + ); + customTaskRunnerFactoryInitializerParams.actionsPlugin.isActionExecutable.mockReturnValue( + true + ); + actionsClient.ephemeralEnqueuedExecution.mockResolvedValue(new Promise(() => {})); + alertType.executor.mockImplementation( + async ({ + services: executorServices, + }: AlertExecutorOptions< + AlertTypeParams, + AlertTypeState, + AlertInstanceState, + AlertInstanceContext, + string + >) => { + executorServices + .alertInstanceFactory('1') + .scheduleActionsWithSubGroup('default', 'subDefault'); + } + ); + const taskRunner = new TaskRunner( + alertType, + mockedTaskInstance, + customTaskRunnerFactoryInitializerParams + ); + alertsClient.get.mockResolvedValue(mockedAlertTypeSavedObject); + encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValue({ + id: '1', + type: 'alert', + attributes: { + apiKey: Buffer.from('123:abc').toString('base64'), + }, + references: [], + }); + await taskRunner.run(); + expect(enqueueFunction).toHaveBeenCalledTimes(1); + expect((enqueueFunction as jest.Mock).mock.calls[0]).toMatchInlineSnapshot(` Array [ Object { "apiKey": "MTIzOmFiYw==", @@ -376,179 +412,181 @@ describe('Task Runner', () => { ] `); - const logger = taskRunnerFactoryInitializerParams.logger; - expect(logger.debug).toHaveBeenCalledTimes(3); - expect(logger.debug).nthCalledWith(1, 'executing alert test:1 at 1970-01-01T00:00:00.000Z'); - expect(logger.debug).nthCalledWith( - 2, - `alert test:1: 'alert-name' has 1 active alert instances: [{\"instanceId\":\"1\",\"actionGroup\":\"default\"}]` - ); - expect(logger.debug).nthCalledWith( - 3, - 'alertExecutionStatus for test:1: {"lastExecutionDate":"1970-01-01T00:00:00.000Z","status":"active"}' - ); + const logger = customTaskRunnerFactoryInitializerParams.logger; + expect(logger.debug).toHaveBeenCalledTimes(3); + expect(logger.debug).nthCalledWith(1, 'executing alert test:1 at 1970-01-01T00:00:00.000Z'); + expect(logger.debug).nthCalledWith( + 2, + `alert test:1: 'alert-name' has 1 active alert instances: [{\"instanceId\":\"1\",\"actionGroup\":\"default\"}]` + ); + expect(logger.debug).nthCalledWith( + 3, + 
'alertExecutionStatus for test:1: {"lastExecutionDate":"1970-01-01T00:00:00.000Z","status":"active"}' + ); + // alertExecutionStatus for test:1: {\"lastExecutionDate\":\"1970-01-01T00:00:00.000Z\",\"status\":\"error\",\"error\":{\"reason\":\"unknown\",\"message\":\"Cannot read property 'catch' of undefined\"}} - const eventLogger = taskRunnerFactoryInitializerParams.eventLogger; - expect(eventLogger.logEvent).toHaveBeenCalledTimes(5); - expect(eventLogger.logEvent).toHaveBeenNthCalledWith(1, { - '@timestamp': '1970-01-01T00:00:00.000Z', - event: { - action: 'execute-start', - category: ['alerts'], - kind: 'alert', - }, - kibana: { - task: { - schedule_delay: 0, - scheduled: '1970-01-01T00:00:00.000Z', + const eventLogger = customTaskRunnerFactoryInitializerParams.eventLogger; + expect(eventLogger.logEvent).toHaveBeenCalledTimes(5); + expect(eventLogger.logEvent).toHaveBeenNthCalledWith(1, { + '@timestamp': '1970-01-01T00:00:00.000Z', + event: { + action: 'execute-start', + category: ['alerts'], + kind: 'alert', }, - saved_objects: [ - { - id: '1', - namespace: undefined, - rel: 'primary', - type: 'alert', - type_id: 'test', + kibana: { + task: { + schedule_delay: 0, + scheduled: '1970-01-01T00:00:00.000Z', }, - ], - }, - message: `alert execution start: "1"`, - rule: { - category: 'test', - id: '1', - license: 'basic', - ruleset: 'alerts', - }, - }); - expect(eventLogger.logEvent).toHaveBeenNthCalledWith(2, { - event: { - action: 'new-instance', - category: ['alerts'], - kind: 'alert', - duration: 0, - start: '1970-01-01T00:00:00.000Z', - }, - kibana: { - alerting: { - action_group_id: 'default', - action_subgroup: 'subDefault', - instance_id: '1', + saved_objects: [ + { + id: '1', + namespace: undefined, + rel: 'primary', + type: 'alert', + type_id: 'test', + }, + ], }, - saved_objects: [ - { - id: '1', - namespace: undefined, - rel: 'primary', - type: 'alert', - type_id: 'test', - }, - ], - }, - message: "test:1: 'alert-name' created new instance: '1'", - rule: { - category: 'test', - id: '1', - license: 'basic', - name: 'alert-name', - namespace: undefined, - ruleset: 'alerts', - }, - }); - expect(eventLogger.logEvent).toHaveBeenNthCalledWith(3, { - event: { - action: 'active-instance', - category: ['alerts'], - duration: 0, - kind: 'alert', - start: '1970-01-01T00:00:00.000Z', - }, - kibana: { - alerting: { action_group_id: 'default', action_subgroup: 'subDefault', instance_id: '1' }, - saved_objects: [ - { id: '1', namespace: undefined, rel: 'primary', type: 'alert', type_id: 'test' }, - ], - }, - message: - "test:1: 'alert-name' active instance: '1' in actionGroup(subgroup): 'default(subDefault)'", - rule: { - category: 'test', - id: '1', - license: 'basic', - name: 'alert-name', - namespace: undefined, - ruleset: 'alerts', - }, - }); - expect(eventLogger.logEvent).toHaveBeenNthCalledWith(4, { - event: { - action: 'execute-action', - category: ['alerts'], - kind: 'alert', - }, - kibana: { - alerting: { - instance_id: '1', - action_group_id: 'default', - action_subgroup: 'subDefault', + message: `alert execution start: "1"`, + rule: { + category: 'test', + id: '1', + license: 'basic', + ruleset: 'alerts', }, - saved_objects: [ - { - id: '1', - namespace: undefined, - rel: 'primary', - type: 'alert', - type_id: 'test', - }, - { - id: '1', - namespace: undefined, - type: 'action', - type_id: 'action', - }, - ], - }, - message: - "alert: test:1: 'alert-name' instanceId: '1' scheduled actionGroup(subgroup): 'default(subDefault)' action: action:1", - rule: { - category: 'test', - id: '1', 
- license: 'basic', - name: 'alert-name', - namespace: undefined, - ruleset: 'alerts', - }, - }); - expect(eventLogger.logEvent).toHaveBeenNthCalledWith(5, { - '@timestamp': '1970-01-01T00:00:00.000Z', - event: { action: 'execute', category: ['alerts'], kind: 'alert', outcome: 'success' }, - kibana: { - alerting: { - status: 'active', + }); + expect(eventLogger.logEvent).toHaveBeenNthCalledWith(2, { + event: { + action: 'new-instance', + category: ['alerts'], + kind: 'alert', + duration: 0, + start: '1970-01-01T00:00:00.000Z', }, - task: { - schedule_delay: 0, - scheduled: '1970-01-01T00:00:00.000Z', - }, - saved_objects: [ - { - id: '1', - namespace: undefined, - rel: 'primary', - type: 'alert', - type_id: 'test', + kibana: { + alerting: { + action_group_id: 'default', + action_subgroup: 'subDefault', + instance_id: '1', }, - ], - }, - message: "alert executed: test:1: 'alert-name'", - rule: { - category: 'test', - id: '1', - license: 'basic', - name: 'alert-name', - ruleset: 'alerts', - }, - }); - }); + saved_objects: [ + { + id: '1', + namespace: undefined, + rel: 'primary', + type: 'alert', + type_id: 'test', + }, + ], + }, + message: "test:1: 'alert-name' created new instance: '1'", + rule: { + category: 'test', + id: '1', + license: 'basic', + name: 'alert-name', + namespace: undefined, + ruleset: 'alerts', + }, + }); + expect(eventLogger.logEvent).toHaveBeenNthCalledWith(3, { + event: { + action: 'active-instance', + category: ['alerts'], + duration: 0, + kind: 'alert', + start: '1970-01-01T00:00:00.000Z', + }, + kibana: { + alerting: { action_group_id: 'default', action_subgroup: 'subDefault', instance_id: '1' }, + saved_objects: [ + { id: '1', namespace: undefined, rel: 'primary', type: 'alert', type_id: 'test' }, + ], + }, + message: + "test:1: 'alert-name' active instance: '1' in actionGroup(subgroup): 'default(subDefault)'", + rule: { + category: 'test', + id: '1', + license: 'basic', + name: 'alert-name', + namespace: undefined, + ruleset: 'alerts', + }, + }); + expect(eventLogger.logEvent).toHaveBeenNthCalledWith(4, { + event: { + action: 'execute-action', + category: ['alerts'], + kind: 'alert', + }, + kibana: { + alerting: { + instance_id: '1', + action_group_id: 'default', + action_subgroup: 'subDefault', + }, + saved_objects: [ + { + id: '1', + namespace: undefined, + rel: 'primary', + type: 'alert', + type_id: 'test', + }, + { + id: '1', + namespace: undefined, + type: 'action', + type_id: 'action', + }, + ], + }, + message: + "alert: test:1: 'alert-name' instanceId: '1' scheduled actionGroup(subgroup): 'default(subDefault)' action: action:1", + rule: { + category: 'test', + id: '1', + license: 'basic', + name: 'alert-name', + namespace: undefined, + ruleset: 'alerts', + }, + }); + expect(eventLogger.logEvent).toHaveBeenNthCalledWith(5, { + '@timestamp': '1970-01-01T00:00:00.000Z', + event: { action: 'execute', category: ['alerts'], kind: 'alert', outcome: 'success' }, + kibana: { + alerting: { + status: 'active', + }, + task: { + schedule_delay: 0, + scheduled: '1970-01-01T00:00:00.000Z', + }, + saved_objects: [ + { + id: '1', + namespace: undefined, + rel: 'primary', + type: 'alert', + type_id: 'test', + }, + ], + }, + message: "alert executed: test:1: 'alert-name'", + rule: { + category: 'test', + id: '1', + license: 'basic', + name: 'alert-name', + ruleset: 'alerts', + }, + }); + } + ); test('actionsPlugin.execute is skipped if muteAll is true', async () => { taskRunnerFactoryInitializerParams.actionsPlugin.isActionTypeEnabled.mockReturnValue(true); @@ -584,7 +622,7 
@@ describe('Task Runner', () => { references: [], }); await taskRunner.run(); - expect(actionsClient.enqueueExecution).toHaveBeenCalledTimes(0); + expect(actionsClient.ephemeralEnqueuedExecution).toHaveBeenCalledTimes(0); const logger = taskRunnerFactoryInitializerParams.logger; expect(logger.debug).toHaveBeenCalledTimes(4); @@ -738,59 +776,70 @@ describe('Task Runner', () => { }); }); - test('skips firing actions for active instance if instance is muted', async () => { - taskRunnerFactoryInitializerParams.actionsPlugin.isActionTypeEnabled.mockReturnValue(true); - taskRunnerFactoryInitializerParams.actionsPlugin.isActionExecutable.mockReturnValue(true); - alertType.executor.mockImplementation( - async ({ - services: executorServices, - }: AlertExecutorOptions< - AlertTypeParams, - AlertTypeState, - AlertInstanceState, - AlertInstanceContext, - string - >) => { - executorServices.alertInstanceFactory('1').scheduleActions('default'); - executorServices.alertInstanceFactory('2').scheduleActions('default'); - } - ); - const taskRunner = new TaskRunner( - alertType, - mockedTaskInstance, - taskRunnerFactoryInitializerParams - ); - alertsClient.get.mockResolvedValue({ - ...mockedAlertTypeSavedObject, - mutedInstanceIds: ['2'], - }); - encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValue({ - id: '1', - type: 'alert', - attributes: { - apiKey: Buffer.from('123:abc').toString('base64'), - }, - references: [], - }); - await taskRunner.run(); - expect(actionsClient.enqueueExecution).toHaveBeenCalledTimes(1); + testAgainstEphemeralSupport( + 'skips firing actions for active instance if instance is muted', + ( + customTaskRunnerFactoryInitializerParams: TaskRunnerFactoryInitializerParamsType, + enqueueFunction: (options: ExecuteOptions) => Promise + ) => async () => { + customTaskRunnerFactoryInitializerParams.actionsPlugin.isActionTypeEnabled.mockReturnValue( + true + ); + customTaskRunnerFactoryInitializerParams.actionsPlugin.isActionExecutable.mockReturnValue( + true + ); + actionsClient.ephemeralEnqueuedExecution.mockResolvedValue(new Promise(() => {})); + alertType.executor.mockImplementation( + async ({ + services: executorServices, + }: AlertExecutorOptions< + AlertTypeParams, + AlertTypeState, + AlertInstanceState, + AlertInstanceContext, + string + >) => { + executorServices.alertInstanceFactory('1').scheduleActions('default'); + executorServices.alertInstanceFactory('2').scheduleActions('default'); + } + ); + const taskRunner = new TaskRunner( + alertType, + mockedTaskInstance, + customTaskRunnerFactoryInitializerParams + ); + alertsClient.get.mockResolvedValue({ + ...mockedAlertTypeSavedObject, + mutedInstanceIds: ['2'], + }); + encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValue({ + id: '1', + type: 'alert', + attributes: { + apiKey: Buffer.from('123:abc').toString('base64'), + }, + references: [], + }); + await taskRunner.run(); + expect(enqueueFunction).toHaveBeenCalledTimes(1); - const logger = taskRunnerFactoryInitializerParams.logger; - expect(logger.debug).toHaveBeenCalledTimes(4); - expect(logger.debug).nthCalledWith(1, 'executing alert test:1 at 1970-01-01T00:00:00.000Z'); - expect(logger.debug).nthCalledWith( - 2, - `alert test:1: 'alert-name' has 2 active alert instances: [{\"instanceId\":\"1\",\"actionGroup\":\"default\"},{\"instanceId\":\"2\",\"actionGroup\":\"default\"}]` - ); - expect(logger.debug).nthCalledWith( - 3, - `skipping scheduling of actions for '2' in alert test:1: 'alert-name': instance is muted` - ); - 
expect(logger.debug).nthCalledWith( - 4, - 'alertExecutionStatus for test:1: {"lastExecutionDate":"1970-01-01T00:00:00.000Z","status":"active"}' - ); - }); + const logger = customTaskRunnerFactoryInitializerParams.logger; + expect(logger.debug).toHaveBeenCalledTimes(4); + expect(logger.debug).nthCalledWith(1, 'executing alert test:1 at 1970-01-01T00:00:00.000Z'); + expect(logger.debug).nthCalledWith( + 2, + `alert test:1: 'alert-name' has 2 active alert instances: [{\"instanceId\":\"1\",\"actionGroup\":\"default\"},{\"instanceId\":\"2\",\"actionGroup\":\"default\"}]` + ); + expect(logger.debug).nthCalledWith( + 3, + `skipping scheduling of actions for '2' in alert test:1: 'alert-name': instance is muted` + ); + expect(logger.debug).nthCalledWith( + 4, + 'alertExecutionStatus for test:1: {"lastExecutionDate":"1970-01-01T00:00:00.000Z","status":"active"}' + ); + } + ); test('actionsPlugin.execute is not called when notifyWhen=onActionGroupChange and alert instance state does not change', async () => { taskRunnerFactoryInitializerParams.actionsPlugin.isActionTypeEnabled.mockReturnValue(true); @@ -843,7 +892,7 @@ describe('Task Runner', () => { references: [], }); await taskRunner.run(); - expect(actionsClient.enqueueExecution).toHaveBeenCalledTimes(0); + expect(actionsClient.ephemeralEnqueuedExecution).toHaveBeenCalledTimes(0); const eventLogger = taskRunnerFactoryInitializerParams.eventLogger; expect(eventLogger.logEvent).toHaveBeenCalledTimes(3); @@ -963,163 +1012,196 @@ describe('Task Runner', () => { `); }); - test('actionsPlugin.execute is called when notifyWhen=onActionGroupChange and alert instance state has changed', async () => { - taskRunnerFactoryInitializerParams.actionsPlugin.isActionTypeEnabled.mockReturnValue(true); - taskRunnerFactoryInitializerParams.actionsPlugin.isActionExecutable.mockReturnValue(true); - alertType.executor.mockImplementation( - async ({ - services: executorServices, - }: AlertExecutorOptions< - AlertTypeParams, - AlertTypeState, - AlertInstanceState, - AlertInstanceContext, - string - >) => { - executorServices.alertInstanceFactory('1').scheduleActions('default'); - } - ); - const taskRunner = new TaskRunner( - alertType, - { - ...mockedTaskInstance, - state: { - ...mockedTaskInstance.state, - alertInstances: { - '1': { - meta: { lastScheduledActions: { group: 'newGroup', date: new Date().toISOString() } }, - state: { bar: false }, - }, - }, - }, - }, - taskRunnerFactoryInitializerParams - ); - alertsClient.get.mockResolvedValue({ - ...mockedAlertTypeSavedObject, - notifyWhen: 'onActionGroupChange', - }); - encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValue({ - id: '1', - type: 'alert', - attributes: { - apiKey: Buffer.from('123:abc').toString('base64'), - }, - references: [], - }); - await taskRunner.run(); - expect(actionsClient.enqueueExecution).toHaveBeenCalledTimes(1); - }); - - test('actionsPlugin.execute is called when notifyWhen=onActionGroupChange and alert instance state subgroup has changed', async () => { - taskRunnerFactoryInitializerParams.actionsPlugin.isActionTypeEnabled.mockReturnValue(true); - taskRunnerFactoryInitializerParams.actionsPlugin.isActionExecutable.mockReturnValue(true); - alertType.executor.mockImplementation( - async ({ - services: executorServices, - }: AlertExecutorOptions< - AlertTypeParams, - AlertTypeState, - AlertInstanceState, - AlertInstanceContext, - string - >) => { - executorServices - .alertInstanceFactory('1') - .scheduleActionsWithSubGroup('default', 'subgroup1'); - } - ); - const 
taskRunner = new TaskRunner( - alertType, - { - ...mockedTaskInstance, - state: { - ...mockedTaskInstance.state, - alertInstances: { - '1': { - meta: { - lastScheduledActions: { - group: 'default', - subgroup: 'newSubgroup', - date: new Date().toISOString(), + testAgainstEphemeralSupport( + 'actionsPlugin.execute is called when notifyWhen=onActionGroupChange and alert instance state has changed', + ( + customTaskRunnerFactoryInitializerParams: TaskRunnerFactoryInitializerParamsType, + enqueueFunction: (options: ExecuteOptions) => Promise + ) => async () => { + customTaskRunnerFactoryInitializerParams.actionsPlugin.isActionTypeEnabled.mockReturnValue( + true + ); + customTaskRunnerFactoryInitializerParams.actionsPlugin.isActionExecutable.mockReturnValue( + true + ); + alertType.executor.mockImplementation( + async ({ + services: executorServices, + }: AlertExecutorOptions< + AlertTypeParams, + AlertTypeState, + AlertInstanceState, + AlertInstanceContext, + string + >) => { + executorServices.alertInstanceFactory('1').scheduleActions('default'); + } + ); + const taskRunner = new TaskRunner( + alertType, + { + ...mockedTaskInstance, + state: { + ...mockedTaskInstance.state, + alertInstances: { + '1': { + meta: { + lastScheduledActions: { group: 'newGroup', date: new Date().toISOString() }, }, + state: { bar: false }, }, - state: { bar: false }, }, }, }, - }, - taskRunnerFactoryInitializerParams - ); - alertsClient.get.mockResolvedValue({ - ...mockedAlertTypeSavedObject, - notifyWhen: 'onActionGroupChange', - }); - encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValue({ - id: '1', - type: 'alert', - attributes: { - apiKey: Buffer.from('123:abc').toString('base64'), - }, - references: [], - }); - await taskRunner.run(); - expect(actionsClient.enqueueExecution).toHaveBeenCalledTimes(1); - }); - - test('includes the apiKey in the request used to initialize the actionsClient', async () => { - taskRunnerFactoryInitializerParams.actionsPlugin.isActionTypeEnabled.mockReturnValue(true); - taskRunnerFactoryInitializerParams.actionsPlugin.isActionExecutable.mockReturnValue(true); - alertType.executor.mockImplementation( - async ({ - services: executorServices, - }: AlertExecutorOptions< - AlertTypeParams, - AlertTypeState, - AlertInstanceState, - AlertInstanceContext, - string - >) => { - executorServices.alertInstanceFactory('1').scheduleActions('default'); - } - ); - const taskRunner = new TaskRunner( - alertType, - mockedTaskInstance, - taskRunnerFactoryInitializerParams - ); - alertsClient.get.mockResolvedValue(mockedAlertTypeSavedObject); - encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValueOnce({ - id: '1', - type: 'alert', - attributes: { - apiKey: Buffer.from('123:abc').toString('base64'), - }, - references: [], - }); - await taskRunner.run(); - expect( - taskRunnerFactoryInitializerParams.actionsPlugin.getActionsClientWithRequest - ).toHaveBeenCalledWith( - expect.objectContaining({ - headers: { - // base64 encoded "123:abc" - authorization: 'ApiKey MTIzOmFiYw==', + customTaskRunnerFactoryInitializerParams + ); + alertsClient.get.mockResolvedValue({ + ...mockedAlertTypeSavedObject, + notifyWhen: 'onActionGroupChange', + }); + encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValue({ + id: '1', + type: 'alert', + attributes: { + apiKey: Buffer.from('123:abc').toString('base64'), }, - }) - ); + references: [], + }); + await taskRunner.run(); + expect(enqueueFunction).toHaveBeenCalledTimes(1); + } + ); - const [ - request, - ] = 
taskRunnerFactoryInitializerParams.actionsPlugin.getActionsClientWithRequest.mock.calls[0]; + testAgainstEphemeralSupport( + 'actionsPlugin.execute is called when notifyWhen=onActionGroupChange and alert instance state subgroup has changed', + ( + customTaskRunnerFactoryInitializerParams: TaskRunnerFactoryInitializerParamsType, + enqueueFunction: (options: ExecuteOptions) => Promise + ) => async () => { + customTaskRunnerFactoryInitializerParams.actionsPlugin.isActionTypeEnabled.mockReturnValue( + true + ); - expect(taskRunnerFactoryInitializerParams.basePathService.set).toHaveBeenCalledWith( - request, - '/' - ); + customTaskRunnerFactoryInitializerParams.actionsPlugin.isActionExecutable.mockReturnValue( + true + ); + alertType.executor.mockImplementation( + async ({ + services: executorServices, + }: AlertExecutorOptions< + AlertTypeParams, + AlertTypeState, + AlertInstanceState, + AlertInstanceContext, + string + >) => { + executorServices + .alertInstanceFactory('1') + .scheduleActionsWithSubGroup('default', 'subgroup1'); + } + ); + const taskRunner = new TaskRunner( + alertType, + { + ...mockedTaskInstance, + state: { + ...mockedTaskInstance.state, + alertInstances: { + '1': { + meta: { + lastScheduledActions: { + group: 'default', + subgroup: 'newSubgroup', + date: new Date().toISOString(), + }, + }, + state: { bar: false }, + }, + }, + }, + }, + customTaskRunnerFactoryInitializerParams + ); + alertsClient.get.mockResolvedValue({ + ...mockedAlertTypeSavedObject, + notifyWhen: 'onActionGroupChange', + }); + encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValue({ + id: '1', + type: 'alert', + attributes: { + apiKey: Buffer.from('123:abc').toString('base64'), + }, + references: [], + }); + await taskRunner.run(); + expect(enqueueFunction).toHaveBeenCalledTimes(1); + } + ); - expect(actionsClient.enqueueExecution).toHaveBeenCalledTimes(1); - expect(actionsClient.enqueueExecution.mock.calls[0]).toMatchInlineSnapshot(` + testAgainstEphemeralSupport( + 'includes the apiKey in the request used to initialize the actionsClient', + ( + customTaskRunnerFactoryInitializerParams: TaskRunnerFactoryInitializerParamsType, + enqueueFunction: (options: ExecuteOptions) => Promise + ) => async () => { + customTaskRunnerFactoryInitializerParams.actionsPlugin.isActionTypeEnabled.mockReturnValue( + true + ); + customTaskRunnerFactoryInitializerParams.actionsPlugin.isActionExecutable.mockReturnValue( + true + ); + actionsClient.ephemeralEnqueuedExecution.mockResolvedValue(new Promise(() => {})); + alertType.executor.mockImplementation( + async ({ + services: executorServices, + }: AlertExecutorOptions< + AlertTypeParams, + AlertTypeState, + AlertInstanceState, + AlertInstanceContext, + string + >) => { + executorServices.alertInstanceFactory('1').scheduleActions('default'); + } + ); + const taskRunner = new TaskRunner( + alertType, + mockedTaskInstance, + customTaskRunnerFactoryInitializerParams + ); + alertsClient.get.mockResolvedValue(mockedAlertTypeSavedObject); + encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValueOnce({ + id: '1', + type: 'alert', + attributes: { + apiKey: Buffer.from('123:abc').toString('base64'), + }, + references: [], + }); + await taskRunner.run(); + expect( + customTaskRunnerFactoryInitializerParams.actionsPlugin.getActionsClientWithRequest + ).toHaveBeenCalledWith( + expect.objectContaining({ + headers: { + // base64 encoded "123:abc" + authorization: 'ApiKey MTIzOmFiYw==', + }, + }) + ); + + const [ + request, + ] = 
customTaskRunnerFactoryInitializerParams.actionsPlugin.getActionsClientWithRequest.mock.calls[0]; + + expect(customTaskRunnerFactoryInitializerParams.basePathService.set).toHaveBeenCalledWith( + request, + '/' + ); + + expect(enqueueFunction).toHaveBeenCalledTimes(1); + expect((enqueueFunction as jest.Mock).mock.calls[0]).toMatchInlineSnapshot(` Array [ Object { "apiKey": "MTIzOmFiYw==", @@ -1147,10 +1229,10 @@ describe('Task Runner', () => { ] `); - const eventLogger = taskRunnerFactoryInitializerParams.eventLogger; - expect(eventLogger.logEvent).toHaveBeenCalledTimes(5); - expect(eventLogger.startTiming).toHaveBeenCalledTimes(1); - expect(eventLogger.logEvent.mock.calls).toMatchInlineSnapshot(` + const eventLogger = customTaskRunnerFactoryInitializerParams.eventLogger; + expect(eventLogger.logEvent).toHaveBeenCalledTimes(5); + expect(eventLogger.startTiming).toHaveBeenCalledTimes(1); + expect(eventLogger.logEvent.mock.calls).toMatchInlineSnapshot(` Array [ Array [ Object { @@ -1340,64 +1422,75 @@ describe('Task Runner', () => { ], ] `); - }); + } + ); - test('fire recovered actions for execution for the alertInstances which is in the recovered state', async () => { - taskRunnerFactoryInitializerParams.actionsPlugin.isActionTypeEnabled.mockReturnValue(true); - taskRunnerFactoryInitializerParams.actionsPlugin.isActionExecutable.mockReturnValue(true); + testAgainstEphemeralSupport( + 'fire recovered actions for execution for the alertInstances which is in the recovered state', + ( + customTaskRunnerFactoryInitializerParams: TaskRunnerFactoryInitializerParamsType, + enqueueFunction: (options: ExecuteOptions) => Promise + ) => async () => { + customTaskRunnerFactoryInitializerParams.actionsPlugin.isActionTypeEnabled.mockReturnValue( + true + ); + customTaskRunnerFactoryInitializerParams.actionsPlugin.isActionExecutable.mockReturnValue( + true + ); + actionsClient.ephemeralEnqueuedExecution.mockResolvedValue(new Promise(() => {})); - alertType.executor.mockImplementation( - async ({ - services: executorServices, - }: AlertExecutorOptions< - AlertTypeParams, - AlertTypeState, - AlertInstanceState, - AlertInstanceContext, - string - >) => { - executorServices.alertInstanceFactory('1').scheduleActions('default'); - } - ); - const taskRunner = new TaskRunner( - alertType, - { - ...mockedTaskInstance, - state: { - ...mockedTaskInstance.state, - alertInstances: { - '1': { - meta: {}, - state: { - bar: false, - start: '1969-12-31T00:00:00.000Z', - duration: 80000000000, + alertType.executor.mockImplementation( + async ({ + services: executorServices, + }: AlertExecutorOptions< + AlertTypeParams, + AlertTypeState, + AlertInstanceState, + AlertInstanceContext, + string + >) => { + executorServices.alertInstanceFactory('1').scheduleActions('default'); + } + ); + const taskRunner = new TaskRunner( + alertType, + { + ...mockedTaskInstance, + state: { + ...mockedTaskInstance.state, + alertInstances: { + '1': { + meta: {}, + state: { + bar: false, + start: '1969-12-31T00:00:00.000Z', + duration: 80000000000, + }, }, - }, - '2': { - meta: {}, - state: { - bar: false, - start: '1969-12-31T06:00:00.000Z', - duration: 70000000000, + '2': { + meta: {}, + state: { + bar: false, + start: '1969-12-31T06:00:00.000Z', + duration: 70000000000, + }, }, }, }, }, - }, - taskRunnerFactoryInitializerParams - ); - alertsClient.get.mockResolvedValue(mockedAlertTypeSavedObject); - encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValue({ - id: '1', - type: 'alert', - attributes: { - apiKey: 
Buffer.from('123:abc').toString('base64'), - }, - references: [], - }); - const runnerResult = await taskRunner.run(); - expect(runnerResult.state.alertInstances).toMatchInlineSnapshot(` + customTaskRunnerFactoryInitializerParams + ); + alertsClient.get.mockResolvedValue(mockedAlertTypeSavedObject); + encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValue({ + id: '1', + type: 'alert', + attributes: { + apiKey: Buffer.from('123:abc').toString('base64'), + }, + references: [], + }); + const runnerResult = await taskRunner.run(); + expect(runnerResult.state.alertInstances).toMatchInlineSnapshot(` Object { "1": Object { "meta": Object { @@ -1416,26 +1509,26 @@ describe('Task Runner', () => { } `); - const logger = taskRunnerFactoryInitializerParams.logger; - expect(logger.debug).toHaveBeenCalledTimes(4); - expect(logger.debug).nthCalledWith(1, 'executing alert test:1 at 1970-01-01T00:00:00.000Z'); - expect(logger.debug).nthCalledWith( - 2, - `alert test:1: 'alert-name' has 1 active alert instances: [{\"instanceId\":\"1\",\"actionGroup\":\"default\"}]` - ); - expect(logger.debug).nthCalledWith( - 3, - `alert test:1: 'alert-name' has 1 recovered alert instances: [\"2\"]` - ); - expect(logger.debug).nthCalledWith( - 4, - 'alertExecutionStatus for test:1: {"lastExecutionDate":"1970-01-01T00:00:00.000Z","status":"active"}' - ); + const logger = customTaskRunnerFactoryInitializerParams.logger; + expect(logger.debug).toHaveBeenCalledTimes(4); + expect(logger.debug).nthCalledWith(1, 'executing alert test:1 at 1970-01-01T00:00:00.000Z'); + expect(logger.debug).nthCalledWith( + 2, + `alert test:1: 'alert-name' has 1 active alert instances: [{\"instanceId\":\"1\",\"actionGroup\":\"default\"}]` + ); + expect(logger.debug).nthCalledWith( + 3, + `alert test:1: 'alert-name' has 1 recovered alert instances: [\"2\"]` + ); + expect(logger.debug).nthCalledWith( + 4, + 'alertExecutionStatus for test:1: {"lastExecutionDate":"1970-01-01T00:00:00.000Z","status":"active"}' + ); - const eventLogger = taskRunnerFactoryInitializerParams.eventLogger; - expect(eventLogger.logEvent).toHaveBeenCalledTimes(6); - expect(eventLogger.startTiming).toHaveBeenCalledTimes(1); - expect(eventLogger.logEvent.mock.calls).toMatchInlineSnapshot(` + const eventLogger = customTaskRunnerFactoryInitializerParams.eventLogger; + expect(eventLogger.logEvent).toHaveBeenCalledTimes(6); + expect(eventLogger.startTiming).toHaveBeenCalledTimes(1); + expect(eventLogger.logEvent.mock.calls).toMatchInlineSnapshot(` Array [ Array [ Object { @@ -1667,8 +1760,8 @@ describe('Task Runner', () => { ] `); - expect(actionsClient.enqueueExecution).toHaveBeenCalledTimes(2); - expect(actionsClient.enqueueExecution.mock.calls[0]).toMatchInlineSnapshot(` + expect(enqueueFunction).toHaveBeenCalledTimes(2); + expect((enqueueFunction as jest.Mock).mock.calls[0]).toMatchInlineSnapshot(` Array [ Object { "apiKey": "MTIzOmFiYw==", @@ -1695,57 +1788,68 @@ describe('Task Runner', () => { }, ] `); - }); + } + ); - test('should skip alertInstances which werent active on the previous execution', async () => { - const alertId = 'e558aaad-fd81-46d2-96fc-3bd8fc3dc03f'; - taskRunnerFactoryInitializerParams.actionsPlugin.isActionTypeEnabled.mockReturnValue(true); - taskRunnerFactoryInitializerParams.actionsPlugin.isActionExecutable.mockReturnValue(true); + testAgainstEphemeralSupport( + 'should skip alertInstances which werent active on the previous execution', + ( + customTaskRunnerFactoryInitializerParams: TaskRunnerFactoryInitializerParamsType, + 
enqueueFunction: (options: ExecuteOptions) => Promise + ) => async () => { + const alertId = 'e558aaad-fd81-46d2-96fc-3bd8fc3dc03f'; + customTaskRunnerFactoryInitializerParams.actionsPlugin.isActionTypeEnabled.mockReturnValue( + true + ); + customTaskRunnerFactoryInitializerParams.actionsPlugin.isActionExecutable.mockReturnValue( + true + ); + actionsClient.ephemeralEnqueuedExecution.mockResolvedValue(new Promise(() => {})); - alertType.executor.mockImplementation( - async ({ - services: executorServices, - }: AlertExecutorOptions< - AlertTypeParams, - AlertTypeState, - AlertInstanceState, - AlertInstanceContext, - string - >) => { - executorServices.alertInstanceFactory('1').scheduleActions('default'); + alertType.executor.mockImplementation( + async ({ + services: executorServices, + }: AlertExecutorOptions< + AlertTypeParams, + AlertTypeState, + AlertInstanceState, + AlertInstanceContext, + string + >) => { + executorServices.alertInstanceFactory('1').scheduleActions('default'); - // create an instance, but don't schedule any actions, so it doesn't go active - executorServices.alertInstanceFactory('3'); - } - ); - const taskRunner = new TaskRunner( - alertType, - { - ...mockedTaskInstance, - state: { - ...mockedTaskInstance.state, - alertInstances: { - '1': { meta: {}, state: { bar: false } }, - '2': { meta: {}, state: { bar: false } }, + // create an instance, but don't schedule any actions, so it doesn't go active + executorServices.alertInstanceFactory('3'); + } + ); + const taskRunner = new TaskRunner( + alertType, + { + ...mockedTaskInstance, + state: { + ...mockedTaskInstance.state, + alertInstances: { + '1': { meta: {}, state: { bar: false } }, + '2': { meta: {}, state: { bar: false } }, + }, + }, + params: { + alertId, }, }, - params: { - alertId, + customTaskRunnerFactoryInitializerParams + ); + alertsClient.get.mockResolvedValue(mockedAlertTypeSavedObject); + encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValue({ + id: alertId, + type: 'alert', + attributes: { + apiKey: Buffer.from('123:abc').toString('base64'), }, - }, - taskRunnerFactoryInitializerParams - ); - alertsClient.get.mockResolvedValue(mockedAlertTypeSavedObject); - encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValue({ - id: alertId, - type: 'alert', - attributes: { - apiKey: Buffer.from('123:abc').toString('base64'), - }, - references: [], - }); - const runnerResult = await taskRunner.run(); - expect(runnerResult.state.alertInstances).toMatchInlineSnapshot(` + references: [], + }); + const runnerResult = await taskRunner.run(); + expect(runnerResult.state.alertInstances).toMatchInlineSnapshot(` Object { "1": Object { "meta": Object { @@ -1762,93 +1866,111 @@ describe('Task Runner', () => { } `); - const logger = taskRunnerFactoryInitializerParams.logger; - expect(logger.debug).toHaveBeenCalledWith( - `alert test:${alertId}: 'alert-name' has 1 active alert instances: [{\"instanceId\":\"1\",\"actionGroup\":\"default\"}]` - ); - expect(logger.debug).toHaveBeenCalledWith( - `alert test:${alertId}: 'alert-name' has 1 recovered alert instances: [\"2\"]` - ); + const logger = customTaskRunnerFactoryInitializerParams.logger; + expect(logger.debug).toHaveBeenCalledWith( + `alert test:${alertId}: 'alert-name' has 1 active alert instances: [{\"instanceId\":\"1\",\"actionGroup\":\"default\"}]` + ); - const eventLogger = taskRunnerFactoryInitializerParams.eventLogger; - expect(eventLogger.logEvent).toHaveBeenCalledTimes(6); - 
expect(actionsClient.enqueueExecution).toHaveBeenCalledTimes(2); - expect(actionsClient.enqueueExecution.mock.calls[1][0].id).toEqual('1'); - expect(actionsClient.enqueueExecution.mock.calls[0][0].id).toEqual('2'); - }); + expect(logger.debug).nthCalledWith( + 3, + `alert test:${alertId}: 'alert-name' has 1 recovered alert instances: [\"2\"]` + ); + expect(logger.debug).nthCalledWith( + 4, + `alertExecutionStatus for test:${alertId}: {"lastExecutionDate":"1970-01-01T00:00:00.000Z","status":"active"}` + ); - test('fire actions under a custom recovery group when specified on an alert type for alertInstances which are in the recovered state', async () => { - taskRunnerFactoryInitializerParams.actionsPlugin.isActionTypeEnabled.mockReturnValue(true); - taskRunnerFactoryInitializerParams.actionsPlugin.isActionExecutable.mockReturnValue(true); + const eventLogger = customTaskRunnerFactoryInitializerParams.eventLogger; + expect(eventLogger.logEvent).toHaveBeenCalledTimes(6); + expect(enqueueFunction).toHaveBeenCalledTimes(2); + expect((enqueueFunction as jest.Mock).mock.calls[1][0].id).toEqual('1'); + expect((enqueueFunction as jest.Mock).mock.calls[0][0].id).toEqual('2'); + } + ); - const recoveryActionGroup = { - id: 'customRecovered', - name: 'Custom Recovered', - }; - const alertTypeWithCustomRecovery = { - ...alertType, - recoveryActionGroup, - actionGroups: [{ id: 'default', name: 'Default' }, recoveryActionGroup], - }; + testAgainstEphemeralSupport( + 'fire actions under a custom recovery group when specified on an alert type for alertInstances which are in the recovered state', + ( + customTaskRunnerFactoryInitializerParams: TaskRunnerFactoryInitializerParamsType, + enqueueFunction: (options: ExecuteOptions) => Promise + ) => async () => { + customTaskRunnerFactoryInitializerParams.actionsPlugin.isActionTypeEnabled.mockReturnValue( + true + ); + customTaskRunnerFactoryInitializerParams.actionsPlugin.isActionExecutable.mockReturnValue( + true + ); - alertTypeWithCustomRecovery.executor.mockImplementation( - async ({ - services: executorServices, - }: AlertExecutorOptions< - AlertTypeParams, - AlertTypeState, - AlertInstanceState, - AlertInstanceContext, - string - >) => { - executorServices.alertInstanceFactory('1').scheduleActions('default'); - } - ); - const taskRunner = new TaskRunner( - alertTypeWithCustomRecovery, - { - ...mockedTaskInstance, - state: { - ...mockedTaskInstance.state, - alertInstances: { - '1': { meta: {}, state: { bar: false } }, - '2': { meta: {}, state: { bar: false } }, - }, - }, - }, - taskRunnerFactoryInitializerParams - ); - alertsClient.get.mockResolvedValue({ - ...mockedAlertTypeSavedObject, - actions: [ + actionsClient.ephemeralEnqueuedExecution.mockResolvedValue(new Promise(() => {})); + + const recoveryActionGroup = { + id: 'customRecovered', + name: 'Custom Recovered', + }; + const alertTypeWithCustomRecovery = { + ...alertType, + recoveryActionGroup, + actionGroups: [{ id: 'default', name: 'Default' }, recoveryActionGroup], + }; + + alertTypeWithCustomRecovery.executor.mockImplementation( + async ({ + services: executorServices, + }: AlertExecutorOptions< + AlertTypeParams, + AlertTypeState, + AlertInstanceState, + AlertInstanceContext, + string + >) => { + executorServices.alertInstanceFactory('1').scheduleActions('default'); + } + ); + const taskRunner = new TaskRunner( + alertTypeWithCustomRecovery, { - group: 'default', - id: '1', - actionTypeId: 'action', - params: { - foo: true, + ...mockedTaskInstance, + state: { + ...mockedTaskInstance.state, + 
alertInstances: { + '1': { meta: {}, state: { bar: false } }, + '2': { meta: {}, state: { bar: false } }, + }, }, }, - { - group: recoveryActionGroup.id, - id: '2', - actionTypeId: 'action', - params: { - isResolved: true, + customTaskRunnerFactoryInitializerParams + ); + alertsClient.get.mockResolvedValue({ + ...mockedAlertTypeSavedObject, + actions: [ + { + group: 'default', + id: '1', + actionTypeId: 'action', + params: { + foo: true, + }, }, + { + group: recoveryActionGroup.id, + id: '2', + actionTypeId: 'action', + params: { + isResolved: true, + }, + }, + ], + }); + encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValue({ + id: '1', + type: 'alert', + attributes: { + apiKey: Buffer.from('123:abc').toString('base64'), }, - ], - }); - encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValue({ - id: '1', - type: 'alert', - attributes: { - apiKey: Buffer.from('123:abc').toString('base64'), - }, - references: [], - }); - const runnerResult = await taskRunner.run(); - expect(runnerResult.state.alertInstances).toMatchInlineSnapshot(` + references: [], + }); + const runnerResult = await taskRunner.run(); + expect(runnerResult.state.alertInstances).toMatchInlineSnapshot(` Object { "1": Object { "meta": Object { @@ -1865,10 +1987,10 @@ describe('Task Runner', () => { } `); - const eventLogger = taskRunnerFactoryInitializerParams.eventLogger; - expect(eventLogger.logEvent).toHaveBeenCalledTimes(6); - expect(actionsClient.enqueueExecution).toHaveBeenCalledTimes(2); - expect(actionsClient.enqueueExecution.mock.calls[0]).toMatchInlineSnapshot(` + const eventLogger = customTaskRunnerFactoryInitializerParams.eventLogger; + expect(eventLogger.logEvent).toHaveBeenCalledTimes(6); + expect(enqueueFunction).toHaveBeenCalledTimes(2); + expect((enqueueFunction as jest.Mock).mock.calls[0]).toMatchInlineSnapshot(` Array [ Object { "apiKey": "MTIzOmFiYw==", @@ -1895,7 +2017,8 @@ describe('Task Runner', () => { }, ] `); - }); + } + ); test('persists alertInstances passed in from state, only if they are scheduled for execution', async () => { alertType.executor.mockImplementation( @@ -4081,4 +4204,160 @@ describe('Task Runner', () => { ] `); }); + + test('successfully executes the task with ephemeral tasks enabled', async () => { + const taskRunner = new TaskRunner( + alertType, + { + ...mockedTaskInstance, + state: { + ...mockedTaskInstance.state, + previousStartedAt: new Date(Date.now() - 5 * 60 * 1000).toISOString(), + }, + }, + { + ...taskRunnerFactoryInitializerParams, + supportsEphemeralTasks: true, + } + ); + alertsClient.get.mockResolvedValue(mockedAlertTypeSavedObject); + encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValue({ + id: '1', + type: 'alert', + attributes: { + apiKey: Buffer.from('123:abc').toString('base64'), + }, + references: [], + }); + const runnerResult = await taskRunner.run(); + expect(runnerResult).toMatchInlineSnapshot(` + Object { + "schedule": Object { + "interval": "10s", + }, + "state": Object { + "alertInstances": Object {}, + "alertTypeState": undefined, + "previousStartedAt": 1970-01-01T00:00:00.000Z, + }, + } + `); + expect(alertType.executor).toHaveBeenCalledTimes(1); + const call = alertType.executor.mock.calls[0][0]; + expect(call.params).toMatchInlineSnapshot(` + Object { + "bar": true, + } + `); + expect(call.startedAt).toMatchInlineSnapshot(`1970-01-01T00:00:00.000Z`); + expect(call.previousStartedAt).toMatchInlineSnapshot(`1969-12-31T23:55:00.000Z`); + expect(call.state).toMatchInlineSnapshot(`Object {}`); + 
expect(call.name).toBe('alert-name'); + expect(call.tags).toEqual(['alert-', '-tags']); + expect(call.createdBy).toBe('alert-creator'); + expect(call.updatedBy).toBe('alert-updater'); + expect(call.rule).not.toBe(null); + expect(call.rule.name).toBe('alert-name'); + expect(call.rule.tags).toEqual(['alert-', '-tags']); + expect(call.rule.consumer).toBe('bar'); + expect(call.rule.enabled).toBe(true); + expect(call.rule.schedule).toMatchInlineSnapshot(` + Object { + "interval": "10s", + } + `); + expect(call.rule.createdBy).toBe('alert-creator'); + expect(call.rule.updatedBy).toBe('alert-updater'); + expect(call.rule.createdAt).toBe(mockDate); + expect(call.rule.updatedAt).toBe(mockDate); + expect(call.rule.notifyWhen).toBe('onActiveAlert'); + expect(call.rule.throttle).toBe(null); + expect(call.rule.producer).toBe('alerts'); + expect(call.rule.ruleTypeId).toBe('test'); + expect(call.rule.ruleTypeName).toBe('My test alert'); + expect(call.rule.actions).toMatchInlineSnapshot(` + Array [ + Object { + "actionTypeId": "action", + "group": "default", + "id": "1", + "params": Object { + "foo": true, + }, + }, + Object { + "actionTypeId": "action", + "group": "recovered", + "id": "2", + "params": Object { + "isResolved": true, + }, + }, + ] + `); + expect(call.services.alertInstanceFactory).toBeTruthy(); + expect(call.services.scopedClusterClient).toBeTruthy(); + expect(call.services).toBeTruthy(); + + const logger = taskRunnerFactoryInitializerParams.logger; + expect(logger.debug).toHaveBeenCalledTimes(2); + expect(logger.debug).nthCalledWith(1, 'executing alert test:1 at 1970-01-01T00:00:00.000Z'); + expect(logger.debug).nthCalledWith( + 2, + 'alertExecutionStatus for test:1: {"lastExecutionDate":"1970-01-01T00:00:00.000Z","status":"ok"}' + ); + + const eventLogger = taskRunnerFactoryInitializerParams.eventLogger; + expect(eventLogger.logEvent).toHaveBeenCalledTimes(2); + expect(eventLogger.startTiming).toHaveBeenCalledTimes(1); + expect(eventLogger.logEvent.mock.calls[0][0]).toMatchInlineSnapshot(` + Object { + "@timestamp": "1970-01-01T00:00:00.000Z", + "event": Object { + "action": "execute-start", + "category": Array [ + "alerts", + ], + "kind": "alert", + }, + "kibana": Object { + "saved_objects": Array [ + Object { + "id": "1", + "namespace": undefined, + "rel": "primary", + "type": "alert", + "type_id": "test", + }, + ], + "task": Object { + "schedule_delay": 0, + "scheduled": "1970-01-01T00:00:00.000Z", + }, + }, + "message": "alert execution start: \\"1\\"", + "rule": Object { + "category": "test", + "id": "1", + "license": "basic", + "ruleset": "alerts", + }, + } + `); + + expect( + taskRunnerFactoryInitializerParams.internalSavedObjectsRepository.update + ).toHaveBeenCalledWith( + 'alert', + '1', + { + executionStatus: { + error: null, + lastExecutionDate: '1970-01-01T00:00:00.000Z', + status: 'ok', + }, + }, + { refresh: false, namespace: undefined } + ); + }); }); diff --git a/x-pack/plugins/alerting/server/task_runner/task_runner.ts b/x-pack/plugins/alerting/server/task_runner/task_runner.ts index c66c054bc8ac..ff5393ccd9fb 100644 --- a/x-pack/plugins/alerting/server/task_runner/task_runner.ts +++ b/x-pack/plugins/alerting/server/task_runner/task_runner.ts @@ -190,6 +190,8 @@ export class TaskRunner< eventLogger: this.context.eventLogger, request: this.getFakeKibanaRequest(spaceId, apiKey), alertParams, + supportsEphemeralTasks: this.context.supportsEphemeralTasks, + maxEphemeralActionsPerAlert: this.context.maxEphemeralActionsPerAlert, }); } diff --git 
a/x-pack/plugins/alerting/server/task_runner/task_runner_factory.test.ts b/x-pack/plugins/alerting/server/task_runner/task_runner_factory.test.ts index 050345f3e617..a284fc25c625 100644 --- a/x-pack/plugins/alerting/server/task_runner/task_runner_factory.test.ts +++ b/x-pack/plugins/alerting/server/task_runner/task_runner_factory.test.ts @@ -79,6 +79,8 @@ describe('Task Runner Factory', () => { internalSavedObjectsRepository: savedObjectsRepositoryMock.create(), alertTypeRegistry: alertTypeRegistryMock.create(), kibanaBaseUrl: 'https://localhost:5601', + supportsEphemeralTasks: true, + maxEphemeralActionsPerAlert: new Promise((resolve) => resolve(10)), }; beforeEach(() => { diff --git a/x-pack/plugins/alerting/server/task_runner/task_runner_factory.ts b/x-pack/plugins/alerting/server/task_runner/task_runner_factory.ts index a023776134e9..5a6b28dc059f 100644 --- a/x-pack/plugins/alerting/server/task_runner/task_runner_factory.ts +++ b/x-pack/plugins/alerting/server/task_runner/task_runner_factory.ts @@ -41,6 +41,8 @@ export interface TaskRunnerContext { internalSavedObjectsRepository: ISavedObjectsRepository; alertTypeRegistry: AlertTypeRegistry; kibanaBaseUrl: string | undefined; + supportsEphemeralTasks: boolean; + maxEphemeralActionsPerAlert: Promise; } export class TaskRunnerFactory { diff --git a/x-pack/plugins/task_manager/kibana.json b/x-pack/plugins/task_manager/kibana.json index ad2d5d00ae0b..aab1cd0ab41a 100644 --- a/x-pack/plugins/task_manager/kibana.json +++ b/x-pack/plugins/task_manager/kibana.json @@ -4,5 +4,6 @@ "version": "8.0.0", "kibanaVersion": "kibana", "configPath": ["xpack", "task_manager"], + "optionalPlugins": ["usageCollection"], "ui": false } diff --git a/x-pack/plugins/task_manager/server/config.test.ts b/x-pack/plugins/task_manager/server/config.test.ts index 5e44181f35b2..14d95e3fd222 100644 --- a/x-pack/plugins/task_manager/server/config.test.ts +++ b/x-pack/plugins/task_manager/server/config.test.ts @@ -13,6 +13,10 @@ describe('config validation', () => { expect(configSchema.validate(config)).toMatchInlineSnapshot(` Object { "enabled": true, + "ephemeral_tasks": Object { + "enabled": false, + "request_capacity": 10, + }, "index": ".kibana_task_manager", "max_attempts": 3, "max_poll_inactivity_cycles": 10, @@ -65,6 +69,10 @@ describe('config validation', () => { expect(configSchema.validate(config)).toMatchInlineSnapshot(` Object { "enabled": true, + "ephemeral_tasks": Object { + "enabled": false, + "request_capacity": 10, + }, "index": ".kibana_task_manager", "max_attempts": 3, "max_poll_inactivity_cycles": 10, @@ -104,6 +112,10 @@ describe('config validation', () => { expect(configSchema.validate(config)).toMatchInlineSnapshot(` Object { "enabled": true, + "ephemeral_tasks": Object { + "enabled": false, + "request_capacity": 10, + }, "index": ".kibana_task_manager", "max_attempts": 3, "max_poll_inactivity_cycles": 10, diff --git a/x-pack/plugins/task_manager/server/config.ts b/x-pack/plugins/task_manager/server/config.ts index 03bb98170a34..9b4f4856bf8a 100644 --- a/x-pack/plugins/task_manager/server/config.ts +++ b/x-pack/plugins/task_manager/server/config.ts @@ -12,6 +12,7 @@ export const DEFAULT_MAX_WORKERS = 10; export const DEFAULT_POLL_INTERVAL = 3000; export const DEFAULT_MAX_POLL_INACTIVITY_CYCLES = 10; export const DEFAULT_VERSION_CONFLICT_THRESHOLD = 80; +export const DEFAULT_MAX_EPHEMERAL_REQUEST_CAPACITY = MAX_WORKERS_LIMIT; // Monitoring Constants // =================== @@ -117,6 +118,16 @@ export const configSchema = schema.object( defaultValue: 
DEFAULT_MONITORING_STATS_WARN_DELAYED_TASK_START_IN_SECONDS, }), }), + ephemeral_tasks: schema.object({ + enabled: schema.boolean({ defaultValue: false }), + /* How many requests can Task Manager buffer before it rejects new requests. */ + request_capacity: schema.number({ + // a nice round contrived number, feel free to change as we learn how it behaves + defaultValue: 10, + min: 1, + max: DEFAULT_MAX_EPHEMERAL_REQUEST_CAPACITY, + }), + }), }, { validate: (config) => { diff --git a/x-pack/plugins/task_manager/server/ephemeral_task_lifecycle.mock.ts b/x-pack/plugins/task_manager/server/ephemeral_task_lifecycle.mock.ts new file mode 100644 index 000000000000..c1ae0c4141bf --- /dev/null +++ b/x-pack/plugins/task_manager/server/ephemeral_task_lifecycle.mock.ts @@ -0,0 +1,24 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { EphemeralTaskLifecycle } from './ephemeral_task_lifecycle'; +import { TaskLifecycleEvent } from './polling_lifecycle'; +import { of, Observable } from 'rxjs'; + +export const ephemeralTaskLifecycleMock = { + create(opts: { events$?: Observable; getQueuedTasks?: () => number }) { + return ({ + attemptToRun: jest.fn(), + get events() { + return opts.events$ ?? of(); + }, + get queuedTasks() { + return opts.getQueuedTasks ? opts.getQueuedTasks() : 0; + }, + } as unknown) as jest.Mocked; + }, +}; diff --git a/x-pack/plugins/task_manager/server/ephemeral_task_lifecycle.test.ts b/x-pack/plugins/task_manager/server/ephemeral_task_lifecycle.test.ts new file mode 100644 index 000000000000..182e7cd5bcab --- /dev/null +++ b/x-pack/plugins/task_manager/server/ephemeral_task_lifecycle.test.ts @@ -0,0 +1,396 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import _ from 'lodash'; +import { Subject } from 'rxjs'; + +import { TaskLifecycleEvent } from './polling_lifecycle'; +import { createInitialMiddleware } from './lib/middleware'; +import { TaskTypeDictionary } from './task_type_dictionary'; +import { mockLogger } from './test_utils'; +import { asErr, asOk } from './lib/result_type'; +import { FillPoolResult } from './lib/fill_pool'; +import { EphemeralTaskLifecycle, EphemeralTaskLifecycleOpts } from './ephemeral_task_lifecycle'; +import { ConcreteTaskInstance, TaskStatus } from './task'; +import uuid from 'uuid'; +import { asTaskPollingCycleEvent, asTaskRunEvent, TaskPersistence } from './task_events'; +import { TaskRunResult } from './task_running'; +import { TaskPoolRunResult } from './task_pool'; +import { TaskPoolMock } from './task_pool.mock'; + +describe('EphemeralTaskLifecycle', () => { + function initTaskLifecycleParams({ + config, + ...optOverrides + }: { + config?: Partial; + } & Partial> = {}) { + const taskManagerLogger = mockLogger(); + const poolCapacity = jest.fn(); + const pool = TaskPoolMock.create(poolCapacity); + const lifecycleEvent$ = new Subject(); + const elasticsearchAndSOAvailability$ = new Subject(); + const opts: EphemeralTaskLifecycleOpts = { + logger: taskManagerLogger, + definitions: new TaskTypeDictionary(taskManagerLogger), + config: { + enabled: true, + max_workers: 10, + index: 'foo', + max_attempts: 9, + poll_interval: 6000000, + version_conflict_threshold: 80, + max_poll_inactivity_cycles: 10, + request_capacity: 1000, + monitored_aggregated_stats_refresh_rate: 5000, + monitored_stats_required_freshness: 5000, + monitored_stats_running_average_window: 50, + monitored_stats_health_verbose_log: { + enabled: true, + warn_delayed_task_start_in_seconds: 60, + }, + monitored_task_execution_thresholds: { + default: { + error_threshold: 90, + warn_threshold: 80, + }, + custom: {}, + }, + ephemeral_tasks: { + enabled: true, + request_capacity: 10, + }, + ...config, + }, + elasticsearchAndSOAvailability$, + pool, + lifecycleEvent: lifecycleEvent$, + middleware: createInitialMiddleware(), + ...optOverrides, + }; + + opts.definitions.registerTaskDefinitions({ + foo: { + title: 'foo', + createTaskRunner: jest.fn(), + }, + }); + + pool.run.mockResolvedValue(Promise.resolve(TaskPoolRunResult.RunningAllClaimedTasks)); + + return { poolCapacity, lifecycleEvent$, pool, elasticsearchAndSOAvailability$, opts }; + } + + describe('constructor', () => { + test('avoids unnecessary subscription if ephemeral tasks are disabled', () => { + const { opts } = initTaskLifecycleParams({ + config: { + ephemeral_tasks: { + enabled: false, + request_capacity: 10, + }, + }, + }); + + const ephemeralTaskLifecycle = new EphemeralTaskLifecycle(opts); + + const task = mockTask(); + expect(ephemeralTaskLifecycle.attemptToRun(task)).toMatchObject(asErr(task)); + }); + + test('queues up tasks when ephemeral tasks are enabled', () => { + const { opts } = initTaskLifecycleParams(); + + const ephemeralTaskLifecycle = new EphemeralTaskLifecycle(opts); + + const task = mockTask(); + expect(ephemeralTaskLifecycle.attemptToRun(task)).toMatchObject(asOk(task)); + }); + + test('rejects tasks when ephemeral tasks are enabled and queue is full', () => { + const { opts } = initTaskLifecycleParams({ + config: { ephemeral_tasks: { enabled: true, request_capacity: 2 } }, + }); + + const ephemeralTaskLifecycle = new EphemeralTaskLifecycle(opts); + + const task = mockTask(); + expect(ephemeralTaskLifecycle.attemptToRun(task)).toMatchObject(asOk(task)); + 
const task2 = mockTask(); + expect(ephemeralTaskLifecycle.attemptToRun(task2)).toMatchObject(asOk(task2)); + + const rejectedTask = mockTask(); + expect(ephemeralTaskLifecycle.attemptToRun(rejectedTask)).toMatchObject(asErr(rejectedTask)); + }); + + test('pulls tasks off queue when a polling cycle completes', () => { + const { pool, poolCapacity, opts, lifecycleEvent$ } = initTaskLifecycleParams(); + + const ephemeralTaskLifecycle = new EphemeralTaskLifecycle(opts); + + const task = mockTask({ id: `my-phemeral-task` }); + expect(ephemeralTaskLifecycle.attemptToRun(task)).toMatchObject(asOk(task)); + + poolCapacity.mockReturnValue({ + availableWorkers: 10, + }); + + lifecycleEvent$.next( + asTaskPollingCycleEvent(asOk({ result: FillPoolResult.NoTasksClaimed })) + ); + + expect(pool.run).toHaveBeenCalledTimes(1); + + const taskRunners = pool.run.mock.calls[0][0]; + expect(taskRunners).toHaveLength(1); + expect(`${taskRunners[0]}`).toMatchInlineSnapshot(`"foo \\"my-phemeral-task\\" (Ephemeral)"`); + }); + + test('pulls tasks off queue when a task run completes', () => { + const { pool, poolCapacity, opts, lifecycleEvent$ } = initTaskLifecycleParams(); + + const ephemeralTaskLifecycle = new EphemeralTaskLifecycle(opts); + + const task = mockTask({ id: `my-phemeral-task` }); + expect(ephemeralTaskLifecycle.attemptToRun(task)).toMatchObject(asOk(task)); + + poolCapacity.mockReturnValue({ + availableWorkers: 10, + }); + + lifecycleEvent$.next( + asTaskRunEvent( + uuid.v4(), + asOk({ + task: mockTask(), + result: TaskRunResult.Success, + persistence: TaskPersistence.Ephemeral, + }) + ) + ); + + expect(pool.run).toHaveBeenCalledTimes(1); + + const taskRunners = pool.run.mock.calls[0][0]; + expect(taskRunners).toHaveLength(1); + expect(`${taskRunners[0]}`).toMatchInlineSnapshot(`"foo \\"my-phemeral-task\\" (Ephemeral)"`); + }); + + test('pulls as many tasks off queue as it has capacity for', () => { + const { pool, poolCapacity, opts, lifecycleEvent$ } = initTaskLifecycleParams(); + + const ephemeralTaskLifecycle = new EphemeralTaskLifecycle(opts); + + const tasks = [mockTask(), mockTask(), mockTask()]; + expect(ephemeralTaskLifecycle.attemptToRun(tasks[0])).toMatchObject(asOk(tasks[0])); + expect(ephemeralTaskLifecycle.attemptToRun(tasks[1])).toMatchObject(asOk(tasks[1])); + expect(ephemeralTaskLifecycle.attemptToRun(tasks[2])).toMatchObject(asOk(tasks[2])); + + poolCapacity.mockReturnValue({ + availableWorkers: 2, + }); + + lifecycleEvent$.next( + asTaskPollingCycleEvent(asOk({ result: FillPoolResult.NoTasksClaimed })) + ); + + expect(pool.run).toHaveBeenCalledTimes(1); + + const taskRunners = pool.run.mock.calls[0][0]; + expect(taskRunners).toHaveLength(2); + expect(`${taskRunners[0]}`).toEqual(`foo "${tasks[0].id}" (Ephemeral)`); + expect(`${taskRunners[1]}`).toEqual(`foo "${tasks[1].id}" (Ephemeral)`); + }); + + test('pulls only as many tasks of the same type as is allowed by maxConcurrency', () => { + const { pool, poolCapacity, opts, lifecycleEvent$ } = initTaskLifecycleParams(); + + opts.definitions.registerTaskDefinitions({ + report: { + title: 'report', + maxConcurrency: 1, + createTaskRunner: jest.fn(), + }, + }); + + const ephemeralTaskLifecycle = new EphemeralTaskLifecycle(opts); + + const firstLimitedTask = mockTask({ taskType: 'report' }); + const secondLimitedTask = mockTask({ taskType: 'report' }); + // both are queued + expect(ephemeralTaskLifecycle.attemptToRun(firstLimitedTask)).toMatchObject( + asOk(firstLimitedTask) + ); + 
expect(ephemeralTaskLifecycle.attemptToRun(secondLimitedTask)).toMatchObject( + asOk(secondLimitedTask) + ); + + // pool has capacity for both + poolCapacity.mockReturnValue({ + availableWorkers: 10, + }); + pool.getOccupiedWorkersByType.mockReturnValue(0); + + lifecycleEvent$.next( + asTaskPollingCycleEvent(asOk({ result: FillPoolResult.NoTasksClaimed })) + ); + + expect(pool.run).toHaveBeenCalledTimes(1); + + const taskRunners = pool.run.mock.calls[0][0]; + expect(taskRunners).toHaveLength(1); + expect(`${taskRunners[0]}`).toEqual(`report "${firstLimitedTask.id}" (Ephemeral)`); + }); + + test('when pulling tasks from the queue, it takes into account the maxConcurrency of tasks that are already in the pool', () => { + const { pool, poolCapacity, opts, lifecycleEvent$ } = initTaskLifecycleParams(); + + opts.definitions.registerTaskDefinitions({ + report: { + title: 'report', + maxConcurrency: 1, + createTaskRunner: jest.fn(), + }, + }); + + const ephemeralTaskLifecycle = new EphemeralTaskLifecycle(opts); + + const firstLimitedTask = mockTask({ taskType: 'report' }); + const secondLimitedTask = mockTask({ taskType: 'report' }); + // both are queued + expect(ephemeralTaskLifecycle.attemptToRun(firstLimitedTask)).toMatchObject( + asOk(firstLimitedTask) + ); + expect(ephemeralTaskLifecycle.attemptToRun(secondLimitedTask)).toMatchObject( + asOk(secondLimitedTask) + ); + + // pool has capacity in general + poolCapacity.mockReturnValue({ + availableWorkers: 2, + }); + // but when we ask how many it has occupied by type - we always have one worker already occupied by that type + pool.getOccupiedWorkersByType.mockReturnValue(1); + + lifecycleEvent$.next( + asTaskPollingCycleEvent(asOk({ result: FillPoolResult.NoTasksClaimed })) + ); + + expect(pool.run).toHaveBeenCalledTimes(0); + + // now we release the worker in the pool and cause another cycle in the ephemeral queue + pool.getOccupiedWorkersByType.mockReturnValue(0); + lifecycleEvent$.next( + asTaskPollingCycleEvent(asOk({ result: FillPoolResult.NoTasksClaimed })) + ); + + expect(pool.run).toHaveBeenCalledTimes(1); + const taskRunners = pool.run.mock.calls[0][0]; + expect(taskRunners).toHaveLength(1); + expect(`${taskRunners[0]}`).toEqual(`report "${firstLimitedTask.id}" (Ephemeral)`); + }); + }); + + test('pulls tasks with both maxConcurrency and unlimited concurrency', () => { + const { pool, poolCapacity, opts, lifecycleEvent$ } = initTaskLifecycleParams(); + + opts.definitions.registerTaskDefinitions({ + report: { + title: 'report', + maxConcurrency: 1, + createTaskRunner: jest.fn(), + }, + }); + + const ephemeralTaskLifecycle = new EphemeralTaskLifecycle(opts); + + const fooTasks = [mockTask(), mockTask(), mockTask()]; + expect(ephemeralTaskLifecycle.attemptToRun(fooTasks[0])).toMatchObject(asOk(fooTasks[0])); + + const firstLimitedTask = mockTask({ taskType: 'report' }); + expect(ephemeralTaskLifecycle.attemptToRun(firstLimitedTask)).toMatchObject( + asOk(firstLimitedTask) + ); + + expect(ephemeralTaskLifecycle.attemptToRun(fooTasks[1])).toMatchObject(asOk(fooTasks[1])); + + const secondLimitedTask = mockTask({ taskType: 'report' }); + expect(ephemeralTaskLifecycle.attemptToRun(secondLimitedTask)).toMatchObject( + asOk(secondLimitedTask) + ); + + expect(ephemeralTaskLifecycle.attemptToRun(fooTasks[2])).toMatchObject(asOk(fooTasks[2])); + + // pool has capacity for all + poolCapacity.mockReturnValue({ + availableWorkers: 10, + }); + pool.getOccupiedWorkersByType.mockReturnValue(0); + + lifecycleEvent$.next(asTaskPollingCycleEvent(asOk({ 
result: FillPoolResult.NoTasksClaimed }))); + + expect(pool.run).toHaveBeenCalledTimes(1); + + const taskRunners = pool.run.mock.calls[0][0]; + expect(taskRunners).toHaveLength(4); + const asStrings = taskRunners.map((taskRunner) => `${taskRunner}`); + expect(asStrings).toContain(`foo "${fooTasks[0].id}" (Ephemeral)`); + expect(asStrings).toContain(`report "${firstLimitedTask.id}" (Ephemeral)`); + expect(asStrings).toContain(`foo "${fooTasks[1].id}" (Ephemeral)`); + expect(asStrings).toContain(`foo "${fooTasks[2].id}" (Ephemeral)`); + }); + + test('properly removes from the queue after pulled', () => { + const { poolCapacity, opts, lifecycleEvent$ } = initTaskLifecycleParams(); + + const ephemeralTaskLifecycle = new EphemeralTaskLifecycle(opts); + + const tasks = [mockTask(), mockTask(), mockTask()]; + expect(ephemeralTaskLifecycle.attemptToRun(tasks[0])).toMatchObject(asOk(tasks[0])); + expect(ephemeralTaskLifecycle.attemptToRun(tasks[1])).toMatchObject(asOk(tasks[1])); + expect(ephemeralTaskLifecycle.attemptToRun(tasks[2])).toMatchObject(asOk(tasks[2])); + + expect(ephemeralTaskLifecycle.queuedTasks).toBe(3); + poolCapacity.mockReturnValue({ + availableWorkers: 1, + }); + lifecycleEvent$.next(asTaskPollingCycleEvent(asOk({ result: FillPoolResult.NoTasksClaimed }))); + expect(ephemeralTaskLifecycle.queuedTasks).toBe(2); + + poolCapacity.mockReturnValue({ + availableWorkers: 1, + }); + lifecycleEvent$.next(asTaskPollingCycleEvent(asOk({ result: FillPoolResult.NoTasksClaimed }))); + expect(ephemeralTaskLifecycle.queuedTasks).toBe(1); + + poolCapacity.mockReturnValue({ + availableWorkers: 1, + }); + lifecycleEvent$.next(asTaskPollingCycleEvent(asOk({ result: FillPoolResult.NoTasksClaimed }))); + expect(ephemeralTaskLifecycle.queuedTasks).toBe(0); + }); +}); + +function mockTask(overrides: Partial = {}): ConcreteTaskInstance { + return { + id: uuid.v4(), + runAt: new Date(), + taskType: 'foo', + schedule: undefined, + attempts: 0, + status: TaskStatus.Idle, + params: { hello: 'world' }, + state: { baby: 'Henhen' }, + user: 'jimbo', + scope: ['reporting'], + ownerId: '', + startedAt: null, + retryAt: null, + scheduledAt: new Date(), + ...overrides, + }; +} diff --git a/x-pack/plugins/task_manager/server/ephemeral_task_lifecycle.ts b/x-pack/plugins/task_manager/server/ephemeral_task_lifecycle.ts new file mode 100644 index 000000000000..ce719ebed36e --- /dev/null +++ b/x-pack/plugins/task_manager/server/ephemeral_task_lifecycle.ts @@ -0,0 +1,205 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { Subject, Observable, Subscription } from 'rxjs'; +import { filter } from 'rxjs/operators'; +import { Logger } from '../../../../src/core/server'; + +import { Result, asErr, asOk } from './lib/result_type'; +import { TaskManagerConfig } from './config'; + +import { asTaskManagerStatEvent, isTaskRunEvent, isTaskPollingCycleEvent } from './task_events'; +import { Middleware } from './lib/middleware'; +import { EphemeralTaskInstance } from './task'; +import { TaskTypeDictionary } from './task_type_dictionary'; +import { TaskLifecycleEvent } from './polling_lifecycle'; +import { EphemeralTaskManagerRunner } from './task_running/ephemeral_task_runner'; +import { TaskPool } from './task_pool'; + +export interface EphemeralTaskLifecycleOpts { + logger: Logger; + definitions: TaskTypeDictionary; + config: TaskManagerConfig; + middleware: Middleware; + elasticsearchAndSOAvailability$: Observable; + pool: TaskPool; + lifecycleEvent: Observable; +} + +export type EphemeralTaskInstanceRequest = Omit; + +export class EphemeralTaskLifecycle { + private definitions: TaskTypeDictionary; + private pool: TaskPool; + private lifecycleEvent: Observable; + // all task related events (task claimed, task marked as running, etc.) are emitted through events$ + private events$ = new Subject(); + private ephemeralTaskQueue: Array<{ + task: EphemeralTaskInstanceRequest; + enqueuedAt: number; + }> = []; + private logger: Logger; + private config: TaskManagerConfig; + private middleware: Middleware; + private lifecycleSubscription: Subscription = Subscription.EMPTY; + + constructor({ + logger, + middleware, + definitions, + pool, + lifecycleEvent, + config, + }: EphemeralTaskLifecycleOpts) { + this.logger = logger; + this.middleware = middleware; + this.definitions = definitions; + this.pool = pool; + this.lifecycleEvent = lifecycleEvent; + this.config = config; + + if (this.enabled) { + this.lifecycleSubscription = this.lifecycleEvent + .pipe( + filter((e) => { + const hasPollingCycleCompleted = isTaskPollingCycleEvent(e); + if (hasPollingCycleCompleted) { + this.emitEvent( + asTaskManagerStatEvent('queuedEphemeralTasks', asOk(this.queuedTasks)) + ); + } + return ( + // when a polling cycle or a task run have just completed + (hasPollingCycleCompleted || isTaskRunEvent(e)) && + // we want to know when the queue has ephemeral task run requests + this.queuedTasks > 0 && + this.getCapacity() > 0 + ); + }) + ) + .subscribe(async (e) => { + let overallCapacity = this.getCapacity(); + const capacityByType = new Map(); + const tasksWithinCapacity = [...this.ephemeralTaskQueue] + .filter(({ task }) => { + if (overallCapacity > 0) { + if (!capacityByType.has(task.taskType)) { + capacityByType.set(task.taskType, this.getCapacity(task.taskType)); + } + if (capacityByType.get(task.taskType)! > 0) { + overallCapacity--; + capacityByType.set(task.taskType, capacityByType.get(task.taskType)! 
- 1); + return true; + } + } + }) + .map((ephemeralTask) => { + const index = this.ephemeralTaskQueue.indexOf(ephemeralTask); + if (index >= 0) { + this.ephemeralTaskQueue.splice(index, 1); + } + this.emitEvent( + asTaskManagerStatEvent( + 'ephemeralTaskDelay', + asOk(Date.now() - ephemeralTask.enqueuedAt) + ) + ); + return this.createTaskRunnerForTask(ephemeralTask.task); + }); + + if (tasksWithinCapacity.length) { + this.pool + .run(tasksWithinCapacity) + .then((successTaskPoolRunResult) => { + this.logger.debug( + `Successful ephemeral task lifecycle resulted in: ${successTaskPoolRunResult}` + ); + }) + .catch((error) => { + this.logger.debug(`Failed ephemeral task lifecycle resulted in: ${error}`); + }); + } + }); + } + } + + public get enabled(): boolean { + return this.config.ephemeral_tasks.enabled; + } + + public get events(): Observable { + return this.events$; + } + + private getCapacity = (taskType?: string) => + taskType && this.definitions.get(taskType)?.maxConcurrency + ? Math.max( + Math.min( + this.pool.availableWorkers, + this.definitions.get(taskType)!.maxConcurrency! - + this.pool.getOccupiedWorkersByType(taskType) + ), + 0 + ) + : this.pool.availableWorkers; + + private emitEvent = (event: TaskLifecycleEvent) => { + this.events$.next(event); + }; + + public attemptToRun(task: EphemeralTaskInstanceRequest) { + if (this.lifecycleSubscription.closed) { + return asErr(task); + } + return pushIntoSetWithTimestamp( + this.ephemeralTaskQueue, + this.config.ephemeral_tasks.request_capacity, + task + ); + } + + public get queuedTasks() { + return this.ephemeralTaskQueue.length; + } + + private createTaskRunnerForTask = ( + instance: EphemeralTaskInstanceRequest + ): EphemeralTaskManagerRunner => { + return new EphemeralTaskManagerRunner({ + logger: this.logger, + instance: { + ...instance, + startedAt: new Date(), + }, + definitions: this.definitions, + beforeRun: this.middleware.beforeRun, + beforeMarkRunning: this.middleware.beforeMarkRunning, + onTaskEvent: this.emitEvent, + }); + }; +} + +/** + * Pushes values into a bounded set + * @param set A Set of generic type T + * @param maxCapacity How many values are we allowed to push into the set + * @param value A value T to push into the set if it is there + */ +function pushIntoSetWithTimestamp( + set: Array<{ + task: EphemeralTaskInstanceRequest; + enqueuedAt: number; + }>, + maxCapacity: number, + task: EphemeralTaskInstanceRequest +): Result { + if (set.length >= maxCapacity) { + return asErr(task); + } + set.push({ task, enqueuedAt: Date.now() }); + return asOk(task); +} diff --git a/x-pack/plugins/task_manager/server/index.ts b/x-pack/plugins/task_manager/server/index.ts index 80f0e298a8ac..0a0630d82f32 100644 --- a/x-pack/plugins/task_manager/server/index.ts +++ b/x-pack/plugins/task_manager/server/index.ts @@ -15,13 +15,19 @@ export const plugin = (initContext: PluginInitializerContext) => new TaskManager export { TaskInstance, ConcreteTaskInstance, + EphemeralTask, TaskRunCreatorFunction, TaskStatus, RunContext, } from './task'; export { asInterval } from './lib/intervals'; -export { isUnrecoverableError, throwUnrecoverableError } from './task_running'; +export { + isUnrecoverableError, + throwUnrecoverableError, + isEphemeralTaskRejectedDueToCapacityError, +} from './task_running'; +export { RunNowResult } from './task_scheduling'; export { TaskManagerPlugin as TaskManager, diff --git a/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts 
b/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts index f925c4d978ad..496c0138cb1e 100644 --- a/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts +++ b/x-pack/plugins/task_manager/server/integration_tests/managed_configuration.test.ts @@ -51,11 +51,17 @@ describe('managed configuration', () => { }, custom: {}, }, + ephemeral_tasks: { + enabled: true, + request_capacity: 10, + }, }); logger = context.logger.get('taskManager'); const taskManager = new TaskManagerPlugin(context); - (await taskManager.setup(coreMock.createSetup())).registerTaskDefinitions({ + ( + await taskManager.setup(coreMock.createSetup(), { usageCollection: undefined }) + ).registerTaskDefinitions({ foo: { title: 'Foo', createTaskRunner: jest.fn(), diff --git a/x-pack/plugins/task_manager/server/lib/log_health_metrics.test.ts b/x-pack/plugins/task_manager/server/lib/log_health_metrics.test.ts index aca73a4b7743..b8e3e78925df 100644 --- a/x-pack/plugins/task_manager/server/lib/log_health_metrics.test.ts +++ b/x-pack/plugins/task_manager/server/lib/log_health_metrics.test.ts @@ -8,10 +8,10 @@ import { merge } from 'lodash'; import { loggingSystemMock } from 'src/core/server/mocks'; import { configSchema, TaskManagerConfig } from '../config'; import { HealthStatus } from '../monitoring'; -import { TaskPersistence } from '../monitoring/task_run_statistics'; import { MonitoredHealth } from '../routes/health'; import { logHealthMetrics, resetLastLogLevel } from './log_health_metrics'; import { Logger } from '../../../../../src/core/server'; +import { TaskPersistence } from '../task_events'; jest.mock('./calculate_health_status', () => ({ calculateHealthStatus: jest.fn(), diff --git a/x-pack/plugins/task_manager/server/mocks.ts b/x-pack/plugins/task_manager/server/mocks.ts index c713e1e98a1e..2db8cdd6268c 100644 --- a/x-pack/plugins/task_manager/server/mocks.ts +++ b/x-pack/plugins/task_manager/server/mocks.ts @@ -23,8 +23,10 @@ const createStartMock = () => { remove: jest.fn(), schedule: jest.fn(), runNow: jest.fn(), + ephemeralRunNow: jest.fn(), ensureScheduled: jest.fn(), removeIfExists: jest.fn(), + supportsEphemeralTasks: jest.fn(), }; return mock; }; diff --git a/x-pack/plugins/task_manager/server/monitoring/capacity_estimation.test.ts b/x-pack/plugins/task_manager/server/monitoring/capacity_estimation.test.ts index bd8ecf0cc6d9..5e2b075415a1 100644 --- a/x-pack/plugins/task_manager/server/monitoring/capacity_estimation.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/capacity_estimation.test.ts @@ -835,6 +835,30 @@ function mockStats( runtime: Partial['runtime']['value']> = {} ): CapacityEstimationParams { return { + ephemeral: { + status: HealthStatus.OK, + timestamp: new Date().toISOString(), + value: { + load: { + p50: 4, + p90: 6, + p95: 6, + p99: 6, + }, + executionsPerCycle: { + p50: 4, + p90: 6, + p95: 6, + p99: 6, + }, + queuedTasks: { + p50: 4, + p90: 6, + p95: 6, + p99: 6, + }, + }, + }, configuration: { status: HealthStatus.OK, timestamp: new Date().toISOString(), diff --git a/x-pack/plugins/task_manager/server/monitoring/capacity_estimation.ts b/x-pack/plugins/task_manager/server/monitoring/capacity_estimation.ts index 90f564152c8c..03efcff10eb6 100644 --- a/x-pack/plugins/task_manager/server/monitoring/capacity_estimation.ts +++ b/x-pack/plugins/task_manager/server/monitoring/capacity_estimation.ts @@ -100,6 +100,7 @@ export function estimateCapacity( percentageOfExecutionsUsedByRecurringTasks + 
percentageOfExecutionsUsedByNonRecurringTasks ) ); + /** * On average, how much of this kibana's capacity has been historically used to execute * non-recurring and ephemeral tasks @@ -147,7 +148,7 @@ export function estimateCapacity( */ const minRequiredKibanaInstances = Math.ceil( hasTooLittleCapacityToEstimateRequiredNonRecurringCapacity - ? /* + ? /* if load is at 100% or there's no capacity for recurring tasks at the moment, then it's really difficult for us to assess how much capacity is needed for non-recurring tasks at normal times. This might be representative, but it might also be a spike and we have no way of knowing that. We'll recommend people scale up by 20% and go from there. */ @@ -182,7 +183,6 @@ export function estimateCapacity( const assumedRequiredThroughputPerMinutePerKibana = averageCapacityUsedByNonRecurringAndEphemeralTasksPerKibana + averageRecurringRequiredPerMinute / assumedKibanaInstances; - return { status: assumedRequiredThroughputPerMinutePerKibana < capacityPerMinutePerKibana diff --git a/x-pack/plugins/task_manager/server/monitoring/configuration_statistics.test.ts b/x-pack/plugins/task_manager/server/monitoring/configuration_statistics.test.ts index 6aa8bad5717e..82a111305927 100644 --- a/x-pack/plugins/task_manager/server/monitoring/configuration_statistics.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/configuration_statistics.test.ts @@ -35,6 +35,10 @@ describe('Configuration Statistics Aggregator', () => { }, custom: {}, }, + ephemeral_tasks: { + enabled: true, + request_capacity: 10, + }, }; const managedConfig = { diff --git a/x-pack/plugins/task_manager/server/monitoring/ephemeral_task_statistics.test.ts b/x-pack/plugins/task_manager/server/monitoring/ephemeral_task_statistics.test.ts new file mode 100644 index 000000000000..1ddfe4bb2208 --- /dev/null +++ b/x-pack/plugins/task_manager/server/monitoring/ephemeral_task_statistics.test.ts @@ -0,0 +1,384 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import uuid from 'uuid'; +import { Subject, Observable } from 'rxjs'; +import stats from 'stats-lite'; +import { take, bufferCount, skip, map } from 'rxjs/operators'; + +import { ConcreteTaskInstance, TaskStatus } from '../task'; +import { + asTaskRunEvent, + TaskTiming, + asTaskManagerStatEvent, + TaskPersistence, +} from '../task_events'; +import { asOk } from '../lib/result_type'; +import { TaskLifecycleEvent } from '../polling_lifecycle'; +import { TaskRunResult } from '../task_running'; +import { + createEphemeralTaskAggregator, + summarizeEphemeralStat, + SummarizedEphemeralTaskStat, + EphemeralTaskStat, +} from './ephemeral_task_statistics'; +import { AggregatedStat } from './runtime_statistics_aggregator'; +import { ephemeralTaskLifecycleMock } from '../ephemeral_task_lifecycle.mock'; +import { times, takeRight, take as takeLeft } from 'lodash'; + +describe('Ephemeral Task Statistics', () => { + test('returns the average size of the ephemeral queue', async () => { + const queueSize = [2, 6, 10, 10, 10, 6, 2, 0, 0]; + const events$ = new Subject(); + const getQueuedTasks = jest.fn(); + const ephemeralTaskLifecycle = ephemeralTaskLifecycleMock.create({ + events$: events$ as Observable, + getQueuedTasks, + }); + + const runningAverageWindowSize = 5; + const ephemeralTaskAggregator = createEphemeralTaskAggregator( + ephemeralTaskLifecycle, + runningAverageWindowSize, + 10 + ); + + function expectWindowEqualsUpdate( + taskStat: AggregatedStat, + window: number[] + ) { + expect(taskStat.value.queuedTasks).toMatchObject({ + p50: stats.percentile(window, 0.5), + p90: stats.percentile(window, 0.9), + p95: stats.percentile(window, 0.95), + p99: stats.percentile(window, 0.99), + }); + } + + return new Promise((resolve) => { + ephemeralTaskAggregator + .pipe( + // skip initial stat which is just initialized data which + // ensures we don't stall on combineLatest + skip(1), + // Use 'summarizeEphemeralStat' to receive summarize stats + map(({ key, value }: AggregatedStat) => ({ + key, + value: summarizeEphemeralStat(value).value, + })), + take(queueSize.length), + bufferCount(queueSize.length) + ) + .subscribe((taskStats: Array>) => { + expectWindowEqualsUpdate(taskStats[0], queueSize.slice(0, 1)); + expectWindowEqualsUpdate(taskStats[1], queueSize.slice(0, 2)); + expectWindowEqualsUpdate(taskStats[2], queueSize.slice(0, 3)); + expectWindowEqualsUpdate(taskStats[3], queueSize.slice(0, 4)); + expectWindowEqualsUpdate(taskStats[4], queueSize.slice(0, 5)); + // from the 6th value, begin to drop old values as our window is 5 + expectWindowEqualsUpdate(taskStats[5], queueSize.slice(1, 6)); + expectWindowEqualsUpdate(taskStats[6], queueSize.slice(2, 7)); + expectWindowEqualsUpdate(taskStats[7], queueSize.slice(3, 8)); + resolve(); + }); + + for (const size of queueSize) { + events$.next(asTaskManagerStatEvent('queuedEphemeralTasks', asOk(size))); + } + }); + }); + + test('returns the average number of ephemeral tasks executed per polling cycle', async () => { + const tasksQueueSize = [5, 2, 5, 0]; + const executionsPerCycle = [5, 0, 5]; + // we expect one event per "task queue size event", and we simulate + tasks being drained after each one of these events, so we expect + the first cycle to show zero drained tasks + const expectedTasksDrainedEvents = [0, ...executionsPerCycle]; + + const events$ = new Subject(); + const getQueuedTasks = jest.fn(); + const ephemeralTaskLifecycle = ephemeralTaskLifecycleMock.create({ + events$: events$ as Observable, + getQueuedTasks, + }); + + const 
runningAverageWindowSize = 5; + const ephemeralTaskAggregator = createEphemeralTaskAggregator( + ephemeralTaskLifecycle, + runningAverageWindowSize, + 10 + ); + + function expectWindowEqualsUpdate( + taskStat: AggregatedStat, + window: number[] + ) { + expect(taskStat.value.executionsPerCycle).toMatchObject({ + p50: stats.percentile(window, 0.5), + p90: stats.percentile(window, 0.9), + p95: stats.percentile(window, 0.95), + p99: stats.percentile(window, 0.99), + }); + } + + return new Promise((resolve) => { + ephemeralTaskAggregator + .pipe( + // skip initial stat which is just initialized data which + // ensures we don't stall on combineLatest + skip(1), + // Use 'summarizeEphemeralStat' to receive summarize stats + map(({ key, value }: AggregatedStat) => ({ + key, + value: summarizeEphemeralStat(value).value, + })), + take(tasksQueueSize.length), + bufferCount(tasksQueueSize.length) + ) + .subscribe((taskStats: Array>) => { + taskStats.forEach((taskStat, index) => { + expectWindowEqualsUpdate( + taskStat, + takeRight(takeLeft(expectedTasksDrainedEvents, index + 1), runningAverageWindowSize) + ); + }); + resolve(); + }); + + for (const tasksDrainedInCycle of executionsPerCycle) { + events$.next( + asTaskManagerStatEvent('queuedEphemeralTasks', asOk(tasksQueueSize.shift() ?? 0)) + ); + times(tasksDrainedInCycle, () => { + events$.next(mockTaskRunEvent()); + }); + } + events$.next( + asTaskManagerStatEvent('queuedEphemeralTasks', asOk(tasksQueueSize.shift() ?? 0)) + ); + }); + }); + + test('returns the average load added per polling cycle by ephemeral tasks', async () => { + const tasksExecuted = [0, 5, 10, 10, 10, 5, 5, 0, 0, 0, 0, 0]; + const expectedLoad = [0, 50, 100, 100, 100, 50, 50, 0, 0, 0, 0, 0]; + + const events$ = new Subject(); + const getQueuedTasks = jest.fn(); + const ephemeralTaskLifecycle = ephemeralTaskLifecycleMock.create({ + events$: events$ as Observable, + getQueuedTasks, + }); + + const runningAverageWindowSize = 5; + const maxWorkers = 10; + const ephemeralTaskAggregator = createEphemeralTaskAggregator( + ephemeralTaskLifecycle, + runningAverageWindowSize, + maxWorkers + ); + + function expectWindowEqualsUpdate( + taskStat: AggregatedStat, + window: number[] + ) { + expect(taskStat.value.load).toMatchObject({ + p50: stats.percentile(window, 0.5), + p90: stats.percentile(window, 0.9), + p95: stats.percentile(window, 0.95), + p99: stats.percentile(window, 0.99), + }); + } + + return new Promise((resolve) => { + ephemeralTaskAggregator + .pipe( + // skip initial stat which is just initialized data which + // ensures we don't stall on combineLatest + skip(1), + // Use 'summarizeEphemeralStat' to receive summarize stats + map(({ key, value }: AggregatedStat) => ({ + key, + value: summarizeEphemeralStat(value).value, + })), + take(tasksExecuted.length), + bufferCount(tasksExecuted.length) + ) + .subscribe((taskStats: Array>) => { + taskStats.forEach((taskStat, index) => { + expectWindowEqualsUpdate( + taskStat, + takeRight(takeLeft(expectedLoad, index + 1), runningAverageWindowSize) + ); + }); + resolve(); + }); + + for (const tasksExecutedInCycle of tasksExecuted) { + times(tasksExecutedInCycle, () => { + events$.next(mockTaskRunEvent()); + }); + events$.next(asTaskManagerStatEvent('queuedEphemeralTasks', asOk(0))); + } + }); + }); +}); + +test('returns the average load added per polling cycle by ephemeral tasks when load exceeds max workers', async () => { + const tasksExecuted = [0, 5, 10, 20, 15, 10, 5, 0, 0, 0, 0, 0]; + const expectedLoad = [0, 50, 100, 
200, 150, 100, 50, 0, 0, 0, 0, 0]; + + const events$ = new Subject(); + const getQueuedTasks = jest.fn(); + const ephemeralTaskLifecycle = ephemeralTaskLifecycleMock.create({ + events$: events$ as Observable, + getQueuedTasks, + }); + + const runningAverageWindowSize = 5; + const maxWorkers = 10; + const ephemeralTaskAggregator = createEphemeralTaskAggregator( + ephemeralTaskLifecycle, + runningAverageWindowSize, + maxWorkers + ); + + function expectWindowEqualsUpdate( + taskStat: AggregatedStat, + window: number[] + ) { + expect(taskStat.value.load).toMatchObject({ + p50: stats.percentile(window, 0.5), + p90: stats.percentile(window, 0.9), + p95: stats.percentile(window, 0.95), + p99: stats.percentile(window, 0.99), + }); + } + + return new Promise((resolve) => { + ephemeralTaskAggregator + .pipe( + // skip initial stat which is just initialized data which + // ensures we don't stall on combineLatest + skip(1), + // Use 'summarizeEphemeralStat' to receive summarized stats + map(({ key, value }: AggregatedStat) => ({ + key, + value: summarizeEphemeralStat(value).value, + })), + take(tasksExecuted.length), + bufferCount(tasksExecuted.length) + ) + .subscribe((taskStats: Array>) => { + taskStats.forEach((taskStat, index) => { + expectWindowEqualsUpdate( + taskStat, + takeRight(takeLeft(expectedLoad, index + 1), runningAverageWindowSize) + ); + }); + resolve(); + }); + + for (const tasksExecutedInCycle of tasksExecuted) { + times(tasksExecutedInCycle, () => { + events$.next(mockTaskRunEvent()); + }); + events$.next(asTaskManagerStatEvent('queuedEphemeralTasks', asOk(0))); + } + }); +}); + +test('returns the average delay experienced by tasks in the ephemeral queue', async () => { + const taskDelays = [100, 150, 500, 100, 100, 200, 2000, 10000, 20000, 100]; + + const events$ = new Subject(); + const getQueuedTasks = jest.fn(); + const ephemeralTaskLifecycle = ephemeralTaskLifecycleMock.create({ + events$: events$ as Observable, + getQueuedTasks, + }); + + const runningAverageWindowSize = 5; + const ephemeralTaskAggregator = createEphemeralTaskAggregator( + ephemeralTaskLifecycle, + runningAverageWindowSize, + 10 + ); + + function expectWindowEqualsUpdate( + taskStat: AggregatedStat, + window: number[] + ) { + expect(taskStat.value.delay).toMatchObject({ + p50: stats.percentile(window, 0.5), + p90: stats.percentile(window, 0.9), + p95: stats.percentile(window, 0.95), + p99: stats.percentile(window, 0.99), + }); + } + + return new Promise((resolve) => { + ephemeralTaskAggregator + .pipe( + // skip initial stat which is just initialized data which + // ensures we don't stall on combineLatest + skip(1), + // Use 'summarizeEphemeralStat' to receive summarized stats + map(({ key, value }: AggregatedStat) => ({ + key, + value: summarizeEphemeralStat(value).value, + })), + take(taskDelays.length), + bufferCount(taskDelays.length) + ) + .subscribe((taskStats: Array>) => { + taskStats.forEach((taskStat, index) => { + expectWindowEqualsUpdate( + taskStat, + takeRight(takeLeft(taskDelays, index + 1), runningAverageWindowSize) + ); + }); + resolve(); + }); + + for (const delay of taskDelays) { + events$.next(asTaskManagerStatEvent('ephemeralTaskDelay', asOk(delay))); + } + }); +}); + +const mockTaskRunEvent = ( + overrides: Partial = {}, + timing: TaskTiming = { + start: 0, + stop: 0, + }, + result: TaskRunResult = TaskRunResult.Success +) => { + const task = mockTaskInstance(overrides); + const persistence = TaskPersistence.Recurring; + return asTaskRunEvent(task.id, asOk({ task, persistence, result }), 
timing); +}; + +const mockTaskInstance = (overrides: Partial = {}): ConcreteTaskInstance => ({ + id: uuid.v4(), + attempts: 0, + status: TaskStatus.Running, + version: '123', + runAt: new Date(), + scheduledAt: new Date(), + startedAt: new Date(), + retryAt: new Date(Date.now() + 5 * 60 * 1000), + state: {}, + taskType: 'alerting:test', + params: { + alertId: '1', + }, + ownerId: null, + ...overrides, +}); diff --git a/x-pack/plugins/task_manager/server/monitoring/ephemeral_task_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/ephemeral_task_statistics.ts new file mode 100644 index 000000000000..d1f3ef9c1405 --- /dev/null +++ b/x-pack/plugins/task_manager/server/monitoring/ephemeral_task_statistics.ts @@ -0,0 +1,128 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { map, filter, startWith, buffer, share } from 'rxjs/operators'; +import { JsonObject } from '@kbn/common-utils'; +import { combineLatest, Observable, zip } from 'rxjs'; +import { isOk, Ok } from '../lib/result_type'; +import { AggregatedStat, AggregatedStatProvider } from './runtime_statistics_aggregator'; +import { EphemeralTaskLifecycle } from '../ephemeral_task_lifecycle'; +import { TaskLifecycleEvent } from '../polling_lifecycle'; +import { isTaskRunEvent, isTaskManagerStatEvent } from '../task_events'; +import { + AveragedStat, + calculateRunningAverage, + createRunningAveragedStat, +} from './task_run_calcultors'; +import { HealthStatus } from './monitoring_stats_stream'; + +export interface EphemeralTaskStat extends JsonObject { + queuedTasks: number[]; + executionsPerCycle: number[]; + load: number[]; + delay: number[]; +} + +export interface SummarizedEphemeralTaskStat extends JsonObject { + queuedTasks: AveragedStat; + executionsPerCycle: AveragedStat; + load: AveragedStat; +} +export function createEphemeralTaskAggregator( + ephemeralTaskLifecycle: EphemeralTaskLifecycle, + runningAverageWindowSize: number, + maxWorkers: number +): AggregatedStatProvider { + const ephemeralTaskRunEvents$ = ephemeralTaskLifecycle.events.pipe( + filter((taskEvent: TaskLifecycleEvent) => isTaskRunEvent(taskEvent)) + ); + + const ephemeralQueueSizeEvents$: Observable = ephemeralTaskLifecycle.events.pipe( + filter( + (taskEvent: TaskLifecycleEvent) => + isTaskManagerStatEvent(taskEvent) && + taskEvent.id === 'queuedEphemeralTasks' && + isOk(taskEvent.event) + ), + map((taskEvent: TaskLifecycleEvent) => { + return ((taskEvent.event as unknown) as Ok).value; + }), + // as we consume this stream twice below (in the buffer, and the zip) + // we want to use share, otherwise there'll be 2 subscribers and both will emit events + share() + ); + + const ephemeralQueueExecutionsPerCycleQueue = createRunningAveragedStat( + runningAverageWindowSize + ); + const ephemeralQueuedTasksQueue = createRunningAveragedStat(runningAverageWindowSize); + const ephemeralTaskLoadQueue = createRunningAveragedStat(runningAverageWindowSize); + const ephemeralPollingCycleBasedStats$ = zip( + ephemeralTaskRunEvents$.pipe( + buffer(ephemeralQueueSizeEvents$), + map((taskEvents: TaskLifecycleEvent[]) => taskEvents.length) + ), + ephemeralQueueSizeEvents$ + ).pipe( + map(([tasksRanSincePreviousQueueSize, ephemeralQueueSize]) => ({ + queuedTasks: ephemeralQueuedTasksQueue(ephemeralQueueSize), + executionsPerCycle: 
ephemeralQueueExecutionsPerCycleQueue(tasksRanSincePreviousQueueSize), + load: ephemeralTaskLoadQueue(calculateWorkerLoad(maxWorkers, tasksRanSincePreviousQueueSize)), + })), + startWith({ + queuedTasks: [], + executionsPerCycle: [], + load: [], + }) + ); + + const ephemeralTaskDelayQueue = createRunningAveragedStat(runningAverageWindowSize); + const ephemeralTaskDelayEvents$: Observable = ephemeralTaskLifecycle.events.pipe( + filter( + (taskEvent: TaskLifecycleEvent) => + isTaskManagerStatEvent(taskEvent) && + taskEvent.id === 'ephemeralTaskDelay' && + isOk(taskEvent.event) + ), + map((taskEvent: TaskLifecycleEvent) => { + return ephemeralTaskDelayQueue(((taskEvent.event as unknown) as Ok).value); + }), + startWith([]) + ); + + return combineLatest([ephemeralPollingCycleBasedStats$, ephemeralTaskDelayEvents$]).pipe( + map(([stats, delay]: [Omit, EphemeralTaskStat['delay']]) => { + return { + key: 'ephemeral', + value: { ...stats, delay }, + } as AggregatedStat; + }) + ); +} + +function calculateWorkerLoad(maxWorkers: number, tasksExecuted: number) { + return Math.round((tasksExecuted * 100) / maxWorkers); +} + +export function summarizeEphemeralStat({ + queuedTasks, + executionsPerCycle, + load, + delay, +}: EphemeralTaskStat): { value: SummarizedEphemeralTaskStat; status: HealthStatus } { + return { + value: { + queuedTasks: calculateRunningAverage(queuedTasks.length ? queuedTasks : [0]), + load: calculateRunningAverage(load.length ? load : [0]), + executionsPerCycle: calculateRunningAverage( + executionsPerCycle.length ? executionsPerCycle : [0] + ), + delay: calculateRunningAverage(delay.length ? delay : [0]), + }, + status: HealthStatus.OK, + }; +} diff --git a/x-pack/plugins/task_manager/server/monitoring/index.ts b/x-pack/plugins/task_manager/server/monitoring/index.ts index 802a60b82ced..99a4e31dbdb0 100644 --- a/x-pack/plugins/task_manager/server/monitoring/index.ts +++ b/x-pack/plugins/task_manager/server/monitoring/index.ts @@ -16,6 +16,7 @@ import { import { TaskStore } from '../task_store'; import { TaskPollingLifecycle } from '../polling_lifecycle'; import { ManagedConfiguration } from '../lib/create_managed_configuration'; +import { EphemeralTaskLifecycle } from '../ephemeral_task_lifecycle'; export { MonitoringStats, @@ -28,6 +29,7 @@ export { export function createMonitoringStats( taskPollingLifecycle: TaskPollingLifecycle, + ephemeralTaskLifecycle: EphemeralTaskLifecycle, taskStore: TaskStore, elasticsearchAndSOAvailability$: Observable, config: TaskManagerConfig, @@ -37,6 +39,7 @@ export function createMonitoringStats( return createMonitoringStatsStream( createAggregators( taskPollingLifecycle, + ephemeralTaskLifecycle, taskStore, elasticsearchAndSOAvailability$, config, diff --git a/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.test.ts b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.test.ts index 2e53850814e8..8e615fb86171 100644 --- a/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.test.ts @@ -39,6 +39,10 @@ describe('createMonitoringStatsStream', () => { }, custom: {}, }, + ephemeral_tasks: { + enabled: true, + request_capacity: 10, + }, }; it('returns the initial config used to configure Task Manager', async () => { diff --git a/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts index 0d3b6ebf56de..b187faf9e964 100644 
--- a/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts +++ b/x-pack/plugins/task_manager/server/monitoring/monitoring_stats_stream.ts @@ -18,6 +18,12 @@ import { SummarizedWorkloadStat, WorkloadStat, } from './workload_statistics'; +import { + EphemeralTaskStat, + createEphemeralTaskAggregator, + SummarizedEphemeralTaskStat, + summarizeEphemeralStat, +} from './ephemeral_task_statistics'; import { createTaskRunAggregator, summarizeTaskRunStat, @@ -28,6 +34,7 @@ import { ConfigStat, createConfigurationAggregator } from './configuration_stati import { TaskManagerConfig } from '../config'; import { AggregatedStatProvider } from './runtime_statistics_aggregator'; import { ManagedConfiguration } from '../lib/create_managed_configuration'; +import { EphemeralTaskLifecycle } from '../ephemeral_task_lifecycle'; import { CapacityEstimationStat, withCapacityEstimate } from './capacity_estimation'; export { AggregatedStatProvider, AggregatedStat } from './runtime_statistics_aggregator'; @@ -38,6 +45,7 @@ export interface MonitoringStats { configuration?: MonitoredStat; workload?: MonitoredStat; runtime?: MonitoredStat; + ephemeral?: MonitoredStat; }; } @@ -61,19 +69,21 @@ export interface RawMonitoringStats { configuration?: RawMonitoredStat; workload?: RawMonitoredStat; runtime?: RawMonitoredStat; + ephemeral?: RawMonitoredStat; capacity_estimation?: RawMonitoredStat; }; } export function createAggregators( taskPollingLifecycle: TaskPollingLifecycle, + ephemeralTaskLifecycle: EphemeralTaskLifecycle, taskStore: TaskStore, elasticsearchAndSOAvailability$: Observable, config: TaskManagerConfig, managedConfig: ManagedConfiguration, logger: Logger ): AggregatedStatProvider { - return merge( + const aggregators: AggregatedStatProvider[] = [ createConfigurationAggregator(config, managedConfig), createTaskRunAggregator(taskPollingLifecycle, config.monitored_stats_running_average_window), createWorkloadAggregator( @@ -82,8 +92,18 @@ export function createAggregators( config.monitored_aggregated_stats_refresh_rate, config.poll_interval, logger - ) - ); + ), + ]; + if (ephemeralTaskLifecycle.enabled) { + aggregators.push( + createEphemeralTaskAggregator( + ephemeralTaskLifecycle, + config.monitored_stats_running_average_window, + config.max_workers + ) + ); + } + return merge(...aggregators); } export function createMonitoringStatsStream( @@ -119,7 +139,7 @@ export function summarizeMonitoringStats( { // eslint-disable-next-line @typescript-eslint/naming-convention last_update, - stats: { runtime, workload, configuration }, + stats: { runtime, workload, configuration, ephemeral }, }: MonitoringStats, config: TaskManagerConfig ): RawMonitoringStats { @@ -148,6 +168,14 @@ export function summarizeMonitoringStats( }, } : {}), + ...(ephemeral + ? 
{ + ephemeral: { + timestamp: ephemeral.timestamp, + ...summarizeEphemeralStat(ephemeral.value), + }, + } + : {}), }); return { diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts index 38fdc89278e8..46dc56b2bac4 100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.test.ts @@ -17,6 +17,8 @@ import { asTaskPollingCycleEvent, TaskTiming, asTaskManagerStatEvent, + TaskPersistence, + asTaskClaimEvent, } from '../task_events'; import { asOk } from '../lib/result_type'; import { TaskLifecycleEvent } from '../polling_lifecycle'; @@ -400,6 +402,44 @@ describe('Task Run Statistics', () => { runningAverageWindowSize ); + const taskEvents = [ + mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success), + mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success), + mockTaskRunEvent( + { schedule: { interval: '3s' } }, + { start: 0, stop: 0 }, + TaskRunResult.Success + ), + mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Failed), + mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Failed), + mockTaskRunEvent( + { schedule: { interval: '3s' } }, + { start: 0, stop: 0 }, + TaskRunResult.Failed + ), + mockTaskRunEvent( + { schedule: { interval: '3s' } }, + { start: 0, stop: 0 }, + TaskRunResult.RetryScheduled + ), + mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.RetryScheduled), + mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success), + mockTaskRunEvent( + { schedule: { interval: '3s' } }, + { start: 0, stop: 0 }, + TaskRunResult.Success + ), + mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success, TaskPersistence.Ephemeral), + mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success, TaskPersistence.Ephemeral), + mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success), + mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success, TaskPersistence.Ephemeral), + mockTaskRunEvent( + { schedule: { interval: '3s' } }, + { start: 0, stop: 0 }, + TaskRunResult.Success + ), + ]; + return new Promise((resolve, reject) => { taskRunAggregator .pipe( @@ -409,22 +449,10 @@ describe('Task Run Statistics', () => { // Use 'summarizeTaskRunStat' to receive summarize stats map(({ key, value }: AggregatedStat) => ({ key, - value: summarizeTaskRunStat( - value, - getTaskManagerConfig({ - monitored_task_execution_thresholds: { - custom: { - 'alerting:test': { - error_threshold: 59, - warn_threshold: 39, - }, - }, - }, - }) - ).value, + value: summarizeTaskRunStat(value, getTaskManagerConfig({})).value, })), - take(10), - bufferCount(10) + take(taskEvents.length), + bufferCount(taskEvents.length) ) .subscribe((taskStats: Array>) => { try { @@ -485,6 +513,31 @@ describe('Task Run Statistics', () => { "non_recurring": 40, "recurring": 60, }, + Object { + "ephemeral": 20, + "non_recurring": 40, + "recurring": 40, + }, + Object { + "ephemeral": 40, + "non_recurring": 40, + "recurring": 20, + }, + Object { + "ephemeral": 40, + "non_recurring": 40, + "recurring": 20, + }, + Object { + "ephemeral": 60, + "non_recurring": 20, + "recurring": 20, + }, + Object { + "ephemeral": 60, + "non_recurring": 20, + "recurring": 20, + }, ] `); resolve(); @@ -493,40 +546,142 @@ describe('Task Run Statistics', () => { } }); - events$.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success)); - 
events$.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Success)); - events$.next( - mockTaskRunEvent( - { schedule: { interval: '3s' } }, - { start: 0, stop: 0 }, - TaskRunResult.Success + taskEvents.forEach((event) => events$.next(event)); + }); + }); + + test('frequency of polled tasks by their persistence', async () => { + const events$ = new Subject(); + + const taskPollingLifecycle = taskPollingLifecycleMock.create({ + events$: events$ as Observable, + }); + + const runningAverageWindowSize = 5; + const taskRunAggregator = createTaskRunAggregator( + taskPollingLifecycle, + runningAverageWindowSize + ); + + const taskEvents = [ + mockTaskPollingEvent({}), + mockTaskPollingEvent({}), + mockTaskPollingEvent({ schedule: { interval: '3s' } }), + mockTaskPollingEvent({}), + mockTaskPollingEvent({}), + mockTaskPollingEvent({ schedule: { interval: '3s' } }), + mockTaskPollingEvent({ schedule: { interval: '3s' } }), + mockTaskPollingEvent({}), + mockTaskPollingEvent({}), + mockTaskPollingEvent({ schedule: { interval: '3s' } }), + mockTaskPollingEvent({}), + mockTaskPollingEvent({}), + mockTaskPollingEvent({}), + mockTaskPollingEvent({}), + mockTaskPollingEvent({ schedule: { interval: '3s' } }), + ]; + + return new Promise((resolve, reject) => { + taskRunAggregator + .pipe( + // skip initial stat which is just initialized data which + // ensures we don't stall on combineLatest + skip(1), + // Use 'summarizeTaskRunStat' to receive summarized stats + map(({ key, value }: AggregatedStat) => ({ + key, + value: summarizeTaskRunStat(value, getTaskManagerConfig({})).value, + })), + take(taskEvents.length), + bufferCount(taskEvents.length) ) - ); - events$.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Failed)); - events$.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.Failed)); + .subscribe((taskStats: Array>) => { + try { + /** + * At any given time we only keep track of the last X Polling Results + * In the tests this is configured to a window size of 5 + */ + expect(taskStats.map((taskStat) => taskStat.value.polling.persistence)) + .toMatchInlineSnapshot(` + Array [ + Object { + "non_recurring": 0, + "recurring": 0, + }, + Object { + "non_recurring": 100, + "recurring": 0, + }, + Object { + "non_recurring": 100, + "recurring": 0, + }, + Object { + "non_recurring": 67, + "recurring": 33, + }, + Object { + "non_recurring": 75, + "recurring": 25, + }, + Object { + "non_recurring": 80, + "recurring": 20, + }, + Object { + "non_recurring": 60, + "recurring": 40, + }, + Object { + "non_recurring": 40, + "recurring": 60, + }, + Object { + "non_recurring": 60, + "recurring": 40, + }, + Object { + "non_recurring": 60, + "recurring": 40, + }, + Object { + "non_recurring": 40, + "recurring": 60, + }, + Object { + "non_recurring": 60, + "recurring": 40, + }, + Object { + "non_recurring": 80, + "recurring": 20, + }, + Object { + "non_recurring": 80, + "recurring": 20, + }, + Object { + "non_recurring": 80, + "recurring": 20, + }, + ] + `); + resolve(); + } catch (e) { + reject(e); + } + }); + + const timing = { + start: 0, + stop: 0, + }; events$.next( - mockTaskRunEvent( - { schedule: { interval: '3s' } }, - { start: 0, stop: 0 }, - TaskRunResult.Failed - ) - ); - events$.next( - mockTaskRunEvent( - { schedule: { interval: '3s' } }, - { start: 0, stop: 0 }, - TaskRunResult.RetryScheduled - ) - ); - events$.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, TaskRunResult.RetryScheduled)); - events$.next(mockTaskRunEvent({}, { start: 0, stop: 0 }, 
TaskRunResult.Success)); - events$.next( - mockTaskRunEvent( - { schedule: { interval: '3s' } }, - { start: 0, stop: 0 }, - TaskRunResult.Success - ) + asTaskPollingCycleEvent(asOk({ result: FillPoolResult.NoTasksClaimed, timing })) ); + events$.next(asTaskManagerStatEvent('pollingDelay', asOk(0))); + events$.next(asTaskManagerStatEvent('claimDuration', asOk(10))); + taskEvents.forEach((event) => events$.next(event)); }); }); @@ -713,10 +868,25 @@ function runAtMillisecondsAgo(ms: number): Date { const mockTaskRunEvent = ( overrides: Partial = {}, timing: TaskTiming, - result: TaskRunResult = TaskRunResult.Success + result: TaskRunResult = TaskRunResult.Success, + persistence?: TaskPersistence ) => { const task = mockTaskInstance(overrides); - return asTaskRunEvent(task.id, asOk({ task, result }), timing); + return asTaskRunEvent( + task.id, + asOk({ + task, + persistence: + persistence ?? (task.schedule ? TaskPersistence.Recurring : TaskPersistence.NonRecurring), + result, + }), + timing + ); +}; + +const mockTaskPollingEvent = (overrides: Partial = {}) => { + const task = mockTaskInstance(overrides); + return asTaskClaimEvent(task.id, asOk(task)); }; const mockTaskInstance = (overrides: Partial = {}): ConcreteTaskInstance => ({ diff --git a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts index da86cfad2a91..d43137d237a9 100644 --- a/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts +++ b/x-pack/plugins/task_manager/server/monitoring/task_run_statistics.ts @@ -20,6 +20,9 @@ import { TaskTiming, isTaskManagerStatEvent, TaskManagerStat, + TaskPersistence, + TaskClaim, + isTaskClaimEvent, } from '../task_events'; import { isOk, Ok, unwrap } from '../lib/result_type'; import { ConcreteTaskInstance } from '../task'; @@ -36,24 +39,17 @@ import { HealthStatus } from './monitoring_stats_stream'; import { TaskPollingLifecycle } from '../polling_lifecycle'; import { TaskExecutionFailureThreshold, TaskManagerConfig } from '../config'; -export enum TaskPersistence { - Recurring = 'recurring', - NonRecurring = 'non_recurring', - Ephemeral = 'ephemeral', -} - -function persistenceOf(task: ConcreteTaskInstance) { - return task.schedule ? 
TaskPersistence.Recurring : TaskPersistence.NonRecurring; -} - interface FillPoolStat extends JsonObject { - last_successful_poll: string; - last_polling_delay: string; duration: number[]; claim_duration: number[]; claim_conflicts: number[]; claim_mismatches: number[]; result_frequency_percent_as_number: FillPoolResult[]; + persistence: TaskPersistence[]; +} +interface OptionalFillPoolStat extends JsonObject { + last_successful_poll: string; + last_polling_delay: string; } interface ExecutionStat extends JsonObject { @@ -68,8 +64,7 @@ export interface TaskRunStat extends JsonObject { drift_by_type: Record; load: number[]; execution: ExecutionStat; - polling: Omit & - Pick, 'last_successful_poll' | 'last_polling_delay'>; + polling: FillPoolStat & Partial; } interface FillPoolRawStat extends JsonObject { @@ -83,6 +78,7 @@ interface FillPoolRawStat extends JsonObject { [FillPoolResult.RunningAtCapacity]: number; [FillPoolResult.PoolFilled]: number; }; + persistence: TaskPersistenceTypes; } interface ResultFrequency extends JsonObject { @@ -126,8 +122,10 @@ export function createTaskRunAggregator( > = taskPollingLifecycle.events.pipe( filter((taskEvent: TaskLifecycleEvent) => isTaskRunEvent(taskEvent) && hasTiming(taskEvent)), map((taskEvent: TaskLifecycleEvent) => { - const { task, result }: RanTask | ErroredTask = unwrap((taskEvent as TaskRun).event); - return taskRunEventToStat(task, taskEvent.timing!, result); + const { task, result, persistence }: RanTask | ErroredTask = unwrap( + (taskEvent as TaskRun).event + ); + return taskRunEventToStat(task, persistence, taskEvent.timing!, result); }) ); @@ -153,6 +151,9 @@ export function createTaskRunAggregator( const claimDurationQueue = createRunningAveragedStat(runningAverageWindowSize); const claimConflictsQueue = createRunningAveragedStat(runningAverageWindowSize); const claimMismatchesQueue = createRunningAveragedStat(runningAverageWindowSize); + const polledTasksByPersistenceQueue = createRunningAveragedStat( + runningAverageWindowSize + ); const taskPollingEvents$: Observable> = combineLatest([ // get latest polling stats taskPollingLifecycle.events.pipe( @@ -194,6 +195,22 @@ export function createTaskRunAggregator( ), map(() => new Date().toISOString()) ), + // get the average ratio of polled tasks by their persistency + taskPollingLifecycle.events.pipe( + filter( + (taskEvent: TaskLifecycleEvent) => isTaskClaimEvent(taskEvent) && isOk(taskEvent.event) + ), + map((taskClaimEvent) => { + const claimedTask = ((taskClaimEvent as TaskClaim).event as Ok).value; + return polledTasksByPersistenceQueue( + claimedTask.schedule ? TaskPersistence.Recurring : TaskPersistence.NonRecurring + ); + }), + // unlike the other streams that emit once TM polls, this will only emit when a task is actually + // claimed, so to make sure `combineLatest` doesn't stall until a task is actually emitted we seed + // the stream with an empty queue + startWith([]) + ), // get duration of task claim stage in polling taskPollingLifecycle.events.pipe( filter( @@ -204,16 +221,15 @@ export function createTaskRunAggregator( ), map((claimDurationEvent) => { const duration = ((claimDurationEvent as TaskManagerStat).event as Ok).value; - return { - claimDuration: duration ? claimDurationQueue(duration) : claimDurationQueue(), - }; + return duration ? 
claimDurationQueue(duration) : claimDurationQueue(); }) ), ]).pipe( - map(([{ polling }, pollingDelay, { claimDuration }]) => ({ + map(([{ polling }, pollingDelay, persistence, claimDuration]) => ({ polling: { last_polling_delay: pollingDelay, claim_duration: claimDuration, + persistence, ...polling, }, })) @@ -245,13 +261,14 @@ export function createTaskRunAggregator( claim_conflicts: [], claim_mismatches: [], result_frequency_percent_as_number: [], + persistence: [], }, }) ), ]).pipe( map( ([taskRun, load, polling]: [ - Pick, + Pick, Pick, Pick ]) => { @@ -285,12 +302,12 @@ function createTaskRunEventToStat(runningAverageWindowSize: number) { ); return ( task: ConcreteTaskInstance, + persistence: TaskPersistence, timing: TaskTiming, result: TaskRunResult ): Pick => { const drift = timing!.start - task.runAt.getTime(); const duration = timing!.stop - timing!.start; - const persistence = persistenceOf(task); return { drift: driftQueue(drift), drift_by_type: driftByTaskQueue(task.taskType, drift), @@ -318,11 +335,6 @@ const DEFAULT_POLLING_FREQUENCIES = { [FillPoolResult.RunningAtCapacity]: 0, [FillPoolResult.PoolFilled]: 0, }; -const DEFAULT_PERSISTENCE_FREQUENCIES = { - [TaskPersistence.Recurring]: 0, - [TaskPersistence.NonRecurring]: 0, - [TaskPersistence.Ephemeral]: 0, -}; export function summarizeTaskRunStat( { @@ -337,6 +349,7 @@ export function summarizeTaskRunStat( result_frequency_percent_as_number: pollingResultFrequency, claim_conflicts: claimConflicts, claim_mismatches: claimMismatches, + persistence: pollingPersistence, }, drift, // eslint-disable-next-line @typescript-eslint/naming-convention @@ -366,6 +379,11 @@ export function summarizeTaskRunStat( ...DEFAULT_POLLING_FREQUENCIES, ...calculateFrequency(pollingResultFrequency as FillPoolResult[]), }, + persistence: { + [TaskPersistence.Recurring]: 0, + [TaskPersistence.NonRecurring]: 0, + ...calculateFrequency(pollingPersistence as TaskPersistence[]), + }, }, drift: calculateRunningAverage(drift), drift_by_type: mapValues(drift_by_type, (typedDrift) => calculateRunningAverage(typedDrift)), @@ -376,7 +394,9 @@ export function summarizeTaskRunStat( calculateRunningAverage(typedDurations) ), persistence: { - ...DEFAULT_PERSISTENCE_FREQUENCIES, + [TaskPersistence.Recurring]: 0, + [TaskPersistence.NonRecurring]: 0, + [TaskPersistence.Ephemeral]: 0, ...calculateFrequency(persistence), }, result_frequency_percent_as_number: mapValues( diff --git a/x-pack/plugins/task_manager/server/plugin.test.ts b/x-pack/plugins/task_manager/server/plugin.test.ts index 0d9f285164f1..dff94259dbe6 100644 --- a/x-pack/plugins/task_manager/server/plugin.test.ts +++ b/x-pack/plugins/task_manager/server/plugin.test.ts @@ -38,12 +38,18 @@ describe('TaskManagerPlugin', () => { }, custom: {}, }, + ephemeral_tasks: { + enabled: false, + request_capacity: 10, + }, }); pluginInitializerContext.env.instanceUuid = ''; const taskManagerPlugin = new TaskManagerPlugin(pluginInitializerContext); - expect(() => taskManagerPlugin.setup(coreMock.createSetup())).toThrow( + expect(() => + taskManagerPlugin.setup(coreMock.createSetup(), { usageCollection: undefined }) + ).toThrow( new Error(`TaskManager is unable to start as Kibana has no valid UUID assigned to it.`) ); }); @@ -72,11 +78,17 @@ describe('TaskManagerPlugin', () => { }, custom: {}, }, + ephemeral_tasks: { + enabled: true, + request_capacity: 10, + }, }); const taskManagerPlugin = new TaskManagerPlugin(pluginInitializerContext); - const setupApi = await taskManagerPlugin.setup(coreMock.createSetup()); + const 
setupApi = await taskManagerPlugin.setup(coreMock.createSetup(), { + usageCollection: undefined, + }); // we only start a poller if we have task types that we support and we track // phases (moving from Setup to Start) based on whether the poller is working diff --git a/x-pack/plugins/task_manager/server/plugin.ts b/x-pack/plugins/task_manager/server/plugin.ts index d3e251b751ef..3d3d180fc066 100644 --- a/x-pack/plugins/task_manager/server/plugin.ts +++ b/x-pack/plugins/task_manager/server/plugin.ts @@ -7,6 +7,7 @@ import { combineLatest, Observable, Subject } from 'rxjs'; import { map, distinctUntilChanged } from 'rxjs/operators'; +import { UsageCollectionSetup } from 'src/plugins/usage_collection/server'; import { PluginInitializerContext, Plugin, @@ -27,6 +28,9 @@ import { createManagedConfiguration } from './lib/create_managed_configuration'; import { TaskScheduling } from './task_scheduling'; import { healthRoute } from './routes'; import { createMonitoringStats, MonitoringStats } from './monitoring'; +import { EphemeralTaskLifecycle } from './ephemeral_task_lifecycle'; +import { EphemeralTask } from './task'; +import { registerTaskManagerUsageCollector } from './usage'; export type TaskManagerSetupContract = { /** @@ -38,15 +42,16 @@ export type TaskManagerSetupContract = { export type TaskManagerStartContract = Pick< TaskScheduling, - 'schedule' | 'runNow' | 'ensureScheduled' + 'schedule' | 'runNow' | 'ephemeralRunNow' | 'ensureScheduled' > & Pick & { removeIfExists: TaskStore['remove']; - }; + } & { supportsEphemeralTasks: () => boolean }; export class TaskManagerPlugin implements Plugin { private taskPollingLifecycle?: TaskPollingLifecycle; + private ephemeralTaskLifecycle?: EphemeralTaskLifecycle; private taskManagerId?: string; private config: TaskManagerConfig; private logger: Logger; @@ -62,7 +67,10 @@ export class TaskManagerPlugin this.definitions = new TaskTypeDictionary(this.logger); } - public setup(core: CoreSetup): TaskManagerSetupContract { + public setup( + core: CoreSetup, + plugins: { usageCollection?: UsageCollectionSetup } + ): TaskManagerSetupContract { this.elasticsearchAndSOAvailability$ = getElasticsearchAndSOAvailability(core.status.core$); setupSavedObjects(core.savedObjects, this.config); @@ -79,7 +87,7 @@ export class TaskManagerPlugin // Routes const router = core.http.createRouter(); - const serviceStatus$ = healthRoute( + const { serviceStatus$, monitoredHealth$ } = healthRoute( router, this.monitoringStats$, this.logger, @@ -95,6 +103,16 @@ export class TaskManagerPlugin ) ); + const usageCollection = plugins.usageCollection; + if (usageCollection) { + registerTaskManagerUsageCollector( + usageCollection, + monitoredHealth$, + this.config.ephemeral_tasks.enabled, + this.config.ephemeral_tasks.request_capacity + ); + } + return { index: this.config.index, addMiddleware: (middleware: Middleware) => { @@ -138,8 +156,19 @@ export class TaskManagerPlugin ...managedConfiguration, }); + this.ephemeralTaskLifecycle = new EphemeralTaskLifecycle({ + config: this.config!, + definitions: this.definitions, + logger: this.logger, + middleware: this.middleware, + elasticsearchAndSOAvailability$: this.elasticsearchAndSOAvailability$!, + pool: this.taskPollingLifecycle.pool, + lifecycleEvent: this.taskPollingLifecycle.events, + }); + createMonitoringStats( this.taskPollingLifecycle, + this.ephemeralTaskLifecycle, taskStore, this.elasticsearchAndSOAvailability$!, this.config!, @@ -152,7 +181,9 @@ export class TaskManagerPlugin taskStore, middleware: this.middleware, 
taskPollingLifecycle: this.taskPollingLifecycle, + ephemeralTaskLifecycle: this.ephemeralTaskLifecycle, definitions: this.definitions, + taskManagerId: taskStore.taskManagerId, }); return { @@ -163,6 +194,8 @@ export class TaskManagerPlugin schedule: (...args) => taskScheduling.schedule(...args), ensureScheduled: (...args) => taskScheduling.ensureScheduled(...args), runNow: (...args) => taskScheduling.runNow(...args), + ephemeralRunNow: (task: EphemeralTask) => taskScheduling.ephemeralRunNow(task), + supportsEphemeralTasks: () => this.config.ephemeral_tasks.enabled, }; } diff --git a/x-pack/plugins/task_manager/server/polling_lifecycle.test.ts b/x-pack/plugins/task_manager/server/polling_lifecycle.test.ts index 73b892c9f59e..aad03951bbb9 100644 --- a/x-pack/plugins/task_manager/server/polling_lifecycle.test.ts +++ b/x-pack/plugins/task_manager/server/polling_lifecycle.test.ts @@ -58,6 +58,10 @@ describe('TaskPollingLifecycle', () => { }, custom: {}, }, + ephemeral_tasks: { + enabled: true, + request_capacity: 10, + }, }, taskStore: mockTaskStore, logger: taskManagerLogger, diff --git a/x-pack/plugins/task_manager/server/polling_lifecycle.ts b/x-pack/plugins/task_manager/server/polling_lifecycle.ts index 454e49fe3f86..16b15d0c46e3 100644 --- a/x-pack/plugins/task_manager/server/polling_lifecycle.ts +++ b/x-pack/plugins/task_manager/server/polling_lifecycle.ts @@ -25,6 +25,7 @@ import { asTaskPollingCycleEvent, TaskManagerStat, asTaskManagerStatEvent, + EphemeralTaskRejectedDueToCapacity, } from './task_events'; import { fillPool, FillPoolResult, TimedFillPoolResult } from './lib/fill_pool'; import { Middleware } from './lib/middleware'; @@ -60,7 +61,8 @@ export type TaskLifecycleEvent = | TaskClaim | TaskRunRequest | TaskPollingCycle - | TaskManagerStat; + | TaskManagerStat + | EphemeralTaskRejectedDueToCapacity; /** * The public interface into the task manager system. @@ -73,7 +75,7 @@ export class TaskPollingLifecycle { private bufferedStore: BufferedTaskStore; private logger: Logger; - private pool: TaskPool; + public pool: TaskPool; // all task related events (task claimed, task marked as running, etc.) 
are emitted through events$ + private events$ = new Subject(); + // all on-demand requests we wish to pipe into the poller @@ -160,7 +162,15 @@ export class TaskPollingLifecycle { pollInterval$: pollIntervalConfiguration$, pollIntervalDelay$, bufferCapacity: config.request_capacity, - getCapacity: () => this.pool.availableWorkers, + getCapacity: () => { + const capacity = this.pool.availableWorkers; + if (!capacity) { + // if there isn't capacity, emit a load event so that we can expose how often + // high load causes the poller to skip work (work isn't called when there is no capacity) + this.emitEvent(asTaskManagerStatEvent('load', asOk(this.pool.workerLoad))); + } + return capacity; + }, pollRequests$: this.claimRequests$, work: this.pollForWork, // Time out the `work` phase if it takes longer than a certain number of polling cycles @@ -227,8 +237,8 @@ export class TaskPollingLifecycle { private pollForWork = async (...tasksToClaim: string[]): Promise => { return fillPool( // claim available tasks - () => - claimAvailableTasks( + () => { + return claimAvailableTasks( tasksToClaim.splice(0, this.pool.availableWorkers), this.taskClaiming, this.logger @@ -242,11 +252,18 @@ } }) ) - ), + ); + }, // wrap each task in a Task Runner this.createTaskRunnerForTask, // place tasks in the Task Pool - async (tasks: TaskRunner[]) => await this.pool.run(tasks) + async (tasks: TaskRunner[]) => { + const result = await this.pool.run(tasks); + // Emit the load after fetching tasks, giving us a good metric for evaluating how + // busy Task manager tends to be in this Kibana instance + this.emitEvent(asTaskManagerStatEvent('load', asOk(this.pool.workerLoad))); + return result; + } ); }; diff --git a/x-pack/plugins/task_manager/server/queries/task_claiming.ts b/x-pack/plugins/task_manager/server/queries/task_claiming.ts index 7f15707a14b3..20a0275d8fa0 100644 --- a/x-pack/plugins/task_manager/server/queries/task_claiming.ts +++ b/x-pack/plugins/task_manager/server/queries/task_claiming.ts @@ -11,7 +11,7 @@ import apm from 'elastic-apm-node'; import { Subject, Observable, from, of } from 'rxjs'; import { map, mergeScan } from 'rxjs/operators'; -import { difference, partition, groupBy, mapValues, countBy, pick } from 'lodash'; +import { difference, partition, groupBy, mapValues, countBy, pick, isPlainObject } from 'lodash'; import { some, none } from 'fp-ts/lib/Option'; import { Logger } from '../../../../../src/core/server'; @@ -87,6 +87,9 @@ export interface ClaimOwnershipResult { docs: ConcreteTaskInstance[]; timing?: TaskTiming; } +export const isClaimOwnershipResult = (result: unknown): result is ClaimOwnershipResult => + isPlainObject((result as ClaimOwnershipResult).stats) && + Array.isArray((result as ClaimOwnershipResult).docs); enum BatchConcurrency { Unlimited, diff --git a/x-pack/plugins/task_manager/server/routes/health.test.ts b/x-pack/plugins/task_manager/server/routes/health.test.ts index ece91ed571f8..fd7e37e0fe9a 100644 --- a/x-pack/plugins/task_manager/server/routes/health.test.ts +++ b/x-pack/plugins/task_manager/server/routes/health.test.ts @@ -23,6 +23,7 @@ import { import { ServiceStatusLevels } from 'src/core/server'; import { configSchema, TaskManagerConfig } from '../config'; import { calculateHealthStatusMock } from '../lib/calculate_health_status.mock'; +import { FillPoolResult } from '../lib/fill_pool'; jest.mock('../lib/log_health_metrics', () => ({ logHealthMetrics: jest.fn(), @@ -106,6 +107,7 @@ describe('healthRoute', () => { const 
warnRuntimeStat = mockHealthStats(); const warnConfigurationStat = mockHealthStats(); const warnWorkloadStat = mockHealthStats(); + const warnEphemeralStat = mockHealthStats(); const stats$ = new Subject(); @@ -130,8 +132,10 @@ describe('healthRoute', () => { stats$.next(warnConfigurationStat); await sleep(1001); stats$.next(warnWorkloadStat); + await sleep(1001); + stats$.next(warnEphemeralStat); - expect(logHealthMetrics).toBeCalledTimes(3); + expect(logHealthMetrics).toBeCalledTimes(4); expect(logHealthMetrics.mock.calls[0][0]).toMatchObject({ id, timestamp: expect.any(String), @@ -156,6 +160,14 @@ describe('healthRoute', () => { summarizeMonitoringStats(warnWorkloadStat, getTaskManagerConfig({})) ), }); + expect(logHealthMetrics.mock.calls[2][0]).toMatchObject({ + id, + timestamp: expect.any(String), + status: expect.any(String), + ...ignoreCapacityEstimation( + summarizeMonitoringStats(warnEphemeralStat, getTaskManagerConfig({})) + ), + }); }); it(`logs at an error level if the status is error`, async () => { @@ -168,6 +180,7 @@ describe('healthRoute', () => { const errorRuntimeStat = mockHealthStats(); const errorConfigurationStat = mockHealthStats(); const errorWorkloadStat = mockHealthStats(); + const errorEphemeralStat = mockHealthStats(); const stats$ = new Subject(); @@ -192,8 +205,10 @@ describe('healthRoute', () => { stats$.next(errorConfigurationStat); await sleep(1001); stats$.next(errorWorkloadStat); + await sleep(1001); + stats$.next(errorEphemeralStat); - expect(logHealthMetrics).toBeCalledTimes(3); + expect(logHealthMetrics).toBeCalledTimes(4); expect(logHealthMetrics.mock.calls[0][0]).toMatchObject({ id, timestamp: expect.any(String), @@ -218,6 +233,14 @@ describe('healthRoute', () => { summarizeMonitoringStats(errorWorkloadStat, getTaskManagerConfig({})) ), }); + expect(logHealthMetrics.mock.calls[2][0]).toMatchObject({ + id, + timestamp: expect.any(String), + status: expect.any(String), + ...ignoreCapacityEstimation( + summarizeMonitoringStats(errorEphemeralStat, getTaskManagerConfig({})) + ), + }); }); it('returns a error status if the overall stats have not been updated within the required hot freshness', async () => { @@ -225,7 +248,7 @@ describe('healthRoute', () => { const stats$ = new Subject(); - const serviceStatus$ = healthRoute( + const { serviceStatus$ } = healthRoute( router, stats$, loggingSystemMock.create().get(), @@ -264,6 +287,9 @@ describe('healthRoute', () => { workload: { timestamp: expect.any(String), }, + ephemeral: { + timestamp: expect.any(String), + }, runtime: { timestamp: expect.any(String), value: { @@ -335,6 +361,9 @@ describe('healthRoute', () => { workload: { timestamp: expect.any(String), }, + ephemeral: { + timestamp: expect.any(String), + }, runtime: { timestamp: expect.any(String), value: { @@ -403,6 +432,9 @@ describe('healthRoute', () => { workload: { timestamp: expect.any(String), }, + ephemeral: { + timestamp: expect.any(String), + }, runtime: { timestamp: expect.any(String), value: { @@ -488,14 +520,25 @@ function mockHealthStats(overrides = {}) { duration: [500, 400, 3000], claim_conflicts: [0, 100, 75], claim_mismatches: [0, 100, 75], + claim_duration: [0, 100, 75], result_frequency_percent_as_number: [ - 'NoTasksClaimed', - 'NoTasksClaimed', - 'NoTasksClaimed', + FillPoolResult.NoTasksClaimed, + FillPoolResult.NoTasksClaimed, + FillPoolResult.NoTasksClaimed, ], + persistence: [], }, }, }, + ephemeral: { + timestamp: new Date().toISOString(), + value: { + load: [], + executionsPerCycle: [], + queuedTasks: [], + delay: [], + }, 
+ }, }, }; return (merge(stub, overrides) as unknown) as MonitoringStats; diff --git a/x-pack/plugins/task_manager/server/routes/health.ts b/x-pack/plugins/task_manager/server/routes/health.ts index b5d8a23ba555..fe58ee3490af 100644 --- a/x-pack/plugins/task_manager/server/routes/health.ts +++ b/x-pack/plugins/task_manager/server/routes/health.ts @@ -53,7 +53,10 @@ export function healthRoute( logger: Logger, taskManagerId: string, config: TaskManagerConfig -): Observable { +): { + serviceStatus$: Observable; + monitoredHealth$: Observable; +} { // if "hot" health stats are any more stale than monitored_stats_required_freshness (pollInterval +1s buffer by default) // consider the system unhealthy const requiredHotStatsFreshness: number = config.monitored_stats_required_freshness; @@ -67,6 +70,7 @@ export function healthRoute( } const serviceStatus$: Subject = new Subject(); + const monitoredHealth$: Subject = new Subject(); /* keep track of last health summary, as we'll return that to the next call to _health */ let lastMonitoredStats: MonitoringStats | null = null; @@ -84,6 +88,7 @@ export function healthRoute( ) .subscribe(([monitoredHealth, serviceStatus]) => { serviceStatus$.next(serviceStatus); + monitoredHealth$.next(monitoredHealth); logHealthMetrics(monitoredHealth, logger, config); }); @@ -104,7 +109,7 @@ export function healthRoute( }); } ); - return serviceStatus$; + return { serviceStatus$, monitoredHealth$ }; } export function withServiceStatus( diff --git a/x-pack/plugins/task_manager/server/task.ts b/x-pack/plugins/task_manager/server/task.ts index 8f515e1951ef..2452e3e6f492 100644 --- a/x-pack/plugins/task_manager/server/task.ts +++ b/x-pack/plugins/task_manager/server/task.ts @@ -363,6 +363,13 @@ export interface ConcreteTaskInstance extends TaskInstance { ownerId: string | null; } +/** + * A task instance that has an id and is ready for storage. 
+ */ +export type EphemeralTask = Pick; +export type EphemeralTaskInstance = EphemeralTask & + Pick; + export type SerializedConcreteTaskInstance = Omit< ConcreteTaskInstance, 'state' | 'params' | 'scheduledAt' | 'startedAt' | 'retryAt' | 'runAt' diff --git a/x-pack/plugins/task_manager/server/task_events.ts b/x-pack/plugins/task_manager/server/task_events.ts index aecf7c9a2b7e..7c7845569a10 100644 --- a/x-pack/plugins/task_manager/server/task_events.ts +++ b/x-pack/plugins/task_manager/server/task_events.ts @@ -13,6 +13,13 @@ import { Result, Err } from './lib/result_type'; import { ClaimAndFillPoolResult } from './lib/fill_pool'; import { PollingError } from './polling'; import { TaskRunResult } from './task_running'; +import { EphemeralTaskInstanceRequest } from './ephemeral_task_lifecycle'; + +export enum TaskPersistence { + Recurring = 'recurring', + NonRecurring = 'non_recurring', + Ephemeral = 'ephemeral', +} export enum TaskEventType { TASK_CLAIM = 'TASK_CLAIM', @@ -21,6 +28,7 @@ export enum TaskEventType { TASK_RUN_REQUEST = 'TASK_RUN_REQUEST', TASK_POLLING_CYCLE = 'TASK_POLLING_CYCLE', TASK_MANAGER_STAT = 'TASK_MANAGER_STAT', + EPHEMERAL_TASK_DELAYED_DUE_TO_CAPACITY = 'EPHEMERAL_TASK_DELAYED_DUE_TO_CAPACITY', } export enum TaskClaimErrorType { @@ -48,6 +56,7 @@ export interface TaskEvent { } export interface RanTask { task: ConcreteTaskInstance; + persistence: TaskPersistence; result: TaskRunResult; } export type ErroredTask = RanTask & { @@ -62,9 +71,15 @@ export type TaskMarkRunning = TaskEvent; export type TaskRun = TaskEvent; export type TaskClaim = TaskEvent; export type TaskRunRequest = TaskEvent; +export type EphemeralTaskRejectedDueToCapacity = TaskEvent; export type TaskPollingCycle = TaskEvent>; -export type TaskManagerStats = 'load' | 'pollingDelay' | 'claimDuration'; +export type TaskManagerStats = + | 'load' + | 'pollingDelay' + | 'claimDuration' + | 'queuedEphemeralTasks' + | 'ephemeralTaskDelay'; export type TaskManagerStat = TaskEvent; export type OkResultOf = EventType extends TaskEvent @@ -149,6 +164,19 @@ export function asTaskManagerStatEvent( }; } +export function asEphemeralTaskRejectedDueToCapacityEvent( + id: string, + event: Result, + timing?: TaskTiming +): EphemeralTaskRejectedDueToCapacity { + return { + id, + type: TaskEventType.EPHEMERAL_TASK_DELAYED_DUE_TO_CAPACITY, + event, + timing, + }; +} + export function isTaskMarkRunningEvent( taskEvent: TaskEvent ): taskEvent is TaskMarkRunning { @@ -175,3 +203,8 @@ export function isTaskManagerStatEvent( ): taskEvent is TaskManagerStat { return taskEvent.type === TaskEventType.TASK_MANAGER_STAT; } +export function isEphemeralTaskRejectedDueToCapacityEvent( + taskEvent: TaskEvent +): taskEvent is EphemeralTaskRejectedDueToCapacity { + return taskEvent.type === TaskEventType.EPHEMERAL_TASK_DELAYED_DUE_TO_CAPACITY; +} diff --git a/x-pack/plugins/task_manager/server/task_pool.mock.ts b/x-pack/plugins/task_manager/server/task_pool.mock.ts new file mode 100644 index 000000000000..de82d5872d5d --- /dev/null +++ b/x-pack/plugins/task_manager/server/task_pool.mock.ts @@ -0,0 +1,48 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ +import { TaskPool } from './task_pool'; + +const defaultGetCapacityOverride: () => Partial<{ + load: number; + occupiedWorkers: number; + workerLoad: number; + max: number; + availableWorkers: number; +}> = () => ({ + load: 0, + occupiedWorkers: 0, + workerLoad: 0, + max: 10, + availableWorkers: 10, +}); + +const createTaskPoolMock = (getCapacityOverride = defaultGetCapacityOverride) => { + return ({ + get load() { + return getCapacityOverride().load ?? 0; + }, + get occupiedWorkers() { + return getCapacityOverride().occupiedWorkers ?? 0; + }, + get workerLoad() { + return getCapacityOverride().workerLoad ?? 0; + }, + get max() { + return getCapacityOverride().max ?? 10; + }, + get availableWorkers() { + return getCapacityOverride().availableWorkers ?? 10; + }, + getOccupiedWorkersByType: jest.fn(), + run: jest.fn(), + cancelRunningTasks: jest.fn(), + } as unknown) as jest.Mocked; +}; + +export const TaskPoolMock = { + create: createTaskPoolMock, +}; diff --git a/x-pack/plugins/task_manager/server/task_pool.ts b/x-pack/plugins/task_manager/server/task_pool.ts index 14c0c4581a15..d394214e6c77 100644 --- a/x-pack/plugins/task_manager/server/task_pool.ts +++ b/x-pack/plugins/task_manager/server/task_pool.ts @@ -16,8 +16,7 @@ import { padStart } from 'lodash'; import { Logger } from '../../../../src/core/server'; import { TaskRunner } from './task_running'; import { isTaskSavedObjectNotFoundError } from './lib/is_task_not_found_error'; -import { TaskManagerStat, asTaskManagerStatEvent } from './task_events'; -import { asOk } from './lib/result_type'; +import { TaskManagerStat } from './task_events'; interface Opts { maxWorkers$: Observable; @@ -84,10 +83,6 @@ export class TaskPool { * Gets how many workers are currently available. */ public get availableWorkers() { - // emit load whenever we check how many available workers there are - // this should happen less often than the actual changes to the worker queue - // so is lighter than emitting the load every time we add/remove a task from the queue - this.load$.next(asTaskManagerStatEvent('load', asOk(this.workerLoad))); // cancel expired task whenever a call is made to check for capacity // this ensures that we don't end up with a queue of hung tasks causing both // the poller and the pool from hanging due to lack of capacity @@ -174,7 +169,9 @@ export class TaskPool { this.logger.warn(errorLogLine); } }) - .then(() => this.tasksInPool.delete(taskRunner.id)); + .then(() => { + this.tasksInPool.delete(taskRunner.id); + }); } private handleFailureOfMarkAsRunning(task: TaskRunner, err: Error) { diff --git a/x-pack/plugins/task_manager/server/task_running/ephemeral_task_runner.ts b/x-pack/plugins/task_manager/server/task_running/ephemeral_task_runner.ts new file mode 100644 index 000000000000..bc1ff0541fdf --- /dev/null +++ b/x-pack/plugins/task_manager/server/task_running/ephemeral_task_runner.ts @@ -0,0 +1,337 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +/* + * This module contains the core logic for running an individual task. + * It handles the full lifecycle of a task run, including error handling, + * rescheduling, middleware application, etc. 
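+ * Unlike the persisted task runner, this runner never writes to the task store: marking a task as running is a no-op and run results are only surfaced as task manager events.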
+ */ + +import apm from 'elastic-apm-node'; +import { withSpan } from '@kbn/apm-utils'; +import { identity } from 'lodash'; +import { Logger } from '../../../../../src/core/server'; + +import { Middleware } from '../lib/middleware'; +import { asOk, asErr, eitherAsync, Result } from '../lib/result_type'; +import { + TaskRun, + TaskMarkRunning, + asTaskRunEvent, + asTaskMarkRunningEvent, + startTaskTimer, + TaskTiming, + TaskPersistence, +} from '../task_events'; +import { intervalFromDate } from '../lib/intervals'; +import { + CancellableTask, + ConcreteTaskInstance, + isFailedRunResult, + SuccessfulRunResult, + FailedRunResult, + TaskStatus, + EphemeralTaskInstance, +} from '../task'; +import { TaskTypeDictionary } from '../task_type_dictionary'; +import { + asPending, + asReadyToRun, + EMPTY_RUN_RESULT, + isPending, + isReadyToRun, + TaskRunner, + TaskRunningInstance, + TaskRunResult, +} from './task_runner'; + +type Opts = { + logger: Logger; + definitions: TaskTypeDictionary; + instance: EphemeralTaskInstance; + onTaskEvent?: (event: TaskRun | TaskMarkRunning) => void; +} & Pick; + +// ephemeral tasks cannot be rescheduled or scheduled to run again in the future +type EphemeralSuccessfulRunResult = Omit; +type EphemeralFailedRunResult = Omit; + +/** + * + * @export + * @class EphemeralTaskManagerRunner + * @implements {TaskRunner} + */ +export class EphemeralTaskManagerRunner implements TaskRunner { + private task?: CancellableTask; + private instance: TaskRunningInstance; + private definitions: TaskTypeDictionary; + private logger: Logger; + private beforeRun: Middleware['beforeRun']; + private beforeMarkRunning: Middleware['beforeMarkRunning']; + private onTaskEvent: (event: TaskRun | TaskMarkRunning) => void; + + /** + * Creates an instance of EphemeralTaskManagerRunner. + * @param {Opts} opts + * @prop {Logger} logger - The task manager logger + * @prop {TaskDefinition} definition - The definition of the task being run + * @prop {EphemeralTaskInstance} instance - The record describing this particular task instance + * @prop {BeforeRunFunction} beforeRun - A function that adjusts the run context prior to running the task + * @memberof EphemeralTaskManagerRunner + */ + constructor({ + instance, + definitions, + logger, + beforeRun, + beforeMarkRunning, + onTaskEvent = identity, + }: Opts) { + this.instance = asPending(asConcreteInstance(sanitizeInstance(instance))); + this.definitions = definitions; + this.logger = logger; + this.beforeRun = beforeRun; + this.beforeMarkRunning = beforeMarkRunning; + this.onTaskEvent = onTaskEvent; + } + + /** + * Gets the id of this task instance. + */ + public get id() { + return this.instance.task.id; + } + + /** + * Gets the task type of this task instance. + */ + public get taskType() { + return this.instance.task.taskType; + } + + /** + * Get the stage this TaskRunner is at + */ + public get stage() { + return this.instance.stage; + } + + /** + * Gets the task definition from the dictionary. + */ + public get definition() { + return this.definitions.get(this.taskType); + } + + /** + * Gets the time at which this task will expire. + */ + public get expiration() { + return intervalFromDate( + // if the task is running, use its started at, otherwise use the timestamp at + // which it was last updated + // this allows us to catch tasks that remain in Pending/Finalizing without being + // cleaned up + isReadyToRun(this.instance) ? 
this.instance.task.startedAt : this.instance.timestamp, + this.definition.timeout + )!; + } + + /** + * Gets the duration of the current task run + */ + public get startedAt() { + return this.instance.task.startedAt; + } + + /** + * Gets whether or not this task has run longer than its expiration setting allows. + */ + public get isExpired() { + return this.expiration < new Date(); + } + + public get isEphemeral() { + return true; + } + + /** + * Returns a log-friendly representation of this task. + */ + public toString() { + return `${this.taskType} "${this.id}" (Ephemeral)`; + } + + /** + * Runs the task, handling the task result, errors, etc, rescheduling if need + * be. NOTE: the time of applying the middleware's beforeRun is incorporated + * into the total timeout time the task is configured with. We may decide to + * start the timer after beforeRun resolves + * + * @returns {Promise>} + */ + public async run(): Promise> { + if (!isReadyToRun(this.instance)) { + throw new Error( + `Running ephemeral task ${this} failed as it ${ + isPending(this.instance) ? `isn't ready to be run` : `has already been run` + }` + ); + } + this.logger.debug(`Running ephemeral task ${this}`); + const apmTrans = apm.startTransaction(this.taskType, 'taskManager ephemeral run', { + childOf: this.instance.task.traceparent, + }); + const modifiedContext = await this.beforeRun({ + taskInstance: asConcreteInstance(this.instance.task), + }); + const stopTaskTimer = startTaskTimer(); + try { + this.task = this.definition.createTaskRunner(modifiedContext); + const result = await withSpan({ name: 'ephemeral run', type: 'task manager' }, () => + this.task!.run() + ); + const validatedResult = this.validateResult(result); + const processedResult = await withSpan( + { name: 'process ephemeral result', type: 'task manager' }, + () => this.processResult(validatedResult, stopTaskTimer()) + ); + if (apmTrans) apmTrans.end('success'); + return processedResult; + } catch (err) { + this.logger.error(`Task ${this} failed: ${err}`); + // in the error scenario, we cannot get the RunResult + const processedResult = await withSpan( + { name: 'process ephemeral result', type: 'task manager' }, + () => + this.processResult( + asErr({ error: err, state: modifiedContext.taskInstance.state }), + stopTaskTimer() + ) + ); + if (apmTrans) apmTrans.end('failure'); + return processedResult; + } + } + + /** + * Noop for Ephemeral tasks + * + * @returns {Promise} + */ + public async markTaskAsRunning(): Promise { + if (!isPending(this.instance)) { + throw new Error( + `Marking ephemeral task ${this} as running has failed as it ${ + isReadyToRun(this.instance) ? `is already running` : `has already been run` + }` + ); + } + + const apmTrans = apm.startTransaction('taskManager', 'taskManager markTaskAsRunning'); + + const now = new Date(); + try { + const { taskInstance } = await this.beforeMarkRunning({ + taskInstance: asConcreteInstance(this.instance.task), + }); + + this.instance = asReadyToRun({ + ...taskInstance, + status: TaskStatus.Running, + startedAt: now, + attempts: taskInstance.attempts + 1, + retryAt: null, + }); + + if (apmTrans) apmTrans.end('success'); + this.onTaskEvent(asTaskMarkRunningEvent(this.id, asOk(this.instance.task))); + return true; + } catch (error) { + if (apmTrans) apmTrans.end('failure'); + this.onTaskEvent(asTaskMarkRunningEvent(this.id, asErr(error))); + } + return false; + } + + /** + * Attempts to cancel the task. 
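+ * If the wrapped task exposes a `cancel` handler it is invoked; otherwise a debug message is logged and the task is left to run to completion.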
+   *
+   * @returns {Promise<void>}
+   */
+  public async cancel() {
+    const { task } = this;
+    if (task?.cancel) {
+      this.task = undefined;
+      return task.cancel();
+    }
+
+    this.logger.debug(`The ephemeral task ${this} is not cancellable.`);
+  }
+
+  private validateResult(
+    result?: SuccessfulRunResult | FailedRunResult | void
+  ): Result<SuccessfulRunResult, FailedRunResult> {
+    return isFailedRunResult(result)
+      ? asErr({ ...result, error: result.error })
+      : asOk(result || EMPTY_RUN_RESULT);
+  }
+
+  private async processResult(
+    result: Result<SuccessfulRunResult, FailedRunResult>,
+    taskTiming: TaskTiming
+  ): Promise<Result<SuccessfulRunResult, FailedRunResult>> {
+    await eitherAsync(
+      result,
+      async ({ state }: EphemeralSuccessfulRunResult) => {
+        this.onTaskEvent(
+          asTaskRunEvent(
+            this.id,
+            asOk({
+              task: { ...this.instance.task, state },
+              persistence: TaskPersistence.Ephemeral,
+              result: TaskRunResult.Success,
+            }),
+            taskTiming
+          )
+        );
+      },
+      async ({ error, state }: EphemeralFailedRunResult) => {
+        this.onTaskEvent(
+          asTaskRunEvent(
+            this.id,
+            asErr({
+              task: { ...this.instance.task, state },
+              persistence: TaskPersistence.Ephemeral,
+              result: TaskRunResult.Failed,
+              error,
+            }),
+            taskTiming
+          )
+        );
+      }
+    );
+    return result;
+  }
+}
+
+function sanitizeInstance(instance: EphemeralTaskInstance): EphemeralTaskInstance {
+  return {
+    ...instance,
+    params: instance.params || {},
+    state: instance.state || {},
+  };
+}
+
+function asConcreteInstance(instance: EphemeralTaskInstance): ConcreteTaskInstance {
+  return {
+    ...instance,
+    attempts: 0,
+    retryAt: null,
+  };
+}
diff --git a/x-pack/plugins/task_manager/server/task_running/errors.ts b/x-pack/plugins/task_manager/server/task_running/errors.ts
index 8b01a5fb266c..43466fae0e2e 100644
--- a/x-pack/plugins/task_manager/server/task_running/errors.ts
+++ b/x-pack/plugins/task_manager/server/task_running/errors.ts
@@ -4,6 +4,7 @@
  * 2.0; you may not use this file except in compliance with the Elastic License
  * 2.0.
*/ +import { EphemeralTask } from '../task'; // Unrecoverable const CODE_UNRECOVERABLE = 'TaskManager/unrecoverable'; @@ -14,6 +15,19 @@ export interface DecoratedError extends Error { [code]?: string; } +export class EphemeralTaskRejectedDueToCapacityError extends Error { + private _task: EphemeralTask; + + constructor(message: string, task: EphemeralTask) { + super(message); + this._task = task; + } + + public get task() { + return this._task; + } +} + function isTaskManagerError(error: unknown): error is DecoratedError { return Boolean(error && (error as DecoratedError)[code]); } @@ -26,3 +40,9 @@ export function throwUnrecoverableError(error: Error) { (error as DecoratedError)[code] = CODE_UNRECOVERABLE; throw error; } + +export function isEphemeralTaskRejectedDueToCapacityError( + error: Error | EphemeralTaskRejectedDueToCapacityError +) { + return Boolean(error && error instanceof EphemeralTaskRejectedDueToCapacityError); +} diff --git a/x-pack/plugins/task_manager/server/task_running/task_runner.test.ts b/x-pack/plugins/task_manager/server/task_running/task_runner.test.ts index d5a86b532b0a..e54962c7c885 100644 --- a/x-pack/plugins/task_manager/server/task_running/task_runner.test.ts +++ b/x-pack/plugins/task_manager/server/task_running/task_runner.test.ts @@ -10,7 +10,13 @@ import sinon from 'sinon'; import { secondsFromNow } from '../lib/intervals'; import { asOk, asErr } from '../lib/result_type'; import { TaskManagerRunner, TaskRunningStage, TaskRunResult } from '../task_running'; -import { TaskEvent, asTaskRunEvent, asTaskMarkRunningEvent, TaskRun } from '../task_events'; +import { + TaskEvent, + asTaskRunEvent, + asTaskMarkRunningEvent, + TaskRun, + TaskPersistence, +} from '../task_events'; import { ConcreteTaskInstance, TaskStatus } from '../task'; import { SavedObjectsErrorHelpers } from '../../../../../src/core/server'; import moment from 'moment'; @@ -854,7 +860,12 @@ describe('TaskManagerRunner', () => { const onTaskEvent = jest.fn(); const { runner, store, instance: originalInstance } = await readyToRunStageSetup({ onTaskEvent, - instance: { id, status: TaskStatus.Running, startedAt: new Date() }, + instance: { + id, + schedule: { interval: '20m' }, + status: TaskStatus.Running, + startedAt: new Date(), + }, definitions: { bar: { title: 'Bar!', @@ -878,6 +889,7 @@ describe('TaskManagerRunner', () => { id, asErr({ error, + persistence: TaskPersistence.Recurring, task: originalInstance, result: TaskRunResult.Failed, }) @@ -1209,7 +1221,16 @@ describe('TaskManagerRunner', () => { await runner.run(); expect(onTaskEvent).toHaveBeenCalledWith( - withAnyTiming(asTaskRunEvent(id, asOk({ task: instance, result: TaskRunResult.Success }))) + withAnyTiming( + asTaskRunEvent( + id, + asOk({ + task: instance, + persistence: TaskPersistence.NonRecurring, + result: TaskRunResult.Success, + }) + ) + ) ); }); @@ -1238,7 +1259,16 @@ describe('TaskManagerRunner', () => { await runner.run(); expect(onTaskEvent).toHaveBeenCalledWith( - withAnyTiming(asTaskRunEvent(id, asOk({ task: instance, result: TaskRunResult.Success }))) + withAnyTiming( + asTaskRunEvent( + id, + asOk({ + task: instance, + persistence: TaskPersistence.Recurring, + result: TaskRunResult.Success, + }) + ) + ) ); }); @@ -1268,7 +1298,12 @@ describe('TaskManagerRunner', () => { withAnyTiming( asTaskRunEvent( id, - asErr({ error, task: instance, result: TaskRunResult.RetryScheduled }) + asErr({ + error, + task: instance, + persistence: TaskPersistence.NonRecurring, + result: TaskRunResult.RetryScheduled, + }) ) ) ); @@ 
-1304,7 +1339,12 @@ describe('TaskManagerRunner', () => { withAnyTiming( asTaskRunEvent( id, - asErr({ error, task: instance, result: TaskRunResult.RetryScheduled }) + asErr({ + error, + task: instance, + persistence: TaskPersistence.Recurring, + result: TaskRunResult.RetryScheduled, + }) ) ) ); @@ -1346,6 +1386,7 @@ describe('TaskManagerRunner', () => { asErr({ error, task: originalInstance, + persistence: TaskPersistence.NonRecurring, result: TaskRunResult.Failed, }) ) diff --git a/x-pack/plugins/task_manager/server/task_running/task_runner.ts b/x-pack/plugins/task_manager/server/task_running/task_runner.ts index fc88a6632917..97b40a75a59c 100644 --- a/x-pack/plugins/task_manager/server/task_running/task_runner.ts +++ b/x-pack/plugins/task_manager/server/task_running/task_runner.ts @@ -36,6 +36,7 @@ import { asTaskMarkRunningEvent, startTaskTimer, TaskTiming, + TaskPersistence, } from '../task_events'; import { intervalFromDate, maxIntervalFromDate } from '../lib/intervals'; import { @@ -53,7 +54,7 @@ import { TaskTypeDictionary } from '../task_type_dictionary'; import { isUnrecoverableError } from './errors'; const defaultBackoffPerFailure = 5 * 60 * 1000; -const EMPTY_RUN_RESULT: SuccessfulRunResult = { state: {} }; +export const EMPTY_RUN_RESULT: SuccessfulRunResult = { state: {} }; export interface TaskRunner { isExpired: boolean; @@ -65,6 +66,7 @@ export interface TaskRunner { run: () => Promise>; id: string; stage: string; + isEphemeral?: boolean; toString: () => string; } @@ -105,14 +107,17 @@ export enum TaskRunResult { } // A ConcreteTaskInstance which we *know* has a `startedAt` Date on it -type ConcreteTaskInstanceWithStartedAt = ConcreteTaskInstance & { startedAt: Date }; +export type ConcreteTaskInstanceWithStartedAt = ConcreteTaskInstance & { startedAt: Date }; // The three possible stages for a Task Runner - Pending -> ReadyToRun -> Ran -type PendingTask = TaskRunning; -type ReadyToRunTask = TaskRunning; -type RanTask = TaskRunning; +export type PendingTask = TaskRunning; +export type ReadyToRunTask = TaskRunning< + TaskRunningStage.READY_TO_RUN, + ConcreteTaskInstanceWithStartedAt +>; +export type RanTask = TaskRunning; -type TaskRunningInstance = PendingTask | ReadyToRunTask | RanTask; +export type TaskRunningInstance = PendingTask | ReadyToRunTask | RanTask; /** * Runs a background task, ensures that errors are properly handled, @@ -528,6 +533,10 @@ export class TaskManagerRunner implements TaskRunner { this.id, asOk({ task, + persistence: + schedule || task.schedule + ? TaskPersistence.Recurring + : TaskPersistence.NonRecurring, result: await (runAt || schedule || task.schedule ? this.processResultForRecurringTask(result) : this.processResultWhenDone()), @@ -540,7 +549,12 @@ export class TaskManagerRunner implements TaskRunner { this.onTaskEvent( asTaskRunEvent( this.id, - asErr({ task, result: await this.processResultForRecurringTask(result), error }), + asErr({ + task, + persistence: task.schedule ? TaskPersistence.Recurring : TaskPersistence.NonRecurring, + result: await this.processResultForRecurringTask(result), + error, + }), taskTiming ) ); @@ -602,20 +616,20 @@ function performanceStopMarkingTaskAsRunning() { // in a specific place in the code might be type InstanceOf = T extends TaskRunning ? 
I : never; -function isPending(taskRunning: TaskRunningInstance): taskRunning is PendingTask { +export function isPending(taskRunning: TaskRunningInstance): taskRunning is PendingTask { return taskRunning.stage === TaskRunningStage.PENDING; } -function asPending(task: InstanceOf): PendingTask { +export function asPending(task: InstanceOf): PendingTask { return { timestamp: new Date(), stage: TaskRunningStage.PENDING, task, }; } -function isReadyToRun(taskRunning: TaskRunningInstance): taskRunning is ReadyToRunTask { +export function isReadyToRun(taskRunning: TaskRunningInstance): taskRunning is ReadyToRunTask { return taskRunning.stage === TaskRunningStage.READY_TO_RUN; } -function asReadyToRun( +export function asReadyToRun( task: InstanceOf ): ReadyToRunTask { return { @@ -624,7 +638,7 @@ function asReadyToRun( task, }; } -function asRan(task: InstanceOf): RanTask { +export function asRan(task: InstanceOf): RanTask { return { timestamp: new Date(), stage: TaskRunningStage.RAN, diff --git a/x-pack/plugins/task_manager/server/task_scheduling.mock.ts b/x-pack/plugins/task_manager/server/task_scheduling.mock.ts index 02b58eafa5fe..60742e83664b 100644 --- a/x-pack/plugins/task_manager/server/task_scheduling.mock.ts +++ b/x-pack/plugins/task_manager/server/task_scheduling.mock.ts @@ -12,6 +12,7 @@ const createTaskSchedulingMock = () => { ensureScheduled: jest.fn(), schedule: jest.fn(), runNow: jest.fn(), + ephemeralRunNow: jest.fn(), } as unknown) as jest.Mocked; }; diff --git a/x-pack/plugins/task_manager/server/task_scheduling.test.ts b/x-pack/plugins/task_manager/server/task_scheduling.test.ts index 3445bd18de10..41a172bfb2f8 100644 --- a/x-pack/plugins/task_manager/server/task_scheduling.test.ts +++ b/x-pack/plugins/task_manager/server/task_scheduling.test.ts @@ -15,6 +15,7 @@ import { asTaskClaimEvent, asTaskRunRequestEvent, TaskClaimErrorType, + TaskPersistence, } from './task_events'; import { TaskLifecycleEvent } from './polling_lifecycle'; import { taskPollingLifecycleMock } from './polling_lifecycle.mock'; @@ -26,6 +27,11 @@ import { taskStoreMock } from './task_store.mock'; import { TaskRunResult } from './task_running'; import { mockLogger } from './test_utils'; import { TaskTypeDictionary } from './task_type_dictionary'; +import { ephemeralTaskLifecycleMock } from './ephemeral_task_lifecycle.mock'; + +jest.mock('uuid', () => ({ + v4: () => 'v4uuid', +})); jest.mock('elastic-apm-node', () => ({ currentTraceparent: 'parent', @@ -41,6 +47,8 @@ describe('TaskScheduling', () => { logger: mockLogger(), middleware: createInitialMiddleware(), definitions, + ephemeralTaskLifecycle: ephemeralTaskLifecycleMock.create({}), + taskManagerId: '', }; definitions.registerTaskDefinitions({ @@ -137,7 +145,12 @@ describe('TaskScheduling', () => { const result = taskScheduling.runNow(id); const task = mockTask({ id }); - events$.next(asTaskRunEvent(id, asOk({ task, result: TaskRunResult.Success }))); + events$.next( + asTaskRunEvent( + id, + asOk({ task, result: TaskRunResult.Success, persistence: TaskPersistence.Recurring }) + ) + ); return expect(result).resolves.toEqual({ id }); }); @@ -163,6 +176,7 @@ describe('TaskScheduling', () => { task, error: new Error('some thing gone wrong'), result: TaskRunResult.Failed, + persistence: TaskPersistence.Recurring, }) ) ); @@ -393,7 +407,14 @@ describe('TaskScheduling', () => { events$.next(asTaskClaimEvent(id, asOk(task))); events$.next(asTaskClaimEvent(differentTask, asOk(otherTask))); events$.next( - asTaskRunEvent(differentTask, asOk({ task: otherTask, 
result: TaskRunResult.Success })) + asTaskRunEvent( + differentTask, + asOk({ + task: otherTask, + result: TaskRunResult.Success, + persistence: TaskPersistence.Recurring, + }) + ) ); events$.next( @@ -403,6 +424,7 @@ describe('TaskScheduling', () => { task, error: new Error('some thing gone wrong'), result: TaskRunResult.Failed, + persistence: TaskPersistence.Recurring, }) ) ); @@ -411,6 +433,97 @@ describe('TaskScheduling', () => { `[Error: Failed to run task "01ddff11-e88a-4d13-bc4e-256164e755e2": Error: some thing gone wrong]` ); }); + + test('runs a task ephemerally', async () => { + const ephemeralEvents$ = new Subject(); + const ephemeralTask = mockTask({ + state: { + foo: 'bar', + }, + }); + const customEphemeralTaskLifecycleMock = ephemeralTaskLifecycleMock.create({ + events$: ephemeralEvents$, + }); + + customEphemeralTaskLifecycleMock.attemptToRun.mockImplementation((value) => { + return { + tag: 'ok', + value, + }; + }); + + const middleware = createInitialMiddleware(); + middleware.beforeSave = jest.fn().mockImplementation(async () => { + return { taskInstance: ephemeralTask }; + }); + const taskScheduling = new TaskScheduling({ + ...taskSchedulingOpts, + middleware, + ephemeralTaskLifecycle: customEphemeralTaskLifecycleMock, + }); + + const result = taskScheduling.ephemeralRunNow(ephemeralTask); + ephemeralEvents$.next( + asTaskRunEvent( + 'v4uuid', + asOk({ + task: { + ...ephemeralTask, + id: 'v4uuid', + }, + result: TaskRunResult.Success, + persistence: TaskPersistence.Ephemeral, + }) + ) + ); + + expect(result).resolves.toEqual({ id: 'v4uuid', state: { foo: 'bar' } }); + }); + + test('rejects ephemeral task if lifecycle returns an error', async () => { + const ephemeralEvents$ = new Subject(); + const ephemeralTask = mockTask({ + state: { + foo: 'bar', + }, + }); + const customEphemeralTaskLifecycleMock = ephemeralTaskLifecycleMock.create({ + events$: ephemeralEvents$, + }); + + customEphemeralTaskLifecycleMock.attemptToRun.mockImplementation((value) => { + return asErr(value); + }); + + const middleware = createInitialMiddleware(); + middleware.beforeSave = jest.fn().mockImplementation(async () => { + return { taskInstance: ephemeralTask }; + }); + const taskScheduling = new TaskScheduling({ + ...taskSchedulingOpts, + middleware, + ephemeralTaskLifecycle: customEphemeralTaskLifecycleMock, + }); + + const result = taskScheduling.ephemeralRunNow(ephemeralTask); + ephemeralEvents$.next( + asTaskRunEvent( + 'v4uuid', + asOk({ + task: { + ...ephemeralTask, + id: 'v4uuid', + }, + result: TaskRunResult.Failed, + persistence: TaskPersistence.Ephemeral, + }) + ) + ); + + expect(result).rejects.toMatchInlineSnapshot( + `[Error: Ephemeral Task of type foo was rejected]` + ); + }); }); }); diff --git a/x-pack/plugins/task_manager/server/task_scheduling.ts b/x-pack/plugins/task_manager/server/task_scheduling.ts index 153c16f5c4bf..88176b25680c 100644 --- a/x-pack/plugins/task_manager/server/task_scheduling.ts +++ b/x-pack/plugins/task_manager/server/task_scheduling.ts @@ -5,14 +5,17 @@ * 2.0. 
*/ -import { filter } from 'rxjs/operators'; +import { filter, take } from 'rxjs/operators'; import { pipe } from 'fp-ts/lib/pipeable'; import { Option, map as mapOptional, getOrElse, isSome } from 'fp-ts/lib/Option'; +import uuid from 'uuid'; +import { pick } from 'lodash'; +import { merge, Subject } from 'rxjs'; import agent from 'elastic-apm-node'; import { Logger } from '../../../../src/core/server'; -import { asOk, either, map, mapErr, promiseResult } from './lib/result_type'; +import { asOk, either, map, mapErr, promiseResult, isErr } from './lib/result_type'; import { isTaskRunEvent, isTaskClaimEvent, @@ -32,11 +35,14 @@ import { TaskLifecycle, TaskLifecycleResult, TaskStatus, + EphemeralTask, } from './task'; import { TaskStore } from './task_store'; import { ensureDeprecatedFieldsAreCorrected } from './lib/correct_deprecated_fields'; import { TaskLifecycleEvent, TaskPollingLifecycle } from './polling_lifecycle'; import { TaskTypeDictionary } from './task_type_dictionary'; +import { EphemeralTaskLifecycle } from './ephemeral_task_lifecycle'; +import { EphemeralTaskRejectedDueToCapacityError } from './task_running'; const VERSION_CONFLICT_STATUS = 409; @@ -44,20 +50,25 @@ export interface TaskSchedulingOpts { logger: Logger; taskStore: TaskStore; taskPollingLifecycle: TaskPollingLifecycle; + ephemeralTaskLifecycle: EphemeralTaskLifecycle; middleware: Middleware; definitions: TaskTypeDictionary; + taskManagerId: string; } -interface RunNowResult { - id: string; +export interface RunNowResult { + id: ConcreteTaskInstance['id']; + state?: ConcreteTaskInstance['state']; } export class TaskScheduling { private store: TaskStore; private taskPollingLifecycle: TaskPollingLifecycle; + private ephemeralTaskLifecycle: EphemeralTaskLifecycle; private logger: Logger; private middleware: Middleware; private definitions: TaskTypeDictionary; + private taskManagerId: string; /** * Initializes the task manager, preventing any further addition of middleware, @@ -68,8 +79,10 @@ export class TaskScheduling { this.logger = opts.logger; this.middleware = opts.middleware; this.taskPollingLifecycle = opts.taskPollingLifecycle; + this.ephemeralTaskLifecycle = opts.ephemeralTaskLifecycle; this.store = opts.taskStore; this.definitions = opts.definitions; + this.taskManagerId = opts.taskManagerId; } /** @@ -100,11 +113,67 @@ export class TaskScheduling { */ public async runNow(taskId: string): Promise { return new Promise(async (resolve, reject) => { - this.awaitTaskRunResult(taskId).then(resolve).catch(reject); + this.awaitTaskRunResult(taskId) + // don't expose state on runNow + .then(({ id }) => resolve({ id })) + .catch(reject); this.taskPollingLifecycle.attemptToRun(taskId); }); } + /** + * Run an ad-hoc task in memory without persisting it into ES or distributing the load across the cluster. + * + * @param task - The ephemeral task being queued. + * @returns {Promise} + */ + public async ephemeralRunNow( + task: EphemeralTask, + options?: Record + ): Promise { + const id = uuid.v4(); + const { taskInstance: modifiedTask } = await this.middleware.beforeSave({ + ...options, + taskInstance: task, + }); + return new Promise(async (resolve, reject) => { + // The actual promise returned from this function is resolved after the awaitTaskRunResult promise resolves. + // However, we do not wait to await this promise, as we want later execution to happen in parallel. 
+ // The awaitTaskRunResult promise is resolved once the ephemeral task is successfully executed (technically, when a TaskEventType.TASK_RUN is emitted with the same id). + // However, the ephemeral task won't even get into the queue until the subsequent this.ephemeralTaskLifecycle.attemptToRun is called (which puts it in the queue). + + // The reason for all this confusion? Timing. + + // In the this.ephemeralTaskLifecycle.attemptToRun, it's possible that the ephemeral task is put into the queue and processed before this function call returns anything. + // If that happens, putting the awaitTaskRunResult after would just hang because the task already completed. We need to listen for the completion before we add it to the queue to avoid this possibility. + const { cancel, resolveOnCancel } = cancellablePromise(); + this.awaitTaskRunResult(id, resolveOnCancel) + .then((arg: RunNowResult) => { + resolve(arg); + }) + .catch((err: Error) => { + reject(err); + }); + const attemptToRunResult = this.ephemeralTaskLifecycle.attemptToRun({ + id, + scheduledAt: new Date(), + runAt: new Date(), + status: TaskStatus.Idle, + ownerId: this.taskManagerId, + ...modifiedTask, + }); + if (isErr(attemptToRunResult)) { + cancel(); + reject( + new EphemeralTaskRejectedDueToCapacityError( + `Ephemeral Task of type ${task.taskType} was rejected`, + task + ) + ); + } + }); + } + /** * Schedules a task with an Id * @@ -125,10 +194,13 @@ export class TaskScheduling { } } - private async awaitTaskRunResult(taskId: string): Promise { + private awaitTaskRunResult(taskId: string, cancel?: Promise): Promise { return new Promise((resolve, reject) => { - const subscription = this.taskPollingLifecycle.events - // listen for all events related to the current task + // listen for all events related to the current task + const subscription = merge( + this.taskPollingLifecycle.events, + this.ephemeralTaskLifecycle.events + ) .pipe(filter(({ id }: TaskLifecycleEvent) => id === taskId)) .subscribe((taskEvent: TaskLifecycleEvent) => { if (isTaskClaimEvent(taskEvent)) { @@ -161,7 +233,7 @@ export class TaskScheduling { // resolve if the task has run sucessfully if (isTaskRunEvent(taskEvent)) { subscription.unsubscribe(); - resolve({ id: (taskInstance as RanTask).task.id }); + resolve(pick((taskInstance as RanTask).task, ['id', 'state'])); } }, async (errorResult: ErrResultOf) => { @@ -182,6 +254,12 @@ export class TaskScheduling { ); } }); + + if (cancel) { + cancel.then(() => { + subscription.unsubscribe(); + }); + } }); } @@ -216,3 +294,14 @@ export class TaskScheduling { ); } } + +const cancellablePromise = () => { + const boolStream = new Subject(); + return { + cancel: () => boolStream.next(true), + resolveOnCancel: boolStream + .pipe(take(1)) + .toPromise() + .then(() => {}), + }; +}; diff --git a/x-pack/plugins/task_manager/server/usage/index.ts b/x-pack/plugins/task_manager/server/usage/index.ts new file mode 100644 index 000000000000..2f52014fa40e --- /dev/null +++ b/x-pack/plugins/task_manager/server/usage/index.ts @@ -0,0 +1,8 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +export { registerTaskManagerUsageCollector } from './task_manager_usage_collector'; diff --git a/x-pack/plugins/task_manager/server/usage/task_manager_usage_collector.test.ts b/x-pack/plugins/task_manager/server/usage/task_manager_usage_collector.test.ts new file mode 100644 index 000000000000..4b993a4e0629 --- /dev/null +++ b/x-pack/plugins/task_manager/server/usage/task_manager_usage_collector.test.ts @@ -0,0 +1,172 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ +import { Subject } from 'rxjs'; +import { merge } from 'lodash'; +import { loggingSystemMock } from 'src/core/server/mocks'; +import { + Collector, + createCollectorFetchContextWithKibanaMock, + createUsageCollectionSetupMock, +} from 'src/plugins/usage_collection/server/mocks'; +import { HealthStatus } from '../monitoring'; +import { MonitoredHealth } from '../routes/health'; +import { TaskPersistence } from '../task_events'; +import { registerTaskManagerUsageCollector } from './task_manager_usage_collector'; +import { sleep } from '../test_utils'; + +describe('registerTaskManagerUsageCollector', () => { + let collector: Collector; + const logger = loggingSystemMock.createLogger(); + + it('should report telemetry on the ephemeral queue', async () => { + const monitoringStats$ = new Subject(); + const usageCollectionMock = createUsageCollectionSetupMock(); + const fetchContext = createCollectorFetchContextWithKibanaMock(); + usageCollectionMock.makeUsageCollector.mockImplementation((config) => { + collector = new Collector(logger, config); + return createUsageCollectionSetupMock().makeUsageCollector(config); + }); + + registerTaskManagerUsageCollector(usageCollectionMock, monitoringStats$, true, 10); + + const mockHealth = getMockMonitoredHealth(); + monitoringStats$.next(mockHealth); + await sleep(1001); + + expect(usageCollectionMock.makeUsageCollector).toBeCalled(); + const telemetry = await collector.fetch(fetchContext); + expect(telemetry).toMatchObject({ + ephemeral_tasks_enabled: true, + ephemeral_request_capacity: 10, + ephemeral_stats: { + status: mockHealth.stats.ephemeral?.status, + load: mockHealth.stats.ephemeral?.value.load, + executions_per_cycle: mockHealth.stats.ephemeral?.value.executionsPerCycle, + queued_tasks: mockHealth.stats.ephemeral?.value.queuedTasks, + }, + }); + }); +}); + +function getMockMonitoredHealth(overrides = {}): MonitoredHealth { + const stub: MonitoredHealth = { + id: '1', + status: HealthStatus.OK, + timestamp: new Date().toISOString(), + last_update: new Date().toISOString(), + stats: { + configuration: { + timestamp: new Date().toISOString(), + status: HealthStatus.OK, + value: { + max_workers: 10, + poll_interval: 3000, + max_poll_inactivity_cycles: 10, + request_capacity: 1000, + monitored_aggregated_stats_refresh_rate: 5000, + monitored_stats_running_average_window: 50, + monitored_task_execution_thresholds: { + default: { + error_threshold: 90, + warn_threshold: 80, + }, + custom: {}, + }, + }, + }, + workload: { + timestamp: new Date().toISOString(), + status: HealthStatus.OK, + value: { + count: 4, + task_types: { + actions_telemetry: { count: 2, status: { idle: 2 } }, + alerting_telemetry: { count: 1, status: { idle: 1 } }, + session_cleanup: { count: 1, status: { idle: 1 } }, + }, + schedule: [], + overdue: 0, + overdue_non_recurring: 0, + 
estimatedScheduleDensity: [], + non_recurring: 20, + owner_ids: 2, + estimated_schedule_density: [], + capacity_requirements: { + per_minute: 150, + per_hour: 360, + per_day: 820, + }, + }, + }, + ephemeral: { + status: HealthStatus.OK, + timestamp: new Date().toISOString(), + value: { + load: { + p50: 4, + p90: 6, + p95: 6, + p99: 6, + }, + executionsPerCycle: { + p50: 4, + p90: 6, + p95: 6, + p99: 6, + }, + queuedTasks: { + p50: 4, + p90: 6, + p95: 6, + p99: 6, + }, + }, + }, + runtime: { + timestamp: new Date().toISOString(), + status: HealthStatus.OK, + value: { + drift: { + p50: 1000, + p90: 2000, + p95: 2500, + p99: 3000, + }, + drift_by_type: {}, + load: { + p50: 1000, + p90: 2000, + p95: 2500, + p99: 3000, + }, + execution: { + duration: {}, + duration_by_persistence: {}, + persistence: { + [TaskPersistence.Recurring]: 10, + [TaskPersistence.NonRecurring]: 10, + [TaskPersistence.Ephemeral]: 10, + }, + result_frequency_percent_as_number: {}, + }, + polling: { + last_successful_poll: new Date().toISOString(), + duration: [500, 400, 3000], + claim_conflicts: [0, 100, 75], + claim_mismatches: [0, 100, 75], + result_frequency_percent_as_number: [ + 'NoTasksClaimed', + 'NoTasksClaimed', + 'NoTasksClaimed', + ], + }, + }, + }, + }, + }; + return (merge(stub, overrides) as unknown) as MonitoredHealth; +} diff --git a/x-pack/plugins/task_manager/server/usage/task_manager_usage_collector.ts b/x-pack/plugins/task_manager/server/usage/task_manager_usage_collector.ts new file mode 100644 index 000000000000..3eff2370ec0c --- /dev/null +++ b/x-pack/plugins/task_manager/server/usage/task_manager_usage_collector.ts @@ -0,0 +1,96 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ +import { Observable } from 'rxjs'; +import { UsageCollectionSetup } from 'src/plugins/usage_collection/server'; +import { MonitoredHealth } from '../routes/health'; +import { TaskManagerUsage } from './types'; + +export function createTaskManagerUsageCollector( + usageCollection: UsageCollectionSetup, + monitoringStats$: Observable, + ephemeralTasksEnabled: boolean, + ephemeralRequestCapacity: number +) { + let lastMonitoredHealth: MonitoredHealth | null = null; + monitoringStats$.subscribe((health) => { + lastMonitoredHealth = health; + }); + + return usageCollection.makeUsageCollector({ + type: 'task_manager', + isReady: async () => { + return Boolean(lastMonitoredHealth); + }, + fetch: async () => { + return { + ephemeral_tasks_enabled: ephemeralTasksEnabled, + ephemeral_request_capacity: ephemeralRequestCapacity, + ephemeral_stats: { + status: lastMonitoredHealth?.stats.ephemeral?.status ?? '', + queued_tasks: { + p50: lastMonitoredHealth?.stats.ephemeral?.value.queuedTasks.p50 ?? 0, + p90: lastMonitoredHealth?.stats.ephemeral?.value.queuedTasks.p90 ?? 0, + p95: lastMonitoredHealth?.stats.ephemeral?.value.queuedTasks.p95 ?? 0, + p99: lastMonitoredHealth?.stats.ephemeral?.value.queuedTasks.p99 ?? 0, + }, + load: { + p50: lastMonitoredHealth?.stats.ephemeral?.value.load.p50 ?? 0, + p90: lastMonitoredHealth?.stats.ephemeral?.value.load.p90 ?? 0, + p95: lastMonitoredHealth?.stats.ephemeral?.value.load.p95 ?? 0, + p99: lastMonitoredHealth?.stats.ephemeral?.value.load.p99 ?? 0, + }, + executions_per_cycle: { + p50: lastMonitoredHealth?.stats.ephemeral?.value.executionsPerCycle.p50 ?? 
0, + p90: lastMonitoredHealth?.stats.ephemeral?.value.executionsPerCycle.p90 ?? 0, + p95: lastMonitoredHealth?.stats.ephemeral?.value.executionsPerCycle.p95 ?? 0, + p99: lastMonitoredHealth?.stats.ephemeral?.value.executionsPerCycle.p99 ?? 0, + }, + }, + }; + }, + schema: { + ephemeral_tasks_enabled: { type: 'boolean' }, + ephemeral_request_capacity: { type: 'short' }, + ephemeral_stats: { + status: { type: 'keyword' }, + queued_tasks: { + p50: { type: 'long' }, + p90: { type: 'long' }, + p95: { type: 'long' }, + p99: { type: 'long' }, + }, + load: { + p50: { type: 'long' }, + p90: { type: 'long' }, + p95: { type: 'long' }, + p99: { type: 'long' }, + }, + executions_per_cycle: { + p50: { type: 'long' }, + p90: { type: 'long' }, + p95: { type: 'long' }, + p99: { type: 'long' }, + }, + }, + }, + }); +} + +export function registerTaskManagerUsageCollector( + usageCollection: UsageCollectionSetup, + monitoringStats$: Observable, + ephemeralTasksEnabled: boolean, + ephemeralRequestCapacity: number +) { + const collector = createTaskManagerUsageCollector( + usageCollection, + monitoringStats$, + ephemeralTasksEnabled, + ephemeralRequestCapacity + ); + usageCollection.registerCollector(collector); +} diff --git a/x-pack/plugins/task_manager/server/usage/types.ts b/x-pack/plugins/task_manager/server/usage/types.ts new file mode 100644 index 000000000000..78e948e21d0a --- /dev/null +++ b/x-pack/plugins/task_manager/server/usage/types.ts @@ -0,0 +1,32 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +export interface TaskManagerUsage { + ephemeral_tasks_enabled: boolean; + ephemeral_request_capacity: number; + ephemeral_stats: { + status: string; + queued_tasks: { + p50: number; + p90: number; + p95: number; + p99: number; + }; + load: { + p50: number; + p90: number; + p95: number; + p99: number; + }; + executions_per_cycle: { + p50: number; + p90: number; + p95: number; + p99: number; + }; + }; +} diff --git a/x-pack/plugins/task_manager/tsconfig.json b/x-pack/plugins/task_manager/tsconfig.json index a72b678da1f7..4b53dcac72c8 100644 --- a/x-pack/plugins/task_manager/tsconfig.json +++ b/x-pack/plugins/task_manager/tsconfig.json @@ -15,5 +15,6 @@ "references": [ { "path": "../../../src/core/tsconfig.json" }, { "path": "../../../src/plugins/kibana_utils/tsconfig.json" }, + { "path": "../../../src/plugins/usage_collection/tsconfig.json" }, ] } diff --git a/x-pack/plugins/telemetry_collection_xpack/schema/xpack_plugins.json b/x-pack/plugins/telemetry_collection_xpack/schema/xpack_plugins.json index 02cff73bfc11..270117eed849 100644 --- a/x-pack/plugins/telemetry_collection_xpack/schema/xpack_plugins.json +++ b/x-pack/plugins/telemetry_collection_xpack/schema/xpack_plugins.json @@ -5840,6 +5840,71 @@ } } }, + "task_manager": { + "properties": { + "ephemeral_tasks_enabled": { + "type": "boolean" + }, + "ephemeral_request_capacity": { + "type": "short" + }, + "ephemeral_stats": { + "properties": { + "status": { + "type": "keyword" + }, + "queued_tasks": { + "properties": { + "p50": { + "type": "long" + }, + "p90": { + "type": "long" + }, + "p95": { + "type": "long" + }, + "p99": { + "type": "long" + } + } + }, + "load": { + "properties": { + "p50": { + "type": "long" + }, + "p90": { + "type": "long" + }, + "p95": { + "type": "long" + }, + "p99": { + "type": "long" + } + } + }, + 
"executions_per_cycle": { + "properties": { + "p50": { + "type": "long" + }, + "p90": { + "type": "long" + }, + "p95": { + "type": "long" + }, + "p99": { + "type": "long" + } + } + } + } + } + } + }, "upgrade-assistant-telemetry": { "properties": { "features": { diff --git a/x-pack/test/alerting_api_integration/common/config.ts b/x-pack/test/alerting_api_integration/common/config.ts index 3dcbde5f2114..5a9d2a20fee1 100644 --- a/x-pack/test/alerting_api_integration/common/config.ts +++ b/x-pack/test/alerting_api_integration/common/config.ts @@ -42,6 +42,7 @@ const enabledActionTypes = [ 'test.failing', 'test.index-record', 'test.noop', + 'test.delayed', 'test.rate-limit', 'test.throw', ]; @@ -158,6 +159,7 @@ export function createTestConfig(name: string, options: CreateTestConfigOptions) ...actionsProxyUrl, ...customHostSettings, '--xpack.eventLog.logEntries=true', + '--xpack.task_manager.ephemeral_tasks.enabled=false', `--xpack.actions.preconfiguredAlertHistoryEsIndex=${preconfiguredAlertHistoryEsIndex}`, `--xpack.actions.preconfigured=${JSON.stringify({ 'my-slack1': { diff --git a/x-pack/test/alerting_api_integration/common/fixtures/plugins/alerts/server/action_types.ts b/x-pack/test/alerting_api_integration/common/fixtures/plugins/alerts/server/action_types.ts index 2a80280ef2aa..a848207bf1b7 100644 --- a/x-pack/test/alerting_api_integration/common/fixtures/plugins/alerts/server/action_types.ts +++ b/x-pack/test/alerting_api_integration/common/fixtures/plugins/alerts/server/action_types.ts @@ -34,6 +34,7 @@ export function defineActionTypes( actions.registerType(noopActionType); actions.registerType(throwActionType); actions.registerType(getIndexRecordActionType()); + actions.registerType(getDelayedActionType()); actions.registerType(getFailingActionType()); actions.registerType(getRateLimitedActionType()); actions.registerType(getAuthorizationActionType(core)); @@ -81,6 +82,40 @@ function getIndexRecordActionType() { return result; } +function getDelayedActionType() { + const paramsSchema = schema.object({ + delayInMs: schema.number({ defaultValue: 1000 }), + }); + type ParamsType = TypeOf; + const configSchema = schema.object({ + unencrypted: schema.string(), + }); + type ConfigType = TypeOf; + const secretsSchema = schema.object({ + encrypted: schema.string(), + }); + type SecretsType = TypeOf; + const result: ActionType = { + id: 'test.delayed', + name: 'Test: Delayed', + minimumLicenseRequired: 'gold', + validate: { + params: paramsSchema, + config: configSchema, + secrets: secretsSchema, + }, + async executor({ config, secrets, params, services, actionId }) { + await new Promise((resolve) => { + setTimeout(() => { + resolve(true); + }, params.delayInMs); + }); + return { status: 'ok', actionId }; + }, + }; + return result; +} + function getFailingActionType() { const paramsSchema = schema.object({ index: schema.string(), diff --git a/x-pack/test/alerting_api_integration/common/fixtures/plugins/alerts/server/plugin.ts b/x-pack/test/alerting_api_integration/common/fixtures/plugins/alerts/server/plugin.ts index e98b7af075d6..5674e644f9c2 100644 --- a/x-pack/test/alerting_api_integration/common/fixtures/plugins/alerts/server/plugin.ts +++ b/x-pack/test/alerting_api_integration/common/fixtures/plugins/alerts/server/plugin.ts @@ -52,6 +52,7 @@ export class FixturePlugin implements Plugin { + const objectRemover = new ObjectRemover(supertest); + + beforeEach(async () => { + await esTestIndexTool.destroy(); + await esTestIndexTool.setup(); + }); + afterEach(async () => await 
esTestIndexTool.destroy()); + after(async () => { + await objectRemover.removeAll(); + }); + + it('should execute all requests, when some will be ephemeral and some not', async () => { + const nonEphemeralTasks = 3; + const actionPromises = []; + for (let i = 0; i < DEFAULT_MAX_EPHEMERAL_ACTIONS_PER_ALERT + nonEphemeralTasks; i++) { + actionPromises.push( + supertest + .post(`${getUrlPrefix(Spaces.space1.id)}/api/actions/connector`) + .set('kbn-xsrf', 'foo') + .send({ + name: `My action${i}`, + connector_type_id: 'test.index-record', + config: { + unencrypted: `This value shouldn't get encrypted`, + }, + secrets: { + encrypted: 'This value should be encrypted', + }, + }) + .expect(200) + ); + } + const createdActions = await Promise.all(actionPromises); + createdActions.forEach((createdAction) => + objectRemover.add(Spaces.space1.id, createdAction.body.id, 'action', 'actions') + ); + + const pattern = { + instance: [true, true, true, false, true, true], + }; + const alertData = getTestAlertData({ + rule_type_id: 'test.patternFiring', + params: { pattern }, + schedule: { interval: '1m' }, + throttle: null, + notify_when: 'onActiveAlert', + actions: createdActions.map((createdAction) => { + return { + id: createdAction.body.id, + group: 'default', + params: { + index: ES_TEST_INDEX_NAME, + reference: '', + message: 'test message', + }, + }; + }), + }); + const { body: createdAlert } = await supertest + .post(`${getUrlPrefix(Spaces.space1.id)}/api/alerting/rule`) + .set('kbn-xsrf', 'foo') + .send(alertData) + .expect(200); + objectRemover.add(Spaces.space1.id, createdAlert.id, 'rule', 'alerting'); + + const events = flatten( + await Promise.all( + createdActions.map(async (createdAction) => { + return await retry.try(async () => { + return await getEventLog({ + getService, + spaceId: Spaces.space1.id, + type: 'action', + id: createdAction.body.id, + provider: 'actions', + actions: new Map([['execute', { gte: 1 }]]), + }); + }); + }) + ) + ); + + const executeActionsEvents = getEventsByAction(events, 'execute'); + expect(executeActionsEvents.length).equal( + nonEphemeralTasks + DEFAULT_MAX_EPHEMERAL_ACTIONS_PER_ALERT + ); + + const searchResult = await esTestIndexTool.search('action:test.index-record'); + expect(searchResult.hits.total.value).equal( + nonEphemeralTasks + DEFAULT_MAX_EPHEMERAL_ACTIONS_PER_ALERT + ); + }); + }); +} + +function getEventsByAction(events: IValidatedEvent[], action: string) { + return events.filter((event) => event?.event?.action === action); +} diff --git a/x-pack/test/alerting_api_integration/spaces_only/tests/alerting/index.ts b/x-pack/test/alerting_api_integration/spaces_only/tests/alerting/index.ts index 5c3374a4d9c7..3a4cc62c2550 100644 --- a/x-pack/test/alerting_api_integration/spaces_only/tests/alerting/index.ts +++ b/x-pack/test/alerting_api_integration/spaces_only/tests/alerting/index.ts @@ -37,6 +37,7 @@ export default function alertingTests({ loadTestFile, getService }: FtrProviderC loadTestFile(require.resolve('./builtin_alert_types')); loadTestFile(require.resolve('./mustache_templates.ts')); loadTestFile(require.resolve('./notify_when')); + loadTestFile(require.resolve('./ephemeral')); loadTestFile(require.resolve('./event_log_alerts')); // note that this test will destroy existing spaces diff --git a/x-pack/test/plugin_api_integration/config.ts b/x-pack/test/plugin_api_integration/config.ts index 0b4369d199c3..cd13186a69cc 100644 --- a/x-pack/test/plugin_api_integration/config.ts +++ b/x-pack/test/plugin_api_integration/config.ts @@ -42,6 +42,8 @@ 
export default async function ({ readConfigFile }: FtrConfigProviderContext) { '--xpack.eventLog.logEntries=true', '--xpack.eventLog.indexEntries=true', '--xpack.task_manager.monitored_aggregated_stats_refresh_rate=5000', + '--xpack.task_manager.ephemeral_tasks.enabled=false', + '--xpack.task_manager.ephemeral_tasks.request_capacity=100', ...plugins.map( (pluginDir) => `--plugin-path=${path.resolve(__dirname, 'plugins', pluginDir)}` ), diff --git a/x-pack/test/plugin_api_integration/plugins/sample_task_plugin/server/init_routes.ts b/x-pack/test/plugin_api_integration/plugins/sample_task_plugin/server/init_routes.ts index 7213beb2b49a..5d0d72ae94ac 100644 --- a/x-pack/test/plugin_api_integration/plugins/sample_task_plugin/server/init_routes.ts +++ b/x-pack/test/plugin_api_integration/plugins/sample_task_plugin/server/init_routes.ts @@ -112,6 +112,45 @@ export function initRoutes( } ); + router.post( + { + path: `/api/sample_tasks/ephemeral_run_now`, + validate: { + body: schema.object({ + task: schema.object({ + taskType: schema.string(), + state: schema.recordOf(schema.string(), schema.any()), + params: schema.recordOf(schema.string(), schema.any()), + }), + }), + }, + }, + async function ( + context: RequestHandlerContext, + req: KibanaRequest< + any, + any, + { + task: { + taskType: string; + params: Record; + state: Record; + }; + }, + any + >, + res: KibanaResponseFactory + ): Promise> { + const { task } = req.body; + try { + const taskManager = await taskManagerStart; + return res.ok({ body: await taskManager.ephemeralRunNow(task) }); + } catch (err) { + return res.ok({ body: { task, error: `${err}` } }); + } + } + ); + router.post( { path: `/api/sample_tasks/ensure_scheduled`, diff --git a/x-pack/test/plugin_api_integration/plugins/sample_task_plugin/server/plugin.ts b/x-pack/test/plugin_api_integration/plugins/sample_task_plugin/server/plugin.ts index 203155141089..e4770f79b360 100644 --- a/x-pack/test/plugin_api_integration/plugins/sample_task_plugin/server/plugin.ts +++ b/x-pack/test/plugin_api_integration/plugins/sample_task_plugin/server/plugin.ts @@ -15,6 +15,7 @@ import { TaskManagerSetupContract, TaskManagerStartContract, ConcreteTaskInstance, + EphemeralTask, } from '../../../../../plugins/task_manager/server'; import { DEFAULT_MAX_WORKERS } from '../../../../../plugins/task_manager/server/config'; @@ -38,6 +39,8 @@ export class SampleTaskManagerFixturePlugin const taskTestingEvents = new EventEmitter(); taskTestingEvents.setMaxListeners(DEFAULT_MAX_WORKERS * 2); + const tmStart = this.taskManagerStart; + const defaultSampleTaskConfig = { timeout: '1m', // This task allows tests to specify its behavior (whether it reschedules itself, whether it errors, etc) @@ -155,6 +158,85 @@ export class SampleTaskManagerFixturePlugin }, }); + const taskWithTiming = { + createTaskRunner: ({ taskInstance }: { taskInstance: ConcreteTaskInstance }) => ({ + async run() { + const stopTiming = startTaskTimer(); + + const { + params: { delay = 0 }, + state: { timings = [] }, + } = taskInstance; + + if (delay) { + await new Promise((resolve) => { + setTimeout(resolve, delay); + }); + } + + return { + state: { timings: [...timings, stopTiming()] }, + }; + }, + }), + }; + + taskManager.registerTaskDefinitions({ + timedTask: { + title: 'Task With Tracked Timings', + timeout: '60s', + description: 'A task that tracks its execution timing.', + ...taskWithTiming, + }, + timedTaskWithSingleConcurrency: { + title: 'Task With Tracked Timings and Single Concurrency', + maxConcurrency: 1, + timeout: '60s', 
+ description: + 'A task that can only have one concurrent instance and tracks its execution timing.', + ...taskWithTiming, + }, + timedTaskWithLimitedConcurrency: { + title: 'Task With Tracked Timings and Limited Concurrency', + maxConcurrency: 2, + timeout: '60s', + description: + 'A task that can only have two concurrent instance and tracks its execution timing.', + ...taskWithTiming, + }, + taskWhichExecutesOtherTasksEphemerally: { + title: 'Task Which Executes Other Tasks Ephemerally', + description: 'A sample task used to validate how ephemeral tasks are executed.', + maxAttempts: 1, + timeout: '60s', + createTaskRunner: ({ taskInstance }: { taskInstance: ConcreteTaskInstance }) => ({ + async run() { + const { + params: { tasks = [] }, + } = taskInstance; + + const tm = await tmStart; + const executions = await Promise.all( + (tasks as EphemeralTask[]).map(async (task) => { + return tm + .ephemeralRunNow(task) + .then((result) => ({ + result, + })) + .catch((error) => ({ + error, + })); + }) + ); + + return { + state: { executions }, + }; + }, + }), + }, + }); + taskManager.addMiddleware({ async beforeSave({ taskInstance, ...opts }) { const modifiedInstance = { @@ -213,3 +295,8 @@ const once = function (emitter: EventEmitter, event: string): Promise resolve(data || {})); }); }; + +function startTaskTimer(): () => { start: number; stop: number } { + const start = Date.now(); + return () => ({ start, stop: Date.now() }); +} diff --git a/x-pack/test/plugin_api_integration/test_suites/task_manager/task_management.ts b/x-pack/test/plugin_api_integration/test_suites/task_manager/task_management.ts index b8ba9c93fe8b..3c460c2a6d8c 100644 --- a/x-pack/test/plugin_api_integration/test_suites/task_manager/task_management.ts +++ b/x-pack/test/plugin_api_integration/test_suites/task_manager/task_management.ts @@ -5,7 +5,7 @@ * 2.0. 
*/ -import _ from 'lodash'; +import { random, times } from 'lodash'; import expect from '@kbn/expect'; import type { estypes } from '@elastic/elasticsearch'; import url from 'url'; @@ -183,6 +183,20 @@ export default function ({ getService }: FtrProviderContext) { .then((response) => response.body); } + // TODO: Add this back in with https://github.com/elastic/kibana/issues/106139 + // function runEphemeralTaskNow(task: { + // taskType: string; + // params: Record; + // state: Record; + // }) { + // return supertest + // .post('/api/sample_tasks/ephemeral_run_now') + // .set('kbn-xsrf', 'xxx') + // .send({ task }) + // .expect(200) + // .then((response) => response.body); + // } + function scheduleTaskIfNotExists(task: Partial) { return supertest .post('/api/sample_tasks/ensure_scheduled') @@ -225,7 +239,7 @@ export default function ({ getService }: FtrProviderContext) { } it('should support middleware', async () => { - const historyItem = _.random(1, 100); + const historyItem = random(1, 100); const scheduledTask = await scheduleTask({ taskType: 'sampleTask', @@ -330,8 +344,8 @@ export default function ({ getService }: FtrProviderContext) { }); it('should reschedule if task returns runAt', async () => { - const nextRunMilliseconds = _.random(60000, 200000); - const count = _.random(1, 20); + const nextRunMilliseconds = random(60000, 200000); + const count = random(1, 20); const originalTask = await scheduleTask({ taskType: 'sampleTask', @@ -351,7 +365,7 @@ export default function ({ getService }: FtrProviderContext) { }); it('should reschedule if task has an interval', async () => { - const interval = _.random(5, 200); + const interval = random(5, 200); const intervalMilliseconds = interval * 60000; const originalTask = await scheduleTask({ @@ -372,7 +386,7 @@ export default function ({ getService }: FtrProviderContext) { }); it('should support the deprecated interval field', async () => { - const interval = _.random(5, 200); + const interval = random(5, 200); const intervalMilliseconds = interval * 60000; const originalTask = await scheduleTask({ @@ -471,7 +485,7 @@ export default function ({ getService }: FtrProviderContext) { // Task Manager to use up its worker capacity // causing tasks to pile up await Promise.all( - _.times(DEFAULT_MAX_WORKERS + _.random(1, DEFAULT_MAX_WORKERS), () => + times(DEFAULT_MAX_WORKERS + random(1, DEFAULT_MAX_WORKERS), () => scheduleTask({ taskType: 'sampleTask', params: { @@ -887,5 +901,246 @@ export default function ({ getService }: FtrProviderContext) { expect(scheduledTask.attempts).to.be.greaterThan(3); }); }); + + // TODO: Add this back in with https://github.com/elastic/kibana/issues/106139 + // it('should return the resulting task state when asked to run an ephemeral task now', async () => { + // const ephemeralTask = await runEphemeralTaskNow({ + // taskType: 'sampleTask', + // params: {}, + // state: {}, + // }); + + // await retry.try(async () => { + // expect( + // (await historyDocs()).filter((taskDoc) => taskDoc._source.taskId === ephemeralTask.id) + // .length + // ).to.eql(1); + + // expect(ephemeralTask.state.count).to.eql(1); + // }); + + // const secondEphemeralTask = await runEphemeralTaskNow({ + // taskType: 'sampleTask', + // params: {}, + // // pass state from previous ephemeral run as input for the second run + // state: ephemeralTask.state, + // }); + + // // ensure state is cumulative + // expect(secondEphemeralTask.state.count).to.eql(2); + + // await retry.try(async () => { + // // ensure new id is produced for second task 
execution + // expect( + // (await historyDocs()).filter((taskDoc) => taskDoc._source.taskId === ephemeralTask.id) + // .length + // ).to.eql(1); + // expect( + // (await historyDocs()).filter( + // (taskDoc) => taskDoc._source.taskId === secondEphemeralTask.id + // ).length + // ).to.eql(1); + // }); + // }); + + // TODO: Add this back in with https://github.com/elastic/kibana/issues/106139 + // it('Epheemral task run should only run one instance of a task if its maxConcurrency is 1', async () => { + // const ephemeralTaskWithSingleConcurrency: { + // state: { + // executions: Array<{ + // result: { + // id: string; + // state: { + // timings: Array<{ + // start: number; + // stop: number; + // }>; + // }; + // }; + // }>; + // }; + // } = await runEphemeralTaskNow({ + // taskType: 'taskWhichExecutesOtherTasksEphemerally', + // params: { + // tasks: [ + // { + // taskType: 'timedTaskWithSingleConcurrency', + // params: { delay: 1000 }, + // state: {}, + // }, + // { + // taskType: 'timedTaskWithSingleConcurrency', + // params: { delay: 1000 }, + // state: {}, + // }, + // { + // taskType: 'timedTaskWithSingleConcurrency', + // params: { delay: 1000 }, + // state: {}, + // }, + // { + // taskType: 'timedTaskWithSingleConcurrency', + // params: { delay: 1000 }, + // state: {}, + // }, + // ], + // }, + // state: {}, + // }); + + // ensureOverlappingTasksDontExceedThreshold( + // ephemeralTaskWithSingleConcurrency.state.executions, + // // make sure each task intersects with any other task + // 0 + // ); + // }); + + // TODO: Add this back in with https://github.com/elastic/kibana/issues/106139 + // it('Ephemeral task run should only run as many instances of a task as its maxConcurrency will allow', async () => { + // const ephemeralTaskWithSingleConcurrency: { + // state: { + // executions: Array<{ + // result: { + // id: string; + // state: { + // timings: Array<{ + // start: number; + // stop: number; + // }>; + // }; + // }; + // }>; + // }; + // } = await runEphemeralTaskNow({ + // taskType: 'taskWhichExecutesOtherTasksEphemerally', + // params: { + // tasks: [ + // { + // taskType: 'timedTaskWithLimitedConcurrency', + // params: { delay: 100 }, + // state: {}, + // }, + // { + // taskType: 'timedTaskWithLimitedConcurrency', + // params: { delay: 100 }, + // state: {}, + // }, + // { + // taskType: 'timedTaskWithLimitedConcurrency', + // params: { delay: 100 }, + // state: {}, + // }, + // { + // taskType: 'timedTaskWithLimitedConcurrency', + // params: { delay: 100 }, + // state: {}, + // }, + // { + // taskType: 'timedTaskWithLimitedConcurrency', + // params: { delay: 100 }, + // state: {}, + // }, + // { + // taskType: 'timedTaskWithLimitedConcurrency', + // params: { delay: 100 }, + // state: {}, + // }, + // ], + // }, + // state: {}, + // }); + + // ensureOverlappingTasksDontExceedThreshold( + // ephemeralTaskWithSingleConcurrency.state.executions, + // // make sure each task intersects with, at most, 1 other task + // 1 + // ); + // }); + + // TODO: Add this back in with https://github.com/elastic/kibana/issues/106139 + // it('Ephemeral task executions cant exceed the max workes in Task Manager', async () => { + // const ephemeralTaskWithSingleConcurrency: { + // state: { + // executions: Array<{ + // result: { + // id: string; + // state: { + // timings: Array<{ + // start: number; + // stop: number; + // }>; + // }; + // }; + // }>; + // }; + // } = await runEphemeralTaskNow({ + // taskType: 'taskWhichExecutesOtherTasksEphemerally', + // params: { + // tasks: times(20, () => ({ 
+ // taskType: 'timedTask', + // params: { delay: 100 }, + // state: {}, + // })), + // }, + // state: {}, + // }); + + // ensureOverlappingTasksDontExceedThreshold( + // ephemeralTaskWithSingleConcurrency.state.executions, + // // make sure each task intersects with, at most, 9 other tasks (as max workes is 10) + // 9 + // ); + // }); }); + + // TODO: Add this back in with https://github.com/elastic/kibana/issues/106139 + // function ensureOverlappingTasksDontExceedThreshold( + // executions: Array<{ + // result: { + // id: string; + // state: { + // timings: Array<{ + // start: number; + // stop: number; + // }>; + // }; + // }; + // }>, + // threshold: number + // ) { + // const executionRanges = executions.map((execution) => ({ + // id: execution.result.id, + // range: range( + // // calculate range of milliseconds + // // in which the task was running (that should be good enough) + // execution.result.state.timings[0].start, + // execution.result.state.timings[0].stop + // ), + // })); + + // const intersections = new Map(); + // for (const currentExecution of executionRanges) { + // for (const executionToComparteTo of executionRanges) { + // if (currentExecution.id !== executionToComparteTo.id) { + // // find all executions that intersect + // if (intersection(currentExecution.range, executionToComparteTo.range).length) { + // intersections.set(currentExecution.id, [ + // ...(intersections.get(currentExecution.id) ?? []), + // executionToComparteTo.id, + // ]); + // } + // } + // } + // } + + // const tooManyIntersectingTasks = [...intersections.entries()].find( + // // make sure each task intersects with, at most, threshold of other task + // ([, intersectingTasks]) => intersectingTasks.length > threshold + // ); + // if (tooManyIntersectingTasks) { + // throw new Error( + // `Invalid execution found: ${tooManyIntersectingTasks[0]} overlaps with ${tooManyIntersectingTasks[1]}` + // ); + // } + // } }
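Reviewer note (not part of the diff): a minimal sketch of how a consuming plugin might call the new `ephemeralRunNow` API exposed on the Task Manager start contract, assuming ephemeral tasks are enabled via `xpack.task_manager.ephemeral_tasks.enabled`. The plugin wiring, task type, import paths, and fallback behaviour below are illustrative assumptions; only `ephemeralRunNow`, `EphemeralTask`, and the `RunNowResult` shape (`id`, optional `state`) come from this change.

// Sketch only: 'sampleTask', the import paths, and the fallback to schedule() are assumptions.
import { Logger } from 'src/core/server';
import {
  TaskManagerStartContract,
  EphemeralTask,
} from '../../task_manager/server';

export async function runSampleTaskEphemerally(
  taskManager: TaskManagerStartContract,
  logger: Logger
): Promise<void> {
  const task: EphemeralTask = {
    taskType: 'sampleTask', // must already be registered via registerTaskDefinitions
    params: {},
    state: {},
  };

  try {
    // Runs in memory on this Kibana instance; the task is never persisted as a SavedObject.
    const { id, state } = await taskManager.ephemeralRunNow(task);
    logger.debug(`Ephemeral task ${id} completed with state: ${JSON.stringify(state)}`);
  } catch (err) {
    // If the ephemeral queue is at capacity (xpack.task_manager.ephemeral_tasks.request_capacity),
    // ephemeralRunNow rejects (see EphemeralTaskRejectedDueToCapacityError in this PR), so a
    // caller can fall back to regular, persisted scheduling.
    logger.warn(`Falling back to persisted scheduling: ${err}`);
    await taskManager.schedule(task);
  }
}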