From 08257f1f93c993a4f08bf68fe6d26782fc3172a9 Mon Sep 17 00:00:00 2001 From: Sandra Gonzales Date: Tue, 6 Jul 2021 15:11:01 -0400 Subject: [PATCH] [Monitoring] update SM rule template variables (#104176) * continue to support shardIndices template variable * add unit test for large shard size alert * change template variables in CCR rule * add CCR rule unit test * fix internationalization Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com> --- .../alerts/ccr_read_exceptions_alert.test.ts | 201 ++++++++++++++++++ .../alerts/ccr_read_exceptions_alert.ts | 43 ++-- .../alerts/large_shard_size_alert.test.ts | 179 ++++++++++++++++ .../server/alerts/large_shard_size_alert.ts | 5 + .../translations/translations/ja-JP.json | 4 - .../translations/translations/zh-CN.json | 4 - 6 files changed, 409 insertions(+), 27 deletions(-) create mode 100644 x-pack/plugins/monitoring/server/alerts/ccr_read_exceptions_alert.test.ts create mode 100644 x-pack/plugins/monitoring/server/alerts/large_shard_size_alert.test.ts diff --git a/x-pack/plugins/monitoring/server/alerts/ccr_read_exceptions_alert.test.ts b/x-pack/plugins/monitoring/server/alerts/ccr_read_exceptions_alert.test.ts new file mode 100644 index 000000000000..5c8ef7abbbf5 --- /dev/null +++ b/x-pack/plugins/monitoring/server/alerts/ccr_read_exceptions_alert.test.ts @@ -0,0 +1,201 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { CCRReadExceptionsAlert } from './ccr_read_exceptions_alert'; +import { ALERT_CCR_READ_EXCEPTIONS } from '../../common/constants'; +import { fetchCCRReadExceptions } from '../lib/alerts/fetch_ccr_read_exceptions'; +import { fetchClusters } from '../lib/alerts/fetch_clusters'; +import { elasticsearchServiceMock } from 'src/core/server/mocks'; + +type ICCRReadExceptionsAlertMock = CCRReadExceptionsAlert & { + defaultParams: { + duration: string; + }; +} & { + actionVariables: Array<{ + name: string; + description: string; + }>; +}; + +const RealDate = Date; + +jest.mock('../lib/alerts/fetch_ccr_read_exceptions', () => ({ + fetchCCRReadExceptions: jest.fn(), +})); +jest.mock('../lib/alerts/fetch_clusters', () => ({ + fetchClusters: jest.fn(), +})); + +jest.mock('../static_globals', () => ({ + Globals: { + app: { + getLogger: () => ({ debug: jest.fn() }), + url: 'http://localhost:5601', + config: { + ui: { + ccs: { enabled: true }, + metricbeat: { index: 'metricbeat-*' }, + container: { elasticsearch: { enabled: false } }, + }, + }, + }, + }, +})); + +describe('CCRReadExceptionsAlert', () => { + it('should have defaults', () => { + const alert = new CCRReadExceptionsAlert() as ICCRReadExceptionsAlertMock; + expect(alert.alertOptions.id).toBe(ALERT_CCR_READ_EXCEPTIONS); + expect(alert.alertOptions.name).toBe('CCR read exceptions'); + expect(alert.alertOptions.throttle).toBe('6h'); + expect(alert.alertOptions.defaultParams).toStrictEqual({ + duration: '1h', + }); + expect(alert.alertOptions.actionVariables).toStrictEqual([ + { + name: 'remoteCluster', + description: 'The remote cluster experiencing CCR read exceptions.', + }, + { + name: 'followerIndex', + description: 'The follower index reporting CCR read exceptions.', + }, + { + name: 'internalShortMessage', + description: 'The short internal message generated by Elastic.', + }, + { + name: 'internalFullMessage', + description: 'The full internal message generated by Elastic.', + }, + { name: 'state', 
description: 'The current state of the alert.' }, + { name: 'clusterName', description: 'The cluster to which the node(s) belongs.' }, + { name: 'action', description: 'The recommended action for this alert.' }, + { + name: 'actionPlain', + description: 'The recommended action for this alert, without any markdown.', + }, + ]); + }); + describe('execute', () => { + const FakeDate = function () {}; + FakeDate.prototype.valueOf = () => 1; + + const clusterUuid = 'abc123'; + const clusterName = 'testCluster'; + const nodeId = 'myNodeId'; + const nodeName = 'myNodeName'; + const remoteCluster = 'BcK-0pmsQniyPQfZuauuXw_remote_cluster_1'; + const followerIndex = '.follower_index_1'; + const leaderIndex = '.leader_index_1'; + const readExceptions = [ + { + exception: { + type: 'read_exceptions_type_1', + reason: 'read_exceptions_reason_1', + }, + }, + ]; + const stat = { + remoteCluster, + followerIndex, + leaderIndex, + read_exceptions: readExceptions, + clusterUuid, + nodeId, + nodeName, + }; + + const replaceState = jest.fn(); + const scheduleActions = jest.fn(); + const getState = jest.fn(); + const executorOptions = { + services: { + scopedClusterClient: elasticsearchServiceMock.createScopedClusterClient(), + alertInstanceFactory: jest.fn().mockImplementation(() => { + return { + replaceState, + scheduleActions, + getState, + }; + }), + }, + state: {}, + }; + + beforeEach(() => { + Date = FakeDate as DateConstructor; + (fetchCCRReadExceptions as jest.Mock).mockImplementation(() => { + return [stat]; + }); + (fetchClusters as jest.Mock).mockImplementation(() => { + return [{ clusterUuid, clusterName }]; + }); + }); + + afterEach(() => { + Date = RealDate; + replaceState.mockReset(); + scheduleActions.mockReset(); + getState.mockReset(); + }); + + it('should fire actions', async () => { + const alert = new CCRReadExceptionsAlert() as ICCRReadExceptionsAlertMock; + const type = alert.getAlertType(); + await type.executor({ + ...executorOptions, + params: 
alert.alertOptions.defaultParams, + } as any); + expect(scheduleActions).toHaveBeenCalledWith('default', { + internalFullMessage: `CCR read exceptions alert is firing for the following remote cluster: ${remoteCluster}. Current 'follower_index' index affected: ${followerIndex}. [View CCR stats](http://localhost:5601/app/monitoring#/elasticsearch/ccr?_g=(cluster_uuid:${clusterUuid}))`, + internalShortMessage: `CCR read exceptions alert is firing for the following remote cluster: ${remoteCluster}. Verify follower and leader index relationships on the affected remote cluster.`, + action: `[View CCR stats](http://localhost:5601/app/monitoring#/elasticsearch/ccr?_g=(cluster_uuid:${clusterUuid}))`, + actionPlain: + 'Verify follower and leader index relationships on the affected remote cluster.', + clusterName, + state: 'firing', + remoteCluster, + remoteClusters: remoteCluster, + followerIndex, + followerIndices: followerIndex, + }); + }); + + it('should handle ccs', async () => { + const ccs = 'testCluster'; + (fetchCCRReadExceptions as jest.Mock).mockImplementation(() => { + return [ + { + ...stat, + ccs, + }, + ]; + }); + const alert = new CCRReadExceptionsAlert() as ICCRReadExceptionsAlertMock; + const type = alert.getAlertType(); + await type.executor({ + ...executorOptions, + params: alert.alertOptions.defaultParams, + } as any); + expect(scheduleActions).toHaveBeenCalledWith('default', { + internalFullMessage: `CCR read exceptions alert is firing for the following remote cluster: ${remoteCluster}. Current 'follower_index' index affected: ${followerIndex}. [View CCR stats](http://localhost:5601/app/monitoring#/elasticsearch/ccr?_g=(cluster_uuid:${clusterUuid},ccs:testCluster))`, + internalShortMessage: `CCR read exceptions alert is firing for the following remote cluster: ${remoteCluster}. 
Verify follower and leader index relationships on the affected remote cluster.`, + action: `[View CCR stats](http://localhost:5601/app/monitoring#/elasticsearch/ccr?_g=(cluster_uuid:${clusterUuid},ccs:testCluster))`, + actionPlain: + 'Verify follower and leader index relationships on the affected remote cluster.', + clusterName, + state: 'firing', + remoteCluster, + remoteClusters: remoteCluster, + followerIndex, + followerIndices: followerIndex, + }); + }); + }); +}); diff --git a/x-pack/plugins/monitoring/server/alerts/ccr_read_exceptions_alert.ts b/x-pack/plugins/monitoring/server/alerts/ccr_read_exceptions_alert.ts index 2995566c7c09..28f562b2cb13 100644 --- a/x-pack/plugins/monitoring/server/alerts/ccr_read_exceptions_alert.ts +++ b/x-pack/plugins/monitoring/server/alerts/ccr_read_exceptions_alert.ts @@ -47,20 +47,20 @@ export class CCRReadExceptionsAlert extends BaseAlert { }, actionVariables: [ { - name: 'remoteClusters', + name: 'remoteCluster', description: i18n.translate( - 'xpack.monitoring.alerts.ccrReadExceptions.actionVariables.remoteClusters', + 'xpack.monitoring.alerts.ccrReadExceptions.actionVariables.remoteCluster', { - defaultMessage: 'List of remote clusters that are experiencing CCR read exceptions.', + defaultMessage: 'The remote cluster experiencing CCR read exceptions.', } ), }, { - name: 'followerIndices', + name: 'followerIndex', description: i18n.translate( - 'xpack.monitoring.alerts.ccrReadExceptions.actionVariables.followerIndices', + 'xpack.monitoring.alerts.ccrReadExceptions.actionVariables.followerIndex', { - defaultMessage: 'List of follower indices reporting CCR read exceptions.', + defaultMessage: 'The follower index reporting CCR read exceptions.', } ), }, @@ -229,12 +229,11 @@ export class CCRReadExceptionsAlert extends BaseAlert { item: AlertData | null, cluster: AlertCluster ) { - const remoteClustersList = alertStates - .map((alertState) => (alertState.meta as CCRReadExceptionsUIMeta).remoteCluster) - .join(', '); - const 
followerIndicesList = alertStates - .map((alertState) => (alertState.meta as CCRReadExceptionsUIMeta).followerIndex) - .join(', '); + if (alertStates.length === 0) { + return; + } + const CCRReadExceptionsMeta = alertStates[0].meta as CCRReadExceptionsUIMeta; + const { remoteCluster, followerIndex } = CCRReadExceptionsMeta; const shortActionText = i18n.translate( 'xpack.monitoring.alerts.ccrReadExceptions.shortAction', @@ -258,9 +257,9 @@ export class CCRReadExceptionsAlert extends BaseAlert { const internalShortMessage = i18n.translate( 'xpack.monitoring.alerts.ccrReadExceptions.firing.internalShortMessage', { - defaultMessage: `CCR read exceptions alert is firing for the following remote cluster: {remoteClustersList}. {shortActionText}`, + defaultMessage: `CCR read exceptions alert is firing for the following remote cluster: {remoteCluster}. {shortActionText}`, values: { - remoteClustersList, + remoteCluster, shortActionText, }, } @@ -268,11 +267,11 @@ export class CCRReadExceptionsAlert extends BaseAlert { const internalFullMessage = i18n.translate( 'xpack.monitoring.alerts.ccrReadExceptions.firing.internalFullMessage', { - defaultMessage: `CCR read exceptions alert is firing for the following remote cluster: {remoteClustersList}. Current 'follower_index' index affected: {followerIndicesList}. {action}`, + defaultMessage: `CCR read exceptions alert is firing for the following remote cluster: {remoteCluster}. Current 'follower_index' index affected: {followerIndex}. 
{action}`, values: { action, - remoteClustersList, - followerIndicesList, + remoteCluster, + followerIndex, }, } ); @@ -281,8 +280,14 @@ export class CCRReadExceptionsAlert extends BaseAlert { internalShortMessage, internalFullMessage, state: AlertingDefaults.ALERT_STATE.firing, - remoteClusters: remoteClustersList, - followerIndices: followerIndicesList, + remoteCluster, + followerIndex, + /* continue to send the "remoteClusters" and "followerIndices" values for users still using them, even though + we have replaced them with "remoteCluster" and "followerIndex" in the template because alerts now fire per index instead of for all indices + see https://github.com/elastic/kibana/issues/100136#issuecomment-865229431 + */ + remoteClusters: remoteCluster, + followerIndices: followerIndex, clusterName: cluster.clusterName, action, actionPlain: shortActionText, diff --git a/x-pack/plugins/monitoring/server/alerts/large_shard_size_alert.test.ts b/x-pack/plugins/monitoring/server/alerts/large_shard_size_alert.test.ts new file mode 100644 index 000000000000..18987a24e552 --- /dev/null +++ b/x-pack/plugins/monitoring/server/alerts/large_shard_size_alert.test.ts @@ -0,0 +1,179 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0.
+ */ + +import { LargeShardSizeAlert } from './large_shard_size_alert'; +import { ALERT_LARGE_SHARD_SIZE } from '../../common/constants'; +import { fetchIndexShardSize } from '../lib/alerts/fetch_index_shard_size'; +import { fetchClusters } from '../lib/alerts/fetch_clusters'; +import { elasticsearchServiceMock } from 'src/core/server/mocks'; + +type ILargeShardSizeAlertMock = LargeShardSizeAlert & { + defaultParams: { + threshold: number; + duration: string; + }; +} & { + actionVariables: Array<{ + name: string; + description: string; + }>; +}; + +const RealDate = Date; + +jest.mock('../lib/alerts/fetch_index_shard_size', () => ({ + fetchIndexShardSize: jest.fn(), +})); +jest.mock('../lib/alerts/fetch_clusters', () => ({ + fetchClusters: jest.fn(), +})); + +jest.mock('../static_globals', () => ({ + Globals: { + app: { + getLogger: () => ({ debug: jest.fn() }), + url: 'http://localhost:5601', + config: { + ui: { + ccs: { enabled: true }, + metricbeat: { index: 'metricbeat-*' }, + container: { elasticsearch: { enabled: false } }, + }, + }, + }, + }, +})); + +describe('LargeShardSizeAlert', () => { + it('should have defaults', () => { + const alert = new LargeShardSizeAlert() as ILargeShardSizeAlertMock; + expect(alert.alertOptions.id).toBe(ALERT_LARGE_SHARD_SIZE); + expect(alert.alertOptions.name).toBe('Shard size'); + expect(alert.alertOptions.throttle).toBe('12h'); + expect(alert.alertOptions.defaultParams).toStrictEqual({ + threshold: 55, + indexPattern: '-.*', + }); + expect(alert.alertOptions.actionVariables).toStrictEqual([ + { name: 'shardIndex', description: 'The index experiencing large average shard size.' }, + { + name: 'internalShortMessage', + description: 'The short internal message generated by Elastic.', + }, + { + name: 'internalFullMessage', + description: 'The full internal message generated by Elastic.', + }, + { name: 'state', description: 'The current state of the alert.' 
}, + { name: 'clusterName', description: 'The cluster to which the node(s) belongs.' }, + { name: 'action', description: 'The recommended action for this alert.' }, + { + name: 'actionPlain', + description: 'The recommended action for this alert, without any markdown.', + }, + ]); + }); + describe('execute', () => { + const FakeDate = function () {}; + FakeDate.prototype.valueOf = () => 1; + + const shardIndex = 'apm-8.0.0-onboarding-2021.06.30'; + const shardSize = 0; + const clusterUuid = 'abc123'; + const clusterName = 'testCluster'; + const nodeId = 'myNodeId'; + const nodeName = 'myNodeName'; + const stat = { + shardIndex, + shardSize, + clusterUuid, + nodeId, + nodeName, + }; + + const replaceState = jest.fn(); + const scheduleActions = jest.fn(); + const getState = jest.fn(); + const executorOptions = { + services: { + scopedClusterClient: elasticsearchServiceMock.createScopedClusterClient(), + alertInstanceFactory: jest.fn().mockImplementation(() => { + return { + replaceState, + scheduleActions, + getState, + }; + }), + }, + state: {}, + }; + + beforeEach(() => { + Date = FakeDate as DateConstructor; + (fetchIndexShardSize as jest.Mock).mockImplementation(() => { + return [stat]; + }); + (fetchClusters as jest.Mock).mockImplementation(() => { + return [{ clusterUuid, clusterName }]; + }); + }); + + afterEach(() => { + Date = RealDate; + replaceState.mockReset(); + scheduleActions.mockReset(); + getState.mockReset(); + }); + + it('should fire actions', async () => { + const alert = new LargeShardSizeAlert() as ILargeShardSizeAlertMock; + const type = alert.getAlertType(); + await type.executor({ + ...executorOptions, + params: alert.alertOptions.defaultParams, + } as any); + expect(scheduleActions).toHaveBeenCalledWith('default', { + internalFullMessage: `Large shard size alert is firing for the following index: ${shardIndex}. 
[View index shard size stats](http://localhost:5601/app/monitoring#/elasticsearch/indices/${shardIndex}?_g=(cluster_uuid:${clusterUuid}))`, + internalShortMessage: `Large shard size alert is firing for the following index: ${shardIndex}. Investigate indices with large shard sizes.`, + action: `[View index shard size stats](http://localhost:5601/app/monitoring#/elasticsearch/indices/${shardIndex}?_g=(cluster_uuid:${clusterUuid}))`, + actionPlain: 'Investigate indices with large shard sizes.', + clusterName, + state: 'firing', + shardIndex, + shardIndices: shardIndex, + }); + }); + + it('should handle ccs', async () => { + const ccs = 'testCluster'; + (fetchIndexShardSize as jest.Mock).mockImplementation(() => { + return [ + { + ...stat, + ccs, + }, + ]; + }); + const alert = new LargeShardSizeAlert() as ILargeShardSizeAlertMock; + const type = alert.getAlertType(); + await type.executor({ + ...executorOptions, + params: alert.alertOptions.defaultParams, + } as any); + expect(scheduleActions).toHaveBeenCalledWith('default', { + internalFullMessage: `Large shard size alert is firing for the following index: ${shardIndex}. [View index shard size stats](http://localhost:5601/app/monitoring#/elasticsearch/indices/${shardIndex}?_g=(cluster_uuid:${clusterUuid},ccs:testCluster))`, + internalShortMessage: `Large shard size alert is firing for the following index: ${shardIndex}. 
Investigate indices with large shard sizes.`, + action: `[View index shard size stats](http://localhost:5601/app/monitoring#/elasticsearch/indices/${shardIndex}?_g=(cluster_uuid:${clusterUuid},ccs:testCluster))`, + actionPlain: 'Investigate indices with large shard sizes.', + clusterName, + state: 'firing', + shardIndex, + shardIndices: shardIndex, + }); + }); + }); +}); diff --git a/x-pack/plugins/monitoring/server/alerts/large_shard_size_alert.ts b/x-pack/plugins/monitoring/server/alerts/large_shard_size_alert.ts index 75e22fb41025..a365e530cbd0 100644 --- a/x-pack/plugins/monitoring/server/alerts/large_shard_size_alert.ts +++ b/x-pack/plugins/monitoring/server/alerts/large_shard_size_alert.ts @@ -211,6 +211,11 @@ export class LargeShardSizeAlert extends BaseAlert { internalShortMessage, internalFullMessage, state: AlertingDefaults.ALERT_STATE.firing, + /* continue to send the "shardIndices" value for users still using it, even though + we have replaced it with "shardIndex" in the template because alerts now fire per index instead of for all indices + see https://github.com/elastic/kibana/issues/100136#issuecomment-865229431 + */ + shardIndices: shardIndex, shardIndex, clusterName: cluster.clusterName, action, diff --git a/x-pack/plugins/translations/translations/ja-JP.json b/x-pack/plugins/translations/translations/ja-JP.json index df454b21ee72..9efedf495737 100644 --- a/x-pack/plugins/translations/translations/ja-JP.json +++ b/x-pack/plugins/translations/translations/ja-JP.json @@ -15669,11 +15669,7 @@ "xpack.monitoring.alerts.badge.panelCategory.errors": "エラーと例外", "xpack.monitoring.alerts.badge.panelCategory.resourceUtilization": "リソースの利用状況", "xpack.monitoring.alerts.badge.panelTitle": "アラート", - "xpack.monitoring.alerts.ccrReadExceptions.actionVariables.followerIndices": "CCR読み取り例外を報告するフォロワーインデックスのリスト。", - "xpack.monitoring.alerts.ccrReadExceptions.actionVariables.remoteClusters": "CCR読み取り例外が発生しているリモートクラスターのリスト。", "xpack.monitoring.alerts.ccrReadExceptions.description":
"CCR読み取り例外が検出された場合にアラートを発行します。", - "xpack.monitoring.alerts.ccrReadExceptions.firing.internalFullMessage": "CCR読み取り例外アラートは次のリモートクラスターに対して発行されます。{remoteClustersList}。現在の「follower_index」インデックスが影響を受けます。{followerIndicesList}。{action}", - "xpack.monitoring.alerts.ccrReadExceptions.firing.internalShortMessage": "CCR読み取り例外アラートは次のリモートクラスターに対して発行されます。{remoteClustersList}。{shortActionText}", "xpack.monitoring.alerts.ccrReadExceptions.fullAction": "CCR統計情報を表示", "xpack.monitoring.alerts.ccrReadExceptions.label": "CCR読み取り例外", "xpack.monitoring.alerts.ccrReadExceptions.paramDetails.duration.label": "最後の", diff --git a/x-pack/plugins/translations/translations/zh-CN.json b/x-pack/plugins/translations/translations/zh-CN.json index 447ba99945ed..384237e2d140 100644 --- a/x-pack/plugins/translations/translations/zh-CN.json +++ b/x-pack/plugins/translations/translations/zh-CN.json @@ -15901,11 +15901,7 @@ "xpack.monitoring.alerts.badge.panelCategory.errors": "错误和异常", "xpack.monitoring.alerts.badge.panelCategory.resourceUtilization": "资源使用率", "xpack.monitoring.alerts.badge.panelTitle": "告警", - "xpack.monitoring.alerts.ccrReadExceptions.actionVariables.followerIndices": "报告 CCR 读取异常的 Follower 索引列表。", - "xpack.monitoring.alerts.ccrReadExceptions.actionVariables.remoteClusters": "有 CCR 读取异常的远程集群列表。", "xpack.monitoring.alerts.ccrReadExceptions.description": "检测到任何 CCR 读取异常时告警。", - "xpack.monitoring.alerts.ccrReadExceptions.firing.internalFullMessage": "以下远程集群触发 CCR 读取异常告警:{remoteClustersList}。当前“follower_index”索引受影响:{followerIndicesList}。{action}", - "xpack.monitoring.alerts.ccrReadExceptions.firing.internalShortMessage": "以下远程集群触发 CCR 读取异常告警:{remoteClustersList}。{shortActionText}", "xpack.monitoring.alerts.ccrReadExceptions.fullAction": "查看 CCR 统计", "xpack.monitoring.alerts.ccrReadExceptions.label": "CCR 读取异常", "xpack.monitoring.alerts.ccrReadExceptions.paramDetails.duration.label": "过去",