[APM] Index reason field for alerts (#108019)

This commit is contained in:
Dario Gieselaar 2021-08-13 13:05:45 +02:00 committed by GitHub
parent 3b4dca1efb
commit 444355cdc3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 160 additions and 41 deletions

View file

@ -7,6 +7,7 @@
import { i18n } from '@kbn/i18n';
import type { ValuesType } from 'utility-types';
import type { AsDuration, AsPercent } from '../../observability/common';
import type { ActionGroup } from '../../alerting/common';
import { ANOMALY_SEVERITY, ANOMALY_THRESHOLD } from './ml_constants';
@ -28,6 +29,89 @@ const THRESHOLD_MET_GROUP: ActionGroup<ThresholdMetActionGroupId> = {
}),
};
/**
 * Builds the translated, human-readable reason text for an error count alert.
 *
 * @param params.threshold - Error count threshold; interpolated into the message as-is.
 * @param params.measured - Error count that was measured; interpolated as-is.
 * @param params.serviceName - Name of the service the alert refers to.
 * @returns Whatever `i18n.translate` produces for `xpack.apm.alertTypes.errorCount.reason`.
 */
export function formatErrorCountReason(params: {
  threshold: number;
  measured: number;
  serviceName: string;
}) {
  const { threshold, measured, serviceName } = params;

  return i18n.translate('xpack.apm.alertTypes.errorCount.reason', {
    defaultMessage:
      'Error count is greater than {threshold} (current value is {measured}) for {serviceName}',
    values: { threshold, measured, serviceName },
  });
}
/**
 * Builds the translated, human-readable reason text for a latency
 * (transaction duration) alert.
 *
 * @param params.threshold - Latency threshold; passed through `asDuration` before interpolation.
 * @param params.measured - Measured latency; passed through `asDuration` before interpolation.
 * @param params.serviceName - Name of the service the alert refers to.
 * @param params.asDuration - Formatter applied to both numeric values.
 * @returns Whatever `i18n.translate` produces for `xpack.apm.alertTypes.transactionDuration.reason`.
 */
export function formatTransactionDurationReason(params: {
  threshold: number;
  measured: number;
  serviceName: string;
  asDuration: AsDuration;
}) {
  const { threshold, measured, serviceName, asDuration } = params;

  // Format the threshold first, then the measured value.
  const formattedThreshold = asDuration(threshold);
  const formattedMeasured = asDuration(measured);

  return i18n.translate('xpack.apm.alertTypes.transactionDuration.reason', {
    defaultMessage:
      'Latency is above {threshold} (current value is {measured}) for {serviceName}',
    values: {
      threshold: formattedThreshold,
      measured: formattedMeasured,
      serviceName,
    },
  });
}
/**
 * Builds the translated, human-readable reason text for a failed
 * transaction rate alert.
 *
 * @param params.threshold - Rate threshold; formatted via `asPercent(threshold, 100)`.
 * @param params.measured - Measured rate; formatted via `asPercent(measured, 100)`.
 * @param params.serviceName - Name of the service the alert refers to.
 * @param params.asPercent - Formatter applied to both numeric values with a denominator of 100.
 * @returns Whatever `i18n.translate` produces for `xpack.apm.alertTypes.transactionErrorRate.reason`.
 */
export function formatTransactionErrorRateReason(params: {
  threshold: number;
  measured: number;
  serviceName: string;
  asPercent: AsPercent;
}) {
  const { threshold, measured, serviceName, asPercent } = params;

  // Format the threshold first, then the measured value; both use 100 as the denominator.
  const formattedThreshold = asPercent(threshold, 100);
  const formattedMeasured = asPercent(measured, 100);

  return i18n.translate('xpack.apm.alertTypes.transactionErrorRate.reason', {
    defaultMessage:
      'Failed transactions rate is greater than {threshold} (current value is {measured}) for {serviceName}',
    values: {
      threshold: formattedThreshold,
      measured: formattedMeasured,
      serviceName,
    },
  });
}
/**
 * Builds the translated, human-readable reason text for a transaction
 * duration anomaly alert.
 *
 * @param params.serviceName - Name of the service the alert refers to.
 * @param params.severityLevel - Severity label; placed at the start of the message as-is.
 * @param params.measured - Score reported in the message; interpolated as-is.
 * @returns Whatever `i18n.translate` produces for
 *   `xpack.apm.alertTypes.transactionDurationAnomaly.reason`.
 */
export function formatTransactionDurationAnomalyReason(params: {
  serviceName: string;
  severityLevel: string;
  measured: number;
}) {
  const { serviceName, severityLevel, measured } = params;

  return i18n.translate('xpack.apm.alertTypes.transactionDurationAnomaly.reason', {
    defaultMessage:
      '{severityLevel} anomaly detected for {serviceName} (score was {measured})',
    values: { serviceName, severityLevel, measured },
  });
}
export const ALERT_TYPES_CONFIG: Record<
AlertType,
{

View file

@ -21,7 +21,13 @@ import {
} from '@kbn/rule-data-utils/target_node/technical_field_names';
import type { ObservabilityRuleTypeRegistry } from '../../../../observability/public';
import { ENVIRONMENT_ALL } from '../../../common/environment_filter_values';
import { AlertType } from '../../../common/alert_types';
import {
AlertType,
formatErrorCountReason,
formatTransactionDurationAnomalyReason,
formatTransactionDurationReason,
formatTransactionErrorRateReason,
} from '../../../common/alert_types';
// copied from elasticsearch_fieldnames.ts to limit page load bundle size
const SERVICE_ENVIRONMENT = 'service.environment';
@ -53,13 +59,10 @@ export function registerApmAlerts(
}),
format: ({ fields }) => {
return {
reason: i18n.translate('xpack.apm.alertTypes.errorCount.reason', {
defaultMessage: `Error count is greater than {threshold} (current value is {measured}) for {serviceName}`,
values: {
threshold: fields[ALERT_EVALUATION_THRESHOLD],
measured: fields[ALERT_EVALUATION_VALUE],
serviceName: String(fields[SERVICE_NAME][0]),
},
reason: formatErrorCountReason({
threshold: fields[ALERT_EVALUATION_THRESHOLD]!,
measured: fields[ALERT_EVALUATION_VALUE]!,
serviceName: String(fields[SERVICE_NAME][0]),
}),
link: format({
pathname: `/app/apm/services/${String(
@ -105,17 +108,12 @@ export function registerApmAlerts(
}
),
format: ({ fields, formatters: { asDuration } }) => ({
reason: i18n.translate(
'xpack.apm.alertTypes.transactionDuration.reason',
{
defaultMessage: `Latency is above {threshold} (current value is {measured}) for {serviceName}`,
values: {
threshold: asDuration(fields[ALERT_EVALUATION_THRESHOLD]),
measured: asDuration(fields[ALERT_EVALUATION_VALUE]),
serviceName: String(fields[SERVICE_NAME][0]),
},
}
),
reason: formatTransactionDurationReason({
threshold: fields[ALERT_EVALUATION_THRESHOLD]!,
measured: fields[ALERT_EVALUATION_VALUE]!,
serviceName: String(fields[SERVICE_NAME][0]),
asDuration,
}),
link: format({
pathname: `/app/apm/services/${fields[SERVICE_NAME][0]!}`,
query: {
@ -161,17 +159,12 @@ export function registerApmAlerts(
}
),
format: ({ fields, formatters: { asPercent } }) => ({
reason: i18n.translate(
'xpack.apm.alertTypes.transactionErrorRate.reason',
{
defaultMessage: `Failed transactions rate is greater than {threshold} (current value is {measured}) for {serviceName}`,
values: {
threshold: asPercent(fields[ALERT_EVALUATION_THRESHOLD], 100),
measured: asPercent(fields[ALERT_EVALUATION_VALUE], 100),
serviceName: String(fields[SERVICE_NAME][0]),
},
}
),
reason: formatTransactionErrorRateReason({
threshold: fields[ALERT_EVALUATION_THRESHOLD]!,
measured: fields[ALERT_EVALUATION_VALUE]!,
serviceName: String(fields[SERVICE_NAME][0]),
asPercent,
}),
link: format({
pathname: `/app/apm/services/${String(fields[SERVICE_NAME][0]!)}`,
query: {
@ -216,17 +209,11 @@ export function registerApmAlerts(
}
),
format: ({ fields }) => ({
reason: i18n.translate(
'xpack.apm.alertTypes.transactionDurationAnomaly.reason',
{
defaultMessage: `{severityLevel} anomaly detected for {serviceName} (score was {measured})`,
values: {
serviceName: String(fields[SERVICE_NAME][0]),
severityLevel: String(fields[ALERT_SEVERITY_LEVEL]),
measured: Number(fields[ALERT_EVALUATION_VALUE]),
},
}
),
reason: formatTransactionDurationAnomalyReason({
serviceName: String(fields[SERVICE_NAME][0]),
severityLevel: String(fields[ALERT_SEVERITY_LEVEL]),
measured: Number(fields[ALERT_EVALUATION_VALUE]),
}),
link: format({
pathname: `/app/apm/services/${String(fields[SERVICE_NAME][0])}`,
query: {

View file

@ -10,10 +10,12 @@ import { take } from 'rxjs/operators';
import type {
ALERT_EVALUATION_THRESHOLD as ALERT_EVALUATION_THRESHOLD_TYPED,
ALERT_EVALUATION_VALUE as ALERT_EVALUATION_VALUE_TYPED,
ALERT_REASON as ALERT_REASON_TYPED,
} from '@kbn/rule-data-utils';
import {
ALERT_EVALUATION_THRESHOLD as ALERT_EVALUATION_THRESHOLD_NON_TYPED,
ALERT_EVALUATION_VALUE as ALERT_EVALUATION_VALUE_NON_TYPED,
ALERT_REASON as ALERT_REASON_NON_TYPED,
// @ts-expect-error
} from '@kbn/rule-data-utils/target_node/technical_field_names';
import { createLifecycleRuleTypeFactory } from '../../../../rule_registry/server';
@ -26,6 +28,7 @@ import {
AlertType,
APM_SERVER_FEATURE_ID,
ALERT_TYPES_CONFIG,
formatErrorCountReason,
} from '../../../common/alert_types';
import {
PROCESSOR_EVENT,
@ -41,6 +44,7 @@ import { RegisterRuleDependencies } from './register_apm_alerts';
// The runtime values come from the `target_node/technical_field_names` import
// (done under `@ts-expect-error`); each alias below re-binds one of them to the
// type of the same-named constant exported by `@kbn/rule-data-utils`.
const ALERT_EVALUATION_THRESHOLD: typeof ALERT_EVALUATION_THRESHOLD_TYPED = ALERT_EVALUATION_THRESHOLD_NON_TYPED;
const ALERT_EVALUATION_VALUE: typeof ALERT_EVALUATION_VALUE_TYPED = ALERT_EVALUATION_VALUE_NON_TYPED;
const ALERT_REASON: typeof ALERT_REASON_TYPED = ALERT_REASON_NON_TYPED;
const paramsSchema = schema.object({
windowSize: schema.number(),
@ -158,6 +162,11 @@ export function registerErrorCountAlertType({
[PROCESSOR_EVENT]: ProcessorEvent.error,
[ALERT_EVALUATION_VALUE]: errorCount,
[ALERT_EVALUATION_THRESHOLD]: alertParams.threshold,
[ALERT_REASON]: formatErrorCountReason({
serviceName,
threshold: alertParams.threshold,
measured: errorCount,
}),
},
})
.scheduleActions(alertTypeConfig.defaultActionGroupId, {

View file

@ -11,12 +11,15 @@ import { QueryDslQueryContainer } from '@elastic/elasticsearch/api/types';
import type {
ALERT_EVALUATION_THRESHOLD as ALERT_EVALUATION_THRESHOLD_TYPED,
ALERT_EVALUATION_VALUE as ALERT_EVALUATION_VALUE_TYPED,
ALERT_REASON as ALERT_REASON_TYPED,
} from '@kbn/rule-data-utils';
import {
ALERT_EVALUATION_THRESHOLD as ALERT_EVALUATION_THRESHOLD_NON_TYPED,
ALERT_EVALUATION_VALUE as ALERT_EVALUATION_VALUE_NON_TYPED,
ALERT_REASON as ALERT_REASON_NON_TYPED,
// @ts-expect-error
} from '@kbn/rule-data-utils/target_node/technical_field_names';
import { asDuration } from '../../../../observability/common/utils/formatters';
import { createLifecycleRuleTypeFactory } from '../../../../rule_registry/server';
import {
getEnvironmentLabel,
@ -26,6 +29,7 @@ import {
AlertType,
APM_SERVER_FEATURE_ID,
ALERT_TYPES_CONFIG,
formatTransactionDurationReason,
} from '../../../common/alert_types';
import {
PROCESSOR_EVENT,
@ -43,6 +47,7 @@ import { RegisterRuleDependencies } from './register_apm_alerts';
// The runtime values come from the `target_node/technical_field_names` import
// (done under `@ts-expect-error`); each alias below re-binds one of them to the
// type of the same-named constant exported by `@kbn/rule-data-utils`.
const ALERT_EVALUATION_THRESHOLD: typeof ALERT_EVALUATION_THRESHOLD_TYPED = ALERT_EVALUATION_THRESHOLD_NON_TYPED;
const ALERT_EVALUATION_VALUE: typeof ALERT_EVALUATION_VALUE_TYPED = ALERT_EVALUATION_VALUE_NON_TYPED;
const ALERT_REASON: typeof ALERT_REASON_TYPED = ALERT_REASON_NON_TYPED;
const paramsSchema = schema.object({
serviceName: schema.string(),
@ -178,6 +183,12 @@ export function registerTransactionDurationAlertType({
[PROCESSOR_EVENT]: ProcessorEvent.transaction,
[ALERT_EVALUATION_VALUE]: transactionDuration,
[ALERT_EVALUATION_THRESHOLD]: alertParams.threshold,
[ALERT_REASON]: formatTransactionDurationReason({
measured: transactionDuration,
serviceName: alertParams.serviceName,
threshold: alertParams.threshold,
asDuration,
}),
},
})
.scheduleActions(alertTypeConfig.defaultActionGroupId, {

View file

@ -14,12 +14,14 @@ import type {
ALERT_EVALUATION_VALUE as ALERT_EVALUATION_VALUE_TYPED,
ALERT_SEVERITY_LEVEL as ALERT_SEVERITY_LEVEL_TYPED,
ALERT_SEVERITY_VALUE as ALERT_SEVERITY_VALUE_TYPED,
ALERT_REASON as ALERT_REASON_TYPED,
} from '@kbn/rule-data-utils';
import {
ALERT_EVALUATION_THRESHOLD as ALERT_EVALUATION_THRESHOLD_NON_TYPED,
ALERT_EVALUATION_VALUE as ALERT_EVALUATION_VALUE_NON_TYPED,
ALERT_SEVERITY_LEVEL as ALERT_SEVERITY_LEVEL_NON_TYPED,
ALERT_SEVERITY_VALUE as ALERT_SEVERITY_VALUE_NON_TYPED,
ALERT_REASON as ALERT_REASON_NON_TYPED,
// @ts-expect-error
} from '@kbn/rule-data-utils/target_node/technical_field_names';
import { createLifecycleRuleTypeFactory } from '../../../../rule_registry/server';
@ -37,6 +39,7 @@ import {
AlertType,
ALERT_TYPES_CONFIG,
ANOMALY_ALERT_SEVERITY_TYPES,
formatTransactionDurationAnomalyReason,
} from '../../../common/alert_types';
import { getMLJobs } from '../service_map/get_service_anomalies';
import { apmActionVariables } from './action_variables';
@ -50,6 +53,7 @@ const ALERT_EVALUATION_THRESHOLD: typeof ALERT_EVALUATION_THRESHOLD_TYPED = ALER
// The runtime values come from the `target_node/technical_field_names` import
// (done under `@ts-expect-error`); each alias below re-binds one of them to the
// type of the same-named constant exported by `@kbn/rule-data-utils`.
const ALERT_EVALUATION_VALUE: typeof ALERT_EVALUATION_VALUE_TYPED = ALERT_EVALUATION_VALUE_NON_TYPED;
const ALERT_SEVERITY_LEVEL: typeof ALERT_SEVERITY_LEVEL_TYPED = ALERT_SEVERITY_LEVEL_NON_TYPED;
const ALERT_SEVERITY_VALUE: typeof ALERT_SEVERITY_VALUE_TYPED = ALERT_SEVERITY_VALUE_NON_TYPED;
const ALERT_REASON: typeof ALERT_REASON_TYPED = ALERT_REASON_NON_TYPED;
const paramsSchema = schema.object({
serviceName: schema.maybe(schema.string()),
@ -258,6 +262,11 @@ export function registerTransactionDurationAnomalyAlertType({
[ALERT_SEVERITY_VALUE]: score,
[ALERT_EVALUATION_VALUE]: score,
[ALERT_EVALUATION_THRESHOLD]: threshold,
[ALERT_REASON]: formatTransactionDurationAnomalyReason({
measured: score,
serviceName,
severityLevel,
}),
},
})
.scheduleActions(alertTypeConfig.defaultActionGroupId, {

View file

@ -10,10 +10,12 @@ import { take } from 'rxjs/operators';
import type {
ALERT_EVALUATION_THRESHOLD as ALERT_EVALUATION_THRESHOLD_TYPED,
ALERT_EVALUATION_VALUE as ALERT_EVALUATION_VALUE_TYPED,
ALERT_REASON as ALERT_REASON_TYPED,
} from '@kbn/rule-data-utils';
import {
ALERT_EVALUATION_THRESHOLD as ALERT_EVALUATION_THRESHOLD_NON_TYPED,
ALERT_EVALUATION_VALUE as ALERT_EVALUATION_VALUE_NON_TYPED,
ALERT_REASON as ALERT_REASON_NON_TYPED,
// @ts-expect-error
} from '@kbn/rule-data-utils/target_node/technical_field_names';
import {
@ -26,6 +28,7 @@ import {
AlertType,
ALERT_TYPES_CONFIG,
APM_SERVER_FEATURE_ID,
formatTransactionErrorRateReason,
} from '../../../common/alert_types';
import {
EVENT_OUTCOME,
@ -42,9 +45,11 @@ import { getApmIndices } from '../settings/apm_indices/get_apm_indices';
import { apmActionVariables } from './action_variables';
import { alertingEsClient } from './alerting_es_client';
import { RegisterRuleDependencies } from './register_apm_alerts';
import { asPercent } from '../../../../observability/common/utils/formatters';
// The runtime values come from the `target_node/technical_field_names` import
// (done under `@ts-expect-error`); each alias below re-binds one of them to the
// type of the same-named constant exported by `@kbn/rule-data-utils`.
const ALERT_EVALUATION_THRESHOLD: typeof ALERT_EVALUATION_THRESHOLD_TYPED = ALERT_EVALUATION_THRESHOLD_NON_TYPED;
const ALERT_EVALUATION_VALUE: typeof ALERT_EVALUATION_VALUE_TYPED = ALERT_EVALUATION_VALUE_NON_TYPED;
const ALERT_REASON: typeof ALERT_REASON_TYPED = ALERT_REASON_NON_TYPED;
const paramsSchema = schema.object({
windowSize: schema.number(),
@ -217,6 +222,12 @@ export function registerTransactionErrorRateAlertType({
[PROCESSOR_EVENT]: ProcessorEvent.transaction,
[ALERT_EVALUATION_VALUE]: errorRate,
[ALERT_EVALUATION_THRESHOLD]: alertParams.threshold,
[ALERT_REASON]: formatTransactionErrorRateReason({
threshold: alertParams.threshold,
measured: errorRate,
asPercent,
serviceName,
}),
},
})
.scheduleActions(alertTypeConfig.defaultActionGroupId, {

View file

@ -5,6 +5,8 @@
* 2.0.
*/
export type { AsDuration, AsPercent } from './utils/formatters';
export const casesFeatureId = 'observabilityCases';
// The ID of the observability app. Should more appropriately be called

View file

@ -369,6 +369,9 @@ export default function ApiTest({ getService }: FtrProviderContext) {
"kibana.alert.id": Array [
"apm.transaction_error_rate_opbeans-go_request_ENVIRONMENT_NOT_DEFINED",
],
"kibana.alert.reason": Array [
"Failed transactions rate is greater than 30% (current value is 50%) for opbeans-go",
],
"kibana.alert.rule.category": Array [
"Failed transaction rate threshold",
],
@ -473,6 +476,9 @@ export default function ApiTest({ getService }: FtrProviderContext) {
"kibana.alert.id": Array [
"apm.transaction_error_rate_opbeans-go_request_ENVIRONMENT_NOT_DEFINED",
],
"kibana.alert.reason": Array [
"Failed transactions rate is greater than 30% (current value is 50%) for opbeans-go",
],
"kibana.alert.rule.category": Array [
"Failed transaction rate threshold",
],