[APM] Index reason field for alerts (#108019)
This commit is contained in:
parent
3b4dca1efb
commit
444355cdc3
|
@ -7,6 +7,7 @@
|
|||
|
||||
import { i18n } from '@kbn/i18n';
|
||||
import type { ValuesType } from 'utility-types';
|
||||
import type { AsDuration, AsPercent } from '../../observability/common';
|
||||
import type { ActionGroup } from '../../alerting/common';
|
||||
import { ANOMALY_SEVERITY, ANOMALY_THRESHOLD } from './ml_constants';
|
||||
|
||||
|
@ -28,6 +29,89 @@ const THRESHOLD_MET_GROUP: ActionGroup<ThresholdMetActionGroupId> = {
|
|||
}),
|
||||
};
|
||||
|
||||
/**
 * Builds the human-readable reason text for an error count alert.
 *
 * The three inputs are handed to `i18n.translate` unchanged and substituted
 * into the `{threshold}`, `{measured}` and `{serviceName}` placeholders of the
 * message.
 *
 * @param params.threshold - Error count threshold to show in the message.
 * @param params.measured - Current error count to show in the message.
 * @param params.serviceName - Service name to show in the message.
 * @returns The result of translating `xpack.apm.alertTypes.errorCount.reason`.
 */
export function formatErrorCountReason({
  threshold,
  measured,
  serviceName,
}: {
  threshold: number;
  measured: number;
  serviceName: string;
}): string {
  return i18n.translate('xpack.apm.alertTypes.errorCount.reason', {
    defaultMessage: `Error count is greater than {threshold} (current value is {measured}) for {serviceName}`,
    values: {
      threshold,
      measured,
      serviceName,
    },
  });
}
|
||||
|
||||
/**
 * Builds the human-readable reason text for a transaction duration (latency)
 * alert.
 *
 * `threshold` and `measured` are each passed through the supplied
 * `asDuration` formatter before being substituted into the message;
 * `serviceName` is substituted unchanged.
 *
 * @param params.threshold - Latency threshold; formatted with `asDuration`.
 * @param params.measured - Current latency; formatted with `asDuration`.
 * @param params.serviceName - Service name to show in the message.
 * @param params.asDuration - Formatter applied to both numeric values. It is
 *   injected by the caller rather than imported here.
 * @returns The result of translating
 *   `xpack.apm.alertTypes.transactionDuration.reason`.
 */
export function formatTransactionDurationReason({
  threshold,
  measured,
  serviceName,
  asDuration,
}: {
  threshold: number;
  measured: number;
  serviceName: string;
  asDuration: AsDuration;
}): string {
  return i18n.translate('xpack.apm.alertTypes.transactionDuration.reason', {
    defaultMessage: `Latency is above {threshold} (current value is {measured}) for {serviceName}`,
    values: {
      threshold: asDuration(threshold),
      measured: asDuration(measured),
      serviceName,
    },
  });
}
|
||||
|
||||
/**
 * Builds the human-readable reason text for a failed transaction rate alert.
 *
 * `threshold` and `measured` are each passed to the supplied `asPercent`
 * formatter with `100` as the second argument before being substituted into
 * the message; `serviceName` is substituted unchanged.
 *
 * @param params.threshold - Failure rate threshold; formatted with `asPercent`.
 * @param params.measured - Current failure rate; formatted with `asPercent`.
 * @param params.serviceName - Service name to show in the message.
 * @param params.asPercent - Formatter applied to both numeric values. It is
 *   injected by the caller rather than imported here.
 * @returns The result of translating
 *   `xpack.apm.alertTypes.transactionErrorRate.reason`.
 */
export function formatTransactionErrorRateReason({
  threshold,
  measured,
  serviceName,
  asPercent,
}: {
  threshold: number;
  measured: number;
  serviceName: string;
  asPercent: AsPercent;
}): string {
  return i18n.translate('xpack.apm.alertTypes.transactionErrorRate.reason', {
    defaultMessage: `Failed transactions rate is greater than {threshold} (current value is {measured}) for {serviceName}`,
    values: {
      threshold: asPercent(threshold, 100),
      measured: asPercent(measured, 100),
      serviceName,
    },
  });
}
|
||||
|
||||
/**
 * Builds the human-readable reason text for a transaction duration anomaly
 * alert.
 *
 * All three inputs are handed to `i18n.translate` unchanged and substituted
 * into the `{severityLevel}`, `{serviceName}` and `{measured}` placeholders of
 * the message.
 *
 * @param params.serviceName - Service name to show in the message.
 * @param params.severityLevel - Severity label that starts the message.
 * @param params.measured - Anomaly score to show in the message.
 * @returns The result of translating
 *   `xpack.apm.alertTypes.transactionDurationAnomaly.reason`.
 */
export function formatTransactionDurationAnomalyReason({
  serviceName,
  severityLevel,
  measured,
}: {
  serviceName: string;
  severityLevel: string;
  measured: number;
}): string {
  return i18n.translate(
    'xpack.apm.alertTypes.transactionDurationAnomaly.reason',
    {
      defaultMessage: `{severityLevel} anomaly detected for {serviceName} (score was {measured})`,
      values: {
        serviceName,
        severityLevel,
        measured,
      },
    }
  );
}
|
||||
|
||||
export const ALERT_TYPES_CONFIG: Record<
|
||||
AlertType,
|
||||
{
|
||||
|
|
|
@ -21,7 +21,13 @@ import {
|
|||
} from '@kbn/rule-data-utils/target_node/technical_field_names';
|
||||
import type { ObservabilityRuleTypeRegistry } from '../../../../observability/public';
|
||||
import { ENVIRONMENT_ALL } from '../../../common/environment_filter_values';
|
||||
import { AlertType } from '../../../common/alert_types';
|
||||
import {
|
||||
AlertType,
|
||||
formatErrorCountReason,
|
||||
formatTransactionDurationAnomalyReason,
|
||||
formatTransactionDurationReason,
|
||||
formatTransactionErrorRateReason,
|
||||
} from '../../../common/alert_types';
|
||||
|
||||
// copied from elasticsearch_fieldnames.ts to limit page load bundle size
|
||||
const SERVICE_ENVIRONMENT = 'service.environment';
|
||||
|
@ -53,13 +59,10 @@ export function registerApmAlerts(
|
|||
}),
|
||||
format: ({ fields }) => {
|
||||
return {
|
||||
reason: i18n.translate('xpack.apm.alertTypes.errorCount.reason', {
|
||||
defaultMessage: `Error count is greater than {threshold} (current value is {measured}) for {serviceName}`,
|
||||
values: {
|
||||
threshold: fields[ALERT_EVALUATION_THRESHOLD],
|
||||
measured: fields[ALERT_EVALUATION_VALUE],
|
||||
serviceName: String(fields[SERVICE_NAME][0]),
|
||||
},
|
||||
reason: formatErrorCountReason({
|
||||
threshold: fields[ALERT_EVALUATION_THRESHOLD]!,
|
||||
measured: fields[ALERT_EVALUATION_VALUE]!,
|
||||
serviceName: String(fields[SERVICE_NAME][0]),
|
||||
}),
|
||||
link: format({
|
||||
pathname: `/app/apm/services/${String(
|
||||
|
@ -105,17 +108,12 @@ export function registerApmAlerts(
|
|||
}
|
||||
),
|
||||
format: ({ fields, formatters: { asDuration } }) => ({
|
||||
reason: i18n.translate(
|
||||
'xpack.apm.alertTypes.transactionDuration.reason',
|
||||
{
|
||||
defaultMessage: `Latency is above {threshold} (current value is {measured}) for {serviceName}`,
|
||||
values: {
|
||||
threshold: asDuration(fields[ALERT_EVALUATION_THRESHOLD]),
|
||||
measured: asDuration(fields[ALERT_EVALUATION_VALUE]),
|
||||
serviceName: String(fields[SERVICE_NAME][0]),
|
||||
},
|
||||
}
|
||||
),
|
||||
reason: formatTransactionDurationReason({
|
||||
threshold: fields[ALERT_EVALUATION_THRESHOLD]!,
|
||||
measured: fields[ALERT_EVALUATION_VALUE]!,
|
||||
serviceName: String(fields[SERVICE_NAME][0]),
|
||||
asDuration,
|
||||
}),
|
||||
link: format({
|
||||
pathname: `/app/apm/services/${fields[SERVICE_NAME][0]!}`,
|
||||
query: {
|
||||
|
@ -161,17 +159,12 @@ export function registerApmAlerts(
|
|||
}
|
||||
),
|
||||
format: ({ fields, formatters: { asPercent } }) => ({
|
||||
reason: i18n.translate(
|
||||
'xpack.apm.alertTypes.transactionErrorRate.reason',
|
||||
{
|
||||
defaultMessage: `Failed transactions rate is greater than {threshold} (current value is {measured}) for {serviceName}`,
|
||||
values: {
|
||||
threshold: asPercent(fields[ALERT_EVALUATION_THRESHOLD], 100),
|
||||
measured: asPercent(fields[ALERT_EVALUATION_VALUE], 100),
|
||||
serviceName: String(fields[SERVICE_NAME][0]),
|
||||
},
|
||||
}
|
||||
),
|
||||
reason: formatTransactionErrorRateReason({
|
||||
threshold: fields[ALERT_EVALUATION_THRESHOLD]!,
|
||||
measured: fields[ALERT_EVALUATION_VALUE]!,
|
||||
serviceName: String(fields[SERVICE_NAME][0]),
|
||||
asPercent,
|
||||
}),
|
||||
link: format({
|
||||
pathname: `/app/apm/services/${String(fields[SERVICE_NAME][0]!)}`,
|
||||
query: {
|
||||
|
@ -216,17 +209,11 @@ export function registerApmAlerts(
|
|||
}
|
||||
),
|
||||
format: ({ fields }) => ({
|
||||
reason: i18n.translate(
|
||||
'xpack.apm.alertTypes.transactionDurationAnomaly.reason',
|
||||
{
|
||||
defaultMessage: `{severityLevel} anomaly detected for {serviceName} (score was {measured})`,
|
||||
values: {
|
||||
serviceName: String(fields[SERVICE_NAME][0]),
|
||||
severityLevel: String(fields[ALERT_SEVERITY_LEVEL]),
|
||||
measured: Number(fields[ALERT_EVALUATION_VALUE]),
|
||||
},
|
||||
}
|
||||
),
|
||||
reason: formatTransactionDurationAnomalyReason({
|
||||
serviceName: String(fields[SERVICE_NAME][0]),
|
||||
severityLevel: String(fields[ALERT_SEVERITY_LEVEL]),
|
||||
measured: Number(fields[ALERT_EVALUATION_VALUE]),
|
||||
}),
|
||||
link: format({
|
||||
pathname: `/app/apm/services/${String(fields[SERVICE_NAME][0])}`,
|
||||
query: {
|
||||
|
|
|
@ -10,10 +10,12 @@ import { take } from 'rxjs/operators';
|
|||
import type {
|
||||
ALERT_EVALUATION_THRESHOLD as ALERT_EVALUATION_THRESHOLD_TYPED,
|
||||
ALERT_EVALUATION_VALUE as ALERT_EVALUATION_VALUE_TYPED,
|
||||
ALERT_REASON as ALERT_REASON_TYPED,
|
||||
} from '@kbn/rule-data-utils';
|
||||
import {
|
||||
ALERT_EVALUATION_THRESHOLD as ALERT_EVALUATION_THRESHOLD_NON_TYPED,
|
||||
ALERT_EVALUATION_VALUE as ALERT_EVALUATION_VALUE_NON_TYPED,
|
||||
ALERT_REASON as ALERT_REASON_NON_TYPED,
|
||||
// @ts-expect-error
|
||||
} from '@kbn/rule-data-utils/target_node/technical_field_names';
|
||||
import { createLifecycleRuleTypeFactory } from '../../../../rule_registry/server';
|
||||
|
@ -26,6 +28,7 @@ import {
|
|||
AlertType,
|
||||
APM_SERVER_FEATURE_ID,
|
||||
ALERT_TYPES_CONFIG,
|
||||
formatErrorCountReason,
|
||||
} from '../../../common/alert_types';
|
||||
import {
|
||||
PROCESSOR_EVENT,
|
||||
|
@ -41,6 +44,7 @@ import { RegisterRuleDependencies } from './register_apm_alerts';
|
|||
|
||||
const ALERT_EVALUATION_THRESHOLD: typeof ALERT_EVALUATION_THRESHOLD_TYPED = ALERT_EVALUATION_THRESHOLD_NON_TYPED;
|
||||
const ALERT_EVALUATION_VALUE: typeof ALERT_EVALUATION_VALUE_TYPED = ALERT_EVALUATION_VALUE_NON_TYPED;
|
||||
const ALERT_REASON: typeof ALERT_REASON_TYPED = ALERT_REASON_NON_TYPED;
|
||||
|
||||
const paramsSchema = schema.object({
|
||||
windowSize: schema.number(),
|
||||
|
@ -158,6 +162,11 @@ export function registerErrorCountAlertType({
|
|||
[PROCESSOR_EVENT]: ProcessorEvent.error,
|
||||
[ALERT_EVALUATION_VALUE]: errorCount,
|
||||
[ALERT_EVALUATION_THRESHOLD]: alertParams.threshold,
|
||||
[ALERT_REASON]: formatErrorCountReason({
|
||||
serviceName,
|
||||
threshold: alertParams.threshold,
|
||||
measured: errorCount,
|
||||
}),
|
||||
},
|
||||
})
|
||||
.scheduleActions(alertTypeConfig.defaultActionGroupId, {
|
||||
|
|
|
@ -11,12 +11,15 @@ import { QueryDslQueryContainer } from '@elastic/elasticsearch/api/types';
|
|||
import type {
|
||||
ALERT_EVALUATION_THRESHOLD as ALERT_EVALUATION_THRESHOLD_TYPED,
|
||||
ALERT_EVALUATION_VALUE as ALERT_EVALUATION_VALUE_TYPED,
|
||||
ALERT_REASON as ALERT_REASON_TYPED,
|
||||
} from '@kbn/rule-data-utils';
|
||||
import {
|
||||
ALERT_EVALUATION_THRESHOLD as ALERT_EVALUATION_THRESHOLD_NON_TYPED,
|
||||
ALERT_EVALUATION_VALUE as ALERT_EVALUATION_VALUE_NON_TYPED,
|
||||
ALERT_REASON as ALERT_REASON_NON_TYPED,
|
||||
// @ts-expect-error
|
||||
} from '@kbn/rule-data-utils/target_node/technical_field_names';
|
||||
import { asDuration } from '../../../../observability/common/utils/formatters';
|
||||
import { createLifecycleRuleTypeFactory } from '../../../../rule_registry/server';
|
||||
import {
|
||||
getEnvironmentLabel,
|
||||
|
@ -26,6 +29,7 @@ import {
|
|||
AlertType,
|
||||
APM_SERVER_FEATURE_ID,
|
||||
ALERT_TYPES_CONFIG,
|
||||
formatTransactionDurationReason,
|
||||
} from '../../../common/alert_types';
|
||||
import {
|
||||
PROCESSOR_EVENT,
|
||||
|
@ -43,6 +47,7 @@ import { RegisterRuleDependencies } from './register_apm_alerts';
|
|||
|
||||
const ALERT_EVALUATION_THRESHOLD: typeof ALERT_EVALUATION_THRESHOLD_TYPED = ALERT_EVALUATION_THRESHOLD_NON_TYPED;
|
||||
const ALERT_EVALUATION_VALUE: typeof ALERT_EVALUATION_VALUE_TYPED = ALERT_EVALUATION_VALUE_NON_TYPED;
|
||||
const ALERT_REASON: typeof ALERT_REASON_TYPED = ALERT_REASON_NON_TYPED;
|
||||
|
||||
const paramsSchema = schema.object({
|
||||
serviceName: schema.string(),
|
||||
|
@ -178,6 +183,12 @@ export function registerTransactionDurationAlertType({
|
|||
[PROCESSOR_EVENT]: ProcessorEvent.transaction,
|
||||
[ALERT_EVALUATION_VALUE]: transactionDuration,
|
||||
[ALERT_EVALUATION_THRESHOLD]: alertParams.threshold,
|
||||
[ALERT_REASON]: formatTransactionDurationReason({
|
||||
measured: transactionDuration,
|
||||
serviceName: alertParams.serviceName,
|
||||
threshold: alertParams.threshold,
|
||||
asDuration,
|
||||
}),
|
||||
},
|
||||
})
|
||||
.scheduleActions(alertTypeConfig.defaultActionGroupId, {
|
||||
|
|
|
@ -14,12 +14,14 @@ import type {
|
|||
ALERT_EVALUATION_VALUE as ALERT_EVALUATION_VALUE_TYPED,
|
||||
ALERT_SEVERITY_LEVEL as ALERT_SEVERITY_LEVEL_TYPED,
|
||||
ALERT_SEVERITY_VALUE as ALERT_SEVERITY_VALUE_TYPED,
|
||||
ALERT_REASON as ALERT_REASON_TYPED,
|
||||
} from '@kbn/rule-data-utils';
|
||||
import {
|
||||
ALERT_EVALUATION_THRESHOLD as ALERT_EVALUATION_THRESHOLD_NON_TYPED,
|
||||
ALERT_EVALUATION_VALUE as ALERT_EVALUATION_VALUE_NON_TYPED,
|
||||
ALERT_SEVERITY_LEVEL as ALERT_SEVERITY_LEVEL_NON_TYPED,
|
||||
ALERT_SEVERITY_VALUE as ALERT_SEVERITY_VALUE_NON_TYPED,
|
||||
ALERT_REASON as ALERT_REASON_NON_TYPED,
|
||||
// @ts-expect-error
|
||||
} from '@kbn/rule-data-utils/target_node/technical_field_names';
|
||||
import { createLifecycleRuleTypeFactory } from '../../../../rule_registry/server';
|
||||
|
@ -37,6 +39,7 @@ import {
|
|||
AlertType,
|
||||
ALERT_TYPES_CONFIG,
|
||||
ANOMALY_ALERT_SEVERITY_TYPES,
|
||||
formatTransactionDurationAnomalyReason,
|
||||
} from '../../../common/alert_types';
|
||||
import { getMLJobs } from '../service_map/get_service_anomalies';
|
||||
import { apmActionVariables } from './action_variables';
|
||||
|
@ -50,6 +53,7 @@ const ALERT_EVALUATION_THRESHOLD: typeof ALERT_EVALUATION_THRESHOLD_TYPED = ALER
|
|||
const ALERT_EVALUATION_VALUE: typeof ALERT_EVALUATION_VALUE_TYPED = ALERT_EVALUATION_VALUE_NON_TYPED;
|
||||
const ALERT_SEVERITY_LEVEL: typeof ALERT_SEVERITY_LEVEL_TYPED = ALERT_SEVERITY_LEVEL_NON_TYPED;
|
||||
const ALERT_SEVERITY_VALUE: typeof ALERT_SEVERITY_VALUE_TYPED = ALERT_SEVERITY_VALUE_NON_TYPED;
|
||||
const ALERT_REASON: typeof ALERT_REASON_TYPED = ALERT_REASON_NON_TYPED;
|
||||
|
||||
const paramsSchema = schema.object({
|
||||
serviceName: schema.maybe(schema.string()),
|
||||
|
@ -258,6 +262,11 @@ export function registerTransactionDurationAnomalyAlertType({
|
|||
[ALERT_SEVERITY_VALUE]: score,
|
||||
[ALERT_EVALUATION_VALUE]: score,
|
||||
[ALERT_EVALUATION_THRESHOLD]: threshold,
|
||||
[ALERT_REASON]: formatTransactionDurationAnomalyReason({
|
||||
measured: score,
|
||||
serviceName,
|
||||
severityLevel,
|
||||
}),
|
||||
},
|
||||
})
|
||||
.scheduleActions(alertTypeConfig.defaultActionGroupId, {
|
||||
|
|
|
@ -10,10 +10,12 @@ import { take } from 'rxjs/operators';
|
|||
import type {
|
||||
ALERT_EVALUATION_THRESHOLD as ALERT_EVALUATION_THRESHOLD_TYPED,
|
||||
ALERT_EVALUATION_VALUE as ALERT_EVALUATION_VALUE_TYPED,
|
||||
ALERT_REASON as ALERT_REASON_TYPED,
|
||||
} from '@kbn/rule-data-utils';
|
||||
import {
|
||||
ALERT_EVALUATION_THRESHOLD as ALERT_EVALUATION_THRESHOLD_NON_TYPED,
|
||||
ALERT_EVALUATION_VALUE as ALERT_EVALUATION_VALUE_NON_TYPED,
|
||||
ALERT_REASON as ALERT_REASON_NON_TYPED,
|
||||
// @ts-expect-error
|
||||
} from '@kbn/rule-data-utils/target_node/technical_field_names';
|
||||
import {
|
||||
|
@ -26,6 +28,7 @@ import {
|
|||
AlertType,
|
||||
ALERT_TYPES_CONFIG,
|
||||
APM_SERVER_FEATURE_ID,
|
||||
formatTransactionErrorRateReason,
|
||||
} from '../../../common/alert_types';
|
||||
import {
|
||||
EVENT_OUTCOME,
|
||||
|
@ -42,9 +45,11 @@ import { getApmIndices } from '../settings/apm_indices/get_apm_indices';
|
|||
import { apmActionVariables } from './action_variables';
|
||||
import { alertingEsClient } from './alerting_es_client';
|
||||
import { RegisterRuleDependencies } from './register_apm_alerts';
|
||||
import { asPercent } from '../../../../observability/common/utils/formatters';
|
||||
|
||||
const ALERT_EVALUATION_THRESHOLD: typeof ALERT_EVALUATION_THRESHOLD_TYPED = ALERT_EVALUATION_THRESHOLD_NON_TYPED;
|
||||
const ALERT_EVALUATION_VALUE: typeof ALERT_EVALUATION_VALUE_TYPED = ALERT_EVALUATION_VALUE_NON_TYPED;
|
||||
const ALERT_REASON: typeof ALERT_REASON_TYPED = ALERT_REASON_NON_TYPED;
|
||||
|
||||
const paramsSchema = schema.object({
|
||||
windowSize: schema.number(),
|
||||
|
@ -217,6 +222,12 @@ export function registerTransactionErrorRateAlertType({
|
|||
[PROCESSOR_EVENT]: ProcessorEvent.transaction,
|
||||
[ALERT_EVALUATION_VALUE]: errorRate,
|
||||
[ALERT_EVALUATION_THRESHOLD]: alertParams.threshold,
|
||||
[ALERT_REASON]: formatTransactionErrorRateReason({
|
||||
threshold: alertParams.threshold,
|
||||
measured: errorRate,
|
||||
asPercent,
|
||||
serviceName,
|
||||
}),
|
||||
},
|
||||
})
|
||||
.scheduleActions(alertTypeConfig.defaultActionGroupId, {
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
* 2.0.
|
||||
*/
|
||||
|
||||
export type { AsDuration, AsPercent } from './utils/formatters';
|
||||
|
||||
export const casesFeatureId = 'observabilityCases';
|
||||
|
||||
// The ID of the observability app. Should more appropriately be called
|
||||
|
|
|
@ -369,6 +369,9 @@ export default function ApiTest({ getService }: FtrProviderContext) {
|
|||
"kibana.alert.id": Array [
|
||||
"apm.transaction_error_rate_opbeans-go_request_ENVIRONMENT_NOT_DEFINED",
|
||||
],
|
||||
"kibana.alert.reason": Array [
|
||||
"Failed transactions rate is greater than 30% (current value is 50%) for opbeans-go",
|
||||
],
|
||||
"kibana.alert.rule.category": Array [
|
||||
"Failed transaction rate threshold",
|
||||
],
|
||||
|
@ -473,6 +476,9 @@ export default function ApiTest({ getService }: FtrProviderContext) {
|
|||
"kibana.alert.id": Array [
|
||||
"apm.transaction_error_rate_opbeans-go_request_ENVIRONMENT_NOT_DEFINED",
|
||||
],
|
||||
"kibana.alert.reason": Array [
|
||||
"Failed transactions rate is greater than 30% (current value is 50%) for opbeans-go",
|
||||
],
|
||||
"kibana.alert.rule.category": Array [
|
||||
"Failed transaction rate threshold",
|
||||
],
|
||||
|
|
Loading…
Reference in a new issue