[ML] Convert APM transaction anomaly detection job to analyze metric data (#111957)

* [ML] Convert APM transaction anomaly detection job to analyze metric data

* [ML] Fix test and edit manifest query and defaultIndexPattern

* Remove queries for ml module, upgrade version

* Update ML anomaly searches

* Add metricset.name term query to datafeed

* Add metricset.name/processor.event queries to module setup

* [ML] Edit setup module API integration test to not start datafeed

* [ML] Edit aggregation names

* Edit home.spec E2E test URLs

Co-authored-by: Dario Gieselaar <dario.gieselaar@elastic.co>
This commit is contained in:
Pete Harverson 2021-09-29 17:52:00 +01:00 committed by GitHub
parent f4a95f9b97
commit 408cf173fa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 216 additions and 81 deletions

View file

@ -33,6 +33,8 @@ export function getSeverityColor(score: number) {
return mlGetSeverityColor(score);
}
export const ML_TRANSACTION_LATENCY_DETECTOR_INDEX = 0;
export const ML_ERRORS = {
INVALID_LICENSE: i18n.translate(
'xpack.apm.anomaly_detection.error.invalid_license',

View file

@ -0,0 +1,25 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
/**
 * Builds the filter clauses used to select APM ML anomaly results for a
 * single detector.
 *
 * The returned array holds one `bool` query whose `filter` requires that a
 * document has a `result_type` of `model_plot` or `record` and a
 * `detector_index` equal to the supplied value. It is returned as an array so
 * it can be spread into an enclosing `filter` list.
 *
 * @param detectorIndex - index of the detector whose results should match
 * @returns a one-element array containing the `bool` filter query
 */
export function apmMlAnomalyQuery(detectorIndex: 0 | 1 | 2) {
  // Both clauses live in filter context: they restrict matches, not scoring.
  const filter = [
    { terms: { result_type: ['model_plot', 'record'] } },
    { term: { detector_index: detectorIndex } },
  ];

  return [{ bool: { filter } }];
}

View file

@ -17,11 +17,11 @@ const serviceInventoryHref = url.format({
const apisToIntercept = [
{
endpoint: '/api/apm/service',
endpoint: '/api/apm/service?*',
name: 'servicesMainStatistics',
},
{
endpoint: '/api/apm/services/detailed_statistics',
endpoint: '/api/apm/services/detailed_statistics?*',
name: 'servicesDetailedStatistics',
},
];

View file

@ -5,21 +5,21 @@
* 2.0.
*/
import { Logger } from 'kibana/server';
import uuid from 'uuid/v4';
import { snakeCase } from 'lodash';
import Boom from '@hapi/boom';
import { Logger } from 'kibana/server';
import { snakeCase } from 'lodash';
import moment from 'moment';
import uuid from 'uuid/v4';
import { ML_ERRORS } from '../../../common/anomaly_detection';
import { ProcessorEvent } from '../../../common/processor_event';
import { environmentQuery } from '../../../common/utils/environment_query';
import { Setup } from '../helpers/setup_request';
import {
TRANSACTION_DURATION,
METRICSET_NAME,
PROCESSOR_EVENT,
} from '../../../common/elasticsearch_fieldnames';
import { APM_ML_JOB_GROUP, ML_MODULE_ID_APM_TRANSACTION } from './constants';
import { ProcessorEvent } from '../../../common/processor_event';
import { environmentQuery } from '../../../common/utils/environment_query';
import { withApmSpan } from '../../utils/with_apm_span';
import { Setup } from '../helpers/setup_request';
import { APM_ML_JOB_GROUP, ML_MODULE_ID_APM_TRANSACTION } from './constants';
import { getAnomalyDetectionJobs } from './get_anomaly_detection_jobs';
export async function createAnomalyDetectionJobs(
@ -50,7 +50,7 @@ export async function createAnomalyDetectionJobs(
`Creating ML anomaly detection jobs for environments: [${uniqueMlJobEnvs}].`
);
const indexPatternName = indices['apm_oss.transactionIndices'];
const indexPatternName = indices['apm_oss.metricsIndices'];
const responses = await Promise.all(
uniqueMlJobEnvs.map((environment) =>
createAnomalyDetectionJob({ ml, environment, indexPatternName })
@ -92,8 +92,8 @@ async function createAnomalyDetectionJob({
query: {
bool: {
filter: [
{ term: { [PROCESSOR_EVENT]: ProcessorEvent.transaction } },
{ exists: { field: TRANSACTION_DURATION } },
{ term: { [PROCESSOR_EVENT]: ProcessorEvent.metric } },
{ term: { [METRICSET_NAME]: 'transaction' } },
...environmentQuery(environment),
],
},
@ -105,7 +105,7 @@ async function createAnomalyDetectionJob({
job_tags: {
environment,
// identifies this as an APM ML job & facilitates future migrations
apm_ml_version: 2,
apm_ml_version: 3,
},
},
},

View file

@ -11,7 +11,11 @@ import { estypes } from '@elastic/elasticsearch';
import { ESSearchResponse } from '../../../../../../src/core/types/elasticsearch';
import { MlPluginSetup } from '../../../../ml/server';
import { PromiseReturnType } from '../../../../observability/typings/common';
import { getSeverity, ML_ERRORS } from '../../../common/anomaly_detection';
import {
getSeverity,
ML_ERRORS,
ML_TRANSACTION_LATENCY_DETECTOR_INDEX,
} from '../../../common/anomaly_detection';
import { ENVIRONMENT_ALL } from '../../../common/environment_filter_values';
import { getServiceHealthStatus } from '../../../common/service_health_status';
import {
@ -22,6 +26,7 @@ import { rangeQuery } from '../../../../observability/server';
import { withApmSpan } from '../../utils/with_apm_span';
import { getMlJobsWithAPMGroup } from '../anomaly_detection/get_ml_jobs_with_apm_group';
import { Setup } from '../helpers/setup_request';
import { apmMlAnomalyQuery } from '../../../common/utils/apm_ml_anomaly_query';
export const DEFAULT_ANOMALIES: ServiceAnomaliesResponse = {
mlJobIds: [],
@ -56,7 +61,7 @@ export async function getServiceAnomalies({
query: {
bool: {
filter: [
{ terms: { result_type: ['model_plot', 'record'] } },
...apmMlAnomalyQuery(ML_TRANSACTION_LATENCY_DETECTOR_INDEX),
...rangeQuery(
Math.min(end - 30 * 60 * 1000, start),
end,

View file

@ -12,6 +12,8 @@ import { rangeQuery } from '../../../../../observability/server';
import { asMutableArray } from '../../../../common/utils/as_mutable_array';
import { withApmSpan } from '../../../utils/with_apm_span';
import { Setup } from '../../helpers/setup_request';
import { apmMlAnomalyQuery } from '../../../../common/utils/apm_ml_anomaly_query';
import { ML_TRANSACTION_LATENCY_DETECTOR_INDEX } from '../../../../common/anomaly_detection';
export type ESResponse = Exclude<
PromiseReturnType<typeof anomalySeriesFetcher>,
@ -40,7 +42,7 @@ export function anomalySeriesFetcher({
query: {
bool: {
filter: [
{ terms: { result_type: ['model_plot', 'record'] } },
...apmMlAnomalyQuery(ML_TRANSACTION_LATENCY_DETECTOR_INDEX),
{ term: { partition_field_value: serviceName } },
{ term: { by_field_value: transactionType } },
...rangeQuery(start, end, 'timestamp'),

View file

@ -269,7 +269,7 @@ export const JobSettingsForm: FC<JobSettingsFormProps> = ({
>
<FormattedMessage
id="xpack.ml.newJob.recognize.createJobButtonLabel"
defaultMessage="Create {numberOfJobs, plural, zero {Job} one {Job} other {Jobs}}"
defaultMessage="Create {numberOfJobs, plural, zero {job} one {job} other {jobs}}"
values={{ numberOfJobs: jobs.length }}
/>
</EuiButton>

View file

@ -1,29 +1,29 @@
{
"id": "apm_transaction",
"title": "APM",
"description": "Detect anomalies in transactions from your APM services.",
"description": "Detect anomalies in transactions from your APM services for metric data.",
"type": "Transaction data",
"logoFile": "logo.json",
"defaultIndexPattern": "apm-*-transaction",
"defaultIndexPattern": "apm-*-metric,metrics-apm*",
"query": {
"bool": {
"filter": [
{ "term": { "processor.event": "transaction" } },
{ "exists": { "field": "transaction.duration" } }
{ "term": { "processor.event": "metric" } },
{ "term": { "metricset.name": "transaction" } }
]
}
},
"jobs": [
{
"id": "high_mean_transaction_duration",
"file": "high_mean_transaction_duration.json"
"id": "apm_metrics",
"file": "apm_metrics.json"
}
],
"datafeeds": [
{
"id": "datafeed-high_mean_transaction_duration",
"file": "datafeed_high_mean_transaction_duration.json",
"job_id": "high_mean_transaction_duration"
"id": "datafeed-apm_metrics",
"file": "datafeed_apm_metrics.json",
"job_id": "apm_metrics"
}
]
}

View file

@ -0,0 +1,53 @@
{
"job_type": "anomaly_detector",
"groups": [
"apm"
],
"description": "Detects anomalies in transaction duration, throughput and error percentage for metric data.",
"analysis_config": {
"bucket_span": "15m",
"summary_count_field_name" : "doc_count",
"detectors" : [
{
"detector_description" : "high duration by transaction type for an APM service",
"function" : "high_mean",
"field_name" : "transaction_duration",
"by_field_name" : "transaction.type",
"partition_field_name" : "service.name"
},
{
"detector_description" : "transactions per minute for an APM service",
"function" : "mean",
"field_name" : "transactions_per_min",
"by_field_name" : "transaction.type",
"partition_field_name" : "service.name"
},
{
"detector_description" : "percent failed for an APM service",
"function" : "high_mean",
"field_name" : "transaction_failure_percentage",
"by_field_name" : "transaction.type",
"partition_field_name" : "service.name"
}
],
"influencers" : [
"transaction.type",
"service.name"
]
},
"analysis_limits": {
"model_memory_limit": "32mb"
},
"data_description": {
"time_field" : "@timestamp",
"time_format" : "epoch_ms"
},
"model_plot_config": {
"enabled" : true,
"annotations_enabled" : true
},
"results_index_name" : "custom-apm",
"custom_settings": {
"created_by": "ml-module-apm-transaction"
}
}

View file

@ -0,0 +1,95 @@
{
"job_id": "JOB_ID",
"indices": [
"INDEX_PATTERN_NAME"
],
"chunking_config" : {
"mode" : "off"
},
"query": {
"bool": {
"filter": [
{ "term": { "processor.event": "metric" } },
{ "term": { "metricset.name": "transaction" } }
]
}
},
"aggregations" : {
"buckets" : {
"composite" : {
"size" : 5000,
"sources" : [
{
"date" : {
"date_histogram" : {
"field" : "@timestamp",
"fixed_interval" : "90s"
}
}
},
{
"transaction.type" : {
"terms" : {
"field" : "transaction.type"
}
}
},
{
"service.name" : {
"terms" : {
"field" : "service.name"
}
}
}
]
},
"aggs" : {
"@timestamp" : {
"max" : {
"field" : "@timestamp"
}
},
"transactions_per_min" : {
"rate" : {
"unit" : "minute"
}
},
"transaction_duration" : {
"avg" : {
"field" : "transaction.duration.histogram"
}
},
"error_count" : {
"filter" : {
"term" : {
"event.outcome" : "failure"
}
},
"aggs" : {
"actual_error_count" : {
"value_count" : {
"field" : "event.outcome"
}
}
}
},
"success_count" : {
"filter" : {
"term" : {
"event.outcome" : "success"
}
}
},
"transaction_failure_percentage" : {
"bucket_script" : {
"buckets_path" : {
"failure_count" : "error_count>_count",
"success_count" : "success_count>_count"
},
"script" : "if ((params.failure_count + params.success_count)==0){return 0;}else{return params.failure_count/(params.failure_count + params.success_count);}"
}
}
}
}
}
}

View file

@ -1,14 +0,0 @@
{
"job_id": "JOB_ID",
"indices": [
"INDEX_PATTERN_NAME"
],
"query": {
"bool": {
"filter": [
{ "term": { "processor.event": "transaction" } },
{ "exists": { "field": "transaction.duration.us" } }
]
}
}
}

View file

@ -1,35 +0,0 @@
{
"job_type": "anomaly_detector",
"groups": [
"apm"
],
"description": "Detect transaction duration anomalies across transaction types for your APM services.",
"analysis_config": {
"bucket_span": "15m",
"detectors": [
{
"detector_description": "high duration by transaction type for an APM service",
"function": "high_mean",
"field_name": "transaction.duration.us",
"by_field_name": "transaction.type",
"partition_field_name": "service.name"
}
],
"influencers": [
"transaction.type",
"service.name"
]
},
"analysis_limits": {
"model_memory_limit": "32mb"
},
"data_description": {
"time_field": "@timestamp"
},
"model_plot_config": {
"enabled": true
},
"custom_settings": {
"created_by": "ml-module-apm-transaction"
}
}

View file

@ -44,7 +44,7 @@ export default ({ getService }: FtrProviderContext) => {
user: USER.ML_POWERUSER,
expected: {
responseCode: 200,
moduleIds: ['apm_jsbase', 'apm_transaction', 'apm_nodejs'],
moduleIds: ['apm_jsbase', 'apm_nodejs'],
},
},
{

View file

@ -187,9 +187,11 @@ export default ({ getService }: FtrProviderContext) => {
dashboards: [] as string[],
},
},
// Set startDatafeed and estimateModelMemory to false for the APM transaction test
// until there is a new data set available with metric data.
{
testTitleSuffix:
'for apm_transaction with prefix, startDatafeed true and estimateModelMemory true',
'for apm_transaction with prefix, startDatafeed false and estimateModelMemory false',
sourceDataArchive: 'x-pack/test/functional/es_archives/ml/module_apm',
indexPattern: { name: 'ft_module_apm', timeField: '@timestamp' },
module: 'apm_transaction',
@ -197,14 +199,14 @@ export default ({ getService }: FtrProviderContext) => {
requestBody: {
prefix: 'pf5_',
indexPatternName: 'ft_module_apm',
startDatafeed: true,
end: Date.now(),
startDatafeed: false,
estimateModelMemory: false,
},
expected: {
responseCode: 200,
jobs: [
{
jobId: 'pf5_high_mean_transaction_duration',
jobId: 'pf5_apm_metrics',
jobState: JOB_STATE.CLOSED,
datafeedState: DATAFEED_STATE.STOPPED,
},