[ML] Telemetry for the Anomaly detection jobs health rule type (#110052)
* [ML] add mappings for the new rule type
* [ML] add telemetry for enabled health checks
* [ML] update xpack_plugins.json
This commit is contained in:
parent
41f7b429d1
commit
d66397cfe4
|
@ -46,6 +46,7 @@ const byTypeSchema: MakeSchemaFrom<AlertsUsage>['count_by_type'] = {
|
|||
'__geo-containment': { type: 'long' },
|
||||
// ML
|
||||
xpack_ml_anomaly_detection_alert: { type: 'long' },
|
||||
xpack_ml_anomaly_detection_jobs_health: { type: 'long' },
|
||||
};
|
||||
|
||||
export function createAlertsUsageCollector(
|
||||
|
|
|
@ -8,6 +8,8 @@
|
|||
import type { UsageCollectionSetup } from '../../../../../src/plugins/usage_collection/server';
|
||||
import { ML_ALERT_TYPES } from '../../common/constants/alerts';
|
||||
import { AnomalyResultType } from '../../common/types/anomalies';
|
||||
import { MlAnomalyDetectionJobsHealthRuleParams } from '../../common/types/alerts';
|
||||
import { getResultJobsHealthRuleConfig } from '../../common/util/alerts';
|
||||
|
||||
export interface MlUsageData {
|
||||
alertRules: {
|
||||
|
@ -18,6 +20,14 @@ export interface MlUsageData {
|
|||
influencer: number;
|
||||
};
|
||||
};
|
||||
'xpack.ml.anomaly_detection_jobs_health': {
|
||||
count_by_check_type: {
|
||||
datafeed: number;
|
||||
mml: number;
|
||||
delayedData: number;
|
||||
errorMessages: number;
|
||||
};
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -42,6 +52,38 @@ export function registerCollector(usageCollection: UsageCollectionSetup, kibanaI
|
|||
},
|
||||
},
|
||||
},
|
||||
'xpack.ml.anomaly_detection_jobs_health': {
|
||||
count_by_check_type: {
|
||||
datafeed: {
|
||||
type: 'long',
|
||||
_meta: {
|
||||
description:
|
||||
'total number of alerting rules performing the not started datafeed health check',
|
||||
},
|
||||
},
|
||||
mml: {
|
||||
type: 'long',
|
||||
_meta: {
|
||||
description:
|
||||
'total number of alerting rules performing the model memory limit health check',
|
||||
},
|
||||
},
|
||||
delayedData: {
|
||||
type: 'long',
|
||||
_meta: {
|
||||
description:
|
||||
'total number of alerting rules performing the delayed data health check',
|
||||
},
|
||||
},
|
||||
errorMessages: {
|
||||
type: 'long',
|
||||
_meta: {
|
||||
description:
|
||||
'total number of alerting rules performing the error messages health check',
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
isReady: () => !!kibanaIndex,
|
||||
|
@ -86,11 +128,65 @@ export function registerCollector(usageCollection: UsageCollectionSetup, kibanaI
|
|||
return acc;
|
||||
}, {} as MlUsageData['alertRules'][typeof ML_ALERT_TYPES.ANOMALY_DETECTION]['count_by_result_type']);
|
||||
|
||||
const jobsHealthRuleInstances = await esClient.search<{
|
||||
alert: {
|
||||
params: MlAnomalyDetectionJobsHealthRuleParams;
|
||||
};
|
||||
}>({
|
||||
index: kibanaIndex,
|
||||
size: 10000,
|
||||
body: {
|
||||
query: {
|
||||
bool: {
|
||||
filter: [
|
||||
{ term: { type: 'alert' } },
|
||||
{
|
||||
term: {
|
||||
'alert.alertTypeId': ML_ALERT_TYPES.AD_JOBS_HEALTH,
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const resultsByCheckType = jobsHealthRuleInstances.body.hits.hits.reduce(
|
||||
(acc, curr) => {
|
||||
const doc = curr._source;
|
||||
if (!doc) return acc;
|
||||
|
||||
const {
|
||||
alert: {
|
||||
params: { testsConfig },
|
||||
},
|
||||
} = doc;
|
||||
|
||||
const resultConfig = getResultJobsHealthRuleConfig(testsConfig);
|
||||
|
||||
acc.datafeed += resultConfig.datafeed.enabled ? 1 : 0;
|
||||
acc.mml += resultConfig.mml.enabled ? 1 : 0;
|
||||
acc.delayedData += resultConfig.delayedData.enabled ? 1 : 0;
|
||||
acc.errorMessages += resultConfig.errorMessages.enabled ? 1 : 0;
|
||||
|
||||
return acc;
|
||||
},
|
||||
{
|
||||
datafeed: 0,
|
||||
mml: 0,
|
||||
delayedData: 0,
|
||||
errorMessages: 0,
|
||||
}
|
||||
);
|
||||
|
||||
return {
|
||||
alertRules: {
|
||||
[ML_ALERT_TYPES.ANOMALY_DETECTION]: {
|
||||
count_by_result_type: countByResultType,
|
||||
},
|
||||
[ML_ALERT_TYPES.AD_JOBS_HEALTH]: {
|
||||
count_by_check_type: resultsByCheckType,
|
||||
},
|
||||
},
|
||||
};
|
||||
},
|
||||
|
|
|
@ -228,6 +228,9 @@
|
|||
},
|
||||
"xpack_ml_anomaly_detection_alert": {
|
||||
"type": "long"
|
||||
},
|
||||
"xpack_ml_anomaly_detection_jobs_health": {
|
||||
"type": "long"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
@ -307,6 +310,9 @@
|
|||
},
|
||||
"xpack_ml_anomaly_detection_alert": {
|
||||
"type": "long"
|
||||
},
|
||||
"xpack_ml_anomaly_detection_jobs_health": {
|
||||
"type": "long"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -3804,6 +3810,38 @@
|
|||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"xpack.ml.anomaly_detection_jobs_health": {
|
||||
"properties": {
|
||||
"count_by_check_type": {
|
||||
"properties": {
|
||||
"datafeed": {
|
||||
"type": "long",
|
||||
"_meta": {
|
||||
"description": "total number of alerting rules performing the not started datafeed health check"
|
||||
}
|
||||
},
|
||||
"mml": {
|
||||
"type": "long",
|
||||
"_meta": {
|
||||
"description": "total number of alerting rules performing the model memory limit health check"
|
||||
}
|
||||
},
|
||||
"delayedData": {
|
||||
"type": "long",
|
||||
"_meta": {
|
||||
"description": "total number of alerting rules performing the delayed data health check"
|
||||
}
|
||||
},
|
||||
"errorMessages": {
|
||||
"type": "long",
|
||||
"_meta": {
|
||||
"description": "total number of alerting rules performing the error messages health check"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue