[ML] Use a new ML endpoint to estimate model memory (#60376)

* [ML] refactor calculate_model_memory_limit route, use estimateModelMemory endpoint

* [ML] refactor validate_model_memory_limit, migrate tests to jest

* [ML] fix typing issue

* [ML] start estimateModelMemory url with /

* [ML] fix typo, filter mlcategory

* [ML] extract getCardinalities function

* [ML] fields_service.ts

* [ML] wip getMaxBucketCardinality

* [ML] refactor and comments

* [ML] fix aggs keys with special characters, fix integration tests

* [ML] use pre-defined job types

* [ML] fallback to 0 in case max bucket cardinality receives null

* [ML] calculateModelMemoryLimit on influencers change

* [ML] fix maxModelMemoryLimit

* [ML] cap aggregation to max 1000 buckets

* [ML] rename intervalDuration
Dima Arnautov authored on 2020-03-19 16:45:40 +01:00, committed by GitHub
parent ae0e35041e
commit 7aa4651292
19 changed files with 817 additions and 632 deletions

View file

@@ -81,7 +81,7 @@ export interface ModelPlotConfig {
// TODO, finish this when it's needed
export interface CustomRule {
actions: any;
scope: object;
conditions: object;
actions: string[];
scope?: object;
conditions: any[];
}
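
For illustration, a value matching the tightened interface might look as follows; a sketch in which the rule fields mirror Elasticsearch ML detector rules, with hypothetical values:

// Hypothetical custom rule matching the new typings.
const rule: CustomRule = {
  actions: ['skip_result'],
  scope: { airline: { filter_id: 'safe_airlines', filter_type: 'include' } },
  conditions: [{ applies_to: 'actual', operator: 'lt', value: 100 }],
};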

View file

@@ -88,16 +88,13 @@ export class MultiMetricJobCreator extends JobCreator {
// called externally to set the model memory limit based on the current detector configuration
public async calculateModelMemoryLimit() {
if (this._splitField === null) {
// no split field, use the default
if (this.jobConfig.analysis_config.detectors.length === 0) {
this.modelMemoryLimit = DEFAULT_MODEL_MEMORY_LIMIT;
} else {
const { modelMemoryLimit } = await ml.calculateModelMemoryLimit({
analysisConfig: this.jobConfig.analysis_config,
indexPattern: this._indexPatternTitle,
splitFieldName: this._splitField.name,
query: this._datafeed_config.query,
fieldNames: this.fields.map(f => f.id),
influencerNames: this._influencers,
timeFieldName: this._job_config.data_description.time_field,
earliestMs: this._start,
latestMs: this._end,

View file

@@ -22,6 +22,7 @@ import {
Datafeed,
CombinedJob,
Detector,
AnalysisConfig,
} from '../../../../common/types/anomaly_detection_jobs';
import { ES_AGGREGATION } from '../../../../common/constants/aggregation_types';
import { FieldRequestConfig } from '../../datavisualizer/index_based/common';
@@ -532,30 +533,24 @@ export const ml = {
},
calculateModelMemoryLimit({
analysisConfig,
indexPattern,
splitFieldName,
query,
fieldNames,
influencerNames,
timeFieldName,
earliestMs,
latestMs,
}: {
analysisConfig: AnalysisConfig;
indexPattern: string;
splitFieldName: string;
query: any;
fieldNames: string[];
influencerNames: string[];
timeFieldName: string;
earliestMs: number;
latestMs: number;
}) {
const body = JSON.stringify({
analysisConfig,
indexPattern,
splitFieldName,
query,
fieldNames,
influencerNames,
timeFieldName,
earliestMs,
latestMs,
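
A minimal usage sketch of the refactored method (values hypothetical, shapes mirroring the API integration tests further down): the payload now carries the whole analysis config instead of splitFieldName, fieldNames, and influencerNames.

// Sketch: calling the refactored client service method.
const analysisConfig: AnalysisConfig = {
  bucket_span: '15m',
  detectors: [
    { function: 'mean', field_name: 'products.base_price', by_field_name: 'geoip.city_name' },
  ],
  influencers: ['geoip.city_name'],
};
const { modelMemoryLimit, estimatedModelMemoryLimit } = await ml.calculateModelMemoryLimit({
  analysisConfig,
  indexPattern: 'ecommerce',
  query: { bool: { must: [{ match_all: {} }] } },
  timeFieldName: 'order_date',
  earliestMs: 1560297859000,
  latestMs: 1562975136000,
});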

View file

@@ -413,6 +413,14 @@ export const elasticsearchJsPlugin = (Client: any, config: any, components: any)
method: 'POST',
});
ml.estimateModelMemory = ca({
url: {
fmt: '/_ml/anomaly_detectors/_estimate_model_memory',
},
needBody: true,
method: 'POST',
});
ml.datafeedPreview = ca({
url: {
fmt: '/_ml/datafeeds/<%=datafeedId%>/_preview',
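
Once registered, server-side code reaches the new Elasticsearch endpoint through this client facade. A minimal sketch, assuming a request-scoped callAsCurrentUser and hypothetical cardinality values:

// POST /_ml/anomaly_detectors/_estimate_model_memory via the method registered above.
const estimate = await callAsCurrentUser('ml.estimateModelMemory', {
  body: {
    analysis_config: analysisConfig,                // the job's analysis config
    overall_cardinality: { 'geoip.city_name': 25 }, // hypothetical overall cardinalities
    max_bucket_cardinality: { customer_gender: 2 }, // hypothetical per-bucket maxima
  },
});
// estimate.model_memory_estimate is a string such as '11mb'.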

View file

@@ -1,20 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
import { APICaller } from 'kibana/server';
export function calculateModelMemoryLimitProvider(
callAsCurrentUser: APICaller
): (
indexPattern: string,
splitFieldName: string,
query: any,
fieldNames: any,
influencerNames: any, // string[] ?
timeFieldName: string,
earliestMs: number,
latestMs: number
) => Promise<any>;

View file

@@ -1,117 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
// calculates the size of the model memory limit used in the job config
// based on the cardinality of the field being used to split the data:
// a base of 10MB plus 64KB per series per detector field, plus 10KB per
// influencer series, rounded up to the nearest MB.
import numeral from '@elastic/numeral';
import { fieldsServiceProvider } from '../fields_service';
export function calculateModelMemoryLimitProvider(callAsCurrentUser) {
const fieldsService = fieldsServiceProvider(callAsCurrentUser);
return function calculateModelMemoryLimit(
indexPattern,
splitFieldName,
query,
fieldNames,
influencerNames,
timeFieldName,
earliestMs,
latestMs,
allowMMLGreaterThanMax = false
) {
return new Promise((response, reject) => {
const limits = {};
callAsCurrentUser('ml.info')
.then(resp => {
if (resp.limits !== undefined && resp.limits.max_model_memory_limit !== undefined) {
limits.max_model_memory_limit = resp.limits.max_model_memory_limit;
}
})
.catch(error => {
reject(error);
});
// find the cardinality of the split field
function splitFieldCardinality() {
return fieldsService.getCardinalityOfFields(
indexPattern,
[splitFieldName],
query,
timeFieldName,
earliestMs,
latestMs
);
}
// find the cardinality of an influencer field
function influencerCardinality(influencerName) {
return fieldsService.getCardinalityOfFields(
indexPattern,
[influencerName],
query,
timeFieldName,
earliestMs,
latestMs
);
}
const calculations = [
splitFieldCardinality(),
...influencerNames.map(inf => influencerCardinality(inf)),
];
Promise.all(calculations)
.then(responses => {
let mmlMB = 0;
const MB = 1000;
responses.forEach((resp, i) => {
let mmlKB = 0;
if (i === 0) {
// first in the list is the basic calculation.
// a base of 10MB plus 64KB per series per detector
// i.e. 10000KB + (64KB * cardinality of split field * number of detectors)
const cardinality = resp[splitFieldName];
mmlKB = 10000;
const SERIES_MULTIPLIER = 64;
const numberOfFields = fieldNames.length;
if (cardinality !== undefined) {
mmlKB += SERIES_MULTIPLIER * cardinality * numberOfFields;
}
} else {
// the rest of the calculations are for influencers fields
// 10KB per series of influencer field
// i.e. 10KB * cardinality of influencer field
const cardinality = resp[influencerNames[i - 1]];
mmlKB = 0;
const SERIES_MULTIPLIER = 10;
if (cardinality !== undefined) {
mmlKB = SERIES_MULTIPLIER * cardinality;
}
}
// convert the total to MB, rounding up.
mmlMB += Math.ceil(mmlKB / MB);
});
// if max_model_memory_limit has been set,
// make sure the estimated value is not greater than it.
if (allowMMLGreaterThanMax === false && limits.max_model_memory_limit !== undefined) {
const maxBytes = numeral(limits.max_model_memory_limit.toUpperCase()).value();
const mmlBytes = numeral(`${mmlMB}MB`).value();
if (mmlBytes > maxBytes) {
mmlMB = Math.floor(maxBytes / numeral('1MB').value());
}
}
response({ modelMemoryLimit: `${mmlMB}MB` });
})
.catch(error => {
reject(error);
});
});
};
}
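
To make the retired heuristic concrete, a worked example under hypothetical inputs: a split field with cardinality 100, two detector fields, and one influencer with cardinality 50.

// The legacy calculation, as implemented above: a 10000KB base plus 64KB per
// split-field series per detector field, plus 10KB per influencer series,
// each part rounded up to the nearest MB.
const splitFieldCardinality = 100; // hypothetical
const numberOfFields = 2;          // hypothetical
const influencerCardinality = 50;  // hypothetical
const MB = 1000;
const baseKB = 10000 + 64 * splitFieldCardinality * numberOfFields; // 22800 KB
const influencerKB = 10 * influencerCardinality;                    // 500 KB
const mmlMB = Math.ceil(baseKB / MB) + Math.ceil(influencerKB / MB); // 23 + 1 = 24 MB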

View file

@@ -0,0 +1,187 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
import numeral from '@elastic/numeral';
import { APICaller } from 'kibana/server';
import { AnalysisConfig } from '../../../common/types/anomaly_detection_jobs';
import { fieldsServiceProvider } from '../fields_service';
interface ModelMemoryEstimationResult {
/**
* The resulting model memory limit
*/
modelMemoryLimit: string;
/**
* Model memory estimated by the Elasticsearch ML endpoint
*/
estimatedModelMemoryLimit: string;
/**
* Maximum model memory limit
*/
maxModelMemoryLimit?: string;
}
/**
* Response of the _estimate_model_memory endpoint.
*/
export interface ModelMemoryEstimate {
model_memory_estimate: string;
}
/**
* Retrieves overall and max bucket cardinalities.
*/
async function getCardinalities(
callAsCurrentUser: APICaller,
analysisConfig: AnalysisConfig,
indexPattern: string,
query: any,
timeFieldName: string,
earliestMs: number,
latestMs: number
): Promise<{
overallCardinality: { [key: string]: number };
maxBucketCardinality: { [key: string]: number };
}> {
/**
* Fields not involved in cardinality check
*/
const excludedKeywords = new Set<string>([
/**
* The keyword used for the output of categorization;
* it has zero cardinality in the actual input data.
*/
'mlcategory',
]);
const fieldsService = fieldsServiceProvider(callAsCurrentUser);
const { detectors, influencers, bucket_span: bucketSpan } = analysisConfig;
let overallCardinality = {};
let maxBucketCardinality = {};
const overallCardinalityFields: Set<string> = detectors.reduce(
(
acc,
{
by_field_name: byFieldName,
partition_field_name: partitionFieldName,
over_field_name: overFieldName,
}
) => {
[byFieldName, partitionFieldName, overFieldName]
.filter(field => field !== undefined && field !== '' && !excludedKeywords.has(field))
.forEach(key => {
acc.add(key as string);
});
return acc;
},
new Set<string>()
);
const maxBucketFieldCardinalities: string[] = influencers.filter(
influencerField =>
typeof influencerField === 'string' &&
!excludedKeywords.has(influencerField) &&
!!influencerField &&
!overallCardinalityFields.has(influencerField)
) as string[];
if (overallCardinalityFields.size > 0) {
overallCardinality = await fieldsService.getCardinalityOfFields(
indexPattern,
[...overallCardinalityFields],
query,
timeFieldName,
earliestMs,
latestMs
);
}
if (maxBucketFieldCardinalities.length > 0) {
maxBucketCardinality = await fieldsService.getMaxBucketCardinalities(
indexPattern,
maxBucketFieldCardinalities,
query,
timeFieldName,
earliestMs,
latestMs,
bucketSpan
);
}
return {
overallCardinality,
maxBucketCardinality,
};
}
export function calculateModelMemoryLimitProvider(callAsCurrentUser: APICaller) {
/**
* Retrieves an estimated model memory limit for the job config,
* based on the cardinality of the fields used to split the data
* and of the influencers.
*/
return async function calculateModelMemoryLimit(
analysisConfig: AnalysisConfig,
indexPattern: string,
query: any,
timeFieldName: string,
earliestMs: number,
latestMs: number,
allowMMLGreaterThanMax = false
): Promise<ModelMemoryEstimationResult> {
let maxModelMemoryLimit;
try {
const resp = await callAsCurrentUser('ml.info');
if (resp?.limits?.max_model_memory_limit !== undefined) {
maxModelMemoryLimit = resp.limits.max_model_memory_limit.toUpperCase();
}
} catch (e) {
throw new Error('Unable to retrieve max model memory limit');
}
const { overallCardinality, maxBucketCardinality } = await getCardinalities(
callAsCurrentUser,
analysisConfig,
indexPattern,
query,
timeFieldName,
earliestMs,
latestMs
);
const estimatedModelMemoryLimit = (
await callAsCurrentUser<ModelMemoryEstimate>('ml.estimateModelMemory', {
body: {
analysis_config: analysisConfig,
overall_cardinality: overallCardinality,
max_bucket_cardinality: maxBucketCardinality,
},
})
).model_memory_estimate.toUpperCase();
let modelMemoryLimit: string = estimatedModelMemoryLimit;
// if max_model_memory_limit has been set,
// make sure the estimated value is not greater than it.
if (!allowMMLGreaterThanMax && maxModelMemoryLimit !== undefined) {
// @ts-ignore
const maxBytes = numeral(maxModelMemoryLimit).value();
// @ts-ignore
const mmlBytes = numeral(estimatedModelMemoryLimit).value();
if (mmlBytes > maxBytes) {
// @ts-ignore
modelMemoryLimit = `${Math.floor(maxBytes / numeral('1MB').value())}MB`;
}
}
return {
estimatedModelMemoryLimit,
modelMemoryLimit,
...(maxModelMemoryLimit ? { maxModelMemoryLimit } : {}),
};
};
}
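
To illustrate the field partitioning in getCardinalities, a sketch under a hypothetical config: detector by/partition/over fields feed the overall cardinality set, influencers not already covered go to the per-bucket set, and 'mlcategory' is excluded from both.

// Hypothetical analysis config:
const config = {
  bucket_span: '15m',
  detectors: [{ function: 'mean', field_name: 'bytes', partition_field_name: 'host' }],
  influencers: ['host', 'user', 'mlcategory'],
} as AnalysisConfig;
// overallCardinalityFields    -> Set { 'host' }  (from partition_field_name)
// maxBucketFieldCardinalities -> ['user']        ('host' is already counted overall;
//                                                 'mlcategory' is excluded)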

View file

@@ -1,21 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
import { APICaller } from 'kibana/server';
export function fieldsServiceProvider(
callAsCurrentUser: APICaller
): {
getCardinalityOfFields: (
index: string[] | string,
fieldNames: string[],
query: any,
timeFieldName: string,
earliestMs: number,
latestMs: number
) => Promise<any>;
getTimeFieldRange: (index: string[] | string, timeFieldName: string, query: any) => Promise<any>;
};

View file

@@ -1,148 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
// Service for carrying out queries to obtain data
// specific to fields in Elasticsearch indices.
export function fieldsServiceProvider(callAsCurrentUser) {
// Obtains the cardinality of one or more fields.
// Returns an Object whose keys are the names of the fields,
// with values equal to the cardinality of the field.
// Any of the supplied fieldNames which are not aggregatable will
// be omitted from the returned Object.
function getCardinalityOfFields(index, fieldNames, query, timeFieldName, earliestMs, latestMs) {
// First check that each of the supplied fieldNames are aggregatable,
// then obtain the cardinality for each of the aggregatable fields.
return new Promise((resolve, reject) => {
callAsCurrentUser('fieldCaps', {
index,
fields: fieldNames,
})
.then(fieldCapsResp => {
const aggregatableFields = [];
fieldNames.forEach(fieldName => {
const fieldInfo = fieldCapsResp.fields[fieldName];
const typeKeys = fieldInfo !== undefined ? Object.keys(fieldInfo) : [];
if (typeKeys.length > 0) {
const fieldType = typeKeys[0];
const isFieldAggregatable = fieldInfo[fieldType].aggregatable;
if (isFieldAggregatable === true) {
aggregatableFields.push(fieldName);
}
}
});
if (aggregatableFields.length > 0) {
// Build the criteria to use in the bool filter part of the request.
// Add criteria for the time range and the datafeed config query.
const mustCriteria = [
{
range: {
[timeFieldName]: {
gte: earliestMs,
lte: latestMs,
format: 'epoch_millis',
},
},
},
];
if (query) {
mustCriteria.push(query);
}
const aggs = aggregatableFields.reduce((obj, field) => {
obj[field] = { cardinality: { field } };
return obj;
}, {});
const body = {
query: {
bool: {
must: mustCriteria,
},
},
size: 0,
_source: {
excludes: [],
},
aggs,
};
callAsCurrentUser('search', {
index,
body,
})
.then(resp => {
const aggregations = resp.aggregations;
if (aggregations !== undefined) {
const results = aggregatableFields.reduce((obj, field) => {
obj[field] = (aggregations[field] || { value: 0 }).value;
return obj;
}, {});
resolve(results);
} else {
resolve({});
}
})
.catch(resp => {
reject(resp);
});
} else {
// None of the fields are aggregatable. Return empty Object.
resolve({});
}
})
.catch(resp => {
reject(resp);
});
});
}
function getTimeFieldRange(index, timeFieldName, query) {
return new Promise((resolve, reject) => {
const obj = { success: true, start: { epoch: 0, string: '' }, end: { epoch: 0, string: '' } };
callAsCurrentUser('search', {
index,
size: 0,
body: {
query,
aggs: {
earliest: {
min: {
field: timeFieldName,
},
},
latest: {
max: {
field: timeFieldName,
},
},
},
},
})
.then(resp => {
if (resp.aggregations && resp.aggregations.earliest && resp.aggregations.latest) {
obj.start.epoch = resp.aggregations.earliest.value;
obj.start.string = resp.aggregations.earliest.value_as_string;
obj.end.epoch = resp.aggregations.latest.value;
obj.end.string = resp.aggregations.latest.value_as_string;
}
resolve(obj);
})
.catch(resp => {
reject(resp);
});
});
}
return {
getCardinalityOfFields,
getTimeFieldRange,
};
}
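
For context, the cardinality check this service performs is a plain cardinality aggregation per aggregatable field, bounded by the job's time range. A sketch of the body getCardinalityOfFields assembles, with hypothetical field and time values:

// Sketch of the search body built by getCardinalityOfFields.
const body = {
  query: {
    bool: {
      must: [
        { range: { order_date: { gte: 1560297859000, lte: 1562975136000, format: 'epoch_millis' } } },
        { bool: { must: [{ match_all: {} }] } }, // the datafeed query, if one was provided
      ],
    },
  },
  size: 0,
  _source: { excludes: [] },
  aggs: { 'geoip.city_name': { cardinality: { field: 'geoip.city_name' } } },
};
// In the response, aggregations['geoip.city_name'].value is the field's cardinality.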

View file

@@ -0,0 +1,296 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
import Boom from 'boom';
import { APICaller } from 'kibana/server';
import { parseInterval } from '../../../common/util/parse_interval';
/**
* Service for carrying out queries to obtain data
* specific to fields in Elasticsearch indices.
*/
export function fieldsServiceProvider(callAsCurrentUser: APICaller) {
/**
* Gets aggregatable fields.
*/
async function getAggregatableFields(
index: string | string[],
fieldNames: string[]
): Promise<string[]> {
const fieldCapsResp = await callAsCurrentUser('fieldCaps', {
index,
fields: fieldNames,
});
const aggregatableFields: string[] = [];
fieldNames.forEach(fieldName => {
const fieldInfo = fieldCapsResp.fields[fieldName];
const typeKeys = fieldInfo !== undefined ? Object.keys(fieldInfo) : [];
if (typeKeys.length > 0) {
const fieldType = typeKeys[0];
const isFieldAggregatable = fieldInfo[fieldType].aggregatable;
if (isFieldAggregatable === true) {
aggregatableFields.push(fieldName);
}
}
});
return aggregatableFields;
}
// Obtains the cardinality of one or more fields.
// Returns an Object whose keys are the names of the fields,
// with values equal to the cardinality of the field.
// Any of the supplied fieldNames which are not aggregatable will
// be omitted from the returned Object.
async function getCardinalityOfFields(
index: string[] | string,
fieldNames: string[],
query: any,
timeFieldName: string,
earliestMs: number,
latestMs: number
): Promise<{ [key: string]: number }> {
const aggregatableFields = await getAggregatableFields(index, fieldNames);
if (aggregatableFields.length === 0) {
return {};
}
// Build the criteria to use in the bool filter part of the request.
// Add criteria for the time range and the datafeed config query.
const mustCriteria = [
{
range: {
[timeFieldName]: {
gte: earliestMs,
lte: latestMs,
format: 'epoch_millis',
},
},
},
];
if (query) {
mustCriteria.push(query);
}
const aggs = aggregatableFields.reduce((obj, field) => {
obj[field] = { cardinality: { field } };
return obj;
}, {} as { [field: string]: { cardinality: { field: string } } });
const body = {
query: {
bool: {
must: mustCriteria,
},
},
size: 0,
_source: {
excludes: [],
},
aggs,
};
const aggregations = (
await callAsCurrentUser('search', {
index,
body,
})
)?.aggregations;
if (!aggregations) {
return {};
}
return aggregatableFields.reduce((obj, field) => {
obj[field] = (aggregations[field] || { value: 0 }).value;
return obj;
}, {} as { [field: string]: number });
}
function getTimeFieldRange(
index: string[] | string,
timeFieldName: string,
query: any
): Promise<any> {
return new Promise((resolve, reject) => {
const obj = { success: true, start: { epoch: 0, string: '' }, end: { epoch: 0, string: '' } };
callAsCurrentUser('search', {
index,
size: 0,
body: {
query,
aggs: {
earliest: {
min: {
field: timeFieldName,
},
},
latest: {
max: {
field: timeFieldName,
},
},
},
},
})
.then(resp => {
if (resp.aggregations && resp.aggregations.earliest && resp.aggregations.latest) {
obj.start.epoch = resp.aggregations.earliest.value;
obj.start.string = resp.aggregations.earliest.value_as_string;
obj.end.epoch = resp.aggregations.latest.value;
obj.end.string = resp.aggregations.latest.value_as_string;
}
resolve(obj);
})
.catch(resp => {
reject(resp);
});
});
}
/**
* Caps provided time boundaries based on the interval.
* @param earliestMs
* @param latestMs
* @param interval
*/
function getSafeTimeRange(
earliestMs: number,
latestMs: number,
interval: string
): { start: number; end: number } {
const maxNumberOfBuckets = 1000;
const end = latestMs;
const intervalDuration = parseInterval(interval);
if (intervalDuration === null) {
throw Boom.badRequest('Interval is invalid');
}
const start = Math.max(
earliestMs,
latestMs - maxNumberOfBuckets * intervalDuration.asMilliseconds()
);
return { start, end };
}
/**
* Retrieves max cardinalities for provided fields from date interval buckets
* using max bucket pipeline aggregation.
*
* @param index
* @param fieldNames - fields to perform cardinality aggregation on
* @param query
* @param timeFieldName
* @param earliestMs
* @param latestMs
* @param interval - a fixed interval for the date histogram aggregation
*/
async function getMaxBucketCardinalities(
index: string[] | string,
fieldNames: string[],
query: any,
timeFieldName: string,
earliestMs: number,
latestMs: number,
interval: string | undefined
): Promise<{ [key: string]: number }> {
if (!interval) {
throw new Error('Interval is required to retrieve max bucket cardinalities.');
}
const aggregatableFields = await getAggregatableFields(index, fieldNames);
if (aggregatableFields.length === 0) {
return {};
}
const { start, end } = getSafeTimeRange(earliestMs, latestMs, interval);
const mustCriteria = [
{
range: {
[timeFieldName]: {
gte: start,
lte: end,
format: 'epoch_millis',
},
},
},
];
if (query) {
mustCriteria.push(query);
}
const dateHistogramAggKey = 'bucket_span_buckets';
/**
* Replace any non-word characters
*/
const getSafeAggName = (field: string) => field.replace(/\W/g, '');
const getMaxBucketAggKey = (field: string) => `max_bucket_${field}`;
const fieldsCardinalityAggs = aggregatableFields.reduce((obj, field) => {
obj[getSafeAggName(field)] = { cardinality: { field } };
return obj;
}, {} as { [field: string]: { cardinality: { field: string } } });
const maxBucketCardinalitiesAggs = Object.keys(fieldsCardinalityAggs).reduce((acc, field) => {
acc[getMaxBucketAggKey(field)] = {
max_bucket: {
buckets_path: `${dateHistogramAggKey}>${field}`,
},
};
return acc;
}, {} as { [key: string]: { max_bucket: { buckets_path: string } } });
const body = {
query: {
bool: {
filter: mustCriteria,
},
},
size: 0,
aggs: {
[dateHistogramAggKey]: {
date_histogram: {
field: timeFieldName,
fixed_interval: interval,
},
aggs: fieldsCardinalityAggs,
},
...maxBucketCardinalitiesAggs,
},
};
const aggregations = (
await callAsCurrentUser('search', {
index,
body,
})
)?.aggregations;
if (!aggregations) {
return {};
}
return aggregatableFields.reduce((obj, field) => {
obj[field] = (aggregations[getMaxBucketAggKey(field)] || { value: 0 }).value ?? 0;
return obj;
}, {} as { [field: string]: number });
}
return {
getCardinalityOfFields,
getTimeFieldRange,
getMaxBucketCardinalities,
};
}
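
The max-per-bucket retrieval composes a fixed-interval date histogram, a per-bucket cardinality aggregation, and a max_bucket pipeline aggregation over it. A sketch of the body it builds for a single hypothetical field follows; start and end are the bounds produced by getSafeTimeRange, i.e. start = max(earliestMs, latestMs - 1000 * bucketSpanMs).

// Sketch of the search body getMaxBucketCardinalities builds for 'geoip.city_name'.
// getSafeAggName strips non-word characters, so the inner agg key is 'geoipcity_name'.
const start = 1560297859000; // hypothetical, after capping to max 1000 buckets
const end = 1562975136000;   // hypothetical
const body = {
  query: {
    bool: {
      filter: [{ range: { order_date: { gte: start, lte: end, format: 'epoch_millis' } } }],
    },
  },
  size: 0,
  aggs: {
    bucket_span_buckets: {
      date_histogram: { field: 'order_date', fixed_interval: '15m' },
      aggs: { geoipcity_name: { cardinality: { field: 'geoip.city_name' } } },
    },
    max_bucket_geoipcity_name: {
      max_bucket: { buckets_path: 'bucket_span_buckets>geoipcity_name' },
    },
  },
};
// In the response, aggregations.max_bucket_geoipcity_name.value holds the highest
// per-bucket cardinality, or null when there are no buckets (hence the ?? 0 fallback above).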

View file

@@ -5,8 +5,9 @@
*/
import { i18n } from '@kbn/i18n';
import { CombinedJob } from '../../../common/types/anomaly_detection_jobs';
export function validateJobObject(job) {
export function validateJobObject(job: CombinedJob | null) {
if (job === null || typeof job !== 'object') {
throw new Error(
i18n.translate('xpack.ml.models.jobValidation.validateJobObject.jobIsNotObjectErrorMessage', {

View file

@@ -1,170 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
import numeral from '@elastic/numeral';
import { validateJobObject } from './validate_job_object';
import { calculateModelMemoryLimitProvider } from '../../models/calculate_model_memory_limit';
import { ALLOWED_DATA_UNITS } from '../../../common/constants/validation';
// The minimum value the backend expects is 1MByte
const MODEL_MEMORY_LIMIT_MINIMUM_BYTES = 1048576;
export async function validateModelMemoryLimit(callWithRequest, job, duration) {
validateJobObject(job);
// retrieve the max_model_memory_limit value from the server
// this will be unset unless the user has set this on their cluster
const mlInfo = await callWithRequest('ml.info');
const maxModelMemoryLimit =
typeof mlInfo.limits === 'undefined' ? undefined : mlInfo.limits.max_model_memory_limit;
// retrieve the model memory limit specified by the user in the job config.
// note, this will probably be the auto-generated value, unless the user has
// overwritten it.
const mml =
typeof job.analysis_limits !== 'undefined' &&
typeof job.analysis_limits.model_memory_limit !== 'undefined'
? job.analysis_limits.model_memory_limit.toUpperCase()
: null;
const splitFieldNames = {};
let splitFieldName = '';
const fieldNames = [];
let runCalcModelMemoryTest = true;
let validModelMemoryLimit = true;
// extract the field names and partition field names from the detectors
// we only want to estimate the mml for multi-metric jobs.
// a multi-metric job will have one partition field, one or more field names
// and no over or by fields
job.analysis_config.detectors.forEach(d => {
if (typeof d.field_name !== 'undefined') {
fieldNames.push(d.field_name);
}
// create a deduplicated list of partition field names.
if (typeof d.partition_field_name !== 'undefined') {
splitFieldNames[d.partition_field_name] = null;
}
// if an over or by field is present, do not run the estimate test
if (typeof d.over_field_name !== 'undefined' || typeof d.by_field_name !== 'undefined') {
runCalcModelMemoryTest = false;
}
});
// if there are no or more than one partition fields, do not run the test
if (Object.keys(splitFieldNames).length === 1) {
splitFieldName = Object.keys(splitFieldNames)[0];
} else {
runCalcModelMemoryTest = false;
}
// if there is no duration, do not run the estimate test
if (
typeof duration === 'undefined' ||
typeof duration.start === 'undefined' ||
typeof duration.end === 'undefined'
) {
runCalcModelMemoryTest = false;
}
const messages = [];
// check that mml is a valid data format
if (mml !== null) {
const mmlSplit = mml.match(/\d+(\w+)/);
const unit = mmlSplit && mmlSplit.length === 2 ? mmlSplit[1] : null;
if (ALLOWED_DATA_UNITS.indexOf(unit) === -1) {
messages.push({
id: 'mml_value_invalid',
mml,
});
// mml is not a valid data format.
// abort all other tests
validModelMemoryLimit = false;
}
}
if (validModelMemoryLimit) {
if (runCalcModelMemoryTest) {
const mmlEstimate = await calculateModelMemoryLimitProvider(callWithRequest)(
job.datafeed_config.indices.join(','),
splitFieldName,
job.datafeed_config.query,
fieldNames,
job.analysis_config.influencers,
job.data_description.time_field,
duration.start,
duration.end,
true
);
const mmlEstimateBytes = numeral(mmlEstimate.modelMemoryLimit).value();
let runEstimateGreaterThenMml = true;
// if max_model_memory_limit has been set,
// make sure the estimated value is not greater than it.
if (typeof maxModelMemoryLimit !== 'undefined') {
const maxMmlBytes = numeral(maxModelMemoryLimit.toUpperCase()).value();
if (mmlEstimateBytes > maxMmlBytes) {
runEstimateGreaterThenMml = false;
messages.push({
id: 'estimated_mml_greater_than_max_mml',
maxModelMemoryLimit,
mmlEstimate,
});
}
}
// check to see if the estimated mml is greater than the
// user-specified mml
// do not run this if we've already found that it's larger than
// the max mml
if (runEstimateGreaterThenMml && mml !== null) {
const mmlBytes = numeral(mml).value();
if (mmlBytes < MODEL_MEMORY_LIMIT_MINIMUM_BYTES) {
messages.push({
id: 'mml_value_invalid',
mml,
});
} else if (mmlEstimateBytes / 2 > mmlBytes) {
messages.push({
id: 'half_estimated_mml_greater_than_mml',
maxModelMemoryLimit,
mml,
});
} else if (mmlEstimateBytes > mmlBytes) {
messages.push({
id: 'estimated_mml_greater_than_mml',
maxModelMemoryLimit,
mml,
});
}
}
}
// if max_model_memory_limit has been set,
// make sure the user defined MML is not greater than it
if (maxModelMemoryLimit !== undefined && mml !== null) {
const maxMmlBytes = numeral(maxModelMemoryLimit.toUpperCase()).value();
const mmlBytes = numeral(mml).value();
if (mmlBytes > maxMmlBytes) {
messages.push({
id: 'mml_greater_than_max_mml',
maxModelMemoryLimit,
mml,
});
}
}
}
if (messages.length === 0 && runCalcModelMemoryTest === true) {
messages.push({ id: 'success_mml' });
}
return Promise.resolve(messages);
}

View file

@@ -4,8 +4,10 @@
* you may not use this file except in compliance with the Elastic License.
*/
import expect from '@kbn/expect';
import { validateModelMemoryLimit } from '../validate_model_memory_limit';
import { APICaller } from 'kibana/server';
import { CombinedJob, Detector } from '../../../common/types/anomaly_detection_jobs';
import { ModelMemoryEstimate } from '../calculate_model_memory_limit/calculate_model_memory_limit';
import { validateModelMemoryLimit } from './validate_model_memory_limit';
describe('ML - validateModelMemoryLimit', () => {
// mock info endpoint response
@@ -61,29 +63,43 @@ describe('ML - validateModelMemoryLimit', () => {
},
};
// mock estimate model memory
const modelMemoryEstimateResponse: ModelMemoryEstimate = {
model_memory_estimate: '40mb',
};
interface MockAPICallResponse {
'ml.estimateModelMemory'?: ModelMemoryEstimate;
}
// mock callWithRequest
// used in four places:
// - to retrieve the info endpoint
// - to search for cardinality of split field
// - to retrieve field capabilities used in search for split field cardinality
// - to estimate the model memory
function callWithRequest(call) {
if (typeof call === 'undefined') {
return Promise.reject();
}
const getMockCallWithRequest = ({
'ml.estimateModelMemory': estimateModelMemory,
}: MockAPICallResponse = {}) =>
((call: string) => {
if (typeof call === 'undefined') {
return Promise.reject();
}
let response = {};
if (call === 'ml.info') {
response = mlInfoResponse;
} else if (call === 'search') {
response = cardinalitySearchResponse;
} else if (call === 'fieldCaps') {
response = fieldCapsResponse;
}
return Promise.resolve(response);
}
let response = {};
if (call === 'ml.info') {
response = mlInfoResponse;
} else if (call === 'search') {
response = cardinalitySearchResponse;
} else if (call === 'fieldCaps') {
response = fieldCapsResponse;
} else if (call === 'ml.estimateModelMemory') {
response = estimateModelMemory || modelMemoryEstimateResponse;
}
return Promise.resolve(response);
}) as APICaller;
function getJobConfig(influencers = [], detectors = []) {
return {
function getJobConfig(influencers: string[] = [], detectors: Detector[] = []) {
return ({
analysis_config: { detectors, influencers },
data_description: { time_field: '@timestamp' },
datafeed_config: {
@@ -92,11 +108,11 @@ describe('ML - validateModelMemoryLimit', () => {
analysis_limits: {
model_memory_limit: '20mb',
},
};
} as unknown) as CombinedJob;
}
// create a specified number of mock detectors
function createDetectors(numberOfDetectors) {
function createDetectors(numberOfDetectors: number): Detector[] {
const dtrs = [];
for (let i = 0; i < numberOfDetectors; i++) {
dtrs.push({
@@ -105,28 +121,28 @@ describe('ML - validateModelMemoryLimit', () => {
partition_field_name: 'instance',
});
}
return dtrs;
return dtrs as Detector[];
}
// tests
it('Called with no duration or split and mml within limit', () => {
const job = getJobConfig();
const duration = undefined;
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
const ids = messages.map(m => m.id);
expect(ids).to.eql([]);
expect(ids).toEqual([]);
});
});
it('Called with no duration or split and mml above limit', () => {
const job = getJobConfig();
const duration = undefined;
// @ts-ignore
job.analysis_limits.model_memory_limit = '31mb';
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
const ids = messages.map(m => m.id);
expect(ids).to.eql(['mml_greater_than_max_mml']);
expect(ids).toEqual(['mml_greater_than_max_mml']);
});
});
@@ -134,11 +150,16 @@ describe('ML - validateModelMemoryLimit', () => {
const dtrs = createDetectors(10);
const job = getJobConfig(['instance'], dtrs);
const duration = { start: 0, end: 1 };
// @ts-ignore
job.analysis_limits.model_memory_limit = '20mb';
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
return validateModelMemoryLimit(
getMockCallWithRequest({ 'ml.estimateModelMemory': { model_memory_estimate: '66mb' } }),
job,
duration
).then(messages => {
const ids = messages.map(m => m.id);
expect(ids).to.eql(['estimated_mml_greater_than_max_mml']);
expect(ids).toEqual(['estimated_mml_greater_than_max_mml']);
});
});
@@ -146,11 +167,16 @@ describe('ML - validateModelMemoryLimit', () => {
const dtrs = createDetectors(2);
const job = getJobConfig(['instance'], dtrs);
const duration = { start: 0, end: 1 };
// @ts-ignore
job.analysis_limits.model_memory_limit = '30mb';
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
return validateModelMemoryLimit(
getMockCallWithRequest({ 'ml.estimateModelMemory': { model_memory_estimate: '24mb' } }),
job,
duration
).then(messages => {
const ids = messages.map(m => m.id);
expect(ids).to.eql(['success_mml']);
expect(ids).toEqual(['success_mml']);
});
});
@@ -158,11 +184,16 @@ describe('ML - validateModelMemoryLimit', () => {
const dtrs = createDetectors(2);
const job = getJobConfig(['instance'], dtrs);
const duration = { start: 0, end: 1 };
// @ts-ignore
job.analysis_limits.model_memory_limit = '10mb';
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
return validateModelMemoryLimit(
getMockCallWithRequest({ 'ml.estimateModelMemory': { model_memory_estimate: '22mb' } }),
job,
duration
).then(messages => {
const ids = messages.map(m => m.id);
expect(ids).to.eql(['half_estimated_mml_greater_than_mml']);
expect(ids).toEqual(['half_estimated_mml_greater_than_mml']);
});
});
@@ -171,11 +202,12 @@ describe('ML - validateModelMemoryLimit', () => {
const job = getJobConfig(['instance'], dtrs);
const duration = { start: 0, end: 1 };
delete mlInfoResponse.limits.max_model_memory_limit;
// @ts-ignore
job.analysis_limits.model_memory_limit = '10mb';
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
const ids = messages.map(m => m.id);
expect(ids).to.eql(['half_estimated_mml_greater_than_mml']);
expect(ids).toEqual(['half_estimated_mml_greater_than_mml']);
});
});
@@ -183,11 +215,16 @@ describe('ML - validateModelMemoryLimit', () => {
const dtrs = createDetectors(1);
const job = getJobConfig(['instance'], dtrs);
const duration = { start: 0, end: 1 };
// @ts-ignore
job.analysis_limits.model_memory_limit = '20mb';
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
return validateModelMemoryLimit(
getMockCallWithRequest({ 'ml.estimateModelMemory': { model_memory_estimate: '19mb' } }),
job,
duration
).then(messages => {
const ids = messages.map(m => m.id);
expect(ids).to.eql(['success_mml']);
expect(ids).toEqual(['success_mml']);
});
});
@@ -195,11 +232,12 @@ describe('ML - validateModelMemoryLimit', () => {
const dtrs = createDetectors(1);
const job = getJobConfig(['instance'], dtrs);
const duration = { start: 0, end: 1 };
// @ts-ignore
job.analysis_limits.model_memory_limit = '0mb';
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
const ids = messages.map(m => m.id);
expect(ids).to.eql(['mml_value_invalid']);
expect(ids).toEqual(['mml_value_invalid']);
});
});
@@ -207,11 +245,12 @@ describe('ML - validateModelMemoryLimit', () => {
const dtrs = createDetectors(1);
const job = getJobConfig(['instance'], dtrs);
const duration = { start: 0, end: 1 };
// @ts-ignore
job.analysis_limits.model_memory_limit = '10mbananas';
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
const ids = messages.map(m => m.id);
expect(ids).to.eql(['mml_value_invalid']);
expect(ids).toEqual(['mml_value_invalid']);
});
});
@@ -219,11 +258,12 @@ describe('ML - validateModelMemoryLimit', () => {
const dtrs = createDetectors(1);
const job = getJobConfig(['instance'], dtrs);
const duration = { start: 0, end: 1 };
// @ts-ignore
job.analysis_limits.model_memory_limit = '10';
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
const ids = messages.map(m => m.id);
expect(ids).to.eql(['mml_value_invalid']);
expect(ids).toEqual(['mml_value_invalid']);
});
});
@@ -231,11 +271,12 @@ describe('ML - validateModelMemoryLimit', () => {
const dtrs = createDetectors(1);
const job = getJobConfig(['instance'], dtrs);
const duration = { start: 0, end: 1 };
// @ts-ignore
job.analysis_limits.model_memory_limit = 'mb';
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
const ids = messages.map(m => m.id);
expect(ids).to.eql(['mml_value_invalid']);
expect(ids).toEqual(['mml_value_invalid']);
});
});
@@ -243,11 +284,12 @@ describe('ML - validateModelMemoryLimit', () => {
const dtrs = createDetectors(1);
const job = getJobConfig(['instance'], dtrs);
const duration = { start: 0, end: 1 };
// @ts-ignore
job.analysis_limits.model_memory_limit = 'asdf';
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
const ids = messages.map(m => m.id);
expect(ids).to.eql(['mml_value_invalid']);
expect(ids).toEqual(['mml_value_invalid']);
});
});
@@ -255,11 +297,12 @@ describe('ML - validateModelMemoryLimit', () => {
const dtrs = createDetectors(1);
const job = getJobConfig(['instance'], dtrs);
const duration = { start: 0, end: 1 };
// @ts-ignore
job.analysis_limits.model_memory_limit = '1023KB';
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
const ids = messages.map(m => m.id);
expect(ids).to.eql(['mml_value_invalid']);
expect(ids).toEqual(['mml_value_invalid']);
});
});
@@ -267,11 +310,12 @@ describe('ML - validateModelMemoryLimit', () => {
const dtrs = createDetectors(1);
const job = getJobConfig(['instance'], dtrs);
const duration = { start: 0, end: 1 };
// @ts-ignore
job.analysis_limits.model_memory_limit = '1024KB';
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
const ids = messages.map(m => m.id);
expect(ids).to.eql(['half_estimated_mml_greater_than_mml']);
expect(ids).toEqual(['half_estimated_mml_greater_than_mml']);
});
});
@@ -279,11 +323,12 @@ describe('ML - validateModelMemoryLimit', () => {
const dtrs = createDetectors(1);
const job = getJobConfig(['instance'], dtrs);
const duration = { start: 0, end: 1 };
// @ts-ignore
job.analysis_limits.model_memory_limit = '6MB';
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
const ids = messages.map(m => m.id);
expect(ids).to.eql(['half_estimated_mml_greater_than_mml']);
expect(ids).toEqual(['half_estimated_mml_greater_than_mml']);
});
});
@@ -291,11 +336,16 @@ describe('ML - validateModelMemoryLimit', () => {
const dtrs = createDetectors(1);
const job = getJobConfig(['instance'], dtrs);
const duration = { start: 0, end: 1 };
// @ts-ignore
job.analysis_limits.model_memory_limit = '20MB';
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
return validateModelMemoryLimit(
getMockCallWithRequest({ 'ml.estimateModelMemory': { model_memory_estimate: '20mb' } }),
job,
duration
).then(messages => {
const ids = messages.map(m => m.id);
expect(ids).to.eql(['success_mml']);
expect(ids).toEqual(['success_mml']);
});
});
});

View file

@@ -0,0 +1,135 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
import numeral from '@elastic/numeral';
import { APICaller } from 'kibana/server';
import { CombinedJob } from '../../../common/types/anomaly_detection_jobs';
import { validateJobObject } from './validate_job_object';
import { calculateModelMemoryLimitProvider } from '../calculate_model_memory_limit';
import { ALLOWED_DATA_UNITS } from '../../../common/constants/validation';
// The minimum value the backend expects is 1MByte
const MODEL_MEMORY_LIMIT_MINIMUM_BYTES = 1048576;
export async function validateModelMemoryLimit(
callWithRequest: APICaller,
job: CombinedJob,
duration?: { start?: number; end?: number }
) {
validateJobObject(job);
// retrieve the model memory limit specified by the user in the job config.
// note, this will probably be the auto-generated value, unless the user has
// overwritten it.
const mml = job?.analysis_limits?.model_memory_limit?.toUpperCase() ?? null;
const messages = [];
// check that mml is a valid data format
if (mml !== null) {
const mmlSplit = mml.match(/\d+(\w+)/);
const unit = mmlSplit && mmlSplit.length === 2 ? mmlSplit[1] : null;
if (unit === null || !ALLOWED_DATA_UNITS.includes(unit)) {
messages.push({
id: 'mml_value_invalid',
mml,
});
// mml is not a valid data format.
// abort all other tests
return messages;
}
}
// if there is no duration, do not run the estimate test
const runCalcModelMemoryTest =
duration !== undefined && duration.start !== undefined && duration.end !== undefined;
// retrieve the max_model_memory_limit value from the server
// this will be unset unless the user has set this on their cluster
const maxModelMemoryLimit: string | undefined = (
await callWithRequest('ml.info')
)?.limits?.max_model_memory_limit?.toUpperCase();
if (runCalcModelMemoryTest) {
const { modelMemoryLimit } = await calculateModelMemoryLimitProvider(callWithRequest)(
job.analysis_config,
job.datafeed_config.indices.join(','),
job.datafeed_config.query,
job.data_description.time_field,
duration!.start as number,
duration!.end as number,
true
);
// @ts-ignore
const mmlEstimateBytes: number = numeral(modelMemoryLimit).value();
let runEstimateGreaterThenMml = true;
// if max_model_memory_limit has been set,
// make sure the estimated value is not greater than it.
if (typeof maxModelMemoryLimit !== 'undefined') {
// @ts-ignore
const maxMmlBytes: number = numeral(maxModelMemoryLimit).value();
if (mmlEstimateBytes > maxMmlBytes) {
runEstimateGreaterThenMml = false;
messages.push({
id: 'estimated_mml_greater_than_max_mml',
maxModelMemoryLimit,
modelMemoryLimit,
});
}
}
// check to see if the estimated mml is greater than the
// user-specified mml
// do not run this if we've already found that it's larger than
// the max mml
if (runEstimateGreaterThenMml && mml !== null) {
// @ts-ignore
const mmlBytes: number = numeral(mml).value();
if (mmlBytes < MODEL_MEMORY_LIMIT_MINIMUM_BYTES) {
messages.push({
id: 'mml_value_invalid',
mml,
});
} else if (mmlEstimateBytes / 2 > mmlBytes) {
messages.push({
id: 'half_estimated_mml_greater_than_mml',
maxModelMemoryLimit,
mml,
});
} else if (mmlEstimateBytes > mmlBytes) {
messages.push({
id: 'estimated_mml_greater_than_mml',
maxModelMemoryLimit,
mml,
});
}
}
}
// if max_model_memory_limit has been set,
// make sure the user defined MML is not greater than it
if (maxModelMemoryLimit !== undefined && mml !== null) {
// @ts-ignore
const maxMmlBytes = numeral(maxModelMemoryLimit).value();
// @ts-ignore
const mmlBytes = numeral(mml).value();
if (mmlBytes > maxMmlBytes) {
messages.push({
id: 'mml_greater_than_max_mml',
maxModelMemoryLimit,
mml,
});
}
}
if (messages.length === 0 && runCalcModelMemoryTest === true) {
messages.push({ id: 'success_mml' });
}
return messages;
}
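
A usage sketch, assuming a request-scoped callWithRequest and a prepared CombinedJob (both hypothetical here); the ids returned are the message ids emitted above:

// Sketch: running the validator against a job over a fixed time range.
const messages = await validateModelMemoryLimit(callWithRequest, job, {
  start: 1560297859000,
  end: 1562975136000,
});
// Possible ids: 'mml_value_invalid', 'estimated_mml_greater_than_max_mml',
// 'half_estimated_mml_greater_than_mml', 'estimated_mml_greater_than_mml',
// 'mml_greater_than_max_mml', and 'success_mml'.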

View file

@@ -148,7 +148,7 @@ export function jobRoutes({ router, mlLicense }: RouteInitialization) {
params: schema.object({
jobId: schema.string(),
}),
body: schema.object({ ...anomalyDetectionJobSchema }),
body: schema.object(anomalyDetectionJobSchema),
},
},
mlLicense.fullLicenseAPIGuard(async (context, request, response) => {

View file

@@ -7,6 +7,7 @@
import Boom from 'boom';
import { RequestHandlerContext } from 'kibana/server';
import { schema, TypeOf } from '@kbn/config-schema';
import { AnalysisConfig } from '../../common/types/anomaly_detection_jobs';
import { wrapError } from '../client/error_wrapper';
import { RouteInitialization } from '../types';
import {
@@ -29,23 +30,12 @@ export function jobValidationRoutes({ router, mlLicense }: RouteInitialization,
context: RequestHandlerContext,
payload: CalculateModelMemoryLimitPayload
) {
const {
indexPattern,
splitFieldName,
query,
fieldNames,
influencerNames,
timeFieldName,
earliestMs,
latestMs,
} = payload;
const { analysisConfig, indexPattern, query, timeFieldName, earliestMs, latestMs } = payload;
return calculateModelMemoryLimitProvider(context.ml!.mlClient.callAsCurrentUser)(
analysisConfig as AnalysisConfig,
indexPattern,
splitFieldName,
query,
fieldNames,
influencerNames,
timeFieldName,
earliestMs,
latestMs
@@ -102,7 +92,7 @@ export function jobValidationRoutes({ router, mlLicense }: RouteInitialization,
*
* @api {post} /api/ml/validate/calculate_model_memory_limit Calculates model memory limit
* @apiName CalculateModelMemoryLimit
* @apiDescription Calculates the model memory limit
* @apiDescription Calls the _estimate_model_memory endpoint to retrieve the model memory estimate.
*
* @apiSuccess {String} modelMemoryLimit
*/

View file

@@ -63,14 +63,16 @@ export const anomalyDetectionUpdateJobSchema = {
groups: schema.maybe(schema.arrayOf(schema.maybe(schema.string()))),
};
export const analysisConfigSchema = schema.object({
bucket_span: schema.maybe(schema.string()),
summary_count_field_name: schema.maybe(schema.string()),
detectors: schema.arrayOf(detectorSchema),
influencers: schema.arrayOf(schema.maybe(schema.string())),
categorization_field_name: schema.maybe(schema.string()),
});
export const anomalyDetectionJobSchema = {
analysis_config: schema.object({
bucket_span: schema.maybe(schema.string()),
summary_count_field_name: schema.maybe(schema.string()),
detectors: schema.arrayOf(detectorSchema),
influencers: schema.arrayOf(schema.maybe(schema.string())),
categorization_field_name: schema.maybe(schema.string()),
}),
analysis_config: analysisConfigSchema,
analysis_limits: schema.maybe(
schema.object({
categorization_examples_limit: schema.maybe(schema.number()),
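
Extracting analysisConfigSchema also lets other schemas reuse it (see the modelMemoryLimitSchema change below). A minimal validation sketch, assuming @kbn/config-schema's validate method and hypothetical values:

// Throws if the object does not match the schema; returns the typed value otherwise.
const validated = analysisConfigSchema.validate({
  bucket_span: '15m',
  detectors: [{ function: 'count' }],
  influencers: [],
});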

View file

@@ -5,7 +5,7 @@
*/
import { schema } from '@kbn/config-schema';
import { anomalyDetectionJobSchema } from './anomaly_detectors_schema';
import { analysisConfigSchema, anomalyDetectionJobSchema } from './anomaly_detectors_schema';
import { datafeedConfigSchema } from './datafeeds_schema';
export const estimateBucketSpanSchema = schema.object({
@@ -20,11 +20,9 @@
});
export const modelMemoryLimitSchema = schema.object({
analysisConfig: analysisConfigSchema,
indexPattern: schema.string(),
splitFieldName: schema.string(),
query: schema.any(),
fieldNames: schema.arrayOf(schema.string()),
influencerNames: schema.arrayOf(schema.maybe(schema.string())),
timeFieldName: schema.string(),
earliestMs: schema.number(),
latestMs: schema.number(),
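
A request body accepted by the updated schema might look like this (values hypothetical, mirroring the API integration tests below):

const payload = {
  analysisConfig: {
    bucket_span: '15m',
    detectors: [{ function: 'mean', field_name: 'products.base_price' }],
    influencers: ['geoip.city_name'],
  },
  indexPattern: 'ecommerce',
  query: { bool: { must: [{ match_all: {} }], filter: [], must_not: [] } },
  timeFieldName: 'order_date',
  earliestMs: 1560297859000,
  latestMs: 1562975136000,
};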

View file

@@ -21,14 +21,20 @@ export default ({ getService }: FtrProviderContext) => {
const testDataList = [
{
testTitleSuffix: 'with 0 metrics, 0 influencers and no split field',
testTitleSuffix: 'when no partition field is provided with regular function',
user: USER.ML_POWERUSER,
requestBody: {
indexPattern: 'ecommerce',
splitFieldName: '',
analysisConfig: {
bucket_span: '15m',
detectors: [
{
function: 'mean',
},
],
influencers: [],
},
query: { bool: { must: [{ match_all: {} }], filter: [], must_not: [] } },
fieldNames: ['__ml_event_rate_count__'],
influencerNames: [],
timeFieldName: 'order_date',
earliestMs: 1560297859000,
latestMs: 1562975136000,
@@ -38,7 +44,8 @@ export default ({ getService }: FtrProviderContext) => {
responseBody: {
statusCode: 400,
error: 'Bad Request',
message: "[illegal_argument_exception] specified fields can't be null or empty",
message:
'[status_exception] Unless a count or temporal function is used one of field_name, by_field_name or over_field_name must be set',
},
},
},
@@ -47,72 +54,79 @@ export default ({ getService }: FtrProviderContext) => {
user: USER.ML_POWERUSER,
requestBody: {
indexPattern: 'ecommerce',
splitFieldName: 'geoip.city_name',
analysisConfig: {
bucket_span: '15m',
detectors: [
{
function: 'avg',
field_name: 'geoip.city_name',
by_field_name: 'geoip.city_name',
},
],
influencers: ['geoip.city_name'],
},
query: { bool: { must: [{ match_all: {} }], filter: [], must_not: [] } },
fieldNames: ['products.base_price'],
influencerNames: ['geoip.city_name'],
timeFieldName: 'order_date',
earliestMs: 1560297859000,
latestMs: 1562975136000,
},
expected: {
responseCode: 200,
responseBody: { modelMemoryLimit: '12MB' },
responseBody: { modelMemoryLimit: '11MB', estimatedModelMemoryLimit: '11MB' },
},
},
{
testTitleSuffix: 'with 3 metrics, 3 influencers, split by city',
testTitleSuffix: 'with 3 influencers, split by city',
user: USER.ML_POWERUSER,
requestBody: {
indexPattern: 'ecommerce',
splitFieldName: 'geoip.city_name',
analysisConfig: {
bucket_span: '15m',
detectors: [
{
function: 'mean',
by_field_name: 'geoip.city_name',
field_name: 'geoip.city_name',
},
],
influencers: ['geoip.city_name', 'customer_gender', 'customer_full_name.keyword'],
},
query: { bool: { must: [{ match_all: {} }], filter: [], must_not: [] } },
fieldNames: ['products.base_price', 'taxful_total_price', 'products.discount_amount'],
influencerNames: ['geoip.city_name', 'customer_gender', 'customer_full_name.keyword'],
timeFieldName: 'order_date',
earliestMs: 1560297859000,
latestMs: 1562975136000,
},
expected: {
responseCode: 200,
responseBody: { modelMemoryLimit: '14MB' },
responseBody: { estimatedModelMemoryLimit: '11MB', modelMemoryLimit: '11MB' },
},
},
{
testTitleSuffix: 'with 4 metrics, 4 influencers, split by customer_id',
testTitleSuffix: '4 influencers, split by customer_id and filtering by country code',
user: USER.ML_POWERUSER,
requestBody: {
indexPattern: 'ecommerce',
splitFieldName: 'customer_id',
query: { bool: { must: [{ match_all: {} }], filter: [], must_not: [] } },
fieldNames: [
'geoip.country_iso_code',
'taxless_total_price',
'taxful_total_price',
'products.discount_amount',
],
influencerNames: [
'customer_id',
'geoip.country_iso_code',
'products.discount_percentage',
'products.discount_amount',
],
timeFieldName: 'order_date',
earliestMs: 1560297859000,
latestMs: 1562975136000,
},
expected: {
responseCode: 200,
responseBody: { modelMemoryLimit: '23MB' },
},
},
{
testTitleSuffix:
'with 4 metrics, 4 influencers, split by customer_id and filtering by country code',
user: USER.ML_POWERUSER,
requestBody: {
indexPattern: 'ecommerce',
splitFieldName: 'customer_id',
analysisConfig: {
bucket_span: '2d',
detectors: [
{
function: 'mean',
by_field_name: 'customer_id.city_name',
field_name: 'customer_id.city_name',
},
{
function: 'avg',
by_field_name: 'manufacturer.keyword',
field_name: 'manufacturer.keyword',
},
],
influencers: [
'geoip.country_iso_code',
'products.discount_percentage',
'products.discount_amount',
'day_of_week',
],
},
query: {
bool: {
filter: {
@@ -122,25 +136,13 @@ export default ({ getService }: FtrProviderContext) => {
},
},
},
fieldNames: [
'geoip.country_iso_code',
'taxless_total_price',
'taxful_total_price',
'products.discount_amount',
],
influencerNames: [
'customer_id',
'geoip.country_iso_code',
'products.discount_percentage',
'products.discount_amount',
],
timeFieldName: 'order_date',
earliestMs: 1560297859000,
latestMs: 1562975136000,
},
expected: {
responseCode: 200,
responseBody: { modelMemoryLimit: '14MB' },
responseBody: { estimatedModelMemoryLimit: '12MB', modelMemoryLimit: '12MB' },
},
},
];
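
For completeness, a sketch of how one of these cases might be exercised; the supertest client and credentials here are hypothetical stand-ins for the FTR services this suite uses:

// Hypothetical FTR-style request against POST /api/ml/validate/calculate_model_memory_limit.
const { body } = await supertest
  .post('/api/ml/validate/calculate_model_memory_limit')
  .auth(testData.user, 'changeme') // hypothetical credentials
  .set('kbn-xsrf', 'kibana')
  .send(testData.requestBody)
  .expect(testData.expected.responseCode);
// body is then compared against testData.expected.responseBody.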