[ML] Fixes anomaly chart and validation for one week bucket span (#69671)

* [ML] Fixes anomaly chart and validation for one week bucket span

* [ML] Fix interval Jest tests
This commit is contained in:
Pete Harverson 2020-06-24 09:20:38 +01:00 committed by GitHub
parent 33fb3e832c
commit b270321ff3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 99 additions and 46 deletions

View file

@ -6,6 +6,7 @@
import _ from 'lodash';
import semver from 'semver';
import { Duration } from 'moment';
// @ts-ignore
import numeral from '@elastic/numeral';
@ -433,7 +434,7 @@ export function basicJobValidation(
messages.push({ id: 'bucket_span_empty' });
valid = false;
} else {
if (isValidTimeFormat(job.analysis_config.bucket_span)) {
if (isValidTimeInterval(job.analysis_config.bucket_span)) {
messages.push({
id: 'bucket_span_valid',
bucketSpan: job.analysis_config.bucket_span,
@ -490,14 +491,14 @@ export function basicDatafeedValidation(datafeed: Datafeed): ValidationResults {
if (datafeed) {
let queryDelayMessage = { id: 'query_delay_valid' };
if (isValidTimeFormat(datafeed.query_delay) === false) {
if (isValidTimeInterval(datafeed.query_delay) === false) {
queryDelayMessage = { id: 'query_delay_invalid' };
valid = false;
}
messages.push(queryDelayMessage);
let frequencyMessage = { id: 'frequency_valid' };
if (isValidTimeFormat(datafeed.frequency) === false) {
if (isValidTimeInterval(datafeed.frequency) === false) {
frequencyMessage = { id: 'frequency_invalid' };
valid = false;
}
@ -591,12 +592,33 @@ export function validateGroupNames(job: Job): ValidationResults {
};
}
function isValidTimeFormat(value: string | undefined): boolean {
/**
* Parses the supplied string to a time interval suitable for use in an ML anomaly
* detection job or datafeed.
* @param value the string to parse
* @return {Duration} the parsed interval, or null if it does not represent a valid
* time interval.
*/
export function parseTimeIntervalForJob(value: string | undefined): Duration | null {
if (value === undefined) {
return null;
}
// Must be a valid interval, greater than zero,
// and if specified in ms must be a multiple of 1000ms.
const interval = parseInterval(value, true);
return interval !== null && interval.asMilliseconds() !== 0 && interval.milliseconds() === 0
? interval
: null;
}
// Checks that the value for a field which represents a time interval,
// such as a job bucket span or datafeed query delay, is valid.
function isValidTimeInterval(value: string | undefined): boolean {
if (value === undefined) {
return true;
}
const interval = parseInterval(value);
return interval !== null && interval.asMilliseconds() !== 0;
return parseTimeIntervalForJob(value) !== null;
}
// Returns the latest of the last source data and last processed bucket timestamp,

View file

@ -7,7 +7,7 @@
import { parseInterval } from './parse_interval';
describe('ML parse interval util', () => {
test('correctly parses an interval containing unit and value', () => {
test('should correctly parse an interval containing a valid unit and value', () => {
expect(parseInterval('1d')!.as('d')).toBe(1);
expect(parseInterval('2y')!.as('y')).toBe(2);
expect(parseInterval('5M')!.as('M')).toBe(5);
@ -20,15 +20,25 @@ describe('ML parse interval util', () => {
expect(parseInterval('0s')!.as('h')).toBe(0);
});
test('correctly handles zero value intervals', () => {
test('should correctly handle zero value intervals', () => {
expect(parseInterval('0h')!.as('h')).toBe(0);
expect(parseInterval('0d')).toBe(null);
});
test('returns null for an invalid interval', () => {
test('should return null for an invalid interval', () => {
expect(parseInterval('')).toBe(null);
expect(parseInterval('234asdf')).toBe(null);
expect(parseInterval('m')).toBe(null);
expect(parseInterval('1.5h')).toBe(null);
});
test('should correctly check for whether the interval units are valid Elasticsearch time units', () => {
expect(parseInterval('100s', true)!.as('s')).toBe(100);
expect(parseInterval('5m', true)!.as('m')).toBe(5);
expect(parseInterval('24h', true)!.as('h')).toBe(24);
expect(parseInterval('7d', true)!.as('d')).toBe(7);
expect(parseInterval('1w', true)).toBe(null);
expect(parseInterval('1M', true)).toBe(null);
expect(parseInterval('1y', true)).toBe(null);
});
});

View file

@ -16,7 +16,15 @@ const INTERVAL_STRING_RE = new RegExp('^([0-9]*)\\s*(' + dateMath.units.join('|'
// for units of hour or less.
const SUPPORT_ZERO_DURATION_UNITS: SupportedUnits[] = ['ms', 's', 'm', 'h'];
// List of time units which are supported for use in Elasticsearch durations
// (such as anomaly detection job bucket spans)
// See https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#time-units
const SUPPORT_ES_DURATION_UNITS: SupportedUnits[] = ['ms', 's', 'm', 'h', 'd'];
// Parses an interval String, such as 7d, 1h or 30m to a moment duration.
// Optionally carries out an additional check that the interval is supported as a
// time unit by Elasticsearch, as units greater than 'd' for example cannot be used
// for anomaly detection job bucket spans.
// Differs from the Kibana ui/utils/parse_interval in the following ways:
// 1. A value-less interval such as 'm' is not allowed - in line with the ML back-end
// not accepting such interval Strings for the bucket span of a job.
@ -25,7 +33,7 @@ const SUPPORT_ZERO_DURATION_UNITS: SupportedUnits[] = ['ms', 's', 'm', 'h'];
// to work with units less than 'day'.
// 3. Fractional intervals e.g. 1.5h or 4.5d are not allowed, in line with the behaviour
// of the Elasticsearch date histogram aggregation.
export function parseInterval(interval: string): Duration | null {
export function parseInterval(interval: string, checkValidEsUnit = false): Duration | null {
const matches = String(interval).trim().match(INTERVAL_STRING_RE);
if (!Array.isArray(matches) || matches.length < 3) {
return null;
@ -36,8 +44,13 @@ export function parseInterval(interval: string): Duration | null {
const unit = matches[2] as SupportedUnits;
// In line with moment.js, only allow zero value intervals when the unit is less than 'day'.
// And check for isNaN as e.g. valueless 'm' will pass the regex test.
if (isNaN(value) || (value < 1 && SUPPORT_ZERO_DURATION_UNITS.indexOf(unit) === -1)) {
// And check for isNaN as e.g. valueless 'm' will pass the regex test,
// plus an optional check that the unit is not w/M/y which are not fully supported by ES.
if (
isNaN(value) ||
(value < 1 && SUPPORT_ZERO_DURATION_UNITS.indexOf(unit) === -1) ||
(checkValidEsUnit === true && SUPPORT_ES_DURATION_UNITS.indexOf(unit) === -1)
) {
return null;
}

View file

@ -155,7 +155,7 @@ export class JobCreator {
}
protected _setBucketSpanMs(bucketSpan: BucketSpan) {
const bs = parseInterval(bucketSpan);
const bs = parseInterval(bucketSpan, true);
this._bucketSpanMs = bs === null ? 0 : bs.asMilliseconds();
}

View file

@ -76,7 +76,7 @@ export class SingleMetricJobCreator extends JobCreator {
const functionName = this._aggs[0].dslName;
const timeField = this._job_config.data_description.time_field;
const duration = parseInterval(this._job_config.analysis_config.bucket_span);
const duration = parseInterval(this._job_config.analysis_config.bucket_span, true);
if (duration === null) {
return;
}

View file

@ -142,7 +142,7 @@ export function populateValidationMessages(
basicValidations.bucketSpan.message = msg;
} else if (validationResults.contains('bucket_span_invalid')) {
basicValidations.bucketSpan.valid = false;
basicValidations.bucketSpan.message = invalidTimeFormatMessage(
basicValidations.bucketSpan.message = invalidTimeIntervalMessage(
jobConfig.analysis_config.bucket_span
);
}
@ -163,12 +163,12 @@ export function populateValidationMessages(
if (validationResults.contains('query_delay_invalid')) {
basicValidations.queryDelay.valid = false;
basicValidations.queryDelay.message = invalidTimeFormatMessage(datafeedConfig.query_delay);
basicValidations.queryDelay.message = invalidTimeIntervalMessage(datafeedConfig.query_delay);
}
if (validationResults.contains('frequency_invalid')) {
basicValidations.frequency.valid = false;
basicValidations.frequency.message = invalidTimeFormatMessage(datafeedConfig.frequency);
basicValidations.frequency.message = invalidTimeIntervalMessage(datafeedConfig.frequency);
}
}
@ -202,16 +202,18 @@ export function checkForExistingJobAndGroupIds(
};
}
function invalidTimeFormatMessage(value: string | undefined) {
function invalidTimeIntervalMessage(value: string | undefined) {
return i18n.translate(
'xpack.ml.newJob.wizard.validateJob.frequencyInvalidTimeIntervalFormatErrorMessage',
{
defaultMessage:
'{value} is not a valid time interval format e.g. {tenMinutes}, {oneHour}. It also needs to be higher than zero.',
'{value} is not a valid time interval format e.g. {thirtySeconds}, {tenMinutes}, {oneHour}, {sevenDays}. It also needs to be higher than zero.',
values: {
value,
thirtySeconds: '30s',
tenMinutes: '10m',
oneHour: '1h',
sevenDays: '7d',
},
}
);

View file

@ -14,7 +14,11 @@ import { getFieldFormats, getUiSettings } from './dependency_cache';
import { FIELD_FORMAT_IDS, UI_SETTINGS } from '../../../../../../src/plugins/data/public';
const unitsDesc = dateMath.unitsDesc;
const largeMax = unitsDesc.indexOf('w'); // Multiple units of week or longer converted to days for ES intervals.
// Index of the list of time interval units at which larger units (i.e. weeks, months, years) need
// need to be converted to multiples of the largest unit supported in ES aggregation intervals (i.e. days).
// Note that similarly the largest interval supported for ML bucket spans is 'd'.
const timeUnitsMaxSupportedIndex = unitsDesc.indexOf('w');
const calcAuto = timeBucketsCalcAutoIntervalProvider();
@ -383,9 +387,11 @@ export function calcEsInterval(duration) {
const val = duration.as(unit);
// find a unit that rounds neatly
if (val >= 1 && Math.floor(val) === val) {
// if the unit is "large", like years, but isn't set to 1, ES will throw an error.
// Apart from for date histograms, ES only supports time units up to 'd',
// meaning we can't for example use 'w' for job bucket spans.
// See https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#time-units
// So keep going until we get out of the "large" units.
if (i <= largeMax && val !== 1) {
if (i <= timeUnitsMaxSupportedIndex) {
continue;
}

View file

@ -232,14 +232,14 @@ describe('ML - time buckets', () => {
expression: '3d',
});
expect(calcEsInterval(moment.duration(7, 'd'))).toEqual({
value: 1,
unit: 'w',
expression: '1w',
value: 7,
unit: 'd',
expression: '7d',
});
expect(calcEsInterval(moment.duration(1, 'w'))).toEqual({
value: 1,
unit: 'w',
expression: '1w',
value: 7,
unit: 'd',
expression: '7d',
});
expect(calcEsInterval(moment.duration(4, 'w'))).toEqual({
value: 28,
@ -247,19 +247,19 @@ describe('ML - time buckets', () => {
expression: '28d',
});
expect(calcEsInterval(moment.duration(1, 'M'))).toEqual({
value: 1,
unit: 'M',
expression: '1M',
value: 30,
unit: 'd',
expression: '30d',
});
expect(calcEsInterval(moment.duration(12, 'M'))).toEqual({
value: 1,
unit: 'y',
expression: '1y',
value: 365,
unit: 'd',
expression: '365d',
});
expect(calcEsInterval(moment.duration(1, 'y'))).toEqual({
value: 1,
unit: 'y',
expression: '1y',
value: 365,
unit: 'd',
expression: '365d',
});
});
});

View file

@ -133,11 +133,11 @@ describe('ML - validateJob', () => {
});
};
it('invalid bucket span formats', () => {
const invalidBucketSpanFormats = ['a', '10', '$'];
const invalidBucketSpanFormats = ['a', '10', '$', '500ms', '1w', '2M', '1y'];
return bucketSpanFormatTests(invalidBucketSpanFormats, 'bucket_span_invalid');
});
it('valid bucket span formats', () => {
const validBucketSpanFormats = ['1s', '4h', '10d', '6w', '2m', '3y'];
const validBucketSpanFormats = ['5000ms', '1s', '2m', '4h', '10d'];
return bucketSpanFormatTests(validBucketSpanFormats, 'bucket_span_valid');
});

View file

@ -5,9 +5,8 @@
*/
import { estimateBucketSpanFactory } from '../../models/bucket_span_estimator';
import { mlFunctionToESAggregation } from '../../../common/util/job_utils';
import { mlFunctionToESAggregation, parseTimeIntervalForJob } from '../../../common/util/job_utils';
import { SKIP_BUCKET_SPAN_ESTIMATION } from '../../../common/constants/validation';
import { parseInterval } from '../../../common/util/parse_interval';
import { validateJobObject } from './validate_job_object';
@ -65,8 +64,11 @@ export async function validateBucketSpan(
}
const messages = [];
const parsedBucketSpan = parseInterval(job.analysis_config.bucket_span);
if (parsedBucketSpan === null || parsedBucketSpan.asMilliseconds() === 0) {
// Bucket span must be a valid interval, greater than 0,
// and if specified in ms must be a multiple of 1000ms
const parsedBucketSpan = parseTimeIntervalForJob(job.analysis_config.bucket_span);
if (parsedBucketSpan === null) {
messages.push({ id: 'bucket_span_invalid' });
return messages;
}

View file

@ -78,7 +78,7 @@ export async function validateTimeRange(
}
// check for minimum time range (25 buckets or 2 hours, whichever is longer)
const interval = parseInterval(job.analysis_config.bucket_span);
const interval = parseInterval(job.analysis_config.bucket_span, true);
if (interval === null) {
messages.push({ id: 'bucket_span_invalid' });
} else {

View file

@ -10691,7 +10691,6 @@
"xpack.ml.newJob.wizard.timeRangeStep.timeRangePicker.startDateLabel": "開始日",
"xpack.ml.newJob.wizard.validateJob.bucketSpanMustBeSetErrorMessage": "バケットスパンを設定する必要があります",
"xpack.ml.newJob.wizard.validateJob.duplicatedDetectorsErrorMessage": "重複する検知器が検出されました。",
"xpack.ml.newJob.wizard.validateJob.frequencyInvalidTimeIntervalFormatErrorMessage": "{value} は有効な時間間隔のフォーマット (例: {tenMinutes}、{oneHour}) ではありません。また、0 よりも大きい数字である必要があります。",
"xpack.ml.newJob.wizard.validateJob.groupNameAlreadyExists": "グループ ID が既に存在します。グループ ID は既存のジョブやグループと同じにできません。",
"xpack.ml.newJob.wizard.validateJob.jobGroupAllowedCharactersDescription": "ジョブグループ名にはアルファベットの小文字 (a-z と 0-9)、ハイフンまたはアンダーラインが使用でき、最初と最後を英数字にする必要があります",
"xpack.ml.newJob.wizard.validateJob.jobGroupMaxLengthDescription": "ジョブグループ名は {maxLength, plural, one {# 文字} other {# 文字}} 以内でなければなりません。",

View file

@ -10695,7 +10695,6 @@
"xpack.ml.newJob.wizard.timeRangeStep.timeRangePicker.startDateLabel": "开始日期",
"xpack.ml.newJob.wizard.validateJob.bucketSpanMustBeSetErrorMessage": "必须设置存储桶跨度",
"xpack.ml.newJob.wizard.validateJob.duplicatedDetectorsErrorMessage": "找到重复的检测工具。",
"xpack.ml.newJob.wizard.validateJob.frequencyInvalidTimeIntervalFormatErrorMessage": "{value} 不是有效的时间间隔格式,例如,{tenMinutes}、{oneHour}。还需要大于零。",
"xpack.ml.newJob.wizard.validateJob.groupNameAlreadyExists": "组 ID 已存在。组 ID 不能与现有作业或组相同。",
"xpack.ml.newJob.wizard.validateJob.jobGroupAllowedCharactersDescription": "作业组名称可以包含小写字母数字a-z 和 0-9、连字符或下划线必须以字母数字字符开头和结尾",
"xpack.ml.newJob.wizard.validateJob.jobGroupMaxLengthDescription": "作业组名称的长度不得超过 {maxLength, plural, one {# 个字符} other {# 个字符}}。",