[ML] Fixes anomaly chart and validation for one week bucket span (#69671)

* [ML] Fixes anomaly chart and validation for one week bucket span

* [ML] Fix interval Jest tests
This commit is contained in:
Pete Harverson 2020-06-24 09:20:38 +01:00 committed by GitHub
parent 33fb3e832c
commit b270321ff3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 99 additions and 46 deletions

View file

@ -6,6 +6,7 @@
import _ from 'lodash'; import _ from 'lodash';
import semver from 'semver'; import semver from 'semver';
import { Duration } from 'moment';
// @ts-ignore // @ts-ignore
import numeral from '@elastic/numeral'; import numeral from '@elastic/numeral';
@ -433,7 +434,7 @@ export function basicJobValidation(
messages.push({ id: 'bucket_span_empty' }); messages.push({ id: 'bucket_span_empty' });
valid = false; valid = false;
} else { } else {
if (isValidTimeFormat(job.analysis_config.bucket_span)) { if (isValidTimeInterval(job.analysis_config.bucket_span)) {
messages.push({ messages.push({
id: 'bucket_span_valid', id: 'bucket_span_valid',
bucketSpan: job.analysis_config.bucket_span, bucketSpan: job.analysis_config.bucket_span,
@ -490,14 +491,14 @@ export function basicDatafeedValidation(datafeed: Datafeed): ValidationResults {
if (datafeed) { if (datafeed) {
let queryDelayMessage = { id: 'query_delay_valid' }; let queryDelayMessage = { id: 'query_delay_valid' };
if (isValidTimeFormat(datafeed.query_delay) === false) { if (isValidTimeInterval(datafeed.query_delay) === false) {
queryDelayMessage = { id: 'query_delay_invalid' }; queryDelayMessage = { id: 'query_delay_invalid' };
valid = false; valid = false;
} }
messages.push(queryDelayMessage); messages.push(queryDelayMessage);
let frequencyMessage = { id: 'frequency_valid' }; let frequencyMessage = { id: 'frequency_valid' };
if (isValidTimeFormat(datafeed.frequency) === false) { if (isValidTimeInterval(datafeed.frequency) === false) {
frequencyMessage = { id: 'frequency_invalid' }; frequencyMessage = { id: 'frequency_invalid' };
valid = false; valid = false;
} }
@ -591,12 +592,33 @@ export function validateGroupNames(job: Job): ValidationResults {
}; };
} }
function isValidTimeFormat(value: string | undefined): boolean { /**
* Parses the supplied string to a time interval suitable for use in an ML anomaly
* detection job or datafeed.
* @param value the string to parse
* @return {Duration} the parsed interval, or null if it does not represent a valid
* time interval.
*/
export function parseTimeIntervalForJob(value: string | undefined): Duration | null {
  // Nothing to parse when no value has been supplied.
  if (value === undefined) {
    return null;
  }

  // Passing true asks parseInterval to additionally reject units which are not
  // supported as Elasticsearch duration units.
  const parsed = parseInterval(value, true);
  if (parsed === null) {
    return null;
  }

  // The interval must not be zero length, and must have no sub-second
  // component, i.e. a value given in ms has to be a whole multiple of 1000ms.
  const isNonZero = parsed.asMilliseconds() !== 0;
  const isWholeSeconds = parsed.milliseconds() === 0;
  if (isNonZero && isWholeSeconds) {
    return parsed;
  }
  return null;
}
// Checks that the value for a field which represents a time interval,
// such as a job bucket span or datafeed query delay, is valid.
function isValidTimeInterval(value: string | undefined): boolean {
if (value === undefined) { if (value === undefined) {
return true; return true;
} }
const interval = parseInterval(value); return parseTimeIntervalForJob(value) !== null;
return interval !== null && interval.asMilliseconds() !== 0;
} }
// Returns the latest of the last source data and last processed bucket timestamp, // Returns the latest of the last source data and last processed bucket timestamp,

View file

@ -7,7 +7,7 @@
import { parseInterval } from './parse_interval'; import { parseInterval } from './parse_interval';
describe('ML parse interval util', () => { describe('ML parse interval util', () => {
test('correctly parses an interval containing unit and value', () => { test('should correctly parse an interval containing a valid unit and value', () => {
expect(parseInterval('1d')!.as('d')).toBe(1); expect(parseInterval('1d')!.as('d')).toBe(1);
expect(parseInterval('2y')!.as('y')).toBe(2); expect(parseInterval('2y')!.as('y')).toBe(2);
expect(parseInterval('5M')!.as('M')).toBe(5); expect(parseInterval('5M')!.as('M')).toBe(5);
@ -20,15 +20,25 @@ describe('ML parse interval util', () => {
expect(parseInterval('0s')!.as('h')).toBe(0); expect(parseInterval('0s')!.as('h')).toBe(0);
}); });
test('correctly handles zero value intervals', () => { test('should correctly handle zero value intervals', () => {
expect(parseInterval('0h')!.as('h')).toBe(0); expect(parseInterval('0h')!.as('h')).toBe(0);
expect(parseInterval('0d')).toBe(null); expect(parseInterval('0d')).toBe(null);
}); });
test('returns null for an invalid interval', () => { test('should return null for an invalid interval', () => {
expect(parseInterval('')).toBe(null); expect(parseInterval('')).toBe(null);
expect(parseInterval('234asdf')).toBe(null); expect(parseInterval('234asdf')).toBe(null);
expect(parseInterval('m')).toBe(null); expect(parseInterval('m')).toBe(null);
expect(parseInterval('1.5h')).toBe(null); expect(parseInterval('1.5h')).toBe(null);
}); });
test('should correctly check for whether the interval units are valid Elasticsearch time units', () => {
expect(parseInterval('100s', true)!.as('s')).toBe(100);
expect(parseInterval('5m', true)!.as('m')).toBe(5);
expect(parseInterval('24h', true)!.as('h')).toBe(24);
expect(parseInterval('7d', true)!.as('d')).toBe(7);
expect(parseInterval('1w', true)).toBe(null);
expect(parseInterval('1M', true)).toBe(null);
expect(parseInterval('1y', true)).toBe(null);
});
}); });

View file

@ -16,7 +16,15 @@ const INTERVAL_STRING_RE = new RegExp('^([0-9]*)\\s*(' + dateMath.units.join('|'
// for units of hour or less. // for units of hour or less.
const SUPPORT_ZERO_DURATION_UNITS: SupportedUnits[] = ['ms', 's', 'm', 'h']; const SUPPORT_ZERO_DURATION_UNITS: SupportedUnits[] = ['ms', 's', 'm', 'h'];
// List of time units which are supported for use in Elasticsearch durations
// (such as anomaly detection job bucket spans)
// See https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#time-units
const SUPPORT_ES_DURATION_UNITS: SupportedUnits[] = ['ms', 's', 'm', 'h', 'd'];
// Parses an interval String, such as 7d, 1h or 30m to a moment duration. // Parses an interval String, such as 7d, 1h or 30m to a moment duration.
// Optionally carries out an additional check that the interval is supported as a
// time unit by Elasticsearch, as units greater than 'd' for example cannot be used
// for anomaly detection job bucket spans.
// Differs from the Kibana ui/utils/parse_interval in the following ways: // Differs from the Kibana ui/utils/parse_interval in the following ways:
// 1. A value-less interval such as 'm' is not allowed - in line with the ML back-end // 1. A value-less interval such as 'm' is not allowed - in line with the ML back-end
// not accepting such interval Strings for the bucket span of a job. // not accepting such interval Strings for the bucket span of a job.
@ -25,7 +33,7 @@ const SUPPORT_ZERO_DURATION_UNITS: SupportedUnits[] = ['ms', 's', 'm', 'h'];
// to work with units less than 'day'. // to work with units less than 'day'.
// 3. Fractional intervals e.g. 1.5h or 4.5d are not allowed, in line with the behaviour // 3. Fractional intervals e.g. 1.5h or 4.5d are not allowed, in line with the behaviour
// of the Elasticsearch date histogram aggregation. // of the Elasticsearch date histogram aggregation.
export function parseInterval(interval: string): Duration | null { export function parseInterval(interval: string, checkValidEsUnit = false): Duration | null {
const matches = String(interval).trim().match(INTERVAL_STRING_RE); const matches = String(interval).trim().match(INTERVAL_STRING_RE);
if (!Array.isArray(matches) || matches.length < 3) { if (!Array.isArray(matches) || matches.length < 3) {
return null; return null;
@ -36,8 +44,13 @@ export function parseInterval(interval: string): Duration | null {
const unit = matches[2] as SupportedUnits; const unit = matches[2] as SupportedUnits;
// In line with moment.js, only allow zero value intervals when the unit is less than 'day'. // In line with moment.js, only allow zero value intervals when the unit is less than 'day'.
// And check for isNaN as e.g. valueless 'm' will pass the regex test. // And check for isNaN as e.g. valueless 'm' will pass the regex test,
if (isNaN(value) || (value < 1 && SUPPORT_ZERO_DURATION_UNITS.indexOf(unit) === -1)) { // plus an optional check that the unit is not w/M/y which are not fully supported by ES.
if (
isNaN(value) ||
(value < 1 && SUPPORT_ZERO_DURATION_UNITS.indexOf(unit) === -1) ||
(checkValidEsUnit === true && SUPPORT_ES_DURATION_UNITS.indexOf(unit) === -1)
) {
return null; return null;
} }

View file

@ -155,7 +155,7 @@ export class JobCreator {
} }
protected _setBucketSpanMs(bucketSpan: BucketSpan) { protected _setBucketSpanMs(bucketSpan: BucketSpan) {
const bs = parseInterval(bucketSpan); const bs = parseInterval(bucketSpan, true);
this._bucketSpanMs = bs === null ? 0 : bs.asMilliseconds(); this._bucketSpanMs = bs === null ? 0 : bs.asMilliseconds();
} }

View file

@ -76,7 +76,7 @@ export class SingleMetricJobCreator extends JobCreator {
const functionName = this._aggs[0].dslName; const functionName = this._aggs[0].dslName;
const timeField = this._job_config.data_description.time_field; const timeField = this._job_config.data_description.time_field;
const duration = parseInterval(this._job_config.analysis_config.bucket_span); const duration = parseInterval(this._job_config.analysis_config.bucket_span, true);
if (duration === null) { if (duration === null) {
return; return;
} }

View file

@ -142,7 +142,7 @@ export function populateValidationMessages(
basicValidations.bucketSpan.message = msg; basicValidations.bucketSpan.message = msg;
} else if (validationResults.contains('bucket_span_invalid')) { } else if (validationResults.contains('bucket_span_invalid')) {
basicValidations.bucketSpan.valid = false; basicValidations.bucketSpan.valid = false;
basicValidations.bucketSpan.message = invalidTimeFormatMessage( basicValidations.bucketSpan.message = invalidTimeIntervalMessage(
jobConfig.analysis_config.bucket_span jobConfig.analysis_config.bucket_span
); );
} }
@ -163,12 +163,12 @@ export function populateValidationMessages(
if (validationResults.contains('query_delay_invalid')) { if (validationResults.contains('query_delay_invalid')) {
basicValidations.queryDelay.valid = false; basicValidations.queryDelay.valid = false;
basicValidations.queryDelay.message = invalidTimeFormatMessage(datafeedConfig.query_delay); basicValidations.queryDelay.message = invalidTimeIntervalMessage(datafeedConfig.query_delay);
} }
if (validationResults.contains('frequency_invalid')) { if (validationResults.contains('frequency_invalid')) {
basicValidations.frequency.valid = false; basicValidations.frequency.valid = false;
basicValidations.frequency.message = invalidTimeFormatMessage(datafeedConfig.frequency); basicValidations.frequency.message = invalidTimeIntervalMessage(datafeedConfig.frequency);
} }
} }
@ -202,16 +202,18 @@ export function checkForExistingJobAndGroupIds(
}; };
} }
function invalidTimeFormatMessage(value: string | undefined) { function invalidTimeIntervalMessage(value: string | undefined) {
return i18n.translate( return i18n.translate(
'xpack.ml.newJob.wizard.validateJob.frequencyInvalidTimeIntervalFormatErrorMessage', 'xpack.ml.newJob.wizard.validateJob.frequencyInvalidTimeIntervalFormatErrorMessage',
{ {
defaultMessage: defaultMessage:
'{value} is not a valid time interval format e.g. {tenMinutes}, {oneHour}. It also needs to be higher than zero.', '{value} is not a valid time interval format e.g. {thirtySeconds}, {tenMinutes}, {oneHour}, {sevenDays}. It also needs to be higher than zero.',
values: { values: {
value, value,
thirtySeconds: '30s',
tenMinutes: '10m', tenMinutes: '10m',
oneHour: '1h', oneHour: '1h',
sevenDays: '7d',
}, },
} }
); );

View file

@ -14,7 +14,11 @@ import { getFieldFormats, getUiSettings } from './dependency_cache';
import { FIELD_FORMAT_IDS, UI_SETTINGS } from '../../../../../../src/plugins/data/public'; import { FIELD_FORMAT_IDS, UI_SETTINGS } from '../../../../../../src/plugins/data/public';
const unitsDesc = dateMath.unitsDesc; const unitsDesc = dateMath.unitsDesc;
const largeMax = unitsDesc.indexOf('w'); // Multiple units of week or longer converted to days for ES intervals.
// Index of the list of time interval units at which larger units (i.e. weeks, months, years) need
// to be converted to multiples of the largest unit supported in ES aggregation intervals (i.e. days).
// Note that similarly the largest interval supported for ML bucket spans is 'd'.
const timeUnitsMaxSupportedIndex = unitsDesc.indexOf('w');
const calcAuto = timeBucketsCalcAutoIntervalProvider(); const calcAuto = timeBucketsCalcAutoIntervalProvider();
@ -383,9 +387,11 @@ export function calcEsInterval(duration) {
const val = duration.as(unit); const val = duration.as(unit);
// find a unit that rounds neatly // find a unit that rounds neatly
if (val >= 1 && Math.floor(val) === val) { if (val >= 1 && Math.floor(val) === val) {
// if the unit is "large", like years, but isn't set to 1, ES will throw an error. // Apart from for date histograms, ES only supports time units up to 'd',
// meaning we can't for example use 'w' for job bucket spans.
// See https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#time-units
// So keep going until we get out of the "large" units. // So keep going until we get out of the "large" units.
if (i <= largeMax && val !== 1) { if (i <= timeUnitsMaxSupportedIndex) {
continue; continue;
} }

View file

@ -232,14 +232,14 @@ describe('ML - time buckets', () => {
expression: '3d', expression: '3d',
}); });
expect(calcEsInterval(moment.duration(7, 'd'))).toEqual({ expect(calcEsInterval(moment.duration(7, 'd'))).toEqual({
value: 1, value: 7,
unit: 'w', unit: 'd',
expression: '1w', expression: '7d',
}); });
expect(calcEsInterval(moment.duration(1, 'w'))).toEqual({ expect(calcEsInterval(moment.duration(1, 'w'))).toEqual({
value: 1, value: 7,
unit: 'w', unit: 'd',
expression: '1w', expression: '7d',
}); });
expect(calcEsInterval(moment.duration(4, 'w'))).toEqual({ expect(calcEsInterval(moment.duration(4, 'w'))).toEqual({
value: 28, value: 28,
@ -247,19 +247,19 @@ describe('ML - time buckets', () => {
expression: '28d', expression: '28d',
}); });
expect(calcEsInterval(moment.duration(1, 'M'))).toEqual({ expect(calcEsInterval(moment.duration(1, 'M'))).toEqual({
value: 1, value: 30,
unit: 'M', unit: 'd',
expression: '1M', expression: '30d',
}); });
expect(calcEsInterval(moment.duration(12, 'M'))).toEqual({ expect(calcEsInterval(moment.duration(12, 'M'))).toEqual({
value: 1, value: 365,
unit: 'y', unit: 'd',
expression: '1y', expression: '365d',
}); });
expect(calcEsInterval(moment.duration(1, 'y'))).toEqual({ expect(calcEsInterval(moment.duration(1, 'y'))).toEqual({
value: 1, value: 365,
unit: 'y', unit: 'd',
expression: '1y', expression: '365d',
}); });
}); });
}); });

View file

@ -133,11 +133,11 @@ describe('ML - validateJob', () => {
}); });
}; };
it('invalid bucket span formats', () => { it('invalid bucket span formats', () => {
const invalidBucketSpanFormats = ['a', '10', '$']; const invalidBucketSpanFormats = ['a', '10', '$', '500ms', '1w', '2M', '1y'];
return bucketSpanFormatTests(invalidBucketSpanFormats, 'bucket_span_invalid'); return bucketSpanFormatTests(invalidBucketSpanFormats, 'bucket_span_invalid');
}); });
it('valid bucket span formats', () => { it('valid bucket span formats', () => {
const validBucketSpanFormats = ['1s', '4h', '10d', '6w', '2m', '3y']; const validBucketSpanFormats = ['5000ms', '1s', '2m', '4h', '10d'];
return bucketSpanFormatTests(validBucketSpanFormats, 'bucket_span_valid'); return bucketSpanFormatTests(validBucketSpanFormats, 'bucket_span_valid');
}); });

View file

@ -5,9 +5,8 @@
*/ */
import { estimateBucketSpanFactory } from '../../models/bucket_span_estimator'; import { estimateBucketSpanFactory } from '../../models/bucket_span_estimator';
import { mlFunctionToESAggregation } from '../../../common/util/job_utils'; import { mlFunctionToESAggregation, parseTimeIntervalForJob } from '../../../common/util/job_utils';
import { SKIP_BUCKET_SPAN_ESTIMATION } from '../../../common/constants/validation'; import { SKIP_BUCKET_SPAN_ESTIMATION } from '../../../common/constants/validation';
import { parseInterval } from '../../../common/util/parse_interval';
import { validateJobObject } from './validate_job_object'; import { validateJobObject } from './validate_job_object';
@ -65,8 +64,11 @@ export async function validateBucketSpan(
} }
const messages = []; const messages = [];
const parsedBucketSpan = parseInterval(job.analysis_config.bucket_span);
if (parsedBucketSpan === null || parsedBucketSpan.asMilliseconds() === 0) { // Bucket span must be a valid interval, greater than 0,
// and if specified in ms must be a multiple of 1000ms
const parsedBucketSpan = parseTimeIntervalForJob(job.analysis_config.bucket_span);
if (parsedBucketSpan === null) {
messages.push({ id: 'bucket_span_invalid' }); messages.push({ id: 'bucket_span_invalid' });
return messages; return messages;
} }

View file

@ -78,7 +78,7 @@ export async function validateTimeRange(
} }
// check for minimum time range (25 buckets or 2 hours, whichever is longer) // check for minimum time range (25 buckets or 2 hours, whichever is longer)
const interval = parseInterval(job.analysis_config.bucket_span); const interval = parseInterval(job.analysis_config.bucket_span, true);
if (interval === null) { if (interval === null) {
messages.push({ id: 'bucket_span_invalid' }); messages.push({ id: 'bucket_span_invalid' });
} else { } else {

View file

@ -10691,7 +10691,6 @@
"xpack.ml.newJob.wizard.timeRangeStep.timeRangePicker.startDateLabel": "開始日", "xpack.ml.newJob.wizard.timeRangeStep.timeRangePicker.startDateLabel": "開始日",
"xpack.ml.newJob.wizard.validateJob.bucketSpanMustBeSetErrorMessage": "バケットスパンを設定する必要があります", "xpack.ml.newJob.wizard.validateJob.bucketSpanMustBeSetErrorMessage": "バケットスパンを設定する必要があります",
"xpack.ml.newJob.wizard.validateJob.duplicatedDetectorsErrorMessage": "重複する検知器が検出されました。", "xpack.ml.newJob.wizard.validateJob.duplicatedDetectorsErrorMessage": "重複する検知器が検出されました。",
"xpack.ml.newJob.wizard.validateJob.frequencyInvalidTimeIntervalFormatErrorMessage": "{value} は有効な時間間隔のフォーマット (例: {tenMinutes}、{oneHour}) ではありません。また、0 よりも大きい数字である必要があります。",
"xpack.ml.newJob.wizard.validateJob.groupNameAlreadyExists": "グループ ID が既に存在します。グループ ID は既存のジョブやグループと同じにできません。", "xpack.ml.newJob.wizard.validateJob.groupNameAlreadyExists": "グループ ID が既に存在します。グループ ID は既存のジョブやグループと同じにできません。",
"xpack.ml.newJob.wizard.validateJob.jobGroupAllowedCharactersDescription": "ジョブグループ名にはアルファベットの小文字 (a-z と 0-9)、ハイフンまたはアンダーラインが使用でき、最初と最後を英数字にする必要があります", "xpack.ml.newJob.wizard.validateJob.jobGroupAllowedCharactersDescription": "ジョブグループ名にはアルファベットの小文字 (a-z と 0-9)、ハイフンまたはアンダーラインが使用でき、最初と最後を英数字にする必要があります",
"xpack.ml.newJob.wizard.validateJob.jobGroupMaxLengthDescription": "ジョブグループ名は {maxLength, plural, one {# 文字} other {# 文字}} 以内でなければなりません。", "xpack.ml.newJob.wizard.validateJob.jobGroupMaxLengthDescription": "ジョブグループ名は {maxLength, plural, one {# 文字} other {# 文字}} 以内でなければなりません。",

View file

@ -10695,7 +10695,6 @@
"xpack.ml.newJob.wizard.timeRangeStep.timeRangePicker.startDateLabel": "开始日期", "xpack.ml.newJob.wizard.timeRangeStep.timeRangePicker.startDateLabel": "开始日期",
"xpack.ml.newJob.wizard.validateJob.bucketSpanMustBeSetErrorMessage": "必须设置存储桶跨度", "xpack.ml.newJob.wizard.validateJob.bucketSpanMustBeSetErrorMessage": "必须设置存储桶跨度",
"xpack.ml.newJob.wizard.validateJob.duplicatedDetectorsErrorMessage": "找到重复的检测工具。", "xpack.ml.newJob.wizard.validateJob.duplicatedDetectorsErrorMessage": "找到重复的检测工具。",
"xpack.ml.newJob.wizard.validateJob.frequencyInvalidTimeIntervalFormatErrorMessage": "{value} 不是有效的时间间隔格式,例如,{tenMinutes}、{oneHour}。还需要大于零。",
"xpack.ml.newJob.wizard.validateJob.groupNameAlreadyExists": "组 ID 已存在。组 ID 不能与现有作业或组相同。", "xpack.ml.newJob.wizard.validateJob.groupNameAlreadyExists": "组 ID 已存在。组 ID 不能与现有作业或组相同。",
"xpack.ml.newJob.wizard.validateJob.jobGroupAllowedCharactersDescription": "作业组名称可以包含小写字母数字a-z 和 0-9、连字符或下划线必须以字母数字字符开头和结尾", "xpack.ml.newJob.wizard.validateJob.jobGroupAllowedCharactersDescription": "作业组名称可以包含小写字母数字a-z 和 0-9、连字符或下划线必须以字母数字字符开头和结尾",
"xpack.ml.newJob.wizard.validateJob.jobGroupMaxLengthDescription": "作业组名称的长度不得超过 {maxLength, plural, one {# 个字符} other {# 个字符}}。", "xpack.ml.newJob.wizard.validateJob.jobGroupMaxLengthDescription": "作业组名称的长度不得超过 {maxLength, plural, one {# 个字符} other {# 个字符}}。",