kibana/x-pack/plugins/ml/common/util/job_utils.test.ts
Quynh Nguyen 7868a569eb
[ML] Fix datafeed start time is incorrect when the job has trailing empty buckets (#71976)
Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
2020-07-16 14:42:34 -05:00

605 lines
22 KiB
TypeScript

/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
import {
calculateDatafeedFrequencyDefaultSeconds,
isTimeSeriesViewJob,
isTimeSeriesViewDetector,
isSourceDataChartableForDetector,
isModelPlotChartableForDetector,
getPartitioningFieldNames,
isModelPlotEnabled,
isJobVersionGte,
mlFunctionToESAggregation,
isJobIdValid,
prefixDatafeedId,
getSafeAggregationName,
getLatestDataOrBucketTimestamp,
getEarliestDatafeedStartTime,
} from './job_utils';
import { CombinedJob, Job } from '../types/anomaly_detection_jobs';
import moment from 'moment';
describe('ML - job utils', () => {
// Table-driven checks of calculateDatafeedFrequencyDefaultSeconds.
describe('calculateDatafeedFrequencyDefaultSeconds', () => {
  // Each row is [value passed to the function, frequency expected back].
  const expectations: Array<[number, number]> = [
    [119, 60],
    [120, 60],
    [300, 150],
    [601, 300],
    [43200, 600],
    [43201, 3600],
  ];

  expectations.forEach(([input, expectedFrequency]) => {
    test(`returns correct frequency for ${input}`, () => {
      expect(calculateDatafeedFrequencyDefaultSeconds(input)).toBe(expectedFrequency);
    });
  });
});
// Checks isTimeSeriesViewJob against jobs built from different detector combinations.
describe('isTimeSeriesViewJob', () => {
  // Each scenario carries its own detector objects, so nothing is shared between tests.
  const scenarios = [
    {
      title: 'returns true when job has a single detector with a metric function',
      detectors: [
        {
          function: 'high_count',
          partition_field_name: 'status',
          detector_description: 'High count status code',
        },
      ],
      expected: true,
    },
    {
      title: 'returns true when job has at least one detector with a metric function',
      detectors: [
        {
          function: 'high_count',
          partition_field_name: 'status',
          detector_description: 'High count status code',
        },
        {
          function: 'freq_rare',
          by_field_name: 'uri',
          over_field_name: 'clientip',
          detector_description: 'Freq rare URI',
        },
      ],
      expected: true,
    },
    {
      title: 'returns false when job does not have at least one detector with a metric function',
      detectors: [
        {
          function: 'varp',
          by_field_name: 'responsetime',
          detector_description: 'Varp responsetime',
        },
        {
          function: 'freq_rare',
          by_field_name: 'uri',
          over_field_name: 'clientip',
          detector_description: 'Freq rare URI',
        },
      ],
      expected: false,
    },
    {
      title: 'returns false when job has a single count by category detector',
      detectors: [
        {
          function: 'count',
          by_field_name: 'mlcategory',
          detector_description: 'Count by category',
        },
      ],
      expected: false,
    },
  ];

  scenarios.forEach(({ title, detectors, expected }) => {
    test(title, () => {
      // Only analysis_config.detectors is populated; the cast stands in for a full job.
      const job = ({ analysis_config: { detectors } } as unknown) as CombinedJob;
      expect(isTimeSeriesViewJob(job)).toBe(expected);
    });
  });
});
// Checks isTimeSeriesViewDetector for each detector index of one shared job fixture.
describe('isTimeSeriesViewDetector', () => {
  // Detectors are addressed by their position in this array.
  const detectors = [
    {
      function: 'sum',
      field_name: 'bytes',
      partition_field_name: 'clientip',
      detector_description: 'High bytes client IP',
    },
    {
      function: 'freq_rare',
      by_field_name: 'uri',
      over_field_name: 'clientip',
      detector_description: 'Freq rare URI',
    },
    {
      function: 'count',
      by_field_name: 'mlcategory',
      detector_description: 'Count by category',
    },
    { function: 'count', by_field_name: 'hrd', detector_description: 'count by hrd' },
    { function: 'mean', field_name: 'NetworkDiff', detector_description: 'avg NetworkDiff' },
  ];

  // Script fields referenced by detectors 3 (hrd) and 4 (NetworkDiff).
  const scriptFields = {
    hrd: {
      script: {
        inline: 'return domainSplit(doc["query"].value, params).get(1);',
        lang: 'painless',
      },
    },
    NetworkDiff: {
      script: {
        source: 'doc["NetworkOut"].value - doc["NetworkIn"].value',
        lang: 'painless',
      },
    },
  };

  const job = ({
    analysis_config: { detectors },
    datafeed_config: { script_fields: scriptFields },
  } as unknown) as CombinedJob;

  // Each row is [test title, detector index, expected result].
  const scenarios: Array<[string, number, boolean]> = [
    ['returns true for a detector with a metric function', 0, true],
    ['returns false for a detector with a non-metric function', 1, false],
    ['returns false for a detector using count on an mlcategory field', 2, false],
    ['returns false for a detector using a script field as a by field', 3, false],
    ['returns false for a detector using a script field as a metric field_name', 4, false],
  ];

  scenarios.forEach(([title, detectorIndex, expected]) => {
    test(title, () => {
      expect(isTimeSeriesViewDetector(job, detectorIndex)).toBe(expected);
    });
  });
});
// Checks isSourceDataChartableForDetector for every detector index of one shared job fixture.
describe('isSourceDataChartableForDetector', () => {
  // Functions of detectors 0-24, which the tests expect to be chartable.
  const chartableFunctions = [
    'count', // 0
    'low_count', // 1
    'high_count', // 2
    'non_zero_count', // 3
    'low_non_zero_count', // 4
    'high_non_zero_count', // 5
    'distinct_count', // 6
    'low_distinct_count', // 7
    'high_distinct_count', // 8
    'metric', // 9
    'mean', // 10
    'low_mean', // 11
    'high_mean', // 12
    'median', // 13
    'low_median', // 14
    'high_median', // 15
    'min', // 16
    'max', // 17
    'sum', // 18
    'low_sum', // 19
    'high_sum', // 20
    'non_null_sum', // 21
    'low_non_null_sum', // 22
    'high_non_null_sum', // 23
    'rare', // 24
  ];

  // Detectors 25-37, which the tests expect not to be chartable.
  const nonChartableDetectors = [
    { function: 'count', by_field_name: 'mlcategory' }, // 25
    { function: 'count', by_field_name: 'hrd' }, // 26
    { function: 'freq_rare' }, // 27
    { function: 'info_content' }, // 28
    { function: 'low_info_content' }, // 29
    { function: 'high_info_content' }, // 30
    { function: 'varp' }, // 31
    { function: 'low_varp' }, // 32
    { function: 'high_varp' }, // 33
    { function: 'time_of_day' }, // 34
    { function: 'time_of_week' }, // 35
    { function: 'lat_long' }, // 36
    { function: 'mean', field_name: 'NetworkDiff' }, // 37
  ];

  const job = ({
    analysis_config: {
      // Order matters: the tests address detectors by index.
      detectors: [
        ...chartableFunctions.map((functionName) => ({ function: functionName })),
        ...nonChartableDetectors,
      ],
    },
    datafeed_config: {
      script_fields: {
        hrd: {
          script: {
            inline: 'return domainSplit(doc["query"].value, params).get(1);',
            lang: 'painless',
          },
        },
        NetworkDiff: {
          script: {
            source: 'doc["NetworkOut"].value - doc["NetworkIn"].value',
            lang: 'painless',
          },
        },
      },
    },
  } as unknown) as CombinedJob;

  test('returns true for expected detectors', () => {
    for (let detectorIndex = 0; detectorIndex < chartableFunctions.length; detectorIndex++) {
      expect(isSourceDataChartableForDetector(job, detectorIndex)).toBe(true);
    }
  });

  test('returns false for expected detectors', () => {
    const firstIndex = chartableFunctions.length; // 25
    const lastIndex = firstIndex + nonChartableDetectors.length - 1; // 37
    for (let detectorIndex = firstIndex; detectorIndex <= lastIndex; detectorIndex++) {
      expect(isSourceDataChartableForDetector(job, detectorIndex)).toBe(false);
    }
  });
});
// Checks isModelPlotChartableForDetector with and without model_plot_config on the job.
describe('isModelPlotChartableForDetector', () => {
  // Job fixture that has no model_plot_config at all.
  const jobWithoutModelPlot = ({
    analysis_config: {
      detectors: [{ function: 'count' }],
    },
  } as unknown) as Job;

  // Job fixture with model plot enabled and three detectors, addressed by index.
  const jobWithModelPlot = ({
    analysis_config: {
      detectors: [
        { function: 'count' },
        { function: 'info_content' },
        {
          function: 'rare',
          by_field_name: 'mlcategory',
        },
      ],
    },
    model_plot_config: {
      enabled: true,
    },
  } as unknown) as Job;

  // Each row is [test title, job, detector index, expected result].
  const scenarios: Array<[string, Job, number, boolean]> = [
    ['returns false when model plot is not enabled', jobWithoutModelPlot, 0, false],
    ['returns true for count detector when model plot is enabled', jobWithModelPlot, 0, true],
    [
      'returns true for info_content detector when model plot is enabled',
      jobWithModelPlot,
      1,
      true,
    ],
    ['returns false for rare by mlcategory when model plot is enabled', jobWithModelPlot, 2, false],
  ];

  scenarios.forEach(([title, job, detectorIndex, expected]) => {
    test(title, () => {
      expect(isModelPlotChartableForDetector(job, detectorIndex)).toBe(expected);
    });
  });
});
// Checks the field names getPartitioningFieldNames returns for each detector of one job fixture.
describe('getPartitioningFieldNames', () => {
  const detectors = [
    {
      function: 'count',
      detector_description: 'count',
    },
    {
      function: 'count',
      partition_field_name: 'clientip',
      detector_description: 'Count by clientip',
    },
    {
      function: 'freq_rare',
      by_field_name: 'uri',
      over_field_name: 'clientip',
      detector_description: 'Freq rare URI',
    },
    {
      function: 'sum',
      field_name: 'bytes',
      by_field_name: 'uri',
      over_field_name: 'clientip',
      partition_field_name: 'method',
      detector_description: 'sum bytes',
    },
  ];
  const job = ({ analysis_config: { detectors } } as unknown) as CombinedJob;

  // Each row is [test title, detector index, expected field names in the asserted order].
  const scenarios: Array<[string, number, string[]]> = [
    ['returns empty array for a detector with no partitioning fields', 0, []],
    ['returns expected array for a detector with a partition field', 1, ['clientip']],
    ['returns expected array for a detector with by and over fields', 2, ['uri', 'clientip']],
    [
      'returns expected array for a detector with partition, by and over fields',
      3,
      ['method', 'uri', 'clientip'],
    ],
  ];

  scenarios.forEach(([title, detectorIndex, expectedFieldNames]) => {
    test(title, () => {
      const fieldNames = getPartitioningFieldNames(job, detectorIndex);
      expect(fieldNames).toEqual(expectedFieldNames);
    });
  });
});
// Checks isModelPlotEnabled for jobs with model plot enabled, enabled with terms, and not enabled.
describe('isModelPlotEnabled', () => {
  test('returns true for a job in which model plot has been enabled', () => {
    const job = ({
      analysis_config: {
        detectors: [
          {
            function: 'high_count',
            partition_field_name: 'status',
            detector_description: 'High count status code',
          },
        ],
      },
      model_plot_config: {
        enabled: true,
      },
    } as unknown) as Job;
    expect(isModelPlotEnabled(job, 0)).toBe(true);
  });

  test('returns expected values for a job in which model plot has been enabled with terms', () => {
    const job = ({
      analysis_config: {
        detectors: [
          {
            function: 'max',
            field_name: 'responsetime',
            partition_field_name: 'country',
            by_field_name: 'airline',
          },
        ],
      },
      model_plot_config: {
        enabled: true,
        terms: 'US,AAL',
      },
    } as unknown) as Job;

    // Both entity values (US and AAL) appear in the configured terms.
    expect(
      isModelPlotEnabled(job, 0, [
        { fieldName: 'country', fieldValue: 'US' },
        { fieldName: 'airline', fieldValue: 'AAL' },
      ])
    ).toBe(true);

    // Only the partition field entity is supplied; no airline entity is given.
    expect(isModelPlotEnabled(job, 0, [{ fieldName: 'country', fieldValue: 'US' }])).toBe(false);

    // One entity value (GB) is not in the configured terms.
    expect(
      isModelPlotEnabled(job, 0, [
        { fieldName: 'country', fieldValue: 'GB' },
        { fieldName: 'airline', fieldValue: 'AAL' },
      ])
    ).toBe(false);

    // Neither entity value is in the configured terms.
    expect(
      isModelPlotEnabled(job, 0, [
        { fieldName: 'country', fieldValue: 'JP' },
        { fieldName: 'airline', fieldValue: 'JAL' },
      ])
    ).toBe(false);
  });

  // Title corrected: both assertions below expect false, not true.
  test('returns false for jobs in which model plot has not been enabled', () => {
    // model_plot_config is present but explicitly disabled.
    const job1 = ({
      analysis_config: {
        detectors: [
          {
            function: 'high_count',
            partition_field_name: 'status',
            detector_description: 'High count status code',
          },
        ],
      },
      model_plot_config: {
        enabled: false,
      },
    } as unknown) as CombinedJob;
    // Empty job object: no analysis_config and no model_plot_config.
    const job2 = ({} as unknown) as CombinedJob;
    expect(isModelPlotEnabled(job1, 0)).toBe(false);
    expect(isModelPlotEnabled(job2, 0)).toBe(false);
  });
});
// Compares a job at version 6.1.1 against earlier, equal and later versions.
describe('isJobVersionGte', () => {
  const job = ({ job_version: '6.1.1' } as unknown) as CombinedJob;

  // Each row is [test title, version to compare against, expected result].
  const scenarios: Array<[string, string, boolean]> = [
    ['returns true for later job version', '6.1.0', true],
    ['returns true for equal job version', '6.1.1', true],
    ['returns false for earlier job version', '6.1.2', false],
  ];

  scenarios.forEach(([title, version, expected]) => {
    test(title, () => {
      expect(isJobVersionGte(job, version)).toBe(expected);
    });
  });
});
// Checks the ES aggregation (or null) that mlFunctionToESAggregation returns for each ML function.
describe('mlFunctionToESAggregation', () => {
  // Ordered [ML function name, expected return value] pairs; null is the expected value
  // for functions the tests treat as having no aggregation.
  const expectedAggregations = [
    ['count', 'count'],
    ['low_count', 'count'],
    ['high_count', 'count'],
    ['non_zero_count', 'count'],
    ['low_non_zero_count', 'count'],
    ['high_non_zero_count', 'count'],
    ['distinct_count', 'cardinality'],
    ['low_distinct_count', 'cardinality'],
    ['high_distinct_count', 'cardinality'],
    ['metric', 'avg'],
    ['mean', 'avg'],
    ['low_mean', 'avg'],
    ['high_mean', 'avg'],
    ['min', 'min'],
    ['max', 'max'],
    ['sum', 'sum'],
    ['low_sum', 'sum'],
    ['high_sum', 'sum'],
    ['non_null_sum', 'sum'],
    ['low_non_null_sum', 'sum'],
    ['high_non_null_sum', 'sum'],
    ['rare', 'count'],
    ['freq_rare', null],
    ['info_content', null],
    ['low_info_content', null],
    ['high_info_content', null],
    ['median', 'percentiles'],
    ['low_median', 'percentiles'],
    ['high_median', 'percentiles'],
    ['varp', null],
    ['low_varp', null],
    ['high_varp', null],
    ['time_of_day', null],
    ['time_of_week', null],
    ['lat_long', null],
  ] as const;

  test('returns correct ES aggregation type for ML function', () => {
    for (const [mlFunction, esAggregation] of expectedAggregations) {
      expect(mlFunctionToESAggregation(mlFunction)).toBe(esAggregation);
    }
  });
});
// Checks isJobIdValid against one accepted id and several rejected ones.
describe('isJobIdValid', () => {
  // Each row is [job id, expected validity]; rejected ids start or end with '_' or '-',
  // or contain '&'.
  const scenarios: Array<[string, boolean]> = [
    ['good_job-name', true],
    ['_bad_job-name', false],
    ['bad_job-name_', false],
    ['-bad_job-name', false],
    ['bad_job-name-', false],
    ['bad&job-name', false],
  ];

  scenarios.forEach(([jobId, expected]) => {
    test(`returns ${expected} for job id: "${jobId}"`, () => {
      expect(isJobIdValid(jobId)).toBe(expected);
    });
  });
});
// Checks prefixDatafeedId for ids with and without a leading "datafeed-".
// A stray trailing double quote was removed from both test titles.
describe('prefixDatafeedId', () => {
  test('returns datafeed-prefix-job from datafeed-job', () => {
    // The prefix is expected after the existing "datafeed-" part.
    expect(prefixDatafeedId('datafeed-job', 'prefix-')).toBe('datafeed-prefix-job');
  });

  test('returns datafeed-prefix-job from job', () => {
    // With no "datafeed-" in the input, it is expected in the result along with the prefix.
    expect(prefixDatafeedId('job', 'prefix-')).toBe('datafeed-prefix-job');
  });
});
// Checks which names getSafeAggregationName returns unchanged and which it replaces.
describe('getSafeAggregationName', () => {
  // Each row is [field name passed in, expected name]; index 0 is passed in every case.
  const scenarios: Array<[string, string]> = [
    ['foo', 'foo'],
    ['foo.bar', 'foo.bar'],
    ['foo&bar', 'field_0'],
  ];

  scenarios.forEach(([fieldName, expectedName]) => {
    test(`"${fieldName}" should be "${expectedName}"`, () => {
      expect(getSafeAggregationName(fieldName, 0)).toBe(expectedName);
    });
  });
});
// Checks that getLatestDataOrBucketTimestamp returns the later of its two timestamps.
describe('getLatestDataOrBucketTimestamp', () => {
  type Timestamp = number | undefined;

  // Each row is [test title, first argument, second argument, expected result].
  const scenarios: Array<[string, Timestamp, Timestamp, Timestamp]> = [
    [
      'returns expected value when no gap in data at end of bucket processing',
      1549929594000,
      1549928700000,
      1549929594000,
    ],
    [
      'returns expected value when there is a gap in data at end of bucket processing',
      1549929594000,
      1562256600000,
      1562256600000,
    ],
    ['returns expected value when job has not run', undefined, undefined, undefined],
  ];

  scenarios.forEach(([title, firstTimestamp, secondTimestamp, expected]) => {
    test(title, () => {
      expect(getLatestDataOrBucketTimestamp(firstTimestamp, secondTimestamp)).toBe(expected);
    });
  });
});
// Checks getEarliestDatafeedStartTime with two timestamps and an optional bucket span.
describe('getEarliestDatafeedStartTime', () => {
  test('returns expected value when no gap in data at end of bucket processing', () => {
    // The first timestamp is the later one and is expected back.
    expect(getEarliestDatafeedStartTime(1549929594000, 1549928700000)).toBe(1549929594000);
  });

  test('returns expected value when there is a gap in data at end of bucket processing', () => {
    // The second timestamp is the later one and is expected back.
    expect(getEarliestDatafeedStartTime(1549929594000, 1562256600000)).toBe(1562256600000);
  });

  test('returns expected value when bucket span is provided', () => {
    // Expected value is the second timestamp plus the one-hour span (3,600,000 ms).
    expect(
      getEarliestDatafeedStartTime(1549929594000, 1562256600000, moment.duration(1, 'h'))
    ).toBe(1562260200000);
  });

  test('returns expected value when job has not run', () => {
    // This case previously called getLatestDataOrBucketTimestamp by copy-paste,
    // so the function under test was never exercised with undefined inputs.
    expect(getEarliestDatafeedStartTime(undefined, undefined)).toBe(undefined);
  });
});
});