[ML] Add support for date_nanos time field in anomaly job wizard (#59017)

* [ML] Add support for date_nanos time field in anomaly job wizard

* [ML] Edits following review

* [ML] Add functional test for creating job off date_nanos data
This commit is contained in:
Pete Harverson 2020-03-05 09:28:31 +00:00 committed by GitHub
parent b83f81458c
commit b104980b88
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 2023 additions and 81 deletions

View file

@ -495,7 +495,7 @@ export const getMessages = () => {
time_field_invalid: {
status: 'ERROR',
text: i18n.translate('xpack.ml.models.jobValidation.messages.timeFieldInvalidMessage', {
defaultMessage: `{timeField} cannot be used as the time-field because it's not a valid field of type 'date'.`,
defaultMessage: `{timeField} cannot be used as the time field because it is not a field of type 'date' or 'date_nanos'.`,
values: {
timeField: `'{{timeField}}'`,
},

View file

@ -1,80 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
import _ from 'lodash';
import { ES_FIELD_TYPES } from '../../../../../../src/plugins/data/server';
import { parseInterval } from '../../../../../legacy/plugins/ml/common/util/parse_interval';
import { validateJobObject } from './validate_job_object';
// A valid time range must cover at least this many bucket spans.
const BUCKET_SPAN_COMPARE_FACTOR = 25;
// Absolute minimum time span: two hours, in milliseconds.
const MIN_TIME_SPAN_MS = 7200000;
// Human-readable form of MIN_TIME_SPAN_MS, included in validation messages.
const MIN_TIME_SPAN_READABLE = '2 hours';
export async function isValidTimeField(callWithRequest, job) {
const index = job.datafeed_config.indices.join(',');
const timeField = job.data_description.time_field;
// check if time_field is of type 'date'
const fieldCaps = await callWithRequest('fieldCaps', {
index,
fields: [timeField],
});
// get the field's type with the following notation
// because a nested field could contain dots and confuse _.get
const fieldType = _.get(fieldCaps, `fields['${timeField}'].date.type`);
return fieldType === ES_FIELD_TYPES.DATE;
}
export async function validateTimeRange(callWithRequest, job, duration) {
const messages = [];
validateJobObject(job);
// check if time_field is of type 'date'
if (!(await isValidTimeField(callWithRequest, job))) {
messages.push({
id: 'time_field_invalid',
timeField: job.data_description.time_field,
});
// if the time field is invalid, skip all other checks
return Promise.resolve(messages);
}
// if there is no duration, do not run the estimate test
if (
typeof duration === 'undefined' ||
typeof duration.start === 'undefined' ||
typeof duration.end === 'undefined'
) {
return Promise.resolve(messages);
}
// check if time range is after the Unix epoch start
if (duration.start < 0 || duration.end < 0) {
messages.push({ id: 'time_range_before_epoch' });
}
// check for minimum time range (25 buckets or 2 hours, whichever is longer)
const bucketSpan = parseInterval(job.analysis_config.bucket_span).valueOf();
const minTimeSpanBasedOnBucketSpan = bucketSpan * BUCKET_SPAN_COMPARE_FACTOR;
const timeSpan = duration.end - duration.start;
const minRequiredTimeSpan = Math.max(MIN_TIME_SPAN_MS, minTimeSpanBasedOnBucketSpan);
if (minRequiredTimeSpan > timeSpan) {
messages.push({
id: 'time_range_short',
minTimeSpanReadable: MIN_TIME_SPAN_READABLE,
bucketSpanCompareFactor: BUCKET_SPAN_COMPARE_FACTOR,
});
}
if (messages.length === 0) {
messages.push({ id: 'success_time_range' });
}
return messages;
}

View file

@ -0,0 +1,104 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
import { APICaller } from 'src/core/server';
import { ES_FIELD_TYPES } from '../../../../../../src/plugins/data/server';
import { parseInterval } from '../../../../../legacy/plugins/ml/common/util/parse_interval';
import { CombinedJob } from '../../../../../legacy/plugins/ml/public/application/jobs/new_job/common/job_creator/configs';
// @ts-ignore
import { validateJobObject } from './validate_job_object';
// Shape of a single message returned by the time range validation.
// `id` identifies the message type; the optional fields supply values for
// the corresponding i18n message templates.
interface ValidateTimeRangeMessage {
  id: string;
  timeField?: string;
  minTimeSpanReadable?: string;
  bucketSpanCompareFactor?: number;
}

// Epoch-millisecond start/end of the range selected for the job.
interface TimeRange {
  start: number;
  end: number;
}

// A valid time range must cover at least this many bucket spans.
const BUCKET_SPAN_COMPARE_FACTOR = 25;
// Absolute minimum time span: two hours, in milliseconds.
const MIN_TIME_SPAN_MS = 7200000;
// Human-readable form of MIN_TIME_SPAN_MS, included in validation messages.
const MIN_TIME_SPAN_READABLE = '2 hours';
/**
 * Checks whether the job's configured time field is mapped as type 'date'
 * or 'date_nanos' in the datafeed indices, using the field capabilities API.
 *
 * @param callAsCurrentUser - scoped function for calling the Elasticsearch API
 * @param job - combined job and datafeed configuration
 * @returns true if the time field is of type 'date' or 'date_nanos'
 */
export async function isValidTimeField(callAsCurrentUser: APICaller, job: CombinedJob) {
  const index = job.datafeed_config.indices.join(',');
  const timeField = job.data_description.time_field;

  // check if time_field is of type 'date' or 'date_nanos'
  const fieldCaps = await callAsCurrentUser('fieldCaps', {
    index,
    fields: [timeField],
  });

  // Guard the whole lookup path: the response may not contain a `fields`
  // entry at all (e.g. missing index), which previously threw a TypeError.
  // Fall back from the 'date' capability to 'date_nanos' via ??.
  const fieldType =
    fieldCaps?.fields?.[timeField]?.date?.type ?? fieldCaps?.fields?.[timeField]?.date_nanos?.type;
  return fieldType === ES_FIELD_TYPES.DATE || fieldType === ES_FIELD_TYPES.DATE_NANOS;
}
/**
 * Validates the time field and selected time range for a job.
 *
 * Returns an array of ValidateTimeRangeMessage objects; each `id` identifies
 * the validation outcome and the optional fields carry values for the
 * corresponding i18n message templates.
 */
export async function validateTimeRange(
  callAsCurrentUser: APICaller,
  job: CombinedJob,
  timeRange: TimeRange | undefined
) {
  const validationMessages: ValidateTimeRangeMessage[] = [];

  validateJobObject(job);

  // The time field must map to 'date' or 'date_nanos'; every other check is
  // meaningless without a usable time field, so bail out early.
  const timeFieldValid = await isValidTimeField(callAsCurrentUser, job);
  if (!timeFieldValid) {
    validationMessages.push({
      id: 'time_field_invalid',
      timeField: job.data_description.time_field,
    });
    return validationMessages;
  }

  // Without a fully specified time range there is nothing further to estimate.
  if (timeRange === undefined || timeRange.start === undefined || timeRange.end === undefined) {
    return validationMessages;
  }

  // Flag ranges that begin before the Unix epoch.
  if (timeRange.start < 0 || timeRange.end < 0) {
    validationMessages.push({ id: 'time_range_before_epoch' });
  }

  // The range must cover at least 25 buckets or 2 hours, whichever is longer.
  const parsedBucketSpan = parseInterval(job.analysis_config.bucket_span);
  if (parsedBucketSpan === null) {
    validationMessages.push({ id: 'bucket_span_invalid' });
  } else {
    const minRequiredSpanMs = Math.max(
      MIN_TIME_SPAN_MS,
      parsedBucketSpan.asMilliseconds() * BUCKET_SPAN_COMPARE_FACTOR
    );
    if (timeRange.end - timeRange.start < minRequiredSpanMs) {
      validationMessages.push({
        id: 'time_range_short',
        minTimeSpanReadable: MIN_TIME_SPAN_READABLE,
        bucketSpanCompareFactor: BUCKET_SPAN_COMPARE_FACTOR,
      });
    }
  }

  if (validationMessages.length === 0) {
    validationMessages.push({ id: 'success_time_range' });
  }

  return validationMessages;
}

View file

@ -0,0 +1,440 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
import expect from '@kbn/expect';
import { FtrProviderContext } from '../../../ftr_provider_context';
// Detector configuration entered in the advanced wizard's detector modal.
// Only `identifier` and `function` are required; the remaining fields are
// set in the modal only when present on the test data.
interface Detector {
  identifier: string;
  function: string;
  field?: string;
  byField?: string;
  overField?: string;
  partitionField?: string;
  excludeFrequent?: string;
  description?: string;
}

// Optional per-test overrides for the datafeed step; absent values leave
// the wizard defaults in place.
interface DatafeedConfig {
  queryDelay?: string;
  frequency?: string;
  scrollSize?: string;
}

// Configuration for the wizard's "pick fields" step.
interface PickFieldsConfig {
  detectors: Detector[];
  influencers: string[];
  bucketSpan: string;
  memoryLimit: string;
  summaryCountField?: string;
}
// type guards
// Own-property check that is safe for objects without Object.prototype
// (e.g. Object.create(null)) and immune to a shadowed `hasOwnProperty`;
// calling `arg.hasOwnProperty(...)` directly is flagged by the
// no-prototype-builtins lint rule and throws for null-prototype objects.
const hasOwnProp = (arg: any, prop: string): boolean =>
  Object.prototype.hasOwnProperty.call(arg, prop);

// Detector
const isDetectorWithField = (arg: any): arg is Required<Pick<Detector, 'field'>> => {
  return hasOwnProp(arg, 'field');
};
const isDetectorWithByField = (arg: any): arg is Required<Pick<Detector, 'byField'>> => {
  return hasOwnProp(arg, 'byField');
};
const isDetectorWithOverField = (arg: any): arg is Required<Pick<Detector, 'overField'>> => {
  return hasOwnProp(arg, 'overField');
};
const isDetectorWithPartitionField = (
  arg: any
): arg is Required<Pick<Detector, 'partitionField'>> => {
  return hasOwnProp(arg, 'partitionField');
};
const isDetectorWithExcludeFrequent = (
  arg: any
): arg is Required<Pick<Detector, 'excludeFrequent'>> => {
  return hasOwnProp(arg, 'excludeFrequent');
};
const isDetectorWithDescription = (arg: any): arg is Required<Pick<Detector, 'description'>> => {
  return hasOwnProp(arg, 'description');
};

// DatafeedConfig
const isDatafeedConfigWithQueryDelay = (
  arg: any
): arg is Required<Pick<DatafeedConfig, 'queryDelay'>> => {
  return hasOwnProp(arg, 'queryDelay');
};
const isDatafeedConfigWithFrequency = (
  arg: any
): arg is Required<Pick<DatafeedConfig, 'frequency'>> => {
  return hasOwnProp(arg, 'frequency');
};
const isDatafeedConfigWithScrollSize = (
  arg: any
): arg is Required<Pick<DatafeedConfig, 'scrollSize'>> => {
  return hasOwnProp(arg, 'scrollSize');
};

// PickFieldsConfig
const isPickFieldsConfigWithSummaryCountField = (
  arg: any
): arg is Required<Pick<PickFieldsConfig, 'summaryCountField'>> => {
  return hasOwnProp(arg, 'summaryCountField');
};
// Functional test suite: creates an anomaly detection job via the advanced
// job wizard against an index whose time field is mapped as 'date_nanos',
// runs the datafeed to completion and verifies the resulting job counts,
// model size stats and detector results.
// eslint-disable-next-line import/no-default-export
export default function({ getService }: FtrProviderContext) {
const esArchiver = getService('esArchiver');
const ml = getService('ml');

// Wizard defaults that the suite asserts before applying any per-test
// overrides from datafeedConfig.
const defaultValues = {
datafeedQuery: `{
"bool": {
"must": [
{
"match_all": {}
}
]
}
}`,
queryDelay: '60s',
frequency: '450s',
scrollSize: '1000',
};

// One entry per job configuration to exercise; each entry describes the
// wizard inputs and the values expected in the job list and job details
// once the datafeed has finished processing.
const testDataList = [
{
suiteTitle: 'with count detector and model plot disabled',
jobSource: 'event_rate_gen_trend_nanos',
jobId: `event_rate_nanos_count_1_${Date.now()}`,
jobDescription:
'Create advanced job based on the event rate dataset with a date_nanos time field, 30m bucketspan and count',
jobGroups: ['automated', 'event-rate', 'date-nanos'],
pickFieldsConfig: {
detectors: [
{
identifier: 'count',
function: 'count',
description: 'event rate',
} as Detector,
],
summaryCountField: 'count',
influencers: [],
bucketSpan: '30m',
memoryLimit: '10mb',
} as PickFieldsConfig,
datafeedConfig: {} as DatafeedConfig,
expected: {
wizard: {
timeField: '@timestamp',
},
row: {
recordCount: '105,120',
memoryStatus: 'ok',
jobState: 'closed',
datafeedState: 'stopped',
latestTimestamp: '2016-01-01 00:00:00',
},
counts: {
processed_record_count: '105,120',
processed_field_count: '105,120',
input_bytes: '4.2 MB',
input_field_count: '105,120',
invalid_date_count: '0',
missing_field_count: '0',
out_of_order_timestamp_count: '0',
empty_bucket_count: '0',
sparse_bucket_count: '0',
bucket_count: '17,520',
earliest_record_timestamp: '2015-01-01 00:10:00',
latest_record_timestamp: '2016-01-01 00:00:00',
input_record_count: '105,120',
latest_bucket_timestamp: '2016-01-01 00:00:00',
},
modelSizeStats: {
result_type: 'model_size_stats',
model_bytes_exceeded: '0.0 B',
model_bytes_memory_limit: '10.0 MB',
total_by_field_count: '3',
total_over_field_count: '0',
total_partition_field_count: '2',
bucket_allocation_failures_count: '0',
memory_status: 'ok',
timestamp: '2015-12-31 23:30:00',
},
},
},
];

describe('job on data set with date_nanos time field', function() {
this.tags(['smoke', 'mlqa']);
// Load the date_nanos event rate archive and sign in once for the whole
// suite; unload the archive and clean up ML indices afterwards.
before(async () => {
await esArchiver.load('ml/event_rate_nanos');
await ml.securityUI.loginAsMlPowerUser();
});

after(async () => {
await esArchiver.unload('ml/event_rate_nanos');
await ml.api.cleanMlIndices();
});

// Drive the full advanced-wizard flow once per test data entry. The it()
// blocks are order-dependent: each one advances the wizard one step.
for (const testData of testDataList) {
describe(`${testData.suiteTitle}`, function() {
it('job creation loads the job management page', async () => {
await ml.navigation.navigateToMl();
await ml.navigation.navigateToJobManagement();
});

it('job creation loads the new job source selection page', async () => {
await ml.jobManagement.navigateToNewJobSourceSelection();
});

it('job creation loads the job type selection page', async () => {
await ml.jobSourceSelection.selectSourceForAnomalyDetectionJob(testData.jobSource);
});

it('job creation loads the advanced job wizard page', async () => {
await ml.jobTypeSelection.selectAdvancedJob();
});

it('job creation displays the configure datafeed step', async () => {
await ml.jobWizardCommon.assertConfigureDatafeedSectionExists();
});

it('job creation pre-fills the datafeed query editor', async () => {
await ml.jobWizardAdvanced.assertDatafeedQueryEditorExists();
await ml.jobWizardAdvanced.assertDatafeedQueryEditorValue(defaultValues.datafeedQuery);
});

it('job creation inputs the query delay', async () => {
await ml.jobWizardAdvanced.assertQueryDelayInputExists();
await ml.jobWizardAdvanced.assertQueryDelayValue(defaultValues.queryDelay);
if (isDatafeedConfigWithQueryDelay(testData.datafeedConfig)) {
await ml.jobWizardAdvanced.setQueryDelay(testData.datafeedConfig.queryDelay);
}
});

it('job creation inputs the frequency', async () => {
await ml.jobWizardAdvanced.assertFrequencyInputExists();
await ml.jobWizardAdvanced.assertFrequencyValue(defaultValues.frequency);
if (isDatafeedConfigWithFrequency(testData.datafeedConfig)) {
await ml.jobWizardAdvanced.setFrequency(testData.datafeedConfig.frequency);
}
});

it('job creation inputs the scroll size', async () => {
await ml.jobWizardAdvanced.assertScrollSizeInputExists();
await ml.jobWizardAdvanced.assertScrollSizeValue(defaultValues.scrollSize);
if (isDatafeedConfigWithScrollSize(testData.datafeedConfig)) {
await ml.jobWizardAdvanced.setScrollSize(testData.datafeedConfig.scrollSize);
}
});

// Key check for this suite: the wizard must recognize the date_nanos
// field and pre-select it as the time field.
it('job creation pre-fills the time field', async () => {
await ml.jobWizardAdvanced.assertTimeFieldInputExists();
await ml.jobWizardAdvanced.assertTimeFieldSelection([testData.expected.wizard.timeField]);
});

it('job creation displays the pick fields step', async () => {
await ml.jobWizardCommon.advanceToPickFieldsSection();
});

it('job creation selects the summary count field', async () => {
await ml.jobWizardAdvanced.assertSummaryCountFieldInputExists();
if (isPickFieldsConfigWithSummaryCountField(testData.pickFieldsConfig)) {
await ml.jobWizardAdvanced.selectSummaryCountField(
testData.pickFieldsConfig.summaryCountField
);
} else {
await ml.jobWizardAdvanced.assertSummaryCountFieldSelection([]);
}
});

// For each configured detector: open the modal, assert it starts empty,
// then fill in only the fields present on the test data.
it('job creation adds detectors', async () => {
for (const detector of testData.pickFieldsConfig.detectors) {
await ml.jobWizardAdvanced.openCreateDetectorModal();
await ml.jobWizardAdvanced.assertDetectorFunctionInputExists();
await ml.jobWizardAdvanced.assertDetectorFunctionSelection([]);
await ml.jobWizardAdvanced.assertDetectorFieldInputExists();
await ml.jobWizardAdvanced.assertDetectorFieldSelection([]);
await ml.jobWizardAdvanced.assertDetectorByFieldInputExists();
await ml.jobWizardAdvanced.assertDetectorByFieldSelection([]);
await ml.jobWizardAdvanced.assertDetectorOverFieldInputExists();
await ml.jobWizardAdvanced.assertDetectorOverFieldSelection([]);
await ml.jobWizardAdvanced.assertDetectorPartitionFieldInputExists();
await ml.jobWizardAdvanced.assertDetectorPartitionFieldSelection([]);
await ml.jobWizardAdvanced.assertDetectorExcludeFrequentInputExists();
await ml.jobWizardAdvanced.assertDetectorExcludeFrequentSelection([]);
await ml.jobWizardAdvanced.assertDetectorDescriptionInputExists();
await ml.jobWizardAdvanced.assertDetectorDescriptionValue('');

await ml.jobWizardAdvanced.selectDetectorFunction(detector.function);
if (isDetectorWithField(detector)) {
await ml.jobWizardAdvanced.selectDetectorField(detector.field);
}
if (isDetectorWithByField(detector)) {
await ml.jobWizardAdvanced.selectDetectorByField(detector.byField);
}
if (isDetectorWithOverField(detector)) {
await ml.jobWizardAdvanced.selectDetectorOverField(detector.overField);
}
if (isDetectorWithPartitionField(detector)) {
await ml.jobWizardAdvanced.selectDetectorPartitionField(detector.partitionField);
}
if (isDetectorWithExcludeFrequent(detector)) {
await ml.jobWizardAdvanced.selectDetectorExcludeFrequent(detector.excludeFrequent);
}
if (isDetectorWithDescription(detector)) {
await ml.jobWizardAdvanced.setDetectorDescription(detector.description);
}

await ml.jobWizardAdvanced.confirmAddDetectorModal();
}
});

it('job creation displays detector entries', async () => {
for (const [index, detector] of testData.pickFieldsConfig.detectors.entries()) {
await ml.jobWizardAdvanced.assertDetectorEntryExists(
index,
detector.identifier,
isDetectorWithDescription(detector) ? detector.description : undefined
);
}
});

it('job creation inputs the bucket span', async () => {
await ml.jobWizardCommon.assertBucketSpanInputExists();
await ml.jobWizardCommon.setBucketSpan(testData.pickFieldsConfig.bucketSpan);
});

it('job creation inputs influencers', async () => {
await ml.jobWizardCommon.assertInfluencerInputExists();
await ml.jobWizardCommon.assertInfluencerSelection([]);
for (const influencer of testData.pickFieldsConfig.influencers) {
await ml.jobWizardCommon.addInfluencer(influencer);
}
});

it('job creation inputs the model memory limit', async () => {
await ml.jobWizardCommon.assertModelMemoryLimitInputExists({
withAdvancedSection: false,
});
await ml.jobWizardCommon.setModelMemoryLimit(testData.pickFieldsConfig.memoryLimit, {
withAdvancedSection: false,
});
});

it('job creation displays the job details step', async () => {
await ml.jobWizardCommon.advanceToJobDetailsSection();
});

it('job creation inputs the job id', async () => {
await ml.jobWizardCommon.assertJobIdInputExists();
await ml.jobWizardCommon.setJobId(testData.jobId);
});

it('job creation inputs the job description', async () => {
await ml.jobWizardCommon.assertJobDescriptionInputExists();
await ml.jobWizardCommon.setJobDescription(testData.jobDescription);
});

it('job creation inputs job groups', async () => {
await ml.jobWizardCommon.assertJobGroupInputExists();
for (const jobGroup of testData.jobGroups) {
await ml.jobWizardCommon.addJobGroup(jobGroup);
}
await ml.jobWizardCommon.assertJobGroupSelection(testData.jobGroups);
});

it('job creation opens the additional settings section', async () => {
await ml.jobWizardCommon.ensureAdditionalSettingsSectionOpen();
});

it('job creation displays the model plot switch', async () => {
await ml.jobWizardCommon.assertModelPlotSwitchExists({ withAdvancedSection: false });
});

it('job creation enables the dedicated index switch', async () => {
await ml.jobWizardCommon.assertDedicatedIndexSwitchExists({ withAdvancedSection: false });
await ml.jobWizardCommon.activateDedicatedIndexSwitch({ withAdvancedSection: false });
});

it('job creation displays the validation step', async () => {
await ml.jobWizardCommon.advanceToValidationSection();
});

it('job creation displays the summary step', async () => {
await ml.jobWizardCommon.advanceToSummarySection();
});

// Creates the job, starts the datafeed and waits for all records in the
// archive to be processed before asserting results below.
it('job creation creates the job and finishes processing', async () => {
await ml.jobWizardCommon.assertCreateJobButtonExists();
await ml.jobWizardAdvanced.createJob();
await ml.jobManagement.assertStartDatafeedModalExists();
await ml.jobManagement.confirmStartDatafeedModal();
await ml.jobManagement.waitForJobCompletion(testData.jobId);
});

it('job creation displays the created job in the job list', async () => {
await ml.jobTable.refreshJobList();
await ml.jobTable.filterWithSearchString(testData.jobId);
const rows = await ml.jobTable.parseJobTable();
expect(rows.filter(row => row.id === testData.jobId)).to.have.length(1);
});

it('job creation displays details for the created job in the job list', async () => {
await ml.jobTable.assertJobRowFields(testData.jobId, {
id: testData.jobId,
description: testData.jobDescription,
jobGroups: [...new Set(testData.jobGroups)].sort(),
recordCount: testData.expected.row.recordCount,
memoryStatus: testData.expected.row.memoryStatus,
jobState: testData.expected.row.jobState,
datafeedState: testData.expected.row.datafeedState,
latestTimestamp: testData.expected.row.latestTimestamp,
});

await ml.jobTable.assertJobRowDetailsCounts(
testData.jobId,
{
job_id: testData.jobId,
processed_record_count: testData.expected.counts.processed_record_count,
processed_field_count: testData.expected.counts.processed_field_count,
input_bytes: testData.expected.counts.input_bytes,
input_field_count: testData.expected.counts.input_field_count,
invalid_date_count: testData.expected.counts.invalid_date_count,
missing_field_count: testData.expected.counts.missing_field_count,
out_of_order_timestamp_count: testData.expected.counts.out_of_order_timestamp_count,
empty_bucket_count: testData.expected.counts.empty_bucket_count,
sparse_bucket_count: testData.expected.counts.sparse_bucket_count,
bucket_count: testData.expected.counts.bucket_count,
earliest_record_timestamp: testData.expected.counts.earliest_record_timestamp,
latest_record_timestamp: testData.expected.counts.latest_record_timestamp,
input_record_count: testData.expected.counts.input_record_count,
latest_bucket_timestamp: testData.expected.counts.latest_bucket_timestamp,
},
{
job_id: testData.jobId,
result_type: testData.expected.modelSizeStats.result_type,
model_bytes_exceeded: testData.expected.modelSizeStats.model_bytes_exceeded,
model_bytes_memory_limit: testData.expected.modelSizeStats.model_bytes_memory_limit,
total_by_field_count: testData.expected.modelSizeStats.total_by_field_count,
total_over_field_count: testData.expected.modelSizeStats.total_over_field_count,
total_partition_field_count:
testData.expected.modelSizeStats.total_partition_field_count,
bucket_allocation_failures_count:
testData.expected.modelSizeStats.bucket_allocation_failures_count,
memory_status: testData.expected.modelSizeStats.memory_status,
timestamp: testData.expected.modelSizeStats.timestamp,
}
);
});

it('job creation has detector results', async () => {
for (let i = 0; i < testData.pickFieldsConfig.detectors.length; i++) {
await ml.api.assertDetectorResultsExist(testData.jobId, i);
}
});
});
}
});
}

View file

@ -17,5 +17,6 @@ export default function({ loadTestFile }: FtrProviderContext) {
loadTestFile(require.resolve('./single_metric_viewer'));
loadTestFile(require.resolve('./anomaly_explorer'));
loadTestFile(require.resolve('./categorization_job'));
loadTestFile(require.resolve('./date_nanos_job'));
});
}

File diff suppressed because it is too large Load diff