[ML] Datafeed preview based job validation check (#109080)

* [ML] Datafeed preview based job validation check

* updating warning text

* fix tests

* adding jest test

* updating tests

* fixing translation ids

* fixing more tests

* changes based on review

* disabled validation step next button when validation fails

* disabling nano job test

* adding test skip comment

Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
James Gowdy 2021-09-03 15:36:03 +01:00 committed by GitHub
parent 66cb058fa7
commit a18cc31924
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 209 additions and 40 deletions

View file

@ -78,4 +78,7 @@ export const nonBasicIssuesMessages = [
{
id: 'missing_summary_count_field_name',
},
{
id: 'datafeed_preview_failed',
},
];

View file

@ -173,6 +173,12 @@ describe('Constants: Messages parseMessages()', () => {
text:
'A job configured with a datafeed with aggregations must set summary_count_field_name; use doc_count or suitable alternative.',
},
{
id: 'datafeed_preview_failed',
status: 'error',
text:
'The datafeed preview failed. This may be due to an error in the job or datafeed configurations.',
},
]);
});
});

View file

@ -626,6 +626,30 @@ export const getMessages = once((docLinks?: DocLinksStart) => {
'the UNIX epoch beginning. Timestamps before 01/01/1970 00:00:00 (UTC) are not supported for machine learning jobs.',
}),
},
datafeed_preview_no_documents: {
status: VALIDATION_STATUS.WARNING,
heading: i18n.translate(
'xpack.ml.models.jobValidation.messages.datafeedPreviewNoDocumentsHeading',
{
defaultMessage: 'Datafeed preview',
}
),
text: i18n.translate(
'xpack.ml.models.jobValidation.messages.datafeedPreviewNoDocumentsMessage',
{
defaultMessage:
'Running the datafeed preview over the current job configuration produces no results. ' +
'If the index contains no documents this warning can be ignored, otherwise the job may be misconfigured.',
}
),
},
datafeed_preview_failed: {
status: VALIDATION_STATUS.ERROR,
text: i18n.translate('xpack.ml.models.jobValidation.messages.datafeedPreviewFailedMessage', {
defaultMessage:
'The datafeed preview failed. This may be due to an error in the job or datafeed configurations.',
}),
},
};
});

View file

@ -5,7 +5,7 @@
* 2.0.
*/
import React, { Fragment, FC, useContext, useEffect } from 'react';
import React, { Fragment, FC, useContext, useEffect, useState } from 'react';
import { WizardNav } from '../wizard_nav';
import { WIZARD_STEPS, StepProps } from '../step_types';
import { JobCreatorContext } from '../job_creator_context';
@ -22,6 +22,7 @@ const idFilterList = [
export const ValidationStep: FC<StepProps> = ({ setCurrentStep, isCurrentStep }) => {
const { jobCreator, jobCreatorUpdate, jobValidator } = useContext(JobCreatorContext);
const [nextActive, setNextActive] = useState(false);
if (jobCreator.type === JOB_TYPE.ADVANCED) {
// for advanced jobs, ignore time range warning as the
@ -52,6 +53,7 @@ export const ValidationStep: FC<StepProps> = ({ setCurrentStep, isCurrentStep })
// keep a record of the advanced validation in the jobValidator
function setIsValid(valid: boolean) {
jobValidator.advancedValid = valid;
setNextActive(valid);
}
return (
@ -69,7 +71,7 @@ export const ValidationStep: FC<StepProps> = ({ setCurrentStep, isCurrentStep })
<WizardNav
previous={() => setCurrentStep(WIZARD_STEPS.JOB_DETAILS)}
next={() => setCurrentStep(WIZARD_STEPS.SUMMARY)}
nextActive={true}
nextActive={nextActive}
/>
</Fragment>
)}

View file

@ -10,6 +10,7 @@ import { IScopedClusterClient } from 'kibana/server';
import { validateJob, ValidateJobPayload } from './job_validation';
import { ES_CLIENT_TOTAL_HITS_RELATION } from '../../../common/types/es_client';
import type { MlClient } from '../../lib/ml_client';
import type { AuthorizationHeader } from '../../lib/request_authorization';
const callAs = {
fieldCaps: () => Promise.resolve({ body: { fields: [] } }),
@ -19,6 +20,8 @@ const callAs = {
}),
};
const authHeader: AuthorizationHeader = {};
const mlClusterClient = ({
asCurrentUser: callAs,
asInternalUser: callAs,
@ -34,18 +37,19 @@ const mlClient = ({
},
},
}),
previewDatafeed: () => Promise.resolve({ body: [{}] }),
} as unknown) as MlClient;
// Note: The tests cast `payload` as any
// so we can simulate possible runtime payloads
// that don't satisfy the TypeScript specs.
describe('ML - validateJob', () => {
it('basic validation messages', () => {
it('basic validation messages', async () => {
const payload = ({
job: { analysis_config: { detectors: [] } },
} as unknown) as ValidateJobPayload;
return validateJob(mlClusterClient, mlClient, payload).then((messages) => {
return validateJob(mlClusterClient, mlClient, payload, authHeader).then((messages) => {
const ids = messages.map((m) => m.id);
expect(ids).toStrictEqual([
@ -58,14 +62,14 @@ describe('ML - validateJob', () => {
});
const jobIdTests = (testIds: string[], messageId: string) => {
const promises = testIds.map((id) => {
const promises = testIds.map(async (id) => {
const payload = ({
job: {
analysis_config: { detectors: [] },
job_id: id,
},
} as unknown) as ValidateJobPayload;
return validateJob(mlClusterClient, mlClient, payload).catch(() => {
return validateJob(mlClusterClient, mlClient, payload, authHeader).catch(() => {
new Error('Promise should not fail for jobIdTests.');
});
});
@ -86,7 +90,7 @@ describe('ML - validateJob', () => {
job: { analysis_config: { detectors: [] }, groups: testIds },
} as unknown) as ValidateJobPayload;
return validateJob(mlClusterClient, mlClient, payload).then((messages) => {
return validateJob(mlClusterClient, mlClient, payload, authHeader).then((messages) => {
const ids = messages.map((m) => m.id);
expect(ids.includes(messageId)).toBe(true);
});
@ -126,7 +130,7 @@ describe('ML - validateJob', () => {
const payload = ({
job: { analysis_config: { bucket_span: format, detectors: [] } },
} as unknown) as ValidateJobPayload;
return validateJob(mlClusterClient, mlClient, payload).catch(() => {
return validateJob(mlClusterClient, mlClient, payload, authHeader).catch(() => {
new Error('Promise should not fail for bucketSpanFormatTests.');
});
});
@ -150,7 +154,7 @@ describe('ML - validateJob', () => {
return bucketSpanFormatTests(validBucketSpanFormats, 'bucket_span_valid');
});
it('at least one detector function is empty', () => {
it('at least one detector function is empty', async () => {
const payload = ({
job: { analysis_config: { detectors: [] as Array<{ function?: string }> } },
} as unknown) as ValidateJobPayload;
@ -165,13 +169,13 @@ describe('ML - validateJob', () => {
function: undefined,
});
return validateJob(mlClusterClient, mlClient, payload).then((messages) => {
return validateJob(mlClusterClient, mlClient, payload, authHeader).then((messages) => {
const ids = messages.map((m) => m.id);
expect(ids.includes('detectors_function_empty')).toBe(true);
});
});
it('detector function is not empty', () => {
it('detector function is not empty', async () => {
const payload = ({
job: { analysis_config: { detectors: [] as Array<{ function?: string }> } },
} as unknown) as ValidateJobPayload;
@ -179,37 +183,37 @@ describe('ML - validateJob', () => {
function: 'count',
});
return validateJob(mlClusterClient, mlClient, payload).then((messages) => {
return validateJob(mlClusterClient, mlClient, payload, authHeader).then((messages) => {
const ids = messages.map((m) => m.id);
expect(ids.includes('detectors_function_not_empty')).toBe(true);
});
});
it('invalid index fields', () => {
it('invalid index fields', async () => {
const payload = ({
job: { analysis_config: { detectors: [] } },
fields: {},
} as unknown) as ValidateJobPayload;
return validateJob(mlClusterClient, mlClient, payload).then((messages) => {
return validateJob(mlClusterClient, mlClient, payload, authHeader).then((messages) => {
const ids = messages.map((m) => m.id);
expect(ids.includes('index_fields_invalid')).toBe(true);
});
});
it('valid index fields', () => {
it('valid index fields', async () => {
const payload = ({
job: { analysis_config: { detectors: [] } },
fields: { testField: {} },
} as unknown) as ValidateJobPayload;
return validateJob(mlClusterClient, mlClient, payload).then((messages) => {
return validateJob(mlClusterClient, mlClient, payload, authHeader).then((messages) => {
const ids = messages.map((m) => m.id);
expect(ids.includes('index_fields_valid')).toBe(true);
});
});
const getBasicPayload = (): any => ({
const getBasicPayload = (): ValidateJobPayload => ({
job: {
job_id: 'test',
analysis_config: {
@ -231,7 +235,7 @@ describe('ML - validateJob', () => {
const payload = getBasicPayload() as any;
delete payload.job.analysis_config.influencers;
validateJob(mlClusterClient, mlClient, payload).then(
validateJob(mlClusterClient, mlClient, payload, authHeader).then(
() =>
done(
new Error('Promise should not resolve for this test when influencers is not an Array.')
@ -240,10 +244,10 @@ describe('ML - validateJob', () => {
);
});
it('detect duplicate detectors', () => {
it('detect duplicate detectors', async () => {
const payload = getBasicPayload() as any;
payload.job.analysis_config.detectors.push({ function: 'count' });
return validateJob(mlClusterClient, mlClient, payload).then((messages) => {
return validateJob(mlClusterClient, mlClient, payload, authHeader).then((messages) => {
const ids = messages.map((m) => m.id);
expect(ids).toStrictEqual([
'job_id_valid',
@ -256,7 +260,7 @@ describe('ML - validateJob', () => {
});
});
it('dedupe duplicate messages', () => {
it('dedupe duplicate messages', async () => {
const payload = getBasicPayload() as any;
// in this test setup, the following configuration passes
// the duplicate detectors check, but would return the same
@ -266,7 +270,7 @@ describe('ML - validateJob', () => {
{ function: 'count', by_field_name: 'airline' },
{ function: 'count', partition_field_name: 'airline' },
];
return validateJob(mlClusterClient, mlClient, payload).then((messages) => {
return validateJob(mlClusterClient, mlClient, payload, authHeader).then((messages) => {
const ids = messages.map((m) => m.id);
expect(ids).toStrictEqual([
'job_id_valid',
@ -278,9 +282,9 @@ describe('ML - validateJob', () => {
});
});
it('basic validation passes, extended checks return some messages', () => {
it('basic validation passes, extended checks return some messages', async () => {
const payload = getBasicPayload();
return validateJob(mlClusterClient, mlClient, payload).then((messages) => {
return validateJob(mlClusterClient, mlClient, payload, authHeader).then((messages) => {
const ids = messages.map((m) => m.id);
expect(ids).toStrictEqual([
'job_id_valid',
@ -291,8 +295,8 @@ describe('ML - validateJob', () => {
});
});
it('categorization job using mlcategory passes aggregatable field check', () => {
const payload: any = {
it('categorization job using mlcategory passes aggregatable field check', async () => {
const payload: ValidateJobPayload = {
job: {
job_id: 'categorization_test',
analysis_config: {
@ -312,7 +316,7 @@ describe('ML - validateJob', () => {
fields: { testField: {} },
};
return validateJob(mlClusterClient, mlClient, payload).then((messages) => {
return validateJob(mlClusterClient, mlClient, payload, authHeader).then((messages) => {
const ids = messages.map((m) => m.id);
expect(ids).toStrictEqual([
'job_id_valid',
@ -325,8 +329,8 @@ describe('ML - validateJob', () => {
});
});
it('non-existent field reported as non aggregatable', () => {
const payload: any = {
it('non-existent field reported as non aggregatable', async () => {
const payload: ValidateJobPayload = {
job: {
job_id: 'categorization_test',
analysis_config: {
@ -345,7 +349,7 @@ describe('ML - validateJob', () => {
fields: { testField: {} },
};
return validateJob(mlClusterClient, mlClient, payload).then((messages) => {
return validateJob(mlClusterClient, mlClient, payload, authHeader).then((messages) => {
const ids = messages.map((m) => m.id);
expect(ids).toStrictEqual([
'job_id_valid',
@ -357,8 +361,8 @@ describe('ML - validateJob', () => {
});
});
it('script field not reported as non aggregatable', () => {
const payload: any = {
it('script field not reported as non aggregatable', async () => {
const payload: ValidateJobPayload = {
job: {
job_id: 'categorization_test',
analysis_config: {
@ -387,7 +391,7 @@ describe('ML - validateJob', () => {
fields: { testField: {} },
};
return validateJob(mlClusterClient, mlClient, payload).then((messages) => {
return validateJob(mlClusterClient, mlClient, payload, authHeader).then((messages) => {
const ids = messages.map((m) => m.id);
expect(ids).toStrictEqual([
'job_id_valid',
@ -399,4 +403,88 @@ describe('ML - validateJob', () => {
]);
});
});
// Verifies that when the datafeed preview resolves with an empty array of
// documents, validateJob reports the 'datafeed_preview_no_documents' warning
// in addition to the other expected validation messages.
it('datafeed preview contains no docs', async () => {
const payload: ValidateJobPayload = {
job: {
job_id: 'categorization_test',
analysis_config: {
bucket_span: '15m',
detectors: [
{
function: 'count',
partition_field_name: 'custom_script_field',
},
],
influencers: [''],
},
data_description: { time_field: '@timestamp' },
datafeed_config: {
indices: [],
},
},
fields: { testField: {} },
};
// Override the shared mlClient mock so previewDatafeed resolves with no documents.
const mlClientEmptyDatafeedPreview = ({
...mlClient,
previewDatafeed: () => Promise.resolve({ body: [] }),
} as unknown) as MlClient;
return validateJob(mlClusterClient, mlClientEmptyDatafeedPreview, payload, authHeader).then(
(messages) => {
const ids = messages.map((m) => m.id);
// The full, ordered message list is pinned so new/removed checks are caught.
expect(ids).toStrictEqual([
'job_id_valid',
'detectors_function_not_empty',
'index_fields_valid',
'field_not_aggregatable',
'time_field_invalid',
'datafeed_preview_no_documents',
]);
}
);
});
// Verifies that when the datafeed preview call rejects, validateJob reports
// the 'datafeed_preview_failed' error message instead of crashing.
it('datafeed preview failed', async () => {
const payload: ValidateJobPayload = {
job: {
job_id: 'categorization_test',
analysis_config: {
bucket_span: '15m',
detectors: [
{
function: 'count',
partition_field_name: 'custom_script_field',
},
],
influencers: [''],
},
data_description: { time_field: '@timestamp' },
datafeed_config: {
indices: [],
},
},
fields: { testField: {} },
};
// NOTE(review): despite the 'Empty' in its name (copied from the previous
// test), this mock REJECTS the preview promise to simulate a failure.
const mlClientEmptyDatafeedPreview = ({
...mlClient,
previewDatafeed: () => Promise.reject({}),
} as unknown) as MlClient;
return validateJob(mlClusterClient, mlClientEmptyDatafeedPreview, payload, authHeader).then(
(messages) => {
const ids = messages.map((m) => m.id);
// The full, ordered message list is pinned so new/removed checks are caught.
expect(ids).toStrictEqual([
'job_id_valid',
'detectors_function_not_empty',
'index_fields_valid',
'field_not_aggregatable',
'time_field_invalid',
'datafeed_preview_failed',
]);
}
);
});
});

View file

@ -6,7 +6,7 @@
*/
import Boom from '@hapi/boom';
import { IScopedClusterClient } from 'kibana/server';
import type { IScopedClusterClient } from 'kibana/server';
import { TypeOf } from '@kbn/config-schema';
import { fieldsServiceProvider } from '../fields_service';
import { getMessages, MessageId, JobValidationMessage } from '../../../common/constants/messages';
@ -17,12 +17,14 @@ import { basicJobValidation, uniqWithIsEqual } from '../../../common/util/job_ut
import { validateBucketSpan } from './validate_bucket_span';
import { validateCardinality } from './validate_cardinality';
import { validateInfluencers } from './validate_influencers';
import { validateDatafeedPreview } from './validate_datafeed_preview';
import { validateModelMemoryLimit } from './validate_model_memory_limit';
import { validateTimeRange, isValidTimeField } from './validate_time_range';
import { validateJobSchema } from '../../routes/schemas/job_validation_schema';
import { CombinedJob } from '../../../common/types/anomaly_detection_jobs';
import type { CombinedJob } from '../../../common/types/anomaly_detection_jobs';
import type { MlClient } from '../../lib/ml_client';
import { getDatafeedAggregations } from '../../../common/util/datafeed_utils';
import type { AuthorizationHeader } from '../../lib/request_authorization';
export type ValidateJobPayload = TypeOf<typeof validateJobSchema>;
@ -34,6 +36,7 @@ export async function validateJob(
client: IScopedClusterClient,
mlClient: MlClient,
payload: ValidateJobPayload,
authHeader: AuthorizationHeader,
isSecurityDisabled?: boolean
) {
const messages = getMessages();
@ -107,6 +110,8 @@ export async function validateJob(
if (datafeedAggregations !== undefined && !job.analysis_config?.summary_count_field_name) {
validationMessages.push({ id: 'missing_summary_count_field_name' });
}
validationMessages.push(...(await validateDatafeedPreview(mlClient, authHeader, job)));
} else {
validationMessages = basicValidation.messages;
validationMessages.push({ id: 'skipped_extended_tests' });

View file

@ -0,0 +1,38 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { MlClient } from '../../lib/ml_client';
import type { AuthorizationHeader } from '../../lib/request_authorization';
import type { CombinedJob } from '../../../common/types/anomaly_detection_jobs';
import type { JobValidationMessage } from '../../../common/constants/messages';
/**
 * Runs a datafeed preview for the supplied combined job configuration and
 * translates the outcome into job validation messages.
 *
 * @param mlClient - ML client used to call the preview datafeed endpoint.
 * @param authHeader - Authorization header forwarded with the preview request.
 * @param job - Combined job (job + datafeed) configuration to preview.
 * @returns `datafeed_preview_failed` if the preview call throws,
 *   `datafeed_preview_no_documents` if it returns no documents, otherwise
 *   an empty list of messages.
 */
export async function validateDatafeedPreview(
  mlClient: MlClient,
  authHeader: AuthorizationHeader,
  job: CombinedJob
): Promise<JobValidationMessage[]> {
  // Split the combined job back into the job and datafeed configurations
  // expected by the preview endpoint.
  const { datafeed_config: datafeedConfig, ...jobConfig } = job;
  let previewDocs: unknown[];
  try {
    // previewDatafeed response type is incorrect, hence the double cast.
    const response = ((await mlClient.previewDatafeed(
      {
        body: {
          job_config: jobConfig,
          datafeed_config: datafeedConfig,
        },
      },
      authHeader
    )) as unknown) as { body: unknown[] };
    previewDocs = response.body;
  } catch (error) {
    // Any failure while previewing indicates a problem with the job or
    // datafeed configuration.
    return [{ id: 'datafeed_preview_failed' }];
  }
  return Array.isArray(previewDocs) && previewDocs.length > 0
    ? []
    : [{ id: 'datafeed_preview_no_documents' }];
}

View file

@ -8,9 +8,9 @@
import Boom from '@hapi/boom';
import { IScopedClusterClient } from 'kibana/server';
import { TypeOf } from '@kbn/config-schema';
import { AnalysisConfig, Datafeed } from '../../common/types/anomaly_detection_jobs';
import type { AnalysisConfig, Datafeed } from '../../common/types/anomaly_detection_jobs';
import { wrapError } from '../client/error_wrapper';
import { RouteInitialization } from '../types';
import type { RouteInitialization } from '../types';
import {
estimateBucketSpanSchema,
modelMemoryLimitSchema,
@ -20,6 +20,7 @@ import {
import { estimateBucketSpanFactory } from '../models/bucket_span_estimator';
import { calculateModelMemoryLimitProvider } from '../models/calculate_model_memory_limit';
import { validateJob, validateCardinality } from '../models/job_validation';
import { getAuthorizationHeader } from '../lib/request_authorization';
import type { MlClient } from '../lib/ml_client';
type CalculateModelMemoryLimitPayload = TypeOf<typeof modelMemoryLimitSchema>;
@ -192,6 +193,7 @@ export function jobValidationRoutes({ router, mlLicense, routeGuard }: RouteInit
client,
mlClient,
request.body,
getAuthorizationHeader(request),
mlLicense.isSecurityEnabled() === false
);

View file

@ -52,7 +52,7 @@ export const datafeedConfigSchema = schema.object({
runtime_mappings: schema.maybe(schema.any()),
scroll_size: schema.maybe(schema.number()),
delayed_data_check_config: schema.maybe(schema.any()),
indices_options: indicesOptionsSchema,
indices_options: schema.maybe(indicesOptionsSchema),
});
export const datafeedIdSchema = schema.object({ datafeedId: schema.string() });

View file

@ -184,7 +184,7 @@ export default ({ getService }: FtrProviderContext) => {
expect(body.length).to.eql(
expectedResponse.length,
`Response body should have ${expectedResponse.length} entries (got ${body})`
`Response body should have ${expectedResponse.length} entries (got ${JSON.stringify(body)})`
);
for (const entry of expectedResponse) {
const responseEntry = body.find((obj: any) => obj.id === entry.id);

View file

@ -114,7 +114,8 @@ export default function ({ getService }: FtrProviderContext) {
},
];
describe('job on data set with date_nanos time field', function () {
// test skipped until https://github.com/elastic/elasticsearch/pull/77109 is fixed
describe.skip('job on data set with date_nanos time field', function () {
this.tags(['mlqa']);
before(async () => {
await esArchiver.loadIfNeeded('x-pack/test/functional/es_archives/ml/event_rate_nanos');