[ML] Job validation uses fieldCaps to check aggregatable fields to avoid triggering Elasticsearch errors. (#21087)

While the output in the UI was fine, certain job configurations containing non-aggregatable fields could trigger errors on the Elasticsearch side.
This PR fixes it by adding an additional query for fieldCaps to check first which fields are actually aggregatable.
This commit is contained in:
Walter Rafelsberger 2018-07-23 20:35:10 +02:00 committed by GitHub
parent 9e71f5cb5c
commit 3dc5b30f7e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 84 additions and 25 deletions

View file

@ -0,0 +1,18 @@
{
"fields": {
"_source": {
"_source": {
"type": "_source",
"searchable": false,
"aggregatable": false
}
},
"airline": {
"keyword": {
"type": "keyword",
"searchable": false,
"aggregatable": true
}
}
}
}

View file

@ -11,15 +11,22 @@ import expect from 'expect.js';
import { validateCardinality } from '../validate_cardinality';
import mockFareQuoteCardinality from './mock_farequote_cardinality';
import mockFieldCaps from './mock_field_caps';
const mockResponses = {
search: mockFareQuoteCardinality,
fieldCaps: mockFieldCaps
};
// mock callWithRequestFactory
const callWithRequestFactory = (mockSearchResponse, fail = false) => {
return () => {
const callWithRequestFactory = (responses, fail = false) => {
return (requestName) => {
return new Promise((resolve, reject) => {
const response = responses[requestName];
if (fail) {
reject(mockSearchResponse);
reject(response);
} else {
resolve(mockSearchResponse);
resolve(response);
}
});
};
@ -28,21 +35,21 @@ const callWithRequestFactory = (mockSearchResponse, fail = false) => {
describe('ML - validateCardinality', () => {
it('called without arguments', (done) => {
validateCardinality(callWithRequestFactory(mockFareQuoteCardinality)).then(
validateCardinality(callWithRequestFactory(mockResponses)).then(
() => done(new Error('Promise should not resolve for this test without job argument.')),
() => done()
);
});
it('called with non-valid job argument #1, missing analysis_config', (done) => {
validateCardinality(callWithRequestFactory(mockFareQuoteCardinality), {}).then(
validateCardinality(callWithRequestFactory(mockResponses), {}).then(
() => done(new Error('Promise should not resolve for this test without valid job argument.')),
() => done()
);
});
it('called with non-valid job argument #2, missing datafeed_config', (done) => {
validateCardinality(callWithRequestFactory(mockFareQuoteCardinality), { analysis_config: {} }).then(
validateCardinality(callWithRequestFactory(mockResponses), { analysis_config: {} }).then(
() => done(new Error('Promise should not resolve for this test without valid job argument.')),
() => done()
);
@ -50,7 +57,7 @@ describe('ML - validateCardinality', () => {
it('called with non-valid job argument #3, missing datafeed_config.indices', (done) => {
const job = { analysis_config: {}, datafeed_config: {} };
validateCardinality(callWithRequestFactory(mockFareQuoteCardinality), job).then(
validateCardinality(callWithRequestFactory(mockResponses), job).then(
() => done(new Error('Promise should not resolve for this test without valid job argument.')),
() => done()
);
@ -58,7 +65,7 @@ describe('ML - validateCardinality', () => {
it('called with non-valid job argument #4, missing data_description', (done) => {
const job = { analysis_config: {}, datafeed_config: { indices: [] } };
validateCardinality(callWithRequestFactory(mockFareQuoteCardinality), job).then(
validateCardinality(callWithRequestFactory(mockResponses), job).then(
() => done(new Error('Promise should not resolve for this test without valid job argument.')),
() => done()
);
@ -66,7 +73,7 @@ describe('ML - validateCardinality', () => {
it('called with non-valid job argument #5, missing data_description.time_field', (done) => {
const job = { analysis_config: {}, data_description: {}, datafeed_config: { indices: [] } };
validateCardinality(callWithRequestFactory(mockFareQuoteCardinality), job).then(
validateCardinality(callWithRequestFactory(mockResponses), job).then(
() => done(new Error('Promise should not resolve for this test without valid job argument.')),
() => done()
);
@ -76,7 +83,7 @@ describe('ML - validateCardinality', () => {
const job = {
analysis_config: {}, datafeed_config: { indices: [] }, data_description: { time_field: '@timestamp' }
};
validateCardinality(callWithRequestFactory(mockFareQuoteCardinality), job).then(
validateCardinality(callWithRequestFactory(mockResponses), job).then(
() => done(new Error('Promise should not resolve for this test without valid job argument.')),
() => done()
);
@ -91,7 +98,7 @@ describe('ML - validateCardinality', () => {
}
};
return validateCardinality(callWithRequestFactory(mockFareQuoteCardinality), job).then(
return validateCardinality(callWithRequestFactory(mockResponses), job).then(
(messages) => {
const ids = messages.map(m => m.id);
expect(ids).to.eql([]);
@ -117,8 +124,8 @@ describe('ML - validateCardinality', () => {
const testCardinality = (fieldName, cardinality, test) => {
const job = getJobConfig(fieldName);
const mockCardinality = _.cloneDeep(mockFareQuoteCardinality);
mockCardinality.aggregations.airline_cardinality.value = cardinality;
const mockCardinality = _.cloneDeep(mockResponses);
mockCardinality.search.aggregations.airline_cardinality.value = cardinality;
return validateCardinality(callWithRequestFactory(mockCardinality), job, {}).then(
(messages) => {
const ids = messages.map(m => m.id);
@ -127,6 +134,27 @@ describe('ML - validateCardinality', () => {
);
};
it(`field '_source' not aggregatable`, () => {
const job = getJobConfig('partition_field_name');
job.analysis_config.detectors[0].partition_field_name = '_source';
return validateCardinality(callWithRequestFactory(mockResponses), job).then(
(messages) => {
const ids = messages.map(m => m.id);
expect(ids).to.eql(['field_not_aggregatable']);
}
);
});
it(`field 'airline' aggregatable`, () => {
const job = getJobConfig('partition_field_name');
return validateCardinality(callWithRequestFactory(mockResponses), job).then(
(messages) => {
const ids = messages.map(m => m.id);
expect(ids).to.eql(['success_cardinality']);
}
);
});
it('field not aggregatable', () => {
const job = getJobConfig('partition_field_name');
return validateCardinality(callWithRequestFactory({}), job).then(
@ -198,8 +226,8 @@ describe('ML - validateCardinality', () => {
it(`disabled model_plot, over field cardinality of ${cardinality} doesn't trigger a warning`, () => {
const job = getJobConfig('over_field_name');
job.model_plot_config = { enabled: false };
const mockCardinality = _.cloneDeep(mockFareQuoteCardinality);
mockCardinality.aggregations.airline_cardinality.value = cardinality;
const mockCardinality = _.cloneDeep(mockResponses);
mockCardinality.search.aggregations.airline_cardinality.value = cardinality;
return validateCardinality(callWithRequestFactory(mockCardinality), job).then(
(messages) => {
const ids = messages.map(m => m.id);
@ -211,8 +239,8 @@ describe('ML - validateCardinality', () => {
it(`enabled model_plot, over field cardinality of ${cardinality} triggers a model plot warning`, () => {
const job = getJobConfig('over_field_name');
job.model_plot_config = { enabled: true };
const mockCardinality = _.cloneDeep(mockFareQuoteCardinality);
mockCardinality.aggregations.airline_cardinality.value = cardinality;
const mockCardinality = _.cloneDeep(mockResponses);
mockCardinality.search.aggregations.airline_cardinality.value = cardinality;
return validateCardinality(callWithRequestFactory(mockCardinality), job).then(
(messages) => {
const ids = messages.map(m => m.id);
@ -224,8 +252,8 @@ describe('ML - validateCardinality', () => {
it(`disabled model_plot, by field cardinality of ${cardinality} triggers a field cardinality warning`, () => {
const job = getJobConfig('by_field_name');
job.model_plot_config = { enabled: false };
const mockCardinality = _.cloneDeep(mockFareQuoteCardinality);
mockCardinality.aggregations.airline_cardinality.value = cardinality;
const mockCardinality = _.cloneDeep(mockResponses);
mockCardinality.search.aggregations.airline_cardinality.value = cardinality;
return validateCardinality(callWithRequestFactory(mockCardinality), job).then(
(messages) => {
const ids = messages.map(m => m.id);
@ -237,8 +265,8 @@ describe('ML - validateCardinality', () => {
it(`enabled model_plot, by field cardinality of ${cardinality} triggers a model plot warning and field cardinality warning`, () => {
const job = getJobConfig('by_field_name');
job.model_plot_config = { enabled: true };
const mockCardinality = _.cloneDeep(mockFareQuoteCardinality);
mockCardinality.aggregations.airline_cardinality.value = cardinality;
const mockCardinality = _.cloneDeep(mockResponses);
mockCardinality.search.aggregations.airline_cardinality.value = cardinality;
return validateCardinality(callWithRequestFactory(mockCardinality), job).then(
(messages) => {
const ids = messages.map(m => m.id);
@ -250,8 +278,8 @@ describe('ML - validateCardinality', () => {
it(`enabled model_plot with terms, by field cardinality of ${cardinality} triggers just field cardinality warning`, () => {
const job = getJobConfig('by_field_name');
job.model_plot_config = { enabled: true, terms: 'AAL,AAB' };
const mockCardinality = _.cloneDeep(mockFareQuoteCardinality);
mockCardinality.aggregations.airline_cardinality.value = cardinality;
const mockCardinality = _.cloneDeep(mockResponses);
mockCardinality.search.aggregations.airline_cardinality.value = cardinality;
return validateCardinality(callWithRequestFactory(mockCardinality), job).then(
(messages) => {
const ids = messages.map(m => m.id);

View file

@ -42,10 +42,23 @@ const validateFactory = (callWithRequest, job) => {
if (relevantDetectors.length > 0) {
try {
const uniqueFieldNames = _.uniq(relevantDetectors.map(f => f[fieldName]));
// use fieldCaps endpoint to get data about whether fields are aggregatable
const fieldCaps = await callWithRequest('fieldCaps', {
index: job.datafeed_config.indices.join(','),
fields: uniqueFieldNames
});
// parse fieldCaps to return an array of just the fields which are aggregatable
const aggregatableFieldNames = Object.keys(fieldCaps.fields).filter((field) => {
const fieldType = Object.keys(fieldCaps.fields[field])[0];
return fieldCaps.fields[field][fieldType].aggregatable;
});
const stats = await dv.checkAggregatableFieldsExist(
job.datafeed_config.indices.join(','),
job.datafeed_config.query,
uniqueFieldNames,
aggregatableFieldNames,
0,
job.data_description.time_field
);