[ML] Add runtime fields support (#78700)

Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
Quynh Nguyen 2020-10-05 13:00:58 -05:00 committed by GitHub
parent ead4ebc9f6
commit ad89e6f956
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 58 additions and 25 deletions

View file

@ -315,3 +315,16 @@ export const showDataGridColumnChartErrorMessageToast = (
})
);
};
/**
 * Flattens the `fields` section of an Elasticsearch search hit.
 *
 * When `fields` is requested in a search, each field value comes back wrapped in
 * an array. This helper unwraps single-element arrays so that
 * `{ [key]: [val] }` becomes `{ [key]: val }`. Arrays holding zero or several
 * elements, and values that are not arrays, are kept unchanged.
 *
 * @param originalObj - The `fields` object of a hit. `null`/`undefined` is
 * accepted (a hit may carry no `fields` at all) and yields an empty object.
 * @returns A new shallow copy with single-element arrays unwrapped; the input
 * object is never mutated.
 */
export const getProcessedFields = (
  originalObj: object | null | undefined
): { [key: string]: any } => {
  // Spreading `null`/`undefined` produces `{}`, so no separate guard is required.
  const obj: { [key: string]: any } = { ...originalObj };
  // `Object.entries` takes a snapshot, so reassigning values while looping is safe.
  for (const [key, value] of Object.entries(obj)) {
    if (Array.isArray(value) && value.length === 1) {
      obj[key] = value[0];
    }
  }
  return obj;
};

View file

@ -11,6 +11,7 @@ export {
multiColumnSortFactory,
showDataGridColumnChartErrorMessageToast,
useRenderCellValue,
getProcessedFields,
} from './common';
export { getFieldType, ChartData } from './use_column_chart';
export { useDataGrid } from './use_data_grid';

View file

@ -7,7 +7,7 @@
import type { SearchResponse7 } from '../../../../common/types/es_client';
import { extractErrorMessage } from '../../../../common/util/errors';
import { EsSorting, UseDataGridReturnType } from '../../components/data_grid';
import { EsSorting, UseDataGridReturnType, getProcessedFields } from '../../components/data_grid';
import { ml } from '../../services/ml_api_service';
import { isKeywordAndTextType } from '../common/fields';
@ -47,9 +47,12 @@ export const getIndexData = async (
}, {} as EsSorting);
const { pageIndex, pageSize } = pagination;
// TODO: remove results_field from `fields` when possible
const resp: SearchResponse7 = await ml.esSearch({
index: jobConfig.dest.index,
body: {
fields: ['*'],
_source: jobConfig.dest.results_field,
query: searchQuery,
from: pageIndex * pageSize,
size: pageSize,
@ -58,8 +61,11 @@ export const getIndexData = async (
});
setRowCount(resp.hits.total.value);
const docs = resp.hits.hits.map((d) => ({
...getProcessedFields(d.fields),
[jobConfig.dest.results_field]: d._source[jobConfig.dest.results_field],
}));
const docs = resp.hits.hits.map((d) => d._source);
setTableItems(docs);
setStatus(INDEX_STATUS.LOADED);
} catch (e) {

View file

@ -23,6 +23,7 @@ import {
useRenderCellValue,
EsSorting,
UseIndexDataReturnType,
getProcessedFields,
} from '../../../../components/data_grid';
import type { SearchResponse7 } from '../../../../../../common/types/es_client';
import { extractErrorMessage } from '../../../../../../common/util/errors';
@ -81,6 +82,8 @@ export const useIndexData = (
query, // isDefaultQuery(query) ? matchAllQuery : query,
from: pagination.pageIndex * pagination.pageSize,
size: pagination.pageSize,
fields: ['*'],
_source: false,
...(Object.keys(sort).length > 0 ? { sort } : {}),
},
};
@ -88,8 +91,7 @@ export const useIndexData = (
try {
const resp: IndexSearchResponse = await ml.esSearch(esSearchRequest);
const docs = resp.hits.hits.map((d) => d._source);
const docs = resp.hits.hits.map((d) => getProcessedFields(d.fields));
setRowCount(resp.hits.total.value);
setTableItems(docs);
setStatus(INDEX_STATUS.LOADED);

View file

@ -18,6 +18,7 @@ import { ml } from '../../../services/ml_api_service';
import { mlJobService } from '../../../services/job_service';
import { escapeForElasticsearchQuery } from '../../../util/string_utils';
import { getSavedObjectsClient, getGetUrlGenerator } from '../../../util/dependency_cache';
import { getProcessedFields } from '../../../components/data_grid';
export function getNewCustomUrlDefaults(job, dashboards, indexPatterns) {
// Returns the settings object in the format used by the custom URL editor
@ -329,7 +330,7 @@ export function getTestUrl(job, customUrl) {
});
} else {
if (response.hits.total.value > 0) {
testDoc = response.hits.hits[0]._source;
testDoc = getProcessedFields(response.hits.hits[0].fields);
}
}

View file

@ -509,10 +509,10 @@ class JobService {
fields[job.data_description.time_field] = {};
}
// console.log('fields: ', fields);
const fieldsList = Object.keys(fields);
if (fieldsList.length) {
body._source = fieldsList;
body.fields = fieldsList;
body._source = false;
}
}

View file

@ -56,18 +56,25 @@ export function categorizationExamplesProvider({
}
}
}
const { body } = await asCurrentUser.search<SearchResponse<{ [id: string]: string }>>({
index: indexPatternTitle,
size,
body: {
_source: categorizationFieldName,
fields: [categorizationFieldName],
_source: false,
query,
sort: ['_doc'],
},
});
const tempExamples = body.hits.hits.map(({ _source }) => _source[categorizationFieldName]);
// hit.fields can be undefined if value is originally null
const tempExamples = body.hits.hits.map(({ fields }) =>
fields &&
Array.isArray(fields[categorizationFieldName]) &&
fields[categorizationFieldName].length > 0
? fields[categorizationFieldName][0]
: null
);
validationResults.createNullValueResult(tempExamples);
@ -81,7 +88,6 @@ export function categorizationExamplesProvider({
const examplesWithTokens = await getTokens(CHUNK_SIZE, allExamples, analyzer);
return { examples: examplesWithTokens };
} catch (err) {
// console.log('dropping to 50 chunk size');
// if an error is thrown when loading the tokens, lower the chunk size by half and try again
// the error may have been caused by too many tokens being found.
// the _analyze endpoint has a maximum of 10000 tokens.

View file

@ -123,15 +123,19 @@ export class ValidationResults {
public createNullValueResult(examples: Array<string | null | undefined>) {
const nullCount = examples.filter((e) => e === null).length;
if (nullCount / examples.length >= NULL_COUNT_PERCENT_LIMIT) {
this._results.push({
id: VALIDATION_RESULT.NULL_VALUES,
valid: CATEGORY_EXAMPLES_VALIDATION_STATUS.PARTIALLY_VALID,
message: i18n.translate('xpack.ml.models.jobService.categorization.messages.nullValues', {
defaultMessage: 'More than {percent}% of field values are null.',
values: { percent: NULL_COUNT_PERCENT_LIMIT * 100 },
}),
});
// if all values are null, VALIDATION_RESULT.NO_EXAMPLES will be raised
// so we don't need to display this warning as well
if (nullCount !== examples.length) {
if (nullCount / examples.length >= NULL_COUNT_PERCENT_LIMIT) {
this._results.push({
id: VALIDATION_RESULT.NULL_VALUES,
valid: CATEGORY_EXAMPLES_VALIDATION_STATUS.PARTIALLY_VALID,
message: i18n.translate('xpack.ml.models.jobService.categorization.messages.nullValues', {
defaultMessage: 'More than {percent}% of field values are null.',
values: { percent: NULL_COUNT_PERCENT_LIMIT * 100 },
}),
});
}
}
}

View file

@ -12,13 +12,10 @@ import {
isEsSearchResponse,
isFieldHistogramsResponseSchema,
} from '../../../common/api_schemas/type_guards';
import { getErrorMessage } from '../../../common/utils/errors';
import type { EsSorting, UseIndexDataReturnType } from '../../shared_imports';
import { getErrorMessage } from '../../../common/utils/errors';
import { isDefaultQuery, matchAllQuery, PivotQuery } from '../common';
import { SearchItems } from './use_search_items';
import { useApi } from './use_api';
@ -38,6 +35,7 @@ export const useIndexData = (
showDataGridColumnChartErrorMessageToast,
useDataGrid,
useRenderCellValue,
getProcessedFields,
INDEX_STATUS,
},
} = useAppDependencies();
@ -86,6 +84,8 @@ export const useIndexData = (
const esSearchRequest = {
index: indexPattern.title,
body: {
fields: ['*'],
_source: false,
// Instead of using the default query (`*`), fall back to a more efficient `match_all` query.
query: isDefaultQuery(query) ? matchAllQuery : query,
from: pagination.pageIndex * pagination.pageSize,
@ -102,7 +102,7 @@ export const useIndexData = (
return;
}
const docs = resp.hits.hits.map((d) => d._source);
const docs = resp.hits.hits.map((d) => getProcessedFields(d.fields));
setRowCount(resp.hits.total.value);
setTableItems(docs);