[ML] Transforms: Fix handling of fields with keyword mapping available (#98882)

- For groupby/agg configs, removes the .keyword postfix from the agg name and field name being displayed. The config itself will still use the field name including .keyword.
- For histogram charts, if available, query data using the .keyword field. This enables support for charts for terms when there's both a text and keyword variant.
- Fixes isKeywordDuplicate check for field names with multiple dots in them.
This commit is contained in:
Walter Rafelsberger 2021-05-04 16:21:27 +02:00 committed by GitHub
parent bc352c033c
commit dfcb1794d4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 203 additions and 34 deletions

View file

@ -0,0 +1,59 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { hasKeywordDuplicate, isKeywordDuplicate, removeKeywordPostfix } from './field_utils';
// Shared fixture: the set of known field names passed to the helpers under test.
// It contains a field without a `.keyword` counterpart (`field1`), a field together
// with its `.keyword` counterpart (`field2`), and names in which `.keyword` is repeated
// (`field3.keyword.keyword`) or appears in the middle of the name (`field4.keyword.b`).
const allFields = new Set([
'field1',
'field2',
'field2.keyword',
'field3.keyword',
'field3.keyword.keyword',
'field4.keyword.b',
'field4.keyword.b.keyword',
]);
describe('field_utils: hasKeywordDuplicate()', () => {
  it('returns true when a corresponding keyword field is available', () => {
    // Each of these names also exists in `allFields` with `.keyword` appended.
    const namesWithKeywordSibling = ['field2', 'field3.keyword', 'field4.keyword.b'];
    for (const name of namesWithKeywordSibling) {
      expect(hasKeywordDuplicate(name, allFields)).toBe(true);
    }
  });

  it('returns false when a corresponding keyword field is not available', () => {
    // None of these names exists in `allFields` with an additional `.keyword` appended.
    const namesWithoutKeywordSibling = [
      'field1',
      'field2.keyword',
      'field3.keyword.keyword',
      'field4.keyword.b.keyword',
    ];
    for (const name of namesWithoutKeywordSibling) {
      expect(hasKeywordDuplicate(name, allFields)).toBe(false);
    }
  });
});
describe('field_utils: isKeywordDuplicate()', () => {
  it('returns true when a corresponding field without keyword postfix is available', () => {
    // Each name ends with `.keyword` and its postfix-less form is part of `allFields`.
    const keywordDuplicates = [
      'field2.keyword',
      'field3.keyword.keyword',
      'field4.keyword.b.keyword',
    ];
    for (const name of keywordDuplicates) {
      expect(isKeywordDuplicate(name, allFields)).toBe(true);
    }
  });

  it('returns false when a corresponding field without keyword postfix is not available', () => {
    // Either the name does not end with `.keyword`, or (as for `field3.keyword`)
    // the postfix-less form is missing from `allFields`.
    const nonDuplicates = ['field1', 'field2', 'field3.keyword', 'field4.keyword.b'];
    for (const name of nonDuplicates) {
      expect(isKeywordDuplicate(name, allFields)).toBe(false);
    }
  });
});
describe('field_utils: removeKeywordPostfix()', () => {
  it('removes the keyword postfix', () => {
    // [input, expected]: only the trailing `.keyword` is stripped, exactly once.
    const cases: Array<[string, string]> = [
      ['field2.keyword', 'field2'],
      ['field3.keyword.keyword', 'field3.keyword'],
      ['field4.keyword.b.keyword', 'field4.keyword.b'],
    ];
    for (const [input, expected] of cases) {
      expect(removeKeywordPostfix(input)).toBe(expected);
    }
  });

  it("returns the field name as is when there's no keyword postfix", () => {
    // A `.keyword` in the middle of the name must be left untouched.
    const unchangedNames = ['field1', 'field2', 'field4.keyword.b'];
    for (const name of unchangedNames) {
      expect(removeKeywordPostfix(name)).toBe(name);
    }
  });
});

View file

@ -0,0 +1,20 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
/** Postfix that marks the keyword variant of a field name. */
export const KEYWORD_POSTFIX = '.keyword';

/**
 * Checks if `fieldName` has a `fieldName.keyword` equivalent in the set of all field names.
 *
 * @param fieldName - The field name to look up (without the postfix appended).
 * @param fieldNamesSet - All known field names.
 * @returns `true` if `${fieldName}.keyword` is part of `fieldNamesSet`.
 */
export const hasKeywordDuplicate = (fieldName: string, fieldNamesSet: Set<string>): boolean =>
  fieldNamesSet.has(`${fieldName}${KEYWORD_POSTFIX}`);

/**
 * Checks if `fieldName` ends with `.keyword` and has a field name equivalent
 * without the postfix in the set of all field names.
 *
 * @param fieldName - The field name to check.
 * @param fieldNamesSet - All known field names.
 * @returns `true` only if both conditions hold.
 */
export const isKeywordDuplicate = (fieldName: string, fieldNamesSet: Set<string>): boolean =>
  fieldName.endsWith(KEYWORD_POSTFIX) && fieldNamesSet.has(removeKeywordPostfix(fieldName));

/**
 * Removes the `.keyword` postfix from a field name if applicable.
 *
 * The postfix is matched literally. Building a regular expression from
 * `KEYWORD_POSTFIX` would treat its leading `.` as "any character" and
 * wrongly truncate names such as `is_keyword` to `is`.
 *
 * @param fieldName - The field name to strip.
 * @returns The name without a single trailing `.keyword`, or the name unchanged.
 */
export const removeKeywordPostfix = (fieldName: string): string =>
  fieldName.endsWith(KEYWORD_POSTFIX)
    ? fieldName.slice(0, -KEYWORD_POSTFIX.length)
    : fieldName;

View file

@ -14,6 +14,11 @@ import {
isEsSearchResponse,
isFieldHistogramsResponseSchema,
} from '../../../common/api_schemas/type_guards';
import {
hasKeywordDuplicate,
isKeywordDuplicate,
removeKeywordPostfix,
} from '../../../common/utils/field_utils';
import type { EsSorting, UseIndexDataReturnType } from '../../shared_imports';
import { getErrorMessage } from '../../../common/utils/errors';
@ -209,14 +214,25 @@ export const useIndexData = (
};
const fetchColumnChartsData = async function () {
const allIndexPatternFieldNames = new Set(indexPattern.fields.map((f) => f.name));
const columnChartsData = await api.getHistogramsForFields(
indexPattern.title,
columns
.filter((cT) => dataGrid.visibleColumns.includes(cT.id))
.map((cT) => ({
fieldName: cT.id,
type: getFieldType(cT.schema),
})),
.map((cT) => {
// If a column field name has a corresponding keyword field,
// fetch the keyword field instead to be able to do aggregations.
const fieldName = cT.id;
return hasKeywordDuplicate(fieldName, allIndexPatternFieldNames)
? {
fieldName: `${fieldName}.keyword`,
type: getFieldType(undefined),
}
: {
fieldName,
type: getFieldType(cT.schema),
};
}),
isDefaultQuery(query) ? matchAllQuery : query,
combinedRuntimeMappings
);
@ -226,7 +242,15 @@ export const useIndexData = (
return;
}
setColumnCharts(columnChartsData);
setColumnCharts(
// revert field names with `.keyword` used to do aggregations to their original column name
columnChartsData.map((d) => ({
...d,
...(isKeywordDuplicate(d.id, allIndexPatternFieldNames)
? { id: removeKeywordPostfix(d.id) }
: {}),
}))
);
};
useEffect(() => {

View file

@ -13,6 +13,8 @@ import {
} from '../../../../../../../../../../src/plugins/data/public';
import { getNestedProperty } from '../../../../../../../common/utils/object_utils';
import { removeKeywordPostfix } from '../../../../../../../common/utils/field_utils';
import { isRuntimeMappings } from '../../../../../../../common/shared_imports';
import {
@ -93,41 +95,44 @@ export function getPivotDropdownOptions(
const combinedFields = [...indexPatternFields, ...runtimeFields].sort(sortByLabel);
combinedFields.forEach((field) => {
const rawFieldName = field.name;
const displayFieldName = removeKeywordPostfix(rawFieldName);
// Group by
const availableGroupByAggs: [] = getNestedProperty(pivotGroupByFieldSupport, field.type);
if (availableGroupByAggs !== undefined) {
availableGroupByAggs.forEach((groupByAgg) => {
// Aggregation name for the group-by is the plain field name. Illegal characters will be removed.
const aggName = field.name.replace(illegalEsAggNameChars, '').trim();
const aggName = displayFieldName.replace(illegalEsAggNameChars, '').trim();
// Option name in the dropdown for the group-by is in the form of `sum(fieldname)`.
const dropDownName = `${groupByAgg}(${field.name})`;
const dropDownName = `${groupByAgg}(${displayFieldName})`;
const groupByOption: DropDownLabel = { label: dropDownName };
groupByOptions.push(groupByOption);
groupByOptionsData[dropDownName] = getDefaultGroupByConfig(
aggName,
dropDownName,
field.name,
rawFieldName,
groupByAgg
);
});
}
// Aggregations
const aggOption: DropDownOption = { label: field.name, options: [] };
const aggOption: DropDownOption = { label: displayFieldName, options: [] };
const availableAggs: [] = getNestedProperty(pivotAggsFieldSupport, field.type);
if (availableAggs !== undefined) {
availableAggs.forEach((agg) => {
// Aggregation name is formatted like `fieldname.sum`. Illegal characters will be removed.
const aggName = `${field.name.replace(illegalEsAggNameChars, '').trim()}.${agg}`;
const aggName = `${displayFieldName.replace(illegalEsAggNameChars, '').trim()}.${agg}`;
// Option name in the dropdown for the aggregation is in the form of `sum(fieldname)`.
const dropDownName = `${agg}(${field.name})`;
const dropDownName = `${agg}(${displayFieldName})`;
aggOption.options.push({ label: dropDownName });
aggOptionsData[dropDownName] = getDefaultAggregationConfig(
aggName,
dropDownName,
field.name,
rawFieldName,
agg
);
});

View file

@ -61,6 +61,7 @@ import { registerTransformsAuditMessagesRoutes } from './transforms_audit_messag
import { registerTransformNodesRoutes } from './transforms_nodes';
import { IIndexPattern } from '../../../../../../src/plugins/data/common/index_patterns';
import { isLatestTransform } from '../../../common/types/transform';
import { isKeywordDuplicate } from '../../../common/utils/field_utils';
enum TRANSFORM_ACTIONS {
STOP = 'stop',
@ -562,9 +563,7 @@ const previewTransformHandler: RequestHandler<
).reduce((acc, [fieldName, fieldCaps]) => {
const fieldDefinition = Object.values(fieldCaps)[0];
const isMetaField = fieldDefinition.type.startsWith('_') || fieldName === '_doc_count';
const isKeywordDuplicate =
fieldName.endsWith('.keyword') && fieldNamesSet.has(fieldName.split('.keyword')[0]);
if (isMetaField || isKeywordDuplicate) {
if (isMetaField || isKeywordDuplicate(fieldName, fieldNamesSet)) {
return acc;
}
acc[fieldName] = { ...fieldDefinition };

View file

@ -45,8 +45,8 @@ export default function ({ getService }: FtrProviderContext) {
const pivotGroupByEntries = [
{
identifier: 'terms(category.keyword)',
label: 'category.keyword',
identifier: 'terms(category)',
label: 'category',
},
{
identifier: 'date_histogram(order_date)',

View file

@ -32,7 +32,7 @@ function getTransformConfig(): TransformPivotConfig {
aggregations: { 'products.base_price.avg': { avg: { field: 'products.base_price' } } },
},
description:
'ecommerce batch transform with avg(products.base_price) grouped by terms(category.keyword)',
'ecommerce batch transform with avg(products.base_price) grouped by terms(category)',
frequency: '3s',
settings: {
max_page_search_size: 250,

View file

@ -17,6 +17,7 @@ import {
} from './index';
export default function ({ getService }: FtrProviderContext) {
const canvasElement = getService('canvasElement');
const esArchiver = getService('esArchiver');
const transform = getService('transform');
@ -40,8 +41,8 @@ export default function ({ getService }: FtrProviderContext) {
source: 'ft_ecommerce',
groupByEntries: [
{
identifier: 'terms(category.keyword)',
label: 'category.keyword',
identifier: 'terms(category)',
label: 'category',
} as GroupByEntry,
{
identifier: 'date_histogram(order_date)',
@ -85,16 +86,16 @@ export default function ({ getService }: FtrProviderContext) {
],
transformId: `ec_1_${Date.now()}`,
transformDescription:
'ecommerce batch transform with groups terms(category.keyword) + date_histogram(order_date) 1m and aggregation avg(products.base_price)',
'ecommerce batch transform with groups terms(category) + date_histogram(order_date) 1m and aggregation avg(products.base_price)',
get destinationIndex(): string {
return `user-${this.transformId}`;
},
discoverAdjustSuperDatePicker: true,
expected: {
pivotAdvancedEditorValueArr: ['{', ' "group_by": {', ' "category.keyword": {'],
pivotAdvancedEditorValueArr: ['{', ' "group_by": {', ' "category": {'],
pivotAdvancedEditorValue: {
group_by: {
'category.keyword': {
category: {
terms: {
field: 'category.keyword',
},
@ -156,7 +157,15 @@ export default function ({ getService }: FtrProviderContext) {
rows: 5,
},
histogramCharts: [
{ chartAvailable: false, id: 'category', legend: 'Chart not supported.' },
{
chartAvailable: true,
id: 'category',
legend: '6 categories',
colorStats: [
{ color: '#000000', percentage: 45 },
{ color: '#54B399', percentage: 55 },
],
},
{
chartAvailable: true,
id: 'currency',
@ -166,8 +175,24 @@ export default function ({ getService }: FtrProviderContext) {
{ color: '#54B399', percentage: 90 },
],
},
{ chartAvailable: false, id: 'customer_first_name', legend: 'Chart not supported.' },
{ chartAvailable: false, id: 'customer_full_name', legend: 'Chart not supported.' },
{
chartAvailable: true,
id: 'customer_first_name',
legend: 'top 20 of 46 categories',
colorStats: [
{ color: '#000000', percentage: 60 },
{ color: '#54B399', percentage: 35 },
],
},
{
chartAvailable: true,
id: 'customer_full_name',
legend: 'top 20 of 3321 categories',
colorStats: [
{ color: '#000000', percentage: 25 },
{ color: '#54B399', percentage: 67 },
],
},
{
chartAvailable: true,
id: 'customer_gender',
@ -186,7 +211,15 @@ export default function ({ getService }: FtrProviderContext) {
{ color: '#000000', percentage: 60 },
],
},
{ chartAvailable: false, id: 'customer_last_name', legend: 'Chart not supported.' },
{
chartAvailable: true,
id: 'customer_last_name',
legend: 'top 20 of 183 categories',
colorStats: [
{ color: '#000000', percentage: 25 },
{ color: '#54B399', percentage: 70 },
],
},
{
chartAvailable: true,
id: 'customer_phone',
@ -403,6 +436,9 @@ export default function ({ getService }: FtrProviderContext) {
await transform.wizard.assertAdvancedQueryEditorSwitchExists();
await transform.wizard.assertAdvancedQueryEditorSwitchCheckState(false);
// Disable anti-aliasing to stabilize canvas image rendering assertions
await canvasElement.disableAntiAliasing();
await transform.testExecution.logTestStep('enables the index preview histogram charts');
await transform.wizard.enableIndexPreviewHistogramCharts(true);
@ -415,6 +451,8 @@ export default function ({ getService }: FtrProviderContext) {
);
}
await canvasElement.resetAntiAliasing();
if (isPivotTransformTestData(testData)) {
await transform.testExecution.logTestStep('adds the group by entries');
for (const [index, entry] of testData.groupByEntries.entries()) {

View file

@ -53,7 +53,15 @@ export default function ({ getService }: FtrProviderContext) {
chartAvailable: true,
id: '@timestamp',
},
{ chartAvailable: false, id: '@version', legend: 'Chart not supported.' },
{
chartAvailable: true,
id: '@version',
legend: '1 category',
colorStats: [
{ color: '#000000', percentage: 10 },
{ color: '#54B399', percentage: 90 },
],
},
{
chartAvailable: true,
id: 'airline',
@ -67,7 +75,8 @@ export default function ({ getService }: FtrProviderContext) {
chartAvailable: true,
id: 'responsetime',
colorStats: [
{ color: '#54B399', percentage: 5 },
// below 10% threshold
// { color: '#54B399', percentage: 5 },
{ color: '#000000', percentage: 95 },
],
},
@ -84,11 +93,20 @@ export default function ({ getService }: FtrProviderContext) {
chartAvailable: true,
id: 'rt_responsetime_x_2',
colorStats: [
{ color: '#54B399', percentage: 5 },
// below 10% threshold
// { color: '#54B399', percentage: 5 },
{ color: '#000000', percentage: 95 },
],
},
{ chartAvailable: false, id: 'type', legend: 'Chart not supported.' },
{
chartAvailable: true,
id: 'type',
legend: '1 category',
colorStats: [
{ color: '#000000', percentage: 10 },
{ color: '#54B399', percentage: 90 },
],
},
];
const testDataList: Array<PivotTransformTestData | LatestTransformTestData> = [

View file

@ -20,7 +20,7 @@ function getTransformConfig(): TransformPivotConfig {
aggregations: { 'products.base_price.avg': { avg: { field: 'products.base_price' } } },
},
description:
'ecommerce batch transform with avg(products.base_price) grouped by terms(category.keyword)',
'ecommerce batch transform with avg(products.base_price) grouped by terms(category)',
dest: { index: `user-ec_2_${date}` },
};
}

View file

@ -262,12 +262,18 @@ export function TransformWizardProvider({ getService, getPageObjects }: FtrProvi
`[data-test-subj="mlDataGridChart-${id}-histogram"] .echCanvasRenderer`,
sortedExpectedColorStats,
undefined,
4
10
);
expect(actualColorStats.length).to.eql(
sortedExpectedColorStats.length,
`Expected and actual color stats for column '${expected.id}' should have the same amount of elements. Expected: ${sortedExpectedColorStats.length} (got ${actualColorStats.length})`
`Expected and actual color stats for column '${
expected.id
}' should have the same amount of elements. Expected: ${
sortedExpectedColorStats.length
} '${JSON.stringify(sortedExpectedColorStats)}' (got ${
actualColorStats.length
} '${JSON.stringify(actualColorStats)}')`
);
expect(actualColorStats.every((d) => d.withinTolerance)).to.eql(
true,