[Maps] Use id-values from client-manifest to suggest layers (#102788)

This commit is contained in:
Thomas Neirynck 2021-06-23 18:56:15 +02:00 committed by GitHub
parent 81fe54109e
commit 3c780a8505
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 122 additions and 225 deletions

View file

@ -14,13 +14,6 @@ import { DocumentStatsTable } from './document_stats';
import { ExpandedRowContent } from './expanded_row_content';
import { ChoroplethMap } from './choropleth_map';
// EMS layer ids handed to `mapsPlugin.suggestEMSTermJoinConfig` as `emsLayerIds`
// when asking for a term-join suggestion for a keyword field.
const COMMON_EMS_LAYER_IDS = [
'world_countries',
'administrative_regions_lvl2',
'usa_zip_codes',
'usa_states',
];
export const KeywordContent: FC<FieldDataRowProps> = ({ config }) => {
const [EMSSuggestion, setEMSSuggestion] = useState<EMSTermJoinConfig | null | undefined>();
const { stats, fieldName } = config;
@ -32,7 +25,6 @@ export const KeywordContent: FC<FieldDataRowProps> = ({ config }) => {
const loadEMSTermSuggestions = useCallback(async () => {
if (!mapsPlugin) return;
const suggestion: EMSTermJoinConfig | null = await mapsPlugin.suggestEMSTermJoinConfig({
emsLayerIds: COMMON_EMS_LAYER_IDS,
sampleValues: Array.isArray(stats?.topValues)
? stats?.topValues.map((value) => value.key)
: [],

View file

@ -6,42 +6,24 @@
*/
import { suggestEMSTermJoinConfig } from './ems_autosuggest';
import { FeatureCollection } from 'geojson';
class MockFileLayer {
private readonly _url: string;
private readonly _id: string;
private readonly _fields: Array<{ id: string }>;
constructor(url: string, fields: Array<{ id: string }>) {
this._url = url;
this._id = url;
constructor(id: string, fields: Array<{ id: string; alias?: string[]; values?: string[] }>) {
this._id = id;
this._fields = fields;
}
getId() {
return this._id;
}
getFields() {
return this._fields;
}
getGeoJson() {
if (this._url === 'world_countries') {
return ({
type: 'FeatureCollection',
features: [
{ properties: { iso2: 'CA', iso3: 'CAN' } },
{ properties: { iso2: 'US', iso3: 'USA' } },
],
} as unknown) as FeatureCollection;
} else if (this._url === 'zips') {
return ({
type: 'FeatureCollection',
features: [{ properties: { zip: '40204' } }, { properties: { zip: '40205' } }],
} as unknown) as FeatureCollection;
} else {
throw new Error(`unrecognized mock url ${this._url}`);
}
}
hasId(id: string) {
return id === this._id;
}
@ -51,31 +33,31 @@ jest.mock('../util', () => {
return {
async getEmsFileLayers() {
return [
new MockFileLayer('world_countries', [{ id: 'iso2' }, { id: 'iso3' }]),
new MockFileLayer('zips', [{ id: 'zip' }]),
new MockFileLayer('world_countries', [
{
id: 'iso2',
alias: ['(geo\\.){0,}country_iso_code$', '(country|countries)'],
values: ['CA', 'US'],
},
{ id: 'iso3', values: ['CAN', 'USA'] },
{ id: 'name', alias: ['(country|countries)'] },
]),
new MockFileLayer('usa_zip_codes', [
{ id: 'zip', alias: ['zip'], values: ['40204', '40205'] },
]),
];
},
};
});
describe('suggestEMSTermJoinConfig', () => {
test('no info provided', async () => {
test('Should not validate when no info provided', async () => {
const termJoinConfig = await suggestEMSTermJoinConfig({});
expect(termJoinConfig).toBe(null);
});
describe('validate common column names', () => {
test('ecs region', async () => {
const termJoinConfig = await suggestEMSTermJoinConfig({
sampleValuesColumnName: 'destination.geo.region_iso_code',
});
expect(termJoinConfig).toEqual({
layerId: 'administrative_regions_lvl2',
field: 'region_iso_code',
});
});
test('ecs country', async () => {
describe('With common column names', () => {
test('should match first match', async () => {
const termJoinConfig = await suggestEMSTermJoinConfig({
sampleValuesColumnName: 'country_iso_code',
});
@ -85,13 +67,29 @@ describe('suggestEMSTermJoinConfig', () => {
});
});
test('country', async () => {
test('When sampleValues are provided, should reject match if no sampleValues for a layer, even though the name matches', async () => {
const termJoinConfig = await suggestEMSTermJoinConfig({
sampleValuesColumnName: 'country_iso_code',
sampleValues: ['FO', 'US', 'CA'],
});
expect(termJoinConfig).toEqual(null);
});
test('should reject match if sampleValues not in id-list', async () => {
const termJoinConfig = await suggestEMSTermJoinConfig({
sampleValuesColumnName: 'zip',
sampleValues: ['90201', '40205'],
});
expect(termJoinConfig).toEqual(null);
});
test('should return first match (regex matches both iso2 and name)', async () => {
const termJoinConfig = await suggestEMSTermJoinConfig({
sampleValuesColumnName: 'Country_name',
});
expect(termJoinConfig).toEqual({
layerId: 'world_countries',
field: 'name',
field: 'iso2',
});
});
@ -103,10 +101,10 @@ describe('suggestEMSTermJoinConfig', () => {
});
});
describe('validate well known formats', () => {
test('5-digit zip code', async () => {
describe('validate well known formats (using id-values in manifest)', () => {
test('Should validate known zipcodes', async () => {
const termJoinConfig = await suggestEMSTermJoinConfig({
sampleValues: ['90201', 40204],
sampleValues: ['40205', 40204],
});
expect(termJoinConfig).toEqual({
layerId: 'usa_zip_codes',
@ -114,51 +112,18 @@ describe('suggestEMSTermJoinConfig', () => {
});
});
test('mismatch', async () => {
test('Should not validate unknown zipcode (in this case, 90201)', async () => {
const termJoinConfig = await suggestEMSTermJoinConfig({
sampleValues: ['90201', 40204],
});
expect(termJoinConfig).toEqual(null);
});
test('Should not validate mismatches', async () => {
const termJoinConfig = await suggestEMSTermJoinConfig({
sampleValues: ['90201', 'foobar'],
});
expect(termJoinConfig).toEqual(null);
});
});
describe('validate based on EMS data', () => {
test('Should validate with zip codes layer', async () => {
const termJoinConfig = await suggestEMSTermJoinConfig({
sampleValues: ['40204', 40205],
emsLayerIds: ['world_countries', 'zips'],
});
expect(termJoinConfig).toEqual({
layerId: 'zips',
field: 'zip',
});
});
test('Should not validate with faulty zip codes', async () => {
const termJoinConfig = await suggestEMSTermJoinConfig({
sampleValues: ['40204', '00000'],
emsLayerIds: ['world_countries', 'zips'],
});
expect(termJoinConfig).toEqual(null);
});
test('Should validate against countries', async () => {
const termJoinConfig = await suggestEMSTermJoinConfig({
sampleValues: ['USA', 'USA', 'CAN'],
emsLayerIds: ['world_countries', 'zips'],
});
expect(termJoinConfig).toEqual({
layerId: 'world_countries',
field: 'iso3',
});
});
test('Should not validate against missing countries', async () => {
const termJoinConfig = await suggestEMSTermJoinConfig({
sampleValues: ['USA', 'BEL', 'CAN'],
emsLayerIds: ['world_countries', 'zips'],
});
expect(termJoinConfig).toEqual(null);
});
});
});

View file

@ -7,10 +7,8 @@
import type { FileLayer } from '@elastic/ems-client';
import { getEmsFileLayers } from '../util';
import { emsWorldLayerId, emsRegionLayerId, emsUsaZipLayerId } from '../../common';
export interface SampleValuesConfig {
emsLayerIds?: string[];
sampleValues?: Array<string | number>;
sampleValuesColumnName?: string;
}
@ -20,44 +18,16 @@ export interface EMSTermJoinConfig {
field: string;
}
// Column-name patterns, each paired with the EMS layer/field to suggest.
// `suggestByName` tests the column name against every `regex` and returns the
// `emsConfig` of each entry that matches, in the order listed here.
const wellKnownColumnNames = [
{
regex: /(geo\.){0,}country_iso_code$/i, // ECS postfix for country
emsConfig: {
layerId: emsWorldLayerId,
field: 'iso2',
},
},
{
regex: /(geo\.){0,}region_iso_code$/i, // ECS postfix for region
emsConfig: {
layerId: emsRegionLayerId,
field: 'region_iso_code',
},
},
{
regex: /^country/i, // anything starting with country
emsConfig: {
layerId: emsWorldLayerId,
field: 'name',
},
},
];
// Value-format patterns, each paired with the EMS layer/field to suggest.
// `suggestByValues` keeps an entry only when every sample value, converted
// with `toString()`, matches its `regex`.
const wellKnownColumnFormats = [
{
regex: /(^\d{5}$)/i, // 5-digit zipcode
emsConfig: {
layerId: emsUsaZipLayerId,
field: 'zip',
},
},
];
interface UniqueMatch {
config: { layerId: string; field: string };
config: EMSTermJoinConfig;
count: number;
}
/**
 * Local view of the field entries returned by `fileLayer.getFields()`,
 * declaring the optional metadata the suggestion helpers read.
 */
interface FileLayerFieldShim {
/** Field identifier; used as the `field` of a suggested join config. */
id: string;
/** Known id-values for the field; every sample value must be included here for a value-based match. */
values?: string[];
/** Not read by any code visible in this file. */
regex?: string;
/** Regular-expression source strings; each is compiled with the `i` flag and tested against the column name. */
alias?: string[];
}
export async function suggestEMSTermJoinConfig(
sampleValuesConfig: SampleValuesConfig
@ -65,20 +35,17 @@ export async function suggestEMSTermJoinConfig(
const matches: EMSTermJoinConfig[] = [];
if (sampleValuesConfig.sampleValuesColumnName) {
matches.push(...suggestByName(sampleValuesConfig.sampleValuesColumnName));
const matchesBasedOnColumnName = await suggestByName(
sampleValuesConfig.sampleValuesColumnName,
sampleValuesConfig.sampleValues
);
matches.push(...matchesBasedOnColumnName);
}
if (sampleValuesConfig.sampleValues && sampleValuesConfig.sampleValues.length) {
if (sampleValuesConfig.emsLayerIds && sampleValuesConfig.emsLayerIds.length) {
matches.push(
...(await suggestByEMSLayerIds(
sampleValuesConfig.emsLayerIds,
sampleValuesConfig.sampleValues
))
);
} else {
matches.push(...suggestByValues(sampleValuesConfig.sampleValues));
}
// Only looks at id-values in main manifest
const matchesBasedOnIds = await suggestByIdValues(sampleValuesConfig.sampleValues);
matches.push(...matchesBasedOnIds);
}
const uniqMatches: UniqueMatch[] = matches.reduce((accum: UniqueMatch[], match) => {
@ -105,92 +72,80 @@ export async function suggestEMSTermJoinConfig(
return uniqMatches.length ? uniqMatches[0].config : null;
}
function suggestByName(columnName: string): EMSTermJoinConfig[] {
const matches = wellKnownColumnNames.filter((wellknown) => {
return columnName.match(wellknown.regex);
});
async function suggestByName(
columnName: string,
sampleValues?: Array<string | number>
): Promise<EMSTermJoinConfig[]> {
const fileLayers = await getEmsFileLayers();
return matches.map((m) => {
return m.emsConfig;
});
}
function suggestByValues(values: Array<string | number>): EMSTermJoinConfig[] {
const matches = wellKnownColumnFormats.filter((wellknown) => {
for (let i = 0; i < values.length; i++) {
const value = values[i].toString();
if (!value.match(wellknown.regex)) {
return false;
const matches: EMSTermJoinConfig[] = [];
fileLayers.forEach((fileLayer) => {
const emsFields: FileLayerFieldShim[] = fileLayer.getFields();
emsFields.forEach((emsField: FileLayerFieldShim) => {
if (!emsField.alias || !emsField.alias.length) {
return;
}
}
return true;
const emsConfig = {
layerId: fileLayer.getId(),
field: emsField.id,
};
emsField.alias.forEach((alias: string) => {
const regex = new RegExp(alias, 'i');
const nameMatchesAlias = !!columnName.match(regex);
// Check if this violates any known id-values.
let isMatch: boolean;
if (sampleValues) {
if (emsField.values && emsField.values.length) {
isMatch = nameMatchesAlias && allSamplesMatch(sampleValues, emsField.values);
} else {
// requires validation against sample-values but EMS provides no meta to do so.
isMatch = false;
}
} else {
isMatch = nameMatchesAlias;
}
if (isMatch) {
matches.push(emsConfig);
}
});
});
});
return matches.map((m) => {
return m.emsConfig;
});
return matches;
}
/**
 * Reports whether any feature in an EMS GeoJSON collection carries
 * `sampleValue` in the property named `emsFieldId`.
 *
 * Property values are compared by their string form, so a numeric property
 * `40204` matches the sample `'40204'`.
 *
 * Features that have no `properties` object, or whose property is missing or
 * null, are skipped instead of throwing; one incomplete feature therefore no
 * longer aborts the lookup for the whole collection.
 *
 * @param emsJson - GeoJSON-like object with a `features` array.
 * @param emsFieldId - Name of the feature property to inspect.
 * @param sampleValue - String value to look for.
 * @returns `true` if at least one feature has a matching property value.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- callers pass loosely typed EMS GeoJSON
function existsInEMS(emsJson: any, emsFieldId: string, sampleValue: string): boolean {
  const features: unknown = emsJson?.features;
  if (!Array.isArray(features)) {
    return false;
  }
  return features.some((feature) => {
    // GeoJSON allows `properties` to be null, and not every feature defines every field.
    const fieldValue = feature?.properties?.[emsFieldId];
    return fieldValue != null && String(fieldValue) === sampleValue;
  });
}
function matchesEmsField(emsJson: any, emsFieldId: string, sampleValues: Array<string | number>) {
function allSamplesMatch(sampleValues: Array<string | number>, ids: string[]) {
for (let j = 0; j < sampleValues.length; j++) {
const sampleValue = sampleValues[j].toString();
if (!existsInEMS(emsJson, emsFieldId, sampleValue)) {
if (!ids.includes(sampleValue)) {
return false;
}
}
return true;
}
async function getMatchesForEMSLayer(
emsLayerId: string,
async function suggestByIdValues(
sampleValues: Array<string | number>
): Promise<EMSTermJoinConfig[]> {
const matches: EMSTermJoinConfig[] = [];
const fileLayers: FileLayer[] = await getEmsFileLayers();
const emsFileLayer: FileLayer | undefined = fileLayers.find((fl: FileLayer) =>
fl.hasId(emsLayerId)
);
if (!emsFileLayer) {
return [];
}
const emsFields = emsFileLayer.getFields();
try {
const emsJson = await emsFileLayer.getGeoJson();
const matches: EMSTermJoinConfig[] = [];
for (let f = 0; f < emsFields.length; f++) {
if (matchesEmsField(emsJson, emsFields[f].id, sampleValues)) {
matches.push({
layerId: emsLayerId,
field: emsFields[f].id,
});
fileLayers.forEach((fileLayer) => {
const emsFields: FileLayerFieldShim[] = fileLayer.getFields();
emsFields.forEach((emsField: FileLayerFieldShim) => {
if (!emsField.values || !emsField.values.length) {
return;
}
}
return matches;
} catch (e) {
return [];
}
}
async function suggestByEMSLayerIds(
emsLayerIds: string[],
values: Array<string | number>
): Promise<EMSTermJoinConfig[]> {
const matches = [];
for (const emsLayerId of emsLayerIds) {
const layerIdMathes = await getMatchesForEMSLayer(emsLayerId, values);
matches.push(...layerIdMathes);
}
const emsConfig = {
layerId: fileLayer.getId(),
field: emsField.id,
};
if (allSamplesMatch(sampleValues, emsField.values)) {
matches.push(emsConfig);
}
});
});
return matches;
}

View file

@ -1,13 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
// Shared list of EMS layer ids. NOTE(review): presumably the constant imported
// by the anomalies map and passed as `emsLayerIds` to
// `mapsPlugin.suggestEMSTermJoinConfig` — confirm against the import path.
export const COMMON_EMS_LAYER_IDS = [
'world_countries',
'administrative_regions_lvl2',
'usa_zip_codes',
'usa_states',
];

View file

@ -28,7 +28,6 @@ import { isDefined } from '../../../common/types/guards';
import { MlEmbeddedMapComponent } from '../components/ml_embedded_map';
import { EMSTermJoinConfig } from '../../../../maps/public';
import { AnomaliesTableRecord } from '../../../common/types/anomalies';
import { COMMON_EMS_LAYER_IDS } from '../../../common/constants/embeddable_map';
const MAX_ENTITY_VALUES = 3;
@ -177,7 +176,6 @@ export const AnomaliesMap: FC<Props> = ({ anomalies, jobIds }) => {
}
const suggestion: EMSTermJoinConfig | null = await mapsPlugin.suggestEMSTermJoinConfig({
emsLayerIds: COMMON_EMS_LAYER_IDS,
sampleValues: Array.from(entityValues),
sampleValuesColumnName: entityName || '',
});