[Maps] Use id-values from client-manifest to suggest layers (#102788)
This commit is contained in:
parent
81fe54109e
commit
3c780a8505
|
@ -14,13 +14,6 @@ import { DocumentStatsTable } from './document_stats';
|
|||
import { ExpandedRowContent } from './expanded_row_content';
|
||||
import { ChoroplethMap } from './choropleth_map';
|
||||
|
||||
const COMMON_EMS_LAYER_IDS = [
|
||||
'world_countries',
|
||||
'administrative_regions_lvl2',
|
||||
'usa_zip_codes',
|
||||
'usa_states',
|
||||
];
|
||||
|
||||
export const KeywordContent: FC<FieldDataRowProps> = ({ config }) => {
|
||||
const [EMSSuggestion, setEMSSuggestion] = useState<EMSTermJoinConfig | null | undefined>();
|
||||
const { stats, fieldName } = config;
|
||||
|
@ -32,7 +25,6 @@ export const KeywordContent: FC<FieldDataRowProps> = ({ config }) => {
|
|||
const loadEMSTermSuggestions = useCallback(async () => {
|
||||
if (!mapsPlugin) return;
|
||||
const suggestion: EMSTermJoinConfig | null = await mapsPlugin.suggestEMSTermJoinConfig({
|
||||
emsLayerIds: COMMON_EMS_LAYER_IDS,
|
||||
sampleValues: Array.isArray(stats?.topValues)
|
||||
? stats?.topValues.map((value) => value.key)
|
||||
: [],
|
||||
|
|
|
@ -6,42 +6,24 @@
|
|||
*/
|
||||
|
||||
import { suggestEMSTermJoinConfig } from './ems_autosuggest';
|
||||
import { FeatureCollection } from 'geojson';
|
||||
|
||||
class MockFileLayer {
|
||||
private readonly _url: string;
|
||||
private readonly _id: string;
|
||||
private readonly _fields: Array<{ id: string }>;
|
||||
|
||||
constructor(url: string, fields: Array<{ id: string }>) {
|
||||
this._url = url;
|
||||
this._id = url;
|
||||
constructor(id: string, fields: Array<{ id: string; alias?: string[]; values?: string[] }>) {
|
||||
this._id = id;
|
||||
this._fields = fields;
|
||||
}
|
||||
|
||||
getId() {
|
||||
return this._id;
|
||||
}
|
||||
|
||||
getFields() {
|
||||
return this._fields;
|
||||
}
|
||||
|
||||
getGeoJson() {
|
||||
if (this._url === 'world_countries') {
|
||||
return ({
|
||||
type: 'FeatureCollection',
|
||||
features: [
|
||||
{ properties: { iso2: 'CA', iso3: 'CAN' } },
|
||||
{ properties: { iso2: 'US', iso3: 'USA' } },
|
||||
],
|
||||
} as unknown) as FeatureCollection;
|
||||
} else if (this._url === 'zips') {
|
||||
return ({
|
||||
type: 'FeatureCollection',
|
||||
features: [{ properties: { zip: '40204' } }, { properties: { zip: '40205' } }],
|
||||
} as unknown) as FeatureCollection;
|
||||
} else {
|
||||
throw new Error(`unrecognized mock url ${this._url}`);
|
||||
}
|
||||
}
|
||||
|
||||
hasId(id: string) {
|
||||
return id === this._id;
|
||||
}
|
||||
|
@ -51,31 +33,31 @@ jest.mock('../util', () => {
|
|||
return {
|
||||
async getEmsFileLayers() {
|
||||
return [
|
||||
new MockFileLayer('world_countries', [{ id: 'iso2' }, { id: 'iso3' }]),
|
||||
new MockFileLayer('zips', [{ id: 'zip' }]),
|
||||
new MockFileLayer('world_countries', [
|
||||
{
|
||||
id: 'iso2',
|
||||
alias: ['(geo\\.){0,}country_iso_code$', '(country|countries)'],
|
||||
values: ['CA', 'US'],
|
||||
},
|
||||
{ id: 'iso3', values: ['CAN', 'USA'] },
|
||||
{ id: 'name', alias: ['(country|countries)'] },
|
||||
]),
|
||||
new MockFileLayer('usa_zip_codes', [
|
||||
{ id: 'zip', alias: ['zip'], values: ['40204', '40205'] },
|
||||
]),
|
||||
];
|
||||
},
|
||||
};
|
||||
});
|
||||
|
||||
describe('suggestEMSTermJoinConfig', () => {
|
||||
test('no info provided', async () => {
|
||||
test('Should not validate when no info provided', async () => {
|
||||
const termJoinConfig = await suggestEMSTermJoinConfig({});
|
||||
expect(termJoinConfig).toBe(null);
|
||||
});
|
||||
|
||||
describe('validate common column names', () => {
|
||||
test('ecs region', async () => {
|
||||
const termJoinConfig = await suggestEMSTermJoinConfig({
|
||||
sampleValuesColumnName: 'destination.geo.region_iso_code',
|
||||
});
|
||||
expect(termJoinConfig).toEqual({
|
||||
layerId: 'administrative_regions_lvl2',
|
||||
field: 'region_iso_code',
|
||||
});
|
||||
});
|
||||
|
||||
test('ecs country', async () => {
|
||||
describe('With common column names', () => {
|
||||
test('should match first match', async () => {
|
||||
const termJoinConfig = await suggestEMSTermJoinConfig({
|
||||
sampleValuesColumnName: 'country_iso_code',
|
||||
});
|
||||
|
@ -85,13 +67,29 @@ describe('suggestEMSTermJoinConfig', () => {
|
|||
});
|
||||
});
|
||||
|
||||
test('country', async () => {
|
||||
test('When sampleValues are provided, should reject match if no sampleValues for a layer, even though the name matches', async () => {
|
||||
const termJoinConfig = await suggestEMSTermJoinConfig({
|
||||
sampleValuesColumnName: 'country_iso_code',
|
||||
sampleValues: ['FO', 'US', 'CA'],
|
||||
});
|
||||
expect(termJoinConfig).toEqual(null);
|
||||
});
|
||||
|
||||
test('should reject match if sampleValues not in id-list', async () => {
|
||||
const termJoinConfig = await suggestEMSTermJoinConfig({
|
||||
sampleValuesColumnName: 'zip',
|
||||
sampleValues: ['90201', '40205'],
|
||||
});
|
||||
expect(termJoinConfig).toEqual(null);
|
||||
});
|
||||
|
||||
test('should return first match (regex matches both iso2 and name)', async () => {
|
||||
const termJoinConfig = await suggestEMSTermJoinConfig({
|
||||
sampleValuesColumnName: 'Country_name',
|
||||
});
|
||||
expect(termJoinConfig).toEqual({
|
||||
layerId: 'world_countries',
|
||||
field: 'name',
|
||||
field: 'iso2',
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -103,10 +101,10 @@ describe('suggestEMSTermJoinConfig', () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe('validate well known formats', () => {
|
||||
test('5-digit zip code', async () => {
|
||||
describe('validate well known formats (using id-values in manifest)', () => {
|
||||
test('Should validate known zipcodes', async () => {
|
||||
const termJoinConfig = await suggestEMSTermJoinConfig({
|
||||
sampleValues: ['90201', 40204],
|
||||
sampleValues: ['40205', 40204],
|
||||
});
|
||||
expect(termJoinConfig).toEqual({
|
||||
layerId: 'usa_zip_codes',
|
||||
|
@ -114,51 +112,18 @@ describe('suggestEMSTermJoinConfig', () => {
|
|||
});
|
||||
});
|
||||
|
||||
test('mismatch', async () => {
|
||||
test('Should not validate unknown zipcode (in this case, 90201)', async () => {
|
||||
const termJoinConfig = await suggestEMSTermJoinConfig({
|
||||
sampleValues: ['90201', 40204],
|
||||
});
|
||||
expect(termJoinConfig).toEqual(null);
|
||||
});
|
||||
|
||||
test('Should not validate mismatches', async () => {
|
||||
const termJoinConfig = await suggestEMSTermJoinConfig({
|
||||
sampleValues: ['90201', 'foobar'],
|
||||
});
|
||||
expect(termJoinConfig).toEqual(null);
|
||||
});
|
||||
});
|
||||
|
||||
describe('validate based on EMS data', () => {
|
||||
test('Should validate with zip codes layer', async () => {
|
||||
const termJoinConfig = await suggestEMSTermJoinConfig({
|
||||
sampleValues: ['40204', 40205],
|
||||
emsLayerIds: ['world_countries', 'zips'],
|
||||
});
|
||||
expect(termJoinConfig).toEqual({
|
||||
layerId: 'zips',
|
||||
field: 'zip',
|
||||
});
|
||||
});
|
||||
|
||||
test('Should not validate with faulty zip codes', async () => {
|
||||
const termJoinConfig = await suggestEMSTermJoinConfig({
|
||||
sampleValues: ['40204', '00000'],
|
||||
emsLayerIds: ['world_countries', 'zips'],
|
||||
});
|
||||
expect(termJoinConfig).toEqual(null);
|
||||
});
|
||||
|
||||
test('Should validate against countries', async () => {
|
||||
const termJoinConfig = await suggestEMSTermJoinConfig({
|
||||
sampleValues: ['USA', 'USA', 'CAN'],
|
||||
emsLayerIds: ['world_countries', 'zips'],
|
||||
});
|
||||
expect(termJoinConfig).toEqual({
|
||||
layerId: 'world_countries',
|
||||
field: 'iso3',
|
||||
});
|
||||
});
|
||||
|
||||
test('Should not validate against missing countries', async () => {
|
||||
const termJoinConfig = await suggestEMSTermJoinConfig({
|
||||
sampleValues: ['USA', 'BEL', 'CAN'],
|
||||
emsLayerIds: ['world_countries', 'zips'],
|
||||
});
|
||||
expect(termJoinConfig).toEqual(null);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
@ -7,10 +7,8 @@
|
|||
|
||||
import type { FileLayer } from '@elastic/ems-client';
|
||||
import { getEmsFileLayers } from '../util';
|
||||
import { emsWorldLayerId, emsRegionLayerId, emsUsaZipLayerId } from '../../common';
|
||||
|
||||
export interface SampleValuesConfig {
|
||||
emsLayerIds?: string[];
|
||||
sampleValues?: Array<string | number>;
|
||||
sampleValuesColumnName?: string;
|
||||
}
|
||||
|
@ -20,44 +18,16 @@ export interface EMSTermJoinConfig {
|
|||
field: string;
|
||||
}
|
||||
|
||||
const wellKnownColumnNames = [
|
||||
{
|
||||
regex: /(geo\.){0,}country_iso_code$/i, // ECS postfix for country
|
||||
emsConfig: {
|
||||
layerId: emsWorldLayerId,
|
||||
field: 'iso2',
|
||||
},
|
||||
},
|
||||
{
|
||||
regex: /(geo\.){0,}region_iso_code$/i, // ECS postfix for region
|
||||
emsConfig: {
|
||||
layerId: emsRegionLayerId,
|
||||
field: 'region_iso_code',
|
||||
},
|
||||
},
|
||||
{
|
||||
regex: /^country/i, // anything starting with country
|
||||
emsConfig: {
|
||||
layerId: emsWorldLayerId,
|
||||
field: 'name',
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
const wellKnownColumnFormats = [
|
||||
{
|
||||
regex: /(^\d{5}$)/i, // 5-digit zipcode
|
||||
emsConfig: {
|
||||
layerId: emsUsaZipLayerId,
|
||||
field: 'zip',
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
interface UniqueMatch {
|
||||
config: { layerId: string; field: string };
|
||||
config: EMSTermJoinConfig;
|
||||
count: number;
|
||||
}
|
||||
interface FileLayerFieldShim {
|
||||
id: string;
|
||||
values?: string[];
|
||||
regex?: string;
|
||||
alias?: string[];
|
||||
}
|
||||
|
||||
export async function suggestEMSTermJoinConfig(
|
||||
sampleValuesConfig: SampleValuesConfig
|
||||
|
@ -65,20 +35,17 @@ export async function suggestEMSTermJoinConfig(
|
|||
const matches: EMSTermJoinConfig[] = [];
|
||||
|
||||
if (sampleValuesConfig.sampleValuesColumnName) {
|
||||
matches.push(...suggestByName(sampleValuesConfig.sampleValuesColumnName));
|
||||
const matchesBasedOnColumnName = await suggestByName(
|
||||
sampleValuesConfig.sampleValuesColumnName,
|
||||
sampleValuesConfig.sampleValues
|
||||
);
|
||||
matches.push(...matchesBasedOnColumnName);
|
||||
}
|
||||
|
||||
if (sampleValuesConfig.sampleValues && sampleValuesConfig.sampleValues.length) {
|
||||
if (sampleValuesConfig.emsLayerIds && sampleValuesConfig.emsLayerIds.length) {
|
||||
matches.push(
|
||||
...(await suggestByEMSLayerIds(
|
||||
sampleValuesConfig.emsLayerIds,
|
||||
sampleValuesConfig.sampleValues
|
||||
))
|
||||
);
|
||||
} else {
|
||||
matches.push(...suggestByValues(sampleValuesConfig.sampleValues));
|
||||
}
|
||||
// Only looks at id-values in main manifest
|
||||
const matchesBasedOnIds = await suggestByIdValues(sampleValuesConfig.sampleValues);
|
||||
matches.push(...matchesBasedOnIds);
|
||||
}
|
||||
|
||||
const uniqMatches: UniqueMatch[] = matches.reduce((accum: UniqueMatch[], match) => {
|
||||
|
@ -105,92 +72,80 @@ export async function suggestEMSTermJoinConfig(
|
|||
return uniqMatches.length ? uniqMatches[0].config : null;
|
||||
}
|
||||
|
||||
function suggestByName(columnName: string): EMSTermJoinConfig[] {
|
||||
const matches = wellKnownColumnNames.filter((wellknown) => {
|
||||
return columnName.match(wellknown.regex);
|
||||
});
|
||||
async function suggestByName(
|
||||
columnName: string,
|
||||
sampleValues?: Array<string | number>
|
||||
): Promise<EMSTermJoinConfig[]> {
|
||||
const fileLayers = await getEmsFileLayers();
|
||||
|
||||
return matches.map((m) => {
|
||||
return m.emsConfig;
|
||||
});
|
||||
}
|
||||
|
||||
function suggestByValues(values: Array<string | number>): EMSTermJoinConfig[] {
|
||||
const matches = wellKnownColumnFormats.filter((wellknown) => {
|
||||
for (let i = 0; i < values.length; i++) {
|
||||
const value = values[i].toString();
|
||||
if (!value.match(wellknown.regex)) {
|
||||
return false;
|
||||
const matches: EMSTermJoinConfig[] = [];
|
||||
fileLayers.forEach((fileLayer) => {
|
||||
const emsFields: FileLayerFieldShim[] = fileLayer.getFields();
|
||||
emsFields.forEach((emsField: FileLayerFieldShim) => {
|
||||
if (!emsField.alias || !emsField.alias.length) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
|
||||
const emsConfig = {
|
||||
layerId: fileLayer.getId(),
|
||||
field: emsField.id,
|
||||
};
|
||||
emsField.alias.forEach((alias: string) => {
|
||||
const regex = new RegExp(alias, 'i');
|
||||
const nameMatchesAlias = !!columnName.match(regex);
|
||||
// Check if this violates any known id-values.
|
||||
|
||||
let isMatch: boolean;
|
||||
if (sampleValues) {
|
||||
if (emsField.values && emsField.values.length) {
|
||||
isMatch = nameMatchesAlias && allSamplesMatch(sampleValues, emsField.values);
|
||||
} else {
|
||||
// requires validation against sample-values but EMS provides no meta to do so.
|
||||
isMatch = false;
|
||||
}
|
||||
} else {
|
||||
isMatch = nameMatchesAlias;
|
||||
}
|
||||
|
||||
if (isMatch) {
|
||||
matches.push(emsConfig);
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
return matches.map((m) => {
|
||||
return m.emsConfig;
|
||||
});
|
||||
return matches;
|
||||
}
|
||||
|
||||
function existsInEMS(emsJson: any, emsFieldId: string, sampleValue: string): boolean {
|
||||
for (let i = 0; i < emsJson.features.length; i++) {
|
||||
const emsFieldValue = emsJson.features[i].properties[emsFieldId].toString();
|
||||
if (emsFieldValue.toString() === sampleValue) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
function matchesEmsField(emsJson: any, emsFieldId: string, sampleValues: Array<string | number>) {
|
||||
function allSamplesMatch(sampleValues: Array<string | number>, ids: string[]) {
|
||||
for (let j = 0; j < sampleValues.length; j++) {
|
||||
const sampleValue = sampleValues[j].toString();
|
||||
if (!existsInEMS(emsJson, emsFieldId, sampleValue)) {
|
||||
if (!ids.includes(sampleValue)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
async function getMatchesForEMSLayer(
|
||||
emsLayerId: string,
|
||||
async function suggestByIdValues(
|
||||
sampleValues: Array<string | number>
|
||||
): Promise<EMSTermJoinConfig[]> {
|
||||
const matches: EMSTermJoinConfig[] = [];
|
||||
const fileLayers: FileLayer[] = await getEmsFileLayers();
|
||||
const emsFileLayer: FileLayer | undefined = fileLayers.find((fl: FileLayer) =>
|
||||
fl.hasId(emsLayerId)
|
||||
);
|
||||
|
||||
if (!emsFileLayer) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const emsFields = emsFileLayer.getFields();
|
||||
|
||||
try {
|
||||
const emsJson = await emsFileLayer.getGeoJson();
|
||||
const matches: EMSTermJoinConfig[] = [];
|
||||
for (let f = 0; f < emsFields.length; f++) {
|
||||
if (matchesEmsField(emsJson, emsFields[f].id, sampleValues)) {
|
||||
matches.push({
|
||||
layerId: emsLayerId,
|
||||
field: emsFields[f].id,
|
||||
});
|
||||
fileLayers.forEach((fileLayer) => {
|
||||
const emsFields: FileLayerFieldShim[] = fileLayer.getFields();
|
||||
emsFields.forEach((emsField: FileLayerFieldShim) => {
|
||||
if (!emsField.values || !emsField.values.length) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
return matches;
|
||||
} catch (e) {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
async function suggestByEMSLayerIds(
|
||||
emsLayerIds: string[],
|
||||
values: Array<string | number>
|
||||
): Promise<EMSTermJoinConfig[]> {
|
||||
const matches = [];
|
||||
for (const emsLayerId of emsLayerIds) {
|
||||
const layerIdMathes = await getMatchesForEMSLayer(emsLayerId, values);
|
||||
matches.push(...layerIdMathes);
|
||||
}
|
||||
const emsConfig = {
|
||||
layerId: fileLayer.getId(),
|
||||
field: emsField.id,
|
||||
};
|
||||
if (allSamplesMatch(sampleValues, emsField.values)) {
|
||||
matches.push(emsConfig);
|
||||
}
|
||||
});
|
||||
});
|
||||
return matches;
|
||||
}
|
||||
|
|
|
@ -1,13 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
export const COMMON_EMS_LAYER_IDS = [
|
||||
'world_countries',
|
||||
'administrative_regions_lvl2',
|
||||
'usa_zip_codes',
|
||||
'usa_states',
|
||||
];
|
|
@ -28,7 +28,6 @@ import { isDefined } from '../../../common/types/guards';
|
|||
import { MlEmbeddedMapComponent } from '../components/ml_embedded_map';
|
||||
import { EMSTermJoinConfig } from '../../../../maps/public';
|
||||
import { AnomaliesTableRecord } from '../../../common/types/anomalies';
|
||||
import { COMMON_EMS_LAYER_IDS } from '../../../common/constants/embeddable_map';
|
||||
|
||||
const MAX_ENTITY_VALUES = 3;
|
||||
|
||||
|
@ -177,7 +176,6 @@ export const AnomaliesMap: FC<Props> = ({ anomalies, jobIds }) => {
|
|||
}
|
||||
|
||||
const suggestion: EMSTermJoinConfig | null = await mapsPlugin.suggestEMSTermJoinConfig({
|
||||
emsLayerIds: COMMON_EMS_LAYER_IDS,
|
||||
sampleValues: Array.from(entityValues),
|
||||
sampleValuesColumnName: entityName || '',
|
||||
});
|
||||
|
|
Loading…
Reference in a new issue