[ML] Fix counters and percentages for array fields on the Data visualizer page (#55209) (#55518)

* [ML] update data visualizer endpoint to check doc counts

* [ML] fix mock for cardinality tests

* [ML] use actual field name for agg filtering instead of safeFieldName

Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
This commit is contained in:
Dima Arnautov 2020-01-22 08:44:59 +01:00 committed by GitHub
parent 97e6ae502b
commit 14cd8e21f8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 45 additions and 11 deletions

View file

@ -261,7 +261,7 @@ export class DataVisualizer {
aggregatableFields.forEach((field, i) => {
const safeFieldName = getSafeAggregationName(field, i);
aggs[`${safeFieldName}_count`] = {
value_count: { field },
filter: { exists: { field } },
};
aggs[`${safeFieldName}_cardinality`] = {
cardinality: { field },
@ -296,7 +296,7 @@ export class DataVisualizer {
samplerShardSize > 0 ? _.get(aggregations, ['sample', 'doc_count'], 0) : totalCount;
aggregatableFields.forEach((field, i) => {
const safeFieldName = getSafeAggregationName(field, i);
const count = _.get(aggregations, [...aggsPath, `${safeFieldName}_count`, 'value'], 0);
const count = _.get(aggregations, [...aggsPath, `${safeFieldName}_count`, 'doc_count'], 0);
if (count > 0) {
const cardinality = _.get(
aggregations,
@ -433,7 +433,12 @@ export class DataVisualizer {
fields.forEach((field, i) => {
const safeFieldName = getSafeAggregationName(field.fieldName, i);
aggs[`${safeFieldName}_field_stats`] = {
stats: { field: field.fieldName },
filter: { exists: { field: field.fieldName } },
aggs: {
actual_stats: {
stats: { field: field.fieldName },
},
},
};
aggs[`${safeFieldName}_percentiles`] = {
percentiles: {
@ -484,10 +489,19 @@ export class DataVisualizer {
const batchStats = [];
fields.forEach((field, i) => {
const safeFieldName = getSafeAggregationName(field.fieldName, i);
const fieldStatsResp = _.get(aggregations, [...aggsPath, `${safeFieldName}_field_stats`], {});
const docCount = _.get(
aggregations,
[...aggsPath, `${safeFieldName}_field_stats`, 'doc_count'],
0
);
const fieldStatsResp = _.get(
aggregations,
[...aggsPath, `${safeFieldName}_field_stats`, 'actual_stats'],
{}
);
const stats = {
fieldName: field.fieldName,
count: _.get(fieldStatsResp, 'count', 0),
count: docCount,
min: _.get(fieldStatsResp, 'min', 0),
max: _.get(fieldStatsResp, 'max', 0),
avg: _.get(fieldStatsResp, 'avg', 0),
@ -632,7 +646,12 @@ export class DataVisualizer {
fields.forEach((field, i) => {
const safeFieldName = getSafeAggregationName(field.fieldName, i);
aggs[`${safeFieldName}_field_stats`] = {
stats: { field: field.fieldName },
filter: { exists: { field: field.fieldName } },
aggs: {
actual_stats: {
stats: { field: field.fieldName },
},
},
};
});
@ -651,10 +670,19 @@ export class DataVisualizer {
const batchStats = [];
fields.forEach((field, i) => {
const safeFieldName = getSafeAggregationName(field.fieldName, i);
const fieldStatsResp = _.get(aggregations, [...aggsPath, `${safeFieldName}_field_stats`], {});
const docCount = _.get(
aggregations,
[...aggsPath, `${safeFieldName}_field_stats`, 'doc_count'],
0
);
const fieldStatsResp = _.get(
aggregations,
[...aggsPath, `${safeFieldName}_field_stats`, 'actual_stats'],
{}
);
batchStats.push({
fieldName: field.fieldName,
count: _.get(fieldStatsResp, 'count', 0),
count: docCount,
earliest: _.get(fieldStatsResp, 'min', 0),
latest: _.get(fieldStatsResp, 'max', 0),
});
@ -680,7 +708,7 @@ export class DataVisualizer {
fields.forEach((field, i) => {
const safeFieldName = getSafeAggregationName(field.fieldName, i);
aggs[`${safeFieldName}_value_count`] = {
value_count: { field: field.fieldName },
filter: { exists: { field: field.fieldName } },
};
aggs[`${safeFieldName}_values`] = {
terms: {
@ -707,7 +735,7 @@ export class DataVisualizer {
const safeFieldName = getSafeAggregationName(field.fieldName, i);
const stats = {
fieldName: field.fieldName,
count: _.get(aggregations, [...aggsPath, `${safeFieldName}_value_count`, 'value'], 0),
count: _.get(aggregations, [...aggsPath, `${safeFieldName}_value_count`, 'doc_count'], 0),
trueCount: 0,
falseCount: 0,
};

View file

@ -1 +1,7 @@
{"took":0,"timed_out":false,"_shards":{"total":1,"successful":1,"skipped":0,"failed":0},"hits":{"total":86274,"max_score":0,"hits":[]},"aggregations":{"airline_cardinality":{"value":19},"airline_count":{"value":86274}}}
{
"took": 0,
"timed_out": false,
"_shards": { "total": 1, "successful": 1, "skipped": 0, "failed": 0 },
"hits": { "total": 86274, "max_score": 0, "hits": [] },
"aggregations": { "airline_cardinality": { "value": 19 }, "airline_count": { "doc_count": 86274 } }
}