[Data Telemetry] Rename dataset.* to data_stream.* (#75415)

Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
This commit is contained in:
Alejandro Fernández Haro 2020-08-24 17:32:52 +01:00 committed by GitHub
parent a3d3abd22d
commit 8fe62c33a5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 58 additions and 54 deletions

View file

@ -59,16 +59,16 @@ describe('get_data_telemetry', () => {
test('matches some indices and puts them in their own category', () => {
expect(
buildDataTelemetryPayload([
// APM Indices have known shipper (so we can infer the datasetType from mapping constant)
// APM Indices have known shipper (so we can infer the dataStreamType from mapping constant)
{ name: 'apm-7.7.0-error-000001', shipper: 'apm', isECS: true },
{ name: 'apm-7.7.0-metric-000001', shipper: 'apm', isECS: true },
{ name: 'apm-7.7.0-onboarding-2020.05.17', shipper: 'apm', isECS: true },
{ name: 'apm-7.7.0-profile-000001', shipper: 'apm', isECS: true },
{ name: 'apm-7.7.0-span-000001', shipper: 'apm', isECS: true },
{ name: 'apm-7.7.0-transaction-000001', shipper: 'apm', isECS: true },
// Packetbeat indices with known shipper (we can infer datasetType from mapping constant)
// Packetbeat indices with known shipper (we can infer dataStreamType from mapping constant)
{ name: 'packetbeat-7.7.0-2020.06.11-000001', shipper: 'packetbeat', isECS: true },
// Matching patterns from the list => known datasetName but the rest is unknown
// Matching patterns from the list => known dataStreamDataset but the rest is unknown
{ name: 'filebeat-12314', docCount: 100, sizeInBytes: 10 },
{ name: 'metricbeat-1234', docCount: 100, sizeInBytes: 10, isECS: false },
{ name: '.app-search-1234', docCount: 0 },
@ -76,8 +76,8 @@ describe('get_data_telemetry', () => {
// New Indexing strategy: everything can be inferred from the constant_keyword values
{
name: '.ds-logs-nginx.access-default-000001',
datasetName: 'nginx.access',
datasetType: 'logs',
dataStreamDataset: 'nginx.access',
dataStreamType: 'logs',
shipper: 'filebeat',
isECS: true,
docCount: 1000,
@ -85,8 +85,8 @@ describe('get_data_telemetry', () => {
},
{
name: '.ds-logs-nginx.access-default-000002',
datasetName: 'nginx.access',
datasetType: 'logs',
dataStreamDataset: 'nginx.access',
dataStreamType: 'logs',
shipper: 'filebeat',
isECS: true,
docCount: 1000,
@ -94,8 +94,8 @@ describe('get_data_telemetry', () => {
},
{
name: '.ds-traces-something-default-000002',
datasetName: 'something',
datasetType: 'traces',
dataStreamDataset: 'something',
dataStreamType: 'traces',
packageName: 'some-package',
isECS: true,
docCount: 1000,
@ -103,26 +103,26 @@ describe('get_data_telemetry', () => {
},
{
name: '.ds-metrics-something.else-default-000002',
datasetName: 'something.else',
datasetType: 'metrics',
dataStreamDataset: 'something.else',
dataStreamType: 'metrics',
managedBy: 'ingest-manager',
isECS: true,
docCount: 1000,
sizeInBytes: 60,
},
// Filtered out: it has datasetName and datasetType, but no shipper, no packageName, and managedBy is not 'ingest-manager'
// Filtered out: it has dataStreamDataset and dataStreamType, but no shipper, no packageName, and managedBy is not 'ingest-manager'
{
name: 'some-index-that-should-not-show',
datasetName: 'should-not-show',
datasetType: 'logs',
dataStreamDataset: 'should-not-show',
dataStreamType: 'logs',
isECS: true,
docCount: 1000,
sizeInBytes: 60,
},
{
name: 'other-index-that-should-not-show',
datasetName: 'should-not-show-either',
datasetType: 'metrics',
dataStreamDataset: 'should-not-show-either',
dataStreamType: 'metrics',
managedBy: 'me',
isECS: true,
docCount: 1000,
@ -167,7 +167,7 @@ describe('get_data_telemetry', () => {
doc_count: 0,
},
{
dataset: { name: 'nginx.access', type: 'logs' },
data_stream: { dataset: 'nginx.access', type: 'logs' },
shipper: 'filebeat',
index_count: 2,
ecs_index_count: 2,
@ -175,7 +175,7 @@ describe('get_data_telemetry', () => {
size_in_bytes: 1060,
},
{
dataset: { name: 'something', type: 'traces' },
data_stream: { dataset: 'something', type: 'traces' },
package: { name: 'some-package' },
index_count: 1,
ecs_index_count: 1,
@ -183,7 +183,7 @@ describe('get_data_telemetry', () => {
size_in_bytes: 60,
},
{
dataset: { name: 'something.else', type: 'metrics' },
data_stream: { dataset: 'something.else', type: 'metrics' },
index_count: 1,
ecs_index_count: 1,
doc_count: 1000,
@ -236,7 +236,7 @@ describe('get_data_telemetry', () => {
test('find an index that does not match any index pattern but has mappings metadata', async () => {
const callCluster = mockCallCluster(
['cannot_match_anything'],
{ isECS: true, datasetType: 'traces', shipper: 'my-beat' },
{ isECS: true, dataStreamType: 'traces', shipper: 'my-beat' },
{
indices: {
cannot_match_anything: {
@ -247,7 +247,7 @@ describe('get_data_telemetry', () => {
);
await expect(getDataTelemetry(callCluster)).resolves.toStrictEqual([
{
dataset: { name: undefined, type: 'traces' },
data_stream: { dataset: undefined, type: 'traces' },
shipper: 'my-beat',
index_count: 1,
ecs_index_count: 1,
@ -266,7 +266,7 @@ describe('get_data_telemetry', () => {
function mockCallCluster(
indicesMappings: string[] = [],
{ isECS = false, datasetName = '', datasetType = '', shipper = '' } = {},
{ isECS = false, dataStreamDataset = '', dataStreamType = '', shipper = '' } = {},
indexStats: any = {}
) {
return jest.fn().mockImplementation(async (method: string, opts: any) => {
@ -279,14 +279,14 @@ function mockCallCluster(
...(shipper && { _meta: { beat: shipper } }),
properties: {
...(isECS && { ecs: { properties: { version: { type: 'keyword' } } } }),
...((datasetType || datasetName) && {
dataset: {
...((dataStreamType || dataStreamDataset) && {
data_stream: {
properties: {
...(datasetName && {
name: { type: 'constant_keyword', value: datasetName },
...(dataStreamDataset && {
dataset: { type: 'constant_keyword', value: dataStreamDataset },
}),
...(datasetType && {
type: { type: 'constant_keyword', value: datasetType },
...(dataStreamType && {
type: { type: 'constant_keyword', value: dataStreamType },
}),
},
},

View file

@ -32,9 +32,9 @@ export interface DataTelemetryBasePayload {
}
export interface DataTelemetryDocument extends DataTelemetryBasePayload {
dataset?: {
name?: string;
type?: DataTelemetryType | 'unknown' | string; // The union of types is to help autocompletion with some known `dataset.type`s
data_stream?: {
dataset?: string;
type?: DataTelemetryType | string; // The union of types is to help autocompletion with some known `data_stream.type`s
};
package?: {
name: string;
@ -49,8 +49,8 @@ export interface DataTelemetryIndex {
name: string;
packageName?: string; // Populated by Ingest Manager at `_meta.package.name`
managedBy?: string; // Populated by Ingest Manager at `_meta.managed_by`
datasetName?: string; // To be obtained from `mappings.dataset.name` if it's a constant keyword
datasetType?: string; // To be obtained from `mappings.dataset.type` if it's a constant keyword
dataStreamDataset?: string; // To be obtained from `mappings.data_stream.dataset` if it's a constant keyword
dataStreamType?: string; // To be obtained from `mappings.data_stream.type` if it's a constant keyword
shipper?: string; // To be obtained from `_meta.beat` if it's set
isECS?: boolean; // Optional because it can't be obtained via Monitoring.
@ -64,8 +64,8 @@ type AtLeastOne<T, U = { [K in keyof T]: Pick<T, K> }> = Partial<T> & U[keyof U]
type DataDescriptor = AtLeastOne<{
packageName: string;
datasetName: string;
datasetType: string;
dataStreamDataset: string;
dataStreamType: string;
shipper: string;
patternName: DataPatternName; // When found from the list of the index patterns
}>;
@ -75,24 +75,24 @@ function findMatchingDescriptors({
shipper,
packageName,
managedBy,
datasetName,
datasetType,
dataStreamDataset,
dataStreamType,
}: DataTelemetryIndex): DataDescriptor[] {
// If we already have the data from the indices' mappings...
if (
[shipper, packageName].some(Boolean) ||
(managedBy === 'ingest-manager' && [datasetType, datasetName].some(Boolean))
(managedBy === 'ingest-manager' && [dataStreamType, dataStreamDataset].some(Boolean))
) {
return [
{
...(shipper && { shipper }),
...(packageName && { packageName }),
...(datasetName && { datasetName }),
...(datasetType && { datasetType }),
...(dataStreamDataset && { dataStreamDataset }),
...(dataStreamType && { dataStreamType }),
} as AtLeastOne<{
packageName: string;
datasetName: string;
datasetType: string;
dataStreamDataset: string;
dataStreamType: string;
shipper: string;
}>, // Cast needed: TS cannot infer from the `if` clause above that at least one of these properties is set
];
@ -149,15 +149,17 @@ export function buildDataTelemetryPayload(indices: DataTelemetryIndex[]): DataTe
for (const indexCandidate of indexCandidates) {
const matchingDescriptors = findMatchingDescriptors(indexCandidate);
for (const {
datasetName,
datasetType,
dataStreamDataset,
dataStreamType,
packageName,
shipper,
patternName,
} of matchingDescriptors) {
const key = `${datasetName}-${datasetType}-${packageName}-${shipper}-${patternName}`;
const key = `${dataStreamDataset}-${dataStreamType}-${packageName}-${shipper}-${patternName}`;
acc.set(key, {
...((datasetName || datasetType) && { dataset: { name: datasetName, type: datasetType } }),
...((dataStreamDataset || dataStreamType) && {
data_stream: { dataset: dataStreamDataset, type: dataStreamType },
}),
...(packageName && { package: { name: packageName } }),
...(shipper && { shipper }),
...(patternName && { pattern_name: patternName }),
@ -198,9 +200,9 @@ interface IndexMappings {
managed_by?: string; // Typically "ingest-manager"
};
properties: {
dataset?: {
data_stream?: {
properties: {
name?: {
dataset?: {
type: string;
value?: string;
};
@ -242,10 +244,10 @@ export async function getDataTelemetry(callCluster: LegacyAPICaller) {
// Does it have `ecs.version` in the mappings? => It follows the ECS conventions
'*.mappings.properties.ecs.properties.version.type',
// If `dataset.type` is a `constant_keyword`, it can be reported as a type
'*.mappings.properties.dataset.properties.type.value',
// If `dataset.name` is a `constant_keyword`, it can be reported as the dataset
'*.mappings.properties.dataset.properties.name.value',
// If `data_stream.type` is a `constant_keyword`, it can be reported as a type
'*.mappings.properties.data_stream.properties.type.value',
// If `data_stream.dataset` is a `constant_keyword`, it can be reported as the dataset
'*.mappings.properties.data_stream.properties.dataset.value',
],
}),
// GET <index>/_stats/docs,store?level=indices&filter_path=indices.*.total
@ -265,8 +267,10 @@ export async function getDataTelemetry(callCluster: LegacyAPICaller) {
shipper: indexMappings[name]?.mappings?._meta?.beat,
packageName: indexMappings[name]?.mappings?._meta?.package?.name,
managedBy: indexMappings[name]?.mappings?._meta?.managed_by,
datasetName: indexMappings[name]?.mappings?.properties.dataset?.properties.name?.value,
datasetType: indexMappings[name]?.mappings?.properties.dataset?.properties.type?.value,
dataStreamDataset:
indexMappings[name]?.mappings?.properties.data_stream?.properties.dataset?.value,
dataStreamType:
indexMappings[name]?.mappings?.properties.data_stream?.properties.type?.value,
};
const stats = (indexStats?.indices || {})[name];