[ML] Improve anomalies in Kibana sample data (#36982)

* [ML] Sample ML jobs: Updates to make anomalies clearer

Minor updates to web logs data and ML job
definitions to make anomalous activity more
distinct.

* [ML] Sample ML jobs: Further updates to jobs and data

Adding missing files from previous commit and
changing job naming (removing weblog_* prefix etc.)

* [ML] Resolving PR review comments.

* [ML] Improve anomalies in Kibana sample data

Minor fixes:
- Rename job group to `kibana_sample_data` throughout
- Fix index pattern link in `sample_data_sets.ts`
- Fix logs.json.gz count test
This commit is contained in:
stevedodson 2019-05-24 15:32:06 +02:00 committed by GitHub
parent 673d92ef23
commit 9463fd8778
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 57 additions and 125 deletions

View file

@ -35,8 +35,8 @@ test('load log data', async () => {
myDocsCount += docs.length;
};
const count = await loadData('./src/legacy/server/sample_data/data_sets/logs/logs.json.gz', bulkInsertMock);
expect(myDocsCount).toBe(14005);
expect(count).toBe(14005);
expect(myDocsCount).toBe(14074);
expect(count).toBe(14074);
});
test('load ecommerce data', async () => {

View file

@ -18,16 +18,9 @@ export function addLinksToSampleDatasets(server: any) {
icon: 'machineLearningApp',
});
server.addAppLinksToSampleDataset('flights', {
path:
'/app/ml#/modules/check_view_or_create?id=sample_data_flights&index=d3d7af60-4c81-11e8-b3d7-01146121b73d',
label: sampleDataLinkLabel,
icon: 'machineLearningApp',
});
server.addAppLinksToSampleDataset('logs', {
path:
'/app/ml#/modules/check_view_or_create?id=sample_data_weblogs&index=edf84fe0-e1a0-11e7-b6d5-4dc382ef7f5b',
'/app/ml#/modules/check_view_or_create?id=sample_data_weblogs&index=90943e30-9a47-11e8-b64d-95841ca0b247',
label: sampleDataLinkLabel,
icon: 'machineLearningApp',
});

View file

@ -20,7 +20,6 @@ describe('ML - data recognizer', () => {
'metricbeat_system_ecs',
'nginx_ecs',
'sample_data_ecommerce',
'sample_data_flights',
'sample_data_weblogs',
];

View file

@ -1,6 +1,6 @@
{
"groups": ["kibana_sample_data"],
"description": "Kibana sample eCommerce data: detect anomalies in total sales",
"groups": ["kibana_sample_data", "kibana_sample_ecommerce"],
"description": "Find customers spending an unusually high amount in an hour",
"analysis_config": {
"bucket_span": "1h",
"detectors": [

View file

@ -1,27 +0,0 @@
{
"id": "sample_data_flights",
"title": "Kibana sample data flights",
"description": "Find anomalies in mean flight delay",
"type": "Sample Dataset",
"logoFile": "logo.json",
"defaultIndexPattern": "kibana_sample_data_flights",
"query": {
"bool": {
"filter": [{ "term": { "_index": "kibana_sample_data_flights" } }]
}
},
"jobs": [
{
"id": "mean_delay_by_carrier",
"file": "mean_delay_by_carrier.json"
}
],
"datafeeds": [
{
"id": "datafeed-mean_delay_by_carrier",
"file": "datafeed_mean_delay_by_carrier.json",
"job_id": "mean_delay_by_carrier"
}
],
"kibana": {}
}

View file

@ -1,9 +0,0 @@
{
"job_id": "JOB_ID",
"indexes": ["INDEX_PATTERN_NAME"],
"query": {
"bool": {
"filter": [{ "term": { "_index": "kibana_sample_data_flights" } }]
}
}
}

View file

@ -1,38 +0,0 @@
{
"groups": ["kibana_sample_data"],
"description": "Kibana sample flights data: detect anomalies in mean flight delay",
"analysis_config": {
"bucket_span": "1h",
"detectors": [
{
"detector_description": "Mean flight delay time by airline carrier",
"function": "high_mean",
"field_name": "FlightDelayMin",
"partition_field_name": "Carrier"
}
],
"influencers": ["Carrier", "DestWeather", "OriginWeather"]
},
"analysis_limits": {
"model_memory_limit": "10mb"
},
"data_description": {
"time_field": "timestamp"
},
"model_plot_config": {
"enabled": true
},
"custom_settings": {
"created_by": "ml-module-sample",
"custom_urls": [
{
"url_name": "Raw data",
"url_value": "kibana#/discover?_g=(time:(from:\u0027$earliest$\u0027,mode:absolute,to:\u0027$latest$\u0027))&_a=(index:d3d7af60-4c81-11e8-b3d7-01146121b73d,query:(language:kuery,query:\u0027Carrier:\u0022$Carrier$\u0022\u0027),sort:!('@timestamp',desc))"
},
{
"url_name": "Data dashboard",
"url_value": "kibana#/dashboard/7adfa750-4c81-11e8-b3d7-01146121b73d?_g=(filters:!(),time:(from:\u0027$earliest$\u0027,mode:absolute,to:\u0027$latest$\u0027))&_a=(filters:!((\u0027$state\u0027:(store:appState),meta:(alias:!n,disabled:!f,index:\u0027INDEX_PATTERN_ID\u0027,key:Carrier,negate:!f,params:(query:\u0027$Carrier$\u0027),type:phrase,value:\u0027$Carrier$\u0027),query:(match:(Carrier:(query:\u0027$Carrier$\u0027,type:phrase))))),query:(language:kuery,query:\u0027\u0027))"
}
]
}
}

View file

@ -12,33 +12,33 @@
},
"jobs": [
{
"id": "low_count",
"file": "low_count.json"
"id": "low_request_rate",
"file": "low_request_rate.json"
},
{
"id": "count_by_response_code",
"file": "count_by_response_code.json"
"id": "response_code_rates",
"file": "response_code_rates.json"
},
{
"id": "distinct_count_ip",
"file": "distinct_count_ip.json"
"id": "url_scanning",
"file": "url_scanning.json"
}
],
"datafeeds": [
{
"id": "datafeed-low_count",
"file": "datafeed_low_count.json",
"job_id": "low_count"
"id": "datafeed-low_request_rate",
"file": "datafeed_low_request_rate.json",
"job_id": "low_request_rate"
},
{
"id": "datafeed-count_by_response_code",
"file": "datafeed_count_by_response_code.json",
"job_id": "count_by_response_code"
"id": "datafeed-response_code_rates",
"file": "datafeed_response_code_rates.json",
"job_id": "response_code_rates"
},
{
"id": "datafeed-distinct_count_ip",
"file": "datafeed_distinct_count_ip.json",
"job_id": "distinct_count_ip"
"id": "datafeed-url_scanning",
"file": "datafeed_url_scanning.json",
"job_id": "url_scanning"
}
],
"kibana": {}

View file

@ -1,9 +0,0 @@
{
"job_id": "JOB_ID",
"indexes": ["INDEX_PATTERN_NAME"],
"query": {
"bool": {
"filter": [{ "term": { "_index": "kibana_sample_data_logs" } }]
}
}
}

View file

@ -0,0 +1,24 @@
{
"job_id": "JOB_ID",
"indexes": ["INDEX_PATTERN_NAME"],
"query": {
"bool": {
"filter": [{ "term": { "_index": "kibana_sample_data_logs" } }]
}
},
"aggregations": {
"buckets": {
"date_histogram": {
"field": "timestamp",
"interval": 3600000
},
"aggregations": {
"timestamp": {
"max": {
"field": "timestamp"
}
}
}
}
}
}

View file

@ -1,11 +1,12 @@
{
"groups": ["kibana_sample_logs", "kibana_sample_data"],
"description": "Kibana sample web logs data: find anomalies in the visitor count",
"groups": ["kibana_sample_data", "kibana_sample_web_logs"],
"description": "Find unusually low request rates",
"analysis_config": {
"bucket_span": "1h",
"summary_count_field_name": "doc_count",
"detectors": [
{
"detector_description": "Low visitor count",
"detector_description": "Low request rates",
"function": "low_count"
}
],

View file

@ -1,11 +1,11 @@
{
"groups": ["kibana_sample_logs", "kibana_sample_data"],
"description": "Kibana sample web logs data: find anomalies in the event count by HTTP response code ",
"groups": ["kibana_sample_data", "kibana_sample_web_logs"],
"description": "Find unusual event rates by HTTP response code (high and low)",
"analysis_config": {
"bucket_span": "1h",
"detectors": [
{
"detector_description": "Event count by response code",
"detector_description": "Event rate by response code",
"function": "count",
"partition_field_name": "response.keyword"
}

View file

@ -1,13 +1,14 @@
{
"groups": ["kibana_sample_logs", "kibana_sample_data"],
"description": "Kibana sample web logs data: find anomalies in the distinct count of client IPs",
"groups": ["kibana_sample_data", "kibana_sample_web_logs"],
"description": "Find client IPs accessing an unusually high distinct count of URLs",
"analysis_config": {
"bucket_span": "1h",
"detectors": [
{
"detector_description": "Count of distinct client IPs",
"function": "distinct_count",
"field_name": "clientip"
"detector_description": "High distinct count of URLs for a client IP",
"function": "high_distinct_count",
"field_name": "url.keyword",
"over_field_name": "clientip"
}
],
"influencers": ["clientip"]