From 7494e25bf1c4787060e2d49f774e4b6504471963 Mon Sep 17 00:00:00 2001
From: igoristic
Date: Tue, 30 Mar 2021 15:04:12 -0400
Subject: [PATCH] [7.x] [DOC] Updated documentation for newly added monitoring alerts (#91272) (#95725)

* [DOC] Updated documentation for newly added monitoring alerts (#91272)

* Documentation for recently added alerts

* [DOCS] Fixes broken link

* Addressed review feedback

Co-authored-by: lcawl
Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
# Conflicts:
#	docs/user/monitoring/cluster-alerts.asciidoc

* [DOC] Removes broken link

Co-authored-by: lcawl
---
 docs/management/advanced-options.asciidoc    |  3 +-
 docs/settings/monitoring-settings.asciidoc   |  5 --
 docs/user/monitoring/cluster-alerts.asciidoc | 64 ----------------
 docs/user/monitoring/index.asciidoc          |  1 -
 docs/user/monitoring/kibana-alerts.asciidoc  | 73 ++++++++++++++++++-
 .../public/doc_links/doc_links_service.ts    |  5 +-
 .../ccr_read_exceptions_alert/index.tsx      |  2 +-
 .../alerts/large_shard_size_alert/index.tsx  |  2 +-
 .../alerts/legacy_alert/legacy_alert.tsx     |  2 +-
 .../thread_pool_rejections_alert/index.tsx   |  2 +-
 10 files changed, 79 insertions(+), 80 deletions(-)
 delete mode 100644 docs/user/monitoring/cluster-alerts.asciidoc

diff --git a/docs/management/advanced-options.asciidoc b/docs/management/advanced-options.asciidoc
index f96ca9060497..e9b1b8a2e683 100644
--- a/docs/management/advanced-options.asciidoc
+++ b/docs/management/advanced-options.asciidoc
@@ -196,7 +196,8 @@ The maximum height that a cell occupies in a table. Set to 0 to disable
 truncation.
 
 [[xpack-defaultadminemail]]`xPack:defaultAdminEmail`::
-**Deprecated. Use <> instead.**
+**Deprecated. Instead, use {stack-manage-app} > {alerts-ui} to review and
+modify all the available alerts.**
 Email address for {xpack} admin operations, such as cluster alert notifications
 from *{stack-monitor-app}*.
 
diff --git a/docs/settings/monitoring-settings.asciidoc b/docs/settings/monitoring-settings.asciidoc
index f48dbeab9d61..6483442248ce 100644
--- a/docs/settings/monitoring-settings.asciidoc
+++ b/docs/settings/monitoring-settings.asciidoc
@@ -37,11 +37,6 @@ For more information, see
 monitoring back-end does not run and {kib} stats are not sent to the monitoring
 cluster.
 
-a|`monitoring.cluster_alerts.`
-`email_notifications.email_address` {ess-icon}
- | Specifies the email address where you want to receive cluster alerts.
- See <> for details.
-
 | `monitoring.ui.elasticsearch.hosts`
 | Specifies the location of the {es} cluster where your monitoring data is stored.
 By default, this is the same as <>. This setting enables
 
diff --git a/docs/user/monitoring/cluster-alerts.asciidoc b/docs/user/monitoring/cluster-alerts.asciidoc
deleted file mode 100644
index 98d14de226bd..000000000000
--- a/docs/user/monitoring/cluster-alerts.asciidoc
+++ /dev/null
@@ -1,64 +0,0 @@
-[role="xpack"]
-[[cluster-alerts]]
-= Cluster Alerts
-
-The *Stack Monitoring > Clusters* page in {kib} summarizes the status of your
-{stack}. You can drill down into the metrics to view more information about your
-cluster and specific nodes, instances, and indices.
-
-The Top Cluster Alerts shown on the Clusters page notify you of
-conditions that require your attention:
-
-* {es} Cluster Health Status is Yellow (missing at least one replica)
-or Red (missing at least one primary).
-* {es} Version Mismatch. You have {es} nodes with
-different versions in the same cluster.
-* {kib} Version Mismatch. You have {kib} instances with different
-versions running against the same {es} cluster.
-* Logstash Version Mismatch. You have Logstash nodes with different
-versions reporting stats to the same monitoring cluster.
-* {es} Nodes Changed. You have {es} nodes that were recently added or removed.
-* {es} License Expiration. The cluster's license is about to expire.
-+
---
-If you do not preserve the data directory when upgrading a {kib} or
-Logstash node, the instance is assigned a new persistent UUID and shows up
-as a new instance
---
-* {xpack} License Expiration. When the {xpack} license expiration date
-approaches, you will get notifications with a severity level relative to how
-soon the expiration date is:
- ** 60 days: Informational alert
- ** 30 days: Low-level alert
- ** 15 days: Medium-level alert
- ** 7 days: Severe-level alert
-+
-The 60-day and 30-day thresholds are skipped for Trial licenses, which are only
-valid for 30 days.
-
-The {monitor-features} check the cluster alert conditions every minute. Cluster
-alerts are automatically dismissed when the condition is resolved.
-
-NOTE: {watcher} must be enabled to view cluster alerts. If you have a Basic
-license, Top Cluster Alerts are not displayed.
-
-[float]
-[[cluster-alert-email-notifications]]
-== Email Notifications
-To receive email notifications for the Cluster Alerts:
-
-1. Configure an email account as described in
-{ref}/actions-email.html#configuring-email[Configuring email accounts].
-2. Configure the
-`monitoring.cluster_alerts.email_notifications.email_address` setting in
-`kibana.yml` with your email address.
-+
---
-TIP: If you have separate production and monitoring clusters and separate {kib}
-instances for those clusters, you must put the
-`monitoring.cluster_alerts.email_notifications.email_address` setting in
-the {kib} instance that is associated with the production cluster.
-
---
-
-Email notifications are sent only when Cluster Alerts are triggered and resolved.
diff --git a/docs/user/monitoring/index.asciidoc b/docs/user/monitoring/index.asciidoc
index 514988792d21..e4fd4a8cd085 100644
--- a/docs/user/monitoring/index.asciidoc
+++ b/docs/user/monitoring/index.asciidoc
@@ -1,6 +1,5 @@
 include::xpack-monitoring.asciidoc[]
 include::beats-details.asciidoc[leveloffset=+1]
-include::cluster-alerts.asciidoc[leveloffset=+1]
 include::elasticsearch-details.asciidoc[leveloffset=+1]
 include::kibana-alerts.asciidoc[leveloffset=+1]
 include::kibana-details.asciidoc[leveloffset=+1]
diff --git a/docs/user/monitoring/kibana-alerts.asciidoc b/docs/user/monitoring/kibana-alerts.asciidoc
index 300497126c3e..04f4e986ca28 100644
--- a/docs/user/monitoring/kibana-alerts.asciidoc
+++ b/docs/user/monitoring/kibana-alerts.asciidoc
@@ -29,7 +29,7 @@ To review and modify all the available alerts, use
 This alert is triggered when a node runs a consistently high CPU load. By
 default, the trigger condition is set at 85% or more averaged over the last 5
 minutes. The alert is grouped across all the nodes of the cluster by running
-checks on a schedule time of 1 minute with a re-notify internal of 1 day.
+checks on a schedule time of 1 minute with a re-notify interval of 1 day.
 
 [discrete]
 [[kibana-alerts-disk-usage-threshold]]
 == Disk usage threshold
 
 This alert is triggered when a node is nearly at disk capacity. By default,
 the trigger condition is set at 80% or more averaged over the last 5
 minutes. The alert is grouped across all the nodes of the cluster by running
-checks on a schedule time of 1 minute with a re-notify internal of 1 day.
+checks on a schedule time of 1 minute with a re-notify interval of 1 day.
 
 [discrete]
 [[kibana-alerts-jvm-memory-threshold]]
 == JVM memory threshold
 
 This alert is triggered when a node runs a consistently high JVM memory usage.
 By default, the trigger condition is set at 85% or more averaged over the last 5
 minutes. The alert is grouped across all the nodes of the cluster by running
-checks on a schedule time of 1 minute with a re-notify internal of 1 day.
+checks on a schedule time of 1 minute with a re-notify interval of 1 day.
 
 [discrete]
 [[kibana-alerts-missing-monitoring-data]]
 == Missing monitoring data
 
 This alert is triggered when any stack products nodes or instances stop sending
 monitoring data. By default, the trigger condition is set to missing for 15 minutes
 looking back 1 day. The alert is grouped across all the nodes of the cluster by running
-checks on a schedule time of 1 minute with a re-notify internal of 6 hours.
+checks on a schedule time of 1 minute with a re-notify interval of 6 hours.
+
+[discrete]
+[[kibana-alerts-thread-pool-rejections]]
+== Thread pool rejections (search/write)
+
+This alert is triggered when a node experiences thread pool rejections. By
+default, the trigger condition is set at 300 or more over the last 5
+minutes. The alert is grouped across all the nodes of the cluster by running
+checks on a schedule time of 1 minute with a re-notify interval of 1 day.
+Thresholds can be set independently for `search` and `write` type rejections.
+
+[discrete]
+[[kibana-alerts-ccr-read-exceptions]]
+== CCR read exceptions
+
+This alert is triggered if a read exception has been detected on any of the
+replicated clusters. The trigger condition is met if 1 or more read exceptions
+are detected in the last hour. The alert is grouped across all replicated clusters
+by running checks on a schedule time of 1 minute with a re-notify interval of 6 hours.
+
+[discrete]
+[[kibana-alerts-large-shard-size]]
+== Large shard size
+
+This alert is triggered if a large (primary) shard size is found on any of the
+specified index patterns. The trigger condition is met if an index's shard size is
+55 GB or higher in the last 5 minutes. The alert is grouped across all indices that match
+the default pattern of `*` by running checks on a schedule time of 1 minute with a re-notify
+interval of 12 hours.
+
+[discrete]
+[[kibana-alerts-cluster-alerts]]
+== Cluster alerts
+
+These alerts summarize the current status of your {stack}. You can drill down into the metrics
+to view more information about your cluster and specific nodes, instances, and indices.
+
+An alert is triggered if any of the following conditions are met within the last minute:
+
+* {es} cluster health status is yellow (missing at least one replica)
+or red (missing at least one primary).
+* {es} version mismatch. You have {es} nodes with
+different versions in the same cluster.
+* {kib} version mismatch. You have {kib} instances with different
+versions running against the same {es} cluster.
+* Logstash version mismatch. You have Logstash nodes with different
+versions reporting stats to the same monitoring cluster.
+* {es} nodes changed. You have {es} nodes that were recently added or removed.
++
+--
+If you do not preserve the data directory when upgrading a {kib} or
+Logstash node, the instance is assigned a new persistent UUID and shows up
+as a new instance.
+--
+* {es} license expiration. The cluster's license is about to expire.
+* Subscription license expiration. When the expiration date
+approaches, you receive notifications with a severity level based on how
+soon the expiration date is:
+ ** 60 days: Informational alert
+ ** 30 days: Low-level alert
+ ** 15 days: Medium-level alert
+ ** 7 days: Severe-level alert
++
+The 60-day and 30-day thresholds are skipped for Trial licenses, which are only
+valid for 30 days.
 
 NOTE: Some action types are subscription features, while others are free. For a
 comparison of the Elastic subscription levels, see the alerting section of
diff --git a/src/core/public/doc_links/doc_links_service.ts b/src/core/public/doc_links/doc_links_service.ts
index 862a075d55dd..f59d131de189 100644
--- a/src/core/public/doc_links/doc_links_service.ts
+++ b/src/core/public/doc_links/doc_links_service.ts
@@ -218,12 +218,15 @@ export class DocLinksService {
       guide: `${ELASTIC_WEBSITE_URL}guide/en/kibana/${DOC_LINK_VERSION}/maps.html`,
     },
     monitoring: {
-      alertsCluster: `${ELASTIC_WEBSITE_URL}guide/en/kibana/${DOC_LINK_VERSION}/cluster-alerts.html`,
       alertsKibana: `${ELASTIC_WEBSITE_URL}guide/en/kibana/${DOC_LINK_VERSION}/kibana-alerts.html`,
       alertsKibanaCpuThreshold: `${ELASTIC_WEBSITE_URL}guide/en/kibana/${DOC_LINK_VERSION}/kibana-alerts.html#kibana-alerts-cpu-threshold`,
       alertsKibanaDiskThreshold: `${ELASTIC_WEBSITE_URL}guide/en/kibana/${DOC_LINK_VERSION}/kibana-alerts.html#kibana-alerts-disk-usage-threshold`,
       alertsKibanaJvmThreshold: `${ELASTIC_WEBSITE_URL}guide/en/kibana/${DOC_LINK_VERSION}/kibana-alerts.html#kibana-alerts-jvm-memory-threshold`,
       alertsKibanaMissingData: `${ELASTIC_WEBSITE_URL}guide/en/kibana/${DOC_LINK_VERSION}/kibana-alerts.html#kibana-alerts-missing-monitoring-data`,
+      alertsKibanaThreadpoolRejections: `${ELASTIC_WEBSITE_URL}guide/en/kibana/${DOC_LINK_VERSION}/kibana-alerts.html#kibana-alerts-thread-pool-rejections`,
+      alertsKibanaCCRReadExceptions: `${ELASTIC_WEBSITE_URL}guide/en/kibana/${DOC_LINK_VERSION}/kibana-alerts.html#kibana-alerts-ccr-read-exceptions`,
+      alertsKibanaLargeShardSize: `${ELASTIC_WEBSITE_URL}guide/en/kibana/${DOC_LINK_VERSION}/kibana-alerts.html#kibana-alerts-large-shard-size`,
+      alertsKibanaClusterAlerts: `${ELASTIC_WEBSITE_URL}guide/en/kibana/${DOC_LINK_VERSION}/kibana-alerts.html#kibana-alerts-cluster-alerts`,
       metricbeatBlog: `${ELASTIC_WEBSITE_URL}blog/external-collection-for-elastic-stack-monitoring-is-now-available-via-metricbeat`,
       monitorElasticsearch: `${ELASTICSEARCH_DOCS}configuring-metricbeat.html`,
       monitorKibana: `${ELASTIC_WEBSITE_URL}guide/en/kibana/${DOC_LINK_VERSION}/monitoring-metricbeat.html`,
diff --git a/x-pack/plugins/monitoring/public/alerts/ccr_read_exceptions_alert/index.tsx b/x-pack/plugins/monitoring/public/alerts/ccr_read_exceptions_alert/index.tsx
index 16d58c865538..b660bdcfdce8 100644
--- a/x-pack/plugins/monitoring/public/alerts/ccr_read_exceptions_alert/index.tsx
+++ b/x-pack/plugins/monitoring/public/alerts/ccr_read_exceptions_alert/index.tsx
@@ -42,7 +42,7 @@ export function createCCRReadExceptionsAlertType(): AlertTypeModel
     description: ALERT_DETAILS[ALERT_CCR_READ_EXCEPTIONS].description,
     iconClass: 'bell',
     documentationUrl(docLinks) {
-      return `${docLinks.links.monitoring.alertsKibana}`;
+      return `${docLinks.links.monitoring.alertsKibanaCCRReadExceptions}`;
     },
     alertParamsExpression: (props: Props) => (
diff --git a/x-pack/plugins/monitoring/public/alerts/large_shard_size_alert/index.tsx b/x-pack/plugins/monitoring/public/alerts/large_shard_size_alert/index.tsx
index 01b812568ff5..89139f12dacc 100644
--- a/x-pack/plugins/monitoring/public/alerts/large_shard_size_alert/index.tsx
+++ b/x-pack/plugins/monitoring/public/alerts/large_shard_size_alert/index.tsx
@@ -42,7 +42,7 @@ export function createLargeShardSizeAlertType(): AlertTypeModel
     description: ALERT_DETAILS[ALERT_LARGE_SHARD_SIZE].description,
     iconClass: 'bell',
     documentationUrl(docLinks) {
-      return `${docLinks.links.monitoring.alertsKibana}`;
+      return `${docLinks.links.monitoring.alertsKibanaLargeShardSize}`;
     },
     alertParamsExpression: (props: Props) => (
diff --git a/x-pack/plugins/monitoring/public/alerts/legacy_alert/legacy_alert.tsx b/x-pack/plugins/monitoring/public/alerts/legacy_alert/legacy_alert.tsx
index 87850f893797..82bdf2786f89 100644
--- a/x-pack/plugins/monitoring/public/alerts/legacy_alert/legacy_alert.tsx
+++ b/x-pack/plugins/monitoring/public/alerts/legacy_alert/legacy_alert.tsx
@@ -23,7 +23,7 @@ export function createLegacyAlertTypes(): AlertTypeModel[] {
     description: LEGACY_ALERT_DETAILS[legacyAlert].description,
     iconClass: 'bell',
     documentationUrl(docLinks) {
-      return `${docLinks.links.monitoring.alertsCluster}`;
+      return `${docLinks.links.monitoring.alertsKibanaClusterAlerts}`;
     },
     alertParamsExpression: () => (
diff --git a/x-pack/plugins/monitoring/public/alerts/thread_pool_rejections_alert/index.tsx b/x-pack/plugins/monitoring/public/alerts/thread_pool_rejections_alert/index.tsx
index 00f560b60257..f3e697bd270e 100644
--- a/x-pack/plugins/monitoring/public/alerts/thread_pool_rejections_alert/index.tsx
+++ b/x-pack/plugins/monitoring/public/alerts/thread_pool_rejections_alert/index.tsx
@@ -33,7 +33,7 @@ export function createThreadPoolRejectionsAlertType(
     description: threadPoolAlertDetails.description,
     iconClass: 'bell',
     documentationUrl(docLinks) {
-      return `${docLinks.links.monitoring.alertsKibana}`;
+      return `${docLinks.links.monitoring.alertsKibanaThreadpoolRejections}`;
     },
     alertParamsExpression: (props: Props) => (
       <>
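For reference, the link plumbing these `.tsx` changes depend on can be sketched in isolation. This is a minimal TypeScript sketch, not Kibana's actual `DocLinksService`; the `MonitoringAlertDocLinks` interface and `buildMonitoringAlertLinks` helper are illustrative names, and only the four keys added by this patch are shown:

```typescript
// Sketch only: mirrors how doc_links_service.ts composes the new per-alert
// anchors added in this patch. Names below are illustrative, not Kibana APIs.
const ELASTIC_WEBSITE_URL = 'https://www.elastic.co/';
const DOC_LINK_VERSION = '7.x';

interface MonitoringAlertDocLinks {
  alertsKibanaThreadpoolRejections: string;
  alertsKibanaCCRReadExceptions: string;
  alertsKibanaLargeShardSize: string;
  alertsKibanaClusterAlerts: string;
}

function buildMonitoringAlertLinks(): MonitoringAlertDocLinks {
  // All four alerts share one docs page; each links to its own section anchor
  // instead of the page root, which is the point of the .tsx changes above.
  const page = `${ELASTIC_WEBSITE_URL}guide/en/kibana/${DOC_LINK_VERSION}/kibana-alerts.html`;
  return {
    alertsKibanaThreadpoolRejections: `${page}#kibana-alerts-thread-pool-rejections`,
    alertsKibanaCCRReadExceptions: `${page}#kibana-alerts-ccr-read-exceptions`,
    alertsKibanaLargeShardSize: `${page}#kibana-alerts-large-shard-size`,
    alertsKibanaClusterAlerts: `${page}#kibana-alerts-cluster-alerts`,
  };
}

// Example: what documentationUrl(docLinks) now resolves to for the large
// shard size alert.
const links = buildMonitoringAlertLinks();
console.log(links.alertsKibanaLargeShardSize);
// https://www.elastic.co/guide/en/kibana/7.x/kibana-alerts.html#kibana-alerts-large-shard-size
```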