Mercurial > code > home > repos > victoriametrics
diff alert_rules.py @ 34:3b91d52b007d
rules tuning
author | drewp@bigasterisk.com |
---|---|
date | Mon, 09 Oct 2023 18:50:36 -0700 |
parents | eb1de82c93aa |
children | 2bc188c4117a |
line wrap: on
line diff
--- a/alert_rules.py Fri Sep 01 23:19:56 2023 -0700
+++ b/alert_rules.py Mon Oct 09 18:50:36 2023 -0700
@@ -16,14 +16,14 @@
     # from https://awesome-prometheus-alerts.grep.to/rules.html
     return [
         {
-            "alert": "PrometheusTargetMissing",
-            "expr": "up == 0",
+            "alert": "metricsTargetMissing",
+            "expr": 'up{job!~"cm-acme-.*"} == 0',
             "labels": {
                 "severity": "critical"
             },
             "annotations": {
-                "summary": "Prometheus target missing (instance {{ $labels.instance }})",
-                "description": "A Prometheus target has disappeared. An exporter might be crashed.\n VALUE = {{ $value }}",
+                "summary": "metrics target missing (instance {{ $labels.instance }})",
+                "description": "A metrics target has disappeared. An exporter might be crashed.\n VALUE = {{ $value }}",
             },
         },
         {
@@ -100,7 +100,11 @@
         },
         {
             "alert": "container_waiting",
-            "expr": "sum by (container)(kube_pod_container_status_waiting!=0)",
+            "expr": "sum by (namespace, pod, container)(kube_pod_container_status_waiting!=0)",
+            "annotations": {
+                "description": '',
+                "dashboard": "https://bigasterisk.com/k/clusters/local/namespaces/{{ $labels.namespace }}/pods/{{ $labels.pod }}",
+            },
             "for": "2m",
         },
     ]
@@ -177,6 +181,22 @@
         },
         {
             "name":
+                "front_door",
+            "interval":
+                "5m",
+            "rules": [
+                {
+                    "alert": "service_disconnected_from_mqtt",
+                    "expr": "mqtt_connected < 1"
+                },
+                {
+                    "alert": "esp_not_connected_to_mqtt",
+                    "expr": "hw_connected < 1",
+                },
+            ]
+        },
+        {
+            "name":
                 "alerts",
             "rules": [
                 {
@@ -204,7 +224,7 @@
                     "labels": {
                         "severity": "warning"
                     },
-                    "expr": 'disk_free{path="/"} < 20G',
+                    "expr": 'disk_free{host!="garage",path="/"} < 20G',
                 },
                 {
                     "alert": "zpool_space_low",