changeset 41:407ee7fbda13

rm double metrics; add alert for too-many-500s
author drewp@bigasterisk.com
date Tue, 23 Jan 2024 14:56:33 -0800
parents 24daba56722c
children 2f87ecd2a754
files alert_rules.py config/scrape_main.yaml
diffstat 2 files changed, 15 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/alert_rules.py	Tue Jan 09 19:56:47 2024 -0800
+++ b/alert_rules.py	Tue Jan 23 14:56:33 2024 -0800
@@ -239,12 +239,17 @@
                 ],
             },
             {
-                "name": "https_certs",
+                "name": "http",
                 "interval": "1h",
-                'rules': [{
-                    'alert': 'old_https_certs',
-                    'expr': 'min by (source) (x509_cert_enddate - now())/86400 < 15',
-                }],
+                'rules': [
+                    {
+                        'alert': 'old_https_certs',
+                        'expr': 'min by (source) (x509_cert_enddate - now())/86400 < 15',
+                    }, {
+                        'alert': 'high_500_response_rate',
+                        'expr': 'avg_over_time(rate(sum by (envoy_cluster_name) (envoy_cluster_internal_upstream_rq_xx{envoy_response_code_class="5"})[20m])) > 0.02',
+                    },
+                ],
             },
             {
                 "name":
--- a/config/scrape_main.yaml	Tue Jan 09 19:56:47 2024 -0800
+++ b/config/scrape_main.yaml	Tue Jan 23 14:56:33 2024 -0800
@@ -112,6 +112,11 @@
       - {if: '{job="pomerium-proxy",    __port_number="8443"}',  action: drop}
       - {if: '{job="video-files",       __port_number="8003"}',  action: drop}
       - {if: '{job=~"cm-acme-.*"}',                              action: drop}
+      
+      # these metrics are already collected under job="pomerium-proxy"
+      - {if: '{job="pomerium-metrics"}',                         action: drop}
+
+
 
       # Assume all 8001/8002 port discoveries are redundant with an nginx proxy
       - {if: '{__port_number="8001"}', action: drop}