diff alert_rules.py @ 41:407ee7fbda13

rm double metrics; add alert for too-many-500s
author drewp@bigasterisk.com
date Tue, 23 Jan 2024 14:56:33 -0800
parents 24daba56722c
children 2f87ecd2a754
line wrap: on
line diff
--- a/alert_rules.py	Tue Jan 09 19:56:47 2024 -0800
+++ b/alert_rules.py	Tue Jan 23 14:56:33 2024 -0800
@@ -239,12 +239,17 @@
                 ],
             },
             {
-                "name": "https_certs",
+                "name": "http",
                 "interval": "1h",
-                'rules': [{
-                    'alert': 'old_https_certs',
-                    'expr': 'min by (source) (x509_cert_enddate - now())/86400 < 15',
-                }],
+                'rules': [
+                    {
+                        'alert': 'old_https_certs',
+                        'expr': 'min by (source) (x509_cert_enddate - now())/86400 < 15',
+                    }, {
+                        'alert': 'high_500_response_rate',
+                        'expr': 'avg_over_time(rate(sum by (envoy_cluster_name) (envoy_cluster_internal_upstream_rq_xx{envoy_response_code_class="5"})[20m])) > 0.02',
+                    },
+                ],
             },
             {
                 "name":