Mercurial > code > home > repos > victoriametrics
changeset 41:407ee7fbda13
rm double metrics; add alert for too-many-500s
author | drewp@bigasterisk.com |
---|---|
date | Tue, 23 Jan 2024 14:56:33 -0800 |
parents | 24daba56722c |
children | 2f87ecd2a754 |
files | alert_rules.py config/scrape_main.yaml |
diffstat | 2 files changed, 15 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/alert_rules.py Tue Jan 09 19:56:47 2024 -0800
+++ b/alert_rules.py Tue Jan 23 14:56:33 2024 -0800
@@ -239,12 +239,17 @@
         ],
     },
     {
-        "name": "https_certs",
+        "name": "http",
         "interval": "1h",
-        'rules': [{
-            'alert': 'old_https_certs',
-            'expr': 'min by (source) (x509_cert_enddate - now())/86400 < 15',
-        }],
+        'rules': [
+            {
+                'alert': 'old_https_certs',
+                'expr': 'min by (source) (x509_cert_enddate - now())/86400 < 15',
+            }, {
+                'alert': 'high_500_response_rate',
+                'expr': 'avg_over_time(rate(sum by (envoy_cluster_name) (envoy_cluster_internal_upstream_rq_xx{envoy_response_code_class="5"})[20m])) > 0.02',
+            },
+        ],
     },
     {
         "name":
```
```diff
--- a/config/scrape_main.yaml Tue Jan 09 19:56:47 2024 -0800
+++ b/config/scrape_main.yaml Tue Jan 23 14:56:33 2024 -0800
@@ -112,6 +112,11 @@
       - {if: '{job="pomerium-proxy", __port_number="8443"}', action: drop}
       - {if: '{job="video-files", __port_number="8003"}', action: drop}
       - {if: '{job=~"cm-acme-.*"}', action: drop}
+
+      # already have this with a job="pomerium-proxy"
+      - {if: '{job="pomerium-metrics"}', action: drop}
+
+
 
       # Assume all 8001/8002 port discoveries are redundant with an nginx proxy
       - {if: '{__port_number="8001"}', action: drop}
```