changeset 76:009527a145d0

add kube-state-metrics scrape; loosen some high-logging thresholds
author drewp@bigasterisk.com
date Sun, 05 May 2024 18:51:35 -0700
parents 37fb428d5013
children 489c5d9a7dbc
files alert_rules.py create_scrape_configs.py
diffstat 2 files changed, 10 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/alert_rules.py	Sat May 04 19:30:56 2024 -0700
+++ b/alert_rules.py	Sun May 05 18:51:35 2024 -0700
@@ -24,7 +24,7 @@
             "labels": {
                 "severity": "waste"
             },
-            "expr": 'sum by (container) (rate(kubelet_container_log_filesystem_used_bytes{container="pomerium"}[3h])) > 8k',
+            "expr": 'sum by (container) (rate(kubelet_container_log_filesystem_used_bytes{container="pomerium"}[1h])) > 12k',
             "annotations": {
                 "summary": "high log output rate"
             },
@@ -382,7 +382,7 @@
                         "labels": {
                             "severity": "waste"
                         },
-                        "expr": 'sum by (container) (rate(kubelet_container_log_filesystem_used_bytes{container!="pomerium"}[3h])) > 4k',
+                        "expr": 'sum by (namespace, pod, container) (rate(kubelet_container_log_filesystem_used_bytes{container!="pomerium"}[3h])) > 10k',
                         "annotations": {
                             "summary": "high log output rate"
                         },
--- a/create_scrape_configs.py	Sat May 04 19:30:56 2024 -0700
+++ b/create_scrape_configs.py	Sun May 05 18:51:35 2024 -0700
@@ -46,13 +46,14 @@
 ]  # yapf: disable
 
 recent_jobs = [
-    jobConfig(name="dnsmasq-log", targets=['pipe:9991']),
-    jobConfig(name="filebeat",    targets=[f'{h}:5067' for h in all_hosts]),
-    jobConfig(name="net-routes",  targets=['pipe:9999']),
-    jobConfig(name="net-traffic", targets=['pipe:8080']),
-    jobConfig(name="pomerium",    targets=['pomerium-metrics.pomerium:9090']),
-    jobConfig(name="telegraf",    targets=[f'{h}:9273' for h in all_hosts]),
-    jobConfig(name="victorialogs",targets=['victorialogs'], metrics_path='/logs/metrics'),
+    jobConfig(name="dnsmasq-log",        targets=['pipe:9991']),
+    jobConfig(name="filebeat",           targets=[f'{h}:5067' for h in all_hosts]),
+    jobConfig(name="kube-state-metrics", targets=['kube-state-metrics.kube-system:8080']),
+    jobConfig(name="net-routes",         targets=['pipe:9999']),
+    jobConfig(name="net-traffic",        targets=['pipe:8080']),
+    jobConfig(name="pomerium",           targets=['pomerium-metrics.pomerium:9090']),
+    jobConfig(name="telegraf",           targets=[f'{h}:9273' for h in all_hosts]),
+    jobConfig(name="victorialogs",       targets=['victorialogs'], metrics_path='/logs/metrics'),
 
     jobConfig(name="victoriametrics-forever-vmagent",   metrics_path='/m/forever/vmagent/metrics',  targets=FromName),
     jobConfig(name="victoriametrics-forever-vminsert",  metrics_path='/m/forever/vminsert/metrics', targets=FromName),