changeset 22:cd115f1ca2a8

use configmaps and a special pod refresh trick
author drewp@bigasterisk.com
date Sat, 24 Jun 2023 23:02:04 -0700
parents 10127391f6f3
children ffa013944200
files deploy_vmalert.yaml k8s_ops.py tasks.py
diffstat 3 files changed, 33 insertions(+), 112 deletions(-) [+]
line wrap: on
line diff
--- a/deploy_vmalert.yaml	Sat Jun 24 23:00:40 2023 -0700
+++ b/deploy_vmalert.yaml	Sat Jun 24 23:02:04 2023 -0700
@@ -15,6 +15,9 @@
       annotations:
         prometheus.io/scrape: "true"
     spec:
+      volumes:
+        - name: config
+          configMap: { name: victoriametrics-config }
       serviceAccountName: victoriametrics
       containers:
         - name: vmalert
@@ -29,10 +32,11 @@
             - -notifier.url=http://alertmanager
             - -remoteRead.url=http://victoriametrics/m/
             - -remoteWrite.url=http://victoriametrics/m/
-            - -rule=/local/rules/*.yaml
-            - -rule=/local/rules/build/*.yaml
+            - -rule=/local/rules
           ports:
             - containerPort: 8880
+          volumeMounts:
+            - { name: config, mountPath: /local }
 ---
 apiVersion: v1
 kind: Service
--- a/k8s_ops.py	Sat Jun 24 23:00:40 2023 -0700
+++ b/k8s_ops.py	Sat Jun 24 23:02:04 2023 -0700
@@ -4,7 +4,7 @@
 from kubernetes import client
 
 
-def refreshPodCmaps(pod_name, namespace='default'):
+def refreshPodCmaps(pod_name, namespace="default"):
     """
     Per https://ahmet.im/blog/kubernetes-secret-volumes-delay/ there could be a while
     until k8s updates the CM volume that a pod sees. Workaround is to edit the pod annotations.
@@ -14,33 +14,32 @@
     pod = api_instance.read_namespaced_pod(name=pod_name, namespace=namespace)
     if pod.metadata.annotations is None:
         pod.metadata.annotations = {}
-    pod.metadata.annotations['force-configmap-update'] = str(time.time())
+    pod.metadata.annotations["force-configmap-update"] = str(time.time())
     api_instance.replace_namespaced_pod(name=pod_name, namespace=namespace, body=pod)
 
 
 def firstPodName(selector):
     api_instance = client.CoreV1Api()
-    pod_list = api_instance.list_namespaced_pod(namespace='default', label_selector=selector)
+    pod_list = api_instance.list_namespaced_pod(
+        namespace="default", label_selector=selector
+    )
     return pod_list.items[0].metadata.name
 
 
 def hup(ctx, deployment, process_name):
-    ctx.run(f'kubectl exec {deployment} -- pkill -HUP {process_name}')
+    ctx.run(f"kubectl exec {deployment} -- pkill -HUP {process_name}")
 
 
-def replaceCmap(name, configObj):
+def replaceCmap(name, dataObj):
     api_instance = client.CoreV1Api()
     api_response = api_instance.replace_namespaced_config_map(  #
         name=name,
-        namespace='default',
+        namespace="default",
         body={
-            'apiVersion': 'v1',
-            'kind': 'ConfigMap',
-            'metadata': {
-                'name': name
-            },
-            'data': {
-                'scrape_main': json.dumps(configObj)
-            },
-        })
+            "apiVersion": "v1",
+            "kind": "ConfigMap",
+            "metadata": {"name": name},
+            "data": dict((fn, json.dumps(obj)) for fn, obj in dataObj.items()),
+        },
+    )
     print(f"{name} resource_version is now {api_response.metadata.resource_version}")
--- a/tasks.py	Sat Jun 24 23:00:40 2023 -0700
+++ b/tasks.py	Sat Jun 24 23:02:04 2023 -0700
@@ -1,112 +1,30 @@
 import json
-import sys
-import tempfile
-import time
-import urllib.request
-from pathlib import Path
-from typing import Dict
 
 import yaml
 from invoke import task
 from kubernetes import config
 
 from k8s_ops import firstPodName, replaceCmap, refreshPodCmaps
+import alert_rules
 
-_tfs = []
 config.load_kube_config()
 
 
-def saveTmp(text):
-    tf = tempfile.NamedTemporaryFile(mode='wt')
-    _tfs.append(tf)
-    tf.write(text)
-    tf.flush()
-    return Path(tf.name)
-
-
-def writeConfigmap(ctx, files: Dict[str, Path]):
-    arg = ','.join(f'{k}={v}' for k, v in files.items())
-    ctx.run(
-        f'kubectl create configmap victoriametrics-config --from-file {arg} -o yaml --dry-run=client | kubectl apply -f -'
-    )
-
-
-def reload(ctx, svc):
-    host = ctx.run(f'khost {svc}').stdout
-    path = {'victoriametrics': '/m/', 'vmalert': '/'}[svc]
-    reload_url = f'http://{host}{path}-/reload'
-    print(f'reload with POST {reload_url}')
-    for workaround in [1]:
-        print(' -> status',
-              urllib.request.urlopen(reload_url, data=b'unused').status)
-        time.sleep(0)
-
-
-def hostsExpectedOnline(ctx):
-    return ctx.run(
-        'cd /my/serv/lanscape; pdm run python hosts_expected_online.py').stdout
-
+@task
+def push_config(ctx):
+    configObj = scrapeConfig(ctx)
+    rulesObj = alert_rules.allRules()
+    rulesObj["groups"] += alert_rules.expectedK8sNodes(ctx)["groups"]
+    rulesObj["groups"] += alert_rules.hostsExpectedOnline(ctx)["groups"]
 
-def expectedK8sNodes(ctx):
-    getNode = json.loads(ctx.run("kubectl get node -o json").stdout)
-    hosts = [item['metadata']['name'] for item in getNode['items']]
-    return yaml.dump({
-        'groups': [{
-            'name':
-            'k8s_expected_nodes',
-            'rules': [{
-                'alert':
-                'kube_node_log_size_report_' + h,
-                'expr':
-                'absent(kubelet_container_log_filesystem_used_bytes{instance="%s"})'
-                % h,
-                'for':
-                '1h',
-                'annotations': {
-                    'summary': f"no recent k8s log size report from host {h}"
-                }
-            } for h in hosts]
-        }]
-    })
-
+    replaceCmap("victoriametrics-config", {"scrape_main": configObj, "rules": rulesObj})
 
-@task
-def sync_config(ctx):
-    config = Path('config')
-    for workaround in [1]:
-        writeConfigmap(
-            ctx, {
-#                'scrape_ssl.yaml': saveTmp(httpsCertProber()),
-                'rules_expected_nodes.yaml': saveTmp(expectedK8sNodes(ctx)),
-                'rules_expected_hosts.yaml': saveTmp(hostsExpectedOnline(ctx)),
-            })
-        reload(ctx, 'victoriametrics')
+    refreshPodCmaps(firstPodName("app=victoriametrics"))
+    refreshPodCmaps(firstPodName("app=vmalert"))
 
-        # this reload doesn't get the new config- not sure if it's vmalert bug or k8s cm propogation problem
-        # reload(ctx, 'vmalert')
-        ctx.run('kubectl rollout restart deploy/vmalert')
-
-
-@task
-def build_config(ctx):
-    with open('rules/build/expected_hosts.yaml', 'w') as out:
-        out.write(hostsExpectedOnline(ctx))
-
-
-# --------------------------
+    # If the VM reloader isn't fast enough, we could do this too:
+    # hup(ctx, 'deploy/victoriametrics', 'victoria-metrics-prod')
 
 
 def scrapeConfig(ctx):
-    return yaml.load(open('config/scrape_main.yaml'), yaml.FullLoader)
-
-
-@task
-def updateScrapes(ctx):
-    configObj = scrapeConfig(ctx)
-
-    replaceCmap('victoriametrics-config', configObj)
-
-    refreshPodCmaps(firstPodName('app=victoriametrics'))
-
-    # If the VM reloader isn't fast enough, we could do this too:
-    # hup(ctx, 'deploy/victoriametrics', 'victoria-metrics-prod')
+    return yaml.load(open("config/scrape_main.yaml"), yaml.FullLoader)