changeset 61:fb0519859645

better py configs: generate scrape jobs via config/scrape_job.py helpers and write them under build/scrape_jobs for the forever and recent vmagents
author drewp@bigasterisk.com
date Thu, 02 May 2024 18:35:46 -0700
parents e3151ab43579
children 8134cd480817
files .hgignore config/create_scrape_configs.py config/scrape_job.py next/index_page.py tasks.py
diffstat 5 files changed, 193 insertions(+), 77 deletions(-)
--- a/.hgignore	Sat Apr 27 01:31:07 2024 -0700
+++ b/.hgignore	Thu May 02 18:35:46 2024 -0700
@@ -1,4 +1,7 @@
 __pypackages__
+__pycache__
 .pdm-python
 .vscode
 .venv
+^export
+^build
--- a/config/create_scrape_configs.py	Sat Apr 27 01:31:07 2024 -0700
+++ b/config/create_scrape_configs.py	Thu May 02 18:35:46 2024 -0700
@@ -1,82 +1,103 @@
-import json
 from pathlib import Path
 
-forever_jobs = [
-    'maildir-count',
-    'mongodb',
-    'net-traffic',
-    'ping',
-    'power-eagle',
-    'smartctl',
-    'wifi',
-    'zfs-exporter',
-    'zigbee2mqtt-exporter',
-    'zpool-exporter',
+from scrape_job import jobConfig, scrape_deployments, writeJobConfigs
+import private
+
+
+
+
+# previously this used `kubernetes_sd_configs: [{ role: node }]`
+all_hosts = [
+    'dash',
+    'ditto',
+    # 'ws-printer',
+    # TODO: add the remaining nodes (formerly discovered via role: node)
+]
+
+smartctl_hosts = [
+    # ideally, all nodes with disks, but many turn off and on
+    'dash',
+    'ditto',
+]
+
+ping_hosts = [
+    # printer, since it falls out of ntop with no traffic at all. Or, we could poll ink status at http://printer/general/status.html?pageid=1
+    'printer',
+    # wireguard connection test
+    'prime5',
+    # after pyinfra or reboot, seems to lose wg0 address
+    'garage5',
 ]
 
-job_targets = {
-    'maildir-count': ['prime:2500'],
-    'mongodb': ['mongodb:9216'],
-    'net-traffic': ['pipe:8080'],
-    #   - job_name: "ping"
-    #     scrape_interval: 2m
-    #     metrics_path: /probe
-    #     params:
-    #       module: [icmp]
-    #     static_configs:
-    #       - targets:
-    #           # printer, since it falls out of ntop with no traffic at all. Or, we could poll ink status at http://printer/general/status.html?pageid=1
-    #           - printer
-    #           # wireguard connection test
-    #           - prime5
-    #           # after pyinfra or reboot, seems to lose wg0 address
-    #           - garage5
-    #     relabel_configs:
-    #       - {source_labels: [__address__],    target_label: __param_target}
-    #       - {source_labels: [__param_target], target_label: instance}
-    #       - target_label: __address__
-    #         replacement: prober
-    'ping': [':80'], # multi target with tricks
-    'power-eagle': ['power-eagle:80'],
-    'smartctl': [':80'], # per-node!
-    'wifi': ['wifi:80'],
-    'zfs-exporter': ['ditto:9634'],
-    'zigbee2mqtt-exporter': ['zigbee2mqtt-exporter:80'],
-    'zpool-exporter': ['ditto:9986'],
-}
+deploy_doesnt_serve_metrics = [
+    'apprise',
+    'bitwarden',
+    'digi-files',
+    'digi-pose-predict',
+    'digi-tts-mimic',
+    'dovecot',
+    'front-door-display',
+    'hass',
+    'homepage',
+    'itch150',
+    'kallithea',
+    'kube-web-view',
+    'magma',
+    'megasecond',
+    'minecraft-build-world',
+    'minecraft-lake-world',
+    'minecraft-smp-world',
+    'mongodb',
+    'mqtt1',
+    'mqtt2',
+    'nodered',
+    'photoprism',
+    'plik',
+    'projects',
+    'registry',
+    'registry-ui',
+    'speakerphone',
+    'video',
+    'video-files',
+    'zigbee2mqtt',
+]
 
-for job in forever_jobs:
-    d = [{
-        "job_name": job,
-        "static_configs": [{
-            "targets": job_targets[job],
-        }],
-        "relabel_configs": [
-            {
-                "target_label": "namespace",
-                "replacement": "default"
-            },
-            {
-                "source_labels": ["__meta_kubernetes_pod_node_name"],
-                "target_label": "node"
-            },
-        ]
-    }]
+forever_jobs = [
+    jobConfig(name='maildir-count',        targets=['prime:2500']),
+    jobConfig(name='mongodb',              targets=['mongodb:9216']),
+    jobConfig(name='net-traffic',          targets=['pipe:8080']),
+    jobConfig(name='ping',                 targets=ping_hosts,              scrape_interval='2m', ping_job=True),
+    jobConfig(name='power-eagle',          targets=['power-eagle:80'],      scrape_interval='8s'),  # from powerEagle/private_config.periodSec
+    jobConfig(name='powermeter-exporter',  targets=['powermeter-exporter'], scrape_interval='10s'),
+    jobConfig(name='smartctl',             targets=[f'{h}:9633' for h in smartctl_hosts]),
+    jobConfig(name='wifi',                 targets=['wifi:80']),
+    jobConfig(name='zfs-exporter',         targets=['ditto:9634']),
+    jobConfig(name='zigbee2mqtt-exporter', targets=['zigbee2mqtt-exporter:80']),
+    jobConfig(name='zpool-exporter',       targets=['ditto:9986']),
+    jobConfig(name='octoprint',            targets=['octoprint'],
+              metrics_path='/plugin/prometheus_exporter/metrics',
+              params={'apikey': [private.octoprint_apikey]},
+              ),
+]  # yapf: disable
 
-    if job == 'power-eagle':
-        # from powerEagle/private_config.periodSec
-        d[0]['relabel_configs'].append({
-            "action": "replace",
-            "target_label": "__scrape_interval__",
-            "replacement": "8s",
-        })
+recent_jobs = [
+    jobConfig( name="telegraf",    targets=[f'{h}:9273' for h in all_hosts]),
+    jobConfig( name="filebeat",    targets=[f'{h}:5067' for h in all_hosts]),
+    jobConfig( name="net-routes",  targets=['pipe:9999']),
+    jobConfig( name="net-traffic", targets=['pipe:8080']),
+    jobConfig( name="dnsmasq-log", targets=['pipe:9991']),
+    jobConfig(
+        name="racc",
+        scrape_interval='30s',
+        targets=[
+            # - dash:5150
+            # - dot:5150
+            # - squib:5150
+            # - ashermac:5150
+        ],
+    ),
+]  # yapf: disable
+recent_jobs.extend(scrape_deployments(deploy_doesnt_serve_metrics, forever_jobs))
 
-    Path(f'{job}.yaml').write_text(json.dumps(d, indent=2, sort_keys=True))
-
-Path('scrape_forever.yaml').write_text(json.dumps({
-    "global": {
-        "scrape_interval": "1m",
-        "scrape_timeout": "10s"
-    },
-    "scrape_config_files": [f'{job}.yaml' for job in forever_jobs],
-}, indent=2))
+writeJobConfigs(Path('build/scrape_jobs'), forever_jobs, 'forever')
+writeJobConfigs(Path('build/scrape_jobs'), recent_jobs, 'recent')
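
Note: create_scrape_configs.py now does import private and reads private.octoprint_apikey, but no private.py is part of this changeset, so it is presumably an untracked secrets file kept next to the script. A minimal sketch of what it would have to provide (the value is a placeholder, not from the changeset):

    # config/private.py: hypothetical sketch; the real file is untracked
    # and only needs to expose the OctoPrint API key used by the octoprint job
    octoprint_apikey = 'changeme-octoprint-api-key'
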
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/config/scrape_job.py	Thu May 02 18:35:46 2024 -0700
@@ -0,0 +1,88 @@
+import json
+from pathlib import Path
+import subprocess
+
+
+def jobConfig(name, targets, scrape_interval=None, ping_job=False, metrics_path=None, params=None):
+    """one scrape job config"""
+    ret = {
+        "job_name": name,
+        "static_configs": [{
+            "targets": targets,
+        }],
+        "relabel_configs": [
+            {
+                "target_label": "namespace",
+                "replacement": "default"
+            },
+            {
+                "source_labels": ["__meta_kubernetes_pod_node_name"],
+                "target_label": "node"
+            },
+        ]
+    }
+
+    if metrics_path:
+        ret['metrics_path'] = metrics_path
+
+    if scrape_interval:
+        ret['scrape_interval'] = scrape_interval
+
+    if params:
+        ret['params'] = params
+
+    if ping_job:
+        ret['metrics_path'] = '/probe'
+        ret['params'] = {'module': ['icmp']}
+        ret["relabel_configs"] = [
+            {
+                "source_labels": ["__address__"],
+                "target_label": "__param_target"
+            },
+            {
+                "source_labels": ["__param_target"],
+                "target_label": "instance"
+            },
+            {
+                "target_label": "__address__",
+                "replacement": "prober"
+            },
+        ]
+
+    return ret
+
+
+def current_deployments():
+    deploys = json.loads(subprocess.check_output(['kubectl', 'get', 'deploy', '-o=json']))
+    for deploy in deploys['items']:
+        name = deploy['metadata']['name']
+        yield name
+
+
+def scrape_deployments(deploy_doesnt_serve_metrics, forever_jobs):
+    ret = []
+    for name in current_deployments():
+        if name in deploy_doesnt_serve_metrics:
+            continue
+        if name in [j['job_name'] for j in forever_jobs]:
+            continue
+        targets = [name]
+        ret.append(jobConfig(name=name, targets=targets))
+    return ret
+
+
+def writeJobConfigs(outDir: Path, jobConfs: list, retention: str):
+    (outDir / retention).mkdir(exist_ok=True, parents=True)
+    filenames_written = []
+    for job in jobConfs:
+        filename = f'job_{job["job_name"]}.yaml'
+        (outDir / filename).write_text(json.dumps([job], indent=2, sort_keys=True))
+        filenames_written.append(filename)
+
+    (outDir / f'scrape_{retention}.yaml').write_text(json.dumps({
+        "global": {
+            "scrape_interval": "1m",
+            "scrape_timeout": "10s"
+        },
+        "scrape_config_files": filenames_written,
+    }, indent=2))
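
A quick illustration of how jobConfig (above) behaves: a plain job keeps the default namespace/node relabel_configs, while ping_job=True turns the job into a blackbox-style probe: metrics_path becomes /probe, params selects the icmp module, and the relabelings pass each listed target as __param_target while the actual scrape goes to the prober service. writeJobConfigs then serializes each job as JSON into a .yaml file (valid YAML, since JSON is a subset) directly under outDir; the per-retention subdirectory it creates is not otherwise written to. The snippet below is illustrative only, run from config/ so the import resolves:

    from scrape_job import jobConfig

    plain = jobConfig(name='mongodb', targets=['mongodb:9216'])
    assert plain['static_configs'] == [{'targets': ['mongodb:9216']}]

    probe = jobConfig(name='ping', targets=['printer', 'prime5'], ping_job=True)
    assert probe['metrics_path'] == '/probe'
    assert probe['params'] == {'module': ['icmp']}
    # the last relabel rule points the actual scrape at the prober service
    assert probe['relabel_configs'][-1] == {'target_label': '__address__',
                                            'replacement': 'prober'}
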
--- a/next/index_page.py	Sat Apr 27 01:31:07 2024 -0700
+++ b/next/index_page.py	Thu May 02 18:35:46 2024 -0700
@@ -15,6 +15,7 @@
                   <tr>
                     <td><a href="recent/vmagent/">vmagent</a></td>
                     <td><a href="recent/vmagent/metrics">metrics</a></td>
+                    <td><a href="recent/vmagent/targets">targets</a></td>
                   </tr>
                   <tr>
                     <td><a href="recent/vminsert/">vminsert</a></td>
@@ -33,6 +34,7 @@
                   <tr>
                     <td><a href="forever/vmagent/">vmagent</a></td>
                     <td><a href="forever/vmagent/metrics">metrics</a></td>
+                    <td><a href="forever/vmagent/targets">targets</a></td>
                   </tr>
                   <tr>
                     <td><a href="forever/vminsert/">vminsert</a></td>
--- a/tasks.py	Sat Apr 27 01:31:07 2024 -0700
+++ b/tasks.py	Thu May 02 18:35:46 2024 -0700
@@ -45,10 +45,12 @@
     map: dict[str, object] = {
         'rules': alert_rules.allRules(ctx),
     }
-    for p in Path('config').glob('*.yaml'):
-        map[p.name] = scrapeConfig(p)
+    top = Path('config/build/scrape_jobs')
+    for p in top.glob('**/*.yaml'):
+        map[str(p.relative_to(top))] = scrapeConfig(p)
     replaceCmap("next-victoriametrics-config", map)
     refreshPodCmaps(firstPodName("app=next-victoriametrics-forever-vmagent"))
+    refreshPodCmaps(firstPodName("app=next-victoriametrics-recent-vmagent"))
   
 
 def scrapeConfig(fn):
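
The changeset does not show where create_scrape_configs.py itself is run, so the task updating the configmap above presumably expects config/build/scrape_jobs to be populated beforehand. If tasks.py is an invoke-style tasks file (as its ctx arguments suggest), a build step along these lines would fit; this is an assumption, not part of the commit:

    # hypothetical invoke task, not part of this changeset
    from invoke import task

    @task
    def build_scrape_configs(ctx):
        # run from config/ so 'import private' and the relative
        # build/scrape_jobs output path resolve as the script expects
        ctx.run('cd config && python create_scrape_configs.py')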