view create_scrape_configs.py @ 75:37fb428d5013

workaround for cadvisor metrics path not getting set
author drewp@bigasterisk.com
date Sat, 04 May 2024 19:30:56 -0700
parents c3d13fab248e
children 009527a145d0
line wrap: on
line source

from pathlib import Path

from scrape_job import jobConfig, scrape_deployments, writeJobConfigs, FromName
import private

# Hosts used to build the per-host targets of the 'filebeat' and 'telegraf'
# jobs in recent_jobs. The list is kept by hand; previously this used
# `kubernetes_sd_configs: [{ role: node }]`.
# todo: not included yet: 'ws-printer'
all_hosts = ["dash", "ditto"]

# Hosts scraped by the 'smartctl' job (port 9633). Ideally this would be all
# nodes with disks, but many of them turn off and on.
smartctl_hosts = ["dash", "ditto"]

# Targets of the 'ping' job, each with the reason it is watched.
ping_hosts = [
    # The printer falls out of ntop when it has no traffic at all. Alternative:
    # poll ink status at http://printer/general/status.html?pageid=1
    "printer",
    "prime5",  # wireguard connection test
    "garage5",  # after pyinfra or a reboot, seems to lose its wg0 address
]


# Scrape jobs that are written out under the name 'forever' by the
# writeJobConfigs call at the bottom of this script.
forever_jobs = [
    jobConfig(name="maildir-count", targets=["prime:2500"]),
    jobConfig(name="mongodb", targets=["mongodb:9216"]),
    jobConfig(name="net-traffic", targets=["pipe:8080"]),
    # Targets are bare hostnames from ping_hosts; ping_job=True is interpreted
    # by scrape_job.jobConfig.
    jobConfig(name="ping", targets=ping_hosts, scrape_interval="2m", ping_job=True),
    # Interval comes from powerEagle/private_config.periodSec.
    jobConfig(name="power-eagle", targets=["power-eagle:80"], scrape_interval="8s"),
    jobConfig(name="powermeter-exporter", targets=["powermeter-exporter"], scrape_interval="10s"),
    # One target per host in smartctl_hosts.
    jobConfig(name="smartctl", targets=[f"{host}:9633" for host in smartctl_hosts]),
    jobConfig(name="wifi", targets=["wifi:80"]),
    jobConfig(name="zfs-exporter", targets=["ditto:9634"]),
    jobConfig(name="zigbee2mqtt-exporter", targets=["zigbee2mqtt-exporter:80"]),
    jobConfig(name="zpool-exporter", targets=["ditto:9986"]),
    jobConfig(
        name="octoprint",
        targets=["octoprint"],
        metrics_path="/plugin/prometheus_exporter/metrics",
        # The API key lives in the `private` module, not in this file.
        params={"apikey": [private.octoprint_apikey]},
    ),
]  # yapf: disable

# Scrape jobs that are written out under the name 'recent' by the
# writeJobConfigs call at the bottom of this script. More entries are appended
# to this list further down (deployment-derived jobs and the kubernetes-* jobs).
recent_jobs = [
    jobConfig(name="dnsmasq-log", targets=["pipe:9991"]),
    # One target per host in all_hosts.
    jobConfig(name="filebeat", targets=[f"{host}:5067" for host in all_hosts]),
    jobConfig(name="net-routes", targets=["pipe:9999"]),
    jobConfig(name="net-traffic", targets=["pipe:8080"]),
    jobConfig(name="pomerium", targets=["pomerium-metrics.pomerium:9090"]),
    # One target per host in all_hosts.
    jobConfig(name="telegraf", targets=[f"{host}:9273" for host in all_hosts]),
    jobConfig(name="victorialogs", targets=["victorialogs"], metrics_path="/logs/metrics"),

    # VictoriaMetrics components. Each one is served under its own /m/... path.
    # targets=FromName is a marker imported from scrape_job; presumably the
    # target is derived from the job name -- confirm in scrape_job.
    jobConfig(name="victoriametrics-forever-vmagent", metrics_path="/m/forever/vmagent/metrics", targets=FromName),
    jobConfig(name="victoriametrics-forever-vminsert", metrics_path="/m/forever/vminsert/metrics", targets=FromName),
    jobConfig(name="victoriametrics-forever-vmstorage", metrics_path="/m/forever/vmstorage/metrics", targets=FromName),
    jobConfig(name="victoriametrics-recent-vmagent", metrics_path="/m/recent/vmagent/metrics", targets=FromName),
    jobConfig(name="victoriametrics-recent-vminsert", metrics_path="/m/recent/vminsert/metrics", targets=FromName),
    jobConfig(name="victoriametrics-recent-vmstorage", metrics_path="/m/recent/vmstorage/metrics", targets=FromName),
    jobConfig(name="victoriametrics-vmselect", metrics_path="/m/vmselect/metrics", targets=FromName),
    jobConfig(name="victoriametrics-vmalert", metrics_path="/m/vmalert/metrics", targets=FromName),
    jobConfig(name="victoriametrics-alertmanager", metrics_path="/m/alertmanager/metrics", targets=FromName),
    # No metrics_path is passed for this one, so jobConfig's default is used.
    jobConfig(name="victoriametrics-index", targets=FromName),

    # todo:
    #  - video-files
    #  - cert-manager
    #  - syncthing(s)
    #  - nvidia runner
    #  - longhorn  (svc longhorn-backend, port 9500?)
    #  - kube-system.metrics-server

    # The 'racc' job currently has no active targets; the candidates are kept
    # here, commented out.
    jobConfig(
        name="racc",
        scrape_interval="30s",
        targets=[
            # - dash:5150
            # - dot:5150
            # - squib:5150
            # - ashermac:5150
        ],
    ),
]  # yapf: disable


# Deployment names that, per this list's name, do not serve metrics. The list is
# passed to scrape_deployments() below together with the names of the jobs that
# are already configured by hand.
deploy_doesnt_serve_metrics = [
    "apprise",
    "bitwarden",
    "digi-files",
    "digi-pose-predict",
    "digi-tts-mimic",
    "digi-web",
    "dovecot",
    "ectoscope",
    "front-door-display",
    "hass",
    "homepage",
    "itch150",
    "jsregistry",
    "kallithea",
    "kube-web-view",
    "magma",
    "megasecond",
    "minecraft-build-world",
    "minecraft-lake-world",
    "minecraft-smp-world",
    "mongodb",
    "mqtt1",
    "mqtt2",
    "nodered",
    "photoprism",
    "plik",
    "projects",
    "registry-ui",
    "registry",
    "speakerphone",
    "victorialogs-ui",
    "video-files",
    "video",
    "zigbee2mqtt",
    "zwave2mqtt",
]

# Names of every job defined by hand so far, in both the forever and recent lists.
existing_jobs = [cfg['job_name'] for cfg in [*forever_jobs, *recent_jobs]]
# Add whatever scrape_deployments() returns to the recent jobs. It is handed
# the no-metrics deployments plus the hand-written job names -- presumably as
# names to skip; confirm in scrape_job.
recent_jobs += scrape_deployments([*deploy_doesnt_serve_metrics, *existing_jobs])

# Kubernetes API server job: the base job from jobConfig (https, no static
# targets) merged with endpoint discovery and a relabel rule.
recent_jobs.append(
    jobConfig(name="kubernetes-apiservers", https=True, targets=[]) | {
        "kubernetes_sd_configs": [{"role": "endpoints"}],
        "relabel_configs": [{
            # Keep only the endpoint whose namespace / service name / port name
            # match 'default', 'kubernetes' and 'https' respectively.
            "source_labels": [
                "__meta_kubernetes_namespace",
                "__meta_kubernetes_service_name",
                "__meta_kubernetes_endpoint_port_name",
            ],
            "action": "keep",
            "regex": "default;kubernetes;https",
        }],
    })

# Kubernetes node job: the base job from jobConfig (https, no static targets)
# merged with node discovery and three relabel rules, applied in order.
recent_jobs.append(
    jobConfig(name="kubernetes-nodes", https=True, targets=[]) | {
        "kubernetes_sd_configs": [{"role": "node"}],
        "relabel_configs": [
            {
                # 1. Drop the listed node-label meta labels before they can be
                #    copied by the labelmap rule below.
                "action": "labeldrop",
                "regex": (
                    "__meta_kubernetes_node_label_("
                    "feature_node"
                    "|nvidia_com_"
                    "|beta_kubernetes_io_arch"
                    "|beta_kubernetes_io_instance_type"
                    "|beta_kubernetes_io_os"
                    "|node_kubernetes_io_instance_type"
                    "|kubernetes_io_os"
                    ").*"
                ),
            },
            {
                # 2. Copy every remaining node label to a label named by the
                #    captured suffix.
                "action": "labelmap",
                "regex": "__meta_kubernetes_node_label_(.+)",
            },
            {
                # 3. Remove the hostname label produced by step 2.
                "action": "labeldrop",
                "regex": "kubernetes_io_hostname",
            },
        ],
    })

# cAdvisor job: scraped from each discovered node at /metrics/cadvisor.
# Metric definitions are documented at
# https://github.com/google/cadvisor/blob/master/docs/storage/prometheus.md

recent_jobs.append(
    jobConfig(name="kubernetes-cadvisor", https=True, metrics_path="/metrics/cadvisor", targets=[]) | {
        "kubernetes_sd_configs": [{"role": "node"}],
        "relabel_configs": [
            {
                "action": "labeldrop",
                "regex": (
                    "("
                    "feature_node"
                    "|nvidia_com_gpu"
                    "|beta_kubernetes_io_arch"
                    "|beta_kubernetes_io_instance_type"
                    "|beta_kubernetes_io_os"
                    "|node_kubernetes_io_instance_type"
                    "|kubernetes_io_os"
                    ").*"
                ),
            },
            {
                # Workaround: the metrics path was not getting set for this
                # job, so force __metrics_path__ with a relabel rule as well.
                "action": "replace",
                "target_label": "__metrics_path__",
                "replacement": "/metrics/cadvisor",
            },
        ],
    })

# Emit both job lists into build/scrape_config, the forever list first and
# the recent list second. The string passed as the third argument names the
# output; how it is used is up to scrape_job.writeJobConfigs.
outDir = Path('build') / 'scrape_config'
for jobs, config_name in ((forever_jobs, 'forever'), (recent_jobs, 'recent')):
    writeJobConfigs(outDir, jobs, config_name)