view tasks.py @ 60:e3151ab43579

moving scrape target configs in py, and into separate retention classes
author drewp@bigasterisk.com
date Sat, 27 Apr 2024 01:31:07 -0700
parents 6a357b483c04
children fb0519859645
line wrap: on
line source

from pathlib import Path
import yaml
from invoke import task
from kubernetes import config

import alert_rules
from k8s_ops import firstPodName, refreshPodCmaps, replaceCmap

# Runs once at import time, i.e. before any task in this file executes.
# NOTE(review): presumably the k8s_ops helpers depend on the kubernetes client
# configuration loaded here -- confirm against k8s_ops.
config.load_kube_config()


@task
def push_config(ctx):
    """Replace the "victoriametrics-config" configmap with the current scrape configs and alert rules."""
    # The alert rules are computed first, ahead of any scrape yaml parsing.
    alertRules = alert_rules.allRules(ctx)

    # One configmap entry per scrape file, keyed by the file's stem.
    cmapData = {
        key: scrapeConfig(f"config/{key}.yaml")
        for key in ("scrape_main", "scrape_recent", "scrape_forever")
    }
    cmapData["rules"] = alertRules
    replaceCmap("victoriametrics-config", cmapData)

    # Caveat: the refreshes below don't give errors on rules format! A bad
    # rules file just means the old rules quietly stay in effect. Use
    # `skaffold run` to get errs.
    #
    # or run
    #  validateTemplates   = flag.Bool("rule.validateTemplates", true, "Whether to validate annotation and label templates")
    #  validateExpressions = flag.Bool("rule.validateExpressions", true, "Whether to validate rules expressions via MetricsQL engine")
    for selector in ("app=victoriametrics", "app=vmalert"):
        refreshPodCmaps(firstPodName(selector))

    # If the VM reloader isn't fast enough, we could do this too:
    # hup(ctx, 'deploy/victoriametrics', 'victoria-metrics-prod')


@task
def push_config_2024(ctx):
    """Replace the "next-victoriametrics-config" configmap with the alert rules plus every config/*.yaml file."""
    # plan:
    #   every discovered service may:
    #      - be described here as a forever retention - ignore the discovery
    #      - be blocked here as a no-metrics service - ignore the discovery
    #      - be scraped as 'recent', with possible overrides of port/path
    #   all per-node metrics shall be 'recent' (oops, not smartctl!)

    # Rules go in first; each yaml file is then added under its file name
    # (suffix included), in whatever order glob yields them.
    cmapData: dict[str, object] = {'rules': alert_rules.allRules(ctx)}
    cmapData.update(
        (yamlPath.name, scrapeConfig(yamlPath))
        for yamlPath in Path('config').glob('*.yaml')
    )

    replaceCmap("next-victoriametrics-config", cmapData)
    refreshPodCmaps(firstPodName("app=next-victoriametrics-forever-vmagent"))
  

def scrapeConfig(fn):
    """Parse the yaml file at `fn` and return the resulting object.

    `fn` is anything `open` accepts; callers in this file pass both str and
    Path values.
    """
    # Use a context manager so the file handle is closed as soon as parsing
    # finishes instead of lingering until garbage collection.
    # NOTE: FullLoader is fine for our own checked-in config files, but it
    # should not be pointed at untrusted yaml.
    with open(fn) as f:
        return yaml.load(f, yaml.FullLoader)


# Metric names that exportForeverMetrics exports, one request per name, in
# this order. Written as a whitespace-separated block; split() turns it back
# into a list of str.
metricsToMigrate = """
    currently_on_wifi
    connected
    house_power_kwh
    house_power_price
    house_power_w
    lan_bytes_sent_from_created
    lan_bytes_sent_from_total
    lan_last_seen_time
    maildir_count
    mongodb_mongod_db_coll_avgobjsize
    mongodb_mongod_db_coll_count
    mongodb_mongod_db_coll_index_size
    mongodb_mongod_db_coll_indexes
    mongodb_mongod_db_coll_indexes_size
    mongodb_mongod_db_coll_size
    mongodb_mongod_db_coll_storage_size
    mongodb_mongod_db_collections_total
    mongodb_mongod_db_data_size_bytes
    mongodb_mongod_db_index_size_bytes
    mongodb_mongod_db_indexes_total
    mongodb_mongod_db_objects_total
    probe_duration_seconds
    smartctl_device
    smartctl_device_attribute
    smartctl_device_available_spare
    smartctl_device_available_spare_threshold
    smartctl_device_block_size
    smartctl_device_bytes_read
    smartctl_device_bytes_written
    smartctl_device_capacity_blocks
    smartctl_device_capacity_bytes
    smartctl_device_critical_warning
    smartctl_device_interface_speed
    smartctl_device_media_errors
    smartctl_device_num_err_log_entries
    smartctl_device_percentage_used
    smartctl_device_power_cycle_count
    smartctl_device_power_on_seconds
    smartctl_device_smart_status
    smartctl_device_smartctl_exit_status
    smartctl_device_status
    smartctl_device_temperature
    zfs_dataset_logical_used_bytes
    zfs_dataset_referenced_bytes
    zfs_dataset_used_by_dataset_bytes
    zfs_dataset_used_bytes
    zfs_dataset_written_bytes
    zfs_exporter_build_info
    zfs_pool_allocated_bytes
    zfs_pool_deduplication_ratio
    zfs_pool_fragmentation_ratio
    zfs_pool_free_bytes
    zfs_pool_freeing_bytes
    zfs_pool_health
    zfs_pool_leaked_bytes
    zfs_pool_readonly
    zfs_pool_size_bytes
    zfs_scrape_collector_duration_seconds
    zfs_scrape_collector_success
    zigbee_availability
    zigbee_battery
    zigbee_brightness
    zigbee_contact
    zigbee_link_quality
    zigbee_occupancy
    zpool_device_error_count
    zpool_device_state
    zpool_error_count
    zpool_state
""".split()


@task
def exportForeverMetrics(ctx):
    """Export each metric in metricsToMigrate to export/<metric>.native.gz.

    The service address comes from the output of `khost victoriametrics`;
    every metric is then fetched from the native export endpoint with curl
    and gzipped into the export/ directory.
    """
    import shlex

    # Captured stdout may carry a trailing newline; left in place it would
    # split the shell command in the middle of the URL.
    svc = ctx.run('khost victoriametrics').stdout.strip()
    for m in metricsToMigrate:
        # The URL contains `?`, which the shell treats as a glob character,
        # so pass it quoted.
        url = shlex.quote(f'http://{svc}/m/api/v1/export/native?match={m}')
        ctx.run(f'curl {url} | gzip -c > export/{m}.native.gz')


@task
def ingestForeverMetrics(ctx):
    """Send every export/*.native.gz file to the native import endpoint.

    The service address comes from the output of
    `khost next-victoriametrics-forever-vminsert`; each file is decompressed
    with zcat and posted with curl.
    """
    import shlex

    # Captured stdout may carry a trailing newline; left in place it would
    # split the shell command in the middle of the URL.
    svc = ctx.run('khost next-victoriametrics-forever-vminsert').stdout.strip()
    url = shlex.quote(f'http://{svc}/m/next/forever/vminsert/insert/0/prometheus/api/v1/import/native')
    for p in Path('export').glob('*.native.gz'):
        print(f'importing {p}')
        # Quote the path so unusual file names cannot be reinterpreted by the shell.
        ctx.run(f'zcat {shlex.quote(str(p))} | curl -s {url} --data-binary @-')