Mercurial > code > home > repos > victoriametrics
changeset 64:def1aa2bfa3f
more targets polish. reorg code into next/
author | drewp@bigasterisk.com |
---|---|
date | Thu, 02 May 2024 23:06:31 -0700 |
parents | 84a4c4cca4a5 |
children | fada8d64c4d3 |
files | .hgignore config/scrape_main.yaml config/scrape_recent.yaml next/create_scrape_configs.py next/scrape_job.py next/tasks.py |
diffstat | 6 files changed, 133 insertions(+), 222 deletions(-) [+] |
line wrap: on
line diff
--- a/.hgignore Thu May 02 23:05:14 2024 -0700 +++ b/.hgignore Thu May 02 23:06:31 2024 -0700 @@ -3,5 +3,7 @@ .pdm-python .vscode .venv -^export -^build +^migration/export +^next/build +^next/private.py +
--- a/config/scrape_main.yaml Thu May 02 23:05:14 2024 -0700 +++ b/config/scrape_main.yaml Thu May 02 23:06:31 2024 -0700 @@ -1,55 +1,5 @@ -# see https://relabeler.promlabs.com/ - -global: - scrape_interval: 1m - scrape_timeout: 10s - -# scrape_config_files: -# - build/scrape_ssl.yaml -# These can even be urls: https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md#loading-scrape-configs-from-multiple-files - -scrape_configs: # some based on https://github.com/prometheus/prometheus/blob/main/documentation/examples/prometheus-kubernetes.yml - - job_name: "kubernetes-apiservers" - scheme: https - tls_config: { ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt } - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - - kubernetes_sd_configs: [{ role: endpoints }] - - relabel_configs: - - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] - action: keep - regex: default;kubernetes;https - - - job_name: "kubernetes-nodes" - scheme: https - tls_config: { ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt } - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - - kubernetes_sd_configs: [{ role: node }] - - relabel_configs: - - action: labeldrop - regex: "__meta_kubernetes_node_label_(feature_node|nvidia_com_|beta_kubernetes_io_arch|beta_kubernetes_io_instance_type|beta_kubernetes_io_os|node_kubernetes_io_instance_type|kubernetes_io_os).*" - - action: labelmap - regex: __meta_kubernetes_node_label_(.+) - - # see https://github.com/google/cadvisor/blob/master/docs/storage/prometheus.md - # for metric definitions - - job_name: "kubernetes-cadvisor" - scheme: https - metrics_path: /metrics/cadvisor - tls_config: { ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt } - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - - kubernetes_sd_configs: [{ role: node }] - - relabel_configs: - - action: labeldrop - regex: "(feature_node|nvidia_com_gpu|beta_kubernetes_io_arch|beta_kubernetes_io_instance_type|beta_kubernetes_io_os|node_kubernetes_io_instance_type|kubernetes_io_os).*" - - job_name: "k8services" kubernetes_sd_configs: [{ role: endpoints }] relabel_configs: @@ -74,13 +24,6 @@ - source_labels: [__meta_kubernetes_pod_container_port_number] target_label: __port_number - # period tweaks - - if: '{job="power-eagle"}' - action: replace - target_label: __scrape_interval__ - # from powerEagle/private_config.periodSec - replacement: 8s - # path tweaks - if: '{job="victoriametrics",__port_number="8428"}' action: replace @@ -130,91 +73,3 @@ # Something doesn't work with the scrape, and I don't see why I should care: - {if: '{job="metrics-server" }', action: drop} - - - job_name: "telegraf" - scheme: http - kubernetes_sd_configs: [{ role: node }] - relabel_configs: - - source_labels: [__address__] - regex: "(.*):(\\d+)" - target_label: __address__ - replacement: "${1}:9273" - action: replace - - - job_name: "smartctl" - scrape_interval: 1h - scheme: http - kubernetes_sd_configs: [{ role: node }] - relabel_configs: - - source_labels: [__address__] - regex: "(.*):(\\d+)" - target_label: __address__ - replacement: "${1}:9633" - action: replace - - - job_name: "filebeat" - scrape_interval: 1m - scheme: http - kubernetes_sd_configs: [{ role: node }] - relabel_configs: - - source_labels: [__address__] - regex: "(.*):(\\d+)" - target_label: __address__ - replacement: "${1}:5067" - action: replace - - - job_name: "net-routes" - static_configs: - - targets: - - pipe:9999 - - - job_name: "net-traffic" - static_configs: - - targets: - - pipe:8080 - - - job_name: "dnsmasq-log" - static_configs: - - targets: - - pipe:9991 - - - job_name: "maildir-count" - static_configs: - - targets: - - prime:2500 - - - job_name: "zfs" - scrape_interval: 1h - static_configs: - - targets: - # running in in k8s, but as daemonset so it's not in SD above - - ditto:9634 - - ditto:9986 - - - job_name: "ping" - scrape_interval: 2m - metrics_path: /probe - params: - module: [icmp] - static_configs: - - targets: - # printer, since it falls out of ntop with no traffic at all. Or, we could poll ink status at http://10.2.0.37/general/status.html?pageid=1 - - printer - # wireguard connection test - - prime5 - # after pyinfra or reboot, seems to lose wg0 address - - garage5 - relabel_configs: - - {source_labels: [__address__], target_label: __param_target} - - {source_labels: [__param_target], target_label: instance} - - target_label: __address__ - replacement: prober - - - job_name: "racc" - scrape_interval: 30s - static_configs: - - targets: [] - # - dash:5150 - # - dot:5150 - # - plus:5150 - # - Kelsis-iMac:5150
--- a/config/scrape_recent.yaml Thu May 02 23:05:14 2024 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,19 +0,0 @@ -# see https://relabeler.promlabs.com/ - -global: - scrape_interval: 1m - scrape_timeout: 10s - - -scrape_configs: - # some based on https://github.com/prometheus/prometheus/blob/main/documentation/examples/prometheus-kubernetes.yml - - - job_name: "telegraf" - scheme: http - kubernetes_sd_configs: [{ role: node }] - relabel_configs: - - source_labels: [__address__] - regex: "(.*):(\\d+)" - target_label: __address__ - replacement: "${1}:9273" - action: replace
--- a/next/create_scrape_configs.py Thu May 02 23:05:14 2024 -0700 +++ b/next/create_scrape_configs.py Thu May 02 23:06:31 2024 -0700 @@ -1,11 +1,8 @@ from pathlib import Path -from scrape_job import jobConfig, scrape_deployments, writeJobConfigs +from scrape_job import jobConfig, scrape_deployments, writeJobConfigs, FromName import private - - - # previously this used `kubernetes_sd_configs: [{ role: node }]` all_hosts = [ 'dash', @@ -29,38 +26,6 @@ 'garage5', ] -deploy_doesnt_serve_metrics = [ - 'apprise', - 'bitwarden', - 'digi-files', - 'digi-pose-predict', - 'digi-tts-mimic', - 'dovecot', - 'front-door-display', - 'hass', - 'homepage', - 'itch150', - 'kallithea', - 'kube-web-view', - 'magma', - 'megasecond', - 'minecraft-build-world', - 'minecraft-lake-world', - 'minecraft-smp-world', - 'mongodb', - 'mqtt1', - 'mqtt2', - 'nodered', - 'photoprism', - 'plik', - 'projects', - 'registry', - 'registry-ui', - 'speakerphone', - 'video', - 'video-files', - 'zigbee2mqtt', -] forever_jobs = [ jobConfig(name='maildir-count', targets=['prime:2500']), @@ -81,11 +46,23 @@ ] # yapf: disable recent_jobs = [ - jobConfig( name="telegraf", targets=[f'{h}:9273' for h in all_hosts]), - jobConfig( name="filebeat", targets=[f'{h}:5067' for h in all_hosts]), - jobConfig( name="net-routes", targets=['pipe:9999']), - jobConfig( name="net-traffic", targets=['pipe:8080']), - jobConfig( name="dnsmasq-log", targets=['pipe:9991']), + jobConfig(name="dnsmasq-log", targets=['pipe:9991']), + jobConfig(name="filebeat", targets=[f'{h}:5067' for h in all_hosts]), + jobConfig(name="net-routes", targets=['pipe:9999']), + jobConfig(name="net-traffic", targets=['pipe:8080']), + jobConfig(name="pomerium", targets=['pomerium-metrics.pomerium:9090']), + jobConfig(name="telegraf", targets=[f'{h}:9273' for h in all_hosts]), + jobConfig(name="victorialogs",targets=['victorialogs'], metrics_path='/logs/metrics'), + + jobConfig(name="next-victoriametrics-forever-vmagent", metrics_path='/m/next/forever/vmagent/metrics', targets=FromName), + jobConfig(name="next-victoriametrics-forever-vminsert", metrics_path='/m/next/forever/vminsert/metrics', targets=FromName), + jobConfig(name="next-victoriametrics-forever-vmstorage", metrics_path='/m/next/forever/vmstorage/metrics',targets=FromName), + jobConfig(name="next-victoriametrics-recent-vmagent", metrics_path='/m/next/recent/vmagent/metrics', targets=FromName), + jobConfig(name="next-victoriametrics-recent-vminsert", metrics_path='/m/next/recent/vminsert/metrics', targets=FromName), + jobConfig(name="next-victoriametrics-recent-vmstorage", metrics_path='/m/next/recent/vmstorage/metrics', targets=FromName), + jobConfig(name="next-victoriametrics-vmselect", metrics_path='/m/next/vmselect/metrics', targets=FromName), + jobConfig(name="next-victoriametrics-index", targets=FromName), + jobConfig( name="racc", scrape_interval='30s', @@ -97,8 +74,90 @@ ], ), ] # yapf: disable -recent_jobs.extend(scrape_deployments(deploy_doesnt_serve_metrics, forever_jobs)) + + +deploy_doesnt_serve_metrics = [ + 'apprise', + 'bitwarden', + 'digi-files', + 'digi-pose-predict', + 'digi-tts-mimic', + 'digi-web', + 'dovecot', + 'ectoscope', + 'front-door-display', + 'hass', + 'homepage', + 'itch150', + 'jsregistry', + 'kallithea', + 'kube-web-view', + 'magma', + 'megasecond', + 'minecraft-build-world', + 'minecraft-lake-world', + 'minecraft-smp-world', + 'mongodb', + 'mqtt1', + 'mqtt2', + 'nodered', + 'photoprism', + 'plik', + 'projects', + 'registry-ui', + 'registry', + 'speakerphone', + 'victorialogs-ui', + 'video-files', + 'video', + 'zigbee2mqtt', + 'zwave2mqtt', +] + +existing_jobs = [j['job_name'] for j in forever_jobs + recent_jobs] +recent_jobs.extend(scrape_deployments(deploy_doesnt_serve_metrics + existing_jobs)) -top = Path('build/scrape_config') -writeJobConfigs(top, forever_jobs, 'forever') -writeJobConfigs(top, recent_jobs, 'recent') +recent_jobs.append(jobConfig(name='kubernetes-apiservers', https=True, targets=[]) | { + 'kubernetes_sd_configs': [{ + 'role': 'endpoints' + }], + 'relabel_configs': [{ + 'source_labels': ['__meta_kubernetes_namespace', '__meta_kubernetes_service_name', '__meta_kubernetes_endpoint_port_name'], + 'action': 'keep', + 'regex': 'default;kubernetes;https' + }], +}) + +recent_jobs.append( + jobConfig(name="kubernetes-nodes", https=True, targets=[]) | { + "kubernetes_sd_configs": [{ + "role": "node" + }], + "relabel_configs": [{ + "action": "labeldrop", + "regex": "__meta_kubernetes_node_label_(feature_node|nvidia_com_|beta_kubernetes_io_arch|beta_kubernetes_io_instance_type|beta_kubernetes_io_os|node_kubernetes_io_instance_type|kubernetes_io_os).*" + }, { + "action": "labelmap", + "regex": "__meta_kubernetes_node_label_(.+)" + }, { + "action": "labeldrop", + "regex": "kubernetes_io_hostname" + }], + }) + +# see https://github.com/google/cadvisor/blob/master/docs/storage/prometheus.md +# for metric definitions + +recent_jobs.append(jobConfig(name="kubernetes-cadvisor", https=True, metrics_path="/metrics/cadvisor", targets=[]) | { + "kubernetes_sd_configs": [{ + "role": "node" + }], + "relabel_configs": [{ + "action": "labeldrop", + "regex": "(feature_node|nvidia_com_gpu|beta_kubernetes_io_arch|beta_kubernetes_io_instance_type|beta_kubernetes_io_os|node_kubernetes_io_instance_type|kubernetes_io_os).*" + }], +}) + +outDir = Path('build/scrape_config') +writeJobConfigs(outDir, forever_jobs, 'forever') +writeJobConfigs(outDir, recent_jobs, 'recent')
--- a/next/scrape_job.py Thu May 02 23:05:14 2024 -0700 +++ b/next/scrape_job.py Thu May 02 23:06:31 2024 -0700 @@ -2,14 +2,13 @@ from pathlib import Path import subprocess +class FromName: + pass -def jobConfig(name, targets, scrape_interval=None, ping_job=False, metrics_path=None, params=None): +def jobConfig(name, targets, scrape_interval=None, ping_job=False, metrics_path=None, params=None, https=False): """one scrape job config""" ret = { "job_name": name, - "static_configs": [{ - "targets": targets, - }], "relabel_configs": [ { "target_label": "namespace", @@ -21,9 +20,21 @@ }, ] } + + if targets is FromName: + targets = [name] + + if targets: + ret["static_configs"] = [{ + "targets": targets, + }] if metrics_path: - ret['metrics_path'] = metrics_path + ret.setdefault('relabel_configs', []).append({ + "action": "replace", + "target_label": "__metrics_path__", + "replacement": metrics_path, + }) if scrape_interval: ret['scrape_interval'] = scrape_interval @@ -49,6 +60,11 @@ }, ] + if https: + ret['scheme'] = 'https' + ret["tls_config"] = {"ca_file": "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"} + ret["bearer_token_file"] = "/var/run/secrets/kubernetes.io/serviceaccount/token" + return ret @@ -59,12 +75,10 @@ yield name -def scrape_deployments(deploy_doesnt_serve_metrics, forever_jobs): +def scrape_deployments(skip_names): ret = [] for name in current_deployments(): - if name in deploy_doesnt_serve_metrics: - continue - if name in [j['job_name'] for j in forever_jobs]: + if name in skip_names: continue targets = [name] ret.append(jobConfig(name=name, targets=targets)) @@ -84,5 +98,5 @@ "scrape_interval": "1m", "scrape_timeout": "10s" }, - "scrape_config_files": filenames_written, + "scrape_config_files": sorted(filenames_written), }, indent=2))
--- a/next/tasks.py Thu May 02 23:05:14 2024 -0700 +++ b/next/tasks.py Thu May 02 23:06:31 2024 -0700 @@ -26,7 +26,7 @@ 'rules': alert_rules.allRules(ctx), } top = Path('build/scrape_config') - for p in top.glob('**/*.yaml'): + for p in top.glob('*.yaml'): map[str(p.relative_to(top))] = scrapeConfig(p) replaceCmap("next-victoriametrics-config", map) refreshPodCmaps(firstPodName("app=next-victoriametrics-forever-vmagent"))