view config/scrape_main.yaml @ 52:8199713c9294

+ping garage5
author drewp@bigasterisk.com
date Wed, 24 Apr 2024 11:36:36 -0700
parents 16bde029b19f
children def1aa2bfa3f
line wrap: on
line source

# see https://relabeler.promlabs.com/

# Defaults for every job in scrape_configs; a job may override either value.
global:
  # Time between scrapes of a target.
  scrape_interval: "1m"
  # Upper bound on how long a single scrape may take.
  scrape_timeout: "10s"

# scrape_config_files:
#   - build/scrape_ssl.yaml
# These can even be urls: https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md#loading-scrape-configs-from-multiple-files

scrape_configs:
  # some based on https://github.com/prometheus/prometheus/blob/main/documentation/examples/prometheus-kubernetes.yml

  # Scrapes the API server over https, authenticating with the service
  # account CA certificate and token files mounted into this pod.
  - job_name: kubernetes-apiservers
    scheme: https
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt

    kubernetes_sd_configs:
      - role: endpoints

    relabel_configs:
      # Endpoint discovery returns every endpoint; keep only the one whose
      # namespace / service name / port name is default / kubernetes / https.
      - action: keep
        source_labels:
          - __meta_kubernetes_namespace
          - __meta_kubernetes_service_name
          - __meta_kubernetes_endpoint_port_name
        regex: default;kubernetes;https

  # Scrapes each discovered node over https with the service account
  # credentials, and turns most node labels into target labels.
  - job_name: kubernetes-nodes
    scheme: https
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt

    kubernetes_sd_configs:
      - role: node

    relabel_configs:
      # Order matters: first discard the node-label meta labels we don't want,
      # then let labelmap copy whatever node labels remain.
      - regex: '__meta_kubernetes_node_label_(feature_node|nvidia_com_|beta_kubernetes_io_arch|beta_kubernetes_io_instance_type|beta_kubernetes_io_os|node_kubernetes_io_instance_type|kubernetes_io_os).*'
        action: labeldrop
      - regex: '__meta_kubernetes_node_label_(.+)'
        action: labelmap

  # Scrapes /metrics/cadvisor on every discovered node.
  # Metric definitions:
  # https://github.com/google/cadvisor/blob/master/docs/storage/prometheus.md
  - job_name: kubernetes-cadvisor
    scheme: https
    metrics_path: /metrics/cadvisor
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt

    kubernetes_sd_configs:
      - role: node

    relabel_configs:
      # NOTE(review): unlike the kubernetes-nodes job, these names carry no
      # __meta_kubernetes_node_label_ prefix and this job has no labelmap rule,
      # so it is unclear which labels this can match -- confirm it is still
      # needed (or whether it was meant for metric_relabel_configs).
      - regex: '(feature_node|nvidia_com_gpu|beta_kubernetes_io_arch|beta_kubernetes_io_instance_type|beta_kubernetes_io_os|node_kubernetes_io_instance_type|kubernetes_io_os).*'
        action: labeldrop

  # Catch-all job: discovers endpoints through Kubernetes SD (no namespace
  # filter is active; see the commented-out rule below) and scrapes them.
  # The relabel rules run top to bottom, so every `if:` filter further down
  # depends on the `job` and `__port_number` labels assigned by earlier rules.
  # NOTE(review): `if:` and `__scrape_interval__` look like vmagent relabeling
  # extensions rather than stock Prometheus -- confirm before reusing this
  # file with another scraper.
  - job_name: "k8services"
    kubernetes_sd_configs: [{ role: endpoints }]
    relabel_configs:
      # To omit a service, add this at pod-level (Deployment.spec.template.metadata.annotations):
      #   annotations: { prometheus.io/scrape: "false" }
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
        regex: "false"
        action: drop

      # Disabled: would restrict scraping to the "default" namespace.
      # - source_labels: [__meta_kubernetes_namespace]
      #   regex: default
      #   action: keep

      # promote these to display
      # (the service name becomes `job`, the pod's node name becomes `node`)
      - source_labels: [__meta_kubernetes_service_name]
        target_label: job

      - source_labels: [__meta_kubernetes_pod_node_name]
        target_label: node

      # for convenience in this config
      # (a short alias for the container port, used by the filters below)
      - source_labels: [__meta_kubernetes_pod_container_port_number]
        target_label: __port_number

      # period tweaks
      # Overrides the scrape interval for the power-eagle service only.
      - if: '{job="power-eagle"}'
        action: replace
        target_label: __scrape_interval__
        # from powerEagle/private_config.periodSec
        replacement: 8s

      # path tweaks
      # Each rule sets a non-default metrics path for one service/port pair.
      - if: '{job="victoriametrics",__port_number="8428"}'
        action: replace
        target_label: "__metrics_path__"
        replacement: "/m/metrics"
      - if: '{job="victorialogs",__port_number="9428"}'
        action: replace
        target_label: "__metrics_path__"
        replacement: "/logs/metrics"
      - if: '{job="video-files",__port_number="8004"}'
        action: replace
        target_label: "__metrics_path__"
        replacement: "/video/api/metrics"

      # discovery is matching extra ports that don't serve metrics- remove these targets
      # A rule with only `job` drops every port of that service; a rule with
      # `__port_number` drops just that one port.
      # NOTE(review): the two pomerium-metrics port rules here look redundant
      # with the blanket pomerium-metrics drop further down -- confirm.
      - {if: '{job="cert-manager-webhook"}',                     action: drop}
      - {if: '{job="cert-manager",      __port_number="9403"}',  action: drop}
      - {if: '{job="filesync-syncthing",__port_number="21027"}', action: drop}
      - {if: '{job="filesync-syncthing",__port_number="22000"}', action: drop}
      - {if: '{job="filesync-syncthing",__port_number="8384"}',  action: drop}
      - {if: '{job="jsregistry",        __port_number="4873"}',  action: drop}
      - {if: '{job="kube-dns",          __port_number="53"}',    action: drop}
      - {if: '{job="kubernetes"}',                               action: drop}
      - {if: '{job="mongodb",           __port_number="27017"}', action: drop}
      - {if: '{job="mosquitto-ext",     __port_number="1883"}',  action: drop}
      - {if: '{job="net-route-input",   __port_number="80"}',    action: drop}
      - {if: '{job="photoprism",        __port_number="2342"}',  action: drop}
      - {if: '{job="pomerium-metrics",  __port_number="8080"}',  action: drop}
      - {if: '{job="pomerium-metrics",  __port_number="8443"}',  action: drop}
      - {if: '{job="pomerium-proxy",    __port_number="8080"}',  action: drop}
      - {if: '{job="pomerium-proxy",    __port_number="8443"}',  action: drop}
      - {if: '{job="video-files",       __port_number="8003"}',  action: drop}
      - {if: '{job=~"cm-acme-.*"}',                              action: drop}
      - {if: '{job="nvidiagpu-node-feature-discovery-master", __port_number="8080"}',  action: drop}
      
      # already have this with a job="pomerium-proxy"
      - {if: '{job="pomerium-metrics"}',                         action: drop}



      # Assume all 8001/8002 port discoveries are redundant with an nginx proxy
      # (these two rules apply to every service, regardless of `job`)
      - {if: '{__port_number="8001"}', action: drop}
      - {if: '{__port_number="8002"}', action: drop}
      
      # Needs https. Used by `kubectl top`
      - {if: '{job="metrics-server",    __port_number="443"}', action: drop}
      # Something doesn't work with the scrape, and I don't see why I should care:
      # NOTE(review): this blanket rule also covers the port-443 rule above.
      - {if: '{job="metrics-server"                        }', action: drop}


  # One target per discovered node, scraped over plain http on port 9273.
  - job_name: telegraf
    scheme: http
    kubernetes_sd_configs:
      - role: node
    relabel_configs:
      # Keep the host part of the discovered address and swap the port
      # for 9273.
      - action: replace
        source_labels:
          - __address__
        regex: '(.*):(\d+)'
        replacement: '${1}:9273'
        target_label: __address__

  # One target per discovered node, scraped hourly over plain http on
  # port 9633.
  - job_name: smartctl
    scheme: http
    scrape_interval: 1h
    kubernetes_sd_configs:
      - role: node
    relabel_configs:
      # Keep the host part of the discovered address and swap the port
      # for 9633.
      - action: replace
        source_labels:
          - __address__
        regex: '(.*):(\d+)'
        replacement: '${1}:9633'
        target_label: __address__

  # One target per discovered node, scraped over plain http on port 5067.
  - job_name: filebeat
    scheme: http
    # Same value as the global default, stated explicitly for this job.
    scrape_interval: 1m
    kubernetes_sd_configs:
      - role: node
    relabel_configs:
      # Keep the host part of the discovered address and swap the port
      # for 5067.
      - action: replace
        source_labels:
          - __address__
        regex: '(.*):(\d+)'
        replacement: '${1}:5067'
        target_label: __address__

  # Fixed target: port 9999 on host "pipe".
  - job_name: net-routes
    static_configs:
      - targets: ["pipe:9999"]

  # Fixed target: port 8080 on host "pipe".
  - job_name: net-traffic
    static_configs:
      - targets: ["pipe:8080"]

  # Fixed target: port 9991 on host "pipe".
  - job_name: dnsmasq-log
    static_configs:
      - targets: ["pipe:9991"]

  # Fixed target: port 2500 on host "prime".
  - job_name: maildir-count
    static_configs:
      - targets: ["prime:2500"]

  # Two fixed targets on host "ditto", scraped hourly.
  - job_name: zfs
    scrape_interval: 1h
    static_configs:
      # These run in k8s, but as a daemonset, so the service-discovery jobs
      # above do not pick them up.
      - targets: ["ditto:9634", "ditto:9986"]

  # ICMP reachability checks. Each listed host is passed as the `target`
  # query parameter to /probe on the "prober" host, which does the actual
  # probing (presumably a blackbox_exporter-style service -- verify).
  - job_name: ping
    scrape_interval: 2m
    metrics_path: /probe
    params:
      module:
        - icmp
    static_configs:
      - targets:
          # The printer drops out of ntop when it has no traffic at all.
          # Alternative: poll ink status at
          # http://10.2.0.37/general/status.html?pageid=1
          - printer
          # Checks that the wireguard connection is up.
          - prime5
          # Seems to lose its wg0 address after pyinfra runs or a reboot.
          - garage5
    relabel_configs:
      # 1. The listed host becomes the ?target= parameter.
      - source_labels:
          - __address__
        target_label: __param_target
      # 2. The same host is shown as the `instance` label.
      - source_labels:
          - __param_target
        target_label: instance
      # 3. The request itself is sent to the prober.
      - replacement: prober
        target_label: __address__

  # No active targets: the list is empty and the hosts below are commented
  # out. Add one to `targets` to scrape it every 30s.
  - job_name: racc
    scrape_interval: 30s
    static_configs:
      - targets: []
        # Commented-out hosts:
        # - dash:5150
        # - dot:5150
        # - plus:5150
        # - Kelsis-iMac:5150