# config/scrape_main.yaml @ 17:5702bde523ad
# update scrape configs
# drewp@bigasterisk.com, Fri, 23 Jun 2023 23:54:38 -0700

# see https://relabeler.promlabs.com/

global:
  scrape_interval: 1m
  scrape_timeout: 10s

# scrape_config_files:
#   - build/scrape_ssl.yaml
# These can even be urls: https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmagent/README.md#loading-scrape-configs-from-multiple-files
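#
# Rough sketch of what build/scrape_ssl.yaml could hold; per the vmagent docs
# above, each referenced file is a bare list of scrape configs. The job below
# is illustrative, not from this repo:
#
#   - job_name: "ssl-expiry"
#     metrics_path: /probe
#     static_configs:
#       - targets: [example.com:443]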

scrape_configs:
  # some based on https://github.com/prometheus/prometheus/blob/main/documentation/examples/prometheus-kubernetes.yml

  # - job_name: "vmalert"
  #   metrics_path: /vmalert/metrics
  #   static_configs:
  #     - targets:
  #         - vmalert.default.svc.cluster.local

  - job_name: "kubernetes-apiservers"
    scheme: https
    tls_config: { ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt }
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

    kubernetes_sd_configs: [{ role: endpoints }]

    relabel_configs:
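      # keep only the default/kubernetes service's https endpoint port,
      # i.e. the API server itself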
      - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
        action: keep
        regex: default;kubernetes;https

  - job_name: "kubernetes-nodes"
    scheme: https
    tls_config: { ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt }
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

    kubernetes_sd_configs: [{ role: node }]

    relabel_configs:
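      # copy every k8s node label onto the target, e.g.
      # __meta_kubernetes_node_label_kubernetes_io_hostname -> kubernetes_io_hostname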
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)

  # see https://github.com/google/cadvisor/blob/master/docs/storage/prometheus.md
  # for metric definitions
  - job_name: "kubernetes-cadvisor"
    scheme: https
    metrics_path: /metrics/cadvisor
    tls_config: { ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt }
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

    kubernetes_sd_configs: [{ role: node }]

    relabel_configs:
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)

  - job_name: "k8services"
    kubernetes_sd_configs: [{ role: endpoints }]
    relabel_configs:
      # To omit a service, add this at pod-level (Deployment.spec.template.metadata.annotations):
      #   annotations: { prometheus.io/scrape: "false" }
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
        regex: "false"
        action: drop

      - source_labels: [__meta_kubernetes_namespace]
        regex: default
        action: keep

      # promote these to display
      - source_labels: [__meta_kubernetes_service_name]
        target_label: job

      - source_labels: [__meta_kubernetes_pod_node_name]
        target_label: node

      # for convenience in this config
      - source_labels: [__meta_kubernetes_pod_container_port_number]
        target_label: __port_number

      # period tweaks
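      # (__scrape_interval__ is a special per-target label; relabeling it
      # overrides the global 1m interval for just these targets)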
      - if: '{job="power-eagle"}'
        action: replace
        target_label: __scrape_interval__
        # from powerEagle/private_config.periodSec
        replacement: 8s

      # path tweaks
      - if: '{job="victoriametrics",__port_number="8428"}'
        action: replace
        target_label: "__metrics_path__"
        replacement: "/m/metrics"

      # discovery is matching extra ports that don't serve metrics; drop these targets
      - {if: '{job="kubernetes"}', action: drop}
      - {if: '{job="mongodb",           __port_number="27017"}', action: drop}
      - {if: '{job="mosquitto-ext",     __port_number="1883"}', action: drop}
      - {if: '{job="filesync-syncthing",__port_number="8384"}', action: drop}
      - {if: '{job="jsregistry",        __port_number="4873"}', action: drop}
      - {if: '{job="photoprism",        __port_number="2342"}', action: drop}
      - {if: '{job="net-route-input",   __port_number="80"}', action: drop}

      # discovery also matches container ports that are redundant with the ports it already catches via the service
      - {if: '{job="lanscape",        __port_number="8001"}', action: drop}
      - {if: '{job="lanscape",        __port_number="8002"}', action: drop}
      - {if: '{job="collector",        __port_number="8001"}', action: drop}
      - {if: '{job="collector",        __port_number="8002"}', action: drop}
      - {if: '{job="racc-console",        __port_number="8002"}', action: drop}
      - {if: '{job="antigen-web",        __port_number="8001"}', action: drop}

  # # seems like this would match more stuff, but all I get is coredns
  # - job_name: 'old_coredns'
  #   kubernetes_sd_configs: [{role: pod}]
  #   relabel_configs:
  #     - source_labels: [__meta_kubernetes_pod_container_port_name]
  #       regex: metrics
  #       action: keep
  #     - source_labels: [__meta_kubernetes_pod_container_name]
  #       target_label: job

  - job_name: "telegraf"
    scheme: http
    kubernetes_sd_configs: [{ role: node }]
    relabel_configs:
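      # rewrite each node's discovered address (host:kubelet-port) to
      # telegraf's prometheus_client port, e.g. somenode:10250 -> somenode:9273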
      - source_labels: [__address__]
        regex: "(.*):(\\d+)"
        target_label: __address__
        replacement: "${1}:9273"
        action: replace

  - job_name: "smartctl"
    scrape_interval: 1h
    scheme: http
    kubernetes_sd_configs: [{ role: node }]
    relabel_configs:
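      # same node-address rewrite as the telegraf job, pointed at the
      # smartctl exporter's port (9633) instead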
      - source_labels: [__address__]
        regex: "(.*):(\\d+)"
        target_label: __address__
        replacement: "${1}:9633"
        action: replace

  - job_name: "net-routes"
    static_configs:
      - targets:
          - pipe:9999

  - job_name: "net-traffic"
    static_configs:
      - targets:
          - pipe:8080

  - job_name: "dnsmasq-log"
    static_configs:
      - targets:
          - pipe:9991

  - job_name: "pomerium"
    static_configs:
      - targets:
          - pomerium-metrics.pomerium.svc:9090
          - cert-manager.cert-manager.svc:9402

  - job_name: "zfs"
    scrape_interval: 1h
    static_configs:
      - targets:
          # running in k8s, but not in SD
          - ditto:9634
          - ditto:9986

  - job_name: "ping"
    scrape_interval: 2m
    metrics_path: /probe
    params:
      module: [icmp]
    static_configs:
      - targets:
          # printer, since it falls out of ntop with no traffic at all. Or, we could poll ink status at http://10.2.0.37/general/status.html?pageid=1
          - printer014032ED
          # frontbed, for monitoring
          # - 10.5.0.17 # too flaky
          # asher bulb, not sure why it sleeps so long
          - bulb1

    relabel_configs:
      - {source_labels: [__address__],    target_label: __param_target}
      - {source_labels: [__param_target], target_label: instance}
      - target_label: __address__
        replacement: prober
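      # net effect, e.g. for bulb1: scrape http://prober/probe?module=icmp&target=bulb1
      # and store the results with instance="bulb1" (blackbox-exporter-style
      # probing; "prober" is assumed to be such a service)
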
  - job_name: "racc"
    scrape_interval: 30s
    static_configs:
      - targets:
          - dash:5150
          - dot:5150
          - plus:5150
          - Kelsis-iMac:5150