Mercurial > code > home > repos > infra
view kube.py @ 295:2543a0c2b59f
upgr skaffold
author | drewp@bigasterisk.com |
---|---|
date | Tue, 07 May 2024 16:55:42 -0700 |
parents | 7f0482453ead |
children | a3b7b558b9b5 |
line wrap: on
line source
import os

from pyinfra import host
from pyinfra.facts.files import FindInFile
from pyinfra.facts.server import Arch, LinuxDistribution
from pyinfra.operations import files, server, systemd

# https://github.com/GoogleContainerTools/skaffold/releases
skaffold_version = 'v2.11.1'

# /var/lib/rancher/k3s/server/node-token is the source of the string in secrets/k3s_token
_NODE_TOKEN_PATH = '/var/lib/rancher/k3s/server/node-token'

# k3s release asset name for each value of the Arch fact; any other arch gets the armhf build.
_K3S_ASSET_BY_ARCH = {'x86_64': 'k3s', 'aarch64': 'k3s-arm64'}
_K3S_FALLBACK_ASSET = 'k3s-armhf'


def download_k3s(k3s_version):
    """Download the k3s release binary matching this host's CPU arch to /usr/local/bin/k3s.

    k3s_version is the release tag used in the GitHub download URL.
    """
    asset = _K3S_ASSET_BY_ARCH.get(host.get_fact(Arch), _K3S_FALLBACK_ASSET)
    files.download(
        src=f'https://github.com/rancher/k3s/releases/download/{k3s_version}/{asset}',
        dest='/usr/local/bin/k3s',
        user='root',
        group='root',
        mode='755',
        cache_time=43000,
        # force=True,  # to get a new version
    )


def install_skaffold():
    """Download the pinned skaffold release and register reg:5000 as an insecure registry."""
    files.download(
        src=f'https://storage.googleapis.com/skaffold/releases/{skaffold_version}/skaffold-linux-amd64',
        dest='/usr/local/bin/skaffold',
        user='root',
        group='root',
        mode='755',
        cache_time=1000,
    )
    # one time; writes to $HOME
    server.shell(commands="skaffold config set --global insecure-registries reg:5000")


def host_prep():
    """Persist the sysctl settings (forwarding, inotify limits, rp_filter) used on cluster hosts."""
    server.sysctl(key='net.ipv4.ip_forward', value="1", persist=True)
    server.sysctl(key='net.ipv6.conf.all.forwarding', value="1", persist=True)
    server.sysctl(key='fs.inotify.max_user_instances', value='8192', persist=True)
    server.sysctl(key='fs.inotify.max_user_watches', value='524288', persist=True)

    # https://sysctl-explorer.net/net/ipv4/rp_filter/
    # Only `loose` is used; the other two names document the remaining values.
    none, strict, loose = 0, 1, 2
    server.sysctl(key='net.ipv4.conf.default.rp_filter', value=loose, persist=True)

    # don't try to get aufs-dkms on rpi-- https://github.com/docker/for-linux/issues/709


def podman_insecure_registry(reg):
    """Render the podman registries config for `reg` and apply the podman user-mode units."""
    files.template(
        src='templates/kube/podman_registries.conf.j2',
        dest='/etc/containers/registries.conf.d/reg.conf',
        reg=reg,
    )
    systemd.service(service='podman', user_mode=True)
    systemd.service(service='podman.socket', user_mode=True)


def config_and_run_service(k3s_version, server_node, server_ip):
    """Install k3s on this host, write its config and systemd unit, then restart the service.

    The host whose name equals `server_node` is set up with the 'server' role
    (k3s.service); every other host gets the 'agent' role (k3s-node.service).
    Agents need the server's node token, which is read from the local
    filesystem; when that file is missing, a message is printed and the
    function returns before writing any k3s config.
    """
    download_k3s(k3s_version)

    is_server = host.name == server_node
    if is_server:
        service_name = 'k3s.service'
        role = 'server'
        which_conf = 'config-server.yaml.j2'
    else:
        service_name = 'k3s-node.service'
        role = 'agent'
        which_conf = 'config-agent.yaml.j2'

    files.put(src="files/kube/kubelet.config", dest="/etc/rancher/k3s/kubelet.config")

    # The node-token file is the source of the string in secrets/k3s_token,
    # so this presumes a previous run
    if is_server:
        token = "ununsed"
    else:
        # this assumes localhost is the k3s server.
        if not os.path.exists(_NODE_TOKEN_PATH):
            print("first pass is for server only- skipping other nodes")
            return
        # Context manager so the file handle is closed right after reading.
        with open(_NODE_TOKEN_PATH, 'rt') as token_file:
            token = token_file.read().strip()

    files.template(
        src=f'templates/kube/{which_conf}',
        dest='/etc/k3s_config.yaml',
        server_ip=server_ip,
        token=token,
        wg_ip=host.host_data['wireguard_address'],
    )
    files.template(
        src='templates/kube/k3s.service.j2',
        dest=f'/etc/systemd/system/{service_name}',
        role=role,
    )

    if not host.data.get('gpu'):
        # no supported gpu
        # The string below is only a record of the labeling commands; nothing in it is executed.
        '''
        kubectl label --overwrite node bang nvidia.com/gpu.deploy.gpu-feature-discovery=false
        kubectl label --overwrite node bang nvidia.com/gpu.deploy.container-toolkit=false
        kubectl label --overwrite node bang nvidia.com/gpu.deploy.dcgm-exporter=false
        kubectl label --overwrite node bang nvidia.com/gpu.deploy.device-plugin=false
        kubectl label --overwrite node bang nvidia.com/gpu.deploy.driver=false
        kubectl label --overwrite node bang nvidia.com/gpu.deploy.mig-manager=false
        kubectl label --overwrite node bang nvidia.com/gpu.deploy.operator-validator=false
        '''

    systemd.service(service=service_name, daemon_reload=True, enabled=True, restarted=True)


def setupNvidiaToolkit():
    """Placeholder that runs no operations; the comments below record the setup notes."""
    # guides:
    #   https://github.com/NVIDIA/k8s-device-plugin#prerequisites
    #   https://docs.k3s.io/advanced#nvidia-container-runtime-support

    # apply this once to kube-system: https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.14.3/nvidia-device-plugin.yml
    # apply this once: https://raw.githubusercontent.com/NVIDIA/gpu-feature-discovery/v0.8.2/deployments/static/nfd.yaml
    # and: kubectl apply -f https://raw.githubusercontent.com/NVIDIA/gpu-feature-discovery/v0.8.2/deployments/static/gpu-feature-discovery-daemonset.yaml

    # k3s says they do this:
    #server.shell('nvidia-ctk runtime configure --runtime=containerd --config /var/lib/rancher/k3s/agent/etc/containerd/config.toml')

    # then caller restarts k3s which includes containerd

    # tried https://github.com/k3s-io/k3s/discussions/9231#discussioncomment-8114243


def make_cluster(
        server_ip,
        server_node,
        nodes,
        # https://github.com/k3s-io/k3s/releases
        # 1.23.6 per https://github.com/cilium/cilium/issues/20331
        k3s_version,
):
    """Apply the k3s cluster setup that applies to the current pyinfra host.

    Hosts named in `nodes`, plus `server_node`, get the host prep, the
    registries config and the k3s service. Hosts with `k8s_admin` set in their
    host data additionally get the podman registry config, skaffold, a kubectl
    link and the cluster kubeconfig pointed at `server_ip`.
    """
    if host.name in nodes + [server_node]:
        host_prep()
        files.directory(path='/etc/rancher/k3s')

        # docs: https://rancher.com/docs/k3s/latest/en/installation/private-registry/
        # user confusions: https://github.com/rancher/k3s/issues/1802
        files.template(src='templates/kube/registries.yaml.j2', dest='/etc/rancher/k3s/registries.yaml', reg='reg:5000')
        # also note that podman dropped the default `docker.io/` prefix on image names (see https://unix.stackexchange.com/a/701785/419418)

        config_and_run_service(k3s_version, server_node, server_ip)

    if host.data.get('k8s_admin'):
        podman_insecure_registry(reg='reg:5000')
        # Repeated here because an admin host does not have to be one of the cluster hosts above.
        files.directory(path='/etc/rancher/k3s')
        install_skaffold()
        files.link(path='/usr/local/bin/kubectl', target='/usr/local/bin/k3s')
        files.directory(path='/home/drewp/.kube', user='drewp', group='drewp')
        # assumes our pyinfra process is running on server_node
        files.put(
            src='/etc/rancher/k3s/k3s.yaml',
            dest='/etc/rancher/k3s/k3s.yaml',  #
            user='root',
            group='drewp',
            mode='640')
        server.shell(
            commands=f"kubectl config set-cluster default --server=https://{server_ip}:6443 --kubeconfig=/etc/rancher/k3s/k3s.yaml"
        )


make_cluster(
    server_ip="10.5.0.7",
    server_node='ditto',
    nodes=[
        'bang',
        'slash',
        'dash',
        'ws-printer',
        # 'gn-music',
        'li-drums',
    ],
    k3s_version='v1.29.1+k3s1')

# consider https://github.com/derailed/k9s/releases/download/v0.32.4/k9s_Linux_amd64.tar.gz