Mercurial > code > home > repos > infra
annotate kube.py @ 287:c094d23b7943
refmt, notes
author | drewp@bigasterisk.com |
---|---|
date | Sun, 21 Apr 2024 14:03:04 -0700 |
parents | e10ee3ddadcf |
children | 3af02e24eaf9 |
rev | line source |
---|---|
89
2fddde57231b
no connman to surprisingly rewrite net configs
drewp@bigasterisk.com
parents:
84
diff
changeset
|
1 import os |
103
8b8ef9d8f0fd
dead code and templates, reformat, maybe a little refactor
drewp@bigasterisk.com
parents:
99
diff
changeset
|
2 |
8 | 3 from pyinfra import host |
4 from pyinfra.facts.files import FindInFile | |
12 | 5 from pyinfra.facts.server import Arch, LinuxDistribution |
6 from pyinfra.operations import files, server, systemd | |
8 | 7 |
# True when the host's LinuxDistribution fact reports the name 'Debian' or
# 'Raspbian GNU/Linux'.
# NOTE(review): plain Debian also matches, so this is broader than
# "is a Raspberry Pi" -- confirm that is intended.
is_pi = host.get_fact(LinuxDistribution)['name'] in ['Debian', 'Raspbian GNU/Linux']

# Pinned skaffold release used by install_skaffold().
# https://github.com/GoogleContainerTools/skaffold/releases
skaffold_version = 'v2.10.1'
40 | 12 |
8 | 13 |
204
ad6e997fd323
prepare for k8s server host and registry host to change
drewp@bigasterisk.com
parents:
203
diff
changeset
|
def download_k3s(k3s_version):
    """Download the k3s release binary matching this host's architecture.

    k3s_version: release tag inserted into the GitHub download URL.

    The binary is written to /usr/local/bin/k3s, owned by root:root with
    mode 755.
    """
    # Release asset name per Arch fact value; anything not listed here
    # falls back to the 32-bit ARM build.
    asset_by_arch = {
        'x86_64': 'k3s',
        'aarch64': 'k3s-arm64',
    }
    tail = asset_by_arch.get(host.get_fact(Arch), 'k3s-armhf')

    files.download(
        src=f'https://github.com/rancher/k3s/releases/download/{k3s_version}/{tail}',
        dest='/usr/local/bin/k3s',
        user='root',
        group='root',
        mode='755',
        cache_time=43000,
        # force=True,  # to get a new version
    )
8 | 27 |
99
6e159d3bdd40
rewrite k3s to match current config. many tests lying around in comments.
drewp@bigasterisk.com
parents:
89
diff
changeset
|
28 |
6e159d3bdd40
rewrite k3s to match current config. many tests lying around in comments.
drewp@bigasterisk.com
parents:
89
diff
changeset
|
def install_skaffold():
    """Install the pinned skaffold binary and register the insecure registry.

    Downloads the linux-amd64 build for `skaffold_version` to
    /usr/local/bin/skaffold, then runs `skaffold config set` for reg:5000.
    """
    release_url = f'https://storage.googleapis.com/skaffold/releases/{skaffold_version}/skaffold-linux-amd64'
    files.download(
        src=release_url,
        dest='/usr/local/bin/skaffold',
        user='root',
        group='root',
        mode='755',
        cache_time=1000,
    )
    # one time; writes to $HOME
    server.shell("skaffold config set --global insecure-registries reg:5000")
99
6e159d3bdd40
rewrite k3s to match current config. many tests lying around in comments.
drewp@bigasterisk.com
parents:
89
diff
changeset
|
38 |
6e159d3bdd40
rewrite k3s to match current config. many tests lying around in comments.
drewp@bigasterisk.com
parents:
89
diff
changeset
|
39 |
6e159d3bdd40
rewrite k3s to match current config. many tests lying around in comments.
drewp@bigasterisk.com
parents:
89
diff
changeset
|
def pi_cgroup_setup():
    '''
    Add cgroup flags to /boot/cmdline.txt (currently disabled by the early
    return below).

    fixes this:

    Mar 29 23:47:11 ws-printer k3s[5999]: time="2024-03-29T23:47:11-07:00" level=fatal msg="failed to find memory cgroup (v2)"
    '''
    # Early exit: everything after this return is unreachable and is kept
    # only as a record of the previous approach.
    return 'cmdline.txt lives on pipe now, not on the pi host'
    # Take the first line matched by FindInFile as the existing command line.
    old_cmdline = host.get_fact(FindInFile, path='/boot/cmdline.txt', pattern=r'.*')[0]
    # Only append the flags when no cgroup option is present yet.
    if 'cgroup' not in old_cmdline:
        cmdline = old_cmdline + ' cgroup_enable=cpuset cgroup_memory=1 cgroup_enable=memory'
        files.line(path='/boot/cmdline.txt', line='.*', replace=cmdline)
        # pi needs reboot now
6e159d3bdd40
rewrite k3s to match current config. many tests lying around in comments.
drewp@bigasterisk.com
parents:
89
diff
changeset
|
52 |
6e159d3bdd40
rewrite k3s to match current config. many tests lying around in comments.
drewp@bigasterisk.com
parents:
89
diff
changeset
|
53 |
6e159d3bdd40
rewrite k3s to match current config. many tests lying around in comments.
drewp@bigasterisk.com
parents:
89
diff
changeset
|
def host_prep():
    """Apply the persistent sysctl settings, then the Pi cgroup step on Pi hosts."""
    # forwarding for IPv4 and IPv6
    server.sysctl(
        key='net.ipv4.ip_forward',
        value="1",
        persist=True,
    )
    server.sysctl(
        key='net.ipv6.conf.all.forwarding',
        value="1",
        persist=True,
    )

    # inotify limits
    server.sysctl(
        key='fs.inotify.max_user_instances',
        value='8192',
        persist=True,
    )
    server.sysctl(
        key='fs.inotify.max_user_watches',
        value='524288',
        persist=True,
    )

    # rp_filter modes: https://sysctl-explorer.net/net/ipv4/rp_filter/
    rp_none, rp_strict, rp_loose = range(3)
    server.sysctl(
        key='net.ipv4.conf.default.rp_filter',
        value=rp_loose,
        persist=True,
    )

    if not is_pi:
        return
    pi_cgroup_setup()
8 | 66 |
268
34ab4aec7d4b
notes and changes for getting nvidia gpu k3d support going, which was very hard
drewp@bigasterisk.com
parents:
267
diff
changeset
|
67 |
204
ad6e997fd323
prepare for k8s server host and registry host to change
drewp@bigasterisk.com
parents:
203
diff
changeset
|
68 # don't try to get aufs-dkms on rpi-- https://github.com/docker/for-linux/issues/709 |
ad6e997fd323
prepare for k8s server host and registry host to change
drewp@bigasterisk.com
parents:
203
diff
changeset
|
def podman_insecure_registry(reg):
    """Render the podman registry config and manage the user-mode podman units.

    reg: registry address handed to the template as `reg`.
    """
    files.template(
        src='templates/kube/podman_registries.conf.j2',
        dest='/etc/containers/registries.conf.d/reg.conf',
        reg=reg,
    )
    # Both units are handled in user mode rather than system-wide.
    systemd.service(service='podman', user_mode=True)
    systemd.service(service='podman.socket', user_mode=True)
73 | |
89
2fddde57231b
no connman to surprisingly rewrite net configs
drewp@bigasterisk.com
parents:
84
diff
changeset
|
74 |
204
ad6e997fd323
prepare for k8s server host and registry host to change
drewp@bigasterisk.com
parents:
203
diff
changeset
|
def config_and_run_service(k3s_version, server_node, server_ip):
    """Install k3s, render its config and systemd unit, and restart the service.

    k3s_version: release tag passed to download_k3s().
    server_node: host name that runs the k3s server; every other host is
        configured as an agent.
    server_ip: passed to the config template as `server_ip`.

    Returns None. On an agent host, returns early (after the download) when
    the server's node-token file does not exist on the machine running
    pyinfra.
    """
    download_k3s(k3s_version)

    # Decide the role once instead of re-comparing host names per setting.
    is_server = host.name == server_node
    if is_server:
        service_name = 'k3s.service'
        role = 'server'
        which_conf = 'config-server.yaml.j2'
    else:
        service_name = 'k3s-node.service'
        role = 'agent'
        which_conf = 'config-agent.yaml.j2'

    # /var/lib/rancher/k3s/server/node-token is the source of the string in secrets/k3s_token,
    # so this presumes a previous run
    token_path = '/var/lib/rancher/k3s/server/node-token'
    if is_server:
        # placeholder value (spelling kept as-is; it is passed to the template)
        token = "ununsed"
    else:
        # this assumes localhost is the k3s server.
        if not os.path.exists(token_path):
            print("first pass is for server only- skipping other nodes")
            return
        # Context manager closes the handle; the previous bare
        # open().read() left that to the garbage collector.
        with open(token_path, 'rt') as token_file:
            token = token_file.read().strip()

    files.template(
        src=f'templates/kube/{which_conf}',
        dest='/etc/k3s_config.yaml',
        server_ip=server_ip,
        token=token,
        wg_ip=host.host_data['wireguard_address'],
    )
    files.template(
        src='templates/kube/k3s.service.j2',
        dest=f'/etc/systemd/system/{service_name}',
        role=role,
    )
    if host.name in ['bang', 'garage']:
        # no supported gpu
        # The string below is not executed; it records the manual kubectl
        # commands for these hosts.
        '''
        kubectl label --overwrite node bang nvidia.com/gpu.deploy.gpu-feature-discovery=false
        kubectl label --overwrite node bang nvidia.com/gpu.deploy.container-toolkit=false
        kubectl label --overwrite node bang nvidia.com/gpu.deploy.dcgm-exporter=false
        kubectl label --overwrite node bang nvidia.com/gpu.deploy.device-plugin=false
        kubectl label --overwrite node bang nvidia.com/gpu.deploy.driver=false
        kubectl label --overwrite node bang nvidia.com/gpu.deploy.mig-manager=false
        kubectl label --overwrite node bang nvidia.com/gpu.deploy.operator-validator=false
        '''
    systemd.service(service=service_name, daemon_reload=True, enabled=True, restarted=True)
8 | 115 |
267 | 116 |
def setupNvidiaToolkit():
    """Placeholder for NVIDIA container toolkit setup; performs no operations.

    guides:
      https://github.com/NVIDIA/k8s-device-plugin#prerequisites
      https://docs.k3s.io/advanced#nvidia-container-runtime-support
      apply this once to kube-system: https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.14.3/nvidia-device-plugin.yml
      apply this once: https://raw.githubusercontent.com/NVIDIA/gpu-feature-discovery/v0.8.2/deployments/static/nfd.yaml
      and: kubectl apply -f https://raw.githubusercontent.com/NVIDIA/gpu-feature-discovery/v0.8.2/deployments/static/gpu-feature-discovery-daemonset.yaml

    k3s says they do this:
      server.shell('nvidia-ctk runtime configure --runtime=containerd --config /var/lib/rancher/k3s/agent/etc/containerd/config.toml')

    then caller restarts k3s which includes containerd

    tried https://github.com/k3s-io/k3s/discussions/9231#discussioncomment-8114243
    """
132 | |
267 | 133 |
204
ad6e997fd323
prepare for k8s server host and registry host to change
drewp@bigasterisk.com
parents:
203
diff
changeset
|
def make_cluster(
        server_ip,
        server_node,
        nodes,
        admin_from,
        # https://github.com/k3s-io/k3s/releases
        # 1.23.6 per https://github.com/cilium/cilium/issues/20331
        k3s_version,
):
    """Configure the current host for its part in the k3s cluster.

    server_ip: address used in the k3s config and in the kubeconfig's
        cluster URL (port 6443).
    server_node: host name that runs the k3s server.
    nodes: list of agent host names (concatenated with a list below).
    admin_from: host names that get skaffold, a kubectl link, and a copy
        of the kubeconfig.
    k3s_version: k3s release tag to install.
    """
    registry = 'reg:5000'
    k3s_etc = '/etc/rancher/k3s'
    kubeconfig = '/etc/rancher/k3s/k3s.yaml'

    cluster_hosts = nodes + [server_node]
    if host.name in cluster_hosts:
        host_prep()
        files.directory(path=k3s_etc)

        # docs: https://rancher.com/docs/k3s/latest/en/installation/private-registry/
        # user confusions: https://github.com/rancher/k3s/issues/1802
        files.template(
            src='templates/kube/registries.yaml.j2',
            dest='/etc/rancher/k3s/registries.yaml',
            reg=registry,
        )
        # also note that podman dropped the default `docker.io/` prefix on image names (see https://unix.stackexchange.com/a/701785/419418)
        config_and_run_service(k3s_version, server_node, server_ip)

    if host.name in admin_from:
        podman_insecure_registry(reg=registry)
        files.directory(path=k3s_etc)
        install_skaffold()
        files.link(path='/usr/local/bin/kubectl', target='/usr/local/bin/k3s')
        files.directory(path='/home/drewp/.kube', user='drewp', group='drewp')

        # assumes our pyinfra process is running on server_node
        files.put(
            src=kubeconfig,
            dest=kubeconfig,
            user='root',
            group='drewp',
            mode='640',
        )
        server.shell(
            f"kubectl config set-cluster default --server=https://{server_ip}:6443 --kubeconfig={kubeconfig}")
204
ad6e997fd323
prepare for k8s server host and registry host to change
drewp@bigasterisk.com
parents:
203
diff
changeset
|
170 |
ad6e997fd323
prepare for k8s server host and registry host to change
drewp@bigasterisk.com
parents:
203
diff
changeset
|
171 |
# Module-level entry point: runs when this deploy file is executed.
# 'ditto' is the k3s server; the hosts in `nodes` are agents; the hosts in
# `admin_from` also get the admin tooling.
make_cluster(server_ip="10.5.0.7",
             server_node='ditto',
             nodes=[
                 'bang',
                 'slash',
                 'dash',
                 'ws-printer',
                 # 'gn-music',
                 'li-drums',
             ],
             admin_from=['bang', 'slash', 'dash', 'ditto'],
             k3s_version='v1.29.1+k3s1')
287 | 184 |
185 # consider https://github.com/derailed/k9s/releases/download/v0.32.4/k9s_Linux_amd64.tar.gz |