Mercurial > code > home > repos > infra
annotate kube.py @ 269:665a199f7c8a
upstream
author | drewp@bigasterisk.com |
---|---|
date | Wed, 14 Feb 2024 18:48:27 -0800 |
parents | 34ab4aec7d4b |
children | 4e424a144183 |
rev | line source |
---|---|
89
2fddde57231b
no connman to surprisingly rewrite net configs
drewp@bigasterisk.com
parents:
84
diff
changeset
|
1 import os |
103
8b8ef9d8f0fd
dead code and templates, reformat, maybe a little refactor
drewp@bigasterisk.com
parents:
99
diff
changeset
|
2 |
8 | 3 from pyinfra import host |
4 from pyinfra.facts.files import FindInFile | |
12 | 5 from pyinfra.facts.server import Arch, LinuxDistribution |
6 from pyinfra.operations import files, server, systemd | |
8 | 7 |
# Distribution names for which host_prep() runs the extra Pi-only setup step.
is_pi = host.get_fact(LinuxDistribution)['name'] in ('Debian', 'Raspbian GNU/Linux')

# Pinned skaffold release; available versions are listed at
# https://github.com/GoogleContainerTools/skaffold/releases
skaffold_version = 'v2.10.1'
40 | 12 |
8 | 13 |
204
ad6e997fd323
prepare for k8s server host and registry host to change
drewp@bigasterisk.com
parents:
203
diff
changeset
|
def download_k3s(k3s_version):
    """Download the k3s release binary to /usr/local/bin/k3s.

    Args:
        k3s_version: release tag used in the GitHub download URL.

    The asset is chosen from the host's reported architecture: 'k3s' for
    x86_64, 'k3s-armhf' for anything else.
    """
    if host.get_fact(Arch) == 'x86_64':
        asset = 'k3s'
    else:
        asset = 'k3s-armhf'
    release_url = f'https://github.com/rancher/k3s/releases/download/{k3s_version}/{asset}'

    files.download(
        src=release_url,
        dest='/usr/local/bin/k3s',
        user='root',
        group='root',
        mode='755',
        cache_time=43000,
        # force=True,  # to get a new version
    )
8 | 25 |
99
6e159d3bdd40
rewrite k3s to match current config. many tests lying around in comments.
drewp@bigasterisk.com
parents:
89
diff
changeset
|
26 |
6e159d3bdd40
rewrite k3s to match current config. many tests lying around in comments.
drewp@bigasterisk.com
parents:
89
diff
changeset
|
def install_skaffold():
    """Install the pinned skaffold binary and register the insecure registry.

    Downloads the linux-amd64 build of `skaffold_version` to
    /usr/local/bin/skaffold, then sets skaffold's global
    insecure-registries option to reg:5000.
    """
    skaffold_url = f'https://storage.googleapis.com/skaffold/releases/{skaffold_version}/skaffold-linux-amd64'
    files.download(
        src=skaffold_url,
        dest='/usr/local/bin/skaffold',
        user='root',
        group='root',
        mode='755',
        cache_time=1000,
    )
    # one time; writes to $HOME
    server.shell("skaffold config set --global insecure-registries reg:5000")
99
6e159d3bdd40
rewrite k3s to match current config. many tests lying around in comments.
drewp@bigasterisk.com
parents:
89
diff
changeset
|
36 |
6e159d3bdd40
rewrite k3s to match current config. many tests lying around in comments.
drewp@bigasterisk.com
parents:
89
diff
changeset
|
37 |
6e159d3bdd40
rewrite k3s to match current config. many tests lying around in comments.
drewp@bigasterisk.com
parents:
89
diff
changeset
|
def pi_cgroup_setup():
    """Make sure the kernel command line in /boot/cmdline.txt has cgroup flags.

    Reads the first line of /boot/cmdline.txt from the target. If that line
    does not contain 'cgroup', the line is rewritten with the cpuset and
    memory cgroup options appended. No reboot is triggered here.

    Raises:
        RuntimeError: if the FindInFile fact returns nothing for
            /boot/cmdline.txt, so there is no command line to extend.
    """
    cmdline_path = '/boot/cmdline.txt'
    found_lines = host.get_fact(FindInFile, path=cmdline_path, pattern=r'.*')
    # An empty or None fact result would otherwise surface as a bare
    # IndexError/TypeError from the [0] lookup; fail with a message that
    # names the host and the file instead.
    if not found_lines:
        raise RuntimeError(f'{host.name}: could not read a kernel command line from {cmdline_path}')

    old_cmdline = found_lines[0]
    if 'cgroup' not in old_cmdline:
        cmdline = old_cmdline + ' cgroup_enable=cpuset cgroup_memory=1 cgroup_enable=memory'
        files.line(path=cmdline_path, line='.*', replace=cmdline)
        # pi needs reboot now
6e159d3bdd40
rewrite k3s to match current config. many tests lying around in comments.
drewp@bigasterisk.com
parents:
89
diff
changeset
|
44 |
6e159d3bdd40
rewrite k3s to match current config. many tests lying around in comments.
drewp@bigasterisk.com
parents:
89
diff
changeset
|
45 |
6e159d3bdd40
rewrite k3s to match current config. many tests lying around in comments.
drewp@bigasterisk.com
parents:
89
diff
changeset
|
def host_prep():
    """Apply the persistent sysctl settings used on cluster hosts.

    Sets IPv4/IPv6 forwarding, the inotify instance/watch limits and the
    default rp_filter mode, then runs the Pi cgroup setup when `is_pi` is set.
    """
    # forwarding for both address families
    server.sysctl(key='net.ipv4.ip_forward', value="1", persist=True)
    server.sysctl(key='net.ipv6.conf.all.forwarding', value="1", persist=True)

    # inotify limits
    server.sysctl(key='fs.inotify.max_user_instances', value='8192', persist=True)
    server.sysctl(key='fs.inotify.max_user_watches', value='524288', persist=True)

    # rp_filter modes per https://sysctl-explorer.net/net/ipv4/rp_filter/
    #   0 = none, 1 = strict, 2 = loose
    rp_filter_loose = 2
    server.sysctl(key='net.ipv4.conf.default.rp_filter', value=rp_filter_loose, persist=True)

    if not is_pi:
        return
    pi_cgroup_setup()
8 | 58 |
268
34ab4aec7d4b
notes and changes for getting nvidia gpu k3d support going, which was very hard
drewp@bigasterisk.com
parents:
267
diff
changeset
|
59 |
204
ad6e997fd323
prepare for k8s server host and registry host to change
drewp@bigasterisk.com
parents:
203
diff
changeset
|
60 # don't try to get aufs-dkms on rpi-- https://github.com/docker/for-linux/issues/709 |
ad6e997fd323
prepare for k8s server host and registry host to change
drewp@bigasterisk.com
parents:
203
diff
changeset
|
def podman_insecure_registry(reg):
    """Render podman's registry drop-in for `reg` and manage the podman user units.

    Args:
        reg: registry address passed to the podman_registries.conf.j2 template.
    """
    files.template(
        src='templates/kube/podman_registries.conf.j2',
        dest='/etc/containers/registries.conf.d/reg.conf',
        reg=reg,
    )
    # both units are handled in systemd user mode
    systemd.service(service='podman', user_mode=True)
    systemd.service(service='podman.socket', user_mode=True)
65 | |
89
2fddde57231b
no connman to surprisingly rewrite net configs
drewp@bigasterisk.com
parents:
84
diff
changeset
|
66 |
204
ad6e997fd323
prepare for k8s server host and registry host to change
drewp@bigasterisk.com
parents:
203
diff
changeset
|
def config_and_run_service(k3s_version, server_node, server_ip):
    """Install k3s on this host, write its config and unit, and restart it.

    The host whose name equals `server_node` is set up as the k3s server
    (k3s.service); every other host is set up as an agent
    (k3s-node.service).

    Args:
        k3s_version: release tag handed to download_k3s().
        server_node: pyinfra host name of the k3s server.
        server_ip: server address passed to the k3s config template.

    Agents need the server's node token, which is read from the machine
    running pyinfra. If the token file is not there yet, a message is
    printed and the function returns after the download step without
    configuring the agent.
    """
    download_k3s(k3s_version)

    # Decide the role once instead of re-testing host.name for every setting.
    is_server = host.name == server_node
    if is_server:
        service_name = 'k3s.service'
        role = 'server'
        which_conf = 'config-server.yaml.j2'
        # Placeholder handed to the server template; value kept exactly as before.
        token = "ununsed"
    else:
        service_name = 'k3s-node.service'
        role = 'agent'
        which_conf = 'config-agent.yaml.j2'

        # /var/lib/rancher/k3s/server/node-token is the source of the string in
        # secrets/k3s_token, so this presumes a previous run.
        # This assumes localhost is the k3s server.
        token_path = '/var/lib/rancher/k3s/server/node-token'
        if not os.path.exists(token_path):
            print("first pass is for server only- skipping other nodes")
            return
        # Context manager so the token file handle is closed right away.
        with open(token_path, 'rt') as token_file:
            token = token_file.read().strip()

    files.template(
        src=f'templates/kube/{which_conf}',
        dest='/etc/k3s_config.yaml',
        server_ip=server_ip,
        token=token,
        wg_ip=host.host_data['wireguard_address'],
    )
    files.template(
        src='templates/kube/k3s.service.j2',
        dest=f'/etc/systemd/system/{service_name}',
        role=role,
    )

    # Notes for hosts 'bang' and 'garage' (no supported gpu). Nothing is run
    # for them here; these are the label commands that were recorded:
    #   kubectl label --overwrite node bang nvidia.com/gpu.deploy.gpu-feature-discovery=false
    #   kubectl label --overwrite node bang nvidia.com/gpu.deploy.container-toolkit=false
    #   kubectl label --overwrite node bang nvidia.com/gpu.deploy.dcgm-exporter=false
    #   kubectl label --overwrite node bang nvidia.com/gpu.deploy.device-plugin=false
    #   kubectl label --overwrite node bang nvidia.com/gpu.deploy.driver=false
    #   kubectl label --overwrite node bang nvidia.com/gpu.deploy.mig-manager=false
    #   kubectl label --overwrite node bang nvidia.com/gpu.deploy.operator-validator=false

    systemd.service(service=service_name, daemon_reload=True, enabled=True, restarted=True)
8 | 107 |
267 | 108 |
def setupNvidiaToolkit():
    """Placeholder for NVIDIA container toolkit setup; performs no operations.

    guides:
      https://github.com/NVIDIA/k8s-device-plugin#prerequisites
      https://docs.k3s.io/advanced#nvidia-container-runtime-support
      apply this once to kube-system: https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.14.3/nvidia-device-plugin.yml
      apply this once: https://raw.githubusercontent.com/NVIDIA/gpu-feature-discovery/v0.8.2/deployments/static/nfd.yaml
      and: kubectl apply -f https://raw.githubusercontent.com/NVIDIA/gpu-feature-discovery/v0.8.2/deployments/static/gpu-feature-discovery-daemonset.yaml

    k3s says they do this:
      nvidia-ctk runtime configure --runtime=containerd --config /var/lib/rancher/k3s/agent/etc/containerd/config.toml

    then caller restarts k3s which includes containerd

    tried https://github.com/k3s-io/k3s/discussions/9231#discussioncomment-8114243
    """
124 | |
267 | 125 |
204
ad6e997fd323
prepare for k8s server host and registry host to change
drewp@bigasterisk.com
parents:
203
diff
changeset
|
def make_cluster(
        server_ip,
        server_node,
        nodes,
        admin_from,
        # https://github.com/k3s-io/k3s/releases
        # 1.23.6 per https://github.com/cilium/cilium/issues/20331
        k3s_version,
):
    """Configure the current host for its part(s) in the k3s cluster.

    Args:
        server_ip: address of the k3s server, used in the k3s config and
            in the kubeconfig's cluster URL.
        server_node: pyinfra host name of the k3s server.
        nodes: list of host names that join the cluster besides the server.
        admin_from: host names that get the admin tooling (podman registry
            config, skaffold, kubectl link, kubeconfig).
        k3s_version: k3s release tag to install.
    """
    registry = 'reg:5000'
    k3s_dir = '/etc/rancher/k3s'
    kubeconfig = '/etc/rancher/k3s/k3s.yaml'

    cluster_members = nodes + [server_node]
    if host.name in cluster_members:
        host_prep()
        files.directory(path=k3s_dir)

        # docs: https://rancher.com/docs/k3s/latest/en/installation/private-registry/
        # user confusions: https://github.com/rancher/k3s/issues/1802
        files.template(
            src='templates/kube/registries.yaml.j2',
            dest='/etc/rancher/k3s/registries.yaml',
            reg=registry,
        )
        # also note that podman dropped the default `docker.io/` prefix on image names
        # (see https://unix.stackexchange.com/a/701785/419418)
        config_and_run_service(k3s_version, server_node, server_ip)

    if host.name in admin_from:
        podman_insecure_registry(reg=registry)
        files.directory(path=k3s_dir)
        install_skaffold()
        files.link(path='/usr/local/bin/kubectl', target='/usr/local/bin/k3s')
        files.directory(path='/home/drewp/.kube', user='drewp', group='drewp')

        # assumes our pyinfra process is running on server_node
        files.put(
            src=kubeconfig,
            dest=kubeconfig,
            user='root',
            group='drewp',
            mode='640',
        )
        set_cluster_cmd = f"kubectl config set-cluster default --server=https://{server_ip}:6443 --kubeconfig={kubeconfig}"
        server.shell(set_cluster_cmd)
204
ad6e997fd323
prepare for k8s server host and registry host to change
drewp@bigasterisk.com
parents:
203
diff
changeset
|
162 |
ad6e997fd323
prepare for k8s server host and registry host to change
drewp@bigasterisk.com
parents:
203
diff
changeset
|
163 |
# Deploy entry point: the cluster layout applied to whichever host pyinfra is processing.
make_cluster(
    server_ip="10.5.0.7",
    server_node='ditto',
    nodes=['slash', 'dash', 'bang'],
    admin_from=['bang', 'slash', 'dash', 'ditto'],
    k3s_version='v1.29.1+k3s1',
)