changeset 254:11b738d4c4ae

work on k8s/nvidia setup
author drewp@bigasterisk.com
date Thu, 25 Jan 2024 18:50:44 -0800
parents 67fb7b27bfea
children 3f57cb70d592
files apt.py kube.py package_lists.py packages.py templates/sources.list.j2
diffstat 5 files changed, 17 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/apt.py	Tue Jan 09 19:56:01 2024 -0800
+++ b/apt.py	Thu Jan 25 18:50:44 2024 -0800
@@ -31,6 +31,7 @@
             ('https://ftp-master.debian.org/keys/archive-key-11-security.asc', 'bullseye-security.gpg'),
             ('https://packages.cloud.google.com/apt/doc/apt-key.gpg', 'coral.gpg'),
             ('https://hub.unity3d.com/linux/keys/public', 'unityhub.gpg'),
+            ('https://nvidia.github.io/libnvidia-container/gpgkey', 'nvidia.gpg'),
         ]
     ])
     if host.get_fact(Arch) == 'armv7l' or host.name == 'bang':  # I mean raspbian/debian
--- a/kube.py	Tue Jan 09 19:56:01 2024 -0800
+++ b/kube.py	Thu Jan 25 18:50:44 2024 -0800
@@ -90,8 +90,13 @@
         dest=f'/etc/systemd/system/{service_name}',
         role=role,
     )
+    if host.name in ['dash', 'bang', 'ditto']:
+        setupNvidiaToolkit()
     systemd.service(service=service_name, daemon_reload=True, enabled=True, restarted=True)
 
+def setupNvidiaToolkit():
+    server.shell('nvidia-ctk runtime configure --runtime=containerd --config /var/lib/rancher/k3s/agent/etc/containerd/config.toml')
+    # The caller then restarts k3s (which includes containerd) so the new runtime config is loaded. NOTE(review): k3s may regenerate this config.toml on start -- confirm the edit survives a restart.
 
 def make_cluster(
         server_ip = "10.5.0.1",
--- a/package_lists.py	Tue Jan 09 19:56:01 2024 -0800
+++ b/package_lists.py	Thu Jan 25 18:50:44 2024 -0800
@@ -306,3 +306,6 @@
     'screen-resolution-extra',
     'xserver-xorg',
 ]
+k8s_node_with_nvidia_gpu = [  # apt packages installed (by packages.py) only on the k8s hosts listed there as having an NVIDIA GPU
+    'nvidia-container-toolkit',
+    ]
--- a/packages.py	Tue Jan 09 19:56:01 2024 -0800
+++ b/packages.py	Thu Jan 25 18:50:44 2024 -0800
@@ -82,6 +82,9 @@
 if host.name == 'plus':
     apt.packages(packages=package_lists.laptop, **kw)
 
+if host.name in ['dash', 'bang', 'ditto']:
+    apt.packages(packages=package_lists.k8s_node_with_nvidia_gpu, **kw)
+
 
 if not is_pi:
     apt.packages(packages=package_lists.non_pi, **kw)
--- a/templates/sources.list.j2	Tue Jan 09 19:56:01 2024 -0800
+++ b/templates/sources.list.j2	Thu Jan 25 18:50:44 2024 -0800
@@ -11,6 +11,11 @@
 deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_18.x nodistro main
 {% endif %}
 
+# k8s node with nvidia gpu
+{% if host.name in ['dash', 'bang', 'ditto'] %}
+deb [signed-by=/etc/apt/keyrings/nvidia.gpg] https://nvidia.github.io/libnvidia-container/stable/deb/$(ARCH) /
+{% endif %}
+
 {% if host.name in ['ditto'] %}
 deb [signed-by=/etc/apt/keyrings/coral.gpg] https://packages.cloud.google.com/apt coral-edgetpu-stable main
 {% endif %}