update
This commit is contained in:
@@ -0,0 +1,8 @@
|
||||
---
# Role dependencies. The NVIDIA GPU accelerator role is pulled in only when
# `nvidia_accelerator_enabled` evaluates to true.
dependencies:
  - role: kubernetes-apps/container_engine_accelerator/nvidia_gpu
    tags: [apps, nvidia_gpu, container_engine_accelerator]
    when: nvidia_accelerator_enabled
|
||||
@@ -0,0 +1,14 @@
|
||||
---
# Default variables for the NVIDIA GPU accelerator role.

# Gate for the whole feature; disabled unless explicitly turned on.
nvidia_accelerator_enabled: false

# Driver release; also used to build the installer file name in
# `nvidia_url_end`. Quoted so it stays a string rather than a float.
nvidia_driver_version: "390.87"

# Base download locations. The tasks pick one of them according to
# `nvidia_gpu_flavor` ("tesla" or "gtx", compared case-insensitively).
nvidia_gpu_tesla_base_url: https://us.download.nvidia.com/tesla/
# Served over HTTPS, like the Tesla URL on the same host, so the driver
# installer is not fetched over an unauthenticated plain-HTTP connection.
nvidia_gpu_gtx_base_url: https://us.download.nvidia.com/XFree86/Linux-x86_64/
nvidia_gpu_flavor: tesla

# Path appended to the selected base URL: "<version>/NVIDIA-Linux-x86_64-<version>.run".
nvidia_url_end: "{{ nvidia_driver_version }}/NVIDIA-Linux-x86_64-{{ nvidia_driver_version }}.run"

# Driver-installer image. It stays false (nothing is templated or applied)
# unless a per-distribution vars file replaces it with one of the image
# references below.
nvidia_driver_install_container: false
nvidia_driver_install_centos_container: atzedevries/nvidia-centos-driver-installer:2
nvidia_driver_install_ubuntu_container: registry.k8s.io/ubuntu-nvidia-driver-installer@sha256:7df76a0f0a17294e86f691c81de6bbb7c04a1b4b3d4ea4e7e2cccdc42e1f6d63

# Set to true by the per-distribution vars files; the manifests are applied
# only when this is true.
nvidia_driver_install_supported: false

# Image used by the device-plugin DaemonSet.
nvidia_gpu_device_plugin_container: "registry.k8s.io/nvidia-gpu-device-plugin@sha256:0842734032018be107fa2490c98156992911e3e1f2a21e059ff0105b07dd8e9e"

# NOTE(review): not referenced by the tasks or templates visible in this
# change; presumably consumed elsewhere - confirm.
nvidia_gpu_nodes: []

# Used as both the memory request and the memory limit of the device-plugin
# container.
nvidia_gpu_device_plugin_memory: 30Mi
|
||||
@@ -0,0 +1,55 @@
|
||||
---
|
||||
|
||||
# Load the first vars file that exists, trying the most specific candidate
# name first (distribution + full version) and the least specific last
# (OS family). `skip: true` lets the task pass when none of them exists.
- name: Container Engine Acceleration Nvidia GPU | gather os specific variables
  include_vars: "{{ item }}"
  with_first_found:
    - files:
        - "{{ ansible_distribution | lower }}-{{ ansible_distribution_version | lower | replace('/', '_') }}.yml"
        - "{{ ansible_distribution | lower }}-{{ ansible_distribution_release }}.yml"
        - "{{ ansible_distribution | lower }}-{{ ansible_distribution_major_version | lower | replace('/', '_') }}.yml"
        - "{{ ansible_distribution | lower }}.yml"
        - "{{ ansible_os_family | lower }}.yml"
      skip: true
|
||||
|
||||
# Tesla flavor: the download URL is the Tesla base URL followed by the
# versioned installer path.
- name: Container Engine Acceleration Nvidia GPU | Set fact of download url Tesla
  when: nvidia_gpu_flavor | lower == 'tesla'
  set_fact:
    nvidia_driver_download_url_default: "{{ nvidia_gpu_tesla_base_url }}{{ nvidia_url_end }}"
|
||||
|
||||
# GTX flavor: the download URL is the GTX base URL followed by the versioned
# installer path.
- name: Container Engine Acceleration Nvidia GPU | Set fact of download url GTX
  when: nvidia_gpu_flavor | lower == 'gtx'
  set_fact:
    nvidia_driver_download_url_default: "{{ nvidia_gpu_gtx_base_url }}{{ nvidia_url_end }}"
|
||||
|
||||
# Make sure the directory that receives the rendered accelerator manifests
# exists, owned by root:root. `recurse` applies owner/group/mode to existing
# contents as well.
- name: Container Engine Acceleration Nvidia GPU | Create addon dir
  file:
    path: "{{ kube_config_dir }}/addons/container_engine_accelerator"
    # Stated explicitly instead of relying on the default implied by `recurse`.
    state: directory
    owner: root
    group: root
    # Quoted so the module receives the octal string itself rather than a
    # number reinterpreted by the YAML parser.
    mode: "0755"
    recurse: true
|
||||
|
||||
# Render both DaemonSet templates into the addon directory. Each item carries
# the template file name plus the resource name/type that the apply task reads
# back from the registered results.
- name: Container Engine Acceleration Nvidia GPU | Create manifests for nvidia accelerators
  template:
    src: "{{ item.file }}.j2"
    dest: "{{ kube_config_dir }}/addons/container_engine_accelerator/{{ item.file }}"
    # Quoted so the module receives the octal string itself rather than a
    # number reinterpreted by the YAML parser.
    mode: "0644"
  with_items:
    - name: nvidia-driver-install-daemonset
      file: nvidia-driver-install-daemonset.yml
      type: daemonset
    - name: k8s-device-plugin-nvidia-daemonset
      file: k8s-device-plugin-nvidia-daemonset.yml
      type: daemonset
  register: container_engine_accelerator_manifests
  # Runs only on the first control-plane host, and only when an installer image
  # has been configured (the variable is false by default).
  when:
    - inventory_hostname == groups['kube_control_plane'][0] and nvidia_driver_install_container
|
||||
|
||||
# Apply every manifest rendered by the previous task. `item.item` is the
# original loop entry (name/file/type) preserved inside each registered result.
- name: Container Engine Acceleration Nvidia GPU | Apply manifests for nvidia accelerators
  when:
    - inventory_hostname == groups['kube_control_plane'][0] and nvidia_driver_install_container and nvidia_driver_install_supported
  with_items: "{{ container_engine_accelerator_manifests.results }}"
  kube:
    kubectl: "{{ bin_dir }}/kubectl"
    filename: "{{ kube_config_dir }}/addons/container_engine_accelerator/{{ item.item.file }}"
    resource: "{{ item.item.type }}"
    name: "{{ item.item.name }}"
    namespace: kube-system
    state: latest
|
||||
@@ -0,0 +1,60 @@
|
||||
# DaemonSet running the NVIDIA GPU device-plugin container in kube-system.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nvidia-gpu-device-plugin
  namespace: kube-system
  labels:
    k8s-app: nvidia-gpu-device-plugin
    addonmanager.kubernetes.io/mode: Reconcile
spec:
  selector:
    matchLabels:
      k8s-app: nvidia-gpu-device-plugin
  template:
    metadata:
      labels:
        k8s-app: nvidia-gpu-device-plugin
    spec:
      priorityClassName: system-node-critical
      # Schedule only onto nodes that carry the "nvidia.com/gpu" label.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: "nvidia.com/gpu"
                    operator: Exists
      # Tolerate every NoExecute and NoSchedule taint.
      tolerations:
        - operator: "Exists"
          effect: "NoExecute"
        - operator: "Exists"
          effect: "NoSchedule"
      hostNetwork: true
      dnsPolicy: ClusterFirstWithHostNet
      hostPID: true
      volumes:
        - name: device-plugin
          hostPath:
            path: /var/lib/kubelet/device-plugins
        - name: dev
          hostPath:
            path: /dev
      containers:
        - image: "{{ nvidia_gpu_device_plugin_container }}"
          command: ["/usr/bin/nvidia-gpu-device-plugin", "-logtostderr"]
          name: nvidia-gpu-device-plugin
          resources:
            # Templated quantities are quoted so an empty or numeric-looking
            # expansion is not retyped by the YAML parser.
            requests:
              cpu: 50m
              memory: "{{ nvidia_gpu_device_plugin_memory }}"
            limits:
              cpu: 50m
              memory: "{{ nvidia_gpu_device_plugin_memory }}"
          securityContext:
            privileged: true
          volumeMounts:
            - name: device-plugin
              mountPath: /device-plugin
            - name: dev
              mountPath: /dev
  updateStrategy:
    type: RollingUpdate
|
||||
@@ -0,0 +1,82 @@
|
||||
# Copyright 2017 Google Inc. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# DaemonSet whose init container runs the configured driver-installer image;
# the long-running container is the pod-infra (pause) image.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  namespace: kube-system
  name: nvidia-driver-installer
spec:
  selector:
    matchLabels:
      name: nvidia-driver-installer
  template:
    metadata:
      labels:
        name: nvidia-driver-installer
    spec:
      priorityClassName: system-node-critical
      hostNetwork: true
      hostPID: true
      dnsPolicy: ClusterFirstWithHostNet
      # Schedule only onto nodes that carry the "nvidia.com/gpu" label.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: "nvidia.com/gpu"
                    operator: Exists
      # Tolerate the "nvidia.com/gpu" NoSchedule taint.
      tolerations:
        - key: "nvidia.com/gpu"
          operator: "Exists"
          effect: "NoSchedule"
      volumes:
        - name: dev
          hostPath:
            path: /dev
        - name: nvidia-install-dir-host
          hostPath:
            path: /home/kubernetes/bin/nvidia
        - name: root-mount
          hostPath:
            path: /
      initContainers:
        - name: nvidia-driver-installer
          image: "{{ nvidia_driver_install_container }}"
          securityContext:
            privileged: true
          resources:
            requests:
              cpu: 0.15
          # Paths below mirror the volume mounts: the host install dir is
          # mounted at /usr/local/nvidia and the host root at /root.
          env:
            - name: NVIDIA_INSTALL_DIR_HOST
              value: /home/kubernetes/bin/nvidia
            - name: NVIDIA_INSTALL_DIR_CONTAINER
              value: /usr/local/nvidia
            - name: ROOT_MOUNT_DIR
              value: /root
            - name: NVIDIA_DRIVER_VERSION
              value: "{{ nvidia_driver_version }}"
            - name: NVIDIA_DRIVER_DOWNLOAD_URL
              value: "{{ nvidia_driver_download_url_default }}"
          volumeMounts:
            - name: nvidia-install-dir-host
              mountPath: /usr/local/nvidia
            - name: dev
              mountPath: /dev
            - name: root-mount
              mountPath: /root
      containers:
        - name: pause
          image: "{{ pod_infra_image_repo }}:{{ pod_infra_image_tag }}"
|
||||
@@ -0,0 +1,3 @@
|
||||
---
# Distribution override: use the CentOS installer image and mark driver
# installation as supported.
nvidia_driver_install_supported: true
nvidia_driver_install_container: '{{ nvidia_driver_install_centos_container }}'
|
||||
@@ -0,0 +1,3 @@
|
||||
---
# Distribution override: use the Ubuntu installer image and mark driver
# installation as supported.
nvidia_driver_install_supported: true
nvidia_driver_install_container: '{{ nvidia_driver_install_ubuntu_container }}'
|
||||
@@ -0,0 +1,3 @@
|
||||
---
# Distribution override: use the Ubuntu installer image and mark driver
# installation as supported.
nvidia_driver_install_supported: true
nvidia_driver_install_container: '{{ nvidia_driver_install_ubuntu_container }}'
|
||||
Reference in New Issue
Block a user