I time-sliced my GPU into 4 replicas.
microk8s kubectl describe node sigmind-survey | grep -A8 Capacity
Capacity:
cpu: 8
ephemeral-storage: 459850824Ki
hugepages-1Gi: 0
hugepages-2Mi: 0
memory: 32813884Ki
nvidia.com/gpu: 4
pods: 110
Allocatable:
But when I try to deploy the following YAML, I get the error shown further below.
.yaml
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
# Global and nvcf settings: the NGC image pull secret name is an empty string
# and the three nvcf lists below are all empty.
# NOTE(review): every line of this paste starts at column 0, so the original
# nesting is lost; restore the indentation before using this as a values file.
global: {ngcImagePullSecretName: ""}
nvcf:
dockerRegSecrets: []
additionalSecrets: []
localStorageProvisioner: []
# vss section: environment variables for the `vss` container of the
# `vss-deployment` application spec.
# NOTE(review): indentation was stripped from this paste; the nesting implied
# by the key order must be restored before this parses as intended.
vss:
applicationSpecs:
vss-deployment:
containers:
vss:
env:
# Ports passed to the container; quoted so they stay strings.
- name: FRONTEND_PORT
value: '9000'
- name: BACKEND_PORT
value: '8000'
# Graph database connection; host/port match the neo4j-bolt egress entry.
- name: GRAPH_DB_URI
value: bolt://neo-4-j-service:7687
- name: GRAPH_DB_USERNAME
value: neo4j
# NOTE(review): plaintext credential. The same file mounts
# graph-db-creds-secret as a volume; consider sourcing this from a Secret.
- name: GRAPH_DB_PASSWORD
value: password
# Milvus connection; host/port match the milvus egress entry.
- name: MILVUS_DB_HOST
value: milvus-milvus-deployment-milvus-service
- name: MILVUS_DB_PORT
value: '19530'
# VLM selection: openai-compat is active, the vila-1.5 option is commented out.
- name: VLM_MODEL_TO_USE
# value: vila-1.5
value: openai-compat
# Read from key OPENAI_API_KEY of the Secret openai-api-key-secret.
- name: OPENAI_API_KEY
valueFrom:
secretKeyRef:
name: openai-api-key-secret
key: OPENAI_API_KEY
# - name: MODEL_PATH
# value: ngc:nim/nvidia/vila-1.5-40b:vila-yi-34b-siglip-stage3_1003_video_v8
# Quoted so the value is the string 'false', not a YAML boolean.
- name: DISABLE_GUARDRAILS
value: 'false'
- name: OPENAI_API_KEY_NAME
value: VSS_OPENAI_API_KEY
- name: NVIDIA_API_KEY_NAME
value: VSS_NVIDIA_API_KEY
- name: NGC_API_KEY_NAME
value: VSS_NGC_API_KEY
- name: TRT_LLM_MODE
value: int4_awq
# The remaining variables are set to empty strings.
# NOTE(review): with VLM_MODEL_TO_USE=openai-compat the VIA_VLM_* values
# presumably need real endpoint/model settings - confirm against the VSS docs.
- name: VLM_BATCH_SIZE
value: ''
- name: VIA_VLM_OPENAI_MODEL_DEPLOYMENT_NAME
value: ''
- name: VIA_VLM_ENDPOINT
value: ''
- name: VIA_VLM_API_KEY
value: ''
- name: OPENAI_API_VERSION
value: ''
- name: AZURE_OPENAI_API_VERSION
value: ''
# - name: NVIDIA_VISIBLE_DEVICES
# value: "0"
# Init containers that wait for dependencies before the main container starts.
initContainers:
# Loops until a TCP connection to the Milvus service on port 19530 succeeds
# (nc -z with a 2 second timeout), sleeping 2 seconds between attempts.
- command:
- sh
- -c
- until nc -z -w 2 milvus-milvus-deployment-milvus-service 19530; do echo
waiting for milvus; sleep 2; done
image: busybox:1.28
imagePullPolicy: IfNotPresent
name: check-milvus-up
# Same wait loop for the Neo4j bolt port 7687.
- command:
- sh
- -c
- until nc -z -w 2 neo-4-j-service 7687; do echo waiting for neo4j; sleep
2; done
image: busybox:1.28
imagePullPolicy: IfNotPresent
name: check-neo4j-up
# Polls the LLM liveness endpoint with curl every 2 seconds until it returns
# success (-f makes HTTP errors count as failure).
- args:
- "while ! curl -s -f -o /dev/null http://llm-nim-svc:8000/v1/health/live;\
\ do\n echo \"Waiting for LLM...\"\n sleep 2\ndone\n"
command:
- sh
- -c
# NOTE(review): unpinned `latest` tag, unlike the pinned busybox:1.28 images
# above; consider pinning a specific version.
image: curlimages/curl:latest
name: check-llm-up
# LLM model names used by vss; both point at the same llama-3.1-8b-instruct model.
llmModel: meta/llama-3.1-8b-instruct
llmModelChat: meta/llama-3.1-8b-instruct
# GPU limit: one nvidia.com/gpu resource.
resources:
limits:
nvidia.com/gpu: 1
# The VILA model path/type settings are disabled (commented out).
# vlmModelPath: ngc:nim/nvidia/vila-1.5-40b:vila-yi-34b-siglip-stage3_1003_video_v8
# vlmModelType: vila-1.5
# Embedded config files, keyed by file name.
configs:
# ca_rag_config.yaml: service endpoints for the chat and summarization
# sections. Both use the same embedding service and the same LLM service/model;
# chat additionally sets a reranker endpoint.
ca_rag_config.yaml:
chat:
embedding:
base_url: http://nemo-embedding-embedding-deployment-embedding-service:8000/v1
llm:
base_url: http://llm-nim-svc:8000/v1
model: meta/llama-3.1-8b-instruct
reranker:
base_url: http://nemo-rerank-ranking-deployment-ranking-service:8000/v1
summarization:
embedding:
base_url: http://nemo-embedding-embedding-deployment-embedding-service:8000/v1
llm:
base_url: http://llm-nim-svc:8000/v1
model: meta/llama-3.1-8b-instruct
# guardrails_config.yaml: two model entries - a `main` model served by the LLM
# service and an `embeddings` model served by the embedding service.
guardrails_config.yaml:
models:
- engine: nim
model: meta/llama-3.1-8b-instruct
parameters:
base_url: http://llm-nim-svc:8000/v1
type: main
- engine: nim_patch
model: nvidia/llama-3.2-nv-embedqa-1b-v2
parameters:
base_url: http://nemo-embedding-embedding-deployment-embedding-service:8000/v1
type: embeddings
# Extra pod volumes: each projects a single key of a Secret to a named file.
extraPodVolumes:
# NGC_API_KEY from ngc-api-key-secret -> file ngc-api-key
- name: secret-ngc-api-key-volume
secret:
secretName: ngc-api-key-secret
items:
- key: NGC_API_KEY
path: ngc-api-key
# username from graph-db-creds-secret -> file graph-db-username
- name: secret-graph-db-username-volume
secret:
secretName: graph-db-creds-secret
items:
- key: username
path: graph-db-username
# password from graph-db-creds-secret -> file graph-db-password
- name: secret-graph-db-password-volume
secret:
secretName: graph-db-creds-secret
items:
- key: password
path: graph-db-password
# Matching mounts: each volume's single file is mounted read-only under
# /secrets/ via subPath, so the mount path is the file itself.
extraPodVolumeMounts:
- name: secret-ngc-api-key-volume
mountPath: /secrets/ngc-api-key
subPath: ngc-api-key
readOnly: true
- name: secret-graph-db-username-volume
mountPath: /secrets/graph-db-username
subPath: graph-db-username
readOnly: true
- name: secret-graph-db-password-volume
mountPath: /secrets/graph-db-password
subPath: graph-db-password
readOnly: true
# Egress targets for vss: address/port pairs for Milvus, Neo4j (bolt), the LLM
# service, and the embedding and rerank services. The hosts and ports match the
# ones used in the env vars, init containers and configs of this file.
egress:
milvus:
address: milvus-milvus-deployment-milvus-service
port: 19530
neo4j-bolt:
address: neo-4-j-service
port: 7687
llm-openai-api:
address: llm-nim-svc
port: 8000
nemo-embed:
address: nemo-embedding-embedding-deployment-embedding-service
port: 8000
nemo-rerank:
address: nemo-rerank-ranking-deployment-ranking-service
port: 8000
# milvus section: environment for the `milvus-container` of the
# `milvus-deployment` application spec, plus its egress targets.
# NOTE(review): indentation was stripped from this paste; restore the nesting
# before use.
milvus:
applicationSpecs:
milvus-deployment:
containers:
milvus-container:
env:
# etcd and MinIO endpoints; same hosts/ports as the egress entries below.
- name: ETCD_ENDPOINTS
value: etcd-etcd-deployment-etcd-service:2379
- name: MINIO_ADDRESS
value: minio-minio-deployment-minio-service:9010
# NOTE(review): looks like two pool sizes separated by ';' - units and meaning
# are not visible here; confirm against the Milvus GPU documentation.
- name: KNOWHERE_GPU_MEM_POOL_SIZE
value: 2048;4096
egress:
etcd:
address: etcd-etcd-deployment-etcd-service
port: 2379
minio:
address: minio-minio-deployment-minio-service
port: 9010
# neo4j section: projects the username and password keys of
# graph-db-creds-secret to files and mounts each read-only under /secrets/.
# NOTE(review): indentation was stripped from this paste; restore the nesting
# before use.
neo4j:
extraPodVolumes:
- name: secret-db-username-volume
secret:
secretName: graph-db-creds-secret
items:
- key: username
path: db-username
- name: secret-db-password-volume
secret:
secretName: graph-db-creds-secret
items:
- key: password
path: db-password
# subPath mounts the single projected file at the mount path itself.
extraPodVolumeMounts:
- name: secret-db-username-volume
mountPath: /secrets/db-username
subPath: db-username
readOnly: true
- name: secret-db-password-volume
mountPath: /secrets/db-password
subPath: db-password
readOnly: true
# nim-llm section: image, GPU limit, model, storage and service name for the
# LLM NIM. The service name llm-nim-svc is the host that the vss init
# container, configs and egress entries in this file point at.
# NOTE(review): indentation was stripped from this paste; restore the nesting
# before use.
nim-llm:
# env:
# - name: NVIDIA_VISIBLE_DEVICES
# value: "0"
image:
repository: nvcr.io/nim/meta/llama-3.1-8b-instruct
tag: 1.3.0
# GPU limit: one nvidia.com/gpu resource.
resources:
limits:
nvidia.com/gpu: 1
model:
name: meta/llama-3.1-8b-instruct
# Name of the Secret holding the NGC API key (same Secret vss mounts).
ngcAPISecret: ngc-api-key-secret
# NOTE(review): with indentation lost it is unclear whether `hostPath` is a
# sibling of `persistence` or nested under it - confirm against the chart.
persistence:
enabled: true
hostPath:
enabled: true
service:
name: llm-nim-svc
llmModel: meta/llama-3.1-8b-instruct
# nemo-embedding section: limits the embedding service to one nvidia.com/gpu
# resource. The NVIDIA_VISIBLE_DEVICES override is disabled (commented out).
nemo-embedding:
# env:
# - name: NVIDIA_VISIBLE_DEVICES
# value: "0"
resources:
limits:
nvidia.com/gpu: 1
# nemo-rerank section: limits the rerank service to one nvidia.com/gpu
# resource. The NVIDIA_VISIBLE_DEVICES override is disabled (commented out).
nemo-rerank:
# env:
# - name: NVIDIA_VISIBLE_DEVICES
# value: "0"
resources:
limits:
nvidia.com/gpu: 1
Error:
The use of this model is governed by the NVIDIA AI Foundation Models Community License Agreement (found at https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-ai-foundation-models-community-license-agreement/).
Third Party Software Attributions and Licenses can be found under /opt/nim/NOTICE
WARNING: The NVIDIA Driver was not detected. GPU functionality will not be available.
Use the NVIDIA Container Toolkit to start this container with GPU support; see
https://docs.nvidia.com/datacenter/cloud-native/ .
libnvidia-ml.so.1 not found under /usr.
containerd-template.toml
# containerd configuration template (config schema version 2).
# The ${...} placeholders are presumably substituted when the template is
# rendered into the live containerd config - confirm against the MicroK8s docs.
version = 2
oom_score = 0

[grpc]
  uid = 0
  gid = 0
  max_recv_message_size = 16777216
  max_send_message_size = 16777216

[debug]
  address = ""
  uid = 0
  gid = 0

[metrics]
  address = "127.0.0.1:1338"
  grpc_histogram = false

[cgroup]
  path = ""

# CRI plugin settings used for Kubernetes pods.
[plugins."io.containerd.grpc.v1.cri"]
  stream_server_address = "127.0.0.1"
  stream_server_port = "0"
  enable_selinux = false
  sandbox_image = "registry.k8s.io/pause:3.7"
  stats_collect_period = 10
  enable_tls_streaming = false
  max_container_log_line_size = 16384

  [plugins."io.containerd.grpc.v1.cri".containerd]
    snapshotter = "${SNAPSHOTTER}"
    no_pivot = false
    # Use the "nvidia" handler defined below as the default runtime. With the
    # templated "${RUNTIME}" value, pods that do not set runtimeClassName can
    # start under plain runc, in which case the NVIDIA hook never injects the
    # driver libraries and containers log "The NVIDIA Driver was not detected"
    # / "libnvidia-ml.so.1 not found". Restart containerd after changing this.
    default_runtime_name = "nvidia"

    [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
      runtime_type = "${RUNTIME_TYPE}"

    # Original NVIDIA runtime configuration
    [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-container-runtime]
      runtime_type = "${RUNTIME_TYPE}"

      [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia-container-runtime.options]
        BinaryName = "nvidia-container-runtime"

    # Added new NVIDIA runtime configuration that matches what the pods expect
    # NOTE(review): BinaryName is resolved by containerd; if the binary is not on
    # containerd's PATH, use its absolute path instead.
    [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
      runtime_type = "${RUNTIME_TYPE}"

      [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
        BinaryName = "nvidia-container-runtime"

    [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.kata]
      runtime_type = "io.containerd.kata.v2"

      [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.kata.options]
        BinaryName = "kata-runtime"

  # CNI plugin binaries and network configuration directories.
  [plugins."io.containerd.grpc.v1.cri".cni]
    bin_dir = "${SNAP_DATA}/opt/cni/bin"
    conf_dir = "${SNAP_DATA}/args/cni-network"

  # Per-registry host configuration directory.
  [plugins."io.containerd.grpc.v1.cri".registry]
    config_path = "${SNAP_DATA}/args/certs.d"