Hi,
I followed the instructions for deploying VSS on AWS cloud nodes and came up with this config.yml file:
# Deployment config for VSS on AWS (config.yml).
# All "{{ ... }}" values are Jinja2 expressions resolved from environment
# variables or local files at deploy time; no secrets are stored inline.
# NOTE(review): the nesting below was reconstructed from the key names after
# the indentation was lost in a paste -- confirm it against the reference
# template before deploying.
schema_version: '0.0.9'
name: "via-aws-cns-{{ lookup('env', 'VIA_DEPLOY_ENV') }}"
spec:
  infra:
    csp: 'aws'
    # Remote state backend settings (S3 bucket + DynamoDB table).
    backend:
      access_key: "{{ lookup('env', 'AWS_ACCESS_KEY_ID') }}"
      secret_key: "{{ lookup('env', 'AWS_SECRET_ACCESS_KEY') }}"
      dynamodb_table: "{{ lookup('env', 'VIA_DEPLOY_AWS_DYT') }}"
      bucket: "{{ lookup('env', 'VIA_DEPLOY_AWS_S3B') }}"
      region: "{{ lookup('env', 'VIA_DEPLOY_AWS_S3BR') }}"
      encrypt: true
    # Credentials used to create the AWS resources themselves.
    provider:
      access_key: "{{ lookup('env', 'AWS_ACCESS_KEY_ID') }}"
      secret_key: "{{ lookup('env', 'AWS_SECRET_ACCESS_KEY') }}"
    configs:
      cns:
        # NOTE(review): unquoted, so YAML loads this as a float (12.2).
        # Left as-is because the type the consumer expects is not visible
        # here; quote it if a value such as 12.10 is ever needed.
        version: 12.2
        # Quoted so the commit SHA is always read as a string.
        git_ref: '4d97cb7e8ca6e45fe9252888b7a918b2677f1fc9'
        override_values:
          # Canonical boolean instead of the YAML 1.1-only spelling "yes".
          cns_nvidia_driver: true
          gpu_driver_version: '535.216.03'
      # CIDR ranges listed for access to the deployment.
      access_cidrs:
        - '99.79.65.21/32'
      region: 'ca-central-1'
      ssh_public_key: "{{ lookup('file', lookup('env', 'HOME') + '/.ssh/id_rsa.pub') }}"
      ssh_private_key_path: "{{ lookup('env', 'HOME') + '/.ssh/id_rsa' }}"
      additional_ssh_public_keys:
        - "{{ lookup('file', lookup('env', 'HOME') + '/.ssh/my-colleague-1.pub') }}"
      clusters:
        app:
          private_instance: false
          master:
            type: 'p5.48xlarge'
            az: 'ca-central-1c'
            labels: {}
            taints: []
            capacity_reservation_id: 'cr-3b7e4c9f1a6d8e2b'
          # Optional worker nodes, currently disabled.
          # NOTE(review): the az values below are a region name
          # ('ca-central-1'); master.az above uses a zone ('ca-central-1c'),
          # so these likely need a zone suffix before being enabled.
          # nodes:
          #   A100:
          #     type: 'p4d.24xlarge'
          #     az: 'ca-central-1'
          #     labels: {}
          #     taints: []
          #     capacity_reservation_id: 'cr-foobar'
          #   L40S:
          #     type: 'g6e.48xlarge'
          #     az: 'ca-central-1'
          #     labels: {}
          #     taints: []
          # Named ports; app.configs refers to them by these names.
          ports:
            backend:
              port: 30081
            frontend:
              port: 30082
          features:
            cns: true
            platform: true
            app: true
  platform:
    configs:
      namespace: 'default'
  app:
    configs:
      namespace: 'default'
      # These reference the port names defined under clusters.app.ports.
      backend_port: 'backend'
      frontend_port: 'frontend'
      ngc_api_key: "{{ lookup('env', 'NGC_API_KEY') }}"
      openai_api_key: "{{ lookup('env', 'OPENAI_API_KEY') }}"
      db_username: 'neo4j'
      # The env lookup yields an empty string (not undefined) when the
      # variable is unset, so default() needs its second argument (true)
      # for the fallback to apply to empty values.
      db_password: "{{ lookup('env', 'VIA_DB_PASSWORD') | default('password', true) }}"
      vss_chart:
        repo:
          name: 'nvidia-blueprint'
          url: 'https://helm.ngc.nvidia.com/nvidia/blueprint'
        chart: 'nvidia-blueprint-vss'
        version: '2.1.0'
        # Optional Helm values override file; leave commented out when no
        # override file exists.
        # override_values_file_absolute_path: '/home/nvidia/aws/dist/override.yaml'
All environment variables are set properly, but I couldn’t find the override.yaml file referenced in the commented-out line, so I left that line commented out. I get this error every time I try to deploy VSS with this config, and I remove the tmp folder after each attempt.