I am trying to use NVIDIA Agent Studio on a Jetson Orin Nano.
When I drag and drop the NanoLLM block, the following error is raised:
14:29:55 | ERROR | Exception occurred handling websocket message: | 1/327 [00:02<14:21, 2.64s/tensors]
{ 'add_plugin': { 'api': 'MLC',
'chat_template': 'auto',
'drop_inputs': 'false',
'layout_node': {'x': 10, 'y': 10},
'model': 'princeton-nlp/Sheared-LLaMA-2.7B-ShareGPT',
'name': 'NanoLLM',
'quantization': 'q4f16_ft',
'type': 'NanoLLM'}}
Traceback (most recent call last):
File "/opt/NanoLLM/nano_llm/web/server.py", line 193, in on_message
callback(payload, payload_size=payload_size, msg_type=msg_type, msg_id=msg_id,
File "/opt/NanoLLM/nano_llm/agents/dynamic_agent.py", line 442, in on_websocket
on_message(self, message)
File "/opt/NanoLLM/nano_llm/agents/dynamic_agent.py", line 432, in on_message
if invoke_handler(obj, key, msg):
File "/opt/NanoLLM/nano_llm/agents/dynamic_agent.py", line 414, in invoke_handler
response = func(**msg)
File "/opt/NanoLLM/nano_llm/agents/dynamic_agent.py", line 58, in add_plugin
threading.Thread(target=self.add_plugin, kwargs={'type': type, 'wait': True, 'state_dict': state_dict, 'layout_node': layout_node, **kwargs}).run()
File "/usr/lib/python3.10/threading.py", line 953, in run
self._target(*self._args, **self._kwargs)
File "/opt/NanoLLM/nano_llm/agents/dynamic_agent.py", line 65, in add_plugin
plugin = DynamicPlugin(type, **init_kwargs)
File "/opt/NanoLLM/nano_llm/plugins/dynamic_plugin.py", line 35, in __new__
instance = plugin(*args, **kwargs)
File "/opt/NanoLLM/nano_llm/plugins/llm/nano_llm.py", line 62, in __init__
self.model = NanoModel.from_pretrained(
File "/opt/NanoLLM/nano_llm/nano_llm.py", line 91, in from_pretrained
model = MLCModel(model_path, **kwargs)
File "/opt/NanoLLM/nano_llm/models/mlc.py", line 60, in __init__
quant = MLCModel.quantize(self.model_path, self.config, method=quantization, max_context_len=max_context_len, **kwargs)
File "/opt/NanoLLM/nano_llm/models/mlc.py", line 276, in quantize
subprocess.run(cmd, executable='/bin/bash', shell=True, check=True)
File "/usr/lib/python3.10/subprocess.py", line 526, in run
raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command 'python3 -m mlc_llm.build --model /data/models/mlc/dist/models/Sheared-LLaMA-2.7B-ShareGPT --quantization q4f16_ft --target cuda --use-cuda-graph --use-flash-attn-mqa --sep-embed --max-seq-len 4096 --artifact-path /data/models/mlc/dist/Sheared-LLaMA-2.7B-ShareGPT/ctx4096 ' died with <Signals.SIGKILL: 9>.
Any idea what I am doing wrong?