When I test it, it fails and displays this:
python3 -m local_llm.agents.video_query --api=mlc --verbose \
  --model /data/models/text-generation-webui/llava-v1.5-7b \
  --max-new-tokens 32 \
  --video-input /dev/video1 \
  --video-output webrtc://@:8554/output \
  --prompt "Describe the image concisely."
/usr/local/lib/python3.10/dist-packages/transformers/utils/hub.py:124: FutureWarning: Using TRANSFORMERS_CACHE is deprecated and will be removed in v5 of Transformers. Use HF_HOME instead.
  warnings.warn(
10:57:10 | DEBUG | Namespace(model='/data/models/text-generation-webui/llava-v1.5-7b', quant=None, api='mlc', vision_model=None, prompt=['Describe the image concisely.'], save_mermaid=None, chat_template=None, system_prompt=None, max_new_tokens=32, min_new_tokens=-1, do_sample=False, temperature=0.7, top_p=0.95, repetition_penalty=1.0, video_input='/dev/video1', video_input_width=None, video_input_height=None, video_input_codec=None, video_input_framerate=None, video_input_save=None, video_output='webrtc://@:8554/output', video_output_codec=None, video_output_bitrate=None, video_output_save=None, log_level='debug', debug=True)
10:57:10 | DEBUG | subprocess 108 started
10:57:10 | INFO | loading /data/models/text-generation-webui/llava-v1.5-7b with MLC
Process Process-1:
Traceback (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/opt/local_llm/local_llm/agents/video_query.py", line 128, in <module>
    agent = VideoQuery(**vars(args)).run()
  File "/opt/local_llm/local_llm/agents/video_query.py", line 23, in __init__
    self.llm = ProcessProxy((lambda **kwargs: ChatQuery(model, drop_inputs=True, **kwargs)), **kwargs)
  File "/opt/local_llm/local_llm/plugins/process_proxy.py", line 31, in __init__
    raise RuntimeError(f"subprocess has an invalid initialization status ({init_msg['status']})")
RuntimeError: subprocess has an invalid initialization status (<class 'tvm._ffi.base.TVMError'>)
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/usr/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/opt/local_llm/local_llm/plugins/process_proxy.py", line 62, in run_process
    raise error
  File "/opt/local_llm/local_llm/plugins/process_proxy.py", line 59, in run_process
    plugin = factory(**kwargs)
  File "/opt/local_llm/local_llm/agents/video_query.py", line 23, in <lambda>
    self.llm = ProcessProxy((lambda **kwargs: ChatQuery(model, drop_inputs=True, **kwargs)), **kwargs)
  File "/opt/local_llm/local_llm/plugins/chat_query.py", line 63, in __init__
    self.model = LocalLM.from_pretrained(model, **kwargs)
  File "/opt/local_llm/local_llm/local_llm.py", line 72, in from_pretrained
    model = MLCModel(model_path, **kwargs)
  File "/opt/local_llm/local_llm/models/mlc.py", line 72, in __init__
    logging.info(f"device={self.device}, name={self.device.device_name}, compute={self.device.compute_version}, max_clocks={self.device.max_clock_rate}, multiprocessors={self.device.multi_processor_count}, max_thread_dims={self.device.max_thread_dimensions}, api_version={self.device.api_version}, driver_version={self.device.driver_version}")
  File "/usr/local/lib/python3.10/dist-packages/tvm/_ffi/runtime_ctypes.py", line 403, in device_name
    return self._GetDeviceAttr(self.device_type, self.device_id, 5)
  File "/usr/local/lib/python3.10/dist-packages/tvm/_ffi/runtime_ctypes.py", line 303, in _GetDeviceAttr
    return tvm.runtime._ffi_api.GetDeviceAttr(device_type, device_id, attr_id)
  File "tvm/_ffi/_cython/./packed_func.pxi", line 332, in tvm._ffi._cy3.core.PackedFuncBase.__call__
  File "tvm/_ffi/_cython/./packed_func.pxi", line 263, in tvm._ffi._cy3.core.FuncCall
  File "tvm/_ffi/_cython/./packed_func.pxi", line 252, in tvm._ffi._cy3.core.FuncCall3
  File "tvm/_ffi/_cython/./base.pxi", line 182, in tvm._ffi._cy3.core.CHECK_CALL
  File "/usr/local/lib/python3.10/dist-packages/tvm/_ffi/base.py", line 481, in raise_last_ffi_error
    raise py_err
tvm._ffi.base.TVMError: Traceback (most recent call last):
  [bt] (5) /usr/local/lib/python3.10/dist-packages/tvm/libtvm.so(TVMFuncCall+0x68) [0xfffec70dd798]
  [bt] (4) /usr/local/lib/python3.10/dist-packages/tvm/libtvm.so(+0x303d34c) [0xfffec70dd34c]
  [bt] (3) /usr/local/lib/python3.10/dist-packages/tvm/libtvm.so(tvm::runtime::CUDADeviceAPI::GetAttr(DLDevice, tvm::runtime::DeviceAttrKind, tvm::runtime::TVMRetValue*)+0xd28) [0xfffec72075c8]
  [bt] (2) /usr/local/lib/python3.10/dist-packages/tvm/libtvm.so(+0x316452c) [0xfffec720452c]
  [bt] (1) /usr/local/lib/python3.10/dist-packages/tvm/libtvm.so(tvm::runtime::detail::LogFatal::Entry::Finalize()+0x68) [0xfffec52c6508]
  [bt] (0) /usr/local/lib/python3.10/dist-packages/tvm/libtvm.so(tvm::runtime::Backtrace[abi:cxx11]()+0x30) [0xfffec7125380]
  File "/opt/mlc-llm/3rdparty/tvm/src/runtime/cuda/cuda_device_api.cc", line 73
CUDAError: cuDeviceGetName(&name[0], name.size(), dev.device_id) failed with error: CUDA_ERROR_NOT_INITIALIZED
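
For reference, the failing call can be isolated outside the agent with a few lines of TVM (a minimal sketch, assuming the same container and that tvm imports cleanly; device_name is the same property the traceback shows failing from mlc.py line 72):

import tvm

dev = tvm.cuda(0)         # same CUDA device the agent tries to query
print(dev.exist)          # False if the CUDA driver cannot initialize
print(dev.device_name)    # reproduces CUDA_ERROR_NOT_INITIALIZED when broken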