I’m on a Jetson AGX Xavier with JetPack 5.1.2 using the container l4t-ml:r36.2.0-py3 with this command:
docker run -it --rm --runtime nvidia --network host -v /home/jetson/Services/jupyter:/root/.jupyter -v /home/jetson//Apps/:/home/Apps/ -v /home/jetson/Datas/:/home/Datas/ nvcr.io/nvidia/l4t-ml:r36.2.0-py3
I tried to load and use the Mistral 7B model with PyTorch, using the Mistral 7B Instruct notebook:
"""
!pip3 install transformers
"""
"""
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
"""
"""
torch.set_default_device('cuda')
"""
"""
model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1",
torch_dtype="auto")
"""
The `model = …` cell gave me this error:
/usr/local/lib/python3.10/dist-packages/torch/cuda/__init__.py:190: UserWarning:
Found GPU0 Xavier which is of cuda capability 7.2.
PyTorch no longer supports this GPU because it is too old.
The minimum cuda capability supported by this library is 8.7.
warnings.warn(
/usr/local/lib/python3.10/dist-packages/torch/cuda/__init__.py:215: UserWarning:
Xavier with CUDA capability sm_72 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_87.
If you want to use the Xavier GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/
warnings.warn(
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Cell In[4], line 1
----> 1 model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1",
2 torch_dtype="auto")
File /usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py:566, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
564 elif type(config) in cls._model_mapping.keys():
565 model_class = _get_model_class(config, cls._model_mapping)
--> 566 return model_class.from_pretrained(
567 pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
568 )
569 raise ValueError(
570 f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"
571 f"Model type should be one of {', '.join(c.__name__ for c in cls._model_mapping.keys())}."
572 )
File /usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py:3594, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)
3588 config = cls._autoset_attn_implementation(
3589 config, use_flash_attention_2=use_flash_attention_2, torch_dtype=torch_dtype, device_map=device_map
3590 )
3592 with ContextManagers(init_contexts):
3593 # Let's make sure we don't run the init function of buffer modules
-> 3594 model = cls(config, *model_args, **model_kwargs)
3596 # make sure we use the model's config since the __init__ call might have copied it
3597 config = model.config
File /usr/local/lib/python3.10/dist-packages/transformers/models/mistral/modeling_mistral.py:1081, in MistralForCausalLM.__init__(self, config)
1079 def __init__(self, config):
1080 super().__init__(config)
-> 1081 self.model = MistralModel(config)
1082 self.vocab_size = config.vocab_size
1083 self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
File /usr/local/lib/python3.10/dist-packages/transformers/models/mistral/modeling_mistral.py:913, in MistralModel.__init__(self, config)
909 self.vocab_size = config.vocab_size
911 self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
912 self.layers = nn.ModuleList(
--> 913 [MistralDecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)]
914 )
915 self._attn_implementation = config._attn_implementation
916 self.norm = MistralRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
File /usr/local/lib/python3.10/dist-packages/transformers/models/mistral/modeling_mistral.py:913, in <listcomp>(.0)
909 self.vocab_size = config.vocab_size
911 self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
912 self.layers = nn.ModuleList(
--> 913 [MistralDecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)]
914 )
915 self._attn_implementation = config._attn_implementation
916 self.norm = MistralRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
File /usr/local/lib/python3.10/dist-packages/transformers/models/mistral/modeling_mistral.py:715, in MistralDecoderLayer.__init__(self, config, layer_idx)
712 super().__init__()
713 self.hidden_size = config.hidden_size
--> 715 self.self_attn = MISTRAL_ATTENTION_CLASSES[config._attn_implementation](config, layer_idx)
717 self.mlp = MistralMLP(config)
718 self.input_layernorm = MistralRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
File /usr/local/lib/python3.10/dist-packages/transformers/models/mistral/modeling_mistral.py:230, in MistralAttention.__init__(self, config, layer_idx)
227 self.v_proj = nn.Linear(self.hidden_size, self.num_key_value_heads * self.head_dim, bias=False)
228 self.o_proj = nn.Linear(self.num_heads * self.head_dim, self.hidden_size, bias=False)
--> 230 self.rotary_emb = MistralRotaryEmbedding(
231 self.head_dim,
232 max_position_embeddings=self.max_position_embeddings,
233 base=self.rope_theta,
234 )
File /usr/local/lib/python3.10/dist-packages/transformers/models/mistral/modeling_mistral.py:99, in MistralRotaryEmbedding.__init__(self, dim, max_position_embeddings, base, device)
97 self.max_position_embeddings = max_position_embeddings
98 self.base = base
---> 99 inv_freq = 1.0 / (self.base ** (torch.arange(0, self.dim, 2).float().to(device) / self.dim))
100 self.register_buffer("inv_freq", inv_freq, persistent=False)
102 # Build here to make `torch.jit.trace` work.
File /usr/local/lib/python3.10/dist-packages/torch/utils/_device.py:77, in DeviceContext.__torch_function__(self, func, types, args, kwargs)
75 if func in _device_constructors() and kwargs.get('device') is None:
76 kwargs['device'] = self.device
---> 77 return func(*args, **kwargs)
RuntimeError: CUDA error: no kernel image is available for execution on the device
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
Since the error mentions CUDA capability 7.2, I checked the CUDA version within the container, which seems fine:
nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:08:11_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0
notebook + error.zip (263.7 KB)
Any idea how to solve this, please?
I zipped the notebook and added it as an attached file.