CUDA Version: 11.8
Container: nvcr.io/nvidia/nemo:24.01.speech
Local Environment: WSL2
Hi,
I’m trying to use the NeMo container from NGC to work through the ASR, LLM, and TTS tutorials.
While running through this tutorial, I hit ModuleNotFoundError: No module named 'megatron.core', even though megatron-core 0.6.0rc0 was installed in the same notebook. Has anyone encountered the same issue?
Here is the process I went through:
- Download the container from NGC to WSL2 and run it:
sudo docker run --gpus all -it --rm -v /mnt/c/Users/lzlal/nemo_speech:/NeMo --shm-size=8g -p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/nemo:24.01.speech
- Run Jupyter Notebook:
./start-jupyter.sh
- Update Megatron to the latest version (a rough sketch of the command is included below).
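For reference, the Megatron update was done from the notebook (with a leading ! in the cell), roughly along these lines; the exact command is an approximation on my part, but pip reported megatron-core 0.6.0rc0 afterwards:

pip install --upgrade --pre megatron-core
pip show megatron-core   # check which version actually got installed (0.6.0rc0 in my case)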
Then the following error occurs (complete error log below):
ModuleNotFoundError Traceback (most recent call last)
Cell In[13], line 2
1 # Check what GPT .nemo models we have available on NGC
----> 2 from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel
3 megatron_gpt_345m_nemo_url = MegatronGPTModel.list_available_models()[0].location
4 megatron_gpt_345m_nemo_url # should point to the 345m megatron gpt model '.nemo' file
File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/__init__.py:15
1 # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
(...)
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
---> 15 from nemo.collections.nlp import data, losses, models, modules
16 from nemo.package_info import __version__
18 # Set collection version equal to NeMo version.
File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/data/__init__.py:42
37 from nemo.collections.nlp.data.text_normalization.test_dataset import TextNormalizationTestDataset
38 from nemo.collections.nlp.data.token_classification.token_classification_dataset import (
39 BertTokenClassificationDataset,
40 BertTokenClassificationInferDataset,
41 )
---> 42 from nemo.collections.nlp.data.zero_shot_intent_recognition.zero_shot_intent_dataset import (
43 ZeroShotIntentDataset,
44 ZeroShotIntentInferenceDataset,
45 )
File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/data/zero_shot_intent_recognition/__init__.py:16
1 # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
(...)
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
---> 16 from nemo.collections.nlp.data.zero_shot_intent_recognition.zero_shot_intent_dataset import (
17 ZeroShotIntentInferenceDataset,
18 calc_class_weights_from_dataloader,
19 )
File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/data/zero_shot_intent_recognition/zero_shot_intent_dataset.py:30
28 from nemo.collections.nlp.data.glue_benchmark.data_processors import InputExample
29 from nemo.collections.nlp.data.glue_benchmark.glue_benchmark_dataset import GLUEDataset
---> 30 from nemo.collections.nlp.parts.utils_funcs import tensor2list
31 from nemo.core.neural_types import CategoricalValuesType, ChannelType, MaskType, NeuralType
32 from nemo.utils import logging
File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/parts/__init__.py:17
1 # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
(...)
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
16 from nemo.collections.nlp.parts.megatron_lr_schedulers import CosineAnnealingExp
---> 17 from nemo.collections.nlp.parts.utils_funcs import list2str, tensor2list
File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/parts/utils_funcs.py:37
34 from sklearn.metrics import classification_report, confusion_matrix
35 from torch import Tensor
---> 37 from nemo.collections.nlp.modules.common.megatron.utils import erf_gelu
38 from nemo.collections.nlp.modules.common.megatron.utils import openai_gelu as openai_gelu_func
39 from nemo.collections.nlp.modules.common.megatron.utils import squared_relu
File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/modules/__init__.py:16
1 # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
(...)
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
---> 16 from nemo.collections.nlp.modules.common import (
17 AlbertEncoder,
18 BertEncoder,
19 BertModule,
20 CamembertEncoder,
21 DistilBertEncoder,
22 PromptEncoder,
23 RobertaEncoder,
24 SequenceClassifier,
25 SequenceRegression,
26 SequenceTokenClassifier,
27 get_lm_model,
28 get_pretrained_lm_models_list,
29 get_tokenizer,
30 get_tokenizer_list,
31 )
32 from nemo.collections.nlp.modules.dialogue_state_tracking.sgd_decoder import SGDDecoder
33 from nemo.collections.nlp.modules.dialogue_state_tracking.sgd_encoder import SGDEncoder
File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/modules/common/__init__.py:36
34 from nemo.collections.nlp.modules.common.sequence_token_classifier import SequenceTokenClassifier
35 from nemo.collections.nlp.modules.common.token_classifier import BertPretrainingTokenClassifier, TokenClassifier
---> 36 from nemo.collections.nlp.modules.common.tokenizer_utils import get_tokenizer, get_tokenizer_list
File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/modules/common/tokenizer_utils.py:29
27 from nemo.collections.nlp.modules.common.huggingface.huggingface_utils import get_huggingface_pretrained_lm_models_list
28 from nemo.collections.nlp.modules.common.lm_utils import get_pretrained_lm_models_list
---> 29 from nemo.collections.nlp.parts.nlp_overrides import HAVE_MEGATRON_CORE
30 from nemo.utils import logging
32 try:
File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/parts/nlp_overrides.py:30
28 from lightning_fabric.utilities.cloud_io import get_filesystem
29 from lightning_fabric.utilities.optimizer import _optimizer_to_device
---> 30 from megatron.core.tensor_parallel.layers import param_is_not_tensor_parallel_duplicate
31 from omegaconf import OmegaConf
32 from pytorch_lightning.callbacks.progress import TQDMProgressBar
ModuleNotFoundError: No module named 'megatron.core'
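
In case it helps with narrowing this down, a quick sanity check inside the container would be something along these lines (just a sketch on my side, not output from the session above):

python -c "import megatron.core; print(megatron.core.__file__)"   # does megatron.core import at all, and from where?
pip show megatron-core                                            # what does pip think is installed?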

