NeMo Tutorial ModuleNotFoundError: No module named 'megatron.core'

CUDA Version:11.8
Container: nvcr.io/nvidia/nemo:24.01.speech
Local Environment: WSL2

Hi
I’m trying to use the NeMo container from NGC to try out the ASR, LLM, and TTS tutorials.
While running through this tutorial, I encountered the error ModuleNotFoundError: No module named 'megatron.core' after installing megatron-core-0.6.0rc0 in the same notebook. Has anyone encountered the same issue?
Here is the process I went through:

  1. Download the container from NGC to WSL2 and run sudo docker run --gpus all -it --rm -v /mnt/c/Users/lzlal/nemo_speech:/NeMo --shm-size=8g -p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/nemo:24.01.speech

  2. Run Jupyter Notebook with ./start-jupyter.sh

  3. Update Megatron to the latest version

  4. An error occurs, as follows (complete error log included):

ModuleNotFoundError                       Traceback (most recent call last)
Cell In[13], line 2
      1 # Check what GPT .nemo models we have available on NGC
----> 2 from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel
      3 megatron_gpt_345m_nemo_url = MegatronGPTModel.list_available_models()[0].location
      4 megatron_gpt_345m_nemo_url # should point to the 345m megatron gpt model '.nemo' file

File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/__init__.py:15
      1 # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
   (...)
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
---> 15 from nemo.collections.nlp import data, losses, models, modules
     16 from nemo.package_info import __version__
     18 # Set collection version equal to NeMo version.

File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/data/__init__.py:42
     37 from nemo.collections.nlp.data.text_normalization.test_dataset import TextNormalizationTestDataset
     38 from nemo.collections.nlp.data.token_classification.token_classification_dataset import (
     39     BertTokenClassificationDataset,
     40     BertTokenClassificationInferDataset,
     41 )
---> 42 from nemo.collections.nlp.data.zero_shot_intent_recognition.zero_shot_intent_dataset import (
     43     ZeroShotIntentDataset,
     44     ZeroShotIntentInferenceDataset,
     45 )

File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/data/zero_shot_intent_recognition/__init__.py:16
      1 # Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
   (...)
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
---> 16 from nemo.collections.nlp.data.zero_shot_intent_recognition.zero_shot_intent_dataset import (
     17     ZeroShotIntentInferenceDataset,
     18     calc_class_weights_from_dataloader,
     19 )

File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/data/zero_shot_intent_recognition/zero_shot_intent_dataset.py:30
     28 from nemo.collections.nlp.data.glue_benchmark.data_processors import InputExample
     29 from nemo.collections.nlp.data.glue_benchmark.glue_benchmark_dataset import GLUEDataset
---> 30 from nemo.collections.nlp.parts.utils_funcs import tensor2list
     31 from nemo.core.neural_types import CategoricalValuesType, ChannelType, MaskType, NeuralType
     32 from nemo.utils import logging

File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/parts/__init__.py:17
      1 # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
   (...)
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     16 from nemo.collections.nlp.parts.megatron_lr_schedulers import CosineAnnealingExp
---> 17 from nemo.collections.nlp.parts.utils_funcs import list2str, tensor2list

File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/parts/utils_funcs.py:37
     34 from sklearn.metrics import classification_report, confusion_matrix
     35 from torch import Tensor
---> 37 from nemo.collections.nlp.modules.common.megatron.utils import erf_gelu
     38 from nemo.collections.nlp.modules.common.megatron.utils import openai_gelu as openai_gelu_func
     39 from nemo.collections.nlp.modules.common.megatron.utils import squared_relu

File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/modules/__init__.py:16
      1 # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
   (...)
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
---> 16 from nemo.collections.nlp.modules.common import (
     17     AlbertEncoder,
     18     BertEncoder,
     19     BertModule,
     20     CamembertEncoder,
     21     DistilBertEncoder,
     22     PromptEncoder,
     23     RobertaEncoder,
     24     SequenceClassifier,
     25     SequenceRegression,
     26     SequenceTokenClassifier,
     27     get_lm_model,
     28     get_pretrained_lm_models_list,
     29     get_tokenizer,
     30     get_tokenizer_list,
     31 )
     32 from nemo.collections.nlp.modules.dialogue_state_tracking.sgd_decoder import SGDDecoder
     33 from nemo.collections.nlp.modules.dialogue_state_tracking.sgd_encoder import SGDEncoder

File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/modules/common/__init__.py:36
     34 from nemo.collections.nlp.modules.common.sequence_token_classifier import SequenceTokenClassifier
     35 from nemo.collections.nlp.modules.common.token_classifier import BertPretrainingTokenClassifier, TokenClassifier
---> 36 from nemo.collections.nlp.modules.common.tokenizer_utils import get_tokenizer, get_tokenizer_list

File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/modules/common/tokenizer_utils.py:29
     27 from nemo.collections.nlp.modules.common.huggingface.huggingface_utils import get_huggingface_pretrained_lm_models_list
     28 from nemo.collections.nlp.modules.common.lm_utils import get_pretrained_lm_models_list
---> 29 from nemo.collections.nlp.parts.nlp_overrides import HAVE_MEGATRON_CORE
     30 from nemo.utils import logging
     32 try:

File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/parts/nlp_overrides.py:30
     28 from lightning_fabric.utilities.cloud_io import get_filesystem
     29 from lightning_fabric.utilities.optimizer import _optimizer_to_device
---> 30 from megatron.core.tensor_parallel.layers import param_is_not_tensor_parallel_duplicate
     31 from omegaconf import OmegaConf
     32 from pytorch_lightning.callbacks.progress import TQDMProgressBar

ModuleNotFoundError: No module named 'megatron.core'

Problem solved. I was using the wrong conda environment in Jupyter Notebook.

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.