NeMo Tutorial ModuleNotFoundError: No module named 'megatron.core'

CUDA Version: 11.8
Container: nvcr.io/nvidia/nemo:24.01.speech
Local Environment: WSL2

Hi
I’m trying to use the NeMo container from NGC to try out the ASR, LLM, and TTS tutorials.
While running through this tutorial, I encountered the error ModuleNotFoundError: No module named 'megatron.core' after installing megatron-core-0.6.0rc0 in the same notebook. Has anyone encountered the same issue?
Here is the process I went through:

  1. Download the container from NGC to WSL2 and run sudo docker run --gpus all -it --rm -v /mnt/c/Users/lzlal/nemo_speech:/NeMo --shm-size=8g -p 8888:8888 -p 6006:6006 --ulimit memlock=-1 --ulimit stack=67108864 nvcr.io/nvidia/nemo:24.01.speech

  2. Start Jupyter Notebook by running ./start-jupyter.sh

  3. Update Megatron to the latest version

  4. The following error occurs (complete error log below):

ModuleNotFoundError                       Traceback (most recent call last)
Cell In[13], line 2
      1 # Check what GPT .nemo models we have available on NGC
----> 2 from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel
      3 megatron_gpt_345m_nemo_url = MegatronGPTModel.list_available_models()[0].location
      4 megatron_gpt_345m_nemo_url # should point to the 345m megatron gpt model '.nemo' file

File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/__init__.py:15
      1 # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
   (...)
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
---> 15 from nemo.collections.nlp import data, losses, models, modules
     16 from nemo.package_info import __version__
     18 # Set collection version equal to NeMo version.

File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/data/__init__.py:42
     37 from nemo.collections.nlp.data.text_normalization.test_dataset import TextNormalizationTestDataset
     38 from nemo.collections.nlp.data.token_classification.token_classification_dataset import (
     39     BertTokenClassificationDataset,
     40     BertTokenClassificationInferDataset,
     41 )
---> 42 from nemo.collections.nlp.data.zero_shot_intent_recognition.zero_shot_intent_dataset import (
     43     ZeroShotIntentDataset,
     44     ZeroShotIntentInferenceDataset,
     45 )

File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/data/zero_shot_intent_recognition/__init__.py:16
      1 # Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
   (...)
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
---> 16 from nemo.collections.nlp.data.zero_shot_intent_recognition.zero_shot_intent_dataset import (
     17     ZeroShotIntentInferenceDataset,
     18     calc_class_weights_from_dataloader,
     19 )

File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/data/zero_shot_intent_recognition/zero_shot_intent_dataset.py:30
     28 from nemo.collections.nlp.data.glue_benchmark.data_processors import InputExample
     29 from nemo.collections.nlp.data.glue_benchmark.glue_benchmark_dataset import GLUEDataset
---> 30 from nemo.collections.nlp.parts.utils_funcs import tensor2list
     31 from nemo.core.neural_types import CategoricalValuesType, ChannelType, MaskType, NeuralType
     32 from nemo.utils import logging

File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/parts/__init__.py:17
      1 # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
   (...)
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     16 from nemo.collections.nlp.parts.megatron_lr_schedulers import CosineAnnealingExp
---> 17 from nemo.collections.nlp.parts.utils_funcs import list2str, tensor2list

File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/parts/utils_funcs.py:37
     34 from sklearn.metrics import classification_report, confusion_matrix
     35 from torch import Tensor
---> 37 from nemo.collections.nlp.modules.common.megatron.utils import erf_gelu
     38 from nemo.collections.nlp.modules.common.megatron.utils import openai_gelu as openai_gelu_func
     39 from nemo.collections.nlp.modules.common.megatron.utils import squared_relu

File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/modules/__init__.py:16
      1 # Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
   (...)
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
---> 16 from nemo.collections.nlp.modules.common import (
     17     AlbertEncoder,
     18     BertEncoder,
     19     BertModule,
     20     CamembertEncoder,
     21     DistilBertEncoder,
     22     PromptEncoder,
     23     RobertaEncoder,
     24     SequenceClassifier,
     25     SequenceRegression,
     26     SequenceTokenClassifier,
     27     get_lm_model,
     28     get_pretrained_lm_models_list,
     29     get_tokenizer,
     30     get_tokenizer_list,
     31 )
     32 from nemo.collections.nlp.modules.dialogue_state_tracking.sgd_decoder import SGDDecoder
     33 from nemo.collections.nlp.modules.dialogue_state_tracking.sgd_encoder import SGDEncoder

File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/modules/common/__init__.py:36
     34 from nemo.collections.nlp.modules.common.sequence_token_classifier import SequenceTokenClassifier
     35 from nemo.collections.nlp.modules.common.token_classifier import BertPretrainingTokenClassifier, TokenClassifier
---> 36 from nemo.collections.nlp.modules.common.tokenizer_utils import get_tokenizer, get_tokenizer_list

File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/modules/common/tokenizer_utils.py:29
     27 from nemo.collections.nlp.modules.common.huggingface.huggingface_utils import get_huggingface_pretrained_lm_models_list
     28 from nemo.collections.nlp.modules.common.lm_utils import get_pretrained_lm_models_list
---> 29 from nemo.collections.nlp.parts.nlp_overrides import HAVE_MEGATRON_CORE
     30 from nemo.utils import logging
     32 try:

File /usr/local/lib/python3.10/dist-packages/nemo/collections/nlp/parts/nlp_overrides.py:30
     28 from lightning_fabric.utilities.cloud_io import get_filesystem
     29 from lightning_fabric.utilities.optimizer import _optimizer_to_device
---> 30 from megatron.core.tensor_parallel.layers import param_is_not_tensor_parallel_duplicate
     31 from omegaconf import OmegaConf
     32 from pytorch_lightning.callbacks.progress import TQDMProgressBar

ModuleNotFoundError: No module named 'megatron.core'

Problem solved. I was using the wrong conda environment in Jupyter Notebook.