I’m trying to use the library https://github.com/NVIDIA/Megatron-LM
When I run command
OMP_NUM_THREADS=10 bash scripts/pretrain_gpt2_distributed.sh
I got an error
> initializing model parallel cuda seeds on global rank 0, model parallel rank 0, and data parallel rank 0 with model parallel seed: 3952 and data parallel seed: 1234
configuring data
Traceback (most recent call last):
File "pretrain_gpt2.py", line 625, in <module>
main()
File "pretrain_gpt2.py", line 569, in main
args.eod_token = get_train_val_test_data(args)
File "pretrain_gpt2.py", line 515, in get_train_val_test_data
Traceback (most recent call last):
Traceback (most recent call last):
File "pretrain_gpt2.py", line 625, in <module>
File "pretrain_gpt2.py", line 625, in <module>
main()
File "pretrain_gpt2.py", line 569, in main
args.eod_token = get_train_val_test_data(args)
File "pretrain_gpt2.py", line 515, in get_train_val_test_data
Traceback (most recent call last):
Traceback (most recent call last):
main()
args)
File "/home/ubuntu/Megatron-LM/configure_data.py", line 34, in apply
return make_loaders(args)
File "/home/ubuntu/Megatron-LM/configure_data.py", line 170, in make_loaders
train, tokenizer = data_utils.make_dataset(**data_set_args)
File "/home/ubuntu/Megatron-LM/data_utils/__init__.py", line 114, in make_dataset
ds = [GPT2Dataset(d, max_seq_len=seq_length) if d is not None else None for d in ds]
File "/home/ubuntu/Megatron-LM/data_utils/__init__.py", line 114, in <listcomp>
args)
ds = [GPT2Dataset(d, max_seq_len=seq_length) if d is not None else None for d in ds]
File "/home/ubuntu/Megatron-LM/data_utils/datasets.py", line 477, in __init__
File "/home/ubuntu/Megatron-LM/configure_data.py", line 34, in apply
return make_loaders(args)
File "/home/ubuntu/Megatron-LM/configure_data.py", line 170, in make_loaders
train, tokenizer = data_utils.make_dataset(**data_set_args)
File "/home/ubuntu/Megatron-LM/data_utils/__init__.py", line 114, in make_dataset
Traceback (most recent call last):
File "pretrain_gpt2.py", line 625, in <module>
ds = [GPT2Dataset(d, max_seq_len=seq_length) if d is not None else None for d in ds]
File "/home/ubuntu/Megatron-LM/data_utils/__init__.py", line 114, in <listcomp>
main()
File "pretrain_gpt2.py", line 569, in main
ds = [GPT2Dataset(d, max_seq_len=seq_length) if d is not None else None for d in ds]
File "/home/ubuntu/Megatron-LM/data_utils/datasets.py", line 477, in __init__
args.eod_token = get_train_val_test_data(args)
File "pretrain_gpt2.py", line 515, in get_train_val_test_data
self.init_weighting()
File "/home/ubuntu/Megatron-LM/data_utils/datasets.py", line 487, in init_weighting
self.weighting = list(accumulate(lens))
TypeError: iteration over a 0-d array
Traceback (most recent call last):
File "pretrain_gpt2.py", line 625, in <module>
main()
File "pretrain_gpt2.py", line 569, in main
args.eod_token = get_train_val_test_data(args)
File "pretrain_gpt2.py", line 515, in get_train_val_test_data
args)
File "/home/ubuntu/Megatron-LM/configure_data.py", line 34, in apply
return make_loaders(args)
File "/home/ubuntu/Megatron-LM/configure_data.py", line 170, in make_loaders
train, tokenizer = data_utils.make_dataset(**data_set_args)
File "/home/ubuntu/Megatron-LM/data_utils/__init__.py", line 114, in make_dataset
ds = [GPT2Dataset(d, max_seq_len=seq_length) if d is not None else None for d in ds]
File "/home/ubuntu/Megatron-LM/data_utils/__init__.py", line 114, in <listcomp>
ds = [GPT2Dataset(d, max_seq_len=seq_length) if d is not None else None for d in ds]
File "/home/ubuntu/Megatron-LM/data_utils/datasets.py", line 477, in __init__
self.init_weighting()
File "/home/ubuntu/Megatron-LM/data_utils/datasets.py", line 487, in init_weighting
self.weighting = list(accumulate(lens))
TypeError: iteration over a 0-d array
Traceback (most recent call last):
File "pretrain_gpt2.py", line 625, in <module>
File "pretrain_gpt2.py", line 569, in main
args.eod_token = get_train_val_test_data(args)
File "pretrain_gpt2.py", line 515, in get_train_val_test_data
main()
File "pretrain_gpt2.py", line 569, in main
args)
File "/home/ubuntu/Megatron-LM/configure_data.py", line 34, in apply
return make_loaders(args)
File "/home/ubuntu/Megatron-LM/configure_data.py", line 170, in make_loaders
train, tokenizer = data_utils.make_dataset(**data_set_args)
File "/home/ubuntu/Megatron-LM/data_utils/__init__.py", line 114, in make_dataset
ds = [GPT2Dataset(d, max_seq_len=seq_length) if d is not None else None for d in ds]
File "/home/ubuntu/Megatron-LM/data_utils/__init__.py", line 114, in <listcomp>
args.eod_token = get_train_val_test_data(args)
File "pretrain_gpt2.py", line 625, in <module>
File "pretrain_gpt2.py", line 515, in get_train_val_test_data
args)
self.init_weighting()
File "/home/ubuntu/Megatron-LM/data_utils/datasets.py", line 487, in init_weighting
File "/home/ubuntu/Megatron-LM/configure_data.py", line 34, in apply
return make_loaders(args)
self.weighting = list(accumulate(lens))
TypeError: iteration over a 0-d array
File "/home/ubuntu/Megatron-LM/configure_data.py", line 170, in make_loaders
train, tokenizer = data_utils.make_dataset(**data_set_args)
File "/home/ubuntu/Megatron-LM/data_utils/__init__.py", line 114, in make_dataset
ds = [GPT2Dataset(d, max_seq_len=seq_length) if d is not None else None for d in ds]
File "/home/ubuntu/Megatron-LM/data_utils/__init__.py", line 114, in <listcomp>
ds = [GPT2Dataset(d, max_seq_len=seq_length) if d is not None else None for d in ds]
File "/home/ubuntu/Megatron-LM/data_utils/datasets.py", line 477, in __init__
self.init_weighting()
File "/home/ubuntu/Megatron-LM/data_utils/datasets.py", line 487, in init_weighting
self.weighting = list(accumulate(lens))
TypeError: iteration over a 0-d array
File "pretrain_gpt2.py", line 625, in <module>
main()
File "pretrain_gpt2.py", line 569, in main
args.eod_token = get_train_val_test_data(args)
File "pretrain_gpt2.py", line 515, in get_train_val_test_data
args)
File "/home/ubuntu/Megatron-LM/configure_data.py", line 34, in apply
return make_loaders(args)
File "/home/ubuntu/Megatron-LM/configure_data.py", line 170, in make_loaders
train, tokenizer = data_utils.make_dataset(**data_set_args)
File "/home/ubuntu/Megatron-LM/data_utils/__init__.py", line 114, in make_dataset
ds = [GPT2Dataset(d, max_seq_len=seq_length) if d is not None else None for d in ds]
File "/home/ubuntu/Megatron-LM/data_utils/__init__.py", line 114, in <listcomp>
ds = [GPT2Dataset(d, max_seq_len=seq_length) if d is not None else None for d in ds]
File "/home/ubuntu/Megatron-LM/data_utils/datasets.py", line 477, in __init__
self.init_weighting()
File "/home/ubuntu/Megatron-LM/data_utils/datasets.py", line 487, in init_weighting
self.weighting = list(accumulate(lens))
TypeError: iteration over a 0-d array
args)
File "/home/ubuntu/Megatron-LM/configure_data.py", line 34, in apply
return make_loaders(args)
File "/home/ubuntu/Megatron-LM/configure_data.py", line 170, in make_loaders
train, tokenizer = data_utils.make_dataset(**data_set_args)
File "/home/ubuntu/Megatron-LM/data_utils/__init__.py", line 114, in make_dataset
ds = [GPT2Dataset(d, max_seq_len=seq_length) if d is not None else None for d in ds]
File "/home/ubuntu/Megatron-LM/data_utils/__init__.py", line 114, in <listcomp>
ds = [GPT2Dataset(d, max_seq_len=seq_length) if d is not None else None for d in ds]
File "/home/ubuntu/Megatron-LM/data_utils/datasets.py", line 477, in __init__
self.init_weighting()
File "/home/ubuntu/Megatron-LM/data_utils/datasets.py", line 487, in init_weighting
self.weighting = list(accumulate(lens))
TypeError: iteration over a 0-d array
ds = [GPT2Dataset(d, max_seq_len=seq_length) if d is not None else None for d in ds]
File "/home/ubuntu/Megatron-LM/data_utils/datasets.py", line 477, in __init__
self.init_weighting()
File "/home/ubuntu/Megatron-LM/data_utils/datasets.py", line 487, in init_weighting
self.weighting = list(accumulate(lens))
TypeError: iteration over a 0-d array
main()
File "pretrain_gpt2.py", line 569, in main
args.eod_token = get_train_val_test_data(args)
File "pretrain_gpt2.py", line 515, in get_train_val_test_data
args)
File "/home/ubuntu/Megatron-LM/configure_data.py", line 34, in apply
return make_loaders(args)
File "/home/ubuntu/Megatron-LM/configure_data.py", line 170, in make_loaders
train, tokenizer = data_utils.make_dataset(**data_set_args)
File "/home/ubuntu/Megatron-LM/data_utils/__init__.py", line 114, in make_dataset
ds = [GPT2Dataset(d, max_seq_len=seq_length) if d is not None else None for d in ds]
File "/home/ubuntu/Megatron-LM/data_utils/__init__.py", line 114, in <listcomp>
ds = [GPT2Dataset(d, max_seq_len=seq_length) if d is not None else None for d in ds]
File "/home/ubuntu/Megatron-LM/data_utils/datasets.py", line 477, in __init__
self.init_weighting()
File "/home/ubuntu/Megatron-LM/data_utils/datasets.py", line 487, in init_weighting
self.weighting = list(accumulate(lens))
TypeError: iteration over a 0-d array
Traceback (most recent call last):
File "/usr/lib/python3.6/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/home/ubuntu/Env/ml/lib/python3.6/site-packages/torch/distributed/launch.py", line 246, in <module>
main()
File "/home/ubuntu/Env/ml/lib/python3.6/site-packages/torch/distributed/launch.py", line 242, in main
cmd=cmd)
subprocess.CalledProcessError: Command '['/home/ubuntu/Env/ml/bin/python', '-u', 'pretrain_gpt2.py', '--local_rank=7', '--num-layers', '24', '--hidden-size', '1024', '--num-attention-heads', '16', '--batch-size', '8', '--seq-length', '1024', '--max-position-embeddings', '1024', '--train-iters', '320000', '--save', 'checkpoints/gpt2_345m', '--load', 'checkpoints/gpt2_345m', '--resume-dataloader', '--train-data', 'wikipedia', '--lazy-loader', '--tokenizer-type', 'GPT2BPETokenizer', '--cache-dir', 'cache', '--split', '949,50,1', '--distributed-backend', 'nccl', '--lr', '0.00015', '--lr-decay-style', 'cosine', '--weight-decay', '1e-2', '--clip-grad', '1.0', '--warmup', '.01', '--checkpoint-activations', '--fp16']' returned non-zero exit status 1.
Could you help me deal with this issue?