train_spec.txt
random_seed: 42
# YOLOv4 model section: anchors, backbone selection, loss weights.
yolov4_config {
  # One quoted list of three anchor pairs per detection scale
  # (presumably (width, height) in pixels of the network input - TODO confirm).
  big_anchor_shape: "[(128.00, 340.00), (176.00, 428.00), (228.00, 596.00)]"
  mid_anchor_shape: "[ (35.66, 74.01), (65.83, 104.91), (100.00, 268.00)]"
  small_anchor_shape: "[(14.00, 24.48), (24.69, 42.35), (51.84, 49.37)]"
  box_matching_iou: 0.25

  # Backbone: CSPDarkNet with 19 layers.
  arch: "cspdarknet"
  nlayers: 19
  arch_conv_blocks: 2

  # Loss term weights.
  loss_loc_weight: 0.8
  loss_neg_obj_weights: 100.0
  loss_class_weights: 0.5
  label_smoothing: 0.0

  # Per-scale grid x/y extension values.
  big_grid_xy_extend: 0.05
  mid_grid_xy_extend: 0.1
  small_grid_xy_extend: 0.2

  freeze_bn: false
  # freeze_blocks is intentionally left disabled (commented out).
  #freeze_blocks: 0
  force_relu: false
}
# Training loop section: batch size, schedule, regularizer, optimizer, pretrained weights.
training_config {
  batch_size_per_gpu: 4
  num_epochs: 400
  enable_qat: false
  checkpoint_interval: 5

  # Cosine-annealing schedule with a soft start.
  # NOTE(review): a 1e-3 .. 1e-1 range looks very high for the Adam optimizer
  # configured below - verify against the toolkit's sample specs.
  learning_rate {
    soft_start_cosine_annealing_schedule {
      min_learning_rate: 1e-3
      max_learning_rate: 1e-1
      soft_start: 0.3
    }
  }

  regularizer {
    type: L1
    weight: 3e-5
  }

  optimizer {
    adam {
      epsilon: 1e-7
      beta1: 0.9
      beta2: 0.999
      amsgrad: false
    }
  }

  # Pretrained CSPDarkNet-19 weights used to initialize the backbone.
  pretrain_model_path: "/workspace/tlt_pretrained_object_detection_vcspdarknet19/cspdarknet_19.hdf5"
}
# Evaluation section: AP computation mode, evaluation batch size, IoU match threshold.
eval_config {
  average_precision_mode: SAMPLE
  batch_size: 8
  matching_iou_threshold: 0.5
}
# Non-maximum-suppression section: score cut-off, clustering IoU, and boxes kept.
nms_config {
  confidence_threshold: 0.001
  clustering_iou_threshold: 0.5
  top_k: 200
}
# Augmentation section: colour/geometry jitter and the network input resolution.
augmentation_config {
  hue: 0.1
  saturation: 1.5
  exposure: 1.5
  vertical_flip: 0
  horizontal_flip: 0.5
  jitter: 0.3

  # Both output dimensions must be multiples of 32 so that every feature-map
  # scale divides evenly. 3840 = 32 * 120 is fine.
  output_width: 3840
  # Was 2160, which is NOT a multiple of 32 (2160 / 32 = 67.5). The deepest
  # feature map rounds up to 68 rows and upsamples to 136, while the stride-16
  # branch has 2160 / 16 = 135 rows, so the Concatenate layer fails with
  # shapes (None, 256, 136, 240) vs (None, 512, 135, 240).
  # 2176 = 32 * 68 is the nearest valid height at or above the source frames.
  # NOTE(review): anchor shapes were presumably derived for 3840x2160 - consider
  # re-deriving them for the new input size.
  output_height: 2176

  randomize_input_shape_period: 0
  mosaic_prob: 0.5
  mosaic_min_ratio: 0.2
}
# Dataset section: training/validation image and label directories, class mapping.
dataset_config {
  # Training split.
  data_sources: {
    label_directory_path: "/workspace/v1.2_split/train/labels"
    image_directory_path: "/workspace/v1.2_split/train/images"
  }

  include_difficult_in_training: true

  # Single class; the label name maps to itself.
  target_class_mapping {
    key: "person"
    value: "person"
  }

  # Validation split.
  validation_data_sources: {
    label_directory_path: "/workspace/v1.2_split/val/labels"
    image_directory_path: "/workspace/v1.2_split/val/images"
  }
}
Command used for training:
```
tlt yolo_v4 train -e /workspace/specs/yolov4/yolo_v4_train_resnet18_kitti.txt -r /workspace/results -k xyz --gpus 4
```
Error:
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:2018: The name tf.image.resize_nearest_neighbor is deprecated. Please use tf.compat.v1.image.resize_nearest_neighbor instead.
2021-04-01 16:40:56,435 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:2018: The name tf.image.resize_nearest_neighbor is deprecated. Please use tf.compat.v1.image.resize_nearest_neighbor instead.
Traceback (most recent call last):
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py", line 209, in <module>
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py", line 205, in main
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py", line 83, in run_experiment
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/builders/model_builder.py", line 162, in build_data_and_model
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/builders/model_builder.py", line 98, in _build_model
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/architecture/yolo_arch.py", line 223, in YOLO
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/architecture/yolo_arch.py", line 95, in YOLO_FCN
File "/usr/local/lib/python3.6/dist-packages/keras/engine/base_layer.py", line 431, in __call__
self.build(unpack_singleton(input_shapes))
File "/usr/local/lib/python3.6/dist-packages/keras/layers/merge.py", line 362, in build
'Got inputs shapes: %s' % (input_shape))
ValueError: A `Concatenate` layer requires inputs with matching shapes except for the concat axis. Got inputs shapes: [(None, 256, 136, 240), (None, 512, 135, 240)]
Traceback (most recent call last):
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py", line 209, in <module>
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py", line 205, in main
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py", line 83, in run_experiment
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/builders/model_builder.py", line 162, in build_data_and_model
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/builders/model_builder.py", line 98, in _build_model
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/architecture/yolo_arch.py", line 223, in YOLO
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/architecture/yolo_arch.py", line 95, in YOLO_FCN
File "/usr/local/lib/python3.6/dist-packages/keras/engine/base_layer.py", line 431, in __call__
self.build(unpack_singleton(input_shapes))
File "/usr/local/lib/python3.6/dist-packages/keras/layers/merge.py", line 362, in build
'Got inputs shapes: %s' % (input_shape))
ValueError: A `Concatenate` layer requires inputs with matching shapes except for the concat axis. Got inputs shapes: [(None, 256, 136, 240), (None, 512, 135, 240)]
Traceback (most recent call last):
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py", line 209, in <module>
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py", line 205, in main
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py", line 83, in run_experiment
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/builders/model_builder.py", line 162, in build_data_and_model
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/builders/model_builder.py", line 98, in _build_model
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/architecture/yolo_arch.py", line 223, in YOLO
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/architecture/yolo_arch.py", line 95, in YOLO_FCN
File "/usr/local/lib/python3.6/dist-packages/keras/engine/base_layer.py", line 431, in __call__
self.build(unpack_singleton(input_shapes))
File "/usr/local/lib/python3.6/dist-packages/keras/layers/merge.py", line 362, in build
'Got inputs shapes: %s' % (input_shape))
ValueError: A `Concatenate` layer requires inputs with matching shapes except for the concat axis. Got inputs shapes: [(None, 256, 136, 240), (None, 512, 135, 240)]
Traceback (most recent call last):
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py", line 209, in <module>
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py", line 205, in main
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py", line 83, in run_experiment
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/builders/model_builder.py", line 162, in build_data_and_model
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/builders/model_builder.py", line 98, in _build_model
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/architecture/yolo_arch.py", line 223, in YOLO
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/architecture/yolo_arch.py", line 95, in YOLO_FCN
File "/usr/local/lib/python3.6/dist-packages/keras/engine/base_layer.py", line 431, in __call__
self.build(unpack_singleton(input_shapes))
File "/usr/local/lib/python3.6/dist-packages/keras/layers/merge.py", line 362, in build
'Got inputs shapes: %s' % (input_shape))
ValueError: A `Concatenate` layer requires inputs with matching shapes except for the concat axis. Got inputs shapes: [(None, 256, 136, 240), (None, 512, 135, 240)]
Traceback (most recent call last):
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py", line 209, in <module>
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py", line 205, in main
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py", line 83, in run_experiment
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/builders/model_builder.py", line 162, in build_data_and_model
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/builders/model_builder.py", line 98, in _build_model
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/architecture/yolo_arch.py", line 223, in YOLO
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/architecture/yolo_arch.py", line 95, in YOLO_FCN
File "/usr/local/lib/python3.6/dist-packages/keras/engine/base_layer.py", line 431, in __call__
self.build(unpack_singleton(input_shapes))
File "/usr/local/lib/python3.6/dist-packages/keras/layers/merge.py", line 362, in build
'Got inputs shapes: %s' % (input_shape))
ValueError: A `Concatenate` layer requires inputs with matching shapes except for the concat axis. Got inputs shapes: [(None, 256, 136, 240), (None, 512, 135, 240)]
--------------------------------------------------------------------------
Primary job terminated normally, but 1 process returned
a non-zero exit code. Per user-direction, the job has been aborted.
--------------------------------------------------------------------------
--------------------------------------------------------------------------
mpirun.real detected that one or more processes exited with non-zero status, thus causing
the job to be terminated. The first process to do so was:
Process name: [[63215,1],3]
Exit code: 1
--------------------------------------------------------------------------
Traceback (most recent call last):
File "/usr/local/bin/yolo_v4", line 8, in <module>
sys.exit(main())
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/yolo_v4/entrypoint/yolo_v4.py", line 12, in main
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wh
eel.runfiles/ai_infra/iva/common/entrypoint/entrypoint.py", line 296, in launch_job
AssertionError: Process run failed.
2021-04-01 16:40:58,873 [INFO] tlt.components.docker_handler.docker_handler: Stopping container.