YOLO V4 not training

Hi, I have been trying to train YOLOv4 on 4 GPUs, but the training has been stuck for the past 7 hours and has not thrown any error.

Command passed -
print("To run with multigpu, please change --gpus based on the number of available GPUs in your machine.")
!yolo_v4 train -e $SPECS_DIR/yolo_v4_train_darknet53_kitti.txt \
    -r $USER_EXPERIMENT_DIR/experiment_dir_unpruned \
    -k $KEY \
    --gpus 4

config file -
random_seed: 42
yolov4_config {
big_anchor_shape: "[(106.00, 124.00), (81.00, 197.00), (178.00, 216.00)]"
mid_anchor_shape: "[(39.00, 108.00), (60.00, 71.00), (57.00, 127.00)]"
small_anchor_shape: "[(24.00, 39.00), (39.00, 49.00), (29.00, 71.00)]"
box_matching_iou: 0.25
arch: "darknet"
nlayers: 53
arch_conv_blocks: 2
loss_loc_weight: 0.8
loss_neg_obj_weights: 100.0
loss_class_weights: 0.5
label_smoothing: 0.0
big_grid_xy_extend: 0.05
mid_grid_xy_extend: 0.1
small_grid_xy_extend: 0.2
freeze_bn: false
#freeze_blocks: 0
force_relu: false
}
training_config {
batch_size_per_gpu: 8
num_epochs: 100
enable_qat: false
checkpoint_interval: 10
learning_rate {
soft_start_cosine_annealing_schedule {
min_learning_rate: 1e-7
max_learning_rate: 1e-4
soft_start: 0.3
}
}
regularizer {
type: L1
weight: 3e-5
}
optimizer {
adam {
epsilon: 1e-7
beta1: 0.9
beta2: 0.999
amsgrad: false
}
}
pretrain_model_path: "/workspace/examples/yolo_v4/pretrained_darknet53/tlt_pretrained_object_detection_vdarknet53/darknet_53.hdf5"
}
eval_config {
average_precision_mode: SAMPLE
batch_size: 8
matching_iou_threshold: 0.5
}
nms_config {
confidence_threshold: 0.001
clustering_iou_threshold: 0.5
top_k: 200
}
augmentation_config {
hue: 0.1
saturation: 1.5
exposure:1.5
vertical_flip:0
horizontal_flip: 0.5
jitter: 0.3
output_width: 960
output_height: 544
randomize_input_shape_period: 0
mosaic_prob: 0.5
mosaic_min_ratio:0.2
}
dataset_config {
data_sources: {
label_directory_path: "/workspace/examples/yolo_v4/DATA_DOWNLOAD_DIR/training_labels"
image_directory_path: "/workspace/examples/yolo_v4/DATA_DOWNLOAD_DIR/training"
}
include_difficult_in_training: true
target_class_mapping {
key: "person"
value: "person"
}

validation_data_sources: {
label_directory_path: "/workspace/examples/yolo_v4/DATA_DOWNLOAD_DIR/val_labels"
image_directory_path: "/workspace/examples/yolo_v4/DATA_DOWNLOAD_DIR/val"
}
}

Output after running the command -

To run with multigpu, please change --gpus based on the number of available GPUs in your machine.
Using TensorFlow backend.
WARNING:tensorflow:Deprecation warnings have been disabled. Set TF_ENABLE_DEPRECATION_WARNINGS=1 to re-enable them.
WARNING:tensorflow:Deprecation warnings have been disabled. Set TF_ENABLE_DEPRECATION_WARNINGS=1 to re-enable them.
WARNING:tensorflow:Deprecation warnings have been disabled. Set TF_ENABLE_DEPRECATION_WARNINGS=1 to re-enable them.
WARNING:tensorflow:Deprecation warnings have been disabled. Set TF_ENABLE_DEPRECATION_WARNINGS=1 to re-enable them.
Using TensorFlow backend.
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/horovod/tensorflow/__init__.py:117: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.

2021-06-17 19:15:12,713 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/horovod/tensorflow/__init__.py:117: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/horovod/tensorflow/__init__.py:143: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

2021-06-17 19:15:12,714 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/horovod/tensorflow/__init__.py:143: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

Using TensorFlow backend.
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/horovod/tensorflow/__init__.py:117: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.

2021-06-17 19:15:13,335 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/horovod/tensorflow/__init__.py:117: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/horovod/tensorflow/__init__.py:143: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

2021-06-17 19:15:13,336 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/horovod/tensorflow/__init__.py:143: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

Using TensorFlow backend.
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/horovod/tensorflow/__init__.py:117: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.

2021-06-17 19:15:13,533 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/horovod/tensorflow/__init__.py:117: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/horovod/tensorflow/__init__.py:143: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

2021-06-17 19:15:13,533 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/horovod/tensorflow/__init__.py:143: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

Using TensorFlow backend.
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/horovod/tensorflow/__init__.py:117: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.

2021-06-17 19:15:13,536 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/horovod/tensorflow/__init__.py:117: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/horovod/tensorflow/__init__.py:143: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

2021-06-17 19:15:13,536 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/horovod/tensorflow/__init__.py:143: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

WARNING:tensorflow:From /home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py:49: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.

2021-06-17 19:15:13,620 [WARNING] tensorflow: From /home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py:49: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.

WARNING:tensorflow:From /home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py:49: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.

2021-06-17 19:15:13,620 [WARNING] tensorflow: From /home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py:49: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.

WARNING:tensorflow:From /home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py:49: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.

2021-06-17 19:15:13,620 [WARNING] tensorflow: From /home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py:49: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.

WARNING:tensorflow:From /home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py:52: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.

2021-06-17 19:15:13,620 [WARNING] tensorflow: From /home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py:52: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.

WARNING:tensorflow:From /home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py:49: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.

WARNING:tensorflow:From /home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py:52: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.

WARNING:tensorflow:From /home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py:52: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.

2021-06-17 19:15:13,620 [WARNING] tensorflow: From /home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py:49: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.

2021-06-17 19:15:13,620 [WARNING] tensorflow: From /home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py:52: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.

2021-06-17 19:15:13,620 [WARNING] tensorflow: From /home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py:52: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.

WARNING:tensorflow:From /home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py:52: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.

2021-06-17 19:15:13,621 [WARNING] tensorflow: From /home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py:52: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

2021-06-17 19:15:14,618 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

2021-06-17 19:15:14,620 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

2021-06-17 19:15:14,631 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

2021-06-17 19:15:14,633 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

2021-06-17 19:15:14,637 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

2021-06-17 19:15:14,639 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:1834: The name tf.nn.fused_batch_norm is deprecated. Please use tf.compat.v1.nn.fused_batch_norm instead.

2021-06-17 19:15:14,649 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:1834: The name tf.nn.fused_batch_norm is deprecated. Please use tf.compat.v1.nn.fused_batch_norm instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

2021-06-17 19:15:14,653 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

2021-06-17 19:15:14,656 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:1834: The name tf.nn.fused_batch_norm is deprecated. Please use tf.compat.v1.nn.fused_batch_norm instead.

2021-06-17 19:15:14,662 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:1834: The name tf.nn.fused_batch_norm is deprecated. Please use tf.compat.v1.nn.fused_batch_norm instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:1834: The name tf.nn.fused_batch_norm is deprecated. Please use tf.compat.v1.nn.fused_batch_norm instead.

2021-06-17 19:15:14,668 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:1834: The name tf.nn.fused_batch_norm is deprecated. Please use tf.compat.v1.nn.fused_batch_norm instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:1834: The name tf.nn.fused_batch_norm is deprecated. Please use tf.compat.v1.nn.fused_batch_norm instead.

2021-06-17 19:15:14,684 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:1834: The name tf.nn.fused_batch_norm is deprecated. Please use tf.compat.v1.nn.fused_batch_norm instead.

WARNING:tensorflow:From /opt/nvidia/third_party/keras/tensorflow_backend.py:183: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.

2021-06-17 19:15:16,364 [WARNING] tensorflow: From /opt/nvidia/third_party/keras/tensorflow_backend.py:183: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.

WARNING:tensorflow:From /opt/nvidia/third_party/keras/tensorflow_backend.py:183: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.

2021-06-17 19:15:16,510 [WARNING] tensorflow: From /opt/nvidia/third_party/keras/tensorflow_backend.py:183: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.

WARNING:tensorflow:From /opt/nvidia/third_party/keras/tensorflow_backend.py:183: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.

2021-06-17 19:15:16,529 [WARNING] tensorflow: From /opt/nvidia/third_party/keras/tensorflow_backend.py:183: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.

WARNING:tensorflow:From /opt/nvidia/third_party/keras/tensorflow_backend.py:183: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.

2021-06-17 19:15:16,544 [WARNING] tensorflow: From /opt/nvidia/third_party/keras/tensorflow_backend.py:183: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:2018: The name tf.image.resize_nearest_neighbor is deprecated. Please use tf.compat.v1.image.resize_nearest_neighbor instead.

2021-06-17 19:15:16,654 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:2018: The name tf.image.resize_nearest_neighbor is deprecated. Please use tf.compat.v1.image.resize_nearest_neighbor instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:2018: The name tf.image.resize_nearest_neighbor is deprecated. Please use tf.compat.v1.image.resize_nearest_neighbor instead.

2021-06-17 19:15:16,856 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:2018: The name tf.image.resize_nearest_neighbor is deprecated. Please use tf.compat.v1.image.resize_nearest_neighbor instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:2018: The name tf.image.resize_nearest_neighbor is deprecated. Please use tf.compat.v1.image.resize_nearest_neighbor instead.

2021-06-17 19:15:16,858 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:2018: The name tf.image.resize_nearest_neighbor is deprecated. Please use tf.compat.v1.image.resize_nearest_neighbor instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:2018: The name tf.image.resize_nearest_neighbor is deprecated. Please use tf.compat.v1.image.resize_nearest_neighbor instead.

2021-06-17 19:15:16,869 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:2018: The name tf.image.resize_nearest_neighbor is deprecated. Please use tf.compat.v1.image.resize_nearest_neighbor instead.

WARNING:tensorflow:From /opt/nvidia/third_party/keras/tensorflow_backend.py:187: The name tf.nn.avg_pool is deprecated. Please use tf.nn.avg_pool2d instead.

2021-06-17 19:15:19,389 [WARNING] tensorflow: From /opt/nvidia/third_party/keras/tensorflow_backend.py:187: The name tf.nn.avg_pool is deprecated. Please use tf.nn.avg_pool2d instead.

WARNING:tensorflow:From /opt/nvidia/third_party/keras/tensorflow_backend.py:187: The name tf.nn.avg_pool is deprecated. Please use tf.nn.avg_pool2d instead.

2021-06-17 19:15:19,486 [WARNING] tensorflow: From /opt/nvidia/third_party/keras/tensorflow_backend.py:187: The name tf.nn.avg_pool is deprecated. Please use tf.nn.avg_pool2d instead.

WARNING:tensorflow:From /opt/nvidia/third_party/keras/tensorflow_backend.py:187: The name tf.nn.avg_pool is deprecated. Please use tf.nn.avg_pool2d instead.

2021-06-17 19:15:19,599 [WARNING] tensorflow: From /opt/nvidia/third_party/keras/tensorflow_backend.py:187: The name tf.nn.avg_pool is deprecated. Please use tf.nn.avg_pool2d instead.

WARNING:tensorflow:From /opt/nvidia/third_party/keras/tensorflow_backend.py:187: The name tf.nn.avg_pool is deprecated. Please use tf.nn.avg_pool2d instead.

2021-06-17 19:15:19,616 [WARNING] tensorflow: From /opt/nvidia/third_party/keras/tensorflow_backend.py:187: The name tf.nn.avg_pool is deprecated. Please use tf.nn.avg_pool2d instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

2021-06-17 19:15:19,739 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:199: The name tf.is_variable_initialized is deprecated. Please use tf.compat.v1.is_variable_initialized instead.

2021-06-17 19:15:19,739 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:199: The name tf.is_variable_initialized is deprecated. Please use tf.compat.v1.is_variable_initialized instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

2021-06-17 19:15:19,840 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:199: The name tf.is_variable_initialized is deprecated. Please use tf.compat.v1.is_variable_initialized instead.

2021-06-17 19:15:19,841 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:199: The name tf.is_variable_initialized is deprecated. Please use tf.compat.v1.is_variable_initialized instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

2021-06-17 19:15:19,935 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:199: The name tf.is_variable_initialized is deprecated. Please use tf.compat.v1.is_variable_initialized instead.

2021-06-17 19:15:19,937 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:199: The name tf.is_variable_initialized is deprecated. Please use tf.compat.v1.is_variable_initialized instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

2021-06-17 19:15:19,949 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:199: The name tf.is_variable_initialized is deprecated. Please use tf.compat.v1.is_variable_initialized instead.

2021-06-17 19:15:19,950 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:199: The name tf.is_variable_initialized is deprecated. Please use tf.compat.v1.is_variable_initialized instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:206: The name tf.variables_initializer is deprecated. Please use tf.compat.v1.variables_initializer instead.

2021-06-17 19:15:20,921 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:206: The name tf.variables_initializer is deprecated. Please use tf.compat.v1.variables_initializer instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:206: The name tf.variables_initializer is deprecated. Please use tf.compat.v1.variables_initializer instead.

2021-06-17 19:15:21,013 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:206: The name tf.variables_initializer is deprecated. Please use tf.compat.v1.variables_initializer instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:206: The name tf.variables_initializer is deprecated. Please use tf.compat.v1.variables_initializer instead.

2021-06-17 19:15:21,131 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:206: The name tf.variables_initializer is deprecated. Please use tf.compat.v1.variables_initializer instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:206: The name tf.variables_initializer is deprecated. Please use tf.compat.v1.variables_initializer instead.

2021-06-17 19:15:21,132 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:206: The name tf.variables_initializer is deprecated. Please use tf.compat.v1.variables_initializer instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

2021-06-17 19:15:22,668 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3295: The name tf.log is deprecated. Please use tf.math.log instead.

2021-06-17 19:15:22,672 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3295: The name tf.log is deprecated. Please use tf.math.log instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

2021-06-17 19:15:22,715 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3295: The name tf.log is deprecated. Please use tf.math.log instead.

2021-06-17 19:15:22,719 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3295: The name tf.log is deprecated. Please use tf.math.log instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

2021-06-17 19:15:22,825 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3295: The name tf.log is deprecated. Please use tf.math.log instead.

2021-06-17 19:15:22,828 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3295: The name tf.log is deprecated. Please use tf.math.log instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

2021-06-17 19:15:22,834 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3295: The name tf.log is deprecated. Please use tf.math.log instead.

2021-06-17 19:15:22,840 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3295: The name tf.log is deprecated. Please use tf.math.log instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.

2021-06-17 19:15:23,891 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.

2021-06-17 19:15:23,935 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.

2021-06-17 19:15:24,013 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:973: The name tf.assign is deprecated. Please use tf.compat.v1.assign instead.

2021-06-17 19:15:24,147 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:973: The name tf.assign is deprecated. Please use tf.compat.v1.assign instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:973: The name tf.assign is deprecated. Please use tf.compat.v1.assign instead.

2021-06-17 19:15:24,188 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:973: The name tf.assign is deprecated. Please use tf.compat.v1.assign instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:973: The name tf.assign is deprecated. Please use tf.compat.v1.assign instead.

2021-06-17 19:15:24,268 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:973: The name tf.assign is deprecated. Please use tf.compat.v1.assign instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.

2021-06-17 19:15:24,735 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.

WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:973: The name tf.assign is deprecated. Please use tf.compat.v1.assign instead.

2021-06-17 19:15:25,070 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:973: The name tf.assign is deprecated. Please use tf.compat.v1.assign instead.


Layer (type) Output Shape Param # Connected to

Input (InputLayer) (None, 3, 544, 960) 0


conv1 (Conv2D) (None, 32, 544, 960) 864 Input[0][0]


conv1_bn (BatchNormalization) (None, 32, 544, 960) 128 conv1[0][0]


conv1_lrelu (LeakyReLU) (None, 32, 544, 960) 0 conv1_bn[0][0]


conv2 (Conv2D) (None, 64, 272, 480) 18432 conv1_lrelu[0][0]


conv2_bn (BatchNormalization) (None, 64, 272, 480) 256 conv2[0][0]


conv2_lrelu (LeakyReLU) (None, 64, 272, 480) 0 conv2_bn[0][0]


b1_conv1_1 (Conv2D) (None, 32, 272, 480) 2048 conv2_lrelu[0][0]


b1_conv1_1_bn (BatchNormalizati (None, 32, 272, 480) 128 b1_conv1_1[0][0]


b1_conv1_1_lrelu (LeakyReLU) (None, 32, 272, 480) 0 b1_conv1_1_bn[0][0]


b1_conv1_2 (Conv2D) (None, 64, 272, 480) 18432 b1_conv1_1_lrelu[0][0]


b1_conv1_2_bn (BatchNormalizati (None, 64, 272, 480) 256 b1_conv1_2[0][0]


b1_conv1_2_lrelu (LeakyReLU) (None, 64, 272, 480) 0 b1_conv1_2_bn[0][0]


b1_add1 (Add) (None, 64, 272, 480) 0 conv2_lrelu[0][0]
b1_conv1_2_lrelu[0][0]


conv3 (Conv2D) (None, 128, 136, 240 73728 b1_add1[0][0]


conv3_bn (BatchNormalization) (None, 128, 136, 240 512 conv3[0][0]


conv3_lrelu (LeakyReLU) (None, 128, 136, 240 0 conv3_bn[0][0]


b2_conv1_1 (Conv2D) (None, 64, 136, 240) 8192 conv3_lrelu[0][0]


b2_conv1_1_bn (BatchNormalizati (None, 64, 136, 240) 256 b2_conv1_1[0][0]


b2_conv1_1_lrelu (LeakyReLU) (None, 64, 136, 240) 0 b2_conv1_1_bn[0][0]


b2_conv1_2 (Conv2D) (None, 128, 136, 240 73728 b2_conv1_1_lrelu[0][0]


b2_conv1_2_bn (BatchNormalizati (None, 128, 136, 240 512 b2_conv1_2[0][0]


b2_conv1_2_lrelu (LeakyReLU) (None, 128, 136, 240 0 b2_conv1_2_bn[0][0]


b2_add1 (Add) (None, 128, 136, 240 0 conv3_lrelu[0][0]
b2_conv1_2_lrelu[0][0]


b2_conv2_1 (Conv2D) (None, 64, 136, 240) 8192 b2_add1[0][0]


b2_conv2_1_bn (BatchNormalizati (None, 64, 136, 240) 256 b2_conv2_1[0][0]


b2_conv2_1_lrelu (LeakyReLU) (None, 64, 136, 240) 0 b2_conv2_1_bn[0][0]


b2_conv2_2 (Conv2D) (None, 128, 136, 240 73728 b2_conv2_1_lrelu[0][0]


b2_conv2_2_bn (BatchNormalizati (None, 128, 136, 240 512 b2_conv2_2[0][0]


b2_conv2_2_lrelu (LeakyReLU) (None, 128, 136, 240 0 b2_conv2_2_bn[0][0]


b2_add2 (Add) (None, 128, 136, 240 0 b2_add1[0][0]
b2_conv2_2_lrelu[0][0]


conv4 (Conv2D) (None, 256, 68, 120) 294912 b2_add2[0][0]


conv4_bn (BatchNormalization) (None, 256, 68, 120) 1024 conv4[0][0]


conv4_lrelu (LeakyReLU) (None, 256, 68, 120) 0 conv4_bn[0][0]


b3_conv1_1 (Conv2D) (None, 128, 68, 120) 32768 conv4_lrelu[0][0]


b3_conv1_1_bn (BatchNormalizati (None, 128, 68, 120) 512 b3_conv1_1[0][0]


b3_conv1_1_lrelu (LeakyReLU) (None, 128, 68, 120) 0 b3_conv1_1_bn[0][0]


b3_conv1_2 (Conv2D) (None, 256, 68, 120) 294912 b3_conv1_1_lrelu[0][0]


b3_conv1_2_bn (BatchNormalizati (None, 256, 68, 120) 1024 b3_conv1_2[0][0]


b3_conv1_2_lrelu (LeakyReLU) (None, 256, 68, 120) 0 b3_conv1_2_bn[0][0]


b3_add1 (Add) (None, 256, 68, 120) 0 conv4_lrelu[0][0]
b3_conv1_2_lrelu[0][0]


b3_conv2_1 (Conv2D) (None, 128, 68, 120) 32768 b3_add1[0][0]


b3_conv2_1_bn (BatchNormalizati (None, 128, 68, 120) 512 b3_conv2_1[0][0]


b3_conv2_1_lrelu (LeakyReLU) (None, 128, 68, 120) 0 b3_conv2_1_bn[0][0]


b3_conv2_2 (Conv2D) (None, 256, 68, 120) 294912 b3_conv2_1_lrelu[0][0]


b3_conv2_2_bn (BatchNormalizati (None, 256, 68, 120) 1024 b3_conv2_2[0][0]


b3_conv2_2_lrelu (LeakyReLU) (None, 256, 68, 120) 0 b3_conv2_2_bn[0][0]


b3_add2 (Add) (None, 256, 68, 120) 0 b3_add1[0][0]
b3_conv2_2_lrelu[0][0]


b3_conv3_1 (Conv2D) (None, 128, 68, 120) 32768 b3_add2[0][0]


b3_conv3_1_bn (BatchNormalizati (None, 128, 68, 120) 512 b3_conv3_1[0][0]


b3_conv3_1_lrelu (LeakyReLU) (None, 128, 68, 120) 0 b3_conv3_1_bn[0][0]


b3_conv3_2 (Conv2D) (None, 256, 68, 120) 294912 b3_conv3_1_lrelu[0][0]


b3_conv3_2_bn (BatchNormalizati (None, 256, 68, 120) 1024 b3_conv3_2[0][0]


b3_conv3_2_lrelu (LeakyReLU) (None, 256, 68, 120) 0 b3_conv3_2_bn[0][0]


b3_add3 (Add) (None, 256, 68, 120) 0 b3_add2[0][0]
b3_conv3_2_lrelu[0][0]


b3_conv4_1 (Conv2D) (None, 128, 68, 120) 32768 b3_add3[0][0]


b3_conv4_1_bn (BatchNormalizati (None, 128, 68, 120) 512 b3_conv4_1[0][0]


b3_conv4_1_lrelu (LeakyReLU) (None, 128, 68, 120) 0 b3_conv4_1_bn[0][0]


b3_conv4_2 (Conv2D) (None, 256, 68, 120) 294912 b3_conv4_1_lrelu[0][0]


b3_conv4_2_bn (BatchNormalizati (None, 256, 68, 120) 1024 b3_conv4_2[0][0]


b3_conv4_2_lrelu (LeakyReLU) (None, 256, 68, 120) 0 b3_conv4_2_bn[0][0]


b3_add4 (Add) (None, 256, 68, 120) 0 b3_add3[0][0]
b3_conv4_2_lrelu[0][0]


b3_conv5_1 (Conv2D) (None, 128, 68, 120) 32768 b3_add4[0][0]


b3_conv5_1_bn (BatchNormalizati (None, 128, 68, 120) 512 b3_conv5_1[0][0]


b3_conv5_1_lrelu (LeakyReLU) (None, 128, 68, 120) 0 b3_conv5_1_bn[0][0]


b3_conv5_2 (Conv2D) (None, 256, 68, 120) 294912 b3_conv5_1_lrelu[0][0]


b3_conv5_2_bn (BatchNormalizati (None, 256, 68, 120) 1024 b3_conv5_2[0][0]


b3_conv5_2_lrelu (LeakyReLU) (None, 256, 68, 120) 0 b3_conv5_2_bn[0][0]


b3_add5 (Add) (None, 256, 68, 120) 0 b3_add4[0][0]
b3_conv5_2_lrelu[0][0]


b3_conv6_1 (Conv2D) (None, 128, 68, 120) 32768 b3_add5[0][0]


b3_conv6_1_bn (BatchNormalizati (None, 128, 68, 120) 512 b3_conv6_1[0][0]


b3_conv6_1_lrelu (LeakyReLU) (None, 128, 68, 120) 0 b3_conv6_1_bn[0][0]


b3_conv6_2 (Conv2D) (None, 256, 68, 120) 294912 b3_conv6_1_lrelu[0][0]


b3_conv6_2_bn (BatchNormalizati (None, 256, 68, 120) 1024 b3_conv6_2[0][0]


b3_conv6_2_lrelu (LeakyReLU) (None, 256, 68, 120) 0 b3_conv6_2_bn[0][0]


b3_add6 (Add) (None, 256, 68, 120) 0 b3_add5[0][0]
b3_conv6_2_lrelu[0][0]


b3_conv7_1 (Conv2D) (None, 128, 68, 120) 32768 b3_add6[0][0]


b3_conv7_1_bn (BatchNormalizati (None, 128, 68, 120) 512 b3_conv7_1[0][0]


b3_conv7_1_lrelu (LeakyReLU) (None, 128, 68, 120) 0 b3_conv7_1_bn[0][0]


b3_conv7_2 (Conv2D) (None, 256, 68, 120) 294912 b3_conv7_1_lrelu[0][0]


b3_conv7_2_bn (BatchNormalizati (None, 256, 68, 120) 1024 b3_conv7_2[0][0]


b3_conv7_2_lrelu (LeakyReLU) (None, 256, 68, 120) 0 b3_conv7_2_bn[0][0]


b3_add7 (Add) (None, 256, 68, 120) 0 b3_add6[0][0]
b3_conv7_2_lrelu[0][0]


b3_conv8_1 (Conv2D) (None, 128, 68, 120) 32768 b3_add7[0][0]


b3_conv8_1_bn (BatchNormalizati (None, 128, 68, 120) 512 b3_conv8_1[0][0]


b3_conv8_1_lrelu (LeakyReLU) (None, 128, 68, 120) 0 b3_conv8_1_bn[0][0]


b3_conv8_2 (Conv2D) (None, 256, 68, 120) 294912 b3_conv8_1_lrelu[0][0]


b3_conv8_2_bn (BatchNormalizati (None, 256, 68, 120) 1024 b3_conv8_2[0][0]


b3_conv8_2_lrelu (LeakyReLU) (None, 256, 68, 120) 0 b3_conv8_2_bn[0][0]


b3_add8 (Add) (None, 256, 68, 120) 0 b3_add7[0][0]
b3_conv8_2_lrelu[0][0]


conv5 (Conv2D) (None, 512, 34, 60) 1179648 b3_add8[0][0]


conv5_bn (BatchNormalization) (None, 512, 34, 60) 2048 conv5[0][0]


conv5_lrelu (LeakyReLU) (None, 512, 34, 60) 0 conv5_bn[0][0]


b4_conv1_1 (Conv2D) (None, 256, 34, 60) 131072 conv5_lrelu[0][0]


b4_conv1_1_bn (BatchNormalizati (None, 256, 34, 60) 1024 b4_conv1_1[0][0]


b4_conv1_1_lrelu (LeakyReLU) (None, 256, 34, 60) 0 b4_conv1_1_bn[0][0]


b4_conv1_2 (Conv2D) (None, 512, 34, 60) 1179648 b4_conv1_1_lrelu[0][0]


b4_conv1_2_bn (BatchNormalizati (None, 512, 34, 60) 2048 b4_conv1_2[0][0]


b4_conv1_2_lrelu (LeakyReLU) (None, 512, 34, 60) 0 b4_conv1_2_bn[0][0]


b4_add1 (Add) (None, 512, 34, 60) 0 conv5_lrelu[0][0]
b4_conv1_2_lrelu[0][0]


b4_conv2_1 (Conv2D) (None, 256, 34, 60) 131072 b4_add1[0][0]


b4_conv2_1_bn (BatchNormalizati (None, 256, 34, 60) 1024 b4_conv2_1[0][0]


b4_conv2_1_lrelu (LeakyReLU) (None, 256, 34, 60) 0 b4_conv2_1_bn[0][0]


b4_conv2_2 (Conv2D) (None, 512, 34, 60) 1179648 b4_conv2_1_lrelu[0][0]


b4_conv2_2_bn (BatchNormalizati (None, 512, 34, 60) 2048 b4_conv2_2[0][0]


b4_conv2_2_lrelu (LeakyReLU) (None, 512, 34, 60) 0 b4_conv2_2_bn[0][0]


b4_add2 (Add) (None, 512, 34, 60) 0 b4_add1[0][0]
b4_conv2_2_lrelu[0][0]


b4_conv3_1 (Conv2D) (None, 256, 34, 60) 131072 b4_add2[0][0]


b4_conv3_1_bn (BatchNormalizati (None, 256, 34, 60) 1024 b4_conv3_1[0][0]


b4_conv3_1_lrelu (LeakyReLU) (None, 256, 34, 60) 0 b4_conv3_1_bn[0][0]


b4_conv3_2 (Conv2D) (None, 512, 34, 60) 1179648 b4_conv3_1_lrelu[0][0]


b4_conv3_2_bn (BatchNormalizati (None, 512, 34, 60) 2048 b4_conv3_2[0][0]


b4_conv3_2_lrelu (LeakyReLU) (None, 512, 34, 60) 0 b4_conv3_2_bn[0][0]


b4_add3 (Add) (None, 512, 34, 60) 0 b4_add2[0][0]
b4_conv3_2_lrelu[0][0]


b4_conv4_1 (Conv2D) (None, 256, 34, 60) 131072 b4_add3[0][0]


b4_conv4_1_bn (BatchNormalizati (None, 256, 34, 60) 1024 b4_conv4_1[0][0]


b4_conv4_1_lrelu (LeakyReLU) (None, 256, 34, 60) 0 b4_conv4_1_bn[0][0]


b4_conv4_2 (Conv2D) (None, 512, 34, 60) 1179648 b4_conv4_1_lrelu[0][0]


b4_conv4_2_bn (BatchNormalizati (None, 512, 34, 60) 2048 b4_conv4_2[0][0]


b4_conv4_2_lrelu (LeakyReLU) (None, 512, 34, 60) 0 b4_conv4_2_bn[0][0]


b4_add4 (Add) (None, 512, 34, 60) 0 b4_add3[0][0]
b4_conv4_2_lrelu[0][0]


b4_conv5_1 (Conv2D) (None, 256, 34, 60) 131072 b4_add4[0][0]


b4_conv5_1_bn (BatchNormalizati (None, 256, 34, 60) 1024 b4_conv5_1[0][0]


b4_conv5_1_lrelu (LeakyReLU) (None, 256, 34, 60) 0 b4_conv5_1_bn[0][0]


b4_conv5_2 (Conv2D) (None, 512, 34, 60) 1179648 b4_conv5_1_lrelu[0][0]


b4_conv5_2_bn (BatchNormalizati (None, 512, 34, 60) 2048 b4_conv5_2[0][0]


b4_conv5_2_lrelu (LeakyReLU) (None, 512, 34, 60) 0 b4_conv5_2_bn[0][0]


b4_add5 (Add) (None, 512, 34, 60) 0 b4_add4[0][0]
b4_conv5_2_lrelu[0][0]


b4_conv6_1 (Conv2D) (None, 256, 34, 60) 131072 b4_add5[0][0]


b4_conv6_1_bn (BatchNormalizati (None, 256, 34, 60) 1024 b4_conv6_1[0][0]


b4_conv6_1_lrelu (LeakyReLU) (None, 256, 34, 60) 0 b4_conv6_1_bn[0][0]


b4_conv6_2 (Conv2D) (None, 512, 34, 60) 1179648 b4_conv6_1_lrelu[0][0]


b4_conv6_2_bn (BatchNormalizati (None, 512, 34, 60) 2048 b4_conv6_2[0][0]


b4_conv6_2_lrelu (LeakyReLU) (None, 512, 34, 60) 0 b4_conv6_2_bn[0][0]


b4_add6 (Add) (None, 512, 34, 60) 0 b4_add5[0][0]
b4_conv6_2_lrelu[0][0]


b4_conv7_1 (Conv2D) (None, 256, 34, 60) 131072 b4_add6[0][0]


b4_conv7_1_bn (BatchNormalizati (None, 256, 34, 60) 1024 b4_conv7_1[0][0]


b4_conv7_1_lrelu (LeakyReLU) (None, 256, 34, 60) 0 b4_conv7_1_bn[0][0]


b4_conv7_2 (Conv2D) (None, 512, 34, 60) 1179648 b4_conv7_1_lrelu[0][0]


b4_conv7_2_bn (BatchNormalizati (None, 512, 34, 60) 2048 b4_conv7_2[0][0]


b4_conv7_2_lrelu (LeakyReLU) (None, 512, 34, 60) 0 b4_conv7_2_bn[0][0]


b4_add7 (Add) (None, 512, 34, 60) 0 b4_add6[0][0]
b4_conv7_2_lrelu[0][0]


b4_conv8_1 (Conv2D) (None, 256, 34, 60) 131072 b4_add7[0][0]


b4_conv8_1_bn (BatchNormalizati (None, 256, 34, 60) 1024 b4_conv8_1[0][0]


b4_conv8_1_lrelu (LeakyReLU) (None, 256, 34, 60) 0 b4_conv8_1_bn[0][0]


b4_conv8_2 (Conv2D) (None, 512, 34, 60) 1179648 b4_conv8_1_lrelu[0][0]


b4_conv8_2_bn (BatchNormalizati (None, 512, 34, 60) 2048 b4_conv8_2[0][0]


b4_conv8_2_lrelu (LeakyReLU) (None, 512, 34, 60) 0 b4_conv8_2_bn[0][0]


b4_add8 (Add) (None, 512, 34, 60) 0 b4_add7[0][0]
b4_conv8_2_lrelu[0][0]


conv6 (Conv2D) (None, 1024, 17, 30) 4718592 b4_add8[0][0]


conv6_bn (BatchNormalization) (None, 1024, 17, 30) 4096 conv6[0][0]


conv6_lrelu (LeakyReLU) (None, 1024, 17, 30) 0 conv6_bn[0][0]


b5_conv1_1 (Conv2D) (None, 512, 17, 30) 524288 conv6_lrelu[0][0]


b5_conv1_1_bn (BatchNormalizati (None, 512, 17, 30) 2048 b5_conv1_1[0][0]


b5_conv1_1_lrelu (LeakyReLU) (None, 512, 17, 30) 0 b5_conv1_1_bn[0][0]


b5_conv1_2 (Conv2D) (None, 1024, 17, 30) 4718592 b5_conv1_1_lrelu[0][0]


b5_conv1_2_bn (BatchNormalizati (None, 1024, 17, 30) 4096 b5_conv1_2[0][0]


b5_conv1_2_lrelu (LeakyReLU) (None, 1024, 17, 30) 0 b5_conv1_2_bn[0][0]


b5_add1 (Add) (None, 1024, 17, 30) 0 conv6_lrelu[0][0]
b5_conv1_2_lrelu[0][0]


b5_conv2_1 (Conv2D) (None, 512, 17, 30) 524288 b5_add1[0][0]


b5_conv2_1_bn (BatchNormalizati (None, 512, 17, 30) 2048 b5_conv2_1[0][0]


b5_conv2_1_lrelu (LeakyReLU) (None, 512, 17, 30) 0 b5_conv2_1_bn[0][0]


b5_conv2_2 (Conv2D) (None, 1024, 17, 30) 4718592 b5_conv2_1_lrelu[0][0]


b5_conv2_2_bn (BatchNormalizati (None, 1024, 17, 30) 4096 b5_conv2_2[0][0]


b5_conv2_2_lrelu (LeakyReLU) (None, 1024, 17, 30) 0 b5_conv2_2_bn[0][0]


b5_add2 (Add) (None, 1024, 17, 30) 0 b5_add1[0][0]
b5_conv2_2_lrelu[0][0]


b5_conv3_1 (Conv2D) (None, 512, 17, 30) 524288 b5_add2[0][0]


b5_conv3_1_bn (BatchNormalizati (None, 512, 17, 30) 2048 b5_conv3_1[0][0]


b5_conv3_1_lrelu (LeakyReLU) (None, 512, 17, 30) 0 b5_conv3_1_bn[0][0]


b5_conv3_2 (Conv2D) (None, 1024, 17, 30) 4718592 b5_conv3_1_lrelu[0][0]


b5_conv3_2_bn (BatchNormalizati (None, 1024, 17, 30) 4096 b5_conv3_2[0][0]


b5_conv3_2_lrelu (LeakyReLU) (None, 1024, 17, 30) 0 b5_conv3_2_bn[0][0]


b5_add3 (Add) (None, 1024, 17, 30) 0 b5_add2[0][0]
b5_conv3_2_lrelu[0][0]


b5_conv4_1 (Conv2D) (None, 512, 17, 30) 524288 b5_add3[0][0]


b5_conv4_1_bn (BatchNormalizati (None, 512, 17, 30) 2048 b5_conv4_1[0][0]


b5_conv4_1_lrelu (LeakyReLU) (None, 512, 17, 30) 0 b5_conv4_1_bn[0][0]


b5_conv4_2 (Conv2D) (None, 1024, 17, 30) 4718592 b5_conv4_1_lrelu[0][0]


b5_conv4_2_bn (BatchNormalizati (None, 1024, 17, 30) 4096 b5_conv4_2[0][0]


b5_conv4_2_lrelu (LeakyReLU) (None, 1024, 17, 30) 0 b5_conv4_2_bn[0][0]


b5_add4 (Add) (None, 1024, 17, 30) 0 b5_add3[0][0]
b5_conv4_2_lrelu[0][0]


yolo_spp_pool_1 (MaxPooling2D) (None, 1024, 17, 30) 0 b5_add4[0][0]


yolo_spp_pool_2 (MaxPooling2D) (None, 1024, 17, 30) 0 b5_add4[0][0]


yolo_spp_pool_3 (MaxPooling2D) (None, 1024, 17, 30) 0 b5_add4[0][0]


yolo_spp_concat (Concatenate) (None, 4096, 17, 30) 0 yolo_spp_pool_1[0][0]
yolo_spp_pool_2[0][0]
yolo_spp_pool_3[0][0]
b5_add4[0][0]


yolo_spp_conv (Conv2D) (None, 512, 17, 30) 2097152 yolo_spp_concat[0][0]


yolo_spp_conv_bn (BatchNormaliz (None, 512, 17, 30) 2048 yolo_spp_conv[0][0]


yolo_spp_conv_lrelu (LeakyReLU) (None, 512, 17, 30) 0 yolo_spp_conv_bn[0][0]


yolo_conv1_1 (Conv2D) (None, 512, 17, 30) 262144 yolo_spp_conv_lrelu[0][0]


yolo_conv1_1_bn (BatchNormaliza (None, 512, 17, 30) 2048 yolo_conv1_1[0][0]


yolo_conv1_1_lrelu (LeakyReLU) (None, 512, 17, 30) 0 yolo_conv1_1_bn[0][0]


yolo_conv1_2 (Conv2D) (None, 1024, 17, 30) 4718592 yolo_conv1_1_lrelu[0][0]


yolo_conv1_2_bn (BatchNormaliza (None, 1024, 17, 30) 4096 yolo_conv1_2[0][0]


yolo_conv1_2_lrelu (LeakyReLU) (None, 1024, 17, 30) 0 yolo_conv1_2_bn[0][0]


yolo_conv1_3 (Conv2D) (None, 512, 17, 30) 524288 yolo_conv1_2_lrelu[0][0]


yolo_conv1_3_bn (BatchNormaliza (None, 512, 17, 30) 2048 yolo_conv1_3[0][0]


yolo_conv1_3_lrelu (LeakyReLU) (None, 512, 17, 30) 0 yolo_conv1_3_bn[0][0]


yolo_conv1_4 (Conv2D) (None, 1024, 17, 30) 4718592 yolo_conv1_3_lrelu[0][0]


yolo_conv1_4_bn (BatchNormaliza (None, 1024, 17, 30) 4096 yolo_conv1_4[0][0]


yolo_conv1_4_lrelu (LeakyReLU) (None, 1024, 17, 30) 0 yolo_conv1_4_bn[0][0]


yolo_conv1_5 (Conv2D) (None, 512, 17, 30) 524288 yolo_conv1_4_lrelu[0][0]


yolo_conv1_5_bn (BatchNormaliza (None, 512, 17, 30) 2048 yolo_conv1_5[0][0]


yolo_conv1_5_lrelu (LeakyReLU) (None, 512, 17, 30) 0 yolo_conv1_5_bn[0][0]


yolo_conv2 (Conv2D) (None, 256, 17, 30) 131072 yolo_conv1_5_lrelu[0][0]


yolo_conv2_bn (BatchNormalizati (None, 256, 17, 30) 1024 yolo_conv2[0][0]


yolo_conv2_lrelu (LeakyReLU) (None, 256, 17, 30) 0 yolo_conv2_bn[0][0]


upsample0 (UpSampling2D) (None, 256, 34, 60) 0 yolo_conv2_lrelu[0][0]


concatenate_3 (Concatenate) (None, 768, 34, 60) 0 upsample0[0][0]
b4_add7[0][0]


yolo_conv3_1 (Conv2D) (None, 256, 34, 60) 196608 concatenate_3[0][0]


yolo_conv3_1_bn (BatchNormaliza (None, 256, 34, 60) 1024 yolo_conv3_1[0][0]


yolo_conv3_1_lrelu (LeakyReLU) (None, 256, 34, 60) 0 yolo_conv3_1_bn[0][0]


yolo_conv3_2 (Conv2D) (None, 512, 34, 60) 1179648 yolo_conv3_1_lrelu[0][0]


yolo_conv3_2_bn (BatchNormaliza (None, 512, 34, 60) 2048 yolo_conv3_2[0][0]


yolo_conv3_2_lrelu (LeakyReLU) (None, 512, 34, 60) 0 yolo_conv3_2_bn[0][0]


yolo_conv3_3 (Conv2D) (None, 256, 34, 60) 131072 yolo_conv3_2_lrelu[0][0]


yolo_conv3_3_bn (BatchNormaliza (None, 256, 34, 60) 1024 yolo_conv3_3[0][0]


yolo_conv3_3_lrelu (LeakyReLU) (None, 256, 34, 60) 0 yolo_conv3_3_bn[0][0]


yolo_conv3_4 (Conv2D) (None, 512, 34, 60) 1179648 yolo_conv3_3_lrelu[0][0]


yolo_conv3_4_bn (BatchNormaliza (None, 512, 34, 60) 2048 yolo_conv3_4[0][0]


yolo_conv3_4_lrelu (LeakyReLU) (None, 512, 34, 60) 0 yolo_conv3_4_bn[0][0]


yolo_conv3_5 (Conv2D) (None, 256, 34, 60) 131072 yolo_conv3_4_lrelu[0][0]


yolo_conv3_5_bn (BatchNormaliza (None, 256, 34, 60) 1024 yolo_conv3_5[0][0]


yolo_conv3_5_lrelu (LeakyReLU) (None, 256, 34, 60) 0 yolo_conv3_5_bn[0][0]


yolo_conv4 (Conv2D) (None, 128, 34, 60) 32768 yolo_conv3_5_lrelu[0][0]


yolo_conv4_bn (BatchNormalizati (None, 128, 34, 60) 512 yolo_conv4[0][0]


yolo_conv4_lrelu (LeakyReLU) (None, 128, 34, 60) 0 yolo_conv4_bn[0][0]


upsample1 (UpSampling2D) (None, 128, 68, 120) 0 yolo_conv4_lrelu[0][0]


concatenate_4 (Concatenate) (None, 384, 68, 120) 0 upsample1[0][0]
b3_add7[0][0]


yolo_conv5_1 (Conv2D) (None, 128, 68, 120) 49152 concatenate_4[0][0]


yolo_conv5_1_bn (BatchNormaliza (None, 128, 68, 120) 512 yolo_conv5_1[0][0]


yolo_conv5_1_lrelu (LeakyReLU) (None, 128, 68, 120) 0 yolo_conv5_1_bn[0][0]


yolo_conv5_2 (Conv2D) (None, 256, 68, 120) 294912 yolo_conv5_1_lrelu[0][0]


yolo_conv5_2_bn (BatchNormaliza (None, 256, 68, 120) 1024 yolo_conv5_2[0][0]


yolo_conv5_2_lrelu (LeakyReLU) (None, 256, 68, 120) 0 yolo_conv5_2_bn[0][0]


yolo_conv5_3 (Conv2D) (None, 128, 68, 120) 32768 yolo_conv5_2_lrelu[0][0]


yolo_conv5_3_bn (BatchNormaliza (None, 128, 68, 120) 512 yolo_conv5_3[0][0]


yolo_conv5_3_lrelu (LeakyReLU) (None, 128, 68, 120) 0 yolo_conv5_3_bn[0][0]


yolo_conv5_4 (Conv2D) (None, 256, 68, 120) 294912 yolo_conv5_3_lrelu[0][0]


yolo_conv5_4_bn (BatchNormaliza (None, 256, 68, 120) 1024 yolo_conv5_4[0][0]


yolo_conv5_4_lrelu (LeakyReLU) (None, 256, 68, 120) 0 yolo_conv5_4_bn[0][0]


yolo_conv5_5 (Conv2D) (None, 128, 68, 120) 32768 yolo_conv5_4_lrelu[0][0]


yolo_conv5_5_bn (BatchNormaliza (None, 128, 68, 120) 512 yolo_conv5_5[0][0]


yolo_conv5_5_lrelu (LeakyReLU) (None, 128, 68, 120) 0 yolo_conv5_5_bn[0][0]


yolo_conv1_6 (Conv2D) (None, 1024, 17, 30) 4718592 yolo_conv1_5_lrelu[0][0]


yolo_conv3_6 (Conv2D) (None, 512, 34, 60) 1179648 yolo_conv3_5_lrelu[0][0]


yolo_conv5_6 (Conv2D) (None, 256, 68, 120) 294912 yolo_conv5_5_lrelu[0][0]


yolo_conv1_6_bn (BatchNormaliza (None, 1024, 17, 30) 4096 yolo_conv1_6[0][0]


yolo_conv3_6_bn (BatchNormaliza (None, 512, 34, 60) 2048 yolo_conv3_6[0][0]


yolo_conv5_6_bn (BatchNormaliza (None, 256, 68, 120) 1024 yolo_conv5_6[0][0]


yolo_conv1_6_lrelu (LeakyReLU) (None, 1024, 17, 30) 0 yolo_conv1_6_bn[0][0]


yolo_conv3_6_lrelu (LeakyReLU) (None, 512, 34, 60) 0 yolo_conv3_6_bn[0][0]


yolo_conv5_6_lrelu (LeakyReLU) (None, 256, 68, 120) 0 yolo_conv5_6_bn[0][0]


conv_big_object (Conv2D) (None, 18, 17, 30) 18450 yolo_conv1_6_lrelu[0][0]


conv_mid_object (Conv2D) (None, 18, 34, 60) 9234 yolo_conv3_6_lrelu[0][0]


conv_sm_object (Conv2D) (None, 18, 68, 120) 4626 yolo_conv5_6_lrelu[0][0]


bg_permute (Permute) (None, 17, 30, 18) 0 conv_big_object[0][0]


md_permute (Permute) (None, 34, 60, 18) 0 conv_mid_object[0][0]


sm_permute (Permute) (None, 68, 120, 18) 0 conv_sm_object[0][0]


bg_reshape (Reshape) (None, 1530, 6) 0 bg_permute[0][0]


md_reshape (Reshape) (None, 6120, 6) 0 md_permute[0][0]


sm_reshape (Reshape) (None, 24480, 6) 0 sm_permute[0][0]


bg_anchor (YOLOAnchorBox) (None, 1530, 6) 0 conv_big_object[0][0]


bg_bbox_processor (BBoxPostProc (None, 1530, 6) 0 bg_reshape[0][0]


md_anchor (YOLOAnchorBox) (None, 6120, 6) 0 conv_mid_object[0][0]


md_bbox_processor (BBoxPostProc (None, 6120, 6) 0 md_reshape[0][0]


sm_anchor (YOLOAnchorBox) (None, 24480, 6) 0 conv_sm_object[0][0]


sm_bbox_processor (BBoxPostProc (None, 24480, 6) 0 sm_reshape[0][0]


encoded_bg (Concatenate) (None, 1530, 12) 0 bg_anchor[0][0]
bg_bbox_processor[0][0]


encoded_md (Concatenate) (None, 6120, 12) 0 md_anchor[0][0]
md_bbox_processor[0][0]


encoded_sm (Concatenate) (None, 24480, 12) 0 sm_anchor[0][0]
sm_bbox_processor[0][0]


encoded_detections (Concatenate (None, 32130, 12) 0 encoded_bg[0][0]
encoded_md[0][0]
encoded_sm[0][0]

Total params: 63,413,398
Trainable params: 63,359,766
Non-trainable params: 53,632


2021-06-17 19:17:37,591 [INFO] main: Number of images in the training dataset: 9356
Epoch 1/100
e7ed2085a8ec:450:984 [0] NCCL INFO Bootstrap : Using [0]lo:127.0.0.1<0> [1]eth0:172.17.0.10<0>
e7ed2085a8ec:450:984 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
e7ed2085a8ec:450:984 [0] NCCL INFO NET/IB : No device found.
e7ed2085a8ec:450:984 [0] NCCL INFO NET/Socket : Using [0]lo:127.0.0.1<0> [1]eth0:172.17.0.10<0>
e7ed2085a8ec:450:984 [0] NCCL INFO Using network Socket
NCCL version 2.7.8+cuda11.1
e7ed2085a8ec:451:978 [1] NCCL INFO Bootstrap : Using [0]lo:127.0.0.1<0> [1]eth0:172.17.0.10<0>
e7ed2085a8ec:451:978 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
e7ed2085a8ec:453:981 [3] NCCL INFO Bootstrap : Using [0]lo:127.0.0.1<0> [1]eth0:172.17.0.10<0>
e7ed2085a8ec:452:985 [2] NCCL INFO Bootstrap : Using [0]lo:127.0.0.1<0> [1]eth0:172.17.0.10<0>
e7ed2085a8ec:453:981 [3] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
e7ed2085a8ec:452:985 [2] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
e7ed2085a8ec:451:978 [1] NCCL INFO NET/IB : No device found.
e7ed2085a8ec:453:981 [3] NCCL INFO NET/IB : No device found.
e7ed2085a8ec:452:985 [2] NCCL INFO NET/IB : No device found.
e7ed2085a8ec:451:978 [1] NCCL INFO NET/Socket : Using [0]lo:127.0.0.1<0> [1]eth0:172.17.0.10<0>
e7ed2085a8ec:451:978 [1] NCCL INFO Using network Socket
e7ed2085a8ec:452:985 [2] NCCL INFO NET/Socket : Using [0]lo:127.0.0.1<0> [1]eth0:172.17.0.10<0>
e7ed2085a8ec:452:985 [2] NCCL INFO Using network Socket
e7ed2085a8ec:453:981 [3] NCCL INFO NET/Socket : Using [0]lo:127.0.0.1<0> [1]eth0:172.17.0.10<0>
e7ed2085a8ec:453:981 [3] NCCL INFO Using network Socket
e7ed2085a8ec:450:984 [0] NCCL INFO Channel 00/02 : 0 1 2 3
e7ed2085a8ec:450:984 [0] NCCL INFO Channel 01/02 : 0 1 2 3
e7ed2085a8ec:450:984 [0] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 8/8/64
e7ed2085a8ec:450:984 [0] NCCL INFO Trees [0] 1/-1/-1->0->-1|-1->0->1/-1/-1 [1] 1/-1/-1->0->-1|-1->0->1/-1/-1
e7ed2085a8ec:451:978 [1] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 8/8/64
e7ed2085a8ec:451:978 [1] NCCL INFO Trees [0] 2/-1/-1->1->0|0->1->2/-1/-1 [1] 2/-1/-1->1->0|0->1->2/-1/-1
e7ed2085a8ec:451:978 [1] NCCL INFO Setting affinity for GPU 1 to 7fffffff,ffffffff,00000000,00000000,ffffffff,ffffffff,00000000,00000000
e7ed2085a8ec:452:985 [2] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 8/8/64
e7ed2085a8ec:452:985 [2] NCCL INFO Trees [0] 3/-1/-1->2->1|1->2->3/-1/-1 [1] 3/-1/-1->2->1|1->2->3/-1/-1
e7ed2085a8ec:452:985 [2] NCCL INFO Setting affinity for GPU 2 to 7fffffff,ffffffff,00000000,00000000,ffffffff,ffffffff,00000000,00000000
e7ed2085a8ec:453:981 [3] NCCL INFO threadThresholds 8/8/64 | 32/8/64 | 8/8/64
e7ed2085a8ec:453:981 [3] NCCL INFO Trees [0] -1/-1/-1->3->2|2->3->-1/-1/-1 [1] -1/-1/-1->3->2|2->3->-1/-1/-1
e7ed2085a8ec:453:981 [3] NCCL INFO Setting affinity for GPU 3 to 7fffffff,ffffffff,00000000,00000000,ffffffff,ffffffff,00000000,00000000
e7ed2085a8ec:450:984 [0] NCCL INFO Setting affinity for GPU 0 to ffffffff,ffffffff,00000000,00000000,ffffffff,ffffffff
e7ed2085a8ec:453:981 [3] NCCL INFO Channel 00 : 3[c3000] → 0[27000] via P2P/IPC
e7ed2085a8ec:451:978 [1] NCCL INFO Channel 00 : 1[83000] → 2[a3000] via P2P/IPC
e7ed2085a8ec:452:985 [2] NCCL INFO Channel 00 : 2[a3000] → 3[c3000] via P2P/IPC
e7ed2085a8ec:450:984 [0] NCCL INFO Channel 00 : 0[27000] → 1[83000] via P2P/IPC
e7ed2085a8ec:453:981 [3] NCCL INFO Channel 00 : 3[c3000] → 2[a3000] via P2P/IPC
e7ed2085a8ec:451:978 [1] NCCL INFO Channel 00 : 1[83000] → 0[27000] via P2P/IPC
e7ed2085a8ec:452:985 [2] NCCL INFO Channel 00 : 2[a3000] → 1[83000] via P2P/IPC
e7ed2085a8ec:453:981 [3] NCCL INFO Channel 01 : 3[c3000] → 0[27000] via P2P/IPC
e7ed2085a8ec:450:984 [0] NCCL INFO Channel 01 : 0[27000] → 1[83000] via P2P/IPC
e7ed2085a8ec:451:978 [1] NCCL INFO Channel 01 : 1[83000] → 2[a3000] via P2P/IPC
e7ed2085a8ec:452:985 [2] NCCL INFO Channel 01 : 2[a3000] → 3[c3000] via P2P/IPC
e7ed2085a8ec:453:981 [3] NCCL INFO Channel 01 : 3[c3000] → 2[a3000] via P2P/IPC
e7ed2085a8ec:451:978 [1] NCCL INFO Channel 01 : 1[83000] → 0[27000] via P2P/IPC
e7ed2085a8ec:453:981 [3] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
e7ed2085a8ec:453:981 [3] NCCL INFO comm 0x7f5c10430aa0 rank 3 nranks 4 cudaDev 3 busId c3000 - Init COMPLETE
e7ed2085a8ec:450:984 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
e7ed2085a8ec:452:985 [2] NCCL INFO Channel 01 : 2[a3000] → 1[83000] via P2P/IPC
e7ed2085a8ec:450:984 [0] NCCL INFO comm 0x7f37b7099cf0 rank 0 nranks 4 cudaDev 0 busId 27000 - Init COMPLETE
e7ed2085a8ec:451:978 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
e7ed2085a8ec:452:985 [2] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
e7ed2085a8ec:451:978 [1] NCCL INFO comm 0x7fb884430d10 rank 1 nranks 4 cudaDev 1 busId 83000 - Init COMPLETE
e7ed2085a8ec:452:985 [2] NCCL INFO comm 0x7f8127098aa0 rank 2 nranks 4 cudaDev 2 busId a3000 - Init COMPLETE
e7ed2085a8ec:450:984 [0] NCCL INFO Launch mode Parallel

It has got stuck here…

To narrow down, could you try to train with one gpu?

The problem still prevails

You are running on your host PC, right? Which dgpus? Can you run nvidia-smi and share the results?

so, I was able to run it on 1 gpu on a different machine

It starts to work, but it is very unstable: sometimes it gets stuck. What could be the reason?

One more issue: it does not seem to work on multiple GPUs. What could be the problem there?

Do you mean nvidia-smi cannot work on your host PC?

No, nvidia-smi is working. What I am saying is that YOLOv4 runs on 1 GPU, but it is very unstable, as it gets stuck if I try to run it on multiple GPUs. Is there any solution to that? The training is slow on 1 GPU, so is it possible to train on 3 GPUs?

Sorry, I am confused. In the comments above, you mentioned that “The problem still prevails” with one GPU.
Do you mean it cannot run with 1 GPU? But just now you mentioned that “yolov4 is running on 1 GPU”.
Could you elaborate more?

Sorry about that. It wasn’t working on 1 GPU either. Then I shut down the notebook and ran it again, and it started running. Now I am asking how we can run it on more GPUs. The training is too slow on 1 GPU.

So, you mean that 1-GPU training does not “get stuck”, right?

yes

The error while running on 2 GPUs

Traceback (most recent call last):
File “/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py”, line 209, in
File “/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py”, line 205, in main
File “/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/yolo_v4/scripts/train.py”, line 162, in run_experiment
File “/usr/local/lib/python3.6/dist-packages/keras/legacy/interfaces.py”, line 91, in wrapper
return func(*args, **kwargs)
File “/usr/local/lib/python3.6/dist-packages/keras/engine/training.py”, line 1418, in fit_generator
initial_epoch=initial_epoch)
File “/usr/local/lib/python3.6/dist-packages/keras/engine/training_generator.py”, line 217, in fit_generator
class_weight=class_weight)
File “/usr/local/lib/python3.6/dist-packages/keras/engine/training.py”, line 1217, in train_on_batch
outputs = self.train_function(ins)
File “/usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py”, line 2715, in call
return self._call(inputs)
File “/usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py”, line 2675, in _call
fetched = self._callable_fn(*array_vals)
File “/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/client/session.py”, line 1472, in call
run_metadata_ptr)
tensorflow.python.framework.errors_impl.ResourceExhaustedError: 2 root error(s) found.
(0) Resource exhausted: OOM when allocating tensor with shape[256,546,962] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
[[{{node conv2_1/Pad}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

 [[loss_1/add_75/_7817]]

Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

(1) Resource exhausted: OOM when allocating tensor with shape[256,546,962] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
[[{{node conv2_1/Pad}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

0 successful operations.
0 derived errors ignored.

Primary job terminated normally, but 1 process returned
a non-zero exit code. Per user-direction, the job has been aborted.


mpirun.real detected that one or more processes exited with non-zero status, thus causing
the job to be terminated. The first process to do so was:

Process name: [[37614,1],0]
Exit code: 1

Traceback (most recent call last):
File “/usr/local/bin/yolo_v4”, line 8, in
sys.exit(main())
File “/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/yolo_v4/entrypoint/yolo_v4.py”, line 12, in main
File “/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/common/entrypoint/entrypoint.py”, line 296, in launch_job
AssertionError: Process run failed.

The 2-GPU training log above shows an OOM (out-of-memory) error. Can you try decreasing batch_size_per_gpu or output_width/output_height and retry?

It gets stuck after decreasing the batch size

062788b81554:8571:8836 [0] NCCL INFO Bootstrap : Using [0]lo:127.0.0.1<0> [1]eth0:172.17.0.3<0>
062788b81554:8571:8836 [0] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
062788b81554:8571:8836 [0] NCCL INFO NET/IB : No device found.
062788b81554:8571:8836 [0] NCCL INFO NET/Socket : Using [0]lo:127.0.0.1<0> [1]eth0:172.17.0.3<0>
062788b81554:8571:8836 [0] NCCL INFO Using network Socket
NCCL version 2.7.8+cuda11.1
062788b81554:8572:8835 [1] NCCL INFO Bootstrap : Using [0]lo:127.0.0.1<0> [1]eth0:172.17.0.3<0>
062788b81554:8572:8835 [1] NCCL INFO NET/Plugin : No plugin found (libnccl-net.so), using internal implementation
062788b81554:8572:8835 [1] NCCL INFO NET/IB : No device found.
062788b81554:8572:8835 [1] NCCL INFO NET/Socket : Using [0]lo:127.0.0.1<0> [1]eth0:172.17.0.3<0>
062788b81554:8572:8835 [1] NCCL INFO Using network Socket
062788b81554:8571:8836 [0] NCCL INFO Channel 00/02 : 0 1
062788b81554:8571:8836 [0] NCCL INFO Channel 01/02 : 0 1
062788b81554:8571:8836 [0] NCCL INFO threadThresholds 8/8/64 | 16/8/64 | 8/8/64
062788b81554:8572:8835 [1] NCCL INFO threadThresholds 8/8/64 | 16/8/64 | 8/8/64
062788b81554:8572:8835 [1] NCCL INFO Trees [0] -1/-1/-1->1->0|0->1->-1/-1/-1 [1] -1/-1/-1->1->0|0->1->-1/-1/-1
062788b81554:8572:8835 [1] NCCL INFO Setting affinity for GPU 1 to 7fffffff,ffffffff,00000000,00000000,ffffffff,ffffffff,00000000,00000000
062788b81554:8571:8836 [0] NCCL INFO Trees [0] 1/-1/-1->0->-1|-1->0->1/-1/-1 [1] 1/-1/-1->0->-1|-1->0->1/-1/-1
062788b81554:8571:8836 [0] NCCL INFO Setting affinity for GPU 0 to ffffffff,ffffffff,00000000,00000000,ffffffff,ffffffff
062788b81554:8572:8835 [1] NCCL INFO Channel 00 : 1[83000] → 0[27000] via P2P/IPC
062788b81554:8571:8836 [0] NCCL INFO Channel 00 : 0[27000] → 1[83000] via P2P/IPC
062788b81554:8572:8835 [1] NCCL INFO Channel 01 : 1[83000] → 0[27000] via P2P/IPC
062788b81554:8571:8836 [0] NCCL INFO Channel 01 : 0[27000] → 1[83000] via P2P/IPC
062788b81554:8572:8835 [1] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
062788b81554:8572:8835 [1] NCCL INFO comm 0x7f1843095800 rank 1 nranks 2 cudaDev 1 busId 83000 - Init COMPLETE
062788b81554:8571:8836 [0] NCCL INFO 2 coll channels, 2 p2p channels, 2 p2p channels per peer
062788b81554:8571:8836 [0] NCCL INFO comm 0x7f807b095d00 rank 0 nranks 2 cudaDev 0 busId 27000 - Init COMPLETE
062788b81554:8571:8836 [0] NCCL INFO Launch mode Parallel

Can you share full command line and the full training log as a txt file?

training text file

random_seed: 42
yolov4_config {
big_anchor_shape: “[(60.00, 155.00), (103.00, 160.00), (185.00, 217.00)]”
mid_anchor_shape: “[(32.00, 108.00), (46.00, 108.00), (78.00, 94.00)]”
small_anchor_shape: “[(30.00, 41.00), (33.00, 65.00), (54.00, 62.00)]”
box_matching_iou: 0.25
arch: “darknet”
nlayers: 53
arch_conv_blocks: 2
loss_loc_weight: 0.8
loss_neg_obj_weights: 100.0
loss_class_weights: 0.5
label_smoothing: 0.0
big_grid_xy_extend: 0.05
mid_grid_xy_extend: 0.1
small_grid_xy_extend: 0.2
freeze_bn: false
#freeze_blocks: 0
force_relu: false
}
training_config {
batch_size_per_gpu: 4
num_epochs: 100
enable_qat: false
checkpoint_interval: 10
learning_rate {
soft_start_cosine_annealing_schedule {
min_learning_rate: 1e-7
max_learning_rate: 1e-4
soft_start: 0.3
}
}
regularizer {
type: L1
weight: 3e-5
}
optimizer {
adam {
epsilon: 1e-7
beta1: 0.9
beta2: 0.999
amsgrad: false
}
}
pretrain_model_path: “/workspace/examples/yolo_v4/pretrained_darknet53/tlt_pretrained_object_detection_vdarknet53/darknet_53.hdf5”
}
eval_config {
average_precision_mode: SAMPLE
batch_size: 8
matching_iou_threshold: 0.5
}
nms_config {
confidence_threshold: 0.001
clustering_iou_threshold: 0.5
top_k: 200
}
augmentation_config {
hue: 0.1
saturation: 1.5
exposure:1.5
vertical_flip:0
horizontal_flip: 0.5
jitter: 0.3
output_width: 960
output_height: 544
randomize_input_shape_period: 0
mosaic_prob: 0.5
mosaic_min_ratio:0.2
}
dataset_config {
data_sources: {
label_directory_path: “/workspace/examples/yolo_v4/DATA_DOWNLOAD_DIR/training_labels”
image_directory_path: “/workspace/examples/yolo_v4/DATA_DOWNLOAD_DIR/training”
}
include_difficult_in_training: true
target_class_mapping {
key: “person”
value: “person”
}

validation_data_sources: {
label_directory_path: “/workspace/examples/yolo_v4/DATA_DOWNLOAD_DIR/testing_labels”
image_directory_path: “/workspace/examples/yolo_v4/DATA_DOWNLOAD_DIR/testing”
}
}

command

print(“To run with multigpu, please change --gpus based on the number of available GPUs in your machine.”)
!yolo_v4 train -e $SPECS_DIR/yolo_v4_train_darknet53_kitti.txt
-r $USER_EXPERIMENT_DIR/experiment_dir_unpruned
-k $KEY
--gpus 2

Can you paste the output of running the commands below?
$ nvidia-smi
$ nvidia-smi -L