Hi, I’m training a detectnet_v2 model and I’m getting poor performance even though the loss is very small. What is the relationship between the loss (about 0.00002) and the mAP (about 11%)?
My dataset is built from an original set of 3k images, augmented with rotations and dilations, for a grand total of 15k images.
specs/detectnet_v2_tfrecords_kitti_trainval.txt:
kitti_config {
root_directory_path: "/workspace/tlt-experiments/210823/data/train"
image_dir_name: "images"
label_dir_name: "labels"
image_extension: ".png"
#partition_mode: "sequence" # need kitti_sequence_to_frames_file
partition_mode: "random"
num_partitions: 2 # use 2 to create train-valid partition. Modify "validation_fold: 0" in train_specs.txt to select the partition fold used for validation.
val_split: 1
num_shards: 10
#kitti_sequence_to_frames_file: "kstff.txt"
}
# For most cases, this will be the same as the root_directory_path. If
# for some reason the images are in a different directory, then
# the images will be dereferenced as
# image_directory_path/image_dir_name/<xxxx><image_extension>
image_directory_path: "/workspace/tlt-experiments/210823/data/train"
specs/detectnet_v2_tfrecords_kitti_validval.txt: analogous to trainval
specs/detectnet_v2_tfrecords_kitti_testval.txt: analogous to trainval
specs/detectnet_v2_trainval_resnet18_kitti.txt:
random_seed: 42
dataset_config {
data_sources {
tfrecords_path: "/workspace/tlt-experiments/210823/data/tfrecords/kitti_trainval/*"
image_directory_path: "/workspace/tlt-experiments/210823/data/train"
}
image_extension: "png"
target_class_mapping {
key: "car"
value: "car"
}
target_class_mapping {
key: "cyclist"
value: "cycle"
}
target_class_mapping {
key: "motorcycle"
value: "cycle"
}
target_class_mapping {
key: "cycle"
value: "cycle"
}
target_class_mapping {
key: "pedestrian"
value: "pedestrian"
}
target_class_mapping {
key: "pedestrians"
value: "pedestrian"
}
target_class_mapping {
key: "person_sitting"
value: "pedestrian"
}
target_class_mapping {
key: "person"
value: "pedestrian"
}
target_class_mapping {
key: "girl"
value: "pedestrian"
}
target_class_mapping {
key: "boy"
value: "pedestrian"
}
target_class_mapping {
key: "van"
value: "truck"
}
target_class_mapping {
key: "truck"
value: "truck"
}
target_class_mapping {
key: "bus"
value: "truck"
}
# validation_fold: 0 #use together with random partition of tfrecords
validation_data_source: {
tfrecords_path: "/workspace/tlt-experiments/210823/data/tfrecords/kitti_validval/*"
image_directory_path: "/workspace/tlt-experiments/210823/data/valid"
}
}
augmentation_config {
preprocessing {
output_image_width: 1280
output_image_height: 720
min_bbox_width: 1.0
min_bbox_height: 1.0
output_image_channel: 3
}
spatial_augmentation {
hflip_probability: 1
zoom_min: 0.9
zoom_max: 1.2
translate_max_x: 8.0
translate_max_y: 8.0
}
color_augmentation {
hue_rotation_max: 25.0
saturation_shift_max: 0.20000000298
contrast_scale_max: 0.10000000149
contrast_center: 0.5
}
}
postprocessing_config {
target_class_config {
key: "car"
value {
clustering_config {
coverage_threshold: 0.00499999988824
dbscan_eps: 0.20000000298
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 10
}
}
}
target_class_config {
key: "cycle"
value {
clustering_config {
coverage_threshold: 0.00499999988824
dbscan_eps: 0.15000000596
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 10
}
}
}
target_class_config {
key: "pedestrian"
value {
clustering_config {
coverage_threshold: 0.00749999983236
dbscan_eps: 0.230000004172
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 10
}
}
}
target_class_config {
key: "truck"
value {
clustering_config {
coverage_threshold: 0.00499999988824
dbscan_eps: 0.15000000596
dbscan_min_samples: 0.0500000007451
minimum_bounding_box_height: 10
}
}
}
}
model_config {
pretrained_model_file: "/workspace/tlt-experiments/210823/pretrained_resnet18/tlt_pretrained_detectnet_v2_vresnet18/resnet18.hdf5"
num_layers: 18
use_batch_norm: true
objective_set {
bbox {
scale: 35.0
offset: 0.5
}
cov {
}
}
training_precision {
backend_floatx: FLOAT32
}
arch: "resnet"
}
evaluation_config {
validation_period_during_training: 10
first_validation_epoch: 30
minimum_detection_ground_truth_overlap {
key: "car"
value: 0.699999988079
}
minimum_detection_ground_truth_overlap {
key: "cycle"
value: 0.5
}
minimum_detection_ground_truth_overlap {
key: "pedestrian"
value: 0.5
}
minimum_detection_ground_truth_overlap {
key: "truck"
value: 0.5
}
evaluation_box_config {
key: "car"
value {
minimum_height: 10
maximum_height: 9999
minimum_width: 10
maximum_width: 9999
}
}
evaluation_box_config {
key: "cycle"
value {
minimum_height: 10
maximum_height: 9999
minimum_width: 10
maximum_width: 9999
}
}
evaluation_box_config {
key: "pedestrian"
value {
minimum_height: 10
maximum_height: 9999
minimum_width: 10
maximum_width: 9999
}
}
evaluation_box_config {
key: "truck"
value {
minimum_height: 10
maximum_height: 9999
minimum_width: 10
maximum_width: 9999
}
}
average_precision_mode: INTEGRATE
}
cost_function_config {
target_classes {
name: "car"
class_weight: 1.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: "cov"
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: "bbox"
initial_weight: 10.0
weight_target: 10.0
}
}
target_classes {
name: "cycle"
class_weight: 8.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: "cov"
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: "bbox"
initial_weight: 10.0
weight_target: 1.0
}
}
target_classes {
name: "pedestrian"
class_weight: 4.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: "cov"
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: "bbox"
initial_weight: 10.0
weight_target: 10.0
}
}
target_classes {
name: "truck"
class_weight: 8.0
coverage_foreground_weight: 0.0500000007451
objectives {
name: "cov"
initial_weight: 1.0
weight_target: 1.0
}
objectives {
name: "bbox"
initial_weight: 10.0
weight_target: 1.0
}
}
enable_autoweighting: true
max_objective_weight: 0.999899983406
min_objective_weight: 9.99999974738e-05
}
training_config {
batch_size_per_gpu: 4
num_epochs: 120
learning_rate {
soft_start_annealing_schedule {
min_learning_rate: 5e-06
max_learning_rate: 5e-04
soft_start: 0.10000000149
annealing: 0.699999988079
}
}
regularizer {
type: L1
weight: 3.00000002618e-09
}
optimizer {
adam {
epsilon: 9.99999993923e-09
beta1: 0.899999976158
beta2: 0.999000012875
}
}
cost_scaling {
initial_exponent: 20.0
increment: 0.005
decrement: 1.0
}
checkpoint_interval: 10
}
bbox_rasterizer_config {
target_class_config {
key: "car"
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 0.40000000596
cov_radius_y: 0.40000000596
bbox_min_radius: 1.0
}
}
target_class_config {
key: "cycle"
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 1.0
cov_radius_y: 1.0
bbox_min_radius: 1.0
}
}
target_class_config {
key: "pedestrian"
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 1.0
cov_radius_y: 1.0
bbox_min_radius: 1.0
}
}
target_class_config {
key: "cycle"
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 1.0
cov_radius_y: 1.0
bbox_min_radius: 1.0
}
}
target_class_config {
key: "truck"
value {
cov_center_x: 0.5
cov_center_y: 0.5
cov_radius_x: 1.0
cov_radius_y: 1.0
bbox_min_radius: 1.0
}
}
deadzone_radius: 0.400000154972
}
Command:
!tlt-train detectnet_v2 -e $SPECS_DIR/detectnet_v2_train_resnet18_kitti.txt \
    -r $USER_EXPERIMENT_DIR/experiment_dir_unpruned \
    -k $KEY \
    -n resnet18_detector \
    --gpus 1
Log:
Using TensorFlow backend.
2021-08-27 06:42:35.521520: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.0
2021-08-27 06:42:46.568050: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2021-08-27 06:42:46.720579: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1618] Found device 0 with properties:
name: GeForce RTX 2070 SUPER major: 7 minor: 5 memoryClockRate(GHz): 1.785
pciBusID: 0000:41:00.0
2021-08-27 06:42:46.720647: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.0
2021-08-27 06:42:46.720723: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10.0
2021-08-27 06:42:46.817061: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10.0
2021-08-27 06:42:46.862962: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10.0
2021-08-27 06:42:47.057800: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10.0
2021-08-27 06:42:47.189880: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10.0
2021-08-27 06:42:47.190064: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2021-08-27 06:42:47.191902: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1746] Adding visible gpu devices: 0
2021-08-27 06:42:47.202330: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.0
2021-08-27 06:42:56.503019: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1159] Device interconnect StreamExecutor with strength 1 edge matrix:
2021-08-27 06:42:56.503075: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1165] 0
2021-08-27 06:42:56.503088: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1178] 0: N
2021-08-27 06:42:56.506055: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1304] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 6853 MB memory) -> physical GPU (device: 0, name: GeForce RTX 2070 SUPER, pci bus id: 0000:41:00.0, compute capability: 7.5)
2021-08-27 06:42:56,539 [INFO] iva.detectnet_v2.scripts.train: Loading experiment spec at /workspace/tlt-experiments/210823/specs/detectnet_v2_train_resnet18_kitti.txt.
2021-08-27 06:42:56,541 [INFO] iva.detectnet_v2.spec_handler.spec_loader: Merging specification from /workspace/tlt-experiments/210823/specs/detectnet_v2_train_resnet18_kitti.txt
2021-08-27 06:42:58,048 [INFO] iva.detectnet_v2.scripts.train: Cannot iterate over exactly 12170 samples with a batch size of 4; each epoch will therefore take one extra step.
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) (None, 3, 720, 1280) 0
__________________________________________________________________________________________________
conv1 (Conv2D) (None, 64, 360, 640) 9472 input_1[0][0]
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization) (None, 64, 360, 640) 256 conv1[0][0]
__________________________________________________________________________________________________
activation_1 (Activation) (None, 64, 360, 640) 0 bn_conv1[0][0]
__________________________________________________________________________________________________
block_1a_conv_1 (Conv2D) (None, 64, 180, 320) 36928 activation_1[0][0]
__________________________________________________________________________________________________
block_1a_bn_1 (BatchNormalizati (None, 64, 180, 320) 256 block_1a_conv_1[0][0]
__________________________________________________________________________________________________
block_1a_relu_1 (Activation) (None, 64, 180, 320) 0 block_1a_bn_1[0][0]
__________________________________________________________________________________________________
block_1a_conv_2 (Conv2D) (None, 64, 180, 320) 36928 block_1a_relu_1[0][0]
__________________________________________________________________________________________________
block_1a_conv_shortcut (Conv2D) (None, 64, 180, 320) 4160 activation_1[0][0]
__________________________________________________________________________________________________
block_1a_bn_2 (BatchNormalizati (None, 64, 180, 320) 256 block_1a_conv_2[0][0]
__________________________________________________________________________________________________
block_1a_bn_shortcut (BatchNorm (None, 64, 180, 320) 256 block_1a_conv_shortcut[0][0]
__________________________________________________________________________________________________
add_1 (Add) (None, 64, 180, 320) 0 block_1a_bn_2[0][0]
block_1a_bn_shortcut[0][0]
__________________________________________________________________________________________________
block_1a_relu (Activation) (None, 64, 180, 320) 0 add_1[0][0]
__________________________________________________________________________________________________
block_1b_conv_1 (Conv2D) (None, 64, 180, 320) 36928 block_1a_relu[0][0]
__________________________________________________________________________________________________
block_1b_bn_1 (BatchNormalizati (None, 64, 180, 320) 256 block_1b_conv_1[0][0]
__________________________________________________________________________________________________
block_1b_relu_1 (Activation) (None, 64, 180, 320) 0 block_1b_bn_1[0][0]
__________________________________________________________________________________________________
block_1b_conv_2 (Conv2D) (None, 64, 180, 320) 36928 block_1b_relu_1[0][0]
__________________________________________________________________________________________________
block_1b_bn_2 (BatchNormalizati (None, 64, 180, 320) 256 block_1b_conv_2[0][0]
__________________________________________________________________________________________________
add_2 (Add) (None, 64, 180, 320) 0 block_1b_bn_2[0][0]
block_1a_relu[0][0]
__________________________________________________________________________________________________
block_1b_relu (Activation) (None, 64, 180, 320) 0 add_2[0][0]
__________________________________________________________________________________________________
block_2a_conv_1 (Conv2D) (None, 128, 90, 160) 73856 block_1b_relu[0][0]
__________________________________________________________________________________________________
block_2a_bn_1 (BatchNormalizati (None, 128, 90, 160) 512 block_2a_conv_1[0][0]
__________________________________________________________________________________________________
block_2a_relu_1 (Activation) (None, 128, 90, 160) 0 block_2a_bn_1[0][0]
__________________________________________________________________________________________________
block_2a_conv_2 (Conv2D) (None, 128, 90, 160) 147584 block_2a_relu_1[0][0]
__________________________________________________________________________________________________
block_2a_conv_shortcut (Conv2D) (None, 128, 90, 160) 8320 block_1b_relu[0][0]
__________________________________________________________________________________________________
block_2a_bn_2 (BatchNormalizati (None, 128, 90, 160) 512 block_2a_conv_2[0][0]
__________________________________________________________________________________________________
block_2a_bn_shortcut (BatchNorm (None, 128, 90, 160) 512 block_2a_conv_shortcut[0][0]
__________________________________________________________________________________________________
add_3 (Add) (None, 128, 90, 160) 0 block_2a_bn_2[0][0]
block_2a_bn_shortcut[0][0]
__________________________________________________________________________________________________
block_2a_relu (Activation) (None, 128, 90, 160) 0 add_3[0][0]
__________________________________________________________________________________________________
block_2b_conv_1 (Conv2D) (None, 128, 90, 160) 147584 block_2a_relu[0][0]
__________________________________________________________________________________________________
block_2b_bn_1 (BatchNormalizati (None, 128, 90, 160) 512 block_2b_conv_1[0][0]
__________________________________________________________________________________________________
block_2b_relu_1 (Activation) (None, 128, 90, 160) 0 block_2b_bn_1[0][0]
__________________________________________________________________________________________________
block_2b_conv_2 (Conv2D) (None, 128, 90, 160) 147584 block_2b_relu_1[0][0]
__________________________________________________________________________________________________
block_2b_bn_2 (BatchNormalizati (None, 128, 90, 160) 512 block_2b_conv_2[0][0]
__________________________________________________________________________________________________
add_4 (Add) (None, 128, 90, 160) 0 block_2b_bn_2[0][0]
block_2a_relu[0][0]
__________________________________________________________________________________________________
block_2b_relu (Activation) (None, 128, 90, 160) 0 add_4[0][0]
__________________________________________________________________________________________________
block_3a_conv_1 (Conv2D) (None, 256, 45, 80) 295168 block_2b_relu[0][0]
__________________________________________________________________________________________________
block_3a_bn_1 (BatchNormalizati (None, 256, 45, 80) 1024 block_3a_conv_1[0][0]
__________________________________________________________________________________________________
block_3a_relu_1 (Activation) (None, 256, 45, 80) 0 block_3a_bn_1[0][0]
__________________________________________________________________________________________________
block_3a_conv_2 (Conv2D) (None, 256, 45, 80) 590080 block_3a_relu_1[0][0]
__________________________________________________________________________________________________
block_3a_conv_shortcut (Conv2D) (None, 256, 45, 80) 33024 block_2b_relu[0][0]
__________________________________________________________________________________________________
block_3a_bn_2 (BatchNormalizati (None, 256, 45, 80) 1024 block_3a_conv_2[0][0]
__________________________________________________________________________________________________
block_3a_bn_shortcut (BatchNorm (None, 256, 45, 80) 1024 block_3a_conv_shortcut[0][0]
__________________________________________________________________________________________________
add_5 (Add) (None, 256, 45, 80) 0 block_3a_bn_2[0][0]
block_3a_bn_shortcut[0][0]
__________________________________________________________________________________________________
block_3a_relu (Activation) (None, 256, 45, 80) 0 add_5[0][0]
__________________________________________________________________________________________________
block_3b_conv_1 (Conv2D) (None, 256, 45, 80) 590080 block_3a_relu[0][0]
__________________________________________________________________________________________________
block_3b_bn_1 (BatchNormalizati (None, 256, 45, 80) 1024 block_3b_conv_1[0][0]
__________________________________________________________________________________________________
block_3b_relu_1 (Activation) (None, 256, 45, 80) 0 block_3b_bn_1[0][0]
__________________________________________________________________________________________________
block_3b_conv_2 (Conv2D) (None, 256, 45, 80) 590080 block_3b_relu_1[0][0]
__________________________________________________________________________________________________
block_3b_bn_2 (BatchNormalizati (None, 256, 45, 80) 1024 block_3b_conv_2[0][0]
__________________________________________________________________________________________________
add_6 (Add) (None, 256, 45, 80) 0 block_3b_bn_2[0][0]
block_3a_relu[0][0]
__________________________________________________________________________________________________
block_3b_relu (Activation) (None, 256, 45, 80) 0 add_6[0][0]
__________________________________________________________________________________________________
block_4a_conv_1 (Conv2D) (None, 512, 45, 80) 1180160 block_3b_relu[0][0]
__________________________________________________________________________________________________
block_4a_bn_1 (BatchNormalizati (None, 512, 45, 80) 2048 block_4a_conv_1[0][0]
__________________________________________________________________________________________________
block_4a_relu_1 (Activation) (None, 512, 45, 80) 0 block_4a_bn_1[0][0]
__________________________________________________________________________________________________
block_4a_conv_2 (Conv2D) (None, 512, 45, 80) 2359808 block_4a_relu_1[0][0]
__________________________________________________________________________________________________
block_4a_conv_shortcut (Conv2D) (None, 512, 45, 80) 131584 block_3b_relu[0][0]
__________________________________________________________________________________________________
block_4a_bn_2 (BatchNormalizati (None, 512, 45, 80) 2048 block_4a_conv_2[0][0]
__________________________________________________________________________________________________
block_4a_bn_shortcut (BatchNorm (None, 512, 45, 80) 2048 block_4a_conv_shortcut[0][0]
__________________________________________________________________________________________________
add_7 (Add) (None, 512, 45, 80) 0 block_4a_bn_2[0][0]
block_4a_bn_shortcut[0][0]
__________________________________________________________________________________________________
block_4a_relu (Activation) (None, 512, 45, 80) 0 add_7[0][0]
__________________________________________________________________________________________________
block_4b_conv_1 (Conv2D) (None, 512, 45, 80) 2359808 block_4a_relu[0][0]
__________________________________________________________________________________________________
block_4b_bn_1 (BatchNormalizati (None, 512, 45, 80) 2048 block_4b_conv_1[0][0]
__________________________________________________________________________________________________
block_4b_relu_1 (Activation) (None, 512, 45, 80) 0 block_4b_bn_1[0][0]
__________________________________________________________________________________________________
block_4b_conv_2 (Conv2D) (None, 512, 45, 80) 2359808 block_4b_relu_1[0][0]
__________________________________________________________________________________________________
block_4b_bn_2 (BatchNormalizati (None, 512, 45, 80) 2048 block_4b_conv_2[0][0]
__________________________________________________________________________________________________
add_8 (Add) (None, 512, 45, 80) 0 block_4b_bn_2[0][0]
block_4a_relu[0][0]
__________________________________________________________________________________________________
block_4b_relu (Activation) (None, 512, 45, 80) 0 add_8[0][0]
__________________________________________________________________________________________________
output_bbox (Conv2D) (None, 16, 45, 80) 8208 block_4b_relu[0][0]
__________________________________________________________________________________________________
output_cov (Conv2D) (None, 4, 45, 80) 2052 block_4b_relu[0][0]
==================================================================================================
Total params: 11,205,588
Trainable params: 11,195,860
Non-trainable params: 9,728
__________________________________________________________________________________________________
2021-08-27 06:43:06,657 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: Serial augmentation enabled = False
2021-08-27 06:43:06,657 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: Pseudo sharding enabled = False
2021-08-27 06:43:06,657 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: Max Image Dimensions (all sources): (0, 0)
2021-08-27 06:43:06,657 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: number of cpus: 24, io threads: 48, compute threads: 24, buffered batches: 4
2021-08-27 06:43:06,657 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: total dataset size 12170, number of sources: 1, batch size per gpu: 4, steps: 3043
2021-08-27 06:43:06,782 [INFO] iva.detectnet_v2.dataloader.default_dataloader: Bounding box coordinates were detected in the input specification! Bboxes will be automatically converted to polygon coordinates.
2021-08-27 06:43:06.842958: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1618] Found device 0 with properties:
name: GeForce RTX 2070 SUPER major: 7 minor: 5 memoryClockRate(GHz): 1.785
pciBusID: 0000:41:00.0
2021-08-27 06:43:06.843004: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.0
2021-08-27 06:43:06.843049: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10.0
2021-08-27 06:43:06.843081: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10.0
2021-08-27 06:43:06.843104: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10.0
2021-08-27 06:43:06.843124: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10.0
2021-08-27 06:43:06.843144: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10.0
2021-08-27 06:43:06.843161: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2021-08-27 06:43:06.844017: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1746] Adding visible gpu devices: 0
2021-08-27 06:43:07,029 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: shuffle: True - shard 0 of 1
2021-08-27 06:43:07,034 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: sampling 1 datasets with weights:
2021-08-27 06:43:07,035 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: source: 0 weight: 1.000000
2021-08-27 06:43:07,592 [INFO] iva.detectnet_v2.scripts.train: Found 12170 samples in training set
2021-08-27 06:43:09,798 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: Serial augmentation enabled = False
2021-08-27 06:43:09,798 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: Pseudo sharding enabled = False
2021-08-27 06:43:09,798 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: Max Image Dimensions (all sources): (0, 0)
2021-08-27 06:43:09,798 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: number of cpus: 24, io threads: 48, compute threads: 24, buffered batches: 4
2021-08-27 06:43:09,798 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: total dataset size 1390, number of sources: 1, batch size per gpu: 4, steps: 348
2021-08-27 06:43:09,824 [INFO] iva.detectnet_v2.dataloader.default_dataloader: Bounding box coordinates were detected in the input specification! Bboxes will be automatically converted to polygon coordinates.
2021-08-27 06:43:10,032 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: shuffle: False - shard 0 of 1
2021-08-27 06:43:10,038 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: sampling 1 datasets with weights:
2021-08-27 06:43:10,038 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: source: 0 weight: 1.000000
2021-08-27 06:43:10,318 [INFO] iva.detectnet_v2.scripts.train: Found 1390 samples in validation set
2021-08-27 06:43:15.778936: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1618] Found device 0 with properties:
name: GeForce RTX 2070 SUPER major: 7 minor: 5 memoryClockRate(GHz): 1.785
pciBusID: 0000:41:00.0
2021-08-27 06:43:15.778999: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.0
2021-08-27 06:43:15.779070: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10.0
2021-08-27 06:43:15.779104: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10.0
2021-08-27 06:43:15.779127: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10.0
2021-08-27 06:43:15.779145: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10.0
2021-08-27 06:43:15.779164: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10.0
2021-08-27 06:43:15.779179: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2021-08-27 06:43:15.780011: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1746] Adding visible gpu devices: 0
2021-08-27 06:43:16.375182: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1159] Device interconnect StreamExecutor with strength 1 edge matrix:
2021-08-27 06:43:16.375215: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1165] 0
2021-08-27 06:43:16.375226: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1178] 0: N
2021-08-27 06:43:16.376290: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1304] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 6853 MB memory) -> physical GPU (device: 0, name: GeForce RTX 2070 SUPER, pci bus id: 0000:41:00.0, compute capability: 7.5)
2021-08-27 06:43:41.677393: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10.0
2021-08-27 06:43:43.724811: I tensorflow/core/kernels/cuda_solvers.cc:159] Creating CudaSolver handles for stream 0x82475d0
2021-08-27 06:43:43.725033: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10.0
2021-08-27 06:43:45.422712: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10.0
2021-08-27 06:43:45.490020: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2021-08-27 06:43:53,209 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 0.292
2021-08-27 06:44:01,523 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 4.545
2021-08-27 06:44:06,603 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 19.689
2021-08-27 06:44:11,691 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 19.653
2021-08-27 06:44:16,781 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 19.651
...
2021-08-27 06:53:59,447 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 19.680
2021-08-27 06:54:04,549 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 19.604
2021-08-27 06:54:06,398 [INFO] /usr/local/lib/python3.6/dist-packages/modulus/hooks/task_progress_monitor_hook.pyc: Epoch 101/120: loss: 0.00006 Time taken: 0:10:54.011939 ETA: 3:27:06.226842
2021-08-27 06:54:09,663 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 19.552
2021-08-27 06:54:14,750 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 19.660
...
2021-08-27 07:04:15,721 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 19.584
2021-08-27 07:04:20,798 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 19.698
2021-08-27 07:04:25,885 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 19.659
2021-08-27 07:04:26,292 [INFO] /usr/local/lib/python3.6/dist-packages/modulus/hooks/task_progress_monitor_hook.pyc: Epoch 102/120: loss: 0.00002 Time taken: 0:10:19.906578 ETA: 3:05:58.318401
2021-08-27 07:04:30,989 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 19.594
...
2021-08-27 07:14:36,781 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 19.661
2021-08-27 07:14:41,862 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 19.681
2021-08-27 07:14:45,926 [INFO] /usr/local/lib/python3.6/dist-packages/modulus/hooks/task_progress_monitor_hook.pyc: Epoch 103/120: loss: 0.00002 Time taken: 0:10:19.635089 ETA: 2:55:33.796516
2021-08-27 07:14:46,946 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 19.672
2021-08-27 07:14:52,043 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 19.618
...
2021-08-27 09:59:52,822 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 19.673
2021-08-27 09:59:57,899 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 19.696
2021-08-27 09:59:59,524 [INFO] /usr/local/lib/python3.6/dist-packages/modulus/hooks/task_progress_monitor_hook.pyc: Epoch 119/120: loss: 0.00003 Time taken: 0:10:19.345579 ETA: 0:10:19.345579
2021-08-27 10:00:02,983 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 19.673
2021-08-27 10:00:08,067 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 19.668
...
2021-08-27 10:10:18,533 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 19.661
2021-08-27 10:10:23,179 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 19.661
Time taken to run iva.detectnet_v2.scripts.train:main: 3:27:37.173935.
Validation log:
Validation cost: 0.000193
Mean average_precision (in %): 11.9801
class name average precision (in %)
------------ --------------------------
car 33.8977
cycle 5.16111
pedestrian 0.17493
truck 8.68683
Median Inference Time: 0.018317
2021-08-27 10:17:21,151 [INFO] iva.detectnet_v2.scripts.evaluate: Evaluation complete.
Thanks for any help