root@cc8b63e0b034:/workspace/src/openmpi-4.1.5# export NCCL_P2P_LEVEL=NVL
root@cc8b63e0b034:/workspace/src/openmpi-4.1.5# mpirun --allow-run-as-root --mca btl_vader_single_copy_mechanism none -np 2 python /usr/local/lib/python3.6/dist-packages/iva/detectnet_v2/scripts/train.py -e /workspace/tao-experiments/specs/detectnet_v2_train_peoplenet_kitti_multi.txt -r /workspace/results -k tlt_encode
2023-05-29 14:15:20.352957: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2023-05-29 14:15:20.353002: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
WARNING:tensorflow:Deprecation warnings have been disabled. Set TF_ENABLE_DEPRECATION_WARNINGS=1 to re-enable them.
WARNING:tensorflow:Deprecation warnings have been disabled. Set TF_ENABLE_DEPRECATION_WARNINGS=1 to re-enable them.
2023-05-29 14:15:22.677372: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2023-05-29 14:15:22.681125: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2023-05-29 14:15:22.710204: I tensorflow/core/platform/profile_utils/cpu_utils.cc:109] CPU Frequency: 3593295000 Hz
2023-05-29 14:15:22.710191: I tensorflow/core/platform/profile_utils/cpu_utils.cc:109] CPU Frequency: 3593295000 Hz
2023-05-29 14:15:22.710361: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x219f2b0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2023-05-29 14:15:22.710358: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x5767e00 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2023-05-29 14:15:22.710379: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2023-05-29 14:15:22.710382: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2023-05-29 14:15:22.712147: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2023-05-29 14:15:22.712177: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2023-05-29 14:15:22.910092: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:15:22.910492: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x5637810 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-05-29 14:15:22.910506: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA RTX 6000 Ada Generation, Compute Capability 8.9
2023-05-29 14:15:22.910689: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:15:22.910802: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1669] Found device 0 with properties:
name: NVIDIA RTX 6000 Ada Generation major: 8 minor: 9 memoryClockRate(GHz): 2.505
pciBusID: 0000:21:00.0
2023-05-29 14:15:22.910827: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2023-05-29 14:15:22.922845: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11
2023-05-29 14:15:22.924725: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcufft.so.10
2023-05-29 14:15:22.924954: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcurand.so.10
2023-05-29 14:15:22.925420: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusolver.so.11
2023-05-29 14:15:22.925943: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusparse.so.11
2023-05-29 14:15:22.926051: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.8
2023-05-29 14:15:22.926142: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:15:22.961209: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:15:22.961240: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:15:22.961405: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1797] Adding visible gpu devices: 0
2023-05-29 14:15:22.961560: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x4b93180 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-05-29 14:15:22.961582: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA RTX 6000 Ada Generation, Compute Capability 8.9
2023-05-29 14:15:22.961832: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:15:22.962150: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1669] Found device 0 with properties:
name: NVIDIA RTX 6000 Ada Generation major: 8 minor: 9 memoryClockRate(GHz): 2.505
pciBusID: 0000:22:00.0
2023-05-29 14:15:22.962184: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2023-05-29 14:15:22.978396: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11
2023-05-29 14:15:22.980430: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcufft.so.10
2023-05-29 14:15:22.980680: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcurand.so.10
2023-05-29 14:15:22.981180: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusolver.so.11
2023-05-29 14:15:22.981778: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusparse.so.11
2023-05-29 14:15:22.981906: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.8
2023-05-29 14:15:22.982052: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:15:22.982266: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:15:22.982365: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1797] Adding visible gpu devices: 1
2023-05-29 14:15:23.252974: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1209] Device interconnect StreamExecutor with strength 1 edge matrix:
2023-05-29 14:15:23.253017: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1215]      0
2023-05-29 14:15:23.253023: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1228] 0:   N
2023-05-29 14:15:23.253301: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:15:23.253497: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:15:23.253623: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1354] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 46288 MB memory) -> physical GPU (device: 0, name: NVIDIA RTX 6000 Ada Generation, pci bus id: 0000:21:00.0, compute capability: 8.9)
/usr/local/lib/python3.6/dist-packages/requests/__init__.py:91: RequestsDependencyWarning: urllib3 (1.26.5) or chardet (3.0.4) doesn't match a supported version!
  RequestsDependencyWarning)
Using TensorFlow backend.
2023-05-29 14:15:23,254 [INFO] iva.common.logging.logging: Log file already exists at /workspace/results/status.json
2023-05-29 14:15:23,254 [INFO] root: Starting DetectNet_v2 Training job
2023-05-29 14:15:23,254 [INFO] __main__: Loading experiment spec at /workspace/tao-experiments/specs/detectnet_v2_train_peoplenet_kitti_multi.txt.
2023-05-29 14:15:23,255 [INFO] iva.detectnet_v2.spec_handler.spec_loader: Merging specification from /workspace/tao-experiments/specs/detectnet_v2_train_peoplenet_kitti_multi.txt
2023-05-29 14:15:23.262424: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1209] Device interconnect StreamExecutor with strength 1 edge matrix:
2023-05-29 14:15:23.262445: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1215]      1
2023-05-29 14:15:23.262450: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1228] 1:   N
2023-05-29 14:15:23.262686: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:15:23.262868: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:15:23.262987: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1354] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 46279 MB memory) -> physical GPU (device: 1, name: NVIDIA RTX 6000 Ada Generation, pci bus id: 0000:22:00.0, compute capability: 8.9)
/usr/local/lib/python3.6/dist-packages/requests/__init__.py:91: RequestsDependencyWarning: urllib3 (1.26.5) or chardet (3.0.4) doesn't match a supported version!
  RequestsDependencyWarning)
Using TensorFlow backend.
2023-05-29 14:15:23,263 [INFO] iva.common.logging.logging: Log file already exists at /workspace/results/status.json
2023-05-29 14:15:23,263 [INFO] root: Starting DetectNet_v2 Training job
2023-05-29 14:15:23,263 [INFO] __main__: Loading experiment spec at /workspace/tao-experiments/specs/detectnet_v2_train_peoplenet_kitti_multi.txt.
2023-05-29 14:15:23,264 [INFO] iva.detectnet_v2.spec_handler.spec_loader: Merging specification from /workspace/tao-experiments/specs/detectnet_v2_train_peoplenet_kitti_multi.txt
2023-05-29 14:15:23,271 [INFO] root: Training gridbox model.
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:153: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.
2023-05-29 14:15:23,272 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:153: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.
2023-05-29 14:15:23,411 [INFO] root: Training gridbox model.
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:153: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.
2023-05-29 14:15:23,411 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:153: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.
2023-05-29 14:15:25,108 [INFO] root: Sampling mode of the dataloader was set to user_defined.
2023-05-29 14:15:25,108 [INFO] __main__: Cannot iterate over exactly 47494 samples with a batch size of 24; each epoch will therefore take one extra step.
2023-05-29 14:15:25,108 [INFO] __main__: Cannot iterate over exactly 989 steps per epoch with 24 processors; each processor will therefore take one extra step per epoch.
2023-05-29 14:15:25,220 [INFO] root: Building DetectNet V2 model
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.
2023-05-29 14:15:25,221 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.
2023-05-29 14:15:25,221 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:1834: The name tf.nn.fused_batch_norm is deprecated. Please use tf.compat.v1.nn.fused_batch_norm instead.
2023-05-29 14:15:25,236 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:1834: The name tf.nn.fused_batch_norm is deprecated. Please use tf.compat.v1.nn.fused_batch_norm instead.
2023-05-29 14:15:25,409 [INFO] root: Sampling mode of the dataloader was set to user_defined.
2023-05-29 14:15:25,409 [INFO] __main__: Cannot iterate over exactly 47494 samples with a batch size of 24; each epoch will therefore take one extra step.
2023-05-29 14:15:25,409 [INFO] __main__: Cannot iterate over exactly 989 steps per epoch with 24 processors; each processor will therefore take one extra step per epoch.
2023-05-29 14:15:25,538 [INFO] root: Building DetectNet V2 model
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.
2023-05-29 14:15:25,538 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.
2023-05-29 14:15:25,539 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:1834: The name tf.nn.fused_batch_norm is deprecated. Please use tf.compat.v1.nn.fused_batch_norm instead.
2023-05-29 14:15:25,555 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:1834: The name tf.nn.fused_batch_norm is deprecated. Please use tf.compat.v1.nn.fused_batch_norm instead.
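Side note on the two "Cannot iterate over exactly ..." messages from both ranks above: the step counts can be re-derived with simple ceiling arithmetic. A minimal sketch (the variable names are mine, not from the TAO source; it assumes the batch size of 24 from the spec and the 2 ranks from `mpirun -np 2`):

```python
import math

# Numbers taken from the log; helper names are illustrative only.
num_samples = 47494       # "total dataset size 47494"
batch_size_per_gpu = 24   # "batch size per gpu: 24"
num_gpus = 2              # mpirun -np 2

# 47494 / 24 = 1978.9..., so one extra (partial) step is added per epoch.
steps_per_epoch = math.ceil(num_samples / batch_size_per_gpu)   # 1979

# 1979 / 2 = 989.5, so each GPU takes one extra step per epoch.
steps_per_gpu = math.ceil(steps_per_epoch / num_gpus)           # 990

print(steps_per_epoch, steps_per_gpu)
```

Both results line up with the dataloader line further down ("batch size per gpu: 24, steps: 990").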
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.
2023-05-29 14:15:28,516 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:190: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.
2023-05-29 14:15:28,516 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:190: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:199: The name tf.is_variable_initialized is deprecated. Please use tf.compat.v1.is_variable_initialized instead.
2023-05-29 14:15:28,516 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:199: The name tf.is_variable_initialized is deprecated. Please use tf.compat.v1.is_variable_initialized instead.
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:206: The name tf.variables_initializer is deprecated. Please use tf.compat.v1.variables_initializer instead.
2023-05-29 14:15:29,338 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:206: The name tf.variables_initializer is deprecated. Please use tf.compat.v1.variables_initializer instead.
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.
2023-05-29 14:15:29,442 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:190: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.
2023-05-29 14:15:29,443 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:190: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:199: The name tf.is_variable_initialized is deprecated. Please use tf.compat.v1.is_variable_initialized instead.
2023-05-29 14:15:29,443 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:199: The name tf.is_variable_initialized is deprecated. Please use tf.compat.v1.is_variable_initialized instead.
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:206: The name tf.variables_initializer is deprecated. Please use tf.compat.v1.variables_initializer instead.
2023-05-29 14:15:30,466 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:206: The name tf.variables_initializer is deprecated. Please use tf.compat.v1.variables_initializer instead.
/usr/local/lib/python3.6/dist-packages/keras/engine/saving.py:292: UserWarning: No training configuration found in save file: the model was *not* compiled. Compile it manually.
  warnings.warn('No training configuration found in save file: '
2023-05-29 14:15:58,506 [INFO] iva.detectnet_v2.objectives.bbox_objective: Default L1 loss function will be used.
__________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #   Connected to
==================================================================================================
input_1 (InputLayer)             (None, 3, 544, 960)   0
conv1 (Conv2D)                   (None, 64, 272, 480)  9472      input_1[0][0]
bn_conv1 (BatchNormalization)    (None, 64, 272, 480)  256       conv1[0][0]
activation_1 (Activation)        (None, 64, 272, 480)  0         bn_conv1[0][0]
block_1a_conv_1 (Conv2D)         (None, 64, 136, 240)  36928     activation_1[0][0]
block_1a_bn_1 (BatchNormalizati  (None, 64, 136, 240)  256       block_1a_conv_1[0][0]
block_1a_relu_1 (Activation)     (None, 64, 136, 240)  0         block_1a_bn_1[0][0]
block_1a_conv_2 (Conv2D)         (None, 64, 136, 240)  36928     block_1a_relu_1[0][0]
block_1a_conv_shortcut (Conv2D)  (None, 64, 136, 240)  4160      activation_1[0][0]
block_1a_bn_2 (BatchNormalizati  (None, 64, 136, 240)  256       block_1a_conv_2[0][0]
block_1a_bn_shortcut (BatchNorm  (None, 64, 136, 240)  256       block_1a_conv_shortcut[0][0]
add_1 (Add)                      (None, 64, 136, 240)  0         block_1a_bn_2[0][0], block_1a_bn_shortcut[0][0]
block_1a_relu (Activation)       (None, 64, 136, 240)  0         add_1[0][0]
block_1b_conv_1 (Conv2D)         (None, 64, 136, 240)  36928     block_1a_relu[0][0]
block_1b_bn_1 (BatchNormalizati  (None, 64, 136, 240)  256       block_1b_conv_1[0][0]
block_1b_relu_1 (Activation)     (None, 64, 136, 240)  0         block_1b_bn_1[0][0]
block_1b_conv_2 (Conv2D)         (None, 64, 136, 240)  36928     block_1b_relu_1[0][0]
block_1b_bn_2 (BatchNormalizati  (None, 64, 136, 240)  256       block_1b_conv_2[0][0]
add_2 (Add)                      (None, 64, 136, 240)  0         block_1b_bn_2[0][0], block_1a_relu[0][0]
block_1b_relu (Activation)       (None, 64, 136, 240)  0         add_2[0][0]
block_1c_conv_1 (Conv2D)         (None, 64, 136, 240)  36928     block_1b_relu[0][0]
block_1c_bn_1 (BatchNormalizati  (None, 64, 136, 240)  256       block_1c_conv_1[0][0]
block_1c_relu_1 (Activation)     (None, 64, 136, 240)  0         block_1c_bn_1[0][0]
block_1c_conv_2 (Conv2D)         (None, 64, 136, 240)  36928     block_1c_relu_1[0][0]
block_1c_bn_2 (BatchNormalizati  (None, 64, 136, 240)  256       block_1c_conv_2[0][0]
add_3 (Add)                      (None, 64, 136, 240)  0         block_1c_bn_2[0][0], block_1b_relu[0][0]
block_1c_relu (Activation)       (None, 64, 136, 240)  0         add_3[0][0]
block_2a_conv_1 (Conv2D)         (None, 128, 68, 120)  73856     block_1c_relu[0][0]
block_2a_bn_1 (BatchNormalizati  (None, 128, 68, 120)  512       block_2a_conv_1[0][0]
block_2a_relu_1 (Activation)     (None, 128, 68, 120)  0         block_2a_bn_1[0][0]
block_2a_conv_2 (Conv2D)         (None, 128, 68, 120)  147584    block_2a_relu_1[0][0]
block_2a_conv_shortcut (Conv2D)  (None, 128, 68, 120)  8320      block_1c_relu[0][0]
block_2a_bn_2 (BatchNormalizati  (None, 128, 68, 120)  512       block_2a_conv_2[0][0]
block_2a_bn_shortcut (BatchNorm  (None, 128, 68, 120)  512       block_2a_conv_shortcut[0][0]
add_4 (Add)                      (None, 128, 68, 120)  0         block_2a_bn_2[0][0], block_2a_bn_shortcut[0][0]
block_2a_relu (Activation)       (None, 128, 68, 120)  0         add_4[0][0]
block_2b_conv_1 (Conv2D)         (None, 128, 68, 120)  147584    block_2a_relu[0][0]
block_2b_bn_1 (BatchNormalizati  (None, 128, 68, 120)  512       block_2b_conv_1[0][0]
block_2b_relu_1 (Activation)     (None, 128, 68, 120)  0         block_2b_bn_1[0][0]
block_2b_conv_2 (Conv2D)         (None, 128, 68, 120)  147584    block_2b_relu_1[0][0]
block_2b_bn_2 (BatchNormalizati  (None, 128, 68, 120)  512       block_2b_conv_2[0][0]
add_5 (Add)                      (None, 128, 68, 120)  0         block_2b_bn_2[0][0], block_2a_relu[0][0]
block_2b_relu (Activation)       (None, 128, 68, 120)  0         add_5[0][0]
block_2c_conv_1 (Conv2D)         (None, 128, 68, 120)  147584    block_2b_relu[0][0]
block_2c_bn_1 (BatchNormalizati  (None, 128, 68, 120)  512       block_2c_conv_1[0][0]
block_2c_relu_1 (Activation)     (None, 128, 68, 120)  0         block_2c_bn_1[0][0]
block_2c_conv_2 (Conv2D)         (None, 128, 68, 120)  147584    block_2c_relu_1[0][0]
block_2c_bn_2 (BatchNormalizati  (None, 128, 68, 120)  512       block_2c_conv_2[0][0]
add_6 (Add)                      (None, 128, 68, 120)  0         block_2c_bn_2[0][0], block_2b_relu[0][0]
block_2c_relu (Activation)       (None, 128, 68, 120)  0         add_6[0][0]
block_2d_conv_1 (Conv2D)         (None, 128, 68, 120)  147584    block_2c_relu[0][0]
block_2d_bn_1 (BatchNormalizati  (None, 128, 68, 120)  512       block_2d_conv_1[0][0]
block_2d_relu_1 (Activation)     (None, 128, 68, 120)  0         block_2d_bn_1[0][0]
block_2d_conv_2 (Conv2D)         (None, 128, 68, 120)  147584    block_2d_relu_1[0][0]
block_2d_bn_2 (BatchNormalizati  (None, 128, 68, 120)  512       block_2d_conv_2[0][0]
add_7 (Add)                      (None, 128, 68, 120)  0         block_2d_bn_2[0][0], block_2c_relu[0][0]
block_2d_relu (Activation)       (None, 128, 68, 120)  0         add_7[0][0]
block_3a_conv_1 (Conv2D)         (None, 256, 34, 60)   295168    block_2d_relu[0][0]
block_3a_bn_1 (BatchNormalizati  (None, 256, 34, 60)   1024      block_3a_conv_1[0][0]
block_3a_relu_1 (Activation)     (None, 256, 34, 60)   0         block_3a_bn_1[0][0]
block_3a_conv_2 (Conv2D)         (None, 256, 34, 60)   590080    block_3a_relu_1[0][0]
block_3a_conv_shortcut (Conv2D)  (None, 256, 34, 60)   33024     block_2d_relu[0][0]
block_3a_bn_2 (BatchNormalizati  (None, 256, 34, 60)   1024      block_3a_conv_2[0][0]
block_3a_bn_shortcut (BatchNorm  (None, 256, 34, 60)   1024      block_3a_conv_shortcut[0][0]
add_8 (Add)                      (None, 256, 34, 60)   0         block_3a_bn_2[0][0], block_3a_bn_shortcut[0][0]
block_3a_relu (Activation)       (None, 256, 34, 60)   0         add_8[0][0]
block_3b_conv_1 (Conv2D)         (None, 256, 34, 60)   590080    block_3a_relu[0][0]
block_3b_bn_1 (BatchNormalizati  (None, 256, 34, 60)   1024      block_3b_conv_1[0][0]
block_3b_relu_1 (Activation)     (None, 256, 34, 60)   0         block_3b_bn_1[0][0]
block_3b_conv_2 (Conv2D)         (None, 256, 34, 60)   590080    block_3b_relu_1[0][0]
block_3b_bn_2 (BatchNormalizati  (None, 256, 34, 60)   1024      block_3b_conv_2[0][0]
add_9 (Add)                      (None, 256, 34, 60)   0         block_3b_bn_2[0][0], block_3a_relu[0][0]
block_3b_relu (Activation)       (None, 256, 34, 60)   0         add_9[0][0]
block_3c_conv_1 (Conv2D)         (None, 256, 34, 60)   590080    block_3b_relu[0][0]
block_3c_bn_1 (BatchNormalizati  (None, 256, 34, 60)   1024      block_3c_conv_1[0][0]
block_3c_relu_1 (Activation)     (None, 256, 34, 60)   0         block_3c_bn_1[0][0]
block_3c_conv_2 (Conv2D)         (None, 256, 34, 60)   590080    block_3c_relu_1[0][0]
block_3c_bn_2 (BatchNormalizati  (None, 256, 34, 60)   1024      block_3c_conv_2[0][0]
add_10 (Add)                     (None, 256, 34, 60)   0         block_3c_bn_2[0][0], block_3b_relu[0][0]
block_3c_relu (Activation)       (None, 256, 34, 60)   0         add_10[0][0]
block_3d_conv_1 (Conv2D)         (None, 256, 34, 60)   590080    block_3c_relu[0][0]
block_3d_bn_1 (BatchNormalizati  (None, 256, 34, 60)   1024      block_3d_conv_1[0][0]
block_3d_relu_1 (Activation)     (None, 256, 34, 60)   0         block_3d_bn_1[0][0]
block_3d_conv_2 (Conv2D)         (None, 256, 34, 60)   590080    block_3d_relu_1[0][0]
block_3d_bn_2 (BatchNormalizati  (None, 256, 34, 60)   1024      block_3d_conv_2[0][0]
add_11 (Add)                     (None, 256, 34, 60)   0         block_3d_bn_2[0][0], block_3c_relu[0][0]
block_3d_relu (Activation)       (None, 256, 34, 60)   0         add_11[0][0]
block_3e_conv_1 (Conv2D)         (None, 256, 34, 60)   590080    block_3d_relu[0][0]
block_3e_bn_1 (BatchNormalizati  (None, 256, 34, 60)   1024      block_3e_conv_1[0][0]
block_3e_relu_1 (Activation)     (None, 256, 34, 60)   0         block_3e_bn_1[0][0]
block_3e_conv_2 (Conv2D)         (None, 256, 34, 60)   590080    block_3e_relu_1[0][0]
block_3e_bn_2 (BatchNormalizati  (None, 256, 34, 60)   1024      block_3e_conv_2[0][0]
add_12 (Add)                     (None, 256, 34, 60)   0         block_3e_bn_2[0][0], block_3d_relu[0][0]
block_3e_relu (Activation)       (None, 256, 34, 60)   0         add_12[0][0]
block_3f_conv_1 (Conv2D)         (None, 256, 34, 60)   590080    block_3e_relu[0][0]
block_3f_bn_1 (BatchNormalizati  (None, 256, 34, 60)   1024      block_3f_conv_1[0][0]
block_3f_relu_1 (Activation)     (None, 256, 34, 60)   0         block_3f_bn_1[0][0]
block_3f_conv_2 (Conv2D)         (None, 256, 34, 60)   590080    block_3f_relu_1[0][0]
block_3f_bn_2 (BatchNormalizati  (None, 256, 34, 60)   1024      block_3f_conv_2[0][0]
add_13 (Add)                     (None, 256, 34, 60)   0         block_3f_bn_2[0][0], block_3e_relu[0][0]
block_3f_relu (Activation)       (None, 256, 34, 60)   0         add_13[0][0]
block_4a_conv_1 (Conv2D)         (None, 512, 34, 60)   1180160   block_3f_relu[0][0]
block_4a_bn_1 (BatchNormalizati  (None, 512, 34, 60)   2048      block_4a_conv_1[0][0]
block_4a_relu_1 (Activation)     (None, 512, 34, 60)   0         block_4a_bn_1[0][0]
block_4a_conv_2 (Conv2D)         (None, 512, 34, 60)   2359808   block_4a_relu_1[0][0]
block_4a_conv_shortcut (Conv2D)  (None, 512, 34, 60)   131584    block_3f_relu[0][0]
block_4a_bn_2 (BatchNormalizati  (None, 512, 34, 60)   2048      block_4a_conv_2[0][0]
block_4a_bn_shortcut (BatchNorm  (None, 512, 34, 60)   2048      block_4a_conv_shortcut[0][0]
add_14 (Add)                     (None, 512, 34, 60)   0         block_4a_bn_2[0][0], block_4a_bn_shortcut[0][0]
block_4a_relu (Activation)       (None, 512, 34, 60)   0         add_14[0][0]
block_4b_conv_1 (Conv2D)         (None, 512, 34, 60)   2359808   block_4a_relu[0][0]
block_4b_bn_1 (BatchNormalizati  (None, 512, 34, 60)   2048      block_4b_conv_1[0][0]
block_4b_relu_1 (Activation)     (None, 512, 34, 60)   0         block_4b_bn_1[0][0]
block_4b_conv_2 (Conv2D)         (None, 512, 34, 60)   2359808   block_4b_relu_1[0][0]
block_4b_bn_2 (BatchNormalizati  (None, 512, 34, 60)   2048      block_4b_conv_2[0][0]
add_15 (Add)                     (None, 512, 34, 60)   0         block_4b_bn_2[0][0], block_4a_relu[0][0]
block_4b_relu (Activation)       (None, 512, 34, 60)   0         add_15[0][0]
block_4c_conv_1 (Conv2D)         (None, 512, 34, 60)   2359808   block_4b_relu[0][0]
block_4c_bn_1 (BatchNormalizati  (None, 512, 34, 60)   2048      block_4c_conv_1[0][0]
block_4c_relu_1 (Activation)     (None, 512, 34, 60)   0         block_4c_bn_1[0][0]
block_4c_conv_2 (Conv2D)         (None, 512, 34, 60)   2359808   block_4c_relu_1[0][0]
block_4c_bn_2 (BatchNormalizati  (None, 512, 34, 60)   2048      block_4c_conv_2[0][0]
add_16 (Add)                     (None, 512, 34, 60)   0         block_4c_bn_2[0][0], block_4b_relu[0][0]
block_4c_relu (Activation)       (None, 512, 34, 60)   0         add_16[0][0]
output_bbox (Conv2D)             (None, 28, 34, 60)    14364     block_4c_relu[0][0]
output_cov (Conv2D)              (None, 7, 34, 60)     3591      block_4c_relu[0][0]
==================================================================================================
Total params: 21,332,579
Trainable params: 21,080,227
Non-trainable params: 252,352
__________________________________________________________________________________________________
2023-05-29 14:15:58,538 [INFO] root: DetectNet V2 model built.
2023-05-29 14:15:58,538 [INFO] root: Building rasterizer.
2023-05-29 14:15:58,539 [INFO] root: Rasterizers built.
2023-05-29 14:15:58,553 [INFO] root: Building training graph.
2023-05-29 14:15:58,554 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: Serial augmentation enabled = False
2023-05-29 14:15:58,554 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: Pseudo sharding enabled = False
2023-05-29 14:15:58,554 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: Max Image Dimensions (all sources): (0, 0)
2023-05-29 14:15:58,554 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: number of cpus: 64, io threads: 64, compute threads: 32, buffered batches: 4
2023-05-29 14:15:58,554 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: total dataset size 47494, number of sources: 1, batch size per gpu: 24, steps: 990
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/autograph/converters/directives.py:119: The name tf.set_random_seed is deprecated. Please use tf.compat.v1.set_random_seed instead.
2023-05-29 14:15:58,589 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/autograph/converters/directives.py:119: The name tf.set_random_seed is deprecated. Please use tf.compat.v1.set_random_seed instead.
WARNING:tensorflow:Entity > could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Unable to locate the source code of >. Note that functions defined in certain environments, like the interactive Python shell do not expose their source code. If that is the case, you should to define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.do_not_convert. Original error: could not get source code
2023-05-29 14:15:58,624 [WARNING] tensorflow: Entity > could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Unable to locate the source code of >. Note that functions defined in certain environments, like the interactive Python shell do not expose their source code. If that is the case, you should to define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.do_not_convert. Original error: could not get source code
2023-05-29 14:15:58,638 [INFO] iva.detectnet_v2.dataloader.default_dataloader: Bounding box coordinates were detected in the input specification! Bboxes will be automatically converted to polygon coordinates.
2023-05-29 14:15:58.665868: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:15:58.666121: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1669] Found device 0 with properties:
name: NVIDIA RTX 6000 Ada Generation major: 8 minor: 9 memoryClockRate(GHz): 2.505
pciBusID: 0000:21:00.0
2023-05-29 14:15:58.666238: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:15:58.666360: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1669] Found device 1 with properties:
name: NVIDIA RTX 6000 Ada Generation major: 8 minor: 9 memoryClockRate(GHz): 2.505
pciBusID: 0000:22:00.0
2023-05-29 14:15:58.666379: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2023-05-29 14:15:58.666425: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11
2023-05-29 14:15:58.666442: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcufft.so.10
2023-05-29 14:15:58.666455: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcurand.so.10
2023-05-29 14:15:58.666467: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusolver.so.11
2023-05-29 14:15:58.666479: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusparse.so.11
2023-05-29 14:15:58.666491: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.8
2023-05-29 14:15:58.666556: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:15:58.666735: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:15:58.666895: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:15:58.667049: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:15:58.667158: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1797] Adding visible gpu devices: 0, 1
2023-05-29 14:15:58,822 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: shuffle: True - shard 1 of 2
2023-05-29 14:15:58,826 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: sampling 1 datasets with weights:
2023-05-29 14:15:58,827 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: source: 0 weight: 1.000000
WARNING:tensorflow:Entity > could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Unable to locate the source code of >. Note that functions defined in certain environments, like the interactive Python shell do not expose their source code. If that is the case, you should to define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.do_not_convert. Original error: could not get source code
2023-05-29 14:15:58,838 [WARNING] tensorflow: Entity > could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Unable to locate the source code of >. Note that functions defined in certain environments, like the interactive Python shell do not expose their source code. If that is the case, you should to define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.do_not_convert. Original error: could not get source code
2023-05-29 14:15:59,102 [INFO] __main__: Found 47494 samples in training set
2023-05-29 14:15:59,102 [INFO] root: Rasterizing tensors.
2023-05-29 14:15:59,282 [INFO] root: Tensors rasterized.
2023-05-29 14:16:01,837 [INFO] root: Training graph built.
2023-05-29 14:16:01,837 [INFO] root: Running training loop.
2023-05-29 14:16:01,838 [INFO] __main__: Checkpoint interval: 10
2023-05-29 14:16:01,838 [INFO] __main__: Scalars logged at every 99 steps
2023-05-29 14:16:01,838 [INFO] __main__: Images logged at every 0 steps
INFO:tensorflow:Graph was finalized.
2023-05-29 14:16:06,232 [INFO] tensorflow: Graph was finalized.
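The "shuffle: True - shard 1 of 2" and "source: 0 weight: 1.000000" lines show each rank reading its own half of the 47494 samples. Purely to illustrate what that sharding means, here is a sketch in plain tf.data (this is not the modulus dataloader's actual code, and the file pattern is made up):

```python
import tensorflow as tf

def make_sharded_dataset(file_pattern, rank, num_ranks, batch_size=24):
    """Illustrative per-rank sharding, i.e. the 'shard 1 of 2' in the log."""
    files = tf.io.gfile.glob(file_pattern)  # hypothetical tfrecord files
    ds = tf.data.TFRecordDataset(files)
    # Each rank keeps every num_ranks-th record starting at its own index,
    # so the two ranks see disjoint halves of the dataset.
    ds = ds.shard(num_shards=num_ranks, index=rank)
    ds = ds.shuffle(buffer_size=4096).batch(batch_size)
    return ds

# rank 1 of 2, matching the "shard 1 of 2" line above:
# ds = make_sharded_dataset("/workspace/tao-experiments/data/tfrecords/*", rank=1, num_ranks=2)
```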
2023-05-29 14:16:06.233710: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:16:06.233933: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1669] Found device 0 with properties:
name: NVIDIA RTX 6000 Ada Generation major: 8 minor: 9 memoryClockRate(GHz): 2.505
pciBusID: 0000:22:00.0
2023-05-29 14:16:06.233965: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2023-05-29 14:16:06.234045: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11
2023-05-29 14:16:06.234069: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcufft.so.10
2023-05-29 14:16:06.234089: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcurand.so.10
2023-05-29 14:16:06.234107: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusolver.so.11
2023-05-29 14:16:06.234125: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusparse.so.11
2023-05-29 14:16:06.234143: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.8
2023-05-29 14:16:06.234252: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:16:06.234467: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:16:06.234598: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1797] Adding visible gpu devices: 1
2023-05-29 14:16:06.720471: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1209] Device interconnect StreamExecutor with strength 1 edge matrix:
2023-05-29 14:16:06.720519: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1215]      1
2023-05-29 14:16:06.720526: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1228] 1:   N
2023-05-29 14:16:06.720811: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:16:06.721055: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:16:06.721207: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1354] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 46279 MB memory) -> physical GPU (device: 1, name: NVIDIA RTX 6000 Ada Generation, pci bus id: 0000:22:00.0, compute capability: 8.9)
INFO:tensorflow:Restoring parameters from /tmp/tmpyghkjph6/model.ckpt-0
2023-05-29 14:16:06,721 [INFO] tensorflow: Restoring parameters from /tmp/tmpyghkjph6/model.ckpt-0
/usr/local/lib/python3.6/dist-packages/keras/engine/saving.py:292: UserWarning: No training configuration found in save file: the model was *not* compiled. Compile it manually.
  warnings.warn('No training configuration found in save file: '
2023-05-29 14:16:07,599 [INFO] iva.detectnet_v2.objectives.bbox_objective: Default L1 loss function will be used.
[second rank's model summary omitted: identical to the layer table above]
__________________________________________________________________________________________________ block_3d_conv_1 (Conv2D) (None, 256, 34, 60) 590080 block_3c_relu[0][0] __________________________________________________________________________________________________ block_3d_bn_1 (BatchNormalizati (None, 256, 34, 60) 1024 block_3d_conv_1[0][0] __________________________________________________________________________________________________ block_3d_relu_1 (Activation) (None, 256, 34, 60) 0 block_3d_bn_1[0][0] __________________________________________________________________________________________________ block_3d_conv_2 (Conv2D) (None, 256, 34, 60) 590080 block_3d_relu_1[0][0] __________________________________________________________________________________________________ block_3d_bn_2 (BatchNormalizati (None, 256, 34, 60) 1024 block_3d_conv_2[0][0] __________________________________________________________________________________________________ add_11 (Add) (None, 256, 34, 60) 0 block_3d_bn_2[0][0] block_3c_relu[0][0] __________________________________________________________________________________________________ block_3d_relu (Activation) (None, 256, 34, 60) 0 add_11[0][0] __________________________________________________________________________________________________ block_3e_conv_1 (Conv2D) (None, 256, 34, 60) 590080 block_3d_relu[0][0] __________________________________________________________________________________________________ block_3e_bn_1 (BatchNormalizati (None, 256, 34, 60) 1024 block_3e_conv_1[0][0] __________________________________________________________________________________________________ block_3e_relu_1 (Activation) (None, 256, 34, 60) 0 block_3e_bn_1[0][0] __________________________________________________________________________________________________ block_3e_conv_2 (Conv2D) (None, 256, 34, 60) 590080 block_3e_relu_1[0][0] __________________________________________________________________________________________________ block_3e_bn_2 (BatchNormalizati (None, 256, 34, 60) 1024 block_3e_conv_2[0][0] __________________________________________________________________________________________________ add_12 (Add) (None, 256, 34, 60) 0 block_3e_bn_2[0][0] block_3d_relu[0][0] __________________________________________________________________________________________________ block_3e_relu (Activation) (None, 256, 34, 60) 0 add_12[0][0] __________________________________________________________________________________________________ block_3f_conv_1 (Conv2D) (None, 256, 34, 60) 590080 block_3e_relu[0][0] __________________________________________________________________________________________________ block_3f_bn_1 (BatchNormalizati (None, 256, 34, 60) 1024 block_3f_conv_1[0][0] __________________________________________________________________________________________________ block_3f_relu_1 (Activation) (None, 256, 34, 60) 0 block_3f_bn_1[0][0] __________________________________________________________________________________________________ block_3f_conv_2 (Conv2D) (None, 256, 34, 60) 590080 block_3f_relu_1[0][0] __________________________________________________________________________________________________ block_3f_bn_2 (BatchNormalizati (None, 256, 34, 60) 1024 block_3f_conv_2[0][0] __________________________________________________________________________________________________ add_13 (Add) (None, 256, 34, 60) 0 block_3f_bn_2[0][0] block_3e_relu[0][0] __________________________________________________________________________________________________ 
block_3f_relu (Activation) (None, 256, 34, 60) 0 add_13[0][0] __________________________________________________________________________________________________ block_4a_conv_1 (Conv2D) (None, 512, 34, 60) 1180160 block_3f_relu[0][0] __________________________________________________________________________________________________ block_4a_bn_1 (BatchNormalizati (None, 512, 34, 60) 2048 block_4a_conv_1[0][0] __________________________________________________________________________________________________ block_4a_relu_1 (Activation) (None, 512, 34, 60) 0 block_4a_bn_1[0][0] __________________________________________________________________________________________________ block_4a_conv_2 (Conv2D) (None, 512, 34, 60) 2359808 block_4a_relu_1[0][0] __________________________________________________________________________________________________ block_4a_conv_shortcut (Conv2D) (None, 512, 34, 60) 131584 block_3f_relu[0][0] __________________________________________________________________________________________________ block_4a_bn_2 (BatchNormalizati (None, 512, 34, 60) 2048 block_4a_conv_2[0][0] __________________________________________________________________________________________________ block_4a_bn_shortcut (BatchNorm (None, 512, 34, 60) 2048 block_4a_conv_shortcut[0][0] __________________________________________________________________________________________________ add_14 (Add) (None, 512, 34, 60) 0 block_4a_bn_2[0][0] block_4a_bn_shortcut[0][0] __________________________________________________________________________________________________ block_4a_relu (Activation) (None, 512, 34, 60) 0 add_14[0][0] __________________________________________________________________________________________________ block_4b_conv_1 (Conv2D) (None, 512, 34, 60) 2359808 block_4a_relu[0][0] __________________________________________________________________________________________________ block_4b_bn_1 (BatchNormalizati (None, 512, 34, 60) 2048 block_4b_conv_1[0][0] __________________________________________________________________________________________________ block_4b_relu_1 (Activation) (None, 512, 34, 60) 0 block_4b_bn_1[0][0] __________________________________________________________________________________________________ block_4b_conv_2 (Conv2D) (None, 512, 34, 60) 2359808 block_4b_relu_1[0][0] __________________________________________________________________________________________________ block_4b_bn_2 (BatchNormalizati (None, 512, 34, 60) 2048 block_4b_conv_2[0][0] __________________________________________________________________________________________________ add_15 (Add) (None, 512, 34, 60) 0 block_4b_bn_2[0][0] block_4a_relu[0][0] __________________________________________________________________________________________________ block_4b_relu (Activation) (None, 512, 34, 60) 0 add_15[0][0] __________________________________________________________________________________________________ block_4c_conv_1 (Conv2D) (None, 512, 34, 60) 2359808 block_4b_relu[0][0] __________________________________________________________________________________________________ block_4c_bn_1 (BatchNormalizati (None, 512, 34, 60) 2048 block_4c_conv_1[0][0] __________________________________________________________________________________________________ block_4c_relu_1 (Activation) (None, 512, 34, 60) 0 block_4c_bn_1[0][0] __________________________________________________________________________________________________ block_4c_conv_2 (Conv2D) (None, 512, 34, 60) 2359808 block_4c_relu_1[0][0] 
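The shapes and parameter counts in the summary above are internally consistent; a quick sanity check (a minimal sketch, not TAO code; the class count of 7 is inferred from output_cov's channel dimension, and the heads are assumed to be 1x1 convolutions because that is the only kernel size consistent with the printed parameter counts):

    # Sanity-check the DetectNet_v2 head shapes and param counts printed above.
    # Assumptions: 3x544x960 input, backbone output stride 16, 1x1 conv heads,
    # num_classes inferred from output_cov -- none of this is stated explicitly
    # in the log itself.
    input_h, input_w = 544, 960
    stride = 16
    num_classes = 7                       # from output_cov: (None, 7, 34, 60)

    assert (input_h // stride, input_w // stride) == (34, 60)  # the 34x60 grid
    assert 4 * num_classes == 28          # output_bbox channels: 4 coords/class

    # 1x1 Conv2D params = in_channels * out_channels + out_channels (bias):
    assert 512 * 28 + 28 == 14364         # output_bbox param count above
    assert 512 * 7 + 7 == 3591            # output_cov param count above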
2023-05-29 14:16:07,631 [INFO] root: DetectNet V2 model built.
2023-05-29 14:16:07,631 [INFO] root: Building rasterizer.
2023-05-29 14:16:07,632 [INFO] root: Rasterizers built.
2023-05-29 14:16:07,645 [INFO] root: Building training graph.
2023-05-29 14:16:07,647 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: Serial augmentation enabled = False
2023-05-29 14:16:07,647 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: Pseudo sharding enabled = False
2023-05-29 14:16:07,647 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: Max Image Dimensions (all sources): (0, 0)
2023-05-29 14:16:07,647 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: number of cpus: 64, io threads: 64, compute threads: 32, buffered batches: 4
2023-05-29 14:16:07,647 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: total dataset size 47494, number of sources: 1, batch size per gpu: 24, steps: 990
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/autograph/converters/directives.py:119: The name tf.set_random_seed is deprecated. Please use tf.compat.v1.set_random_seed instead.
2023-05-29 14:16:07,683 [WARNING] tensorflow: From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/autograph/converters/directives.py:119: The name tf.set_random_seed is deprecated. Please use tf.compat.v1.set_random_seed instead.
WARNING:tensorflow:Entity > could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Unable to locate the source code of >. Note that functions defined in certain environments, like the interactive Python shell do not expose their source code. If that is the case, you should to define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.do_not_convert. Original error: could not get source code
2023-05-29 14:16:07,717 [WARNING] tensorflow: Entity > could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Unable to locate the source code of >. Note that functions defined in certain environments, like the interactive Python shell do not expose their source code. If that is the case, you should to define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.do_not_convert. Original error: could not get source code
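The "Entity > could not be transformed" warnings above (the entity names were eaten when the log was pasted) are emitted when AutoGraph cannot read a function's source, which is expected for TAO's packaged dataloader code; the functions simply run unconverted. The warning text itself names the workaround. A minimal sketch of that suggestion, with a hypothetical map function (the decorator has lived under tf.autograph.experimental in some TF releases):

    import tensorflow as tf

    # Hypothetical illustration only: excluding a function from AutoGraph
    # conversion silences the "could not be transformed" warning; the function
    # then runs as-is, exactly as the warning says it would anyway.
    @tf.autograph.experimental.do_not_convert
    def scale(x):
        return x * 2

    ds = tf.data.Dataset.range(4).map(scale)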
2023-05-29 14:16:07,731 [INFO] iva.detectnet_v2.dataloader.default_dataloader: Bounding box coordinates were detected in the input specification! Bboxes will be automatically converted to polygon coordinates.
2023-05-29 14:16:07.759049: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:16:07.759227: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1669] Found device 0 with properties: name: NVIDIA RTX 6000 Ada Generation major: 8 minor: 9 memoryClockRate(GHz): 2.505 pciBusID: 0000:21:00.0
2023-05-29 14:16:07.759319: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:16:07.759447: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1669] Found device 1 with properties: name: NVIDIA RTX 6000 Ada Generation major: 8 minor: 9 memoryClockRate(GHz): 2.505 pciBusID: 0000:22:00.0
2023-05-29 14:16:07.759462: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2023-05-29 14:16:07.759507: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11
2023-05-29 14:16:07.759524: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcufft.so.10
2023-05-29 14:16:07.759540: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcurand.so.10
2023-05-29 14:16:07.759554: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusolver.so.11
2023-05-29 14:16:07.759567: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusparse.so.11
2023-05-29 14:16:07.759581: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.8
2023-05-29 14:16:07.759641: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:16:07.759804: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:16:07.759976: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:16:07.760137: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:16:07.760251: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1797] Adding visible gpu devices: 0, 1
2023-05-29 14:16:07,912 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: shuffle: True - shard 0 of 2
2023-05-29 14:16:07,917 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: sampling 1 datasets with weights:
2023-05-29 14:16:07,917 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: source: 0 weight: 1.000000
WARNING:tensorflow:Entity > could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Unable to locate the source code of >. Note that functions defined in certain environments, like the interactive Python shell do not expose their source code. If that is the case, you should to define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.do_not_convert. Original error: could not get source code
2023-05-29 14:16:07,928 [WARNING] tensorflow: Entity > could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Unable to locate the source code of >. Note that functions defined in certain environments, like the interactive Python shell do not expose their source code. If that is the case, you should to define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.do_not_convert. Original error: could not get source code
2023-05-29 14:16:08,191 [INFO] __main__: Found 47494 samples in training set
2023-05-29 14:16:08,191 [INFO] root: Rasterizing tensors.
2023-05-29 14:16:08,370 [INFO] root: Tensors rasterized.
INFO:tensorflow:Running local_init_op.
2023-05-29 14:16:09,719 [INFO] tensorflow: Running local_init_op.
INFO:tensorflow:Done running local_init_op.
2023-05-29 14:16:10,168 [INFO] tensorflow: Done running local_init_op.
2023-05-29 14:16:11,115 [INFO] root: Training graph built.
2023-05-29 14:16:11,115 [INFO] root: Building validation graph.
2023-05-29 14:16:11,116 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: Serial augmentation enabled = False
2023-05-29 14:16:11,116 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: Pseudo sharding enabled = False
2023-05-29 14:16:11,116 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: Max Image Dimensions (all sources): (0, 0)
2023-05-29 14:16:11,116 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: number of cpus: 64, io threads: 128, compute threads: 64, buffered batches: 4
2023-05-29 14:16:11,116 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: total dataset size 11873, number of sources: 1, batch size per gpu: 24, steps: 495
WARNING:tensorflow:Entity > could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Unable to locate the source code of >. Note that functions defined in certain environments, like the interactive Python shell do not expose their source code. If that is the case, you should to define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.do_not_convert. Original error: could not get source code
2023-05-29 14:16:11,127 [WARNING] tensorflow: Entity > could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Unable to locate the source code of >. Note that functions defined in certain environments, like the interactive Python shell do not expose their source code. If that is the case, you should to define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.do_not_convert. Original error: could not get source code
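The step counts the data loader reports above follow directly from the dataset sizes, the per-GPU batch size, and the sharding; a quick check (a sketch assuming steps = ceil(size / (batch * shards)), which reproduces both numbers):

    import math

    batch_per_gpu = 24

    # Training data is split across the two ranks ("shard 0 of 2" above):
    assert math.ceil(47494 / (batch_per_gpu * 2)) == 990   # steps per epoch

    # Validation runs unsharded ("shard 0 of 1" below):
    assert math.ceil(11873 / (batch_per_gpu * 1)) == 495   # validation steps

    # "Scalars logged at every 99 steps" (below) is then one log per 0.1 epoch:
    assert 990 // 10 == 99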
2023-05-29 14:16:11,142 [INFO] iva.detectnet_v2.dataloader.default_dataloader: Bounding box coordinates were detected in the input specification! Bboxes will be automatically converted to polygon coordinates.
2023-05-29 14:16:11,340 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: shuffle: False - shard 0 of 1
2023-05-29 14:16:11,344 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: sampling 1 datasets with weights:
2023-05-29 14:16:11,344 [INFO] modulus.blocks.data_loaders.multi_source_loader.data_loader: source: 0 weight: 1.000000
WARNING:tensorflow:Entity > could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Unable to locate the source code of >. Note that functions defined in certain environments, like the interactive Python shell do not expose their source code. If that is the case, you should to define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.do_not_convert. Original error: could not get source code
2023-05-29 14:16:11,358 [WARNING] tensorflow: Entity > could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Unable to locate the source code of >. Note that functions defined in certain environments, like the interactive Python shell do not expose their source code. If that is the case, you should to define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.do_not_convert. Original error: could not get source code
2023-05-29 14:16:11,550 [INFO] __main__: Found 11873 samples in validation set
2023-05-29 14:16:11,551 [INFO] root: Rasterizing tensors.
2023-05-29 14:16:11,761 [INFO] root: Tensors rasterized.
2023-05-29 14:16:12,251 [INFO] root: Validation graph built.
2023-05-29 14:16:13,927 [INFO] root: Running training loop.
2023-05-29 14:16:13,928 [INFO] __main__: Checkpoint interval: 10
2023-05-29 14:16:13,928 [INFO] __main__: Scalars logged at every 99 steps
2023-05-29 14:16:13,928 [INFO] __main__: Images logged at every 0 steps
INFO:tensorflow:Create CheckpointSaverHook.
2023-05-29 14:16:13,930 [INFO] tensorflow: Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
2023-05-29 14:16:19,408 [INFO] tensorflow: Graph was finalized.
2023-05-29 14:16:19.408702: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:16:19.408872: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1669] Found device 0 with properties: name: NVIDIA RTX 6000 Ada Generation major: 8 minor: 9 memoryClockRate(GHz): 2.505 pciBusID: 0000:21:00.0
2023-05-29 14:16:19.408903: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2023-05-29 14:16:19.408958: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11
2023-05-29 14:16:19.408977: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcufft.so.10
2023-05-29 14:16:19.408991: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcurand.so.10
2023-05-29 14:16:19.409004: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusolver.so.11
2023-05-29 14:16:19.409017: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusparse.so.11
2023-05-29 14:16:19.409031: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.8
2023-05-29 14:16:19.409106: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:16:19.409285: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:16:19.409411: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1797] Adding visible gpu devices: 0
2023-05-29 14:16:19.777004: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1209] Device interconnect StreamExecutor with strength 1 edge matrix:
2023-05-29 14:16:19.777025: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1215] 0
2023-05-29 14:16:19.777030: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1228] 0: N
2023-05-29 14:16:19.777184: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:16:19.777381: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1082] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-29 14:16:19.777507: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1354] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 46288 MB memory) -> physical GPU (device: 0, name: NVIDIA RTX 6000 Ada Generation, pci bus id: 0000:21:00.0, compute capability: 8.9)
INFO:tensorflow:Restoring parameters from /tmp/tmpq_oy6hoc/model.ckpt-0
2023-05-29 14:16:19,777 [INFO] tensorflow: Restoring parameters from /tmp/tmpq_oy6hoc/model.ckpt-0
INFO:tensorflow:Running local_init_op.
2023-05-29 14:16:22,168 [INFO] tensorflow: Running local_init_op.
INFO:tensorflow:Done running local_init_op.
2023-05-29 14:16:22,790 [INFO] tensorflow: Done running local_init_op.
2023-05-29 14:16:27.804669: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11
2023-05-29 14:16:28.467824: I tensorflow/core/kernels/cuda_solvers.cc:159] Creating CudaSolver handles for stream 0x6de3080
2023-05-29 14:16:28.467993: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusolver.so.11
2023-05-29 14:16:28.527132: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11
2023-05-29 14:16:28.572715: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.8
INFO:tensorflow:Saving checkpoints for step-0.
2023-05-29 14:16:34,036 [INFO] tensorflow: Saving checkpoints for step-0.
2023-05-29 14:17:10.122706: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11
2023-05-29 14:17:10.879587: I tensorflow/core/kernels/cuda_solvers.cc:159] Creating CudaSolver handles for stream 0x78840c0
2023-05-29 14:17:10.879750: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusolver.so.11
2023-05-29 14:17:10.940628: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11
2023-05-29 14:17:10.950064: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.8
cc8b63e0b034:189857:189876 [0] NCCL INFO Bootstrap : Using eth0:172.17.0.2<0>
cc8b63e0b034:189857:189876 [0] NCCL INFO NET/Plugin: Failed to find ncclNetPlugin_v6 symbol.
cc8b63e0b034:189857:189876 [0] NCCL INFO NET/Plugin: Loaded net plugin NCCL RDMA Plugin (v5)
cc8b63e0b034:189857:189876 [0] NCCL INFO NET/Plugin: Failed to find ncclCollNetPlugin_v6 symbol.
cc8b63e0b034:189857:189876 [0] NCCL INFO NET/Plugin: Loaded coll plugin SHARP (v5)
cc8b63e0b034:189857:189876 [0] NCCL INFO cudaDriverVersion 12000
NCCL version 2.15.5+cuda11.8
cc8b63e0b034:189857:189876 [0] NCCL INFO Plugin Path : /opt/hpcx/nccl_rdma_sharp_plugin/lib/libnccl-net.so
cc8b63e0b034:189857:189876 [0] NCCL INFO P2P plugin IBext
cc8b63e0b034:189857:189876 [0] NCCL INFO NET/IB : No device found.
cc8b63e0b034:189857:189876 [0] NCCL INFO NET/IB : No device found.
cc8b63e0b034:189857:189876 [0] NCCL INFO NET/Socket : Using [0]eth0:172.17.0.2<0>
cc8b63e0b034:189857:189876 [0] NCCL INFO Using network Socket
cc8b63e0b034:189857:189876 [0] NCCL INFO NCCL_P2P_LEVEL set by environment to NVL
cc8b63e0b034:189857:189876 [0] NCCL INFO Channel 00/04 : 0 1
cc8b63e0b034:189857:189876 [0] NCCL INFO Channel 01/04 : 0 1
cc8b63e0b034:189857:189876 [0] NCCL INFO Channel 02/04 : 0 1
cc8b63e0b034:189857:189876 [0] NCCL INFO Channel 03/04 : 0 1
cc8b63e0b034:189857:189876 [0] NCCL INFO Trees [0] 1/-1/-1->0->-1 [1] -1/-1/-1->0->1 [2] 1/-1/-1->0->-1 [3] -1/-1/-1->0->1
cc8b63e0b034:189857:189876 [0] NCCL INFO Channel 00 : 0[21000] -> 1[22000] via SHM/direct/direct
cc8b63e0b034:189857:189876 [0] NCCL INFO Channel 01 : 0[21000] -> 1[22000] via SHM/direct/direct
cc8b63e0b034:189857:189876 [0] NCCL INFO Channel 02 : 0[21000] -> 1[22000] via SHM/direct/direct
cc8b63e0b034:189857:189876 [0] NCCL INFO Channel 03 : 0[21000] -> 1[22000] via SHM/direct/direct
cc8b63e0b034:189857:189876 [0] NCCL INFO Connected all rings
cc8b63e0b034:189857:189876 [0] NCCL INFO Connected all trees
cc8b63e0b034:189857:189876 [0] NCCL INFO threadThresholds 8/8/64 | 16/8/64 | 512 | 512
cc8b63e0b034:189857:189876 [0] NCCL INFO 4 coll channels, 4 p2p channels, 2 p2p channels per peer
cc8b63e0b034:189857:189876 [0] NCCL INFO comm 0x7ff4cba24870 rank 0 nranks 2 cudaDev 0 busId 21000 - Init COMPLETE
INFO:tensorflow:epoch = 0.0, learning_rate = 4.9999994e-06, loss = 0.11156941, step = 0
INFO:tensorflow:epoch = 0.0, learning_rate = 4.9999994e-06, loss = 0.11046392, step = 0
2023-05-29 14:17:17,805 [INFO] tensorflow: epoch = 0.0, learning_rate = 4.9999994e-06, loss = 0.11156941, step = 0
2023-05-29 14:17:17,805 [INFO] tensorflow: epoch = 0.0, learning_rate = 4.9999994e-06, loss = 0.11046392, step = 0
2023-05-29 14:17:17,806 [INFO] root: None
2023-05-29 14:17:17,807 [INFO] iva.detectnet_v2.tfhooks.task_progress_monitor_hook: Epoch 0/100: loss: 0.11157 learning rate: 0.00000 Time taken: 0:00:00 ETA: 0:00:00
2023-05-29 14:17:17,807 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 4.794
INFO:tensorflow:epoch = 0.006060606060606061, learning_rate = 5.0139756e-06, loss = 0.11070599, step = 6 (5.871 sec)
2023-05-29 14:17:23,676 [INFO] tensorflow: epoch = 0.006060606060606061, learning_rate = 5.0139756e-06, loss = 0.11070599, step = 6 (5.871 sec)
INFO:tensorflow:epoch = 0.006060606060606061, learning_rate = 5.0139756e-06, loss = 0.11053446, step = 6 (5.913 sec)
2023-05-29 14:17:23,718 [INFO] tensorflow: epoch = 0.006060606060606061, learning_rate = 5.0139756e-06, loss = 0.11053446, step = 6 (5.913 sec)
INFO:tensorflow:epoch = 0.01818181818181818, learning_rate = 5.0420417e-06, loss = 0.10803894, step = 18 (5.967 sec)
2023-05-29 14:17:29,684 [INFO] tensorflow: epoch = 0.01818181818181818, learning_rate = 5.0420417e-06, loss = 0.10803894, step = 18 (5.967 sec)
INFO:tensorflow:epoch = 0.01818181818181818, learning_rate = 5.0420417e-06, loss = 0.10884521, step = 18 (6.024 sec)
2023-05-29 14:17:29,700 [INFO] tensorflow: epoch = 0.01818181818181818, learning_rate = 5.0420417e-06, loss = 0.10884521, step = 18 (6.024 sec)
2023-05-29 14:17:32,675 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 48.230
INFO:tensorflow:epoch = 0.029292929292929294, learning_rate = 5.0679046e-06, loss = 0.10588858, step = 29 (5.473 sec)
2023-05-29 14:17:35,173 [INFO] tensorflow: epoch = 0.029292929292929294, learning_rate = 5.0679046e-06, loss = 0.10588858, step = 29 (5.473 sec)
INFO:tensorflow:epoch = 0.029292929292929294, learning_rate = 5.0679046e-06, loss = 0.10638462, step = 29 (5.499 sec)
2023-05-29 14:17:35,183 [INFO] tensorflow: epoch = 0.029292929292929294, learning_rate = 5.0679046e-06, loss = 0.10638462, step = 29 (5.499 sec)
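For context, a two-rank NCCL communicator like the one initialized above is what the Horovod-on-mpirun pattern produces, and TAO's detectnet_v2 entrypoint drives all of this internally. The following is only a rough sketch of the equivalent user-level setup under that assumption, not TAO code (the learning rate is a placeholder):

    import horovod.tensorflow as hvd
    import tensorflow as tf

    hvd.init()    # one rank per mpirun process; nranks == 2 in the log above

    # Pin one GPU per rank, so each process creates its TF session on a
    # single device, as in the per-rank device logs above.
    config = tf.ConfigProto()
    config.gpu_options.visible_device_list = str(hvd.local_rank())

    # Cross-rank gradient averaging is the traffic that rides on the NCCL
    # rings/trees set up above.
    opt = hvd.DistributedOptimizer(tf.train.AdamOptimizer(5e-6))

Note the "via SHM/direct/direct" lines: with NCCL_P2P_LEVEL=NVL, peer-to-peer copies are only allowed over NVLink, so these two GPUs fall back to shared-memory transport, which is harmless on a single node.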
INFO:tensorflow:epoch = 0.04141414141414142, learning_rate = 5.096272e-06, loss = 0.10510203, step = 41 (6.088 sec)
2023-05-29 14:17:41,261 [INFO] tensorflow: epoch = 0.04141414141414142, learning_rate = 5.096272e-06, loss = 0.10510203, step = 41 (6.088 sec)
INFO:tensorflow:epoch = 0.04141414141414142, learning_rate = 5.096272e-06, loss = 0.10378541, step = 41 (6.127 sec)
2023-05-29 14:17:41,310 [INFO] tensorflow: epoch = 0.04141414141414142, learning_rate = 5.096272e-06, loss = 0.10378541, step = 41 (6.127 sec)
2023-05-29 14:17:45,329 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 94.834
INFO:tensorflow:epoch = 0.05353535353535353, learning_rate = 5.1248035e-06, loss = 0.10215977, step = 53 (5.938 sec)
2023-05-29 14:17:47,248 [INFO] tensorflow: epoch = 0.05353535353535353, learning_rate = 5.1248035e-06, loss = 0.10215977, step = 53 (5.938 sec)
INFO:tensorflow:epoch = 0.05353535353535353, learning_rate = 5.1248035e-06, loss = 0.10244183, step = 53 (6.011 sec)
2023-05-29 14:17:47,272 [INFO] tensorflow: epoch = 0.05353535353535353, learning_rate = 5.1248035e-06, loss = 0.10244183, step = 53 (6.011 sec)
INFO:tensorflow:epoch = 0.06565656565656566, learning_rate = 5.1534894e-06, loss = 0.10088095, step = 65 (5.671 sec)
2023-05-29 14:17:52,919 [INFO] tensorflow: epoch = 0.06565656565656566, learning_rate = 5.1534894e-06, loss = 0.10088095, step = 65 (5.671 sec)
INFO:tensorflow:epoch = 0.06565656565656566, learning_rate = 5.1534894e-06, loss = 0.102030784, step = 65 (5.648 sec)
2023-05-29 14:17:52,920 [INFO] tensorflow: epoch = 0.06565656565656566, learning_rate = 5.1534894e-06, loss = 0.102030784, step = 65 (5.648 sec)
2023-05-29 14:17:57,322 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 100.058
INFO:tensorflow:epoch = 0.07777777777777778, learning_rate = 5.182336e-06, loss = 0.099258006, step = 77 (5.970 sec)
2023-05-29 14:17:58,890 [INFO] tensorflow: epoch = 0.07777777777777778, learning_rate = 5.182336e-06, loss = 0.099258006, step = 77 (5.970 sec)
INFO:tensorflow:epoch = 0.07777777777777778, learning_rate = 5.182336e-06, loss = 0.10038659, step = 77 (5.990 sec)
2023-05-29 14:17:58,910 [INFO] tensorflow: epoch = 0.07777777777777778, learning_rate = 5.182336e-06, loss = 0.10038659, step = 77 (5.990 sec)
INFO:tensorflow:epoch = 0.08888888888888889, learning_rate = 5.208919e-06, loss = 0.09728847, step = 88 (5.586 sec)
2023-05-29 14:18:04,476 [INFO] tensorflow: epoch = 0.08888888888888889, learning_rate = 5.208919e-06, loss = 0.09728847, step = 88 (5.586 sec)
INFO:tensorflow:epoch = 0.08888888888888889, learning_rate = 5.208919e-06, loss = 0.096830234, step = 88 (5.664 sec)
2023-05-29 14:18:04,574 [INFO] tensorflow: epoch = 0.08888888888888889, learning_rate = 5.208919e-06, loss = 0.096830234, step = 88 (5.664 sec)
INFO:tensorflow:epoch = 0.1, learning_rate = 5.235643e-06, loss = 0.09449359, step = 99 (5.725 sec)
2023-05-29 14:18:10,300 [INFO] tensorflow: epoch = 0.1, learning_rate = 5.235643e-06, loss = 0.09449359, step = 99 (5.725 sec)
INFO:tensorflow:epoch = 0.1, learning_rate = 5.235643e-06, loss = 0.095957115, step = 99 (5.844 sec)
2023-05-29 14:18:10,320 [INFO] tensorflow: epoch = 0.1, learning_rate = 5.235643e-06, loss = 0.095957115, step = 99 (5.844 sec)
INFO:tensorflow:global_step/sec: 1.88413
2023-05-29 14:18:10,350 [INFO] tensorflow: global_step/sec: 1.88413
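The three rates reported above are mutually consistent; a quick cross-check (approximate, since the hooks average over different windows, and assuming each rank consumes one 24-sample batch per global step):

    steps_per_sec = 1.88413            # "global_step/sec" above
    batch_per_gpu, num_ranks = 24, 2

    # Aggregate throughput across both ranks:
    print(steps_per_sec * batch_per_gpu * num_ranks)   # ~90.4 samples/sec,
    # in line with the ~92-100 "Train Samples / sec" readings.

    # The epoch fraction also lines up: with 990 steps per epoch,
    # step 99 is exactly the "epoch = 0.1" logged above.
    assert 99 / 990 == 0.1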
2023-05-29 14:18:10,352 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 92.097
INFO:tensorflow:epoch = 0.1111111111111111, learning_rate = 5.2625e-06, loss = 0.09315214, step = 110 (5.653 sec)
2023-05-29 14:18:15,952 [INFO] tensorflow: epoch = 0.1111111111111111, learning_rate = 5.2625e-06, loss = 0.09315214, step = 110 (5.653 sec)
INFO:tensorflow:epoch = 0.1111111111111111, learning_rate = 5.2625e-06, loss = 0.09431579, step = 110 (5.638 sec)
2023-05-29 14:18:15,958 [INFO] tensorflow: epoch = 0.1111111111111111, learning_rate = 5.2625e-06, loss = 0.09431579, step = 110 (5.638 sec)
INFO:tensorflow:epoch = 0.12222222222222222, learning_rate = 5.2894993e-06, loss = 0.09175056, step = 121 (5.460 sec)
2023-05-29 14:18:21,418 [INFO] tensorflow: epoch = 0.12222222222222222, learning_rate = 5.2894993e-06, loss = 0.09175056, step = 121 (5.460 sec)
INFO:tensorflow:epoch = 0.12222222222222222, learning_rate = 5.2894993e-06, loss = 0.09200921, step = 121 (5.467 sec)
2023-05-29 14:18:21,419 [INFO] tensorflow: epoch = 0.12222222222222222, learning_rate = 5.2894993e-06, loss = 0.09200921, step = 121 (5.467 sec)
2023-05-29 14:18:22,750 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 96.793
INFO:tensorflow:epoch = 0.13434343434343435, learning_rate = 5.319107e-06, loss = 0.09199709, step = 133 (5.940 sec)
2023-05-29 14:18:27,360 [INFO] tensorflow: epoch = 0.13434343434343435, learning_rate = 5.319107e-06, loss = 0.09199709, step = 133 (5.940 sec)
INFO:tensorflow:epoch = 0.13434343434343435, learning_rate = 5.319107e-06, loss = 0.09015417, step = 133 (5.947 sec)
2023-05-29 14:18:27,364 [INFO] tensorflow: epoch = 0.13434343434343435, learning_rate = 5.319107e-06, loss = 0.09015417, step = 133 (5.947 sec)
INFO:tensorflow:epoch = 0.14545454545454545, learning_rate = 5.346392e-06, loss = 0.0872527, step = 144 (5.441 sec)
2023-05-29 14:18:32,800 [INFO] tensorflow: epoch = 0.14545454545454545, learning_rate = 5.346392e-06, loss = 0.0872527, step = 144 (5.441 sec)
INFO:tensorflow:epoch = 0.14646464646464646, learning_rate = 5.3488807e-06, loss = 0.08733964, step = 145 (6.012 sec)
2023-05-29 14:18:33,376 [INFO] tensorflow: epoch = 0.14646464646464646, learning_rate = 5.3488807e-06, loss = 0.08733964, step = 145 (6.012 sec)
2023-05-29 14:18:35,474 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 94.318
INFO:tensorflow:epoch = 0.15656565656565657, learning_rate = 5.3738218e-06, loss = 0.086545646, step = 155 (5.886 sec)
2023-05-29 14:18:38,686 [INFO] tensorflow: epoch = 0.15656565656565657, learning_rate = 5.3738218e-06, loss = 0.086545646, step = 155 (5.886 sec)
INFO:tensorflow:epoch = 0.15757575757575756, learning_rate = 5.3763183e-06, loss = 0.08513886, step = 156 (5.809 sec)
2023-05-29 14:18:39,185 [INFO] tensorflow: epoch = 0.15757575757575756, learning_rate = 5.3763183e-06, loss = 0.08513886, step = 156 (5.809 sec)
INFO:tensorflow:epoch = 0.1686868686868687, learning_rate = 5.4039015e-06, loss = 0.08426226, step = 167 (5.990 sec)
2023-05-29 14:18:44,676 [INFO] tensorflow: epoch = 0.1686868686868687, learning_rate = 5.4039015e-06, loss = 0.08426226, step = 167 (5.990 sec)
INFO:tensorflow:epoch = 0.1696969696969697, learning_rate = 5.4064117e-06, loss = 0.084082045, step = 168 (5.957 sec)
2023-05-29 14:18:45,142 [INFO] tensorflow: epoch = 0.1696969696969697, learning_rate = 5.4064117e-06, loss = 0.084082045, step = 168 (5.957 sec)
2023-05-29 14:18:48,044 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 95.464
INFO:tensorflow:epoch = 0.1797979797979798, learning_rate = 5.4316215e-06, loss = 0.08432073, step = 178 (5.485 sec)
2023-05-29 14:18:50,162 [INFO] tensorflow: epoch = 0.1797979797979798, learning_rate = 5.4316215e-06, loss = 0.08432073, step = 178 (5.485 sec)
INFO:tensorflow:epoch = 0.1808080808080808, learning_rate = 5.4341494e-06, loss = 0.083187275, step = 179 (5.498 sec)
2023-05-29 14:18:50,640 [INFO] tensorflow: epoch = 0.1808080808080808, learning_rate = 5.4341494e-06, loss = 0.083187275, step = 179 (5.498 sec)
INFO:tensorflow:epoch = 0.19090909090909092, learning_rate = 5.4594884e-06, loss = 0.0814651, step = 189 (5.695 sec)
2023-05-29 14:18:55,856 [INFO] tensorflow: epoch = 0.19090909090909092, learning_rate = 5.4594884e-06, loss = 0.0814651, step = 189 (5.695 sec)
INFO:tensorflow:epoch = 0.1919191919191919, learning_rate = 5.4620245e-06, loss = 0.08011631, step = 190 (5.682 sec)
2023-05-29 14:18:56,322 [INFO] tensorflow: epoch = 0.1919191919191919, learning_rate = 5.4620245e-06, loss = 0.08011631, step = 190 (5.682 sec)
^CINFO:tensorflow:global_step/sec: 1.97906
2023-05-29 14:19:00,374 [INFO] tensorflow: global_step/sec: 1.97906
2023-05-29 14:19:00,903 [INFO] modulus.hooks.sample_counter_hook: Train Samples / sec: 93.325