• Hardware NVIDIA RTX A4000
• Network Type Yolo_v4_tiny
• TLT Version
toolkit_version: 4.0.1
format_version: 2.0
nvcr.io/nvidia/tao/tao-toolkit:4.0.0-tf1.15.5
I followed the TAO tao_launcher_starter_kit notebook to train a yolov4_tiny model. At the beginning of training, only about 10 GB of memory was used. After one day, memory usage had grown to about 50 GB. How can I limit the memory usage?
The training command and config are as follows.
!tao yolo_v4_tiny train -e $SPECS_DIR/yolo_v4_tiny_train_kitti_5car.txt \
-r $USER_EXPERIMENT_DIR/experiment_dir_unpruned \
-k $KEY \
--log_file $USER_EXPERIMENT_DIR/logs/training_log.txt \
--gpus 1
yolo_v4_tiny_train_kitti_5car.txt
random_seed: 42
yolov4_config {
big_anchor_shape: "[(146.25, 120.53), (306.15, 148.27), (487.50, 189.87)]"
mid_anchor_shape: "[(70.20, 50.13), (109.20, 89.60), (208.65, 80.00)]"
box_matching_iou: 0.25
matching_neutral_box_iou: 0.5
arch: "cspdarknet_tiny"
loss_loc_weight: 1.0
loss_neg_obj_weights: 1.0
loss_class_weights: 1.0
label_smoothing: 0.0
big_grid_xy_extend: 0.05
mid_grid_xy_extend: 0.05
freeze_bn: false
#freeze_blocks: 0
force_relu: false
}
training_config {
visualizer {
enabled: False
num_images: 3
}
batch_size_per_gpu: 8
max_queue_size:16
num_epochs: 80
enable_qat: false
checkpoint_interval: 10
learning_rate {
soft_start_cosine_annealing_schedule {
min_learning_rate: 1e-7
max_learning_rate: 1e-4
soft_start: 0.3
}
}
regularizer {
type: L1
weight: 3e-5
}
optimizer {
adam {
epsilon: 1e-7
beta1: 0.9
beta2: 0.999
amsgrad: false
}
}
pretrain_model_path: "/workspace/tao-experiments/yolo_v4_tiny/pretrained_cspdarknet_tiny/pretrained_object_detection_vcspdarknet_tiny/cspdarknet_tiny.hdf5"
}
eval_config {
average_precision_mode: SAMPLE
batch_size: 8
matching_iou_threshold: 0.5
}
nms_config {
confidence_threshold: 0.001
clustering_iou_threshold: 0.5
force_on_cpu: true
top_k: 200
}
augmentation_config {
hue: 0.1
saturation: 1.5
exposure:1.5
vertical_flip:0
horizontal_flip: 0.5
jitter: 0.3
output_width: 1248
output_height: 384
output_channel: 3
randomize_input_shape_period: 10
mosaic_prob: 0.5
mosaic_min_ratio:0.2
}
dataset_config {
data_sources: {
tfrecords_path: "/workspace/tao-experiments/data/training/tfrecords/train-fold*"
image_directory_path: "/workspace/tao-experiments/data/training"
}
include_difficult_in_training: true
image_extension: "jpg"
target_class_mapping {
key: "car"
value: "car"
}
target_class_mapping {
key: "motorbike"
value: "motorbike"
}
target_class_mapping {
key: "truck"
value: "truck"
}
target_class_mapping {
key: "bus"
value: "bus"
}
target_class_mapping {
key: "smallbus"
value: "smallbus"
}
validation_data_sources: {
tfrecords_path: "/workspace/tao-experiments/data/val/tfrecords/val-fold*"
image_directory_path: "/workspace/tao-experiments/data/val"
}
}