Hi.
I tried to train Mask R-CNN on my custom dataset. I set total_steps to 36000, but the training process ends successfully after only 1525 iterations. Does Mask R-CNN use early stopping?
One more question: I repeated the training several times, but the mAP is always around 0.05. I am using 1 GPU, and this is my spec file:
seed: 123
use_amp: False
warmup_steps: 25000
checkpoint: "/workspace/tlt/tlt-experiments/all_segmentation_approaches/pretrained_weights/resnet50.hdf5"
learning_rate_steps: "[18000, 27000]"
learning_rate_decay_levels: "[0.1, 0.01]"
total_steps: 36000
train_batch_size: 2
eval_batch_size: 2
num_steps_per_eval: 5
momentum: 0.9
l2_weight_decay: 0.00001
warmup_learning_rate: 0.00001
init_learning_rate: 0.0025
data_config{
image_size: "(448, 448)"
augment_input_data: True
eval_samples: 48
training_file_pattern: "/workspace/tlt/tlt-experiments/all_segmentation_approaches/weld_dataset_temp/tfrecords/train/*.tfrecord"
validation_file_pattern: "/workspace/tlt/tlt-experiments/all_segmentation_approaches/weld_dataset_temp/tfrecords/val/*.tfrecord"
val_json_file: "/workspace/tlt/tlt-experiments/all_segmentation_approaches/weld_dataset_temp/val/val_coco.json"
# dataset specific parameters
num_classes: 4
skip_crowd_during_training: True
}
maskrcnn_config {
nlayers: 50
arch: "resnet"
freeze_bn: False
#freeze_blocks: "[0,1]"
gt_mask_size: 112
# Region Proposal Network
rpn_positive_overlap: 0.7
rpn_negative_overlap: 0.3
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_min_size: 0.
# Proposal layer.
batch_size_per_im: 512
fg_fraction: 0.25
fg_thresh: 0.5
bg_thresh_hi: 0.5
bg_thresh_lo: 0.
# Faster-RCNN heads.
fast_rcnn_mlp_head_dim: 1024
bbox_reg_weights: "(10., 10., 5., 5.)"
# Mask-RCNN heads.
include_mask: True
mrcnn_resolution: 28
# training
train_rpn_pre_nms_topn: 2000
train_rpn_post_nms_topn: 1000
train_rpn_nms_threshold: 0.7
# evaluation
test_detections_per_image: 100
test_nms: 0.5
test_rpn_pre_nms_topn: 1000
test_rpn_post_nms_topn: 1000
test_rpn_nms_thresh: 0.7
# model architecture
min_level: 2
max_level: 6
num_scales: 1
aspect_ratios: "[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]"
anchor_scale: 8
# localization loss
rpn_box_loss_weight: 1.0
fast_rcnn_box_loss_weight: 1.0
mrcnn_weight_loss_mask: 1.0
}
And this is my training log:
mask_rcnn_log.docx (515.8 KB)