Could you try running with the fan_base backbone first?
Docker: nvcr.io/nvidia/tao/tao-toolkit:5.5.0-pyt
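If the container is not already running, you can launch it with something like the following (a minimal sketch — the mounted path is just an example, adjust it to wherever your data and spec files live):

$ docker run --gpus all -it --rm \
    -v /localhome/local-morganh:/localhome/local-morganh \
    nvcr.io/nvidia/tao/tao-toolkit:5.5.0-pyt /bin/bash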
An example YAML is below. The pretrained model is from https://catalog.ngc.nvidia.com/orgs/nvidia/teams/tao/models/pretrained_segformer_imagenet/files?version=fan_hybrid_base_in22k_1k_384
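If you have the NGC CLI set up, one way to fetch that checkpoint is the command below (shown as a sketch; the model/version string is taken from the catalog URL above, and you can also download the file from the catalog page directly):

$ ngc registry model download-version "nvidia/tao/pretrained_segformer_imagenet:fan_hybrid_base_in22k_1k_384"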
$ cat fanbase.yaml
results_dir: /localhome/local-morganh/segformer/fanbase
train:
  num_gpus: 1
  exp_config:
    manual_seed: 49
  checkpoint_interval: 200
  logging_interval: 10
  max_iters: 20000
  resume_training_checkpoint_path: null
  validate: True
  validation_interval: 10
  trainer:
    find_unused_parameters: True
    sf_optim:
      lr: 0.00006
evaluate:
  checkpoint: /localhome/local-morganh/segformer/fanbase/train/iter_20000.pth
model:
  input_height: 672
  input_width: 672
  pretrained_model_path: /localhome/local-morganh/segformer/fan_hybrid_base_in22k_1k_384.pth
  backbone:
    type: "fan_base_16_p4_hybrid"
    #type: "fan_large_16_p4_hybrid"
    #type: "vit_huge_nvclip_14_siglip"
    #type: "vit_base_nvclip_16_siglip"
    #type: "vit_large_nvdinov2"
    #type: "mit_b5"
dataset:
  input_type: "grayscale"
  img_norm_cfg:
    mean:
      - 127.5
      - 127.5
      - 127.5
    std:
      - 127.5
      - 127.5
      - 127.5
    to_rgb: True
  data_root: /tao-pt/tao-experiments
  train_dataset:
    img_dir:
      - /localhome/local-morganh/segformer/39_password_Div8-rd-nvidia/crop_NG/train
    ann_dir:
      - /localhome/local-morganh/segformer/39_password_Div8-rd-nvidia/crop_mask/train
    pipeline:
      augmentation_config:
        random_crop:
          #crop_size:
          #  - 672
          #  - 672
          cat_max_ratio: 0.75
        resize:
          img_scale:
            - 672
            - 1024
          ratio_range:
            - 0.5
            - 2.0
        random_flip:
          prob: 0.5
  val_dataset:
    img_dir: /localhome/local-morganh/segformer/39_password_Div8-rd-nvidia/crop_NG/val
    ann_dir: /localhome/local-morganh/segformer/39_password_Div8-rd-nvidia/crop_mask/val
  test_dataset:
    # NG images
    img_dir: /localhome/local-morganh/segformer/39_password_Div8-rd-nvidia/crop_NG/val
    ann_dir: /localhome/local-morganh/segformer/39_password_Div8-rd-nvidia/crop_mask/val
    # OK images
    #img_dir: /localhome/local-morganh/segformer/39_password_Div8-rd-nvidia/crop_NG/val_nomask
    #ann_dir: /localhome/local-morganh/segformer/39_password_Div8-rd-nvidia/crop_mask/val_nomask
    # Full images
    #img_dir: /localhome/local-morganh/segformer/39_password_Div8-rd-nvidia/NG
    #ann_dir: /localhome/local-morganh/segformer/39_password_Div8-rd-nvidia/mask_NG
  palette:
    - seg_class: background
      rgb:
        - 0
        - 0
        - 0
      label_id: 0
      mapping_class: background
    - seg_class: foreground
      rgb:
        - 255
        - 255
        - 255
      label_id: 1
      mapping_class: foreground
  repeat_data_times: 500
  batch_size: 8
  workers_per_gpu: 1
export:
  input_height: 672
  input_width: 672
  input_channel: 3
  onnx_file: "${results_dir}/iter_500.onnx"
gen_trt_engine:
  input_width: 672
  input_height: 672
  tensorrt:
    data_type: FP32
    workspace_size: 1024
    min_batch_size: 1
    opt_batch_size: 1
    max_batch_size: 1
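As a reminder of the SegFormer dataset convention, each img_dir/ann_dir pair is matched by file name — a mask in ann_dir is expected to have the same name as its image in img_dir, for example (file names here are hypothetical):

crop_NG/train/0001.png  <->  crop_mask/train/0001.png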
Run training:
$ segformer train -e /localhome/local-morganh/segformer/fanbase.yaml
Results are written under /localhome/local-morganh/segformer/fanbase/train/.
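After training finishes, the same spec file can drive the later stages as well; for example (a sketch — make sure evaluate.checkpoint points at the checkpoint you want, and note the spec above does not set a checkpoint under export, so you may need to add one before exporting):

$ segformer evaluate -e /localhome/local-morganh/segformer/fanbase.yaml
$ segformer export -e /localhome/local-morganh/segformer/fanbase.yaml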