Hi, I have already overwritten the spec file, but I can share the spec JSON file.
The only thing I changed was lowering the learning rate (lr) to 1e-5; it shows slow convergence in the early epochs, so a slightly higher lr might work as well.
{"model": {"type": "ImageClassifier", "backbone": {"type": "fan_tiny_8_p4_hybrid", "drop_path": 0.1}, "neck": null, "head": {"type": "FANLinearClsHead", "num_classes": 2, "in_channels": 192, "loss": {"type": "CrossEntropyLoss", "loss_weight": 1.0, "use_soft": false}, "topk": [1], "head_init_scale": 1}, "init_cfg": {"type": "Pretrained", "checkpoint": "/workspace/tao-experiments/pretrained_fan_hybrid_tiny/pretrained_fan_classification_imagenet_vfan_hybrid_tiny/fan_hybrid_tiny.pth", "prefix": null}, "train_cfg": {"augments": null}}, "dataset": {"img_norm_cfg": {"mean": [123.675, 116.28, 103.53], "std": [58.395, 57.12, 57.375], "to_rgb": true}, "data": {"samples_per_gpu": 8, "workers_per_gpu": 2, "train": {"type": "ImageNet", "data_prefix": "/data/cats_dogs_dataset/training_set/training_set/", "pipeline": [{"type": "LoadImageFromFile"}, {"type": "RandomResizedCrop", "size": 224}, {"type": "RandomFlip", "flip_prob": 0.5, "direction": "horizontal"}, {"type": "Normalize", "mean": [123.675, 116.28, 103.53], "std": [58.395, 57.12, 57.375], "to_rgb": true}, {"type": "ImageToTensor", "keys": ["img"]}, {"type": "ToTensor", "keys": ["gt_label"]}, {"type": "Collect", "keys": ["img", "gt_label"]}], "classes": "/data/cats_dogs_dataset/classes.txt"}, "val": {"type": "ImageNet", "data_prefix": "/data/cats_dogs_dataset/val_set/val_set", "ann_file": null, "pipeline": [{"type": "LoadImageFromFile"}, {"type": "Resize", "size": [256, -1]}, {"type": "CenterCrop", "crop_size": 224}, {"type": "Normalize", "mean": [123.675, 116.28, 103.53], "std": [58.395, 57.12, 57.375], "to_rgb": true}, {"type": "ImageToTensor", "keys": ["img"]}, {"type": "Collect", "keys": ["img"]}], "classes": "/data/cats_dogs_dataset/classes.txt"}, "test": {"type": "ImageNet", "data_prefix": "/data/cats_dogs_dataset/val_set/val_set", "ann_file": null, "pipeline": [{"type": "LoadImageFromFile"}, {"type": "Resize", "size": [256, -1]}, {"type": "CenterCrop", "crop_size": 224}, {"type": "Normalize", "mean": [123.675, 
116.28, 103.53], "std": [58.395, 57.12, 57.375], "to_rgb": true}, {"type": "ImageToTensor", "keys": ["img"]}, {"type": "Collect", "keys": ["img"]}], "classes": "/data/cats_dogs_dataset/classes.txt"}}, "sampler": null}, "train": {"exp_config": {"manual_seed": 49, "MASTER_ADDR": "127.0.0.1", "MASTER_PORT": 631}, "validate": false, "train_config": {"checkpoint_config": {"interval": 1, "by_epoch": true}, "optimizer": {"type": "SGD", "lr": 1e-05, "weight_decay": 0.05}, "paramwise_cfg": null, "optimizer_config": {"grad_clip": null}, "lr_config": {"policy": "CosineAnnealing", "min_lr": 0.001, "warmup": "linear", "warmup_iters": 5, "warmup_ratio": 0.01, "warmup_by_epoch": true}, "runner": {"type": "TAOEpochBasedRunner", "max_epochs": 100}, "logging": {"interval": 500, "log_dir": "logs"}, "evaluation": {"interval": 1, "metric": "accuracy"}, "find_unused_parameters": false, "resume_training_checkpoint_path": null, "validate": true, "load_from": null, "custom_hooks": [{"type": "EMAHook", "momentum": 4e-05, "priority": "ABOVE_NORMAL"}]}, "num_gpus": 1, "results_dir": null}, "evaluate": {"num_gpus": 1, "batch_size": 1, "checkpoint": null, "trt_engine": null, "exp_config": {"manual_seed": 47, "MASTER_ADDR": "127.0.0.1", "MASTER_PORT": 631}, "topk": 1, "results_dir": null}, "inference": {"num_gpus": 1, "batch_size": 1, "checkpoint": null, "trt_engine": null, "exp_config": {"manual_seed": 47, "MASTER_ADDR": "127.0.0.1", "MASTER_PORT": 631}, "results_dir": null}, "gen_trt_engine": {"results_dir": null, "gpu_id": 0, "onnx_file": null, "trt_engine": null, "input_channel": 3, "input_width": 224, "input_height": 224, "opset_version": 12, "batch_size": -1, "verbose": false, "tensorrt": {"data_type": "FP32", "workspace_size": 1024, "min_batch_size": 1, "opt_batch_size": 1, "max_batch_size": 1}}, "export": {"verify": false, "opset_version": 12, "checkpoint": null, "input_channel": 3, "input_width": 224, "input_height": 224, "onnx_file": null, "results_dir": null}, "results_dir": 
"/results/classification_experiment"}