Hello! I have two separate datasets: one for training (RGB, PNG images) and one for validation (grayscale, JPG images). I was able to get training to work by commenting out validation_data_source, but re-enabling it for tlt-evaluate crashes for some undocumented reason. Here is the stack trace:
root@00cf90c34bcf:/workspace/tlt-test# tlt-evaluate retinanet -k results/key -m results/weights/retinanet_resnet_epoch_010.tlt -e retinanet_spec
Using TensorFlow backend.
2020-07-07 18:03:28,665 [INFO] iva.retinanet.scripts.evaluate: Loading experiment spec at retinanet_spec.
2020-07-07 18:03:28,666 [INFO] /usr/local/lib/python2.7/dist-packages/iva/retinanet/utils/spec_loader.pyc: Merging specification from retinanet_spec
Traceback (most recent call last):
File "/usr/local/bin/tlt-evaluate", line 8, in <module>
sys.exit(main())
File "./common/magnet_evaluate.py", line 42, in main
File "./retinanet/scripts/evaluate.py", line 114, in main
File "./retinanet/scripts/evaluate.py", line 86, in evaluate
File "./retinanet/builders/data_generator.py", line 51, in __init__
File "./detectnet_v2/dataloader/default_dataloader.py", line 206, in get_dataset_tensors
File "./detectnet_v2/dataloader/default_dataloader.py", line 232, in _generate_images_and_ground_truth_labels
File "./modulus/processors/processors.py", line 227, in __call__
File "./detectnet_v2/dataloader/utilities.py", line 60, in call
File "./modulus/processors/tfrecords_iterator.py", line 143, in process_records
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/array_ops.py", line 1508, in split
axis=axis, num_split=num_or_size_splits, value=value, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 8883, in split
"Split", split_dim=axis, value=value, num_split=num_split, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 709, in _apply_op_helper
(key, op_type_name, attr_value.i, attr_def.minimum))
ValueError: Attr 'num_split' of 'Split' Op passed 0 less than minimum 1.
Here is my training spec:
# TODO Tune this; this is copied-and-pasted
eval_config {
validation_period_during_training: 10
# average_precision_mode: SAMPLE
matching_iou_threshold: 0.5
}
# TODO Tune this; this is copied-and-pasted
nms_config {
confidence_threshold: 0.05
clustering_iou_threshold: 0.5
top_k: 200
}
# TODO Tune this
augmentation_config {
preprocessing {
output_image_width: 640
output_image_height: 640
output_image_channel: 3
min_bbox_width: 1.0
min_bbox_height: 1.0
}
spatial_augmentation {
hflip_probability: 0.5
vflip_probability: 0.0
zoom_min: 0.7
zoom_max: 1.8
translate_max_x: 8.0
translate_max_y: 8.0
}
}
dataset_config {
data_sources: {
tfrecords_path: "/workspace/tlt-test/dataset/syn/syn_tf/*"
image_directory_path: "/workspace/tlt-test/dataset/syn"
}
target_class_mapping {
key: "a"
value: "a"
}
target_class_mapping {
key: "b"
value: "b"
}
image_extension: "jpg"
validation_data_source: {
tfrecords_path: "/workspace/tlt-test/dataset/real/real_tf/*"
image_directory_path: "/workspace/tlt-test/dataset/real/images"
}
}
# TODO Tune this; this is copied-and-pasted
retinanet_config {
aspect_ratios_global: "[1.0, 2.0, 0.5]"
scales: "[0.05, 0.15, 0.3, 0.45, 0.6, 0.75]"
two_boxes_for_ar1: false
clip_boxes: false
loss_loc_weight: 1.0
focal_loss_alpha: 0.25
focal_loss_gamma: 2.0
variances: "[0.1, 0.1, 0.2, 0.2]"
arch: "resnet"
nlayers: 18
n_kernels: 2
feature_size: 256
freeze_bn: False
freeze_blocks: 0
}
training_config {
batch_size_per_gpu: 12
num_epochs: 10
learning_rate {
soft_start_annealing_schedule {
min_learning_rate: 5e-06
max_learning_rate: 0.0005
soft_start: 0.1
annealing: 0.7
}
}
regularizer {
type: L1
weight: 3e-09
}
optimizer {
adam {
epsilon: 9.9e-09
beta1: 0.9
beta2: 0.999
}
}
cost_scaling {
initial_exponent: 20.0
increment: 0.005
decrement: 1.0
}
checkpoint_interval: 10
}
Here is the configuration for the real (validation) data:
kitti_config {
root_directory_path: "/workspace/tlt-test/dataset/yuma"
image_dir_name: "test"
label_dir_name: "kitti_labels"
image_extension: ".jpg"
partition_mode: "random"
num_partitions: 2
val_split: 90
num_shards: 10
}
image_directory_path: "/workspace/tlt-test/dataset/yuma"