Hi Dusty
Thanks for your response. As requested:
[b]Listing 1 (the untampered standard version):
command = ./detectnet-console peds-003.jpg output.jpg >anton.txt
output :[/b]
detectNet – loading detection network model from:
– prototxt networks/ped-100/deploy.prototxt
– model networks/ped-100/snapshot_iter_70800.caffemodel
– input_blob ‘data’
– output_cvg ‘coverage’
– output_bbox ‘bboxes’
– mean_pixel 0.000000
– mean_binary NULL
– class_labels networks/ped-100/class_labels.txt
– threshold 0.500000
– batch_size 1
[TRT] TensorRT version 5.1.6
[TRT] loading NVIDIA plugins…
[TRT] Plugin Creator registration succeeded - GridAnchor_TRT
[TRT] Plugin Creator registration succeeded - NMS_TRT
[TRT] Plugin Creator registration succeeded - Reorg_TRT
[TRT] Plugin Creator registration succeeded - Region_TRT
[TRT] Plugin Creator registration succeeded - Clip_TRT
[TRT] Plugin Creator registration succeeded - LReLU_TRT
[TRT] Plugin Creator registration succeeded - PriorBox_TRT
[TRT] Plugin Creator registration succeeded - Normalize_TRT
[TRT] Plugin Creator registration succeeded - RPROI_TRT
[TRT] Plugin Creator registration succeeded - BatchedNMS_TRT
[TRT] completed loading NVIDIA plugins.
[TRT] detected model format - caffe (extension ‘.caffemodel’)
[TRT] desired precision specified for GPU: FASTEST
[TRT] requested fasted precision for device GPU without providing valid calibrator, disabling INT8
[TRT] native precisions detected for GPU: FP32, FP16
[TRT] selecting fastest native precision for GPU: FP16
[TRT] attempting to open engine cache file networks/ped-100/snapshot_iter_70800.caffemodel.1.1.GPU.FP16.engine
[TRT] loading network profile from engine cache… networks/ped-100/snapshot_iter_70800.caffemodel.1.1.GPU.FP16.engine
[TRT] device GPU, networks/ped-100/snapshot_iter_70800.caffemodel loaded
[TRT] device GPU, CUDA engine context initialized with 3 bindings
[TRT] binding – index 0
– name ‘data’
– type FP32
– in/out INPUT
– # dims 3
– dim #0 3 (CHANNEL)
– dim #1 512 (SPATIAL)
– dim #2 1024 (SPATIAL)
[TRT] binding – index 1
– name ‘coverage’
– type FP32
– in/out OUTPUT
– # dims 3
– dim #0 1 (CHANNEL)
– dim #1 32 (SPATIAL)
– dim #2 64 (SPATIAL)
[TRT] binding – index 2
– name ‘bboxes’
– type FP32
– in/out OUTPUT
– # dims 3
– dim #0 4 (CHANNEL)
– dim #1 32 (SPATIAL)
– dim #2 64 (SPATIAL)
[TRT] binding to input 0 data binding index: 0
[TRT] binding to input 0 data dims (b=1 c=3 h=512 w=1024) size=6291456
[TRT] binding to output 0 coverage binding index: 1
[TRT] binding to output 0 coverage dims (b=1 c=1 h=32 w=64) size=8192
[TRT] binding to output 1 bboxes binding index: 2
[TRT] binding to output 1 bboxes dims (b=1 c=4 h=32 w=64) size=32768
device GPU, networks/ped-100/snapshot_iter_70800.caffemodel initialized.
detectNet – number object classes: 1
detectNet – maximum bounding boxes: 0
detectnet-console: failed to initialize detectNet
[b]Listing 2 (the standard version with additional debug messages, prefixed ar_dbg):
command = ./detectnet-console peds-003.jpg output.jpg >anton.txt
output : (only the tail shown) [/b]
[TRT] binding to input 0 data binding index: 0
[TRT] binding to input 0 data dims (b=1 c=3 h=512 w=1024) size=6291456
[TRT] binding to output 0 coverage binding index: 1
[TRT] binding to output 0 coverage dims (b=1 c=1 h=32 w=64) size=8192
[TRT] binding to output 1 bboxes binding index: 2
[TRT] binding to output 1 bboxes dims (b=1 c=4 h=32 w=64) size=32768
device GPU, networks/ped-100/snapshot_iter_70800.caffemodel initialized.
ar_dbg : entering allocDetections
ar_dbg : model type != MODEL_UFF and model type != MODEL_ONXX
detectNet – number object classes: 1
detectNet – maximum bounding boxes: 0
ar_dbg : cudaAllocMapped function failed . aborted
detectnet-console: failed to initialize detectNet
[b]Listing 3 (the tampered version, where I set mMaxDetections to 1):
command = ./detectnet-console peds-003.jpg output.jpg >anton.txt
output : (only the tail shown) [/b]
detectNet – loading detection network model from:
– prototxt networks/ped-100/deploy.prototxt
– model networks/ped-100/snapshot_iter_70800.caffemodel
– input_blob ‘data’
– output_cvg ‘coverage’
– output_bbox ‘bboxes’
– mean_pixel 0.000000
– mean_binary NULL
– class_labels networks/ped-100/class_labels.txt
– threshold 0.500000
– batch_size 1
[TRT] TensorRT version 5.1.6
[TRT] loading NVIDIA plugins…
[TRT] Plugin Creator registration succeeded - GridAnchor_TRT
[TRT] Plugin Creator registration succeeded - NMS_TRT
[TRT] Plugin Creator registration succeeded - Reorg_TRT
[TRT] Plugin Creator registration succeeded - Region_TRT
[TRT] Plugin Creator registration succeeded - Clip_TRT
[TRT] Plugin Creator registration succeeded - LReLU_TRT
[TRT] Plugin Creator registration succeeded - PriorBox_TRT
[TRT] Plugin Creator registration succeeded - Normalize_TRT
[TRT] Plugin Creator registration succeeded - RPROI_TRT
[TRT] Plugin Creator registration succeeded - BatchedNMS_TRT
[TRT] completed loading NVIDIA plugins.
[TRT] detected model format - caffe (extension ‘.caffemodel’)
[TRT] desired precision specified for GPU: FASTEST
[TRT] requested fasted precision for device GPU without providing valid calibrator, disabling INT8
[TRT] native precisions detected for GPU: FP32, FP16
[TRT] selecting fastest native precision for GPU: FP16
[TRT] attempting to open engine cache file networks/ped-100/snapshot_iter_70800.caffemodel.1.1.GPU.FP16.engine
[TRT] loading network profile from engine cache… networks/ped-100/snapshot_iter_70800.caffemodel.1.1.GPU.FP16.engine
[TRT] device GPU, networks/ped-100/snapshot_iter_70800.caffemodel loaded
[TRT] device GPU, CUDA engine context initialized with 3 bindings
[TRT] binding – index 0
– name ‘data’
– type FP32
– in/out INPUT
– # dims 3
– dim #0 3 (CHANNEL)
– dim #1 512 (SPATIAL)
– dim #2 1024 (SPATIAL)
[TRT] binding – index 1
– name ‘coverage’
– type FP32
– in/out OUTPUT
– # dims 3
– dim #0 1 (CHANNEL)
– dim #1 32 (SPATIAL)
– dim #2 64 (SPATIAL)
[TRT] binding – index 2
– name ‘bboxes’
– type FP32
– in/out OUTPUT
– # dims 3
– dim #0 4 (CHANNEL)
– dim #1 32 (SPATIAL)
– dim #2 64 (SPATIAL)
[TRT] binding to input 0 data binding index: 0
[TRT] binding to input 0 data dims (b=1 c=3 h=512 w=1024) size=6291456
[TRT] binding to output 0 coverage binding index: 1
[TRT] binding to output 0 coverage dims (b=1 c=1 h=32 w=64) size=8192
[TRT] binding to output 1 bboxes binding index: 2
[TRT] binding to output 1 bboxes dims (b=1 c=4 h=32 w=64) size=32768
device GPU, networks/ped-100/snapshot_iter_70800.caffemodel initialized.
ar_dbg : entering allocDetections
ar_dbg : model type != MODEL_UFF and model type != MODEL_ONXX
detectNet – number object classes: 1
detectNet – maximum bounding boxes: 1
ar_dbg : leaving allocDetections . return true
detectNet – loaded 1 class info entries
detectNet – number of object classes: 1
[image] loaded ‘peds-003.jpg’ (1024 x 611, 3 channels)
5 objects detected
detected obj 0 class #0 (person) confidence=0.872070
bounding box 0 (692.062500, 43.632202) (841.000000, 459.890869) w=148.937500 h=416.258667
detected obj 1 class #0 (person) confidence=0.899902
bounding box 1 (851.187500, 59.966309) (1014.125000, 490.470703) w=162.937500 h=430.504395
detected obj 2 class #0 (person) confidence=1.076172
bounding box 2 (16.687500, 13.723633) (227.250000, 558.939697) w=210.562500 h=545.216064
detected obj 3 class #0 (person) confidence=0.681152
bounding box 3 (374.250000, 34.756592) (619.109375, 598.320557) w=244.859375 h=563.563965
detected obj 4 class #0 (person) confidence=0.959961
bounding box 4 (549.156250, 130.001587) (617.781250, 319.223633) w=68.625000 h=189.222046
[TRT] ----------------------------------------------
[TRT] Timing Report networks/ped-100/snapshot_iter_70800.caffemodel
[TRT] ----------------------------------------------
[TRT] Pre-Process CPU 0.08740ms CUDA 8.18458ms
[TRT] Network CPU 243.50406ms CUDA 234.84735ms
[TRT] Post-Process CPU 2.01807ms CUDA 1.92625ms
[TRT] Visualize CPU 0.26687ms CUDA 61.72536ms
[TRT] Total CPU 245.87640ms CUDA 306.68356ms
[TRT] ----------------------------------------------
[TRT] note – when processing a single image, run ‘sudo jetson_clocks’ before
to disable DVFS for more accurate profiling/timing measurements
detectnet-console: writing 1024x611 image to ‘output.jpg’
detectnet-console: successfully wrote 1024x611 image to ‘output.jpg’
detectnet-console: shutting down…
detectnet-console: shutdown complete
Below is the code that I have changed:
// allocDetections
bool detectNet::allocDetections()
{
printf(“ar_dbg : entering allocDetections\n”);
// determine max detections
if( IsModelType(MODEL_UFF) ) // TODO: fixme
{
printf(“ar_dbg : model type = MODEL_UFF\n”);
printf(“W = %u H = %u C = %u\n”, DIMS_W(mOutputs[OUTPUT_UFF].dims), DIMS_H(mOutputs[OUTPUT_UFF].dims), DIMS_C(mOutputs[OUTPUT_UFF].dims));
mMaxDetections = DIMS_H(mOutputs[OUTPUT_UFF].dims) * DIMS_C(mOutputs[OUTPUT_UFF].dims);
}
else if( IsModelType(MODEL_ONNX) )
{
printf(“ar_dbg : model type = MODEL_ONNX\n”);
mMaxDetections = 1;
mNumClasses = 1;
printf(“detectNet – using ONNX model\n”);
}
else
{
printf(“ar_dbg : model type != MODEL_UFF and model type != MODEL_ONXX\n”);
mMaxDetections = DIMS_W(mOutputs[OUTPUT_CVG].dims) * DIMS_H(mOutputs[OUTPUT_CVG].dims) /** DIMS_C(mOutputs[OUTPUT_CVG].dims)*/ * mNumClasses;
mNumClasses = DIMS_C(mOutputs[OUTPUT_CVG].dims);
printf(“detectNet – number object classes: %u\n”, mNumClasses);
}
//----------------------
// ar forced values
//---------------------
mMaxDetections = 1;
printf("detectNet -- maximum bounding boxes: %u\n", mMaxDetections);
// allocate array to store detection results
const size_t det_size = sizeof(Detection) * mNumDetectionSets * mMaxDetections;
if( !cudaAllocMapped((void**)&mDetectionSets[0], (void**)&mDetectionSets[1], det_size) )
{
printf("ar_dbg : cudaAllocMapped function failed . aborted\n");
return false;
}
memset(mDetectionSets[0], 0, det_size);
printf("ar_dbg : leaving allocDetections . return true \n");
return true;
}
Thank you in advance.
Best Regards
Anton