==9237== Profiling result: Type Time(%) Time Calls Avg Min Max Name GPU activities: 46.58% 5.50405s 1404 3.9203ms 1.7501ms 31.241ms void cv::cuda::device::hog::classify_hists_kernel_many_blocks(int, int, int, int, float const *, float const , float, float, unsigned char*) 31.72% 3.74813s 1404 2.6696ms 1.2654ms 30.930ms void cv::cuda::device::hog::compute_hists_kernel_many_blocks(int, cv::cuda::PtrStep, cv::cuda::device::hog::compute_hists_kernel_many_blocks, float, float*, int, int, int, int, int, int) 11.42% 1.34879s 1404 960.68us 463.04us 25.903ms void cv::cuda::device::hog::compute_gradients_8UC4_kernel(int, int, cv::cuda::PtrStep, float, cv::cuda::device::hog::compute_gradients_8UC4_kernel, cv::cuda::PtrStep) 5.36% 632.82ms 1404 450.73us 216.64us 4.8603ms void cv::cuda::device::hog::normalize_hists_kernel_many_blocks(int, int, float*, float) 2.74% 323.22ms 1296 249.40us 161.57us 1.0611ms cv::cuda::device::hog::resize_for_hog_kernel(float, float, cv::cuda::PtrStepSz, int) 2.17% 255.98ms 15553 16.458us 320ns 4.3931ms [CUDA memcpy HtoD] 0.02% 2.7399ms 1404 1.9510us 800ns 9.2480us [CUDA memcpy DtoH] API calls: 45.43% 8.33981s 2700 3.0888ms 104.61us 31.576ms cudaDeviceSynchronize 23.31% 4.27799s 2 2.13899s 2.1431ms 4.27584s cudaMalloc 18.61% 3.41536s 542 6.3014ms 335.81us 65.724ms cudaFree 3.70% 679.45ms 15444 43.994us 30.624us 2.5147ms cudaMemcpyToSymbolAsync 3.40% 623.60ms 6912 90.219us 48.064us 2.3099ms cudaLaunchKernel 3.31% 607.14ms 1513 401.28us 68.160us 6.4725ms cudaMemcpy2D 1.73% 317.03ms 541 586.00us 328.54us 1.2748ms cudaMallocPitch 0.19% 34.909ms 1296 26.935us 15.840us 297.09us cudaBindTexture2D 0.10% 18.182ms 1404 12.949us 6.7200us 86.079us cudaFuncSetCacheConfig 0.08% 14.688ms 2918 5.0330us 3.0080us 118.98us cudaGetDevice 0.08% 14.608ms 1296 11.271us 7.5200us 74.688us cudaUnbindTexture 0.06% 10.525ms 6912 1.5220us 736ns 1.1695ms cudaGetLastError 0.01% 1.6014ms 1296 1.2350us 736ns 16.800us cudaCreateChannelDesc 0.00% 431.20us 96 4.4910us 1.8560us 114.88us cuDeviceGetAttribute 0.00% 315.71us 2 157.85us 125.95us 189.76us cudaGetDeviceProperties 0.00% 20.927us 1 20.927us 20.927us 20.927us cuDeviceTotalMem 0.00% 17.888us 3 5.9620us 2.5920us 8.3200us cuDeviceGetCount 0.00% 6.7840us 2 3.3920us 2.8800us 3.9040us cuDeviceGet 0.00% 5.2470us 5 1.0490us 704ns 1.8880us cudaGetDeviceCount 0.00% 4.6080us 1 4.6080us 4.6080us 4.6080us cuDeviceGetName 0.00% 2.4320us 1 2.4320us 2.4320us 2.4320us cuDeviceGetUuid 0.00% 1.6000us 1 1.6000us 1.6000us 1.6000us cudaDriverGetVersion 0.00% 1.4080us 1 1.4080us 1.4080us 1.4080us cudaRuntimeGetVersion