Environment: VC2013, x64. The program always blocks at `GpuMat device_edge(image_h2);`

#include “cuda_runtime.h”
#include “device_launch_parameters.h”

#include <opencv2/opencv.hpp>
#include <opencv2/gpu/gpu.hpp>
#include <string.h>
#include <stdlib.h>

using namespace cv::gpu;

int main(int argc, char *argv)
{
cudaSetDevice(0);
cvNamedWindow(“canny”);

cv::VideoCapture capture("video.avi");

cv::Mat image_h1(720, 480, CV_32FC3);
cv::Mat image_h2(720, 480, CV_32FC1);

GpuMat  device_image;
GpuMat  device_edge(image_h2);//hang by call cudamallocpitch, that why, can anyone help me

for (;;) {
    printf("that ok\r\n");
    capture >> image_h1;

    if (image_h1.empty())
        break;

    cvtColor(image_h1,image_h2,CV_RGB2GRAY);
    device_image.upload(image_h2);
    Canny(device_image, device_edge, 50., 100.);
    device_image.download(image_h2);

    cv::imshow("canny", image_h2);
    cvWaitKey(30);
}

// image_d.release();
//image_e.release();
// image_h1.release();
// capture.release();
}

You are off-topic… this is not a forum about OpenCV, but a forum about Nvidia Embedded GPU systems.

However, you cannot pass a cv::Mat to a GPU function. You must first “upload” the contents of the cv::Mat to a gpu::GpuMat, process it on the GPU, and then “download” the result back.

GPU and CPU memories are not shared, so you cannot perform GPU processing on CPU memory.

Here an example:
http://study.marearts.com/2013/12/opencv-cuda-example-source-code.html

Thank you very much, but the problem is not the one you pointed out: the program hangs inside the call to cudaMallocPitch, which is very strange.

If I comment out the call, i.e. `//Canny(device_image, device_edge, 50., 100.);`, then the program runs smoothly.

If I run this program on the TK1 board, it behaves the same as on the PC: it blocks at `GpuMat device_edge(image_h2);`

On my computer, the cudaMallocPitch call takes about one minute; after that, the program runs well.

#include “cuda_runtime.h”
#include “device_launch_parameters.h”

#include <opencv2/opencv.hpp>
#include <opencv2/gpu/gpu.hpp>
#include <string.h>
#include <stdlib.h>

using namespace cv;

int main(int argc, char *argv)
{
cvNamedWindow(“canny”);
cv::VideoCapture capture(“video.avi”);

cv::Mat image_h1(720, 480, CV_32FC3);
cv::Mat image_h2(720, 480, CV_32FC1);
gpu::GpuMat device_lumi;
gpu::GpuMat device_edge;

for (;;) {
    capture >> image_h1;

    if (image_h1.empty())
        break;

    cvtColor(image_h1, image_h2, CV_RGB2GRAY);

    device_lumi.upload(image_h2);
    gpu::Canny(device_lumi, device_edge, 50., 100.);
    device_edge.download(image_h2);

    cv::imshow("canny", image_h2);
    cvWaitKey(30);
}

image_h1.release();
image_h2.release();

device_edge.release();
device_lumi.release();
capture.release();
return 0;

}

I can run that code on the TK1 board, but when I debug it, the upload takes several minutes. I would like to know why.