From my main.cpp I call a initializeVision() in another C++ file that calls initializeStereoCuda();
initializeStereoCuda() mallocs all the variables needed for later computation. Here is the code:
[codebox]device float3 * LuvImageLeft2;
device float3 * LuvImageRight2;
device uchar3 *LeftImage2;
device uchar3 *RightImage2;
//The Image width & height.
int g_w;
int g_h;
size_t Luv_pitch;
SOM_SA *SOM_MAP;
StereoMapper *mapper;
Rectifier *rectifier;
//LineDetector *lineDetector;
size_t RGB_pitch;
int SOM_TRAINED;[/codebox]
[codebox]void initializeStereoCuda(unsigned int w, unsigned int h)
{
g_w = w;
g_h = h;
CUDA_SAFE_CALL(cudaMallocPitch((void**)&g_disparityLeft,&g_floatDispPitch,w*sizeof(float),h));
CUDA_SAFE_CALL(cudaMallocPitch((void**)&g_disparityLeft2,&g_floatDispPitch,w*sizeof(float),h));
CUDA_SAFE_CALL(cudaMallocPitch((void**)&g_minSSD, &g_floatDispPitch,w*sizeof(int),h));
g_floatDispPitch /= sizeof(float);
CUDA_SAFE_CALL(cudaMallocPitch((void**)&LuvImageLeft2,&Luv_pitch,g_w*sizeof(float3),g_h));
CUDA_SAFE_CALL(cudaMallocPitch((void**)&LuvImageRight2,&Luv_pitch,g_w*sizeof(float3),g_h));
CUDA_SAFE_CALL(cudaMallocPitch((void**)&LeftImage2,&RGB_pitch,g_w*sizeof(uchar3),g_h));
CUDA_SAFE_CALL(cudaMallocPitch((void**)&RightImage2,&RGB_pitch,g_w*sizeof(uchar3),g_h));
Luv_pitch = Luv_pitch/sizeof(float3);
RGB_pitch = RGB_pitch/sizeof(uchar3);
SOM_MAP = new SOM_SA(w,h);
rectifier = new Rectifier(w,h);
mapper = new StereoMapper(w,h);
initializeLineDetector(w,h);
SOM_TRAINED=0;
cudaChannelFormatDesc U8Tex = cudaCreateChannelDesc<unsigned char>();
cudaMallocArray(&g_leftTex_array, &U8Tex, g_w, g_h);
cudaMallocArray(&g_rightTex_array, &U8Tex, g_w, g_h);
print_GPU_mem();
}[/codebox]
Now, initializeVision() then starts a thread that keeps calling stereoProcess() which is in the cuda.cpp together with initializeStereoCuda():
stereoProcess():
[codebox]dim3 grid(1,1,1);
dim3 threads(16,16,1);
grid.x = divUp(g_w,threads.x);
grid.y = divUp(g_h,threads.y);
cudaError ret;
//if i don’t reallocate LuvImageLeft2 and LeftImage2 then cudaMemcpy will throw unspecified launch failure
// ret = cudaMallocPitch((void**)&LuvImageLeft2,&RGB_pitch,g_w*sizeof(float3),g_h);
// printf(“Error malloc luv: %d\n”, ret);
// ret = cudaMallocPitch((void**)&LeftImage2,&RGB_pitch,g_w*sizeof(uchar3),g_h);
// printf(“Error malloc leftimage: %d\n”, ret);
printf("image: %d\n", p_hostLeft[0]);
//RGB_pitch = RGB_pitch/sizeof(uchar3);
//Luv_pitch = Luv_pitch/sizeof(float3);
ret = cudaMemset(LeftImage2,0,g_w*sizeof(uchar3)*g_h);
printf("Error cudaMemset: %d\n", ret);
ret = cudaMemcpy(LeftImage2,p_hostLeft,g_w*sizeof(uchar3)*g_h,cuda
MemcpyHostToDevice);
printf("Error cudaMemcpy: %d\n", ret);
BGR_to_RGB<<<grid,threads>>>(LeftImage2,RGB_pitch,g_w,g_h);
//CUT_CHECK_ERROR("sasd");
cudaThreadSynchronize();
convertRGB_to_MLUV<<<grid,threads>>>(LeftImage2,LuvImageLeft2,RGB_pitch,Luv_pitch,g_w,g_h);
cudaThreadSynchronize();
unsigned char* temp_seg = (unsigned char*) malloc(g_w*sizeof(unsigned char)*g_h);
segmentLines(LuvImageLeft2,temp_seg);
detectLines(temp_seg,seg_image);
ret = cudaFree(LeftImage2);
printf("Error: %d\n", ret);
ret = cudaFree(LuvImageLeft2);
printf("Error: %d\n", ret);
//ret = cudaFree(LeftImage2);
//printf("Error: %d\n", ret);
//ret = cudaFree(LuvImageLeft2);
//printf("Error: %d\n", ret);
CUT_CHECK_ERROR("asd");[/codebox]
I don’t understand why I need to reallocate that memory. The same thing happens in segmentLines() function which also uses memory allocated by initializeLineDetector();
Also, I noticed that the address of LeftImage2 changes from when it is initialized to when it used again in stereoProcess(); ???
PS. I had my cuda functions in a class but had the same problem
Thnx!