Hi everyone. I have spent a large amount of time trying to figure out the problem that I am about to attempt to present, and am looking for help.
I am writing a program that uses a standalone library with its own make utility, configured specifically for that library, so I have to use it unless I want to spend more time putting the build together than writing my code. This means that I cannot use nvcc to compile everything and have to split the build. Half of my code is compiled via the custom make utility, and the other half - the CUDA part - is compiled by nvcc into a static library. The static library contains a wrapper class and the kernel that I want to run.
I compile and link everything fine, however, at runtime I get this libc6 error:
*** glibc detected *** /home/mfastovets/ProjOut/lib/RAVL/linux64/bin/SimpleStereoMatching: free(): invalid next size (normal): 0x00000000017a3050 ***
======= Backtrace: =========
/lib/libc.so.6[0x7ffc3d26ea58]
/lib/libc.so.6(cfree+0x76)[0x7ffc3d2710a6]
/usr/lib/libcuda.so.1[0x7ffc413b1ada]
/usr/lib/libcuda.so.1[0x7ffc413b1bea]
/usr/lib/libcuda.so.1[0x7ffc413b7449]
/usr/lib/libcuda.so.1[0x7ffc4137b56c]
/usr/lib/libcuda.so.1[0x7ffc41125d40]
/usr/lib/libcuda.so.1[0x7ffc411354eb]
/usr/lib/libcuda.so.1[0x7ffc4111d93e]
/usr/lib/libcuda.so.1(cuCtxCreate+0xaa)[0x7ffc41117eda]
/usr/local/cuda/lib/libcudart.so.2[0x7ffc415cb04b]
/usr/local/cuda/lib/libcudart.so.2[0x7ffc415cb96f]
/usr/local/cuda/lib/libcudart.so.2(cudaConfigureCall+0x79)[0x7ffc415aa1e9]
/home/mfastovets/ProjOut/lib/RAVL/linux64/bin/SimpleStereoMatching[0x43a299]
/home/mfastovets/ProjOut/lib/RAVL/linux64/bin/SimpleStereoMatching[0x40e4c8]
/lib/libc.so.6(__libc_start_main+0xe6)[0x7ffc3d213466]
/home/mfastovets/ProjOut/lib/RAVL/linux64/bin/SimpleStereoMatching[0x40caa9]
======= Memory map: ========
00400000-00e2b000 r-xp 00000000 08:04 2712699 /home/mfastovets/ProjOut/lib/RAVL/linux64/bin/SimpleStereoMatching
0102a000-0102d000 rw-p 00a2a000 08:04 2712699 /home/mfastovets/ProjOut/lib/RAVL/linux64/bin/SimpleStereoMatching
0102d000-01071000 rw-p 0102d000 00:00 0
01428000-01952000 rw-p 01428000 00:00 0 [heap]
7ffc08000000-7ffc08021000 rw-p 7ffc08000000 00:00 0
7ffc08021000-7ffc0c000000 ---p 7ffc08021000 00:00 0
7ffc0cf3d000-7ffc0d03d000 rw-s 164c98000 00:0e 14621 /dev/nvidia0
7ffc0d03d000-7ffc0d13d000 rw-s 1649fa000 00:0e 14621 /dev/nvidia0
7ffc0d13d000-7ffc0d13e000 rw-s 1a84a7000 00:0e 14621 /dev/nvidia0
7ffc0d13e000-7ffc0d13f000 rw-s fcc0a000 00:0e 14621 /dev/nvidia0
7ffc0d13f000-7ffc0d140000 rw-s 1a84a6000 00:0e 14621 /dev/nvidia0
7ffc0d140000-7ffc0d542000 rw-s 1778ea000 00:0e 14621 /dev/nvidia0
7ffc0d542000-7ffc0d543000 rw-s fcc08000 00:0e 14621 /dev/nvidia0
7ffc0d543000-7ffc0d544000 rw-s 177851000 00:0e 14621 /dev/nvidia0
7ffc0d544000-7ffc0d946000 rw-s 1648be000 00:0e 14621 /dev/nvidia0
7ffc0d946000-7ffc0d957000 rw-s 2008c8000 00:0e 14621 /dev/nvidia0
7ffc0d957000-7ffc3a554000 rw-p 7ffc0d957000 00:00 0
7ffc3a626000-7ffc3a6be000 rw-p 7ffc3a626000 00:00 0
7ffc3a6be000-7ffc3a6c3000 r-xp 00000000 08:04 2500069 /usr/lib/libXdmcp.so.6.0.0
7ffc3a6c3000-7ffc3a8c2000 ---p 00005000 08:04 2500069 /usr/lib/libXdmcp.so.6.0.0
7ffc3a8c2000-7ffc3a8c3000 rw-p 00004000 08:04 2500069 /usr/lib/libXdmcp.so.6.0.0
7ffc3a8c3000-7ffc3a8c5000 r-xp 00000000 08:04 2500058 /usr/lib/libXau.so.6.0.0
7ffc3a8c5000-7ffc3aac4000 ---p 00002000 08:04 2500058 /usr/lib/libXau.so.6.0.0
7ffc3aac4000-7ffc3aac5000 rw-p 00001000 08:04 2500058 /usr/lib/libXau.so.6.0.0
7ffc3aac5000-7ffc3aac6000 r-xp 00000000 08:04 2501216 /usr/lib/libxcb-xlib.so.0.0.0
7ffc3aac6000-7ffc3acc5000 ---p 00001000 08:04 2501216 /usr/lib/libxcb-xlib.so.0.0.0
7ffc3acc5000-7ffc3acc6000 r--p 00000000 08:04 2501216 /usr/lib/libxcb-xlib.so.0.0.0
7ffc3acc6000-7ffc3acc7000 rw-p 00001000 08:04 2501216 /usr/lib/libxcb-xlib.so.0.0.0
7ffc3acc7000-7ffc3acef000 r-xp 00000000 08:04 122991 /lib/libpcre.so.3.12.1
7ffc3acef000-7ffc3aeee000 ---p 00028000 08:04 122991 /lib/libpcre.so.3.12.1
7ffc3aeee000-7ffc3aeef000 r--p 00027000 08:04 122991 /lib/libpcre.so.3.12.1
7ffc3aeef000-7ffc3aef0000 rw-p 00028000 08:04 122991 /lib/libpcre.so.3.12.1
7ffc3aef0000-7ffc3af17000 r-xp 00000000 08:04 2500295 /usr/lib/libexpat.so.1.5.2
7ffc3af17000-7ffc3b117000 ---p 00027000 08:04 2500295 /usr/lib/libexpat.so.1.5.2
7ffc3b117000-7ffc3b119000 r--p 00027000 08:04 2500295 /usr/lib/libexpat.so.1.5.2
7ffc3b119000-7ffc3b11a000 rw-p 00029000 08:04 2500295 /usr/lib/libexpat.so.1.5.2
7ffc3b11a000-7ffc3b135000 r-xp 00000000 08:04 2501220 /usr/lib/libxcb.so.1.0.0
7ffc3b135000-7ffc3b334000 ---p 0001b000 08:04 2501220 /usr/lib/libxcb.so.1.0.0
7ffc3b334000-7ffc3b335000 r--p 0001a000 08:04 2501220 /usr/lib/libxcb.so.1.0.0
7ffc3b335000-7ffc3b336000 rw-p 0001b000 08:04 2501220 /usr/lib/libxcb.so.1.0.0
7ffc3b336000-7ffc3b33d000 r-xp 00000000 08:04 2501210 /usr/lib/libxcb-render.so.0.0.0
7ffc3b33d000-7ffc3b53d000 ---p 00007000 08:04 2501210 /usr/lib/libxcb-render.so.0.0.0
7ffc3b53d000-7ffc3b53e000 r--p 00007000 08:04 2501210 /usr/lib/libxcb-render.so.0.0.0
7ffc3b53e000-7ffc3b53f000 rw-p 00008000 08:04 2501210 /usr/lib/libxcb-render.so.0.0.0
7ffc3b53f000-7ffc3b542000 r-xp 00000000 08:04 2501208 /usr/lib/libxcb-render-util.so.0.0.0
7ffc3b542000-7ffc3b741000 ---p 00003000 08:04 2501208 /usr/lib/libxcb-render-util.so.0.0.0
7ffc3b741000-7ffc3b742000 r--p 00002000 08:04 2501208 /usr/lib/libxcb-render-util.so.0.0.0
7ffc3b742000-7ffc3b743000 rw-p 00003000 08:04 2501208 /usr/lib/libxcb-render-util.so.0.0.0
7ffc3b743000-7ffc3b785000 r-xp 00000000 08:04 2500991 /usr/lib/libpixman-1.so.0.12.0
7ffc3b785000-7ffc3b984000 ---p 00042000 08:04 2500991 /usr/lib/libpixman-1.so.0.12.0
7ffc3b984000-7ffc3b986000 r--p 00041000 08:04 2500991 /usr/lib/libpixman-1.so.0.12.0
7ffc3b986000-7ffc3b987000 rw-p 00043000 08:04 2500991 /usr/lib/libpixman-1.so.0.12.0
7ffc3b987000-7ffc3b9a1000 r-xp 00000000 08:04 123003 /lib/libselinux.so.1
7ffc3b9a1000-7ffc3bba0000 ---p 0001a000 08:04 123003 /lib/libselinux.so.1
7ffc3bba0000-7ffc3bba1000 r--p 00019000 08:04 123003 /lib/libselinux.so.1
7ffc3bba1000-7ffc3bba2000 rw-p 0001a000 08:04 123003 /lib/libselinux.so.1
7ffc3bba2000-7ffc3bba3000 rw-p 7ffc3bba2000 00:00 0
7ffc3bba3000-7ffc3bbac000 r-xp 00000000 08:04 2500065 /usr/lib/libXcursor.so.1.0.2
7ffc3bbac000Aborted
I was able to trace this problem to the cudaMalloc function inside my static library using ddd. At the end of the cudaMalloc execution I get this error. I have tried everything I can think of to resolve this, but perhaps I am missing something simple. Here are the important bits of my code relating to this error:
float *imgR, *imgL, *scoreGrid;
imgR = new float[(rowRange.Max().V()-rowRange.Min().V())*
(rColRange.Max().V()-rColRange.Min().V())];
imgL = new float[(rowRange.Max().V()-rowRange.Min().V())*
(lColRange.Max().V()-lColRange.Min().V())];
scoreGrid = new float[(rowRange.Max().V()-rowRange.Min().V())*(lColRange.Max().V()-lColRange.Min().V())*(rColRange.Max().V()-rColRange.Min().V())];
int index = 0;
for(Array2dIterC<ByteT> iml(pair.LeftRectifiedImage()); iml; iml++)
{
imgL[index] = iml.Data();
index++;
}
index = 0;
for(Array2dIterC<ByteT> imr(pair.RightRectifiedImage()); imr; imr++)
{
imgR[index] = imr.Data();
index++;
}
for(int idx=0; idx < (rowRange.Max().V()-rowRange.Min().V())*(lColRange.Max().V()-lColRange.Min().V())*(rColRange.Max().V()-rColRange.Min().V()); idx++)
{
scoreGrid[idx] = 0;
}
//scoreGrid, imgOne, imgTwo, need filling with the elements for their respecive objects
MatchKernel kern;
kern.launch_kernel(scoreGrid, imgL, imgR, (int)(rowRange.Max().V()-rowRange.Min().V()), (int)(lColRange.Max().V()-lColRange.Min().V()), (int)(rColRange.Max().V()-rColRange.Min().V()));
And the wrapper function:
// Wrapper for the __global__ call that sets up the kernel call.
//
// Copies the two images and the score buffer to the device, launches
// computeScores on a (colOne x rows) grid of blocks with colTwo threads per
// block, and copies the scores back into `scores`.
//
//   scores  host buffer of rows*colOne*colTwo floats; uploaded before the
//           launch and overwritten with the device result afterwards
//   imgOne  host buffer of rows*colOne floats
//   imgTwo  host buffer of rows*colTwo floats
//
// If any dimension is not positive, or any CUDA call fails, the remaining
// steps are skipped and the device allocations are released.
// NOTE(review): the function returns void, so a failure is not reported to the
// caller; consider returning the cudaError_t or logging it.
// NOTE(review): colTwo is used directly as the thread count per block; a value
// above the device's per-block limit makes the launch fail (now detected via
// cudaGetLastError instead of being silently ignored).
void MatchKernel::launch_kernel(float *scores, float * imgOne, float * imgTwo, int rows, int colOne, int colTwo)
{
    // A zero or negative extent gives nothing to compute and would produce an
    // invalid launch configuration.
    if (rows <= 0 || colOne <= 0 || colTwo <= 0)
        return;

    // sizeof yields size_t, so these products are evaluated in size_t.
    const size_t oneBytes = sizeof(float) * rows * colOne;
    const size_t twoBytes = sizeof(float) * rows * colTwo;
    const size_t scoreBytes = sizeof(float) * rows * colOne * colTwo;

    // Start from null so the cleanup below is safe whichever step failed.
    float * scores_d = 0;
    float * one_d = 0;
    float * two_d = 0;

    // Each step runs only if everything before it succeeded.
    cudaError_t status = cudaMalloc((void**)&scores_d, scoreBytes);
    if (status == cudaSuccess)
        status = cudaMalloc((void**)&one_d, oneBytes);
    if (status == cudaSuccess)
        status = cudaMalloc((void**)&two_d, twoBytes);

    if (status == cudaSuccess)
        status = cudaMemcpy(one_d, imgOne, oneBytes, cudaMemcpyHostToDevice);
    if (status == cudaSuccess)
        status = cudaMemcpy(two_d, imgTwo, twoBytes, cudaMemcpyHostToDevice);
    if (status == cudaSuccess)
        status = cudaMemcpy(scores_d, scores, scoreBytes, cudaMemcpyHostToDevice);

    if (status == cudaSuccess)
    {
        // Unused dimensions must be 1, not 0: a zero-sized grid or block is an
        // invalid execution configuration.
        dim3 block(colTwo, 1, 1);
        dim3 grid(colOne, rows, 1);
        computeScores<<< grid, block>>>(scores_d, one_d, two_d); //kernel call
        // The launch itself returns nothing; configuration errors surface here.
        status = cudaGetLastError();
    }

    // The blocking copy also waits for the kernel and reports execution errors.
    if (status == cudaSuccess)
        status = cudaMemcpy(scores, scores_d, scoreBytes, cudaMemcpyDeviceToHost);

    // Release device memory on every path; cudaFree on a null pointer is a no-op.
    cudaFree(two_d);
    cudaFree(one_d);
    cudaFree(scores_d);
}
Any help you can provide would be MUCH appreciated. Cheers!