Strange problem accessing device memory with cudaMalloc and cudaMemcpy

Hi all,
I have a strange problem when accessing device memory:

This is the source code:

// Device pointer for the result image. fn_InitObject allocates it with
// cudaMalloc; call_kernels passes it to render_vr_cuda_kernel and copies it
// back to the host.
unsigned int* g_image = NULL;
// Device pointer for the input slice. fn_InitObject allocates it with
// cudaMalloc (512*512 shorts); copy_slice fills it from host memory and
// call_kernels passes it to render_vr_cuda_kernel.
short* g_buffer = NULL;

// Uploads one slice from host memory into the global device buffer g_buffer.
//
// _host_memory: host pointer to the slice; 512*512 shorts are read from it.
//
// g_buffer must already point to a device allocation of at least
// 512*512*sizeof(short) bytes (fn_InitObject performs that allocation).
// The cudaMemcpy status is not propagated because the function returns void.
extern "C"
void copy_slice(short* _host_memory)
{
    cudaMemcpy(g_buffer, _host_memory, 512 * 512 * sizeof(short), cudaMemcpyHostToDevice);
}

// Allocates the two global device buffers used by copy_slice and call_kernels.
//
// _rows, _colomns: currently unused; both buffers are sized for a fixed
//                  512 x 512 image.
//
// Returns true when both allocations succeed, false otherwise. On failure no
// device memory allocated by this call is left behind.
extern "C"
bool fn_InitObject(int _rows, int _colomns)
{
    // Silence unused-parameter warnings; the sizes below are hard-coded.
    (void)_rows;
    (void)_colomns;

    if (cudaMalloc((void**)&g_image, 512 * 512 * sizeof(unsigned int)) != cudaSuccess)
        return false;

    if (cudaMalloc((void**)&g_buffer, 512 * 512 * sizeof(short)) != cudaSuccess)
    {
        // Release the first buffer so a failed init does not leak device memory.
        cudaFree(g_image);
        g_image = NULL;
        return false;
    }

    return true;
}


// Launches render_vr_cuda_kernel on the global device buffers and copies the
// resulting image back to the host.
//
// _host_result: host buffer that receives 512*512 unsigned int values.
//
// Launch layout: 64 x 8 blocks of 8 x 64 threads, i.e. 512 x 512 threads in
// total. g_image and g_buffer must already hold device allocations
// (fn_InitObject performs them).
extern "C"
void call_kernels(UINT* _host_result)
{
    dim3 dimBlock(8, 64);
    dim3 dimGrid(64, 8);

    // Events must be created before they can be recorded; recording an
    // uninitialized cudaEvent_t makes cudaEventRecord fail.
    cudaEvent_t start, stop;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);

    cudaEventRecord(start, 0);
    // launch kernels
    render_vr_cuda_kernel<<<dimGrid, dimBlock>>>(g_image, g_buffer);

    cudaEventRecord(stop, 0);

    // copy the result image back to the host buffer
    cudaMemcpy(_host_result, g_image, 512 * 512 * sizeof(unsigned int), cudaMemcpyDeviceToHost);

    // Release the events created above.
    cudaEventDestroy(start);
    cudaEventDestroy(stop);
}


The cudaMemcpy function returns a cudaErrorInvalidDevicePointer error! But when I allocate the device memory (with cudaMalloc) inside the "copy_slice" function, cudaMemcpy returns success.
Of course, I call fn_InitObject before copy_slice, and I have no asynchronous problem.

When I use cudaMalloc and cudaMemcpy in the same function I get a successful result, but the kernel obtains erroneous values from the device memory (g_buffer) when using kernels (call_kernels function).

Thank you for your help.