Strange problem accessing device memory with cudaMalloc and cudaMemcpy

Hi all,
I have a strange problem when accessing device memory:

This is the source code:

// Pointer for the result image. fn_InitObject fills it in via cudaMalloc,
// call_kernels passes it to render_vr_cuda_kernel and copies it back to the host.
unsigned int* g_image = NULL;
// Pointer for the input slice. fn_InitObject fills it in via cudaMalloc,
// copy_slice uploads host data into it, and call_kernels passes it to the kernel.
short* g_buffer = NULL;

// Uploads one slice of 512 x 512 short samples from host memory into the
// device buffer g_buffer.
//
// _host_memory: host pointer to at least 512*512 shorts.
//
// The cudaMemcpy status is not returned (the function is void); a caller can
// still query it afterwards with cudaGetLastError().
//
// NOTE(review): g_buffer must already hold a valid device allocation (see
// fn_InitObject). On CUDA runtimes older than 4.0 a device pointer is
// presumably only valid in the host thread that allocated it -- confirm that
// the allocation and this copy run on the same thread.
extern "C"
void copy_slice(short* _host_memory)
{
    // Byte count: the '*' operators were lost in the original expression.
    cudaMemcpy(g_buffer,
               _host_memory,
               512 * 512 * sizeof(short),
               cudaMemcpyHostToDevice);
}

// Allocates the two device buffers used by the renderer:
//   g_image  - 512*512 unsigned int
//   g_buffer - 512*512 short
//
// _rows, _colomns: currently unused; the buffer size is fixed at 512 x 512.
//
// Returns true when both allocations succeed. On failure returns false and
// leaves both g_image and g_buffer set to NULL (nothing stays allocated).
extern "C"
bool fn_InitObject(int _rows, int _colomns)
{
    // The parameters are kept for interface compatibility only.
    (void)_rows;
    (void)_colomns;

    if (cudaMalloc((void**)&g_image, 512 * 512 * sizeof(unsigned int)) != cudaSuccess)
    {
        g_image = NULL;
        return false;
    }

    if (cudaMalloc((void**)&g_buffer, 512 * 512 * sizeof(short)) != cudaSuccess)
    {
        // Do not leak the image buffer when the second allocation fails.
        cudaFree(g_image);
        g_image = NULL;
        g_buffer = NULL;
        return false;
    }

    return true;
}

// Launches render_vr_cuda_kernel on g_image / g_buffer, waits for it to
// finish, and copies the 512*512 unsigned int result image back to the host.
//
// _host_result: host buffer receiving 512*512 elements of sizeof(unsigned int)
//               bytes each.
//
// NOTE(review): neither the launch nor the copy status is checked here; a
// caller can query cudaGetLastError() afterwards.
extern "C"
void call_kernels(UINT* _host_result)
{
    // 8*64 = 512 threads per block, 64*8 = 512 blocks:
    // 512 threads along each of x and y, 512*512 threads in total.
    dim3 dimBlock(8, 64);
    dim3 dimGrid(64, 8);

    // Events bracket the kernel so the host can wait for its completion.
    cudaEvent_t start, stop;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);

    cudaEventRecord(start, 0);

    // Launch the kernel.
    render_vr_cuda_kernel<<<dimGrid, dimBlock>>>(g_image, g_buffer);

    cudaEventRecord(stop, 0);
    cudaEventSynchronize(stop);

    // Release the events now that the kernel has completed.
    cudaEventDestroy(start);
    cudaEventDestroy(stop);

    // Copy the result image back to the host buffer.
    cudaMemcpy(_host_result,
               g_image,
               512 * 512 * sizeof(unsigned int),
               cudaMemcpyDeviceToHost);
}

The cudaMemcpy function returns a cudaErrorInvalidDevicePointer error! But when I allocate the device memory (with cudaMalloc) inside the "copy_slice" function, cudaMemcpy returns success.
Of course, I call fn_InitObject before copy_slice, and I have no asynchronous problem.

When I use cudaMalloc and cudaMemcpy in the same function I get a successful result, but the kernel reads erroneous values from the device memory (g_buffer) when the kernels are launched (call_kernels function).

Thank you for your help.