//test
// Round-trips a 2-int buffer host -> device, runs gpu_test_malloc on it, and
// copies it back. NOTE(review): `cudaStatus` is assigned below but never
// declared in this fragment -- presumably declared earlier in the file; confirm.
int * host_mem=new int[2];   // host staging buffer; never delete[]-ed in this fragment (leak)
int * device_mem;            // device buffer, cudaMalloc-ed below; never cudaFree-d here
host_mem[0]=40435848;//need -- presumably the byte count the kernel will allocate; confirm
cudaStatus=cudaMalloc(&device_mem,sizeof(int)*2);
// BUG(review): a bare `throw;` with no exception in flight calls std::terminate,
// so none of these error checks can be caught by any surrounding catch block.
if (cudaStatus != cudaSuccess)throw;
cudaStatus=cudaMemcpy(device_mem,host_mem,sizeof(int)*2,cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess)throw;
// NOTE(review): kernel launches return no status; a cudaGetLastError() check
// after the launch would catch launch-configuration failures.
gpu_test_malloc<<<1,1>>>(device_mem);
cudaStatus=cudaDeviceSynchronize();   // surfaces asynchronous execution errors from the kernel
if (cudaStatus != cudaSuccess)throw;
cudaStatus=cudaMemcpy(host_mem,device_mem,sizeof(int)*2,cudaMemcpyDeviceToHost);
if (cudaStatus != cudaSuccess)throw;
// NOTE(review): the enclosing function's signature is outside this chunk; it
// evidently returns cudaError_t (both return statements yield cudaStatus).
//
// Copies a 2-int buffer to the device, runs gpu_test_malloc on it, copies it
// back, and returns the last CUDA status.
cudaError_t cudaStatus = cudaSuccess; // initialized so the catch path cannot return garbage
try
{
    int * host_mem = new int[2];
    int * device_mem = nullptr;
    host_mem[0] = 40435848; // this is the amount I need (translated; presumably bytes -- confirm)

    cudaStatus = cudaMalloc(&device_mem, sizeof(int) * 2);
    // BUG FIX: the original used a bare `throw;` -- with no exception in
    // flight that calls std::terminate, so catch(...) below could never run.
    // Throwing the status object makes the catch path actually reachable.
    if (cudaStatus != cudaSuccess) throw cudaStatus;

    cudaStatus = cudaMemcpy(device_mem, host_mem, sizeof(int) * 2, cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) throw cudaStatus;

    gpu_test_malloc<<<1, 1>>>(device_mem);

    // Kernel launches return no status; pick up launch-configuration
    // failures explicitly before waiting on the kernel.
    cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) throw cudaStatus;

    cudaStatus = cudaDeviceSynchronize(); // surfaces asynchronous execution errors
    if (cudaStatus != cudaSuccess) throw cudaStatus;

    cudaStatus = cudaMemcpy(host_mem, device_mem, sizeof(int) * 2, cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess) throw cudaStatus;

    cudaStatus = cudaFree(device_mem);
    delete[] host_mem; // was leaked on the success path in the original
    return cudaStatus;
}
catch (...)
{
    // NOTE(review): host_mem/device_mem are scoped to the try block and still
    // leak on this path; a fuller fix would hoist them out or use RAII wrappers.
    return cudaStatus;
}
}
GTX Titan (driver 335.23)
Nsight 3.2.2.13351
CUDA 5.5
64-bit, compute_35, sm_35
The device-side malloc() is a suballocator from a fixed-size heap. Given that the kernel loop is continuously allocating memory, I suspect the backing heap is being exhausted. The size of the heap can be changed using the CUDA API call `cudaDeviceSetLimit(cudaLimitMallocHeapSize, bytes)`, which must be issued before the kernel that uses device-side malloc() is launched.