I’m trying to use cudaMemcpy to copy into memory that was allocated on the device by an in-kernel malloc,
but the cudaMemcpy call fails with “invalid argument”. Any ideas why this is happening?
Using CUDA 5.0, Tesla 2075, driver 310.19, nvcc --gpu-architecture=sm_21
Code sample:
// Device-global slot: the kernel stores the pointer returned by its in-kernel
// malloc() here, and the host later reads the value back through the symbol's
// device address.
__device__ void* ptr;
// Allocates 1024 bytes with the in-kernel malloc() and publishes the resulting
// pointer through the device-global `ptr`. Every thread that runs this kernel
// performs its own allocation and overwrites `ptr`; the device-side assert
// fires if the allocation returned NULL.
__global__ void kernel(){
    void* heapBlock = malloc(1024);
    ptr = heapBlock;
    assert(heapBlock);
}
// Evaluates the CUDA runtime call `x` exactly once. On failure it prints the
// source location and the runtime's error string to stderr, then trips
// assert(0). The do { ... } while (0) wrapper makes the macro behave as a
// single statement, so `CUDA_CALL(...);` is safe inside un-braced if/else arms
// (the previous bare `if (...) assert(0);` form could capture a following
// `else`). The message is printed before the assert so the failure is still
// reported when assertions are compiled out with NDEBUG.
#define CUDA_CALL(x)                                                         \
    do {                                                                     \
        cudaError_t cuda_call_err_ = (x);                                    \
        if (cuda_call_err_ != cudaSuccess) {                                 \
            fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__, __LINE__, \
                    cudaGetErrorString(cuda_call_err_));                     \
            assert(0);                                                       \
        }                                                                    \
    } while (0)
// Copies `count` bytes from `src` to `dst` entirely on the device.
// Launch layout: any 1-D grid/block; the loop strides by the total thread
// count, so a single thread is sufficient. No shared memory is used.
// This helper exists because memory obtained from in-kernel malloc() lives in
// the device heap, which host-side runtime calls such as cudaMemcpy cannot
// target; device code, however, can read and write it freely.
__global__ void copyToHeapKernel(char* dst, const char* src, size_t count){
    size_t stride = (size_t)gridDim.x * blockDim.x;
    for (size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x; i < count; i += stride)
        dst[i] = src[i];
}

// Allocates a block with in-kernel malloc(), reads the resulting pointer back
// to the host, and writes host data into that block.
// Returns 0 on success and 1 if the write into the heap block failed.
int main(){
    CUDA_CALL(cudaDeviceSetLimit(cudaLimitMallocHeapSize,1024*1024));

    kernel<<<1,1>>>();
    CUDA_CALL(cudaGetLastError());        // launch-configuration errors
    CUDA_CALL(cudaDeviceSynchronize());   // errors raised while the kernel ran

    // Fetch the value stored in the device-global `ptr`, i.e. the address of
    // the heap block that the kernel allocated.
    void* d_ptr;
    void* d_ptr_content;
    CUDA_CALL(cudaGetSymbolAddress(&d_ptr,ptr));
    CUDA_CALL(cudaMemcpy(&d_ptr_content,d_ptr,sizeof(void*),cudaMemcpyDeviceToHost));

    char data[]="asdfwertasdf";
    const size_t nbytes = 1;              // the original sample copies one byte

    // Diagnostic only: the pointer being classified as "device" does not make
    // it a valid cudaMemcpy target.
    cudaPointerAttributes attributes;
    CUDA_CALL(cudaPointerGetAttributes(&attributes, d_ptr_content));
    fprintf(stderr,"attr: %s\n", attributes.memoryType==cudaMemoryTypeHost?"host":"device");
    // this call prints "device"

    // A direct cudaMemcpy(d_ptr_content, data, ...) fails with "invalid
    // argument" because the destination is device-heap memory. Stage the bytes
    // in a cudaMalloc'd buffer instead and let a kernel perform the final copy.
    char* d_staging = 0;
    CUDA_CALL(cudaMalloc((void**)&d_staging, nbytes));
    CUDA_CALL(cudaMemcpy(d_staging, data, nbytes, cudaMemcpyHostToDevice));

    copyToHeapKernel<<<1,1>>>((char*)d_ptr_content, d_staging, nbytes);
    cudaError_t t = cudaGetLastError();
    if (t == cudaSuccess) t = cudaDeviceSynchronize();

    // Release the staging buffer regardless of the copy outcome, but keep the
    // first error encountered for reporting.
    cudaError_t freeStatus = cudaFree(d_staging);
    if (t == cudaSuccess) t = freeStatus;

    if (t!=cudaSuccess) fprintf(stderr,"error: %s\n", cudaGetErrorString(t));
    return t == cudaSuccess ? 0 : 1;
}