Hi I am trying to create a device pointer to some data using float4 however I get an invalid Value error when copying the pointer value over using cudaMemcpyToSymbol (Line 12).
I have this working using the same code using ushort4, however when using float4 instead it does not.
Here is the relevant GPU code:
__device__ float4 *d_Ptr;
float4 *d_HostPtr;
texture<float4, 2, cudaReadModeElementType> texDummy;
cudaChannelFormatDesc ushortDummyDesc = cudaCreateChannelDesc<float4>();
extern "C"
cudaError_t CopyToFunction(float4* h_Ptr)
{
cudaError_t error;
error = cudaMalloc(&d_HostPtr, 5 * sizeof(float4));
error = cudaMemcpyToSymbol(d_Ptr, &d_HostPtr, sizeof(float4));
error = cudaMemcpy(d_HostPtr, h_Ptr, 5 * sizeof(float4), cudaMemcpyHostToDevice);
return error;
}
extern "C"
cudaError_t CopyFromFunction(float4* h_Ptr)
{
cudaError_t error;
error = cudaMemcpy(h_Ptr, d_HostPtr, 5 * sizeof(float4), cudaMemcpyDeviceToHost);
return error;
}
And here is the relevant CPU side code:
float4 cpuInputReferenceArray [5];
float4 *cpuInputPtr;
cpuInputPtr = cpuInputReferenceArray;
float4 cpuOutputReferenceArray [5];
float4 *cpuOutputPtr;
cpuOutputPtr = cpuOutputReferenceArray;
for(int i = 0; i < 5; i++)
{
cpuInputPtr[i].x = i;
cpuInputPtr[i].y = i * 2;
cpuInputPtr[i].z = i * 3;
cpuInputPtr[i].w = i * 4;
}
checkCudaErrors(CopyToFunction(cpuInputPtr));
getLastCudaError("Test Copy To Failed");
for(int i = 0; i < 5; i++)
{
cpuInputPtr[i].x = 0;
cpuInputPtr[i].y = 0;
cpuInputPtr[i].z = 0;
cpuInputPtr[i].w = 0;
}
checkCudaErrors(cudaDeviceSynchronize());
checkCudaErrors(CopyFromFunction(cpuOutputPtr));
getLastCudaError("Test Copy From Failed");
for(int i = 0; i < 5; i++)
{
printf("Ptr at %d is %f\n", i, cpuOutputPtr[i].w);
}
Any help would be greatly appreciated.