I am currently working on a wrapper around Cuda Texture Object and running into several problems I do not understand.
In the example below if you compile the code with if constexpr (true) set you get the error with some random number
cudaMemcpy2DToArray failed with unrecognized error code 2113869480
When set to false, everything runs fine.
Changing all variables in the function to static or global does not solve the problem.
If I change execute_lerp(table.size());
to execute_lerp(22);
a hard-coded number, everything works again in booth options.
What’s going on?
I compile with “CMAKE_POSITION_INDEPENDENT_CODE” enabled.
#include <vector>
#include <iostream>
#include <stdexcept>
#define USE_FUNCTION
__global__ void lerp_test(cudaTextureObject_t tex, const unsigned int N)
{
int idx = threadIdx.x + blockDim.x * blockIdx.x;
if (idx >= N)
return;
const float tex_coords = (float)idx + 0.5;
printf("tex_coords: %f\n", tex_coords);
printf("Tex: %f\n", tex1D<float>(tex, tex_coords));
}
__host__ cudaArray *createArray(const size_t size)
{
cudaError_t last_error_;
cudaArray *dArray_;
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
last_error_ = cudaMallocArray(&dArray_, &channelDesc, size, 1, cudaArrayDefault);
if (last_error_ != cudaSuccess)
throw std::runtime_error("cudaMallocArray failed with " + std::string(cudaGetErrorString(last_error_)));
return dArray_;
}
__host__ std::vector<double> execute_lerp(size_t N)
{
float src[N];
for (int i = 0; i < N; i++)
{
src[i] = i;
}
cudaError_t last_error_;
cudaArray *dArray_; // = createArray(N);
#ifdef USE_FUNCTION
std::cout << "Call Function" << std::endl;
dArray_ = createArray(sizeof(float) * N);
#else
std::cout << "Build inplace" << std::endl;
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
last_error_ = cudaMallocArray(&dArray_, &channelDesc, sizeof(float) * N, 1, cudaArrayDefault);
if (last_error_ != cudaSuccess)
throw std::runtime_error("cudaMallocArray failed with " + std::string(cudaGetErrorString(last_error_)));
#endif
cudaMemcpy2DToArray(dArray_, 0, 0, src, N * sizeof(float), N * sizeof(float), 1, cudaMemcpyHostToDevice);
if (last_error_ != cudaSuccess)
throw std::runtime_error("cudaMemcpy2DToArray failed with " + std::string(cudaGetErrorString(last_error_)) + " " + std::to_string(last_error_));
cudaResourceDesc resDesc;
memset(&resDesc, 0, sizeof(resDesc));
resDesc.resType = cudaResourceTypeArray;
resDesc.res.array.array = dArray_;
cudaTextureDesc texDesc;
memset(&texDesc, 0, sizeof(texDesc));
texDesc.readMode = cudaReadModeElementType; // Read data as provided type, no casting
texDesc.filterMode = cudaFilterModeLinear;
texDesc.addressMode[0] = cudaAddressModeWrap;
texDesc.addressMode[1] = cudaAddressModeWrap;
texDesc.normalizedCoords = 0;
cudaTextureObject_t textureObject_;
// create texture object: we only have to do this once!
last_error_ = cudaCreateTextureObject(&textureObject_, &resDesc, &texDesc, NULL);
if (last_error_ != cudaSuccess)
throw std::runtime_error("cudaCreateTextureObject failed with " + std::string(cudaGetErrorString(last_error_)));
lerp_test<<<(N + 255) / 256, 256>>>(textureObject_, 22);
auto error = cudaDeviceSynchronize();
if (error != cudaSuccess)
{
std::cout << "Kernel failed: " << cudaGetErrorString(error) << std::endl;
throw std::runtime_error("Kernel failed");
}
return;
}
int main()
{
std::cout << "Running main" << std::endl;
std::vector<float> table(22);
for (int i = 0; i < 22; i++)
{
table[i] = i;
}
execute_lerp(table.size());
return 0;
}