I’m not sure if this is a CUDA problem or an Optix problem. I am just starting out with CUDA and Optix.
I started with the Optix 7.3 optixTriangle sample and have written my code based on that example. At this point I am trying to draw a cube and apply a 2D texture to it. The basics of drawing the cube, as well as rotating it, scaling it or moving it by applying host side transformations seem to be working. Applying the texture to the cube is not working. What I end up with is a cube that looks like I’ve applied a uniform color to all the faces. If I print the color value in my closest hit program, the values for the red, green and blue channels vary by very small amounts, not enough to really affect the color.
The cube length for each dimension is 1.0, and the cube is centered at 0.0, so coordinates are -0.5:0.5:0, 0.5:0.5:0, etc. The cube is drawn centered in the window as I expect with no transformations, so I see just the front face of the cube.
The cube and the texture are exported from Blender as a wavefront obj file. The texture image file is 32 bits per pixel, 8 bits each for R, G, B, and alpha. When building the texture object I convert each byte to float and store that in my texture array.
The code to build the texture object and copy it to the GPU is
bool Texture::load(void) {
QImage image(imagePath);
if (image.isNull()) {
fprintf(stderr, "Unable to load texture %s\n", imagePath.toLatin1().data());
return false;
}
int imageWidth = image.width();
int imageHeight = image.height();
float *hostImageArray;
hostImageArray = new float[imageWidth * imageHeight * 4];
cudaChannelFormatDesc channelDescriptor = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat);
cudaError_t rc = cudaMallocArray(&gpuImageArray, &channelDescriptor, image.width(), image.height());
if (rc != cudaSuccess) {
fprintf(stderr, "Unable to allocate GPU array for %s: %s\n", imagePath.toLatin1().data(), cudaGetErrorString(rc));
return false;
}
float *imageData = copyImage(image);
if (imageData == nullptr) {
return false;
}
rc = cudaMemcpy2DToArray(gpuImageArray, 0, 0, imageData, imageWidth * sizeof(float) * 4, imageWidth * sizeof(float) * 4, imageHeight, cudaMemcpyHostToDevice);
delete[] imageData;
if (rc != cudaSuccess) {
fprintf(stderr, "Error copying texture to GPU: %s\n", cudaGetErrorString(rc));
cudaFreeArray(gpuImageArray);
gpuImageArray = nullptr;
return false;
}
cudaResourceDesc resourceDescriptor = { };
resourceDescriptor.resType = cudaResourceTypeArray;
resourceDescriptor.res.array.array = gpuImageArray;
cudaTextureDesc textureDescriptor = { };
textureDescriptor.addressMode[0] = cudaAddressModeClamp;
textureDescriptor.addressMode[1] = cudaAddressModeClamp;
textureDescriptor.borderColor[0] = 0.0f;
textureDescriptor.borderColor[1] = 0.0f;
textureDescriptor.disableTrilinearOptimization = 1;
textureDescriptor.filterMode = cudaFilterModeLinear;
textureDescriptor.maxAnisotropy = 1;
textureDescriptor.minMipmapLevelClamp = 1.0f;
textureDescriptor.maxMipmapLevelClamp = 99.0f;
textureDescriptor.mipmapFilterMode = cudaFilterModePoint;
textureDescriptor.mipmapLevelBias = 0.0f;
textureDescriptor.normalizedCoords = false;
textureDescriptor.readMode = cudaReadModeElementType;
textureDescriptor.sRGB = 0;
rc = cudaCreateTextureObject(&texture, &resourceDescriptor, &textureDescriptor, nullptr);
if (rc != cudaSuccess) {
fprintf(stderr, "Unable to create texture object: %s\n", cudaGetErrorString(rc));
cudaFreeArray(gpuImageArray);
gpuImageArray = nullptr;
return false;
}
return true;
}
My closest hit program is
// Stores an RGB color in the first three ray payload registers.
// Each float component's bit pattern is reinterpreted as an integer so the
// caller (e.g. raygen) can recover the exact float values with int_as_float.
static __forceinline__ __device__ void setPayload(float3 p) {
    const unsigned int red   = float_as_int(p.x);
    const unsigned int green = float_as_int(p.y);
    const unsigned int blue  = float_as_int(p.z);
    optixSetPayload_0(red);
    optixSetPayload_1(green);
    optixSetPayload_2(blue);
}
// Closest-hit program: shades the hit point by sampling the model's 2D
// texture at the barycentrically interpolated UV of the hit triangle, then
// writes the resulting color into the ray payload.
// Adapted from the OptiX 7.3 SDK's cuda/LocalGeometry.h and cuda/whitted.cu.
extern "C" __global__ void __closesthit__ch() {
    // Per-instance data recorded in this hit group's SBT record.
    const HitGroupData *hitData = reinterpret_cast<HitGroupData*>(optixGetSbtDataPointer());
    const int *vertexIndices = reinterpret_cast<int*>(hitData->modelData->vertexIndices);
    const float2 *texture = reinterpret_cast<float2*>(hitData->modelData->texture);
    // Barycentric coordinates of the intersection within the hit triangle.
    const float2 barycentrics = optixGetTriangleBarycentrics();
    // optixGetPrimitiveIndex() returns the triangle index as defined when the
    // acceleration structure was built; the index buffer holds three entries
    // per triangle, so the triangle's first corner is at primitiveIndex * 3.
    // This index buffer must stay in sync with the one used to build the GAS.
    const unsigned int triangle = optixGetPrimitiveIndex() * 3;
    // Vertex indices of the triangle's three corners.
    const int triIndex1 = vertexIndices[triangle];
    const int triIndex2 = vertexIndices[triangle + 1];
    const int triIndex3 = vertexIndices[triangle + 2];
    // The texture-coordinate array is stated to be 1:1 with the vertex array,
    // so UVs must be fetched through the same vertex indices as positions.
    // Indexing by the raw corner offsets (triangle, triangle+1, triangle+2)
    // is only correct when the UV array is unrolled to three entries per
    // triangle — TODO confirm which layout the model loader produces.
    const float2 textureV1 = texture[triIndex1];
    const float2 textureV2 = texture[triIndex2];
    const float2 textureV3 = texture[triIndex3];
    // Interpolate the per-vertex UVs at the hit point.
    const float2 geometryUV = (1.0f - barycentrics.x - barycentrics.y) * textureV1 + barycentrics.x * textureV2 + barycentrics.y * textureV3;
    // Sample the texture; requires normalizedCoords enabled on the texture
    // object, since OBJ UVs lie in [0,1].
    const float3 color = make_float3(tex2D<float4>(hitData->textureObject, geometryUV.x, geometryUV.y));
    setPayload(color);
}
I use an array of vertices and a vertex index array when building my acceleration structures.
I’m not sure whether I need these or not, but for the moment I am copying the vertex array, the vertex index array, and the texture mapping array into a block of GPU memory and setting a pointer to those arrays in the parameter block for my closest hit program. There is a 1:1 correspondence between the vertex array and the texture array, as in the original .obj file.
One thing I’m not sure of is whether for it to work at all, or for performance reasons, I need to convert my texture image data from an array of unsigned char to an array of float. It does increase memory requirements by 4 times.
I think I’m doing something wrong with my tex2D call, but I have no idea what. An explanation of what I’m doing wrong would be appreciated.
I’m running Fedora 33 Linux, driver 470.74, CUDA 11.4.100 and Optix 7.3