The following code appears to demonstrate a bug in OptiX. Here is my device-side code:
#include <optix_world.h>
#include "device_functions.h"
using namespace optix;
// Per-ray payload: a single flag that the miss program sets to true.
struct PerRayData_calculate {
bool squeegee;
};
// Top-level node to trace against (set to an empty GeometryGroup by the host).
rtDeclareVariable(rtObject, specimen_object, , );
// 1-D launch index of the current thread (declared but not used below).
rtDeclareVariable(uint, oneDimensionalIndex, rtLaunchIndex, );
// The payload of the ray currently being traced (written by the miss program).
rtDeclareVariable(PerRayData_calculate, currentPrd, rtPayload, );
// Three counters: [0] total rays, [1] flag-set path, [2] flag-clear path.
rtBuffer<uint, 1> myBuffer;
// Ray generation program: traces one ray straight down -z against
// specimen_object. Because the host attaches an empty GeometryGroup, every
// ray should miss, so the miss program should set prd.squeegee = true for
// every thread. Each thread then atomically increments the total counter
// myBuffer[0] plus either myBuffer[1] (flag set) or myBuffer[2] (flag clear).
// For a launch of width 100 the expected buffer contents are 100, 100, 0.
RT_PROGRAM void causeTrouble() {
const optix::Ray ray = optix::make_Ray(make_float3(0.0f,0.0f,0.0f), make_float3(0.0f,0.0f,-1.0f), 0u, 0.01f, RT_DEFAULT_MAX);
PerRayData_calculate prd;
prd.squeegee = false;
rtTrace(specimen_object, ray, prd);
if (prd.squeegee) {
// Miss path: bump slot 1 and the shared total in slot 0.
atomicAdd(&(myBuffer[1u]), 1u);
atomicAdd(&(myBuffer[0u]), 1u);
}
else {
// Payload-not-updated path: bump slot 2 and the shared total in slot 0.
atomicAdd(&(myBuffer[2u]), 1u);
atomicAdd(&(myBuffer[0u]), 1u);
}
}
// Miss program: records in the per-ray payload that the ray hit nothing.
RT_PROGRAM void miss() {
currentPrd.squeegee = true;
}
// Exception program: prints details of any device-side exception
// (the host enables RT_EXCEPTION_ALL, so any such exception is reported).
RT_PROGRAM void exception() {
rtPrintExceptionDetails();
}
Here is my code for the host side:
#define NOMINMAX
#define _USE_MATH_DEFINES
#include <cstdio>
#include <cstring>
#include <vector>
#include <optixu/optixpp_namespace.h>
#include <optixu/optixu_math_namespace.h>
using namespace optix;
int main(int argc, char *argv[]) {
try {
Context context = Context::create();
{
std::vector<int> devs;
devs.push_back(0);
context->setDevices(devs.begin(), devs.end());
}
context->setRayTypeCount(1);
context->setEntryPointCount(1);
context->setStackSize(16384);
context->setPrintEnabled(true);
context->setExceptionEnabled(RT_EXCEPTION_ALL, true);
context->setRayGenerationProgram(0, context->createProgramFromPTXFile("causeTrouble.cu.ptx", "causeTrouble"));
context->setExceptionProgram(0, context->createProgramFromPTXFile("causeTrouble.cu.ptx", "exception"));
context->setMissProgram(0, context->createProgramFromPTXFile("causeTrouble.cu.ptx", "miss"));
GeometryGroup group = context->createGeometryGroup();
group->setChildCount(0u);
group->setAcceleration(context->createAcceleration("NoAccel", "NoAccel"));
context["specimen_object"]->set(group);
Buffer myBuffer = context->createBuffer(RT_BUFFER_INPUT_OUTPUT, RT_FORMAT_UNSIGNED_INT, 3);
context["myBuffer"]->set(myBuffer);
memset(myBuffer->map(), 0, 3 * sizeof(uint));
myBuffer->unmap();
context->launch(0, 100);
{
const uint *data = (uint*)myBuffer->map();
if (fprintf(stderr, "%u, %u, %u\n", data[0], data[1], data[2]) < 0)
perror("fprintf");
myBuffer->unmap();
}
context->destroy();
}
catch (optix::Exception e) {
if (fprintf(stderr, "%s\n", e.getErrorString().c_str()) < 0)
perror("fprintf");
}
}
I would expect to get 100, 100, 0, but in fact I get 193, 100, 93. According to https://devtalk.nvidia.com/default/topic/522795/optix/atomic-buffer-operations/, because I am using only one GPU (see the setDevices call in the host code, which restricts the context to device 0), atomicAdd should work properly on OptiX buffer elements. You can question the relevance of such an old forum post, but OptiX 3.8.0 has the “zoneplate” SDK sample still using atomicAdd as described in that discussion.
I tried modifying my code so that the buffer is declared with RT_BUFFER_GPU_LOCAL. In this case, I use an RT_PROGRAM function to initialize it to zeros, and I also use an RT_PROGRAM function to copy the result to an output buffer. I still get the same answer: 193, 100, 93.
If I make it a buffer of floats instead of uints, and use the corresponding version of the atomicAdd function, then the problem seems to be resolved. Zoneplate uses atomicAdd on elements of buffers of floats, so that explains why Zoneplate works. So I am using floats as a workaround to get my work done for now, but I think that an integral data type such as uint should be acceptable.
Do you get the same program behavior that I am seeing? Am I missing something? Is it a bug in OptiX?