Strange behavior: printf in miss-program executed, but payload not set

Hi,

I have encountered quite confusing behavior when trying to set up a shadow ray type.
My setup here consists of: Win10, 2080 Ti, CUDA Toolkit 10.1, OptiX 6.0.0 and MSVC 15.9.13 as host compiler.

The minimal complete example reads as:

Main application:

#include <iostream>
#include <vector>
#include <stdexcept>
#include <optix_world.h>
#include <optixu/optixpp_namespace.h>

int main()
{
	const int RTX = true;
	if (rtGlobalSetAttribute(RT_GLOBAL_ATTRIBUTE_ENABLE_RTX, sizeof(RTX), &RTX) != RT_SUCCESS)
		throw std::runtime_error("RTX mode not available");

	optix::Context ctx;
	ctx = optix::Context::create();
	ctx->setPrintEnabled(true);
	ctx->setPrintBufferSize(4096);
	ctx->setExceptionEnabled(RT_EXCEPTION_ALL, true);

	ctx->setRayTypeCount(1);
	ctx->setEntryPointCount(1);
	ctx->setMaxTraceDepth(1);
	ctx->setMaxCallableProgramDepth(1);

	// scene epsilon
	ctx["scene_epsilon"]->setFloat(1e-3f);

	// create the material and program objects
	// these programs take the entry point index as first argument
	ctx->setExceptionProgram(0, ctx->createProgramFromPTXFile(PTX_FILE, "exception"));
	ctx->setRayGenerationProgram(0, ctx->createProgramFromPTXFile(PTX_FILE, "create_shadow_rays"));

	// these programs take the ray type as first index
	ctx->setMissProgram(0, ctx->createProgramFromPTXFile(PTX_FILE, "miss_shadow"));

	optix::Material scene_mat = ctx->createMaterial();

	// create the graph hierarchy
	optix::Group top_group = ctx->createGroup();
	top_group->setAcceleration(ctx->createAcceleration("NoAccel"));

	optix::GeometryGroup gg = ctx->createGeometryGroup();
	gg->setAcceleration(ctx->createAcceleration("Trbvh"));

	top_group->addChild(gg);
	ctx["top_object"]->set(top_group);

	// create scene buffers
	std::vector<optix::float3> vertices     = {{0.0f, -1.0f, 0.0f}, {1.0f, -1.0f, 0.0f}, {1.0f, 1.0f, 0.0f}};
	optix::Buffer              vertexBuffer = ctx->createBuffer(RT_BUFFER_INPUT, RT_FORMAT_FLOAT3, vertices.size());
	optix::float3*             buf          = (optix::float3*)vertexBuffer->map();
	for (int i = 0; i < vertices.size(); i++) { buf[i] = vertices[i]; }
	vertexBuffer->unmap();

	// create the geometry instance
	auto mesh_geometry = ctx->createGeometryTriangles();
	mesh_geometry->setFlagsPerMaterial(0, RT_GEOMETRY_FLAG_DISABLE_ANYHIT);
	mesh_geometry->setBuildFlags(RT_GEOMETRY_BUILD_FLAG_NONE);
	mesh_geometry->setVertices(vertices.size(), vertexBuffer, RT_FORMAT_FLOAT3);
	mesh_geometry->setPrimitiveCount(vertices.size() / 3);

	auto geom_inst = ctx->createGeometryInstance();
	geom_inst->addMaterial(scene_mat);
	geom_inst->setGeometryTriangles(mesh_geometry);
	geom_inst["object_index"]->setInt(gg->getChildCount());
	gg->addChild(geom_inst);

	// create ray buffers
	std::vector<optix::float3> origins          = {{-1.0f, 0.0f, -1.0f}, {0.5f, -0.75f, -1.0f}};
	std::vector<optix::float3> directions       = {{0.0f, 0.0f, 10.0f}, {0.0f, 0.0f, 10.0f}};
	optix::Buffer              originsBuffer    = ctx->createBuffer(RT_BUFFER_INPUT, RT_FORMAT_FLOAT3, origins.size());
	optix::Buffer              directionsBuffer = ctx->createBuffer(RT_BUFFER_INPUT, RT_FORMAT_FLOAT3, origins.size());
	buf                                         = (optix::float3*)originsBuffer->map();
	for (int i = 0; i < origins.size(); i++) { buf[i] = origins[i]; }
	originsBuffer->unmap();
	buf = (optix::float3*)directionsBuffer->map();
	for (int i = 0; i < directions.size(); i++) { buf[i] = directions[i]; }
	directionsBuffer->unmap();

	// create output buffer
	optix::Buffer shadowBuffer = ctx->createBuffer(RT_BUFFER_OUTPUT, RT_FORMAT_BYTE, origins.size());

	// set everything
	ctx["ray_origins"]->set(originsBuffer);
	ctx["ray_directions"]->set(directionsBuffer);
	ctx["shadow_buffer"]->set(shadowBuffer);

	// launch the entry point
	ctx->validate();
	ctx->launch(0, origins.size());

	// retrieve results
	std::vector<int> outShadows;

	signed char* sBuf = (signed char*)shadowBuffer->map();
	for (int i = 0; i < origins.size(); i++) { outShadows.push_back(sBuf[i]); }
	shadowBuffer->unmap();

	std::cout << outShadows[0] << ", " << outShadows[1] << std::endl;

	ctx->destroy();

	return 0;
}

Optix Kernels:

#include <optix_device.h>
#include <optixu/optixu_math_namespace.h>

using namespace optix;

struct ShadowRayData
{
	optix::char1 inShadow; // payload byte: ray-gen initialises it to 1, miss_shadow writes 2
};

// globals per context; launch_index is bound to the rtLaunchIndex semantic
rtDeclareVariable(float, scene_epsilon, , "Scene epsilon for tracing");
rtDeclareVariable(unsigned int, launch_index, rtLaunchIndex, );
rtDeclareVariable(rtObject, top_object, , );

// per ray variables; prd_shadow is bound to the rtPayload semantic
rtDeclareVariable(ShadowRayData, prd_shadow, rtPayload, );

// input buffers, indexed by launch_index: one ray origin and one direction per ray
rtBuffer<float3, 1> ray_origins;
rtBuffer<float3, 1> ray_directions;

// output buffer for shadow rays, indexed by launch_index
rtBuffer<optix::char1, 1> shadow_buffer;

// ---------------------------------------------------------------------------------
// Ray generation: traces one shadow ray per launch index and stores its payload byte
RT_PROGRAM void create_shadow_rays(void)
{
	// UNCOMMENTING THIS PRINTF CHANGES THE OUTPUT
	// rtPrintf("Hello from %u, creating shadow ray at: (%f, %f, %f)\n",
	//          launch_index,
	//          ray_origins[launch_index].x,
	//          ray_origins[launch_index].y,
	//          ray_origins[launch_index].z);

	Ray r(ray_origins[launch_index],                              // origin
	      normalize(ray_directions[launch_index]),                // unit-length direction
	      0,                                                      // ray type 0
	      0.0f,                                                   // tmin
	      length(ray_directions[launch_index]) - scene_epsilon);  // tmax: stop scene_epsilon short of the vector's end

	ShadowRayData shd;
	shd.inShadow = optix::make_char1(1); // initial value; stays 1 unless a program overwrites the payload
	rtTrace(top_object,
	        r,
	        shd, // payload; miss_shadow writes to it through prd_shadow
	        RT_VISIBILITY_ALL,
	        (RTrayflags)(RT_RAY_FLAG_DISABLE_CLOSESTHIT | RT_RAY_FLAG_TERMINATE_ON_FIRST_HIT));

	// copy the payload value back to the output buffer slot of this launch index
	shadow_buffer[launch_index] = shd.inShadow;
}
// ---------------------------------------------------------------------------------

// ---------------------------------------------------------------------------------
RT_PROGRAM void miss_shadow(void)
{
	rtPrintf("Hello from %u, in miss shadow\n", launch_index); // trace output showing that the miss program ran
	prd_shadow.inShadow = optix::make_char1(2); // overwrite the ray-gen's initial 1 with 2
}
// ---------------------------------------------------------------------------------

// ---------------------------------------------------------------------------------
// Exception program: prints the exception details and writes -1 to this launch index's output slot
RT_PROGRAM void exception(void)
{
	rtPrintExceptionDetails();
	shadow_buffer[launch_index] = optix::make_char1(-1);
}
// ---------------------------------------------------------------------------------

CMakeLists.txt to tie everything together:

cmake_minimum_required(VERSION 3.9)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# CUDA is enabled as a first-class language; both targets below are built from .cu sources
project(cuda_raytracer LANGUAGES CXX CUDA)

# Make the project-local cmake/ directory (which holds FindOptiX.cmake) searchable
list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/")

# FindOptiX.cmake sets imported targets (optix, optixu are used below)
find_package(OptiX REQUIRED)

# Device programs: an OBJECT library with CUDA_PTX_COMPILATION ON, so the target's
# "objects" are .ptx files rather than linkable object files
add_library(optixIntersect OBJECT ray_programs.cu) 
set_property(TARGET optixIntersect PROPERTY CUDA_PTX_COMPILATION ON)
target_link_libraries(optixIntersect optix optixu)

# install the generated PTX into the source directory; PTX_FILE below points there,
# so the install step must run before intersect_test is executed
install(FILES $<TARGET_OBJECTS:optixIntersect> DESTINATION "${CMAKE_SOURCE_DIR}")

# tests
add_executable(intersect_test intersect_test.cu)
target_link_libraries(intersect_test optix optixu)
# absolute path of the installed PTX, passed to the host code as a string literal
target_compile_definitions(intersect_test PRIVATE PTX_FILE=\"${CMAKE_SOURCE_DIR}/ray_programs.ptx\")

My confusion is mainly due to the fact that uncommenting the first rtPrintf in create_shadow_rays changes the result to the expected output, i.e. from

1, 1

to

2, 1

Any ideas?

The first ray should miss, the second should hit. You're saying that without the rtPrintf the result is incorrect and both rays are reported as hits.
That would indicate a potential compilation bug in OptiX 6. Which display driver are you using?

There is no need to enable the RTX execution strategy explicitly. It’s the default in all shipping drivers supporting OptiX 6.

I would not use different builders in one scene. Try “Trbvh” for both acceleration structures.

Does disabling all exceptions affect the result? ctx->setExceptionEnabled(RT_EXCEPTION_ALL, false);

You could try using CUDA’s printf() directly.
No need for ctx->setPrintEnabled(true) and ctx->setPrintBufferSize(4096); then, but you would need to limit it to launch indices inside the device code yourself when using ctx->setPrintLaunchIndex(x, y);

I’m assuming the std::vector<int> is just for the decimal console output. For real workloads there should be a memcpy() into a vector of matching type.

Yes, that is almost the behavior. What actually happens is that I get the “Hello from 0, in miss shadow” printf from line 59 of the kernel file (so the miss program is probably called correctly), but the change to the payload variable is not reflected in shd.inShadow in line 52 (the copy to shadow_buffer in create_shadow_rays). This also happens when I define an analogous any-hit program which simply prints something, sets the payload to e.g. 3, and terminates the ray: I get the print, but not the payload value. If I uncomment lines 30-35 (the rtPrintf in create_shadow_rays), I get the correct result.

I used 419.67, but also tried 430.86 both with the same result.

I tried both, but it didn’t change the result.

Yes this is just for the purpose of demonstration.

I filed a bug report against OptiX 6.0.0 for analysis.