OptixTrace to 2 different GAS consecutively

I am trying to optixTrace 2 different geometries from a single 3d point but I have some troubles insetting the 2 GAS in the pipeline correctly.

First, I built one GAS and ray traced it from a single 3d point. It worked and I had both hits and misses. Then I tried to add a second GAS and raytrace it after having a hit on the first geometry in a nested if statement but for some reason now I only get misses from the first GAS.

I suspect that there’s something wrong when creating the two GASs.
Here is my buildMeshAccel function:

void buildMeshAccel(
	MyOptixState& state,
	const float3* windowsVertices,	int windowsVerticesSize,
	const float3* roomVertices,		int roomVerticesSize,
)
{
	//
	// Copy mesh data to device
	// Copy float3 triangles geometry to device
	//

	//windows
	CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&state.d_windows_vertices), sizeof(float3) * windowsVerticesSize));
	CUDA_CHECK(cudaMemcpy(
		reinterpret_cast<void*>(state.d_windows_vertices),
		windowsVertices,
		sizeof(float3) * windowsVerticesSize,
		cudaMemcpyHostToDevice
	));

	//room
	CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&state.d_room_vertices), sizeof(float3) * roomVerticesSize));
	CUDA_CHECK(cudaMemcpy(
		reinterpret_cast<void*>(state.d_room_vertices),
		roomVertices,
		sizeof(float3) * roomVerticesSize,
		cudaMemcpyHostToDevice
	));

	//
	// Build triangle GAS
	//

	// the build input is a simple list of non-indexed triangle vertices
	OptixBuildInput triangle_inputs[2] = {};//2 gas: windows, room
	const uint32_t triangle_input_flags[1] = { OPTIX_GEOMETRY_FLAG_NONE };

	// Windows input
	triangle_inputs[0].type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES;
	triangle_inputs[0].triangleArray.vertexFormat = OPTIX_VERTEX_FORMAT_FLOAT3;
	triangle_inputs[0].triangleArray.numVertices = static_cast<uint32_t>(windowsVerticesSize);
	triangle_inputs[0].triangleArray.vertexBuffers = &state.d_windows_vertices;
	triangle_inputs[0].triangleArray.flags = triangle_input_flags;
	triangle_inputs[0].triangleArray.numSbtRecords = 1;

	// Room input
	triangle_inputs[1].type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES;
	triangle_inputs[1].triangleArray.vertexFormat = OPTIX_VERTEX_FORMAT_FLOAT3;
	triangle_inputs[1].triangleArray.numVertices = static_cast<uint32_t>(roomVerticesSize);
	triangle_inputs[1].triangleArray.vertexBuffers = &state.d_room_vertices;
	triangle_inputs[1].triangleArray.flags = triangle_input_flags;
	triangle_inputs[1].triangleArray.numSbtRecords = 1;

	// Use default options for simplicity.
	// In a real use case we would want to enable compaction, etc
	OptixAccelBuildOptions accel_options = {};
	accel_options.buildFlags = OPTIX_BUILD_FLAG_NONE;
	accel_options.operation = OPTIX_BUILD_OPERATION_BUILD;

	{
	OptixAccelBufferSizes gas_buffer_sizes;
	OPTIX_CHECK(optixAccelComputeMemoryUsage(
		state.context,
		&accel_options,
		&triangle_inputs[0],
		1, // Number of build inputs
		&gas_buffer_sizes
	));

	CUdeviceptr d_temp_buffer_gas;
	CUDA_CHECK(cudaMalloc(
		reinterpret_cast<void**>(&d_temp_buffer_gas),
		gas_buffer_sizes.tempSizeInBytes
	));

	CUdeviceptr d_gas_output_buffer;
	CUDA_CHECK(cudaMalloc(
		//reinterpret_cast<void**>(&d_gas_output_buffer),
		reinterpret_cast<void**>(&state.d_windows_gas_output_buffer),
		gas_buffer_sizes.outputSizeInBytes
	));

	OPTIX_CHECK(optixAccelBuild(
		state.context,
		0,                  // CUDA stream
		&accel_options,
		&triangle_inputs[0],
		1,                  // num build inputs
		d_temp_buffer_gas,
		gas_buffer_sizes.tempSizeInBytes,
		state.d_windows_gas_output_buffer,
		gas_buffer_sizes.outputSizeInBytes,
		&state.windows_gas_handle,
		nullptr,            // emitted property list
		0                   // num emitted properties
	));
	CUDA_CHECK(cudaFree(reinterpret_cast<void*>(d_temp_buffer_gas)));
	//CUDA_CHECK(cudaFree(reinterpret_cast<void*>(state.d_windows_gas_output_buffer)));

	}


	//ROOM
	{
	OptixAccelBufferSizes gas_buffer_sizes1;
	OPTIX_CHECK(optixAccelComputeMemoryUsage(
		state.context,
		&accel_options,
		&triangle_inputs[1],
		1, // Number of build inputs
		&gas_buffer_sizes1
	));

	CUdeviceptr d_temp_buffer_gas;
	CUDA_CHECK(cudaMalloc(
		reinterpret_cast<void**>(&d_temp_buffer_gas),
		gas_buffer_sizes1.tempSizeInBytes
	));

	//CUdeviceptr d_gas_output_buffer;
	CUDA_CHECK(cudaMalloc(
		//reinterpret_cast<void**>(&d_gas_output_buffer),
		reinterpret_cast<void**>(&state.d_room_output_buffer),
		gas_buffer_sizes1.outputSizeInBytes
	));



	OPTIX_CHECK(optixAccelBuild(
		state.context,
		0,                  // CUDA stream
		&accel_options,
		&triangle_inputs[1],
		1,                  // num build inputs
		d_temp_buffer_gas,
		gas_buffer_sizes1.tempSizeInBytes,
		state.d_room_output_buffer,
		gas_buffer_sizes1.outputSizeInBytes,
		&state.room_gas_handle,
		nullptr,            // emitted property list
		0                   // num emitted properties
	));
	CUDA_CHECK(cudaFree(reinterpret_cast<void*>(d_temp_buffer_gas)));
	//CUDA_CHECK(cudaFree(reinterpret_cast<void*>(state.d_room_output_buffer)));
	}
	//CUDA_CHECK(cudaFree(reinterpret_cast<void*>(d_temp_buffer_gas)));
}

I have modified the sbt function, but I am not sure if this is required because I only need to check if a ray hits a geometry.


struct RayGenData
{
	// No data needed
};


struct MissData
{
	//float3 bg_color;
};


struct HitGroupData
{
	// No data needed
};

void createSBT(MyOptixState& state) {
	//
	// Set up shader binding table
	//

	// Ray generation program
	CUdeviceptr d_raygen_record;
	const size_t raygen_record_size = sizeof(RayGenSbtRecord);
	CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&d_raygen_record), raygen_record_size));

	RayGenSbtRecord rg_sbt;
	OPTIX_CHECK(optixSbtRecordPackHeader(state.raygen_prog_group, &rg_sbt));

	CUDA_CHECK(cudaMemcpy(
		reinterpret_cast<void*>(d_raygen_record),
		&rg_sbt,
		raygen_record_size,
		cudaMemcpyHostToDevice
	));

	// Miss program
	CUdeviceptr d_miss_record;
	size_t miss_record_size = sizeof(MissSbtRecord);
	CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&d_miss_record), miss_record_size));

	MissSbtRecord ms_sbt;
	OPTIX_CHECK(optixSbtRecordPackHeader(state.miss_prog_group, &ms_sbt));
	CUDA_CHECK(cudaMemcpy(
		reinterpret_cast<void*>(d_miss_record),
		&ms_sbt,
		miss_record_size,
		cudaMemcpyHostToDevice
	));

	// Hit group program for windows
	CUdeviceptr d_hitgroup_windows_record;
	size_t hitgroup_record_size_windows = sizeof(HitGroupSbtRecord);
	CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&d_hitgroup_windows_record), hitgroup_record_size_windows));
	HitGroupSbtRecord hg_sbt_windows;
	OPTIX_CHECK(optixSbtRecordPackHeader(state.hitgroup_prog_group, &hg_sbt_windows));
	CUDA_CHECK(cudaMemcpy(
		reinterpret_cast<void*>(d_hitgroup_windows_record),
		&hg_sbt_windows,
		hitgroup_record_size_windows,
		cudaMemcpyHostToDevice
	));

	// Hit group program for room
	CUdeviceptr d_hitgroup_record_room;
	size_t hitgroup_record_size_room = sizeof(HitGroupSbtRecord);
	CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&d_hitgroup_record_room), hitgroup_record_size_room));
	HitGroupSbtRecord hg_sbt_room;
	OPTIX_CHECK(optixSbtRecordPackHeader(state.hitgroup_prog_group, &hg_sbt_room));
	CUDA_CHECK(cudaMemcpy(
		reinterpret_cast<void*>(d_hitgroup_record_room),
		&hg_sbt_room,
		hitgroup_record_size_room,
		cudaMemcpyHostToDevice
	));

	// Set up SBT
	state.sbt.raygenRecord						= d_raygen_record;
	state.sbt.missRecordBase					= d_miss_record;
	state.sbt.missRecordStrideInBytes			= sizeof(MissSbtRecord);
	state.sbt.missRecordCount					= 1;
	state.sbt.hitgroupRecordBase				= d_hitgroup_windows_record; // Use the windows hit group record base
	state.sbt.hitgroupRecordStrideInBytes		= sizeof(HitGroupSbtRecord);
	state.sbt.hitgroupRecordCount				= 2; // Two hit group records for windows, room
}



void createProgramGroups(MyOptixState& state)
{

	//
	// Create program groups
	//

	OptixProgramGroupOptions program_group_options = {}; // Initialize to zeros

	OptixProgramGroupDesc raygen_prog_group_desc = {}; //
	raygen_prog_group_desc.kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
	raygen_prog_group_desc.raygen.module = state.module;
	raygen_prog_group_desc.raygen.entryFunctionName = "__raygen__rg";
	OPTIX_CHECK_LOG(optixProgramGroupCreate(
		state.context,
		&raygen_prog_group_desc,
		1,   // num program groups
		&program_group_options,
		LOG, &LOG_SIZE,
		&state.raygen_prog_group
	));

	OptixProgramGroupDesc miss_prog_group_desc = {};
	miss_prog_group_desc.kind = OPTIX_PROGRAM_GROUP_KIND_MISS;
	miss_prog_group_desc.miss.module = state.module;
	miss_prog_group_desc.miss.entryFunctionName = "__miss__ms";
	OPTIX_CHECK_LOG(optixProgramGroupCreate(
		state.context,
		&miss_prog_group_desc,
		1,   // num program groups
		&program_group_options,
		LOG, &LOG_SIZE,
		&state.miss_prog_group
	));

	OptixProgramGroupDesc hitgroup_prog_group_desc = {};
	hitgroup_prog_group_desc.kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
	hitgroup_prog_group_desc.hitgroup.moduleCH = state.module;
	hitgroup_prog_group_desc.hitgroup.entryFunctionNameCH = "__closesthit__ch";
	OPTIX_CHECK_LOG(optixProgramGroupCreate(
		state.context,
		&hitgroup_prog_group_desc,
		1,   // num program groups
		&program_group_options,
		LOG, &LOG_SIZE,
		&state.hitgroup_prog_group
	));
}

In my cuda code I run:


extern "C" __global__ void __raygen__rg() {

// Trace the ray against the windows scene
optixTrace(
    params.windows_gas_handle,
    ray_origin,
    ray_direction,
    0.0f,                // Min intersection distance
    1e16f,               // Max intersection distance
    0.0f,                // rayTime -- used for motion blur
    OptixVisibilityMask(255), // Specify always visible
    OPTIX_RAY_FLAG_DISABLE_ANYHIT | OPTIX_RAY_FLAG_DISABLE_CLOSESTHIT | OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT,
    0,                   // SBT offset   -- See SBT discussion
    1,                   // SBT stride   -- See SBT discussion
    0,                   // missSBTIndex -- See SBT discussion
    payloadWindows, payloadRoom
);
    if (payloadWindows > 0) {
 // Trace the ray against the room scene
 optixTrace(
     params.room_gas_handle,
     ray_origin,
     ray_direction,
     0.0f,                // Min intersection distance
     1e16f,               // Max intersection distance
     0.0f,                // rayTime -- used for motion blur
     OptixVisibilityMask(255), // Specify always visible
     OPTIX_RAY_FLAG_DISABLE_ANYHIT | OPTIX_RAY_FLAG_DISABLE_CLOSESTHIT | OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT,
     0,                   // SBT offset   -- See SBT discussion
     1,                   // SBT stride   -- See SBT discussion
     0,                   // missSBTIndex -- See SBT discussion
     payloadWindows, payloadRoom
 );
 }
 
 
 extern "C" __global__ void __miss__ms() {
    unsigned int payloadWindows = 0;
    unsigned int payloadRoom = 0;
    setPayload(payloadWindows, payloadRoom);
}

extern "C" __global__ void __closesthit__ch() {
    unsigned int payloadWindows = 1;
    unsigned int payloadRoom = 1;
    setPayload(payloadWindows, payloadRoom);
}

but I never have a hit on the first GAS. Is there some suggestion? I suppose there must be something totally wrong in GAS or SBT creation.
Thanks!

is there any optix sample that is related to the creation of different GAS?

The optixHair and optixDynamicGeometry samples use instancing. Have you tried using an IAS and putting the 2 GASes as instances? That is generally preferable to tracing GASes separately, unless you have very specific reasons to avoid the instancing mechanism. Using an IAS will be faster too.

I haven’t studied the code yet, but at a glance I see the hit and miss programs both set both of your payload flags. Won’t the room miss clear your window hit flag, for example? You probably want to have separate hit & miss pairs for each GAS, if you want to continue down this road.

Aside from instancing, another alternative option is to use the optixTraverse() call instead of optixTrace(). That will allow you to avoid setting flags in hit and miss, and just test the result of the traversal directly using the hit object. You don’t need to use optixReorder, you can simply replace your trace call with a traverse call and then query the resulting hit object. https://raytracing-docs.nvidia.com/optix8/guide/index.html#shader_execution_reordering#hit-objects


David.

Yes, exactly this is what I am a bit confused. How do you associate different program hit (for example) groups with a specific GAS?

PS: I solved the issue of not having any hit, it was a bad copy-paste ray flag.

PS2: I will look at the optixTraverse and IAS options

How do you associate different program hit (for example) groups with a specific GAS?

Perhaps the easiest way would be to associate each GAS with a ‘ray type’ which is just a concept you define, but you can tailor your SBT offset to your ray type. That will let you address different SBT entries depending on ray type. A common example of this is to use one ray type (and a corresponding hit group) for primary rays, and a different ray type (and separate hit group) for shadow rays. Notice how you can also change the miss program SBT index in your trace calls as well.

We used to demonstrate ray types in the OptiX SDK samples, but it looks like they’ve been simplified so they don’t need ray types. You could download and look at an older version of the optixPathTracer sample, if you want; we were using ray types for optixPathTracer in OptiX 7.5.


David.

Take a look at optixSimpleMotionBlur as a template for your SBT setup. It has two GASes under a single IAS (this is indeed the setup you want). Basically:

  • Create your two GASes, specifying in the build input structs that each will use a single SBT entry
  • Create your IAS with two instance entries (one with SBT offset 0 and one with SBT offset 1)
  • create your SBT with two entries for relevant program types (eg, hit groups).
    Now the first GAS in your IAS will access the programs and sbt data associated with the first SBT entry and similarly for the 2nd.