Two models in the scene, one is moving and another is static, I’m trying to get all hits along the ray in the scene every frame. two problems are encountered.
- I tested to get all hits along with the ray when one model with matrix motion transforms. build a GAS->MT. The hits counts returned more than the model layers on the ray direction. For example, for a model with a front face and back face, the count should be 2, but the count I got large than it.
Method to get all hit
- setup the primary ray origin and direction,
- trace it to get the closest_hit result,
- set the next ray origin to that hit point coordinate,
- keep the ray direction to gather all intersections along a straight line,
- offset ray.t_min by a small epsilon to prevent self intersections.
- Repeat until you don’t get a closest hit anymore.
- When I build a traversable as shown below, illegal memory access was encountered.
GAS–>MT–>IAS—╮
GAS------->IAS—> IAS
5th line from the bottom CUDA_SYNC_CHECK() reports illegal memory access in buildInstanceAccel().
build traversable code:
void buildAccel() {
vertexBuffer.resize(model->meshes.size());
indexBuffer.resize(model->meshes.size());
asHandle.resize(model->meshes.size());
// ==================================================================
// triangle inputs
// ==================================================================
std::vector<OptixBuildInput> triangleInput(model->meshes.size());
std::vector<CUdeviceptr> d_vertices(model->meshes.size());
std::vector<CUdeviceptr> d_indices(model->meshes.size());
std::vector<uint32_t> triangleInputFlags(model->meshes.size());
for (int meshID = 0; meshID < model->meshes.size(); meshID++) {
OptixTraversableHandle tempGSHandle;
// upload the model to the device: the builder
TriangleMesh &mesh = *model->meshes[meshID];
vertexBuffer[meshID].alloc_and_upload(mesh.vertex);
indexBuffer[meshID].alloc_and_upload(mesh.index);
triangleInput[meshID] = {};
triangleInput[meshID].type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES;
// create local variables, because we need a *pointer* to the
// device pointers
d_vertices[meshID] = vertexBuffer[meshID].d_pointer();
d_indices[meshID] = indexBuffer[meshID].d_pointer();
triangleInput[meshID].triangleArray.vertexFormat = OPTIX_VERTEX_FORMAT_FLOAT3;
triangleInput[meshID].triangleArray.vertexStrideInBytes = sizeof(vec3f);
triangleInput[meshID].triangleArray.numVertices = (int)mesh.vertex.size();
triangleInput[meshID].triangleArray.vertexBuffers = &d_vertices[meshID];
triangleInput[meshID].triangleArray.indexFormat = OPTIX_INDICES_FORMAT_UNSIGNED_INT3;
triangleInput[meshID].triangleArray.indexStrideInBytes = sizeof(vec3i);
triangleInput[meshID].triangleArray.numIndexTriplets = (int)mesh.index.size();
triangleInput[meshID].triangleArray.indexBuffer = d_indices[meshID];
triangleInputFlags[meshID] = 0;
// in this example we have one SBT entry, and no per-primitive
// materials:
triangleInput[meshID].triangleArray.flags = &triangleInputFlags[meshID];
triangleInput[meshID].triangleArray.numSbtRecords = 1;
triangleInput[meshID].triangleArray.sbtIndexOffsetBuffer = 0;
triangleInput[meshID].triangleArray.sbtIndexOffsetSizeInBytes = 0;
triangleInput[meshID].triangleArray.sbtIndexOffsetStrideInBytes = 0;
// ==================================================================
// BLAS setup
// ==================================================================
OptixAccelBuildOptions accelOptions = {};
accelOptions.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE | OPTIX_BUILD_FLAG_ALLOW_COMPACTION | OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS;
accelOptions.operation = OPTIX_BUILD_OPERATION_BUILD;
OptixAccelBufferSizes blasBufferSizes;
OPTIX_CHECK(optixAccelComputeMemoryUsage(optixContext,
&accelOptions,
&triangleInput[meshID],
1, // num_build_inputs
&blasBufferSizes));
// ==================================================================
// prepare compaction
// ==================================================================
CUDABuffer compactedSizeBuffer;
compactedSizeBuffer.alloc(sizeof(uint64_t));
OptixAccelEmitDesc emitDesc;
emitDesc.type = OPTIX_PROPERTY_TYPE_COMPACTED_SIZE;
emitDesc.result = compactedSizeBuffer.d_pointer();
// ==================================================================
// execute build (main stage)
// ==================================================================
CUDABuffer tempBuffer;
tempBuffer.alloc(blasBufferSizes.tempSizeInBytes);
CUDABuffer outputBuffer;
outputBuffer.alloc(blasBufferSizes.outputSizeInBytes);
OPTIX_CHECK(optixAccelBuild(optixContext,
/* stream */ 0,
&accelOptions,
&triangleInput[meshID],
1,
tempBuffer.d_pointer(),
tempBuffer.sizeInBytes,
outputBuffer.d_pointer(),
outputBuffer.sizeInBytes,
&tempGSHandle,
&emitDesc, 1));
CUDA_SYNC_CHECK();
// ==================================================================
// perform compaction
// ==================================================================
uint64_t compactedSize;
compactedSizeBuffer.download(&compactedSize, 1);
asBuffer.alloc(compactedSize);
OPTIX_CHECK(optixAccelCompact(optixContext,
/*stream:*/ 0,
tempGSHandle,
asBuffer.d_pointer(),
asBuffer.sizeInBytes,
&tempGSHandle));
CUDA_SYNC_CHECK();
if (meshID == 0) {
const float motion_matrix_keys[2][12] =
{
{1.0f, 0.0f, 0.0f, 0.0f,
0.0f, 1.0f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 0.0f},
{1.0f, 0.0f, 0.0f, 0.0f,
0.0f, 1.0f, 0.0f, 0.0f,
0.0f, 0.0f, 1.0f, 2.0f}};
OptixMatrixMotionTransform motion_transform = {};
motion_transform.child = tempGSHandle;
motion_transform.motionOptions.numKeys = 2;
motion_transform.motionOptions.timeBegin = 0.0f;
motion_transform.motionOptions.timeEnd = 1.0f;
motion_transform.motionOptions.flags = OPTIX_MOTION_FLAG_NONE;
memcpy(motion_transform.transform, motion_matrix_keys, 2 * 12 * sizeof(float));
std::vector<OptixMatrixMotionTransform> temp_mh;
temp_mh.push_back(motion_transform);
CUDABuffer d_motion_transform;
d_motion_transform.alloc_and_upload(temp_mh);
OPTIX_CHECK(optixConvertPointerToTraversableHandle(
optixContext,
d_motion_transform.d_pointer(),
OPTIX_TRAVERSABLE_TYPE_MATRIX_MOTION_TRANSFORM,
&tempGSHandle));
CUDA_SYNC_CHECK();
d_motion_transform.free();
}
asHandle[meshID] = tempGSHandle;
// ==================================================================
// aaaaaand .... clean up
// ==================================================================
outputBuffer.free(); // << the UNcompacted, temporary output buffer
tempBuffer.free();
compactedSizeBuffer.free();
}
}
void buildInstanceAccel() {
// Use the identity matrix for the instance transform
Instance instance = {{1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0}};
optixInstances.resize(asHandle.size());
size_t instanceSizeInBytes = sizeof(OptixInstance) * asHandle.size();
for (int instanceId = 0; instanceId < asHandle.size(); instanceId++) {
optixInstances[instanceId].traversableHandle = asHandle[instanceId];
optixInstances[instanceId].flags = OPTIX_INSTANCE_FLAG_NONE;
optixInstances[instanceId].instanceId = instanceId;
optixInstances[instanceId].sbtOffset = 0;
optixInstances[instanceId].visibilityMask = 1;
memcpy(optixInstances[instanceId].transform, instance.transform, sizeof(float) * 12);
}
instancesBuffer.alloc_and_upload(optixInstances);
OptixBuildInput instanceInput = {};
instanceInput.type = OPTIX_BUILD_INPUT_TYPE_INSTANCES;
instanceInput.instanceArray.instances = instancesBuffer.d_pointer();
instanceInput.instanceArray.numInstances = optixInstances.size();
OptixAccelBuildOptions accelOptions = {};
accelOptions.buildFlags = OPTIX_BUILD_FLAG_NONE;
accelOptions.operation = OPTIX_BUILD_OPERATION_BUILD;
OptixAccelBufferSizes instancesBufferSize;
OPTIX_CHECK(optixAccelComputeMemoryUsage(optixContext, &accelOptions, &instanceInput,
1, // num build inputs
&instancesBufferSize));
CUDABuffer tempBuffer;
tempBuffer.alloc(instancesBufferSize.tempSizeInBytes);
CUDABuffer instancesOutputBuffer;
instancesOutputBuffer.alloc(instancesBufferSize.outputSizeInBytes);
//OptixTraversableHandle instancesHandle = 0;
OPTIX_CHECK(optixAccelBuild(optixContext,
0, // CUDA stream
&accelOptions,
&instanceInput,
1, // num build inputs
tempBuffer.d_pointer(),
tempBuffer.sizeInBytes,
instancesOutputBuffer.d_pointer(),
instancesOutputBuffer.sizeInBytes,
&instancesHandle,
nullptr, // emitted property list
0 // num emitted properties
));
//cudaDeviceSynchronize();
CUDA_SYNC_CHECK();
instancesBuffer.free();
tempBuffer.free();
instancesOutputBuffer.free();
}
Any reply will be helpful. Thanks!