A problem with updating an acceleration structure

When I want to update my acceleration structure per frame like this:

I call buildAccel() during initialization and updateAccel() in the render loop, but the last statement of updateAccel(), CUDA_SYNC_CHECK();, reports this error:

0x0000029f62ea68f0 "CUDA error on synchronize with error ‘an illegal memory access was encountered’ "

I checked that the parameter values passed to the two optixAccelBuild() calls are the same.

So I’m wondering what I have misunderstood that causes this error.

Thank you for helping me!!

// Builds one acceleration structure over all meshes in scene->worldmodel
// (one triangle build input per mesh), compacts it into asBuffer, and then
// re-allocates tempBuffer and outputBuffer before returning.
void USRenderer::buildAccel() {
    PING;
    numMeshes = scene->worldmodel.size();
    PRINT(numMeshes);
    // Every per-mesh array gets one slot per mesh.
    vertexBuffer.resize(numMeshes);
    indexBuffer.resize(numMeshes);
    normalBuffer.resize(numMeshes);
    triangleInput.resize(numMeshes);
    d_vertices.resize(numMeshes);
    d_indices.resize(numMeshes);
    triangleInputFlags.resize(numMeshes);

    for (int meshID = 0; meshID < numMeshes; meshID++) {
        TriangleMesh& mesh = *scene->worldmodel[meshID];
        // Upload vertices and indices; normals are uploaded only when the mesh has any.
        vertexBuffer[meshID].alloc_and_upload(mesh.vertex);
        indexBuffer[meshID].alloc_and_upload(mesh.index);
        if (!mesh.normal.empty())
            normalBuffer[meshID].alloc_and_upload(mesh.normal);

        // Reset the build input to defaults before filling it in.
        triangleInput[meshID] = {};
        triangleInput[meshID].type
            = OPTIX_BUILD_INPUT_TYPE_TRIANGLES;

        // The device pointers are stored outside this function because the build input
        // below keeps the ADDRESS of d_vertices[meshID], not a copy of its value.
        d_vertices[meshID] = vertexBuffer[meshID].d_pointer();
        d_indices[meshID] = indexBuffer[meshID].d_pointer();

        triangleInput[meshID].triangleArray.vertexFormat = OPTIX_VERTEX_FORMAT_FLOAT3;
        triangleInput[meshID].triangleArray.vertexStrideInBytes = sizeof(vec3f);
        triangleInput[meshID].triangleArray.numVertices = (int)mesh.vertex.size();
        triangleInput[meshID].triangleArray.vertexBuffers = &d_vertices[meshID];

        triangleInput[meshID].triangleArray.indexFormat = OPTIX_INDICES_FORMAT_UNSIGNED_INT3;
        triangleInput[meshID].triangleArray.indexStrideInBytes = sizeof(vec3i);
        triangleInput[meshID].triangleArray.numIndexTriplets = (int)mesh.index.size();
        triangleInput[meshID].triangleArray.indexBuffer = d_indices[meshID];

        // No geometry flags for this build input.
        triangleInputFlags[meshID] = 0;

        // One SBT record per build input and no per-primitive SBT index offsets.
        triangleInput[meshID].triangleArray.flags = &triangleInputFlags[meshID];
        triangleInput[meshID].triangleArray.numSbtRecords = 1;
        triangleInput[meshID].triangleArray.sbtIndexOffsetBuffer = 0;
        triangleInput[meshID].triangleArray.sbtIndexOffsetSizeInBytes = 0;
        triangleInput[meshID].triangleArray.sbtIndexOffsetStrideInBytes = 0;
    }

    // Full build that requests both compaction (optixAccelCompact below) and later
    // updates (OPTIX_BUILD_OPERATION_UPDATE in updateAccel()).
    // NOTE(review): accelOptions is not reset to defaults here; only these three fields are written.
    accelOptions.buildFlags = OPTIX_BUILD_FLAG_ALLOW_COMPACTION | OPTIX_BUILD_FLAG_ALLOW_UPDATE;
    accelOptions.motionOptions.numKeys = 1;
    accelOptions.operation = OPTIX_BUILD_OPERATION_BUILD;

    // Query the scratch and output sizes needed for this set of build inputs.
    OptixAccelBufferSizes blasBufferSizes;
    OPTIX_CHECK(optixAccelComputeMemoryUsage
    (optixContext,
        &accelOptions,
        triangleInput.data(),
        (int)numMeshes,  // num_build_inputs
        &blasBufferSizes
    ));
    // Device-side slot that receives the compacted size emitted by the build.
    CUDABuffer compactedSizeBuffer;
    compactedSizeBuffer.alloc(sizeof(uint64_t));

    
    emitDesc.type = OPTIX_PROPERTY_TYPE_COMPACTED_SIZE;
    emitDesc.result = compactedSizeBuffer.d_pointer();


    tempBuffer.alloc(blasBufferSizes.tempSizeInBytes);
    outputBuffer.alloc(blasBufferSizes.outputSizeInBytes);
    // Build the uncompacted acceleration structure into outputBuffer.
    OPTIX_CHECK(optixAccelBuild(
        optixContext,
        stream,
        &accelOptions,
        triangleInput.data(),
        (int)numMeshes,
        tempBuffer.d_pointer(),
        tempBuffer.sizeInBytes,

        outputBuffer.d_pointer(),
        outputBuffer.sizeInBytes,

        &asHandle,

        &emitDesc, 1
    ));
    // Synchronize so the emitted compacted size can be read back on the host.
    CUDA_SYNC_CHECK();
    compactedSizeBuffer.download(&compactedSize, 1);

    // Compact into asBuffer; asHandle is passed as the output handle, so from here on
    // it refers to the compacted copy in asBuffer.
    asBuffer.alloc(compactedSize);
    OPTIX_CHECK(optixAccelCompact(optixContext,
        stream,
        asHandle,
        asBuffer.d_pointer(),
        asBuffer.sizeInBytes,
        &asHandle));
    CUDA_SYNC_CHECK();
    // NOTE(review): emitDesc.result still holds the device address of compactedSizeBuffer,
    // which is freed here, and emitDesc is passed again in updateAccel().
    outputBuffer.free();
    tempBuffer.free();
    compactedSizeBuffer.free();

    // Fresh allocations with the build-time sizes; updateAccel() uses these two buffers.
    // NOTE(review): this new outputBuffer does not contain the built acceleration
    // structure. The only remaining copy is the compacted one in asBuffer.
    tempBuffer.alloc(blasBufferSizes.tempSizeInBytes);
    outputBuffer.alloc(blasBufferSizes.outputSizeInBytes);
    CUDA_SYNC_CHECK();
}

// Issues an OPTIX_BUILD_OPERATION_UPDATE over the same build inputs that buildAccel()
// filled in, then synchronizes. This is the call that reports the illegal memory access.
void USRenderer::updateAccel() {
    // Same build flags as in buildAccel(); only the operation differs.
    // NOTE(review): accelOptionsUpdate is not reset to defaults, so motionOptions keeps
    // whatever value it had before.
    accelOptionsUpdate.buildFlags = OPTIX_BUILD_FLAG_ALLOW_COMPACTION | OPTIX_BUILD_FLAG_ALLOW_UPDATE;
    accelOptionsUpdate.operation = OPTIX_BUILD_OPERATION_UPDATE;
    // Debug print of the first mesh's vertex buffer device pointer.
    // NOTE(review): int(...) may truncate the address if it is wider than int.
    std::cout << "vertexBuffers4: " << int(triangleInput.data()->triangleArray.vertexBuffers[0]) << std::endl;
    OPTIX_CHECK(optixAccelBuild(
        optixContext,
        stream,
        &accelOptionsUpdate,
        triangleInput.data(),
        (int)numMeshes,
        tempBuffer.d_pointer(),
        tempBuffer.sizeInBytes,

        // NOTE(review): an update happens in place, so the output buffer must be the one
        // that already holds the built acceleration structure. buildAccel() re-allocates
        // outputBuffer after compaction; the built data lives in asBuffer.
        outputBuffer.d_pointer(),
        outputBuffer.sizeInBytes,

        &asHandle,

        // NOTE(review): emitDesc.result was set in buildAccel() to a buffer that is freed there.
        &emitDesc, 1
    ));
    // Errors from the asynchronous update surface at this synchronization point.
    CUDA_SYNC_CHECK();
    std::cout << "update success" << std::endl;
}

First, please always provide the following system configuration information when asking about OptiX issues:
OS version, installed GPU(s), VRAM amount, display driver version, OptiX major.minor.micro version, CUDA toolkit version used to generate the input module code (PTX or OptiX-IR), host compiler version.

In principle this looks like it should be working, but there could be other factors breaking it.
With these incomplete source code excerpts I need to make a lot of assumptions:

  • The scene consists of only a single GAS?
    That is NOT the fastest OptiX render graph structure esp. on RTX boards.
    You should always use an IAS->GAS structure for performance reasons.
    Search for OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING.
    https://forums.developer.nvidia.com/t/beginner-with-simple-geometry-program-problems/284405/4
  • Everything works when not calling updateAccel()?
  • All the variables used there are member variables and are not deleted when leaving the scope of the functions or touched anywhere else?
  • You’re not changing the topology of the meshes between build and update?
  • If I understand this correctly this also happens if you only call buildAccel(); updateAccel(); directly after each other?
  • Or does this only happen when there is an optixLaunch in between?
    That is, does the illegal memory access come from the optixAccelBuild update or from something earlier?

Please enable OptiX validation and provide a log callback function with debugLevel >= 3 (max is 4) to see if OptiX reports any additional error information.

I would clean up the code quite a bit. Please see all code comments marked with “// NV” below.

void USRenderer::buildAccel() {
    PING;
    numMeshes = scene->worldmodel.size();
    PRINT(numMeshes);
    vertexBuffer.resize(numMeshes);
    indexBuffer.resize(numMeshes);
    normalBuffer.resize(numMeshes);
    triangleInput.resize(numMeshes);
    d_vertices.resize(numMeshes);
    d_indices.resize(numMeshes);
    triangleInputFlags.resize(numMeshes);

    for (int meshID = 0; meshID < numMeshes; meshID++) {
        TriangleMesh& mesh = *scene->worldmodel[meshID];
        
        vertexBuffer[meshID].alloc_and_upload(mesh.vertex);
        indexBuffer[meshID].alloc_and_upload(mesh.index);
        if (!mesh.normal.empty())
            normalBuffer[meshID].alloc_and_upload(mesh.normal); // NV I'm assuming the presence of the normal attribute buffer is checked dynamically inside the device code.

        triangleInput[meshID] = {}; // NV Good, alway initialze all OptiX structures to defaults. See below.
        
        triangleInput[meshID].type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES;

        d_vertices[meshID] = vertexBuffer[meshID].d_pointer();
        d_indices[meshID] = indexBuffer[meshID].d_pointer();

        triangleInput[meshID].triangleArray.vertexFormat = OPTIX_VERTEX_FORMAT_FLOAT3;
        triangleInput[meshID].triangleArray.vertexStrideInBytes = sizeof(vec3f); // NV vertexStrideInBytes is an unsigned int, not a size_t.
        triangleInput[meshID].triangleArray.numVertices = (int)mesh.vertex.size(); // NV numVertices is an unsigned int, not an int.
        triangleInput[meshID].triangleArray.vertexBuffers = &d_vertices[meshID];

        triangleInput[meshID].triangleArray.indexFormat = OPTIX_INDICES_FORMAT_UNSIGNED_INT3;
        triangleInput[meshID].triangleArray.indexStrideInBytes = sizeof(vec3i); // NV indexStrideInBytes is an unsigned int, not a size_t.
        triangleInput[meshID].triangleArray.numIndexTriplets = (int)mesh.index.size(); // NV numIndexTriplets  is an unsigned int, not an int.
        triangleInput[meshID].triangleArray.indexBuffer = d_indices[meshID];

        triangleInputFlags[meshID] = 0; // NV Use OptixGeometryFlags OPTIX_GEOMETRY_FLAG_NONE (== 0) for clarity instead.

        triangleInput[meshID].triangleArray.flags = &triangleInputFlags[meshID];
        triangleInput[meshID].triangleArray.numSbtRecords = 1; // NV This assumes the SBT has numMeshes * number of ray types many hit record entries.
        triangleInput[meshID].triangleArray.sbtIndexOffsetBuffer = 0; // NV Redundant after triangleInput[meshID] = {};
        triangleInput[meshID].triangleArray.sbtIndexOffsetSizeInBytes = 0; // NV Redundant after triangleInput[meshID] = {};
        triangleInput[meshID].triangleArray.sbtIndexOffsetStrideInBytes = 0; // NV Redundant after triangleInput[meshID] = {};
    }

    // NV Always initialize all OptiX structures to defaults! That prevents issues when switching OptiX SDK versions.
    // Add accelOptions = {}; here.
    accelOptions.buildFlags = OPTIX_BUILD_FLAG_ALLOW_COMPACTION | OPTIX_BUILD_FLAG_ALLOW_UPDATE;
    accelOptions.motionOptions.numKeys = 1; // NV Redundant. Can be left at zero from default initialization, only numKeys > 1 means motion.
    accelOptions.operation = OPTIX_BUILD_OPERATION_BUILD;

    OptixAccelBufferSizes blasBufferSizes; // NV Add = {}; for initialization.
    
    OPTIX_CHECK(optixAccelComputeMemoryUsage
    (optixContext,
        &accelOptions,
        triangleInput.data(),
        (int)numMeshes,  // num_build_inputs // NV  numBuildInputs is an unsigned int, not an int.
        &blasBufferSizes
    ));
    
    CUDABuffer compactedSizeBuffer;
    compactedSizeBuffer.alloc(sizeof(uint64_t));
    
    emitDesc.type = OPTIX_PROPERTY_TYPE_COMPACTED_SIZE;
    emitDesc.result = compactedSizeBuffer.d_pointer();

    tempBuffer.alloc(blasBufferSizes.tempSizeInBytes); // NV Must be aligned to OPTIX_ACCEL_BUFFER_BYTE_ALIGNMENT.
    outputBuffer.alloc(blasBufferSizes.outputSizeInBytes); // NV Must be aligned to OPTIX_ACCEL_BUFFER_BYTE_ALIGNMENT.

    OPTIX_CHECK(optixAccelBuild(
        optixContext,
        stream,
        &accelOptions,
        triangleInput.data(),
        (int)numMeshes, // NV numBuildInputs is an unsigned int, not an int.
        tempBuffer.d_pointer(),
        tempBuffer.sizeInBytes,

        outputBuffer.d_pointer(),
        outputBuffer.sizeInBytes,

        &asHandle,

        &emitDesc, 1
    ));
    
    CUDA_SYNC_CHECK();
    
    compactedSizeBuffer.download(&compactedSize, 1); // NV Assuming size_t or uint64_t compactedSize; Assuming the argument 1 is the number of elements, means one uint64_t, 8 bytes? Is that based on Ingo Wald's code?

    asBuffer.alloc(compactedSize); // NV Leaked after updateAccel().
    
    OPTIX_CHECK(optixAccelCompact(optixContext,
        stream,
        asHandle,
        asBuffer.d_pointer(),
        asBuffer.sizeInBytes,
        &asHandle));
    
    CUDA_SYNC_CHECK();
    
    outputBuffer.free(); // NV Redundant when immediately allocating with the same size below.
    tempBuffer.free();   // NV Redundant when immediately allocating with the same size below.
    compactedSizeBuffer.free(); // NV Can be done above after compactedSizeBuffer.download(&compactedSize, 1);

    // NV These must be aligned to OPTIX_ACCEL_BUFFER_BYTE_ALIGNMENT.
    tempBuffer.alloc(blasBufferSizes.tempSizeInBytes); // NV There is a separate OptixAccelBufferSizes tempUpdateSizeInBytes for OPTIX_BUILD_OPERATION_UPDATE. Use that or make sure it's <= tempSizeInBytes. 
    outputBuffer.alloc(blasBufferSizes.outputSizeInBytes);
    
    CUDA_SYNC_CHECK();
}

void USRenderer::updateAccel() {
    // NV Add another CUDA_SYNC_CHECK(); for debugging here to make sure the error comes from the optixAccelBuild() OPTIX_BUILD_OPERATION_UPDATE.
    // NV Add accelOptionsUpdate = {}; to make sure there isn't garbage in OptixMotionOptions motionOptions
    accelOptionsUpdate.buildFlags = OPTIX_BUILD_FLAG_ALLOW_COMPACTION | OPTIX_BUILD_FLAG_ALLOW_UPDATE;
    accelOptionsUpdate.operation = OPTIX_BUILD_OPERATION_UPDATE;
    std::cout << "vertexBuffers4: " << int(triangleInput.data()->triangleArray.vertexBuffers[0]) << std::endl;
    OPTIX_CHECK(optixAccelBuild(
        optixContext,
        stream, // NV I'm assuming this is the same stream as used for the optixLaunch? Otherwise you could be building AS while still using it inside the launched kernel, when there isn't another sync.
        &accelOptionsUpdate,
        triangleInput.data(), // NV Assuming the topoloyg didn't change in any of the meshes.
        (int)numMeshes, // NV numBuildInputs is an unsigned int, not an int.
        tempBuffer.d_pointer(),
        tempBuffer.sizeInBytes,

        outputBuffer.d_pointer(), // NV After this, asBuffer is not required anymore. Make sure you don't leak it.
        outputBuffer.sizeInBytes,

        &asHandle, // NV Verify that this isn't changing, or verify that this is set inside the launch parameters before the next optixLaunch.

        &emitDesc, 1 // NV Unused. You're udpating a compacted AS already, which is allowed. 
    ));
    
    CUDA_SYNC_CHECK();
    
    std::cout << "update success" << std::endl;
}

If you’re constantly updating a GAS per frame, I wouldn’t be wasting time on compaction.
It would also make sense to use the OptixBuildFlags OPTIX_BUILD_FLAG_PREFER_FAST_BUILD then.

I’m currently working on the same thing in my GLTF_renderer example for morphing and skinning, which I will release very soon. It also uses a GAS with multiple build inputs and works without problems.

Related post about AS rebuilds and updates: https://forums.developer.nvidia.com/t/optix-pathtracer-how-to-implement-an-updategeometry-functionality/284447/2

I’m currently working on the same thing in my GLTF_renderer example for morphing and skinning, which I will release very soon. It also uses a GAS with multiple build inputs and works without problems.

For reference, this is my routine which builds or updates GAS:
https://github.com/NVIDIA/OptiX_Apps/blob/master/apps/GLTF_renderer/Application.cpp#L5921

I do not update compacted meshes though because those are always static.
Animated meshes (morphed or skinned) prefer the fastest build option with no compaction.

The bug is the output buffer inside the AS update.
The update happens in-place, so that output buffer must contain the already built AS data, but you’re using a new output buffer.
Read https://raytracing-docs.nvidia.com/optix8/guide/index.html#acceleration_structures#dynamic-updates

 // NV The update call from updateAccel() with the two wrong arguments marked.
 OPTIX_CHECK(optixAccelBuild(
        optixContext,
        stream,
        &accelOptionsUpdate,
        triangleInput.data(),
        (unsigned int)numMeshes,
        tempBuffer.d_pointer(),
        tempBuffer.sizeInBytes,

        outputBuffer.d_pointer(), // NV BUG This must be the original asBuffer.d_pointer(), because the update is done in place on the buffer that holds the built AS.
        outputBuffer.sizeInBytes, // NV BUG This must be the original asBuffer.sizeInBytes.

        &asHandle, // NV Verify that this isn't changing, or verify that this is set inside the launch parameters before the next optixLaunch.

        &emitDesc, 1 // NV Unused. You're updating a compacted AS already, which is allowed. 
    ));