optix 7 dynamic update leads to 'an illegal memory access was encountered'

while trying to animate a height field with dynamic update to GAS, got:

Caught exception: CUDA call (cudaFree((void*)d_temp_buffer_gas) ) failed with error: 'an illegal memory access was encountered' (C:\Users\liqiu\work\xuni-ocean\Ocean\Mesh.cpp:171)

The code works properly while rotating a single triangle. However, when the number of triangles increases (to 18), optixAccelBuild returned successfully, but any subsequent cuda calls failed.

void Mesh::buildAccelerationStructure(OptixDeviceContext context)
{
    OptixAccelBuildOptions accel_options = {};
    accel_options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_BUILD | OPTIX_BUILD_FLAG_ALLOW_UPDATE;
    accel_options.operation = OPTIX_BUILD_OPERATION_BUILD;

    const uint32_t triangle_input_flags[1] = { OPTIX_GEOMETRY_FLAG_NONE };
    OptixBuildInput triangle_input = {};
    triangle_input.type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES;
    triangle_input.triangleArray.vertexFormat = OPTIX_VERTEX_FORMAT_FLOAT3;
    triangle_input.triangleArray.numVertices = mVerticesSize;
    triangle_input.triangleArray.vertexStrideInBytes = sizeof(Vertex);
    triangle_input.triangleArray.vertexBuffers = &mdVertices;
    triangle_input.triangleArray.indexFormat = OPTIX_INDICES_FORMAT_UNSIGNED_INT3;
    triangle_input.triangleArray.indexStrideInBytes = 12;
    triangle_input.triangleArray.numIndexTriplets = mIndicesSize / 3;
    triangle_input.triangleArray.indexBuffer = mdIndices;
    triangle_input.triangleArray.flags = triangle_input_flags;
    triangle_input.triangleArray.numSbtRecords = 1;

    OptixAccelBufferSizes gas_buffer_sizes;
    OPTIX_CHECK(optixAccelComputeMemoryUsage(context, &accel_options, &triangle_input,
        1,  // Number of build input
        &gas_buffer_sizes));
    CUdeviceptr d_temp_buffer_gas;
    CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&d_temp_buffer_gas), gas_buffer_sizes.tempSizeInBytes));

    // non-compacted output
    CUdeviceptr d_buffer_output_gas;
    CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(
        &d_buffer_output_gas),
        gas_buffer_sizes.outputSizeInBytes
    ));

    OPTIX_CHECK(optixAccelBuild(
        context,
        0,              // CUDA stream
        &accel_options,
        &triangle_input,
        1,              // num build inputs
        d_temp_buffer_gas,
        gas_buffer_sizes.tempSizeInBytes,
        d_buffer_output_gas,
        gas_buffer_sizes.outputSizeInBytes,
        &mGasHandle,
        nullptr,  // emitted property list
        0               // num emitted properties
    ));

    CUDA_CHECK(cudaFree((void*)d_temp_buffer_gas));
    mdGasOutputBuffer = d_buffer_output_gas;
}
void Mesh::updateAccelerationStructure(OptixDeviceContext context)
{
    OptixAccelBuildOptions accel_options = {};
    accel_options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_BUILD | OPTIX_BUILD_FLAG_ALLOW_UPDATE;
    accel_options.operation = OPTIX_BUILD_OPERATION_UPDATE;

    const uint32_t triangle_input_flags[1] = { OPTIX_GEOMETRY_FLAG_NONE };
    OptixBuildInput triangle_input = {};
    triangle_input.type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES;
    triangle_input.triangleArray.vertexFormat = OPTIX_VERTEX_FORMAT_FLOAT3;
    triangle_input.triangleArray.numVertices = mVerticesSize;
    triangle_input.triangleArray.vertexStrideInBytes = sizeof(Vertex);
    triangle_input.triangleArray.vertexBuffers = &mdVertices;
    triangle_input.triangleArray.indexFormat = OPTIX_INDICES_FORMAT_UNSIGNED_INT3;
    triangle_input.triangleArray.indexStrideInBytes = 12;
    triangle_input.triangleArray.numIndexTriplets = mIndicesSize / 3;
    triangle_input.triangleArray.indexBuffer = mdIndices;
    triangle_input.triangleArray.flags = triangle_input_flags;
    triangle_input.triangleArray.numSbtRecords = 1;

    OptixAccelBufferSizes gas_buffer_sizes;
    OPTIX_CHECK(optixAccelComputeMemoryUsage(context, &accel_options, &triangle_input,
        1,  // Number of build input
        &gas_buffer_sizes));
    CUdeviceptr d_temp_buffer_gas;
    CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&d_temp_buffer_gas), gas_buffer_sizes.tempUpdateSizeInBytes));


    OPTIX_CHECK(optixAccelBuild(
        context,
        0,              // CUDA stream
        &accel_options,
        &triangle_input,
        1,              // num build inputs
        d_temp_buffer_gas,
        gas_buffer_sizes.tempUpdateSizeInBytes,
        mdGasOutputBuffer,
        gas_buffer_sizes.outputSizeInBytes,
        &mGasHandle,
        nullptr,  // emitted property list
        0             // num emitted properties
    ));

    CUDA_CHECK(cudaFree((void*)d_temp_buffer_gas));
}

Please always provide the following system configuration information when asking about OptiX issues:
OS version, installed GPU(s), VRAM amount, display driver version, OptiX major.minor.micro version, CUDA toolkit version used to generate the input PTX, host compiler version.

The OptiX API calls are asynchronous. That the cudaFree() call after it caught a CUDA error normally means that the error happened inside the asynchronous call. Means the optixAccelBuild() actually didn’t succeed.
You could verify that by adding a CUDA_CHECK( cudaStreamSynchronize(0) ); before the cudaFree().

Also make sure the optixLaunch() is either running on the same stream or is correctly synchronized before changing the acceleration structure currently used.

Acceleration structure buffers need to be 128-byte aligned, which they are when allocating them with cudaMalloc() directly

Please check if the gas_buffer_sizes.outputSizeInBytes in the update function matches the initial allocation.

For performance I would not recreate the CUdeviceptr d_temp_buffer_gas with cudaAlloc() and cudaFree() every time you update the acceleration structure. These calls are synchronous and expensive. You would only need to do that if the tempUpdateSizeInBytes grows.

If none of this helps, a minimal, complete reproducer in failing state would be required to analyze this further.

Thanks for the quick reply!

After all the possibilities you suggested checked, a bug was found in the configs.