If I set up my denoise buffers and command list and then run through the denoise stage twice, I get this exception:
2480x3508 Nvidia exception Unknown error (Details: Function “RTresult bufferMap(RTbuffer, unsigned int, unsigned int, void *, void **)” caught exception: Encountered a CUDA error: cudaDriver().CuMemcpyDtoH( dstHost, srcDevice, byteCount ) returned (700): Illegal address), -1
Program ended with exit code: 1
when trying to map the output buffer to obtain the result of the denoise on the second pass.
The following sample shows this problem: the first time through the for loop everything works fine, but the second time through it throws the above exception on the outputBuffer->map( 0, RT_BUFFER_MAP_READ ); line.
#include <iostream>
#include "optix.h"
#include "optixu/optixpp_namespace.h"
// PTX source for a do-nothing OptiX program. Its single entry point,
// _Z12emptyProgramv, consists of one `ret` instruction. main() loads it with
// createProgramFromPTXString under the name "emptyProgram" and installs it as
// the ray generation program for entry point 0.
// Every line ends in an explicit CR LF ("\r\n"); the adjacent string literals
// are concatenated by the compiler into one string.
const char* EMPTY_PROGRAM_PTX = {
"//\r\n"
"// Generated by NVIDIA NVVM Compiler\r\n"
"//\r\n"
"// Compiler Build ID: CL-23083092\r\n"
"// Cuda compilation tools, release 9.1, V9.1.85\r\n"
"// Based on LLVM 3.4svn\r\n"
"//\r\n"
"\r\n"
".version 6.1\r\n"
".target sm_30\r\n"
".address_size 64\r\n"
"\r\n"
" // .globl _Z12emptyProgramv\r\n"
".global .align 8 .u64 _ZN21rti_internal_register20reg_bitness_detectorE;\r\n"
".global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail0E;\r\n"
".global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail1E;\r\n"
".global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail2E;\r\n"
".global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail3E;\r\n"
".global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail4E;\r\n"
".global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail5E;\r\n"
".global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail6E;\r\n"
".global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail7E;\r\n"
".global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail8E;\r\n"
".global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail9E;\r\n"
".global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail0E;\r\n"
".global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail1E;\r\n"
".global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail2E;\r\n"
".global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail3E;\r\n"
".global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail4E;\r\n"
".global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail5E;\r\n"
".global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail6E;\r\n"
".global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail7E;\r\n"
".global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail8E;\r\n"
".global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail9E;\r\n"
".global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_xE;\r\n"
".global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_yE;\r\n"
".global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_zE;\r\n"
"\r\n"
".visible .entry _Z12emptyProgramv(\r\n"
"\r\n"
")\r\n"
"{\r\n"
"\r\n"
"\r\n"
"\r\n"
" ret;\r\n"
"}\r\n"
};
int main( int argc, char* argv[] )
{
RTsize width = 2480;
RTsize height = 3508;
try {
optix::Context context = optix::Context::create();
context->setRayTypeCount( 1 );
context->setEntryPointCount( 1 );
context->setStackSize( 1800 );
// Create empty program
optix::Program emptyProgram = context->createProgramFromPTXString( EMPTY_PROGRAM_PTX, "emptyProgram" );
context->setRayGenerationProgram( 0, emptyProgram );
optix::PostprocessingStage denoiserStage = context->createBuiltinPostProcessingStage( "DLDenoiser" );
denoiserStage->declareVariable( "blend" )->setFloat( 0.f );
// Create input and output buffers
optix::Buffer inputBuffer = context->createBuffer( RT_BUFFER_INPUT_OUTPUT, RT_FORMAT_FLOAT4, width, height );
optix::Buffer outputBuffer = context->createBuffer( RT_BUFFER_INPUT_OUTPUT, RT_FORMAT_FLOAT4, width, height );
// Set into denoiser
denoiserStage->declareVariable( "input_buffer" )->set( inputBuffer );
denoiserStage->declareVariable( "output_buffer" )->set( outputBuffer );
// Create command list, first append launch of 1, 1 was to combat bug in optix 5.0 when running denoiser
optix::CommandList commandList = context->createCommandList();
commandList->appendLaunch( 0, 1, 1 );
commandList->appendPostprocessingStage( denoiserStage, width, height );
commandList->finalize();
for ( int i = 0; i < 2; ++i ) {
// Fill input buffer with black values
size_t numPixels = width * height;
float *dst = (float*)inputBuffer->map( 0, RT_BUFFER_MAP_WRITE_DISCARD );
for ( size_t i = 0; i < numPixels; ++i ) {
*dst++ = 0.f;
*dst++ = 0.f;
*dst++ = 0.f;
*dst++ = 1.f;
}
inputBuffer->unmap();
// Do denoise
commandList->execute();
// Simulate read result
outputBuffer->map( 0, RT_BUFFER_MAP_READ );
outputBuffer->unmap();
}
std::cout << width << "x" << height << " Denoise ok" << std::endl;
return 0;
}
catch ( const optix::Exception& ex ) {
std::cout << width << "x" << height << " Nvidia exception " << ex.getErrorString() << ", " << ex.getErrorCode() << std::endl;
}
catch ( ... ) {
std::cout << width << "x" << height << " Exception" << std::endl;
}
return 1;
}
System setup:
OSX 10.13.6
Nvidia Geforce GT 750M 2048Mb
Web driver 387.10.10.10.40.122
CUDA 418.105
Optix SDK 5.0.1