OSX buffer map exception when using same denoising buffers a second time

If I set up my denoise buffers and command list, and then run through the denoise stage twice, I get this exception:

2480x3508 Nvidia exception Unknown error (Details: Function “RTresult bufferMap(RTbuffer, unsigned int, unsigned int, void *, void **)” caught exception: Encountered a CUDA error: cudaDriver().CuMemcpyDtoH( dstHost, srcDevice, byteCount ) returned (700): Illegal address), -1
Program ended with exit code: 1

It occurs when trying to map the output buffer to obtain the result of the denoise on the second time through.

The following sample shows this problem: the first time through the for loop everything works fine, but the second time through it throws the above exception on the outputBuffer->map( 0, RT_BUFFER_MAP_READ ); line.

#include <iostream>

#include "optix.h"
#include "optixu/optixpp_namespace.h"

// PTX source, embedded as a C string, for a program that does nothing.
//
// Per its own header lines, the text is NVVM compiler output (CUDA 9.1) for
// PTX version 6.1, target sm_30, 64-bit addresses. It declares a set of
// `rti_internal_register` globals and one visible entry point,
// `_Z12emptyProgramv`, whose body is a single `ret`.
//
// main() hands this string to createProgramFromPTXString() under the name
// "emptyProgram" and installs the result as ray generation program 0.
//
// The literal is passed to the PTX loader at runtime; do not reformat it.
const char* EMPTY_PROGRAM_PTX = {
	"//\r\n"
	"// Generated by NVIDIA NVVM Compiler\r\n"
	"//\r\n"
	"// Compiler Build ID: CL-23083092\r\n"
	"// Cuda compilation tools, release 9.1, V9.1.85\r\n"
	"// Based on LLVM 3.4svn\r\n"
	"//\r\n"
	"\r\n"
	".version 6.1\r\n"
	".target sm_30\r\n"
	".address_size 64\r\n"
	"\r\n"
	"	// .globl	_Z12emptyProgramv\r\n"
	".global .align 8 .u64 _ZN21rti_internal_register20reg_bitness_detectorE;\r\n"
	".global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail0E;\r\n"
	".global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail1E;\r\n"
	".global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail2E;\r\n"
	".global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail3E;\r\n"
	".global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail4E;\r\n"
	".global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail5E;\r\n"
	".global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail6E;\r\n"
	".global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail7E;\r\n"
	".global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail8E;\r\n"
	".global .align 8 .u64 _ZN21rti_internal_register24reg_exception_64_detail9E;\r\n"
	".global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail0E;\r\n"
	".global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail1E;\r\n"
	".global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail2E;\r\n"
	".global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail3E;\r\n"
	".global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail4E;\r\n"
	".global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail5E;\r\n"
	".global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail6E;\r\n"
	".global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail7E;\r\n"
	".global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail8E;\r\n"
	".global .align 4 .u32 _ZN21rti_internal_register21reg_exception_detail9E;\r\n"
	".global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_xE;\r\n"
	".global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_yE;\r\n"
	".global .align 4 .u32 _ZN21rti_internal_register14reg_rayIndex_zE;\r\n"
	"\r\n"
	".visible .entry _Z12emptyProgramv(\r\n"
	"\r\n"
	")\r\n"
	"{\r\n"
	"\r\n"
	"\r\n"
	"\r\n"
	"	ret;\r\n"
	"}\r\n"
};

int main( int argc, char* argv[] )
{
	RTsize		width = 2480;
	RTsize		height = 3508;

	try {
		optix::Context		context = optix::Context::create();

		context->setRayTypeCount( 1 );
		context->setEntryPointCount( 1 );
		context->setStackSize( 1800 );

		// Create empty program
		optix::Program emptyProgram = context->createProgramFromPTXString( EMPTY_PROGRAM_PTX, "emptyProgram" );
		context->setRayGenerationProgram( 0, emptyProgram );

		optix::PostprocessingStage denoiserStage = context->createBuiltinPostProcessingStage( "DLDenoiser" );
		denoiserStage->declareVariable( "blend" )->setFloat( 0.f );

		// Create input and output buffers
		optix::Buffer inputBuffer = context->createBuffer( RT_BUFFER_INPUT_OUTPUT, RT_FORMAT_FLOAT4, width, height );
		optix::Buffer outputBuffer = context->createBuffer( RT_BUFFER_INPUT_OUTPUT, RT_FORMAT_FLOAT4, width, height );

		// Set into denoiser
		denoiserStage->declareVariable( "input_buffer" )->set( inputBuffer );
		denoiserStage->declareVariable( "output_buffer" )->set( outputBuffer );

        // Create command list, first append launch of 1, 1 was to combat bug in optix 5.0 when running denoiser
        optix::CommandList commandList = context->createCommandList();
        commandList->appendLaunch( 0, 1, 1 );
        commandList->appendPostprocessingStage( denoiserStage, width, height );
        commandList->finalize();

        for ( int i = 0; i < 2; ++i ) {
            // Fill input buffer with black values
            size_t		numPixels = width * height;
            float		*dst = (float*)inputBuffer->map( 0, RT_BUFFER_MAP_WRITE_DISCARD );

            for ( size_t i = 0; i < numPixels; ++i ) {
                *dst++ = 0.f;
                *dst++ = 0.f;
                *dst++ = 0.f;
                *dst++ = 1.f;
            }
       
            inputBuffer->unmap();

            // Do denoise
            commandList->execute();
 
            // Simulate read result
            outputBuffer->map( 0, RT_BUFFER_MAP_READ );
            outputBuffer->unmap();
        }
        
		std::cout << width << "x" << height << " Denoise ok" << std::endl;

		return 0;
	}
	catch ( const optix::Exception& ex ) {
		std::cout << width << "x" << height << " Nvidia exception " << ex.getErrorString() << ", " << ex.getErrorCode() << std::endl;
	}
	catch ( ... ) {
		std::cout << width << "x" << height << " Exception" << std::endl;
	}

	return 1;
}

System setup:

macOS (OS X) 10.13.6
NVIDIA GeForce GT 750M 2048 MB

Web driver 387.10.10.10.40.122
CUDA 418.105
OptiX SDK 5.0.1