How can this program using the OptiX denoiser get more than just an all-black output?

This program produces fine output without the OptiX denoiser, but when I use the denoiser to process the image it simply outputs a copy of the alpha channel, with all RGB values equal to 0.

Here is what the initialization of the denoiser looks like:

Renderer::Renderer(const int2 &size) {
	initOptix();
	createContext();
	getSourceCode();
	createModule();
	raygenPGs.resize(1);
	createRaygenProgram(0, "__raygen__renderFrame");
	missPGs.resize(RAY_TYPE_COUNT);
	hitgroupPGs.resize(RAY_TYPE_COUNT);
	createMissProgram(INTERSECT_RAY_TYPE, "__miss__radiance");
	createHitgroupProgram(INTERSECT_RAY_TYPE, "__closesthit__radiance", "__anyhit__radiance");
	createPipeline();
	paramsBuffer.alloc(sizeof(params));
	fbSize = size;
	renderBuffer = new sutil::CUDAOutputBuffer<float4>(
		sutil::CUDAOutputBufferType::GL_INTEROP,
		fbSize.x, fbSize.y
	);
	renderBuffer->setStream(stream);
	outputBuffer.alloc(sizeof(vec4f) * fbSize.x * fbSize.y);
	prevBuffer.alloc(sizeof(vec4f) * fbSize.x * fbSize.y);
	albedoBuffer.alloc(sizeof(vec4f) * (fbSize.x / 2) * (fbSize.y / 2));
	normalBuffer.alloc(sizeof(vec4f) * (fbSize.x / 2) * (fbSize.y / 2));
	colorBuffer.alloc(sizeof(vec4f) * (fbSize.x / 2) * (fbSize.y / 2));
	flowBuffer.alloc(sizeof(float4) * (fbSize.x / 2) * (fbSize.y / 2));
	flowTrustworthinessBuffer.alloc(sizeof(float4) * (fbSize.x / 2) * (fbSize.y / 2));
	randomBuffer.alloc(sizeof(curandStateXORWOW_t) * (fbSize.x / 2) * (fbSize.y / 2));
	params.initialized = false;
	OptixDenoiserOptions dnOptions;
	dnOptions.guideAlbedo = 1;
	dnOptions.guideNormal = 1;
	dnOptions.denoiseAlpha = OPTIX_DENOISER_ALPHA_MODE_COPY;
	OPTIX_CHECK(optixDenoiserCreate(optixContext, OPTIX_DENOISER_MODEL_KIND_TEMPORAL_UPSCALE2X, &dnOptions, &denoiser));
	OPTIX_CHECK(optixDenoiserComputeMemoryResources(denoiser, fbSize.x, fbSize.y, &dnSizes));
	CUDA_CHECK(cudaMalloc((void **)&dnState, dnSizes.stateSizeInBytes));
	CUDA_CHECK(cudaMalloc((void **)&dnScratch, dnSizes.withoutOverlapScratchSizeInBytes));
	OPTIX_CHECK(optixDenoiserSetup(
		denoiser, stream,
		fbSize.x, fbSize.y,
		dnState, dnSizes.stateSizeInBytes,
		dnScratch, dnSizes.withoutOverlapScratchSizeInBytes
	));
	CUDA_CHECK(cudaMalloc((void **)&internalIn, dnSizes.internalGuideLayerPixelSizeInBytes * fbSize.x * fbSize.y));
	CUDA_CHECK(cudaMalloc((void **)&internalOut, dnSizes.internalGuideLayerPixelSizeInBytes * fbSize.x * fbSize.y));
	CUDA_CHECK(cudaMalloc((void **)&intensityPtr, sizeof(float)));
	CUDA_CHECK(cudaMalloc((void **)&avgColorPtr, sizeof(float3)));
	dnParams.hdrIntensity = intensityPtr;
	dnParams.blendFactor = 0.0f;
	dnParams.hdrAverageColor = avgColorPtr;
	dnParams.temporalModeUsePreviousLayers = 0;
	CUDA_SYNC_CHECK();
	std::cout << "Successfully set up renderer." << std::endl;
}

The rendering part looks like this:

void Renderer::render() {
	CUDA_SYNC_CHECK();
	if (!params.frameIndex) params.preCamera = params.camera;
	params.frame.fbSize = make_int2(fbSize.x / 2, fbSize.y / 2);
	params.frame.randomBuffer = (curandStateXORWOW_t *)randomBuffer.d_pointer();
	params.frame.prevBuffer = (vec4f *)prevBuffer.d_pointer();
	params.frame.albedoBuffer = (vec4f *)albedoBuffer.d_pointer();
	params.frame.normalBuffer = (vec4f *)normalBuffer.d_pointer();
	params.frame.colorBuffer = (vec4f *)colorBuffer.d_pointer();
	params.frame.flowBuffer = (float4 *)flowBuffer.d_pointer();
	params.frame.flowTrustworthinessBuffer = (float4 *)flowTrustworthinessBuffer.d_pointer();
	paramsBuffer.upload(&params, 1);
	++ params.frameIndex;
	CUDA_SYNC_CHECK();
	OPTIX_CHECK(optixLaunch(
		pipeline, stream,
		paramsBuffer.d_pointer(),
		paramsBuffer.sizeInBytes,
		&sbt,
		fbSize.x / 2, fbSize.y / 2,
		1
	));
	CUDA_SYNC_CHECK();
	OptixDenoiserGuideLayer guideLayer;
	guideLayer.albedo = createOptixImageF4(albedoBuffer.d_pointer(), fbSize.x / 2, fbSize.y / 2);
	guideLayer.normal = createOptixImageF4(normalBuffer.d_pointer(), fbSize.x / 2, fbSize.y / 2);
	guideLayer.flow = createOptixImageF4(flowBuffer.d_pointer(), fbSize.x / 2, fbSize.y / 2);
	guideLayer.flowTrustworthiness = createOptixImageF4(flowTrustworthinessBuffer.d_pointer(), fbSize.x / 2, fbSize.y / 2);
	guideLayer.previousOutputInternalGuideLayer = OptixImage2D{
		internalIn,
		(unsigned int)fbSize.x, (unsigned int)fbSize.y,
		(unsigned int)dnSizes.internalGuideLayerPixelSizeInBytes * fbSize.x,
		(unsigned int)dnSizes.internalGuideLayerPixelSizeInBytes,
		OPTIX_PIXEL_FORMAT_INTERNAL_GUIDE_LAYER
	};
	guideLayer.outputInternalGuideLayer = OptixImage2D{
		internalOut,
		(unsigned int)fbSize.x, (unsigned int)fbSize.y,
		(unsigned int)dnSizes.internalGuideLayerPixelSizeInBytes * fbSize.x,
		(unsigned int)dnSizes.internalGuideLayerPixelSizeInBytes,
		OPTIX_PIXEL_FORMAT_INTERNAL_GUIDE_LAYER
	};
	OptixDenoiserLayer layer;
	layer.output = createOptixImageF4(outputBuffer.d_pointer(), fbSize.x, fbSize.y);
	layer.previousOutput = createOptixImageF4(prevBuffer.d_pointer(), fbSize.x, fbSize.y);
	layer.input = createOptixImageF4(colorBuffer.d_pointer(), fbSize.x / 2, fbSize.y / 2);
	layer.type = OPTIX_DENOISER_AOV_TYPE_NONE;
	OPTIX_CHECK(optixDenoiserInvoke(
		denoiser, stream,
		&dnParams,
		dnState, dnSizes.stateSizeInBytes,
		&guideLayer, &layer, 1,
		0, 0,
		dnScratch, dnSizes.withoutOverlapScratchSizeInBytes
	));
	CUDA_SYNC_CHECK();
	CUDA_CHECK(cudaMemcpyAsync((void*)prevBuffer.d_pointer(), (vec4f *)outputBuffer.d_pointer(), outputBuffer.sizeInBytes, cudaMemcpyDeviceToDevice, stream));
	CUDA_CHECK(cudaMemcpyAsync(renderBuffer->map(), (vec4f *)outputBuffer.d_pointer(), outputBuffer.sizeInBytes, cudaMemcpyDeviceToDevice, stream));
	renderBuffer->unmap();
	CUDA_SYNC_CHECK();
	params.initialized = true;
	params.preCamera = params.camera;
	dnParams.temporalModeUsePreviousLayers = 1;
}
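
For reference, createOptixImageF4 builds the OptixImage2D descriptors used above; here is a minimal sketch of what it presumably looks like, assuming tightly packed float4 pixels with no row padding:

OptixImage2D createOptixImageF4(CUdeviceptr data, unsigned int width, unsigned int height) {
	OptixImage2D image = {};
	image.data               = data;                   // device pointer to the pixel data
	image.width              = width;
	image.height             = height;
	image.pixelStrideInBytes = sizeof(float4);         // tightly packed pixels (assumption)
	image.rowStrideInBytes   = width * sizeof(float4); // no extra row padding (assumption)
	image.format             = OPTIX_PIXEL_FORMAT_FLOAT4;
	return image;
}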

The program wraps its device pointers in a small helper struct:

struct CUDABuffer {
    inline CUdeviceptr d_pointer() const {
        return (CUdeviceptr)d_ptr;
    }

    //! re-size buffer to given number of bytes
    void resize(size_t size) {
        if (d_ptr) free();
        alloc(size);
    }

    //! allocate to given number of bytes
    void alloc(size_t size) {
        assert(!allocated);
        this->sizeInBytes = size;
        CUDA_CHECK(cudaMalloc((void **)&d_ptr, sizeInBytes));
        allocated = true;
    }

    //! free allocated memory
    void free() {
        assert(allocated);
        if (d_ptr != nullptr) {
            CUDA_CHECK(cudaFree(d_ptr));
            d_ptr = nullptr;
        }
        sizeInBytes = 0;
        allocated = false;
    }

    template<typename T>
    void alloc_and_upload(const std::vector<T> &vt) {
        alloc(vt.size() * sizeof(T));
        upload((const T *)vt.data(), vt.size());
    }

    template<typename T>
    void upload(const T *t, size_t count) {
        assert(allocated);
        assert(sizeInBytes == count * sizeof(T));
        if (d_ptr != nullptr) {
            CUDA_CHECK(cudaMemcpy(d_ptr, (void *)t, count * sizeof(T), cudaMemcpyHostToDevice));
        }
    }

    template<typename T>
    void download(T *t, size_t count) {
        assert(allocated);
        assert(sizeInBytes == count * sizeof(T));
        if (d_ptr != nullptr) {
            CUDA_CHECK(cudaMemcpy((void *)t, d_ptr, count * sizeof(T), cudaMemcpyDeviceToHost));
        }
    }

    size_t sizeInBytes{ 0 };
    void *d_ptr{ nullptr };
    bool allocated{ 0 };
};
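
A typical round trip through this wrapper looks like the following (the buffer and data here are purely illustrative). Note that upload() and download() assert that the buffer size matches count * sizeof(T), so the element type used for the transfer has to match the allocation:

CUDABuffer demoBuffer;                                 // hypothetical buffer, for illustration only
std::vector<float4> pixels(fbSize.x * fbSize.y, make_float4(0.f, 0.f, 0.f, 1.f));
demoBuffer.alloc_and_upload(pixels);                   // cudaMalloc + cudaMemcpy host -> device
// ... use demoBuffer.d_pointer() on the device ...
demoBuffer.download(pixels.data(), pixels.size());     // cudaMemcpy device -> host
demoBuffer.free();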

Hi @2254106794,

Are you setting the hdrIntensity and hdrAverageColor values? Your code passes valid pointers, but the snippets you shared don’t initialize that memory. Is it done somewhere else? These are input values to the denoiser, not output values. You can call optixDenoiserComputeAverageColor() and/or optixDenoiserComputeIntensity() to compute them… I don’t see either of those in the code.


David.

Thanks for the reply.

I have tried setting them by calling both functions just before optixDenoiserInvoke, but it still didn’t work and kept resulting in a black image.

To be more precise, I called the functions in the following way:

	...
	layer.type = OPTIX_DENOISER_AOV_TYPE_NONE;
	OPTIX_CHECK(optixDenoiserComputeAverageColor(
		denoiser, stream,
		layer.input,
		dnParams.hdrAverageColor,
		dnScratch, dnSizes.withoutOverlapScratchSizeInBytes
	));
	OPTIX_CHECK(optixDenoiserComputeIntensity(
		denoiser, stream,
		layer.input,
		dnParams.hdrIntensity,
		dnScratch, dnSizes.withoutOverlapScratchSizeInBytes
	));
	OPTIX_CHECK(optixDenoiserInvoke(
		denoiser, stream,
		&dnParams,
		dnState, dnSizes.stateSizeInBytes,
		&guideLayer, &layer, 1,
		0, 0,
		dnScratch, dnSizes.withoutOverlapScratchSizeInBytes
	));
	...

Can you try adding initializers to all your OptiX structs and see if that helps? e.g.:

OptixDenoiserOptions dnOptions = {};
OptixDenoiserParams params = {};

etc.

Check your input images for NaNs; they can cause black outputs. Also, here’s a secret debugging trick: you can set the environment variable OPTIX_DENOISER_DEBUG_DUMP_PATH to something like /tmp or c:\temp, and it will allow you to verify that all input data to the denoiser is correct.
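
For example, a quick host-side NaN check could look roughly like this (a sketch that assumes your vec4f is layout-compatible with float4, so the CUDABuffer size assertion holds):

#include <cmath>
#include <vector>

// Download a float4 buffer and scan it for NaNs.
bool hasNaNs(CUDABuffer &buffer, int width, int height) {
	std::vector<float4> pixels(width * height);
	buffer.download(pixels.data(), pixels.size());
	for (const float4 &p : pixels)
		if (std::isnan(p.x) || std::isnan(p.y) || std::isnan(p.z) || std::isnan(p.w))
			return true;
	return false;
}

// e.g. hasNaNs(colorBuffer, fbSize.x / 2, fbSize.y / 2)
//      hasNaNs(flowBuffer,  fbSize.x / 2, fbSize.y / 2)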


David.

Thanks for the reply.

I believe I have already initialized all the variables in OptixDenoiserOptions and OptixDenoiserParams, and the input image contains only normal values.

I’m trying to set OPTIX_DENOISER_DEBUG_DUMP_PATH to verify the input data again.

Sounds good. If that doesn’t help, let me know if it’s possible to share a complete reproducer. It’s always possible the issue is just color handling somewhere else, and not a problem interfacing with the denoiser.

It’s good if you initialize all the variables in the OptiX structs, but to be clear, we recommend zero-initializing the entire struct with {} for safety, even if you also manually assign all values. The reason is that we occasionally add members to these structs, and if the struct isn’t zero-initialized, an SDK or driver upgrade might introduce bugs or accidentally change behavior, forcing a code change and a new build of your app. We are careful to ensure that the default values for new struct members are zero, to help guarantee that nothing changes as a result of routine upgrades on your (or your users’) machines.
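
For example, applied to the structs from your snippets, the pattern looks like this:

OptixDenoiserOptions dnOptions = {};                   // zero-initialize everything first
dnOptions.guideAlbedo  = 1;
dnOptions.guideNormal  = 1;
dnOptions.denoiseAlpha = OPTIX_DENOISER_ALPHA_MODE_COPY;

OptixDenoiserParams dnParams = {};                     // members added in future SDKs stay at zero
dnParams.hdrIntensity                  = intensityPtr;
dnParams.hdrAverageColor               = avgColorPtr;
dnParams.blendFactor                   = 0.0f;
dnParams.temporalModeUsePreviousLayers = 0;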


David.

Thanks for your reply.

I’ve found the problem: I forgot to check whether the motion vectors were valid in the shader program, which produced some NaNs.

However, after fixing that, the denoised result still looks strange. Is this because I chose the wrong denoising model? I’ve tried OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV and OPTIX_DENOISER_MODEL_KIND_TEMPORAL, but both produced similarly strange results.


Or is this caused by incorrect input data? I can’t figure out the problem.

Both results use 1 spp.

Compared with another result generated using 255 spp:

Hi @2254106794,

Sorry this was left hanging. What is strange, exactly? I’m not sure I see what is unexpected about the output. Is the 255 spp image converging correctly? 1 spp is certainly very low, and the OptiX denoiser typically needs more than 1 sample per pixel to get decent results. These images seem very dark, and that might be contributing. Do you get anything different with a higher exposure?

It’s also worth checking that your motion vectors are correct and not causing additional issues: make sure they’re in the right space and have the right magnitude and direction. Compare against denoised images without motion vectors; using motion vectors should make the result the same or better, never worse.
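
One quick way to run that comparison with the buffers from your earlier snippets is to zero the flow buffer for a frame or two and see whether the output gets better or worse (a sketch, assuming the half-resolution float4 flow layout shown above):

// Temporarily clear the motion vectors so the denoiser sees zero flow,
// then compare against the flow-guided result.
CUDA_CHECK(cudaMemset((void *)flowBuffer.d_pointer(), 0,
                      sizeof(float4) * (fbSize.x / 2) * (fbSize.y / 2)));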


David.

Thanks for your reply.
It seems that my motion vectors aren’t working very well. I’ve already switched to a different denoising algorithm.