How can this program using Optix Denoiser get more than just an all-black output?

2254106794 · November 25, 2024, 3:28pm

This program produces fine output without the use of the Optix Denoiser, but when I use the denoiser to process the image it simply outputs a copy of the alpha channel with all RGB values equal to 0.

Here is what the initialization of the denoiser looks like:

// Builds the OptiX pipeline, allocates the frame buffers, and creates and sets
// up the temporal 2x-upscaling denoiser.
//
// size: resolution of the final (denoised) image. The buffers that are handed
//       to the denoiser as inputs are allocated at half that size per axis.
Renderer::Renderer(const int2 &size) {
	// --- OptiX context, module, program groups and pipeline ---
	initOptix();
	createContext();
	getSourceCode();
	createModule();
	raygenPGs.resize(1);
	createRaygenProgram(0, "__raygen__renderFrame");
	missPGs.resize(RAY_TYPE_COUNT);
	hitgroupPGs.resize(RAY_TYPE_COUNT);
	createMissProgram(INTERSECT_RAY_TYPE, "__miss__radiance");
	createHitgroupProgram(INTERSECT_RAY_TYPE, "__closesthit__radiance", "__anyhit__radiance");
	createPipeline();
	paramsBuffer.alloc(sizeof(params));

	// --- Frame buffers ---
	fbSize = size;
	renderBuffer = new sutil::CUDAOutputBuffer<float4>(
		sutil::CUDAOutputBufferType::GL_INTEROP,
		fbSize.x, fbSize.y
	);
	renderBuffer->setStream(stream);
	// Full-resolution buffers: denoiser output and the previous frame's output.
	outputBuffer.alloc(sizeof(vec4f) * fbSize.x * fbSize.y);
	prevBuffer.alloc(sizeof(vec4f) * fbSize.x * fbSize.y);
	// Half-resolution buffers: everything the ray-tracing launch writes.
	albedoBuffer.alloc(sizeof(vec4f) * (fbSize.x / 2) * (fbSize.y / 2));
	normalBuffer.alloc(sizeof(vec4f) * (fbSize.x / 2) * (fbSize.y / 2));
	colorBuffer.alloc(sizeof(vec4f) * (fbSize.x / 2) * (fbSize.y / 2));
	flowBuffer.alloc(sizeof(float4) * (fbSize.x / 2) * (fbSize.y / 2));
	flowTrustworthinessBuffer.alloc(sizeof(float4) * (fbSize.x / 2) * (fbSize.y / 2));
	randomBuffer.alloc(sizeof(curandStateXORWOW_t) * (fbSize.x / 2) * (fbSize.y / 2));
	params.initialized = false;

	// --- Denoiser ---
	// Zero-initialize the whole struct so that any member not assigned below
	// (including members added by newer SDK versions) has a defined value.
	OptixDenoiserOptions dnOptions = {};
	dnOptions.guideAlbedo = 1;
	dnOptions.guideNormal = 1;
	dnOptions.denoiseAlpha = OPTIX_DENOISER_ALPHA_MODE_COPY;
	OPTIX_CHECK(optixDenoiserCreate(optixContext, OPTIX_DENOISER_MODEL_KIND_TEMPORAL_UPSCALE2X, &dnOptions, &denoiser));
	OPTIX_CHECK(optixDenoiserComputeMemoryResources(denoiser, fbSize.x, fbSize.y, &dnSizes));
	CUDA_CHECK(cudaMalloc((void **)&dnState, dnSizes.stateSizeInBytes));
	CUDA_CHECK(cudaMalloc((void **)&dnScratch, dnSizes.withoutOverlapScratchSizeInBytes));
	OPTIX_CHECK(optixDenoiserSetup(
		denoiser, stream,
		fbSize.x, fbSize.y,
		dnState, dnSizes.stateSizeInBytes,
		dnScratch, dnSizes.withoutOverlapScratchSizeInBytes
	));

	// Internal guide layers carry the denoiser's temporal state from frame to
	// frame. cudaMalloc does not clear memory, so zero-fill both of them; the
	// first frame must not see garbage (possibly NaNs) as its "previous" state.
	const size_t internalGuideBytes = dnSizes.internalGuideLayerPixelSizeInBytes * fbSize.x * fbSize.y;
	CUDA_CHECK(cudaMalloc((void **)&internalIn, internalGuideBytes));
	CUDA_CHECK(cudaMalloc((void **)&internalOut, internalGuideBytes));
	CUDA_CHECK(cudaMemset((void *)internalIn, 0, internalGuideBytes));
	CUDA_CHECK(cudaMemset((void *)internalOut, 0, internalGuideBytes));

	// Device storage for the denoiser's HDR inputs (one float, three floats).
	// The denoiser reads these values; they have to be filled in before
	// optixDenoiserInvoke, e.g. with optixDenoiserComputeIntensity and
	// optixDenoiserComputeAverageColor.
	CUDA_CHECK(cudaMalloc((void **)&intensityPtr, sizeof(float)));
	CUDA_CHECK(cudaMalloc((void **)&avgColorPtr, sizeof(float3)));

	// Zero every member first, then set the ones this renderer relies on.
	dnParams = {};
	dnParams.hdrIntensity = intensityPtr;
	dnParams.blendFactor = 0.0f;
	dnParams.hdrAverageColor = avgColorPtr;
	// No previous frame exists yet; render() turns this on after the first frame.
	dnParams.temporalModeUsePreviousLayers = 0;
	CUDA_SYNC_CHECK();
	std::cout << "Successfully set up renderer." << std::endl;
}

The rendering part looks like this:

void Renderer::render() {
	CUDA_SYNC_CHECK();
	if (!params.frameIndex) params.preCamera = params.camera;
	params.frame.fbSize = make_int2(fbSize.x / 2, fbSize.y / 2);
	params.frame.randomBuffer = (curandStateXORWOW_t *)randomBuffer.d_pointer();
	params.frame.prevBuffer = (vec4f *)prevBuffer.d_pointer();
	params.frame.albedoBuffer = (vec4f *)albedoBuffer.d_pointer();
	params.frame.normalBuffer = (vec4f *)normalBuffer.d_pointer();
	params.frame.colorBuffer = (vec4f *)colorBuffer.d_pointer();
	params.frame.flowBuffer = (float4 *)flowBuffer.d_pointer();
	params.frame.flowTrustworthinessBuffer = (float4 *)flowTrustworthinessBuffer.d_pointer();
	paramsBuffer.upload(&params, 1);
	++ params.frameIndex;
	CUDA_SYNC_CHECK();
	OPTIX_CHECK(optixLaunch(
		pipeline, stream,
		paramsBuffer.d_pointer(),
		paramsBuffer.sizeInBytes,
		&sbt,
		fbSize.x / 2, fbSize.y / 2,
		1
	));
	CUDA_SYNC_CHECK();
	OptixDenoiserGuideLayer guideLayer;
	guideLayer.albedo = createOptixImageF4(albedoBuffer.d_pointer(), fbSize.x / 2, fbSize.y / 2);
	guideLayer.normal = createOptixImageF4(normalBuffer.d_pointer(), fbSize.x / 2, fbSize.y / 2);
	guideLayer.flow = createOptixImageF4(flowBuffer.d_pointer(), fbSize.x / 2, fbSize.y / 2);
	guideLayer.flowTrustworthiness = createOptixImageF4(flowTrustworthinessBuffer.d_pointer(), fbSize.x / 2, fbSize.y / 2);
	guideLayer.previousOutputInternalGuideLayer = OptixImage2D{
		internalIn,
		(unsigned int)fbSize.x, (unsigned int)fbSize.y,
		(unsigned int)dnSizes.internalGuideLayerPixelSizeInBytes * fbSize.x,
		(unsigned int)dnSizes.internalGuideLayerPixelSizeInBytes,
		OPTIX_PIXEL_FORMAT_INTERNAL_GUIDE_LAYER
	};
	guideLayer.outputInternalGuideLayer = OptixImage2D{
		internalOut,
		(unsigned int)fbSize.x, (unsigned int)fbSize.y,
		(unsigned int)dnSizes.internalGuideLayerPixelSizeInBytes * fbSize.x,
		(unsigned int)dnSizes.internalGuideLayerPixelSizeInBytes,
		OPTIX_PIXEL_FORMAT_INTERNAL_GUIDE_LAYER
	};
	OptixDenoiserLayer layer;
	layer.output = createOptixImageF4(outputBuffer.d_pointer(), fbSize.x, fbSize.y);
	layer.previousOutput = createOptixImageF4(prevBuffer.d_pointer(), fbSize.x, fbSize.y);
	layer.input = createOptixImageF4(colorBuffer.d_pointer(), fbSize.x / 2, fbSize.y / 2);
	layer.type = OPTIX_DENOISER_AOV_TYPE_NONE;
	OPTIX_CHECK(optixDenoiserInvoke(
		denoiser, stream,
		&dnParams,
		dnState, dnSizes.stateSizeInBytes,
		&guideLayer, &layer, 1,
		0, 0,
		dnScratch, dnSizes.withoutOverlapScratchSizeInBytes
	));
	CUDA_SYNC_CHECK();
	CUDA_CHECK(cudaMemcpyAsync((void*)prevBuffer.d_pointer(), (vec4f *)outputBuffer.d_pointer(), outputBuffer.sizeInBytes, cudaMemcpyDeviceToDevice, stream));
	CUDA_CHECK(cudaMemcpyAsync(renderBuffer->map(), (vec4f *)outputBuffer.d_pointer(), outputBuffer.sizeInBytes, cudaMemcpyDeviceToDevice, stream));
	renderBuffer->unmap();
	CUDA_SYNC_CHECK();
	params.initialized = true;
	params.preCamera = params.camera;
	dnParams.temporalModeUsePreviousLayers = 1;
}

This program uses an encapsulation of the device pointers:

//! owns at most one device allocation obtained through cudaMalloc
struct CUDABuffer {
    //! device address of the allocation, as a CUdeviceptr
    inline CUdeviceptr d_pointer() const {
        return (CUdeviceptr)d_ptr;
    }

    //! drop the current allocation (if any) and allocate `size` bytes
    void resize(size_t size) {
        if (d_ptr != nullptr) {
            free();
        }
        alloc(size);
    }

    //! allocate `size` bytes; the buffer must not already be allocated
    void alloc(size_t size) {
        assert(!allocated);
        sizeInBytes = size;
        CUDA_CHECK(cudaMalloc(&d_ptr, size));
        allocated = true;
    }

    //! release the allocation and reset the bookkeeping
    void free() {
        assert(allocated);
        if (d_ptr) {
            CUDA_CHECK(cudaFree(d_ptr));
            d_ptr = nullptr;
        }
        allocated = false;
        sizeInBytes = 0;
    }

    //! allocate exactly enough room for `vt` and copy it to the device
    template<typename T>
    void alloc_and_upload(const std::vector<T> &vt) {
        const size_t elementCount = vt.size();
        alloc(elementCount * sizeof(T));
        upload(vt.data(), elementCount);
    }

    //! copy `count` elements from host to device; must fill the buffer exactly
    template<typename T>
    void upload(const T *t, size_t count) {
        const size_t byteCount = count * sizeof(T);
        assert(allocated);
        assert(sizeInBytes == byteCount);
        if (d_ptr == nullptr) {
            return;
        }
        CUDA_CHECK(cudaMemcpy(d_ptr, t, byteCount, cudaMemcpyHostToDevice));
    }

    //! copy `count` elements from device to host; must cover the buffer exactly
    template<typename T>
    void download(T *t, size_t count) {
        const size_t byteCount = count * sizeof(T);
        assert(allocated);
        assert(sizeInBytes == byteCount);
        if (d_ptr == nullptr) {
            return;
        }
        CUDA_CHECK(cudaMemcpy((void *)t, d_ptr, byteCount, cudaMemcpyDeviceToHost));
    }

    size_t sizeInBytes{ 0 };    //!< size of the current allocation in bytes
    void *d_ptr{ nullptr };     //!< device pointer, nullptr when nothing is allocated
    bool allocated{ false };    //!< set by alloc(), cleared by free()
};

dhart · November 27, 2024, 4:21pm

Hi @2254106794,

Are you setting the hdrIntensity and hdrAverageColor values? Your code passes valid pointers, but the snippets you shared don’t initialize that memory. Is it done somewhere else? These are input values to the denoiser, not output values. You can call optixDenoiserComputeAverageColor() and/or optixDenoiserComputeIntensity() to compute them… I don’t see either of those in the code.

–
David.

2254106794 · November 28, 2024, 12:04am

Thanks for the reply.

I have tried setting them by calling both functions just before calling optixDenoiserInvoke, but it still didn’t work and kept resulting in a black image.

To be more precise, I called the functions in the following way:

	...
	layer.type = OPTIX_DENOISER_AOV_TYPE_NONE;
	// Both helpers take the input image by pointer (const OptixImage2D *),
	// so pass the address of layer.input rather than the struct itself.
	OPTIX_CHECK(optixDenoiserComputeAverageColor(
		denoiser, stream,
		&layer.input,
		dnParams.hdrAverageColor,
		dnScratch, dnSizes.withoutOverlapScratchSizeInBytes
	));
	OPTIX_CHECK(optixDenoiserComputeIntensity(
		denoiser, stream,
		&layer.input,
		dnParams.hdrIntensity,
		dnScratch, dnSizes.withoutOverlapScratchSizeInBytes
	));
	// The values computed above are read by the denoiser through dnParams.
	OPTIX_CHECK(optixDenoiserInvoke(
		denoiser, stream,
		&dnParams,
		dnState, dnSizes.stateSizeInBytes,
		&guideLayer, &layer, 1,
		0, 0,
		dnScratch, dnSizes.withoutOverlapScratchSizeInBytes
	));
	...

dhart · December 2, 2024, 5:47pm

Can you try adding initializers to all your OptiX structs and see if that helps? e.g.:

OptixDenoiserOptions dnOptions = {};
OptixDenoiserParams params = {};

etc.

Check your input images for NaNs; they can cause black outputs. Also here’s a secret debugging trick. You can set the environment variable OPTIX_DENOISER_DEBUG_DUMP_PATH to something like /tmp or c:\temp, and it will allow you to verify that all input data to the denoiser is correct.

–
David.

2254106794 · December 3, 2024, 2:16pm

Thanks for the reply.

As far as I can tell, I have already initialized all the variables in OptixDenoiserOptions and OptixDenoiserParams, and the input image only contains normal values.

I’m trying to set OPTIX_DENOISER_DEBUG_DUMP_PATH to verify the input data again.

dhart · December 4, 2024, 3:57pm

Sounds good. If that doesn’t help, let me know if it’s possible to share a complete reproducer. It’s always possible the issue is just color handling somewhere else, and not a problem interfacing with the denoiser.

It’s good if you initialize all the variables in the OptiX structs, but to be clear, we recommend zero-initializing the entire struct for safety using {}, even if you also manually assign all values. The reason is because we occasionally add members to these structs, and if the struct isn’t zero-initialized, then SDK upgrades or even driver upgrades might introduce bugs or accidentally change behavior, and require a code change and a new build of your app. We are careful to ensure that the default values for new struct members are zero, to help you guarantee that nothing changes as a result of routine upgrades on your (or your users’) machines.

–
David.

2254106794 · December 14, 2024, 3:19pm

Thanks for your reply.

I’ve successfully found the problem — I forgot to check whether the motion vector is valid in the shader program and it resulted in some NaNs.

However, after I fixed the problem, the denoised result looks strange. Is this because I chose the wrong denoising model? I’ve tried OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV and OPTIX_DENOISER_MODEL_KIND_TEMPORAL, but both gave similarly strange results.

Or is this caused by incorrect input data? I can’t figure out the problem.

2254106794 · December 14, 2024, 3:23pm

Both the results are using 1 spp.

Comparing to another result generated using 255 spp:

dhart · December 31, 2024, 12:18am

Hi @2254106794,

Sorry this was left hanging. What is strange, exactly? I’m not sure I see what is unexpected about the output. Is the 25spp image converging correctly? 1spp is certainly very low, and the OptiX denoiser typically needs more than 1 sample per pixel to get decent results. These images seem very dark, and that might be contributing. Do you get anything different with a higher exposure?

Of course, also worth checking that your motion vectors are correct and not causing additional issues, make sure they’re in the right space and have the right magnitude and direction. Compare to denoised images without motion vectors; using motion vectors should at least make it the same or better, and not worse.

–
David.

2254106794 · December 31, 2024, 1:06pm

Thanks for your reply.
It seems that my motion vectors aren’t working very well. I’ve switched to another noise reduction algorithm already.

Topic		Replies	Views
Optix denoiser implementation exhibits black square artifacts OptiX	20	141	September 20, 2024
optix7 denoiser OptiX	5	1214	June 14, 2022
Fast denoising of high number of low-res images OptiX	3	720	September 22, 2023
Optix denoiser output buffer data range OptiX	7	1060	June 14, 2022
OptiX denoiser is broken (?) after recent driver updates OptiX	16	2119	June 14, 2022
Grid-like patterns in denoiser OptiX	14	1670	June 14, 2022
Questions about denoiser OptiX	4	1081	November 29, 2021
[OptiX 7] Tiled Denoiser OptiX	12	1187	June 14, 2022
Unresolved Symbol Error when OptixDenoiser used in seperate class OptiX	3	596	February 27, 2023
Basic OptiX use turning image red OptiX	7	840	November 11, 2021

How can this program using Optix Denoiser get more than just an all-black output?

Related topics