How can I get this program, which uses the OptiX Denoiser, to produce something other than an all-black output?

This program produces fine output without the OptiX Denoiser, but when I use the denoiser to process the image, it simply outputs a copy of the alpha image with all RGB values equal to 0.

Here is what the initialization of the denoiser looks like:

/**
 * Builds the OptiX pipeline, allocates the frame and guide buffers, and sets up
 * the temporal 2x-upscaling denoiser.
 *
 * The ray-traced buffers (albedo, normal, color, flow, flow trustworthiness,
 * random state) are allocated at half of `size` in each dimension; the output
 * buffers and the denoiser's internal guide layers are allocated at full `size`.
 *
 * @param size  Full-resolution framebuffer size in pixels.
 */
Renderer::Renderer(const int2 &size) {
	// --- OptiX context, module, program groups and pipeline ---
	initOptix();
	createContext();
	getSourceCode();
	createModule();
	raygenPGs.resize(1);
	createRaygenProgram(0, "__raygen__renderFrame");
	missPGs.resize(RAY_TYPE_COUNT);
	hitgroupPGs.resize(RAY_TYPE_COUNT);
	createMissProgram(INTERSECT_RAY_TYPE, "__miss__radiance");
	createHitgroupProgram(INTERSECT_RAY_TYPE, "__closesthit__radiance", "__anyhit__radiance");
	createPipeline();
	paramsBuffer.alloc(sizeof(params));

	// --- Frame buffers ---
	fbSize = size;
	renderBuffer = new sutil::CUDAOutputBuffer<float4>(
		sutil::CUDAOutputBufferType::GL_INTEROP,
		fbSize.x, fbSize.y
	);
	renderBuffer->setStream(stream);

	const size_t fullResPixels = (size_t)fbSize.x * (size_t)fbSize.y;
	const size_t lowResPixels = (size_t)(fbSize.x / 2) * (size_t)(fbSize.y / 2);

	// Full-resolution buffers: denoised output and the previous frame's output.
	outputBuffer.alloc(sizeof(vec4f) * fullResPixels);
	prevBuffer.alloc(sizeof(vec4f) * fullResPixels);
	// Half-resolution buffers written by the ray-generation launch.
	albedoBuffer.alloc(sizeof(vec4f) * lowResPixels);
	normalBuffer.alloc(sizeof(vec4f) * lowResPixels);
	colorBuffer.alloc(sizeof(vec4f) * lowResPixels);
	flowBuffer.alloc(sizeof(float4) * lowResPixels);
	flowTrustworthinessBuffer.alloc(sizeof(float4) * lowResPixels);
	randomBuffer.alloc(sizeof(curandStateXORWOW_t) * lowResPixels);

	// cudaMalloc does not clear memory. Zero the previous-output buffer so the
	// first frame cannot pick up stale bit patterns (possibly NaNs) from it.
	CUDA_CHECK(cudaMemset((void *)prevBuffer.d_pointer(), 0, prevBuffer.sizeInBytes));
	params.initialized = false;

	// --- Denoiser ---
	// Zero-initialize the whole struct: members that are not assigned below
	// (including any added by future SDK versions) must not hold stack garbage.
	OptixDenoiserOptions dnOptions = {};
	dnOptions.guideAlbedo = 1;
	dnOptions.guideNormal = 1;
	dnOptions.denoiseAlpha = OPTIX_DENOISER_ALPHA_MODE_COPY;
	OPTIX_CHECK(optixDenoiserCreate(optixContext, OPTIX_DENOISER_MODEL_KIND_TEMPORAL_UPSCALE2X, &dnOptions, &denoiser));
	OPTIX_CHECK(optixDenoiserComputeMemoryResources(denoiser, fbSize.x, fbSize.y, &dnSizes));
	CUDA_CHECK(cudaMalloc((void **)&dnState, dnSizes.stateSizeInBytes));
	CUDA_CHECK(cudaMalloc((void **)&dnScratch, dnSizes.withoutOverlapScratchSizeInBytes));
	OPTIX_CHECK(optixDenoiserSetup(
		denoiser, stream,
		fbSize.x, fbSize.y,
		dnState, dnSizes.stateSizeInBytes,
		dnScratch, dnSizes.withoutOverlapScratchSizeInBytes
	));

	// Internal guide layers carry temporal history between frames. They are
	// cleared so the first frame's "previous" layer holds zeros, not garbage.
	const size_t internalGuideBytes = dnSizes.internalGuideLayerPixelSizeInBytes * fbSize.x * fbSize.y;
	CUDA_CHECK(cudaMalloc((void **)&internalIn, internalGuideBytes));
	CUDA_CHECK(cudaMalloc((void **)&internalOut, internalGuideBytes));
	CUDA_CHECK(cudaMemset((void *)internalIn, 0, internalGuideBytes));
	CUDA_CHECK(cudaMemset((void *)internalOut, 0, internalGuideBytes));

	// hdrIntensity / hdrAverageColor are INPUTS to optixDenoiserInvoke. Only the
	// storage is created here; it must be filled (e.g. with
	// optixDenoiserComputeIntensity / optixDenoiserComputeAverageColor) before
	// the denoiser reads it.
	CUDA_CHECK(cudaMalloc((void **)&intensityPtr, sizeof(float)));
	CUDA_CHECK(cudaMalloc((void **)&avgColorPtr, sizeof(float3)));

	dnParams = {};
	dnParams.hdrIntensity = intensityPtr;
	dnParams.blendFactor = 0.0f;
	dnParams.hdrAverageColor = avgColorPtr;
	// No valid previous layers exist until one frame has been denoised.
	dnParams.temporalModeUsePreviousLayers = 0;

	CUDA_SYNC_CHECK();
	std::cout << "Successfully set up renderer." << std::endl;
}

The rendering part looks like this:

void Renderer::render() {
	CUDA_SYNC_CHECK();
	if (!params.frameIndex) params.preCamera = params.camera;
	params.frame.fbSize = make_int2(fbSize.x / 2, fbSize.y / 2);
	params.frame.randomBuffer = (curandStateXORWOW_t *)randomBuffer.d_pointer();
	params.frame.prevBuffer = (vec4f *)prevBuffer.d_pointer();
	params.frame.albedoBuffer = (vec4f *)albedoBuffer.d_pointer();
	params.frame.normalBuffer = (vec4f *)normalBuffer.d_pointer();
	params.frame.colorBuffer = (vec4f *)colorBuffer.d_pointer();
	params.frame.flowBuffer = (float4 *)flowBuffer.d_pointer();
	params.frame.flowTrustworthinessBuffer = (float4 *)flowTrustworthinessBuffer.d_pointer();
	paramsBuffer.upload(&params, 1);
	++ params.frameIndex;
	CUDA_SYNC_CHECK();
	OPTIX_CHECK(optixLaunch(
		pipeline, stream,
		paramsBuffer.d_pointer(),
		paramsBuffer.sizeInBytes,
		&sbt,
		fbSize.x / 2, fbSize.y / 2,
		1
	));
	CUDA_SYNC_CHECK();
	OptixDenoiserGuideLayer guideLayer;
	guideLayer.albedo = createOptixImageF4(albedoBuffer.d_pointer(), fbSize.x / 2, fbSize.y / 2);
	guideLayer.normal = createOptixImageF4(normalBuffer.d_pointer(), fbSize.x / 2, fbSize.y / 2);
	guideLayer.flow = createOptixImageF4(flowBuffer.d_pointer(), fbSize.x / 2, fbSize.y / 2);
	guideLayer.flowTrustworthiness = createOptixImageF4(flowTrustworthinessBuffer.d_pointer(), fbSize.x / 2, fbSize.y / 2);
	guideLayer.previousOutputInternalGuideLayer = OptixImage2D{
		internalIn,
		(unsigned int)fbSize.x, (unsigned int)fbSize.y,
		(unsigned int)dnSizes.internalGuideLayerPixelSizeInBytes * fbSize.x,
		(unsigned int)dnSizes.internalGuideLayerPixelSizeInBytes,
		OPTIX_PIXEL_FORMAT_INTERNAL_GUIDE_LAYER
	};
	guideLayer.outputInternalGuideLayer = OptixImage2D{
		internalOut,
		(unsigned int)fbSize.x, (unsigned int)fbSize.y,
		(unsigned int)dnSizes.internalGuideLayerPixelSizeInBytes * fbSize.x,
		(unsigned int)dnSizes.internalGuideLayerPixelSizeInBytes,
		OPTIX_PIXEL_FORMAT_INTERNAL_GUIDE_LAYER
	};
	OptixDenoiserLayer layer;
	layer.output = createOptixImageF4(outputBuffer.d_pointer(), fbSize.x, fbSize.y);
	layer.previousOutput = createOptixImageF4(prevBuffer.d_pointer(), fbSize.x, fbSize.y);
	layer.input = createOptixImageF4(colorBuffer.d_pointer(), fbSize.x / 2, fbSize.y / 2);
	layer.type = OPTIX_DENOISER_AOV_TYPE_NONE;
	OPTIX_CHECK(optixDenoiserInvoke(
		denoiser, stream,
		&dnParams,
		dnState, dnSizes.stateSizeInBytes,
		&guideLayer, &layer, 1,
		0, 0,
		dnScratch, dnSizes.withoutOverlapScratchSizeInBytes
	));
	CUDA_SYNC_CHECK();
	CUDA_CHECK(cudaMemcpyAsync((void*)prevBuffer.d_pointer(), (vec4f *)outputBuffer.d_pointer(), outputBuffer.sizeInBytes, cudaMemcpyDeviceToDevice, stream));
	CUDA_CHECK(cudaMemcpyAsync(renderBuffer->map(), (vec4f *)outputBuffer.d_pointer(), outputBuffer.sizeInBytes, cudaMemcpyDeviceToDevice, stream));
	renderBuffer->unmap();
	CUDA_SYNC_CHECK();
	params.initialized = true;
	params.preCamera = params.camera;
	dnParams.temporalModeUsePreviousLayers = 1;
}

This program uses an encapsulation of the device pointers:

//! Thin wrapper around a single cudaMalloc'ed device allocation.
//!
//! The struct tracks the allocation's size and whether it is currently
//! allocated. It has no destructor: callers release memory with free().
struct CUDABuffer {
    //! device address of the allocation as a CUdeviceptr (0 when no memory is held)
    inline CUdeviceptr d_pointer() const {
        return (CUdeviceptr)d_ptr;
    }

    //! re-size buffer to given number of bytes; previous contents are discarded
    void resize(size_t size) {
        // Decide on `allocated`, the same flag alloc() asserts on. Testing
        // d_ptr here would skip free() after a zero-byte allocation (where the
        // pointer may be null while `allocated` is true) and then trip the
        // assert in alloc().
        if (allocated) free();
        alloc(size);
    }

    //! allocate to given number of bytes; the buffer must not be allocated yet
    void alloc(size_t size) {
        assert(!allocated);
        this->sizeInBytes = size;
        CUDA_CHECK(cudaMalloc((void **)&d_ptr, sizeInBytes));
        allocated = true;
    }

    //! free allocated memory; the buffer must currently be allocated
    void free() {
        assert(allocated);
        if (d_ptr != nullptr) {
            CUDA_CHECK(cudaFree(d_ptr));
            d_ptr = nullptr;
        }
        sizeInBytes = 0;
        allocated = false;
    }

    //! allocate exactly enough room for `vt` and copy its elements to the device
    template<typename T>
    void alloc_and_upload(const std::vector<T> &vt) {
        alloc(vt.size() * sizeof(T));
        upload((const T *)vt.data(), vt.size());
    }

    //! copy `count` elements from host memory `t` to the device;
    //! `count * sizeof(T)` must equal the allocation size
    template<typename T>
    void upload(const T *t, size_t count) {
        assert(allocated);
        assert(sizeInBytes == count * sizeof(T));
        // Nothing to copy when the allocation holds no device memory.
        if (d_ptr != nullptr) {
            CUDA_CHECK(cudaMemcpy(d_ptr, (void *)t, count * sizeof(T), cudaMemcpyHostToDevice));
        }
    }

    //! copy `count` elements from the device to host memory `t`;
    //! `count * sizeof(T)` must equal the allocation size
    template<typename T>
    void download(T *t, size_t count) {
        assert(allocated);
        assert(sizeInBytes == count * sizeof(T));
        // Nothing to copy when the allocation holds no device memory.
        if (d_ptr != nullptr) {
            CUDA_CHECK(cudaMemcpy((void *)t, d_ptr, count * sizeof(T), cudaMemcpyDeviceToHost));
        }
    }

    size_t sizeInBytes{ 0 };    //!< size of the current allocation in bytes
    void *d_ptr{ nullptr };     //!< device pointer, nullptr when nothing is held
    bool allocated{ false };    //!< true between alloc() and free()
};

Hi @2254106794,

Are you setting the hdrIntensity and hdrAverageColor values? Your code passes valid pointers, but the snippets you shared don’t initialize that memory. Is it done somewhere else? These are input values to the denoiser, not output values. You can call optixDenoiserComputeAverageColor() and/or optixDenoiserComputeIntensity() to compute them… I don’t see either of those in the code.


David.

Thanks for the reply.

I have tried setting them by calling both functions just before optixDenoiserInvoke, but it still didn’t work and kept resulting in a black image.

To be more precise, I called the functions in the following way:

	...
	layer.type = OPTIX_DENOISER_AOV_TYPE_NONE;
	// hdrAverageColor and hdrIntensity are inputs to optixDenoiserInvoke, so
	// both are computed from the noisy input image first. The compute helpers
	// take the image by pointer (const OptixImage2D*), hence &layer.input.
	OPTIX_CHECK(optixDenoiserComputeAverageColor(
		denoiser, stream,
		&layer.input,
		dnParams.hdrAverageColor,
		dnScratch, dnSizes.withoutOverlapScratchSizeInBytes
	));
	OPTIX_CHECK(optixDenoiserComputeIntensity(
		denoiser, stream,
		&layer.input,
		dnParams.hdrIntensity,
		dnScratch, dnSizes.withoutOverlapScratchSizeInBytes
	));
	// Run the denoiser on the single layer with no tile offset (0, 0).
	OPTIX_CHECK(optixDenoiserInvoke(
		denoiser, stream,
		&dnParams,
		dnState, dnSizes.stateSizeInBytes,
		&guideLayer, &layer, 1,
		0, 0,
		dnScratch, dnSizes.withoutOverlapScratchSizeInBytes
	));
	...

Can you try adding initializers to all your OptiX structs and see if that helps? e.g.:

// Empty braces zero-initialize the whole struct, including any member that is
// not assigned explicitly afterwards.
OptixDenoiserOptions dnOptions{};
OptixDenoiserParams params{};

etc.

Check your input images for NaNs; they can cause black outputs. Also here’s a secret debugging trick. You can set the environment variable OPTIX_DENOISER_DEBUG_DUMP_PATH to something like /tmp or c:\temp, and it will allow you to verify that all input data to the denoiser is correct.


David.

Thanks for the reply.

As far as I can tell, I have already initialized all the variables in OptixDenoiserOptions and OptixDenoiserParams, and the input image contains only normal values (no NaNs).

I’m trying to set OPTIX_DENOISER_DEBUG_DUMP_PATH to verify the input data again.

Sounds good. If that doesn’t help, let me know if it’s possible to share a complete reproducer. It’s always possible the issue is just color handling somewhere else, and not a problem interfacing with the denoiser.

It’s good if you initialize all the variables in the OptiX structs, but to be clear, we recommend zero-initializing the entire struct for safety using {}, even if you also manually assign all values. The reason is that we occasionally add members to these structs, and if the struct isn’t zero-initialized, then SDK upgrades or even driver upgrades might introduce bugs or accidentally change behavior, and require a code change and a new build of your app. We are careful to ensure that the default values for new struct members are zero, to help you guarantee that nothing changes as a result of routine upgrades on your (or your users’) machines.


David.