xilefmai,
The way I solved it was with three Vulkan images created with linear tiling (tiling=linear) and three with optimal tiling for regular Vulkan usage. You can probably get away with fewer images by ping-ponging between some of them.
pseudocode:
// Each render output gets two images: one from makeStorage() (presumably the
// optimal-tiled image the ray tracer writes to) and one from makeLinear()
// (the linear-tiled image that is shared with OptiX).
m_resultRGB = makeStorage();
m_resultRGBLinear = makeLinear();
m_resultAlbedo = makeStorage();
m_resultAlbedoLinear = makeLinear();
// The denoiser output only needs a linear image; it is copied into the
// render target after denoising.
m_denoisedResult = makeLinear();
// create optix images by exporting linear images to fd and importing into cuda
m_resultRGBOptix = DenoiserVulkanImage(m_resultRGBLinear);
m_resultAlbedoOptix = DenoiserVulkanImage(m_resultAlbedoLinear);
// NOTE(review): m_resultNormalLinear is not created in the lines above and the
// normal layer is not passed to the denoiser later on — presumably it is set
// up like the RGB/albedo pair elsewhere, or this line can be dropped; confirm.
m_resultNormalOptix = DenoiserVulkanImage(m_resultNormalLinear);
m_denoisedResultOptix = DenoiserVulkanImage(m_denoisedResult);
// Draw loop: after ray tracing and before OptiX runs, copy each storage image
// into its linear counterpart so the denoiser can read it.
// copyRegion is assumed to be a VkImageCopy describing the full image,
// declared elsewhere.

// --- RGB (beauty) layer: storage -> linear ---
setImageLayout(m_drawCmdBuffer,
               m_resultRGB,
               VK_IMAGE_LAYOUT_GENERAL,
               VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
// Old layout UNDEFINED: the previous contents of the linear image are not
// needed because the copy overwrites them.
setImageLayout(m_drawCmdBuffer,
               m_resultRGBLinear,
               VK_IMAGE_LAYOUT_UNDEFINED,
               VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
vkCmdCopyImage(m_drawCmdBuffer,
               m_resultRGB,
               VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
               m_resultRGBLinear,
               VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
               1, &copyRegion);

// --- Albedo layer: storage -> linear ---
setImageLayout(m_drawCmdBuffer,
               m_resultAlbedo,
               VK_IMAGE_LAYOUT_GENERAL,
               VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
setImageLayout(m_drawCmdBuffer,
               m_resultAlbedoLinear,
               VK_IMAGE_LAYOUT_UNDEFINED,
               VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
vkCmdCopyImage(m_drawCmdBuffer,
               m_resultAlbedo,
               VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
               m_resultAlbedoLinear,
               VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
               1, &copyRegion);

// After OptiX: copy the denoised (linear) image back into the render target.
// These commands go into a separate command buffer (m_blitCmdBuffer).
setImageLayout(m_blitCmdBuffer,
               m_renderTarget,
               VK_IMAGE_LAYOUT_UNDEFINED,
               VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
setImageLayout(m_blitCmdBuffer,
               m_denoisedResult,
               VK_IMAGE_LAYOUT_GENERAL,
               VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
vkCmdCopyImage(m_blitCmdBuffer,
               m_denoisedResult,
               VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
               m_renderTarget,
               VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
               1, &copyRegion);
// Return the denoised image to GENERAL, the layout the transition above
// expects on the next pass.
setImageLayout(m_blitCmdBuffer,
               m_denoisedResult,
               VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
               VK_IMAGE_LAYOUT_GENERAL);
// Leave the render target in the color-attachment layout.
setImageLayout(m_blitCmdBuffer,
               m_renderTarget,
               VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
               VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
then:
// Submit the draw / ray tracing / copy-to-linear work recorded in m_drawCmdBuffer.
VkSubmitInfo submitInfo{};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &m_drawCmdBuffer;
submitInfo.signalSemaphoreCount = 1;
// signalSemaphores and fenceInfo are presumably set up in the elided code.
submitInfo.pSignalSemaphores = signalSemaphores;
/*...*/
VkFence fence;
vkCreateFence(device(), &fenceInfo, VK_NULL_HANDLE, &fence);
vkQueueSubmit(m_instance.graphicsQueue, 1, &submitInfo, fence);
// Block on the CPU until the copies into the linear images have finished,
// so the denoiser does not start on incomplete input.
vkWaitForFences(device(), 1, &fence, VK_TRUE, DEFAULT_FENCE_TIMEOUT);

// Run the denoiser on the imported images: RGB and albedo layers in,
// denoised image out.
std::vector optixLayers {
    m_resultRGBOptix->optixImage(),
    m_resultAlbedoOptix->optixImage()
};
OptixDenoiserParams p {};
m_optix->invoke(&p, optixLayers, 0, 0, &m_denoisedResultOptix->optixImage());

// NOTE(review): m_denoiseFinished is presumably signaled once the denoiser
// has completed; the signaling side is not shown here — confirm.
VkSemaphore waitSemaphores[] = {
    m_denoiseFinished
};
// entry 0 corresponds to semaphore 0 above etc
VkPipelineStageFlags waitStages[] = {VK_PIPELINE_STAGE_ALL_COMMANDS_BIT};

// Submit the copy-to-render-target work recorded in m_blitCmdBuffer.
submitInfo = {};
// The assignment above zeroes sType, so it has to be set again.
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &m_blitCmdBuffer;
submitInfo.waitSemaphoreCount = 1;
submitInfo.pWaitSemaphores = waitSemaphores;
submitInfo.pWaitDstStageMask = waitStages;
// The fence was signaled by the first submission; a fence passed to
// vkQueueSubmit must be unsignaled, so reset it before reusing it.
vkResetFences(device(), 1, &fence);
vkQueueSubmit(m_instance.graphicsQueue, 1, &submitInfo, fence);