Creating a linear-tiling VkImage backed by CUDA-allocated memory fails

Since I updated the driver to version 550.78 on Linux, my code produces the wrong result without returning any error code. I wrote a demo project to find out which part of my core code is not working. The same demo runs on driver version 525.89.02 and produces the correct result.

This is my development environment:

  • System: Ubuntu 20.04 LTS
  • Graphics Card: NVIDIA RTX A4000
  • Driver version: 550.78

The image-creation function in my demo project:

void* createImageByCuda(uint32_t width, uint32_t height, VkFormat format, VkImageTiling tiling, VkImageUsageFlags usage, VkMemoryPropertyFlags properties, VkImage &image, VkDeviceMemory &imageMemory)
{
VkImageCreateInfo imageInfo{};
imageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
imageInfo.imageType = VK_IMAGE_TYPE_2D;
imageInfo.extent.width = width;
imageInfo.extent.height = height;
imageInfo.extent.depth = 1;
imageInfo.mipLevels = 1;
imageInfo.arrayLayers = 1;
imageInfo.format = format;
imageInfo.tiling = tiling;
imageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
imageInfo.usage = usage;
imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
imageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;

    VkExternalMemoryImageCreateInfo externalMemoryImageInfo{};
    externalMemoryImageInfo.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO;
    externalMemoryImageInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
    imageInfo.pNext = &externalMemoryImageInfo;

    if (vkCreateImage(device, &imageInfo, nullptr, &image) != VK_SUCCESS)
    {
        throw std::runtime_error("failed to create image!");
    }

    VkMemoryRequirements memRequirements;
    vkGetImageMemoryRequirements(device, image, &memRequirements);

    VkMemoryAllocateInfo allocInfo{};
    allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
    allocInfo.allocationSize = memRequirements.size;
    allocInfo.memoryTypeIndex = findMemoryType(memRequirements.memoryTypeBits, properties);

    VkImportMemoryFdInfoKHR importStruct = {};
    importStruct.sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR;
    importStruct.pNext = nullptr;
    importStruct.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;

    CUdevice cudevice;
    cuInit(0);
    cuDeviceGet(&cudevice, 0);
    cudaSetDevice(0);
    CUdeviceptr d_ptr;
    size_t granularity = 1;
    CUmemGenericAllocationHandle cudaHandle;
    CUmemAllocationHandleType exportHandleType;
    CUmemAllocationProp allocProp = {};
    allocProp.type = CU_MEM_ALLOCATION_TYPE_PINNED;
    allocProp.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
    allocProp.location.id = 0;

    int handle;
    exportHandleType = CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR;
    allocProp.win32HandleMetaData = NULL;
    allocProp.requestedHandleTypes = exportHandleType;

    cuMemGetAllocationGranularity(&granularity, &allocProp, CU_MEM_ALLOC_GRANULARITY_MINIMUM);
    size_t capacity = (memRequirements.size + granularity - 1) / granularity * granularity;
    cuMemAddressReserve(&d_ptr, capacity, granularity, 0U, 0);
    cuMemCreate(&cudaHandle, capacity, &allocProp, 0);
    cuMemExportToShareableHandle((void *)&handle, cudaHandle, exportHandleType, 0);
    cuMemMap(d_ptr, capacity, 0, cudaHandle, 0);
    cuMemRelease(cudaHandle);
    CUmemAccessDesc accessDescriptor = {};
    accessDescriptor.location.id = 0;
    accessDescriptor.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
    accessDescriptor.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
    cuMemSetAccess(d_ptr, capacity, &accessDescriptor, 1);
    void *data = (void *)d_ptr;
    importStruct.fd = handle;
    void *sysHandle = (void *)(long long)handle;

    allocInfo.pNext = &importStruct;

    if (vkAllocateMemory(device, &allocInfo, nullptr, &imageMemory) != VK_SUCCESS)
    {
        throw std::runtime_error("failed to allocate image memory!");
    }

    vkBindImageMemory(device, image, imageMemory, 0);
    return data;
}

All functions return success.

This code creates a linear-tiling image and lets CUDA access it directly through a CUdeviceptr rather than through a CUDA array (CUarray), because we want to encode the image directly.

The problem is that when this image is used as a render target, I cannot get the rendered result: when I copy it to host memory with the cudaMemcpy API, I get a black image in which all pixels are 0.

Is there any way to achieve this kind of direct access? I also tried the opposite direction, exporting the VkImage memory to an fd and importing that fd in CUDA, but that approach seems to have synchronization issues. The issue with that approach also occurs on driver 525.89.02.

Thanks.