Oh, it's still available here. Anyway, I'll paste it below:
// gcc -o cuda_nvmedia_test ./samples/cuda_nvmedia_test.cpp -I ./nvmedia_include/usr/include/ -I /usr/local/cuda-11.4/targets/aarch64-linux/include -std=c++17 -lnvmedia_dla -lnvscibuf -lnvscisync -lnvmedia2d -lnvmedia_iep_sci -lnvmedia_ide_sci -lnvmedialdc -lpthread -lstdc++ -L /usr/local/cuda-11.4/targets/aarch64-linux/lib/ -lcudart -DDRIVEOS_6
// gcc -o cuda_nvmedia_test ./samples/cuda_nvmedia_test.cpp -I /usr/local/cuda-10.2/targets/aarch64-linux/include -std=c++17 -lnvmedia_dla -lnvscibuf -lnvscisync -lnvmedia_2d -lnvmedia_core -lnvmedia -lpthread -lstdc++ -L /usr/local/cuda-10.2/targets/aarch64-linux/lib/ -lcudart -DDRIVEOS_5_COMPAT
#include <nvscibuf.h>
#include <nvscisync.h>
#ifdef DRIVEOS_5_COMPAT
#include <nvmedia_surface.h>
#include <nvmedia_ldc.h>
#include <nvmedia_2d.h>
#include <nvmedia_image.h>
#include <nvmedia_image_nvscibuf.h>
#include <nvmedia_2d_nvscisync.h>
#include <nvmedia_ldc_nvscisync.h>
#else
#include <nvmedia_6x/nvmedia_ldc.h>
#include <nvmedia_6x/nvmedia_2d.h>
#include <nvmedia_6x/nvmedia_2d_sci.h>
#include <nvmedia_6x/nvmedia_ldc_sci.h>
#include <nvmedia_6x/nvmedia_iep.h>
#include <nvmedia_6x/nvmedia_ldc_util.h>
#endif
#include <cuda_runtime.h>
#include <stddef.h>
#include <array>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <thread>
#include <type_traits>
#include <vector>
// Prints a printf-style formatted message followed by a newline on stdout.
// The body is wrapped in do { } while (0) so that the macro expands to exactly one
// statement and therefore stays correct inside unbraced if/else bodies.
#define LOG_ERR(...) do { printf(__VA_ARGS__); printf("\n"); } while (0)
// Width and height of the test image; main() passes them to allocateImage() and uses
// them to size and index the host buffers below.
static constexpr size_t imgW = 2912;
static constexpr size_t imgH = 1080;
// Host-side input buffers that main() fills with a test pattern and copies into the
// two CUDA plane arrays: imgW x imgH bytes, and imgW bytes per row for imgH / 2 rows
// (main() writes the second one as pairs of adjacent bytes).
std::array<uint8_t, imgW * imgH> inImgCudaY;
std::array<uint8_t, (imgW) * (imgH / 2)> inImgCudaUV;
// Host-side output buffers that receive the pixel read-back in main() and are then
// written to the output file: one imgW x imgH buffer and two (imgW / 2) x (imgH / 2) buffers.
std::array<uint8_t, imgW * imgH> outImgCpuY;
std::array<uint8_t, (imgW / 2) * (imgH / 2)> outImgCpuU;
std::array<uint8_t, (imgW / 2) * (imgH / 2)> outImgCpuV;
// Destination pointer table handed to the read-back call in main(), in Y, U, V order.
std::array<void*, 3> outImgPtrs{outImgCpuY.data(), outImgCpuU.data(), outImgCpuV.data()};
// Owning handle aliases. Each one is a std::unique_ptr whose deleter type is a
// std::integral_constant wrapping the matching C release function, so the release
// function is called on the held (non-null) handle when the owner is reset or destroyed.
// Only ScopedSciBufObj and ScopedNvSciBufAttrList are referenced in the visible part of
// this file.
using ScopedSciBufObj = std::unique_ptr<NvSciBufObjRefRec, std::integral_constant<decltype(NvSciBufObjFree)*, NvSciBufObjFree>>;
using ScopedCpuWaitContext = std::unique_ptr<NvSciSyncCpuWaitContextRec, std::integral_constant<decltype(NvSciSyncCpuWaitContextFree)*, NvSciSyncCpuWaitContextFree>>;
using ScopedNvMedia2D = std::unique_ptr<NvMedia2D, std::integral_constant<decltype(NvMedia2DDestroy)*, NvMedia2DDestroy>>;
using ScopedNvSciSyncAttrList = std::unique_ptr<NvSciSyncAttrListRec, std::integral_constant<decltype(NvSciSyncAttrListFree)*, NvSciSyncAttrListFree>>;
using ScopedNvSciBufAttrList = std::unique_ptr<NvSciBufAttrListRec, std::integral_constant<decltype(NvSciBufAttrListFree)*, NvSciBufAttrListFree>>;
using ScopedSciSyncObj = std::unique_ptr<NvSciSyncObjRec, std::integral_constant<decltype(NvSciSyncObjFree)*, NvSciSyncObjFree>>;
#ifdef DRIVEOS_5_COMPAT
// Runs NvMediaImageNvSciBufInit() the first time this function is called and arranges
// for NvMediaImageNvSciBufDeinit() to run when the function-local static guard is
// destroyed. Later calls do nothing. A failed init is logged and leaves the guard empty,
// in which case the deinit call is skipped.
static void initNvMediaImageNvSciBuf()
{
    using InitGuard = std::unique_ptr<void, void(*)(void*)>;

    static InitGuard const guard{
        // Token for the guard: non-null on success so that the deleter below will run,
        // null on failure (unique_ptr never invokes its deleter on a null pointer).
        []() -> void* {
            auto const status{NvMediaImageNvSciBufInit()};
            if (status == NVMEDIA_STATUS_OK) {
                return reinterpret_cast<void*>(1);
            }
            LOG_ERR("Could not init NvMediaImageNvSciBuf: %d", status);
            return nullptr;
        }(),
        // The token is not a real allocation; the deleter only performs the deinit call.
        +[](void*) {
#ifdef DRIVEOS_5_COMPAT
            NvMediaImageNvSciBufDeinit();
#endif
        }
    };
}
// Returns the process-wide NvMediaDevice, creating it on the first call after making
// sure initNvMediaImageNvSciBuf() has run. The device is owned by a function-local
// static and released with NvMediaDeviceDestroy when that static is destroyed.
// Returns nullptr (after logging) when the device could not be created.
static NvMediaDevice* getNvMediaDevice()
{
    using DeviceOwner = std::unique_ptr<NvMediaDevice, std::integral_constant<decltype(NvMediaDeviceDestroy)*, NvMediaDeviceDestroy>>;

    static DeviceOwner const owner{[]() {
        initNvMediaImageNvSciBuf();
        NvMediaDevice* const created{NvMediaDeviceCreate()};
        if (created == nullptr) {
            LOG_ERR("Could not create NvMediaDevice");
        }
        return created;
    }()};

    return owner.get();
}
#endif
// Returns the process-wide NvSciBufModule, opening it on the first call. The module is
// owned by a function-local static and closed with NvSciBufModuleClose when that static
// is destroyed. Returns nullptr (after logging) when the module could not be opened.
static NvSciBufModule getNvSciBufModule()
{
    using ModuleOwner = std::unique_ptr<NvSciBufModuleRec, std::integral_constant<decltype(NvSciBufModuleClose)*, NvSciBufModuleClose>>;

    static ModuleOwner const owner{[]() -> NvSciBufModule {
#ifdef DRIVEOS_5_COMPAT
        // In the compat build the NvMedia/NvSciBuf glue is initialised before the module is opened.
        initNvMediaImageNvSciBuf();
#endif
        NvSciBufModule opened{};
        auto const status{NvSciBufModuleOpen(&opened)};
        if (status == NvSciError_Success) {
            return opened;
        }
        LOG_ERR("NvSciBufModuleOpen failed: %d", status);
        return nullptr;
    }()};

    return owner.get();
}
// Returns the process-wide NvSciSyncModule, opening it on the first call. The module is
// owned by a function-local static and closed with NvSciSyncModuleClose when that static
// is destroyed. Returns nullptr (after logging) when the module could not be opened.
static NvSciSyncModule getNvSciSyncModule()
{
    using ModuleOwner = std::unique_ptr<NvSciSyncModuleRec, std::integral_constant<decltype(NvSciSyncModuleClose)*, NvSciSyncModuleClose>>;

    static ModuleOwner const owner{[]() -> NvSciSyncModule {
#ifdef DRIVEOS_5_COMPAT
        // In the compat build the NvMedia/NvSciBuf glue is initialised before the module is opened.
        initNvMediaImageNvSciBuf();
#endif
        NvSciSyncModule opened{};
        auto const status{NvSciSyncModuleOpen(&opened)};
        if (status == NvSciError_Success) {
            return opened;
        }
        LOG_ERR("NvSciSyncModuleOpen failed: %d", status);
        return nullptr;
    }()};

    return owner.get();
}
#ifdef DRIVEOS_5_COMPAT
// Builds a reconciled NvSciBufAttrList from two requirement lists: one filled by
// NvMediaImageFillNvSciBufAttrs() from the given surface type/attributes, and one that
// requests an image buffer with the given access permission.
//
// surfType / surfAttrs / numSurfAttrs: forwarded unchanged to NvMediaImageFillNvSciBufAttrs().
// accessPerm: value stored under NvSciBufGeneralAttrKey_RequiredPerm in the second list.
//
// Returns the raw reconciled list (ownership passes to the caller), or nullptr when a
// list could not be created or filled. When reconciliation itself fails the error is
// logged, any conflict list is freed, and the still-null-initialised result is returned.
NvSciBufAttrList nvMediaSurfAttrsToNvSciBufAttrList(NvMediaSurfaceType surfType, NvMediaSurfAllocAttr const *surfAttrs, uint32_t numSurfAttrs, NvSciBufAttrValAccessPerm accessPerm)
{
    // Creates an empty attribute list and hands it to `owner`; logs and returns false on failure.
    auto const createEmptyList{[](ScopedNvSciBufAttrList& owner) -> bool {
        NvSciBufAttrList created{nullptr};
        auto const status{NvSciBufAttrListCreate(getNvSciBufModule(), &created)};
        if (status != NvSciError_Success) {
            LOG_ERR("Couldn't create NvSciBufAttr");
            return false;
        }
        owner.reset(created);
        return true;
    }};

    // NvMedia
    ScopedNvSciBufAttrList nvmediaList{nullptr};
    if (!createEmptyList(nvmediaList)) {
        return nullptr;
    }
    auto const nvmediaFillStatus{NvMediaImageFillNvSciBufAttrs(getNvMediaDevice(), surfType, surfAttrs, numSurfAttrs, 0, nvmediaList.get())};
    if (nvmediaFillStatus != NVMEDIA_STATUS_OK) {
        LOG_ERR("Couldn't fill NvSciBufAttrs");
        return nullptr;
    }

    // CUDA + NPPI
    ScopedNvSciBufAttrList cudaList{nullptr};
    if (!createEmptyList(cudaList)) {
        return nullptr;
    }
    static constexpr NvSciBufType imageBufferType = NvSciBufType_Image;
    NvSciBufAttrKeyValuePair cudaPairs[] = {
        { NvSciBufGeneralAttrKey_Types, &imageBufferType, sizeof(imageBufferType) },
        { NvSciBufGeneralAttrKey_RequiredPerm, &accessPerm, sizeof(accessPerm) },
    };
    auto const cudaFillStatus{NvSciBufAttrListSetAttrs(cudaList.get(), cudaPairs, sizeof(cudaPairs) / sizeof(NvSciBufAttrKeyValuePair))};
    if (cudaFillStatus != NvSciError_Success) {
        LOG_ERR("Couldn't fill NvSciBufAttrs");
        return nullptr;
    }

    /* Reconcile both requirement lists; the caller allocates the NvSciBufObj from the result. */
    NvSciBufAttrList toReconcile[2] = {nvmediaList.get(), cudaList.get()};
    NvSciBufAttrList reconciledList{nullptr};
    NvSciBufAttrList conflictList{nullptr};
    auto const reconcileStatus{NvSciBufAttrListReconcile(toReconcile, 2, &reconciledList, &conflictList)};
    if (reconcileStatus != NvSciError_Success) {
        LOG_ERR("Couldn't reconcile NvSciBufAttr and allocate NvSciBufObj");
        if (conflictList != nullptr) {
            NvSciBufAttrListFree(conflictList);
        }
    }
    return reconciledList;
}
#else
// Returns the NvSci GPU ids of all CUDA devices, computed once on the first call and
// cached in a function-local static. Each id is a byte-wise copy of the device UUID
// reported by cudaGetDeviceProperties().
// Returns an empty vector (after logging) when the device count or any device's
// properties cannot be queried.
static const std::vector<NvSciRmGpuId>& getGpuIds()
{
    static const auto initLambda{[]() -> std::vector<NvSciRmGpuId> {
        int cudaDeviceCount{0};
        if (auto const err{cudaGetDeviceCount(&cudaDeviceCount)}; err != cudaSuccess)
        {
            LOG_ERR("Couldn't get cuda device count: %d", err);
            return {};
        }
        std::vector<NvSciRmGpuId> result{};
        if (cudaDeviceCount > 0)
        {
            // Guarded so that a non-positive count is never converted to a huge size_t.
            result.reserve(static_cast<size_t>(cudaDeviceCount));
        }
        for (int currDevice{0}; currDevice < cudaDeviceCount; ++currDevice)
        {
            cudaDeviceProp currDeviceProp{};
            if (auto const err{cudaGetDeviceProperties(&currDeviceProp, currDevice)}; err != cudaSuccess)
            {
                LOG_ERR("Couldn't get cuda device properties: %d", err);
                return {};
            }
            NvSciRmGpuId gpuId{};
            // The UUID is copied byte for byte, so both representations must have the same size.
            static_assert(sizeof(gpuId.bytes) == sizeof(currDeviceProp.uuid.bytes),
                          "NvSciRmGpuId and the CUDA device UUID must have the same size");
            std::memcpy(gpuId.bytes, currDeviceProp.uuid.bytes, sizeof(gpuId.bytes));
            result.push_back(gpuId);
        }
        return result;
    }};
    static const std::vector<NvSciRmGpuId> gpuIds{initLambda()};
    return gpuIds;
}
#endif
// Allocates an NvSciBuf image object of the given size.
//
// DRIVEOS_5_COMPAT build: the attribute list is derived from NvMedia surface attributes
// (YUV 420 semi-planar, 8 bit, block linear) via nvMediaSurfAttrsToNvSciBufAttrList().
// Otherwise: the attribute list is filled directly (two planes, Y8 and U8_V8, block
// linear, CPU access, all CUDA GPUs from getGpuIds()), extended by
// NvMedia2DFillNvSciBufAttrList(), and reconciled.
//
// width / height: size of the first plane; the second plane is described with half of
// each in the non-compat build.
//
// Returns the owning buffer object, or a null ScopedSciBufObj (after logging) when any
// step fails.
ScopedSciBufObj allocateImage(uint32_t width, uint32_t height)
{
    ScopedNvSciBufAttrList imageAttrReconciled{nullptr};
#ifdef DRIVEOS_5_COMPAT
    {
        NvMediaSurfAllocAttr surfAllocAttr[] = {
            { NVM_SURF_ATTR_WIDTH, width },
            { NVM_SURF_ATTR_HEIGHT, height },
            { NVM_SURF_ATTR_EMB_LINES_TOP, 0 },
            { NVM_SURF_ATTR_EMB_LINES_BOTTOM, 0 },
            { NVM_SURF_ATTR_CPU_ACCESS, NVM_SURF_ATTR_CPU_ACCESS_CACHED },
            { NVM_SURF_ATTR_ALLOC_TYPE, NVM_SURF_ATTR_ALLOC_ISOCHRONOUS },
            { NVM_SURF_ATTR_PEER_VM_ID, 0 },
            { NVM_SURF_ATTR_SCAN_TYPE, NVM_SURF_ATTR_SCAN_PROGRESSIVE },
            { NVM_SURF_ATTR_COLOR_STD_TYPE, NVM_SURF_ATTR_COLOR_STD_REC709_ER }
        };
        NvMediaSurfFormatAttr surfFormatAttr[NVM_SURF_FMT_ATTR_MAX]{};
        NVM_SURF_FMT_SET_ATTR_YUV(surfFormatAttr, YUV, 420, SEMI_PLANAR, UINT, 8, BL);
        NvMediaSurfaceType const surfType = NvMediaSurfaceFormatGetType(surfFormatAttr, NVM_SURF_FMT_ATTR_MAX);
        NvSciBufAttrList const rawImageAttrReconciled{nvMediaSurfAttrsToNvSciBufAttrList(surfType, surfAllocAttr, sizeof(surfAllocAttr) / sizeof(NvMediaSurfAllocAttr), NvSciBufAccessPerm_ReadWrite)};
        imageAttrReconciled.reset(rawImageAttrReconciled);
    }
#else
    static constexpr NvSciBufAttrValImageLayoutType layout{NvSciBufImage_BlockLinearType};
    static constexpr NvSciBufAttrValAccessPerm accessPerm{NvSciBufAccessPerm_ReadWrite};
    static constexpr NvSciBufType bufType{NvSciBufType_Image};
    static constexpr bool needCPUAccess{true};
    static constexpr bool enableCpuCache{true};
    static constexpr uint32_t planeCount{2};
    static constexpr uint64_t padding[]{0, 0};
    static constexpr NvSciBufAttrValColorFmt colorFormat[]{NvSciColor_Y8, NvSciColor_U8_V8};
    static constexpr NvSciBufAttrValColorStd colorStd[]{NvSciColorStd_REC709_ER, NvSciColorStd_REC709_ER};
    // Unsigned to match the attribute's documented element type (same size as before).
    static constexpr uint32_t planeBaseAddrAlign[]{1024, 1024};
    uint32_t const planeWidth[]{width, width / 2};
    uint32_t const planeHeight[]{height, height / 2};
    static constexpr NvSciBufAttrValImageScanType scanType{NvSciBufScan_ProgressiveType};
    static constexpr bool vpr{false};
    static constexpr uint64_t imageCount{1};
    static std::vector<NvSciRmGpuId> const& gpuIds{getGpuIds()};
    // One cache entry per GPU id, each with the cache flag set to false.
    static std::vector<NvSciBufAttrValGpuCache> const& gpuCache{[](){
        std::vector<NvSciBufAttrValGpuCache> result{};
        result.reserve(gpuIds.size());
        for(auto const& currGpu: gpuIds) {
            result.emplace_back(NvSciBufAttrValGpuCache{currGpu, false});
        }
        return result;
    }()};
    if(gpuIds.empty())
    {
        LOG_ERR("Couldn't get GPU vector.");
        return nullptr;
    }
    ScopedNvSciBufAttrList imageAttr{nullptr};
    {
        {
            NvSciBufAttrList rawImageAttr{nullptr};
            if (auto const err{NvSciBufAttrListCreate(getNvSciBufModule(), &rawImageAttr)}; NvSciError_Success != err) {
                LOG_ERR("Couldn't create NvSciBufAttr: %d", err);
                return nullptr;
            }
            imageAttr.reset(rawImageAttr);
        }
        {
            NvSciBufAttrKeyValuePair setAttrs[] = {
                { NvSciBufGeneralAttrKey_Types, &bufType, sizeof(bufType) },
                { NvSciBufGeneralAttrKey_RequiredPerm, &accessPerm, sizeof(accessPerm) },
                { NvSciBufGeneralAttrKey_NeedCpuAccess, &needCPUAccess, sizeof(needCPUAccess) },
                { NvSciBufGeneralAttrKey_EnableCpuCache, &enableCpuCache, sizeof(enableCpuCache) },
                { NvSciBufGeneralAttrKey_GpuId, gpuIds.data(), sizeof(NvSciRmGpuId) * gpuIds.size() },
                { NvSciBufGeneralAttrKey_EnableGpuCache, gpuCache.data(), sizeof(NvSciBufAttrValGpuCache) * gpuCache.size() },
                { NvSciBufImageAttrKey_Layout, &layout, sizeof(layout) },
                { NvSciBufImageAttrKey_PlaneCount, &planeCount, sizeof(planeCount) },
                { NvSciBufImageAttrKey_TopPadding, &padding, sizeof(padding) },
                { NvSciBufImageAttrKey_BottomPadding, &padding, sizeof(padding) },
                { NvSciBufImageAttrKey_LeftPadding, &padding, sizeof(padding) },
                { NvSciBufImageAttrKey_RightPadding, &padding, sizeof(padding) },
                { NvSciBufImageAttrKey_PlaneColorFormat, &colorFormat, sizeof(colorFormat) },
                { NvSciBufImageAttrKey_PlaneColorStd, &colorStd, sizeof(colorStd) },
                { NvSciBufImageAttrKey_PlaneBaseAddrAlign, &planeBaseAddrAlign, sizeof(planeBaseAddrAlign) },
                { NvSciBufImageAttrKey_PlaneWidth, &planeWidth, sizeof(planeWidth) },
                { NvSciBufImageAttrKey_PlaneHeight, &planeHeight, sizeof(planeHeight) },
                { NvSciBufImageAttrKey_ScanType, &scanType, sizeof(scanType) },
                { NvSciBufImageAttrKey_VprFlag, &vpr, sizeof(vpr) },
                { NvSciBufImageAttrKey_ImageCount, &imageCount, sizeof(imageCount) },
            };
            if (auto const err{NvSciBufAttrListSetAttrs(imageAttr.get(), setAttrs, sizeof(setAttrs) / sizeof(NvSciBufAttrKeyValuePair))}; NvSciError_Success != err) {
                LOG_ERR("Couldn't fill NvSciBufAttrs: %d", err);
                return nullptr;
            }
        }
        // We need to call this to be able to use the buffer from HW engines
        if (auto const err{NvMedia2DFillNvSciBufAttrList(nullptr, imageAttr.get())}; NVMEDIA_STATUS_OK != err) {
            LOG_ERR("NvMedia2DFillNvSciBufAttrList failed: %d", err);
            return nullptr;
        }
    }
    {
        NvSciBufAttrList attrList[]{imageAttr.get()};
        NvSciBufAttrList reconciled{nullptr};
        NvSciBufAttrList conflicts{nullptr};
        if (auto const err{NvSciBufAttrListReconcile(attrList, sizeof(attrList) / sizeof(NvSciBufAttrList), &reconciled, &conflicts)}; NvSciError_Success != err)
        {
            LOG_ERR("Couldn't reconcile NvSciBufAttr and allocate NvSciBufObj: %d", err);
            if(conflicts != nullptr)
            {
                void* dump{};
                size_t len{};
                if (auto const dumpErr{NvSciBufAttrListDebugDump(conflicts, &dump, &len)}; NvSciError_Success != dumpErr) {
                    LOG_ERR("NvSciBufAttrListDebugDump failed: %d", dumpErr);
                } else {
                    // "%.*s" bounds the output to len characters; the previous "%-*s" only set a
                    // minimum field width and relied on the dump being NUL-terminated.
                    // NOTE(review): the dump buffer is produced by NvSciBuf — confirm in the
                    // NvSciBuf documentation whether and how the caller has to release it.
                    LOG_ERR("Conflicted args: %.*s", static_cast<int>(len), static_cast<char*>(dump));
                }
                NvSciBufAttrListFree(conflicts);
            } else {
                LOG_ERR("No conflict list");
            }
            // Nothing to allocate from: stop here instead of passing a null list on.
            return nullptr;
        }
        imageAttrReconciled.reset(reconciled);
    }
#endif
    if (imageAttrReconciled == nullptr) {
        LOG_ERR("No reconciled NvSciBufAttrList available, image not allocated");
        return nullptr;
    }
    NvSciBufObj result{nullptr};
    if (auto const err{NvSciBufObjAlloc(imageAttrReconciled.get(), &result)}; err != NvSciError_Success) {
        LOG_ERR("NvSciBufObjAlloc failed: %d", err);
        return nullptr;
    }
    return ScopedSciBufObj{result};
}
int main()
{
// Allocate images
auto srcImage{allocateImage(imgW, imgH)};
uint64_t fullBufferSize{};
struct CUexternalMemory_st *externalMemory;
struct cudaMipmappedArray *mipmapArray[2];
struct cudaArray *imagePlanes[2];
unsigned long long cudaSurfaceNvmediaBuf[2];
{
// Get cuda maps
NvSciBufAttrList sciBufAttr{nullptr};
// Don't need to free the returned attribute list
if (auto const err{NvSciBufObjGetAttrList(srcImage.get(), &sciBufAttr)}; NvSciError_Success != err) {
LOG_ERR("Couldn't get sciBufObj attribute list: %d", err);
}
NvSciBufAttrKeyValuePair queriedAttrs[]{
{ NvSciBufImageAttrKey_Size, nullptr, 0 },
{ NvSciBufImageAttrKey_PlaneChannelCount, nullptr, 0 },
{ NvSciBufImageAttrKey_PlaneOffset, nullptr, 0 },
{ NvSciBufImageAttrKey_PlaneWidth, nullptr, 0 },
{ NvSciBufImageAttrKey_PlaneHeight, nullptr, 0 },
{ NvSciBufImageAttrKey_PlanePitch, nullptr, 0 },
{ NvSciBufImageAttrKey_PlaneBitsPerPixel, nullptr, 0 },
{ NvSciBufImageAttrKey_PlaneCount, nullptr, 0 },
};
// Query args returned filled by NvMedia
if (auto const err{NvSciBufAttrListGetAttrs(sciBufAttr, queriedAttrs, sizeof(queriedAttrs) / sizeof(NvSciBufAttrKeyValuePair))}; NvSciError_Success != err) {
LOG_ERR("Couldn't get NvSciBufAttrs: %d", err);
}
fullBufferSize = *static_cast<uint64_t const*>(queriedAttrs[0].value);
uint8_t const *const channelCountPerPlane{static_cast<uint8_t const*>(queriedAttrs[1].value)};
uint64_t const *planeOffset{static_cast<uint64_t const*>(queriedAttrs[2].value)};
uint32_t const *planeWidth{static_cast<uint32_t const*>(queriedAttrs[3].value)};
uint32_t const *planeHeight{static_cast<uint32_t const*>(queriedAttrs[4].value)};
uint32_t const *planePitch{static_cast<uint32_t const*>(queriedAttrs[5].value)};
uint32_t const *planePixelBits{static_cast<uint32_t const*>(queriedAttrs[6].value)};
uint32_t const planeCount{*static_cast<uint32_t const*>(queriedAttrs[7].value)};
// Import external memory
{
cudaExternalMemoryHandleDesc memHandleDesc{};
memHandleDesc.type = cudaExternalMemoryHandleTypeNvSciBuf;
memHandleDesc.handle.nvSciBufObject = srcImage.get();
memHandleDesc.size = fullBufferSize;
if(auto const error{cudaImportExternalMemory(&externalMemory, &memHandleDesc)}; error != cudaSuccess)
{
LOG_ERR("Could not import external memory: %d", error);
}
}
for (uint32_t planeIndex{0}; planeIndex < planeCount; ++planeIndex)
{
LOG_ERR("Plane %d W: %d H: %d P: %d", planeIndex, planeWidth[planeIndex], planeHeight[planeIndex], planePitch[planeIndex]);
/* Create mipmapArray */
cudaExtent const extent{make_cudaExtent(planeWidth[planeIndex], planeHeight[planeIndex], 0)}; // It works only on DriveOS 5.2.6, on DriveOS 6.0.8.1 the image breaks
//cudaExtent const extent{make_cudaExtent(planePitch[planeIndex] / channelCountPerPlane[planeIndex], planeHeight[planeIndex], 0)}; // It makes both versions work
cudaChannelFormatDesc desc{};
uint32_t const bitsPerPixel{planePixelBits[planeIndex] / channelCountPerPlane[planeIndex]};
switch (channelCountPerPlane[planeIndex]) {
case 1:
desc = cudaCreateChannelDesc(bitsPerPixel, 0, 0, 0, cudaChannelFormatKindUnsigned);
break;
case 2:
desc = cudaCreateChannelDesc(bitsPerPixel, bitsPerPixel, 0, 0, cudaChannelFormatKindUnsigned);
break;
default:
LOG_ERR("Unsupported channel count");
}
cudaExternalMemoryMipmappedArrayDesc mipmapDesc{};
mipmapDesc.offset = planeOffset[planeIndex];
mipmapDesc.formatDesc = desc;
mipmapDesc.extent = extent;
mipmapDesc.flags = 0;
mipmapDesc.numLevels = 1;
if(auto const error{cudaExternalMemoryGetMappedMipmappedArray(&mipmapArray[planeIndex], externalMemory, &mipmapDesc)}; error != cudaSuccess)
{
LOG_ERR("Could not create mipmapped array: %d", error);
}
/* Create cuda arrays */
if(auto const error{cudaGetMipmappedArrayLevel(&imagePlanes[planeIndex], mipmapArray[planeIndex], 0 /*mipmapLevel*/)}; error != cudaSuccess)
{
LOG_ERR("Could not create cuda array from mipmapArray: %d", error);
}
/* Create cuda surface */
{
cudaResourceDesc resourceDesc{};
resourceDesc.resType = cudaResourceTypeArray;
resourceDesc.res.array.array = imagePlanes[planeIndex];
if(auto const error{cudaCreateSurfaceObject(&cudaSurfaceNvmediaBuf[planeIndex], &resourceDesc)}; error != cudaSuccess)
{
LOG_ERR("Could not create cuda surface from array: %d", error);
}
}
}
}
// init input image
for(size_t y = 0; y < imgH; ++y)
{
for(size_t x = 0; x < imgW; ++x)
{
inImgCudaY[y * imgW + x] = (y * 128 / imgH) + (x * 128 / imgW);
if(x % 2 == 0 && y % 2 == 0)
{
inImgCudaUV[(y / 2) * imgW + x] = (y * 128 / imgH) + (x * 128 / imgW);
inImgCudaUV[(y / 2) * imgW + x + 1] = 255 - (y * 128 / imgH) + (x * 128 / imgW);
}
}
}
// Fill image from cuda
{
cudaError_t cudaError = cudaSuccess;
cudaError = cudaMemcpy2DToArray(imagePlanes[0], 0, 0, inImgCudaY.data(), imgW, imgW, imgH, cudaMemcpyHostToDevice);
if(cudaError != cudaSuccess)
{
LOG_ERR("Couldn't copy image to host memory, cudaMemcpy failed: %d", cudaError);
return -1;
}
cudaError = cudaMemcpy2DToArray(imagePlanes[1], 0, 0, inImgCudaUV.data(), imgW, imgW, imgH / 2, cudaMemcpyHostToDevice);
if(cudaError != cudaSuccess)
{
LOG_ERR("Couldn't copy image to host memory, cudaMemcpy failed: %d", cudaError);
return -1;
}
}
// Get output data
{
std::array<uint32_t, 3> outImgPitches{imgW, imgW / 2, imgW / 2};
#ifdef DRIVEOS_5_COMPAT
NvMediaImage* nvMediaImage{};
if (auto const err{NvMediaImageCreateFromNvSciBuf(getNvMediaDevice(), srcImage.get(), &nvMediaImage)}; err != NVMEDIA_STATUS_OK) {
LOG_ERR("NvMediaImageCreateFromNvSciBuf failed: %d", err);
return -1;
}
NvMediaImageSurfaceMap surfaceMap{};
if(auto const error{NvMediaImageLock(nvMediaImage, NVMEDIA_IMAGE_ACCESS_READ, &surfaceMap)}; error != NVMEDIA_STATUS_OK)
{
LOG_ERR("NvMediaImageLock failed: %d", error);
}
if(auto const error{NvMediaImageGetBits(nvMediaImage, nullptr, const_cast<void**>(outImgPtrs.data()), outImgPitches.data())}; error != NVMEDIA_STATUS_OK)
{
LOG_ERR("NvMediaImageGetBits failed: %d", error);
}
NvMediaImageUnlock(nvMediaImage);
#else
std::array<uint32_t, 3> outImgSizes{outImgCpuY.size(), outImgCpuU.size(), outImgCpuV.size()};
if(auto const error{NvSciBufObjGetPixels(srcImage.get(), nullptr, outImgPtrs.data(), outImgSizes.data(), outImgPitches.data())}; error != NvSciError_Success) {
LOG_ERR("NvSciBufObjGetPixels failed: %d", error);
return -1;
}
#endif
}
// Save images
{
std::ofstream imgFs("img_cuda" + std::to_string(imgW) + "x" + std::to_string(imgH) + ".yuv", std::ios::out | std::ios::binary | std::ios::trunc);
imgFs.write(reinterpret_cast<char*>(outImgCpuY.data()), outImgCpuY.size());
imgFs.write(reinterpret_cast<char*>(outImgCpuU.data()), outImgCpuU.size());
imgFs.write(reinterpret_cast<char*>(outImgCpuV.data()), outImgCpuV.size());
imgFs.close();
}
}