How to use large size DMA buffer with dma_alloc_coherent

Hi,

I would like to allocate a 2MB DMA buffer using dma_alloc_coherent. I originally intended to use pci_alloc_consistent, but that API fails when trying to allocate a 2MB buffer.

JetPack and kernel versions are as follows:

  • JetPack version
    JetPack 4.2.1.

  • Kernel
    Linux tegra-ubuntu 4.9.140-tegra #1 SMP PREEMPT Tue Jul 16 17:09:53 PDT 2019 aarch64 aarch64 aarch64 GNU/Linux

The source code and Print output are listed below:

============================================
/* Debug print helper: forwards its format string and arguments unchanged to printk(). */
#define PRINTF(...) printk(__VA_ARGS__)

/*
 * Per-board DMA buffer bookkeeping.
 *
 * All three fields are filled in by attach_dmamem(): on success they describe
 * the allocated buffer, on failure pDmaBuf is NULL and bDmaSize is 0.
 */
typedef struct {
	caddr_t    pDmaBuf;	/* CPU-side address returned by dma_alloc_coherent() */
	dma_addr_t pDmaAdr;	/* DMA handle written by dma_alloc_coherent(); read by doDmaTrans() */
	ULONG      bDmaSize;	/* size in bytes of the buffer actually obtained */
} BoardDEV;

/*
 * attach_dmamem() - allocate the coherent DMA buffer for a board.
 *
 * Tries DMA_BUFFER_LENGTH first and halves the request after every failed
 * attempt, stopping once the size would drop to 0x100 bytes or less.  The
 * first successful allocation is recorded in dev->pDmaBuf / dev->pDmaAdr /
 * dev->bDmaSize.
 *
 * Returns 0 on success.  Returns -EINVAL when dev is NULL or when no size
 * could be allocated; in the latter case dev->pDmaBuf is set to NULL and
 * dev->bDmaSize to 0.
 */
static int32_t attach_dmamem(BoardDEV *dev)
{
	static const char szMethod[]="dma_alloc_coherent";
	ULONG length;

	PRINTF("%s() Entry\n",__func__);

	/*
	 * Reject a NULL dev before anything touches it.  Previously the NULL
	 * check only guarded the loop and the failure path below still wrote
	 * through the NULL pointer.
	 */
	if(dev==NULL)
	{
		PRINTF("%s() exit\n",__func__);
		return(-EINVAL);
	}

	for(length=DMA_BUFFER_LENGTH;length>0x100;length/=2)
	{
		/*
		 * NOTE(review): the first argument is the struct device that will
		 * perform the DMA.  NULL is passed here because BoardDEV carries no
		 * device pointer; the kernel DMA API documentation expects the real
		 * device (for a PCI board, &pdev->dev) so that the returned handle
		 * is valid for that device's address translation.  Confirm and pass
		 * the PCI device once it is reachable from BoardDEV.
		 */
		dev->pDmaBuf=dma_alloc_coherent(NULL,length,&dev->pDmaAdr,GFP_KERNEL|GFP_DMA);

		if(dev->pDmaBuf==NULL)
		{
			PRINTF("%s() alloc (%08lx) Failure\n",__func__,length);
			continue;	/* retry with half the size */
		}

		dev->bDmaSize=length;
		PRINTF("%s() %s(%08lx) Success\n",__func__,szMethod,dev->bDmaSize);
		PRINTF("  size=0x%lxbytes\n",dev->bDmaSize);				/* size=0x80000bytes */
		PRINTF("  cpuaddr=0x%p\n",dev->pDmaBuf);				/* cpuaddr=0xffffff800b081000 */
		PRINTF("  devaddr=0x%p\n",(void*)dev->pDmaAdr);				/* devaddr=0x00000000fff00000 */
		/*
		 * NOTE(review): diagnostic only.  The DMA API documentation says
		 * the CPU address of a coherent buffer must not be converted with
		 * virt_to_phys(); only dev->pDmaAdr may be handed to the device.
		 */
		PRINTF(" (busaddr=0x%p)\n",(void*)virt_to_phys(dev->pDmaBuf));		/* busaddr=0x0000000083281000 */
		PRINTF("%s() exit\n",__func__);
		return(0);
	}

	/* Every size failed: leave the descriptor in a well-defined empty state. */
	dev->pDmaBuf=NULL;
	dev->bDmaSize=0;
	PRINTF("%s() exit\n",__func__);
	return(-EINVAL);
}

/*
 * drv_probe() - probe entry point (abridged excerpt).
 *
 * Allocates the board's DMA buffer and returns the result of
 * attach_dmamem(): 0 on success, a negative errno otherwise.  Previously the
 * function had no return statement and discarded the allocation result.
 *
 * NOTE(review): `dev` is not declared in the visible excerpt; presumably it
 * is the BoardDEV set up in elided code — confirm.
 */
int drv_probe(struct pci_dev *devp,const struct pci_device_id *entp)
{

return attach_dmamem(dev);

}

/*
 * doDmaTrans() - program the FPGA address translation registers and the DMA
 * descriptor for one transfer (abridged excerpt; lone ':' lines mark code
 * that was left out of the listing).
 *
 * dev       - board descriptor; dev->pDmaAdr is used as the DMA base address
 * dmaOffset - offset added to dev->pDmaAdr before the registers are programmed
 * ddrOffset - FPGA-side (SOPC) address for DDR/OCM
 * length    - not used in the visible lines
 * pci2local - non-zero selects the branch that reads from the DMA buffer side
 *
 * Nothing is programmed when dev->pDmaAdr is 0.  The return statement is not
 * part of the visible excerpt.
 */
static int doDmaTrans(BoardDEV *dev,DWORD dmaOffset,ULONG ddrOffset,int length,int pci2local)
{

DWORD mask[2];
ULONG dmaAddr;
DWORD read_addressL;
DWORD write_addressL;
DWORD read_addressH;
DWORD write_addressH;

/* Proceed only when a DMA address has been recorded for this board. */
if((dmaAddr=dev->pDmaAdr)!=0)
{
	PRINTF("%s(dma:+%08x,ddr:+%08lx) Entry. dmaAddr:0x%08lx\n",__func__,dmaOffset,ddrOffset,dmaAddr);
	/* Write all-ones patterns, then read back which bits the registers keep. */
	fpgaWL(dev,0x1000,0xfffffffc);		// 0x1000 is the offset for translation register
	fpgaWL(dev,0x1004,0xffffffff);

	// reading out the resulted mask of path through
	mask[0]=fpgaRL(dev,0x1000);			/* Low  32Bit (ff000000) */
	mask[1]=fpgaRL(dev,0x1004);			/* High 32Bit (ffffffff) */
	PRINTF("%s() mask[0]:0x%08x, mask[1]:0x%08x\n",__func__,mask[0],mask[1]);

	// program address translation table
	/* Only the address bits selected by the masks are written; bit 0 of the
	 * low word is always set (NOTE(review): presumably an enable/type flag —
	 * confirm against the FPGA register description). */
	dmaAddr+=dmaOffset;
	fpgaWL(dev,0x1000,(DWORD)(dmaAddr&mask[0])|1);		/* setting lower address */
	fpgaWL(dev,0x1004,(DWORD)((dmaAddr>>32)&mask[1]));	/* setting upper address */
	PRINTF("%s() fpgaWL(0x1000,0x%08x)\n",__func__,(DWORD)(dmaAddr&mask[0])|1);
	PRINTF("%s() fpgaWL(0x1004,0x%08x)\n",__func__,(DWORD)((dmaAddr>>32)&mask[1]));
		:
	if(pci2local)
	{
		// generate buffer base addresses and lengths
		/* Low word: the address bits NOT covered by mask[0], i.e. the part of
		 * dmaAddr that was not written to the translation register. */
		read_addressL = (DWORD)dmaAddr&~mask[0];
		read_addressH = (DWORD)(PCIADRMAP>>32);			/* PCIADRMAP:0x3_0000_0000 */
			:
	}
	else
	{
		// generate buffer base addresses and lengths
		read_addressL = (DWORD)ddrOffset;			//ddrOffset is the SOPC address for DDR/OCM
		read_addressH = (DWORD)(ddrOffset>>32);			//ddrOffset is the SOPC address for DDR/OCM
			:
	}

	/* Write the descriptor words and log each write.
	 * NOTE(review): write_addressL/write_addressH are not assigned in the
	 * visible lines; presumably set in the elided parts of the branches above. */
	fpgaWL(dev,MSGDMADESC+0x00,read_addressL);
	PRINTF("%s() fpgaWL(0x%08x,0x%08x)\n",__func__,MSGDMADESC+0x00,read_addressL);
	fpgaWL(dev,MSGDMADESC+0x04,write_addressL);
	PRINTF("%s() fpgaWL(0x%08x,0x%08x)\n",__func__,MSGDMADESC+0x04,write_addressL);
		:
	fpgaWL(dev,MSGDMADESC+0x14,read_addressH);
	PRINTF("%s() fpgaWL(0x%08x,0x%08x)\n",__func__,MSGDMADESC+0x14,read_addressH);
	fpgaWL(dev,MSGDMADESC+0x18,write_addressH);
	PRINTF("%s() fpgaWL(0x%08x,0x%08x)\n",__func__,MSGDMADESC+0x18,write_addressH);
		:

}
	:

}

Print Log:

10 0:00:00.014437(+0.000003) 5:doDmaTrans(dma:+00000000,ddr:+00100000) Entry. dmaAddr:0xe0100000
11 0:00:00.014444(+0.000007) 5:doDmaTrans() mask[0]:0xff000000, mask[1]:0xffffffff
12 0:00:00.014445(+0.000001) 5:doDmaTrans() fpgaWL(0x1000,0xe0000001)
13 0:00:00.014446(+0.000001) 5:doDmaTrans() fpgaWL(0x1004,0x00000000)
14 0:00:00.014450(+0.000004) 5:doDmaTrans() fpgaWL(0x00018000,0x00100000)
15 0:00:00.014451(+0.000001) 5:doDmaTrans() fpgaWL(0x00018004,0x00100000)
16 0:00:00.014453(+0.000002) 5:doDmaTrans() fpgaWL(0x00018014,0x00000000)
17 0:00:00.014454(+0.000001) 5:doDmaTrans() fpgaWL(0x00018018,0x00000003)
18 0:00:00.028845(+0.014391) 5:doDmaTrans(dma:+00100000,ddr:+00200000) Entry. dmaAddr:0xe0100000
19 0:00:00.028853(+0.000008) 5:doDmaTrans() mask[0]:0xff000000, mask[1]:0xffffffff
20 0:00:00.028854(+0.000001) 5:doDmaTrans() fpgaWL(0x1000,0xe0000001)
21 0:00:00.028854(+0.000000) 5:doDmaTrans() fpgaWL(0x1004,0x00000000)
22 0:00:00.028858(+0.000004) 5:doDmaTrans() fpgaWL(0x00018000,0x00200000)
23 0:00:00.028859(+0.000001) 5:doDmaTrans() fpgaWL(0x00018004,0x00200000)
24 0:00:00.028860(+0.000001) 5:doDmaTrans() fpgaWL(0x00018014,0x00000000)
25 0:00:00.028861(+0.000001) 5:doDmaTrans() fpgaWL(0x00018018,0x00000003)

============================================

The 2MB buffer can now be allocated using dma_alloc_coherent. However, the data comparison does not match, and the following error appears in the system log when the DMA is actually run.
Am I using dma_alloc_coherent incorrectly? I would appreciate any guidance on how to resolve this error.

=Error log=
mc-err: (255) csr_afir: EMEM address decode error
arm-smmu 12000000.iommu: Unhandled context fault: iova=0xe0200000, fsynr=0x2, cb=22, sid=17(0x11 - AFI), pgd=1671d4003, pud=1671d4003, pmd=0, pte=0
arm-smmu 12000000.iommu: Unhandled context fault: iova=0xe0203340, fsynr=0x2, cb=22, sid=17(0x11 - AFI), pgd=1671d4003, pud=1671d4003, pmd=0, pte=0
arm-smmu 12000000.iommu: Unhandled context fault: iova=0xe0206f40, fsynr=0x2, cb=22, sid=17(0x11 - AFI), pgd=1671d4003, pud=1671d4003, pmd=0, pte=0
arm-smmu 12000000.iommu: Unhandled context fault: iova=0xe020a000, fsynr=0x2, cb=22, sid=17(0x11 - AFI), pgd=1671d4003, pud=1671d4003, pmd=0, pte=0
arm-smmu 12000000.iommu: Unhandled context fault: iova=0xe020d480, fsynr=0x2, cb=22, sid=17(0x11 - AFI), pgd=1671d4003, pud=1671d4003, pmd=0, pte=0
arm-smmu 12000000.iommu: Unhandled context fault: iova=0xe0210500, fsynr=0x2, cb=22, sid=17(0x11 - AFI), pgd=1671d4003, pud=1671d4003, pmd=0, pte=0
arm-smmu 12000000.iommu: Unhandled context fault: iova=0xe0213180, fsynr=0x2, cb=22, sid=17(0x11 - AFI), pgd=1671d4003, pud=1671d4003, pmd=0, pte=0
arm-smmu 12000000.iommu: Unhandled context fault: iova=0xe0216980, fsynr=0x2, cb=22, sid=17(0x11 - AFI), pgd=1671d4003, pud=1671d4003, pmd=0, pte=0
arm-smmu 12000000.iommu: Unhandled context fault: iova=0xe0219300, fsynr=0x2, cb=22, sid=17(0x11 - AFI), pgd=1671d4003, pud=1671d4003, pmd=0, pte=0

Best Regards,
UNA

Please refer to the thread below to see if it helps: How to allocate large DMA memory - Jetson & Embedded Systems / Jetson TX2 - NVIDIA Developer Forums

Hi Kayccc,

I have already checked the following thread, but I think it is different from my problem.

How to allocate large DMA memory - Jetson & Embedded Systems / Jetson TX2 - NVIDIA Developer Forums

Best Regards,
UNA

Hi Kayccc,

Is there anything I should check about this issue or can you give me some advice on how to solve the problem?

Best Regards,
UNA