Immediate (back-to-back) dma_write() and dma_read() cause a system crash

Dear supporters,

My SDK is JetPack_4.6.1_Linux_JETSON_XAVIER_NX_TARGETS.
I modified pcie-tegra.c (at drivers/pci/dwc/) so that the EP (FPGA) and the RC (Xavier NX) communicate using DMA and ioctl.
The RC provides a misc device so that an application can access the DMA memory via mmap(). A single, isolated dma_write() or dma_read() works fine, but a dma_write() immediately followed by a dma_read() (a back-to-back pair) crashed the system.
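
For reference, the struct ioctl_info that the application and driver exchange is not shown in the excerpts below; reconstructed from how its fields are used, it presumably looks roughly like this (the field types, the command numbers, and the DMA_MAX_RD_REQ_SIZE value are my assumptions, not the real header):

/* Assumed shape of the shared ioctl interface (reconstructed from usage below; not the real header) */
#include <linux/ioctl.h>

#define DMA_MAX_RD_REQ_SIZE	4096	/* assumed read-request granularity */

struct ioctl_info {
	int		chan;				/* FPGA memory bank / channel number */
	unsigned long	cnt;				/* read-request index */
	unsigned int	size;				/* transfer size in bytes */
	char		buf[DMA_MAX_RD_REQ_SIZE];	/* optional copy-back buffer */
};

#define SCIDMA_IOC_MAGIC	'S'	/* assumed ioctl magic */
#define DPC_RTE_RDY_DMA		_IOW(SCIDMA_IOC_MAGIC, 1, struct ioctl_info)	/* select FPGA bank */
#define DPC_ETR_INI_DMA		_IOWR(SCIDMA_IOC_MAGIC, 2, struct ioctl_info)	/* start a DMA read */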

Pseudo code:

  1. Write the FPGA's memory bank number through dma_write(), as follows:

    tx.dst = pcie->dst = RC_DMA_PHY_ADDR + 0x200; /* 0x1f40000000, DMA memory address */
    /* adding an offset of 0x200 is the FPGA engineer's request, to identify its internal memory bank */
    dst_cpu_virt = ioremap(pcie->dst, pcie->size);
    memcpy(pcie->cpu_virt_addr, &((struct ioctl_info *)data)->chan, tx.size);
    ret = sci_dma_write(pcie, &tx);

    I could verify that the correct (intended) memory bank number was written at RC_DMA_PHY_ADDR + 0x200; I checked this with the Vivado tool.

  2. Try to read the data
    After the above dma_write(), I immediately tried to read the FPGA's memory bank at RC_DMA_PHY_ADDR, but the system crashed.

Do you have any idea about this weird situation?
If you need the full source code, please let me know and I will provide it privately.

Thanks in advance,
Matt

Here is the test application source code:

				for (int j = 0; j < 48 /* max channel (bank) */; j++) {
					/* select channel #: write channel_number at DMA address + 0x200 (protocol) first,
					 * and then read the filtered data.
					 * The channel # has to be set by the user-level app (this application knows that).
					 */
					set_info.chan = j;
					set_info.size = 1;

					/* set channel #, retrying if the ioctl was interrupted */
					do {
						ret = ioctl(dma_fd, DPC_RTE_RDY_DMA, &set_info);
					} while ((ret != 0) && (errno == EINTR));
					if (ret != 0) {
						printf("E has occurred!: %d(%s)\n", __LINE__, strerror(errno));
						goto out;
					}

					/* read data */
					memset(&set_info, 0x00, sizeof(struct ioctl_info));
					for (int i = 1; (i * DMA_MAX_RD_REQ_SIZE) <= totlen; i++) {
						set_info.cnt = i - 1;
						set_info.size = DMA_MAX_RD_REQ_SIZE;
						printf("cnt: %lu, chan #: %d\n", set_info.cnt, set_info.chan);
						do {
							ret = ioctl(dma_fd, DPC_ETR_INI_DMA, &set_info);
						} while ((ret != 0) && (errno == EINTR));
						if (ret != 0) {
							printf("E has occurred!: %d(%s)\n", __LINE__, strerror(errno));
							goto out;
						}
						//printf("recvd: %s\n", set_info.buf);
						uiRcvCnt += set_info.size;	/* accumulate received bytes */
					}
				}
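
The excerpt above starts in the middle of the program; the file descriptor dma_fd and the mapped view of the RC DMA buffer would be set up earlier, roughly like this (a minimal sketch; the device node name /dev/scidma, the mapping length, and totlen are assumptions, not taken from the real code):

/* Minimal setup sketch for the test application; assumes the ioctl_info definitions sketched earlier */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	struct ioctl_info set_info;
	unsigned int uiRcvCnt = 0;
	size_t totlen = 1 * 1024 * 1024;	/* assumed total amount of data to read back */
	size_t map_len = 2 * 1024 * 1024;	/* assumed size of the RC-side DMA buffer */
	int ret = 0;

	int dma_fd = open("/dev/scidma", O_RDWR);	/* assumed misc-device node */
	if (dma_fd < 0) {
		perror("open");
		return 1;
	}

	/* map the RC-side DMA memory that the driver exposes through its mmap() hook */
	void *dma_buf = mmap(NULL, map_len, PROT_READ | PROT_WRITE, MAP_SHARED, dma_fd, 0);
	if (dma_buf == MAP_FAILED) {
		perror("mmap");
		close(dma_fd);
		return 1;
	}

	/* ... channel-select / read loop as shown above ... */

out:
	munmap(dma_buf, map_len);
	close(dma_fd);
	return ret;
}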

and here is the device driver source code:

		case DPC_RTE_RDY_DMA:
			/* notify the EP that setup of the DMA read op. is ready */
			if (copy_from_user((void *)&get_info, (void __user *)arg, sizeof(struct ioctl_info))) {
				ret = -EFAULT;
				break;
			}

			if (sci_write_chan(scidma_dev->pcie, &get_info) > 0) {
				//OK
			} else {
				ret = -EIO;	/* notify that an error has occurred */
			}
			break;

static int sci_write_chan(struct tegra_pcie_dw *pdev, void *data)
{
	struct tegra_pcie_dw *pcie = pdev;
	struct dma_tx tx;
	int ret = 0;
	void __iomem *dst_cpu_virt;

	memset(&tx, 0x0, sizeof(struct dma_tx));

	tx.src = pcie->src = pdev->dma_addr;  /*   0xF8000000, BAR: not data-src address for data transfer */
	pcie->dst = RC_DMA_PHY_ADDR;
	tx.dst = RC_DMA_PHY_ADDR + 0x200; /* 0x1f40000000, DMA memory address */
	tx.size = pcie->size = 1; /* just 1 byte: channel # */
	tx.channel = (pcie->channel)?(pcie->channel):0/* default channel # is 0 */;

	dst_cpu_virt = ioremap(pcie->dst, pcie->size);

	/* fill source with random data */
	#if 0
	get_random_bytes(pcie->cpu_virt_addr, pcie->size);
	#endif
	memcpy(pcie->cpu_virt_addr, &((struct ioctl_info *)data)->chan, tx.size);

	ret = sci_dma_write(pcie, &tx);
	if (ret <= 0) {
		dev_err(pcie->dev, "DMA-Write test FAILED (%s:%u)\n", __func__, __LINE__);
		ret = -EIO;
		goto err_out;
	}

err_out:
	iounmap(dst_cpu_virt);
	return ret;
}
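
The excerpts do not show how pcie->cpu_virt_addr and pdev->dma_addr (the kernel-virtual and bus addresses of the local buffer the write engine reads from) are set up. If they come from dma_alloc_coherent(), the allocation in probe() would look roughly like this (the buffer size is an assumption; the real code may instead use a fixed carve-out, given the 0xF8000000 comment above):

	/* hedged sketch: allocate the RC-local DMA buffer once, e.g. in probe() (assumed, not shown in my excerpts) */
	pcie->size = SZ_2M;			/* assumed buffer size, from <linux/sizes.h> */
	pcie->cpu_virt_addr = dma_alloc_coherent(pcie->dev, pcie->size,
						 &pcie->dma_addr, GFP_KERNEL);
	if (!pcie->cpu_virt_addr) {
		dev_err(pcie->dev, "failed to allocate DMA buffer\n");
		return -ENOMEM;
	}
	dev_info(pcie->dev, "DMA buffer: virt=%p bus=%pad size=0x%x\n",
		 pcie->cpu_virt_addr, &pcie->dma_addr, pcie->size);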


static int sci_dma_write(struct tegra_pcie_dw *pcie, struct dma_tx *tx)
{
	struct dw_pcie *pci = &pcie->pci;
	struct device *dev = pcie->dev;
	u32 val = 0, bit = 0;
	int ret = 0;
	unsigned long now, timeout = msecs_to_jiffies(6000);

	if (tx->channel > 3) {
		dev_err(dev, "Invalid channel num, should be within [0~3]\n");
		return -EINVAL;
	}

	/* acquire lock for channel HW */
	mutex_lock(&pcie->wr_lock[tx->channel]);

	/* program registers */
	/* Enable Write Engine */
	dma_common_wr(pci->atu_base, DMA_WRITE_ENGINE_EN_OFF_ENABLE,
		      DMA_WRITE_ENGINE_EN_OFF);

	/* Un Mask DONE and ABORT interrupts */
	val = dma_common_rd(pci->atu_base, DMA_WRITE_INT_MASK_OFF);
	val &= ~(1 << tx->channel);		/* DONE */
	val &= ~(1 << ((tx->channel) + 16));	/* ABORT */
	dma_common_wr(pci->atu_base, val, DMA_WRITE_INT_MASK_OFF);

	val = dma_channel_rd(pci->atu_base, tx->channel,
			     DMA_CH_CONTROL1_OFF_WRCH);
	if (tx->ll)
		val = DMA_CH_CONTROL1_OFF_WRCH_LLE;
	else
		val = DMA_CH_CONTROL1_OFF_WRCH_LIE;
	dma_channel_wr(pci->atu_base, tx->channel, val,
		       DMA_CH_CONTROL1_OFF_WRCH);

	if (tx->ll) {
		dma_channel_wr(pci->atu_base, tx->channel,
			       (tx->src & 0xFFFFFFFF),
			       DMA_LLP_LOW_OFF_WRCH);
		dma_channel_wr(pci->atu_base, tx->channel,
			       ((tx->src >> 32) & 0xFFFFFFFF),
			       DMA_LLP_HIGH_OFF_WRCH);
	} else {
		dma_channel_wr(pci->atu_base, tx->channel, tx->size,
			       DMA_TRANSFER_SIZE_OFF_WRCH);
		/* DMA Write: The SAR is the address of the local memory. */
		dma_channel_wr(pci->atu_base, tx->channel,
			       (tx->src & 0xFFFFFFFF),
			       DMA_SAR_LOW_OFF_WRCH);
		dma_channel_wr(pci->atu_base, tx->channel,
			       ((tx->src >> 32) & 0xFFFFFFFF),
			       DMA_SAR_HIGH_OFF_WRCH);
		/* DMA Write: The DAR is the address of the remote memory. */
		dma_channel_wr(pci->atu_base, tx->channel,
			       (tx->dst & 0xFFFFFFFF),
			       DMA_DAR_LOW_OFF_WRCH);
		dma_channel_wr(pci->atu_base, tx->channel,
			       ((tx->dst >> 32) & 0xFFFFFFFF),
			       DMA_DAR_HIGH_OFF_WRCH);
	}
	/* acquire lock for busy-data and mark it as busy and then release */
	pcie->wr_busy |= 1 << tx->channel;

	pcie->wr_start_time = ktime_get();
	/* start DMA (ring the door bell) */
	/* ring the door bell with channel number */
	dma_common_wr(pci->atu_base, pcie->channel,
		      DMA_WRITE_DOORBELL_OFF);

	if (pcie->dma_poll) {
		now = jiffies;
		while (true) {
			val = dma_common_rd(pci->atu_base,
					    DMA_WRITE_INT_STATUS_OFF);
			/* check the status of all busy marked channels */
			for_each_set_bit(bit, &pcie->wr_busy, DMA_WR_CHNL_NUM) {
				if (BIT(bit) & val) {
					pcie->wr_end_time = ktime_get();
					dma_common_wr(pci->atu_base,
						      BIT(bit),
						      DMA_WRITE_INT_CLEAR_OFF);
					/* clear status */
					pcie->wr_busy &= ~(BIT(bit));
				}
			}

			val = dma_common_rd(pci->atu_base,
					    DMA_WRITE_ERR_STATUS_OFF);
			if (val) dev_info(dev, "WRITE_ERR status: 0x%x\n", val);
			/* check the status of all error marked channels
			for_each_set_bit(pcie->channel, &pcie->wr_busy, DMA_WR_CHNL_NUM) {
				if (BIT(bit) & val) {
					pcie->wr_end_time = ktime_get();
					dma_common_wr(pci->atu_base,
						      BIT(bit),
						      DMA_WRITE_INT_CLEAR_OFF);
				}
			} */

			if (!pcie->wr_busy)
				break;
			if (time_after(jiffies, now + timeout)) {
				dev_err(dev, "DMA write timed out & poll end\n");
				ret = -ETIMEDOUT;
				/* if timeout, clear the mess, sanitize channel
				 * & return err
				 */
				dma_common_wr(pci->atu_base,
					      DMA_WRITE_DOORBELL_OFF_WR_STOP |
					      tx->channel,
					      DMA_WRITE_DOORBELL_OFF);
				goto exit;
			}
		}
		dev_info(dev, "DMA write. Size: %u bytes, Time diff: %lld ns\n",
			 tx->size, ktime_to_ns(pcie->wr_end_time) -
			 ktime_to_ns(pcie->wr_start_time));
	} else {
		/* wait for completion or timeout */
		ret = wait_for_completion_timeout(&pcie->wr_cpl[tx->channel],
						  msecs_to_jiffies(5000));
		if (ret == 0) {
			dev_err(dev, "DMA write timed out and no interrupt\n");
			ret = -ETIMEDOUT;
			/* if timeout, clear the mess, sanitize channel &
			 * return err
			 */
			dma_common_wr(pci->atu_base,
				      DMA_WRITE_DOORBELL_OFF_WR_STOP |
				      tx->channel,
				      DMA_WRITE_DOORBELL_OFF);
			goto exit;
		}
	}

exit:
	mutex_unlock(&pcie->wr_lock[tx->channel]);
	return ret;
}
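
In the non-polling branch, wait_for_completion_timeout() relies on the DMA interrupt handler signalling wr_cpl[]. That handler is not part of my excerpt; based on the same helpers used above, it presumably looks roughly like this (the function name and the exact IRQ wiring are assumptions):

/* Assumed shape of the write-DMA interrupt handler (not in the excerpts above) */
static irqreturn_t sci_dma_wr_isr(int irq, void *arg)
{
	struct tegra_pcie_dw *pcie = arg;
	struct dw_pcie *pci = &pcie->pci;
	u32 val, bit = 0;

	val = dma_common_rd(pci->atu_base, DMA_WRITE_INT_STATUS_OFF);

	/* complete every busy-marked channel whose DONE bit is set */
	for_each_set_bit(bit, &pcie->wr_busy, DMA_WR_CHNL_NUM) {
		if (val & BIT(bit)) {
			pcie->wr_end_time = ktime_get();
			dma_common_wr(pci->atu_base, BIT(bit),
				      DMA_WRITE_INT_CLEAR_OFF);
			pcie->wr_busy &= ~BIT(bit);
			complete(&pcie->wr_cpl[bit]);
		}
	}

	return IRQ_HANDLED;
}

The read path would complete rd_cpl[] from DMA_READ_INT_STATUS_OFF in the same way.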

and the read source code is as follows:

		case DPC_ETR_INI_DMA: /* receive data size and initiate DMA read op. */
			if (copy_from_user((void *)&get_info, (void __user *)arg, sizeof(struct ioctl_info))) {
				ret = -EFAULT;
				break;
			}

			/* read real data */
			if (sci_read(scidma_dev->pcie, &get_info) > 0) {
				//OK
			} else {
				ret = -EFAULT;
			}
			break;


static int sci_read(struct tegra_pcie_dw *pdev, void *data)
{
	struct tegra_pcie_dw *pcie = pdev;
	struct dma_tx tx;
	int ret = 0;
	void __iomem *dst_cpu_virt;

	memset(&tx, 0x0, sizeof(struct dma_tx));
	tx.src = pcie->src = RC_DMA_PHY_ADDR; /* 0x1f40000000, DMA memory address */
	tx.dst = pcie->dst = pdev->dma_addr;  /*   0xFFFFE000, BAR: not data-src address for data transfer */
	tx.size = pcie->size = ((struct ioctl_info*)data)->size;
	tx.channel = (pcie->channel)?(pcie->channel):0;/* default channel # is 0 */

	dst_cpu_virt = ioremap(pcie->src, pcie->size);

	/* fill source with random data */
	#if 0
	get_random_bytes(dst_cpu_virt, pcie->size);
	#endif

	ret = sci_dma_read(pcie, &tx);
	if (ret <= 0) {
		dev_err(pcie->dev, "DMA-Read test FAILED (dma_read fail)\n");
		ret = -EIO;
		goto err_out;
	} else {
		/* compare copied data */
		if (memcmp(dst_cpu_virt/*RC_DMA_PHY_ADDR*/,
			pcie->cpu_virt_addr, pcie->size)) {
			dev_info(pcie->dev, "DMA-Read test FAILED\n");

			sci_send_sig(pcie, EP_TO_RC_DMA_ERROR, tx.size);
		}
		else {
			/* send READ completion to application */
			sci_send_sig(pcie, EP_TO_RC_DMA_DONE, tx.size);
		}
	}

err_out:
	iounmap(dst_cpu_virt);
	return ret;
}
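
The mmap() hook mentioned at the top (which lets the application inspect the RC-side DMA buffer) is also not in the excerpts. Assuming the coherent buffer sketched earlier, it could be as simple as this (the scidma_mmap name and the way scidma_dev is obtained are guesses):

/* Assumed misc-device mmap hook (not part of the excerpts); exposes the RC-side DMA buffer to user space */
static int scidma_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* assumed: scidma_dev was stashed in file->private_data by an open() hook (not shown) */
	struct scidma_dev *scidma_dev = file->private_data;
	struct tegra_pcie_dw *pcie = scidma_dev->pcie;
	unsigned long len = vma->vm_end - vma->vm_start;

	if (len > pcie->size)
		return -EINVAL;

	/* hand the coherent buffer (cpu_virt_addr / dma_addr) to user space */
	return dma_mmap_coherent(pcie->dev, vma, pcie->cpu_virt_addr,
				 pcie->dma_addr, len);
}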


static int sci_dma_read(struct tegra_pcie_dw *pcie, struct dma_tx *tx)
{
	struct dw_pcie *pci = &pcie->pci;
	struct device *dev = pcie->dev;
	u32 val = 0, bit = 0;
	unsigned long int ret = 0;
	unsigned long now, timeout = msecs_to_jiffies(6000);

	if (tx->channel > 1) {
		dev_err(dev, "Invalid channel num, should be within [0~1]\n");
		return -EINVAL;
	}

	/* acquire lock for channel HW */
	mutex_lock(&pcie->rd_lock[tx->channel]);

	/* program registers */
	/* Enable Read Engine */
	dma_common_wr(pci->atu_base, DMA_READ_ENGINE_EN_OFF_ENABLE,
		      DMA_READ_ENGINE_EN_OFF);

	/* Un Mask DONE and ABORT interrupts */
	val = dma_common_rd(pci->atu_base, DMA_READ_INT_MASK_OFF);
	val &= ~(1 << tx->channel);		/* DONE */
	val &= ~(1 << ((tx->channel) + 16));	/* ABORT */
	dma_common_wr(pci->atu_base, val, DMA_READ_INT_MASK_OFF);

	val = dma_channel_rd(pci->atu_base, tx->channel,
			     DMA_CH_CONTROL1_OFF_RDCH);
	if (tx->ll)
		val = DMA_CH_CONTROL1_OFF_RDCH_LLE;
	else
		val = DMA_CH_CONTROL1_OFF_RDCH_LIE;
	dma_channel_wr(pci->atu_base, tx->channel, val,
		       DMA_CH_CONTROL1_OFF_RDCH);

	if (tx->ll) {
		dma_channel_wr(pci->atu_base, tx->channel,
			       (tx->src & 0xFFFFFFFF),
			       DMA_LLP_LOW_OFF_RDCH);
		dma_channel_wr(pci->atu_base, tx->channel,
			       ((tx->src >> 32) & 0xFFFFFFFF),
			       DMA_LLP_HIGH_OFF_RDCH);
	} else {
		dma_channel_wr(pci->atu_base, tx->channel, tx->size,
			       DMA_TRANSFER_SIZE_OFF_RDCH);
		/* DMA Read: The SAR is the address of the remote memory. */
		dma_channel_wr(pci->atu_base, tx->channel,
			       (tx->src & 0xFFFFFFFF),
			       DMA_SAR_LOW_OFF_RDCH);
		dma_channel_wr(pci->atu_base, tx->channel,
			       ((tx->src >> 32) & 0xFFFFFFFF),
			       DMA_SAR_HIGH_OFF_RDCH);
		/* DMA Read: The DAR is the address of the local memory. */
		dma_channel_wr(pci->atu_base, tx->channel,
			       (tx->dst & 0xFFFFFFFF),
			       DMA_DAR_LOW_OFF_RDCH);
		dma_channel_wr(pci->atu_base, tx->channel,
			       ((tx->dst >> 32) & 0xFFFFFFFF),
			       DMA_DAR_HIGH_OFF_RDCH);
	}

	/* acquire lock for busy-data and mark it as busy and then release */
	pcie->rd_busy |= 1 << tx->channel;

	pcie->rd_start_time = ktime_get();
	/* start DMA (ring the door bell) */
	/* ring the door bell with channel number */
	dma_common_wr(pci->atu_base, pcie->channel,
		      DMA_READ_DOORBELL_OFF);

	if (pcie->dma_poll) {
		now = jiffies;
		while (true) {
			val = dma_common_rd(pci->atu_base,
					    DMA_READ_INT_STATUS_OFF);
			/* check the status of all busy marked channels */
			for_each_set_bit(bit, &pcie->rd_busy, DMA_RD_CHNL_NUM) {
				if (BIT(bit) & val) {
					pcie->rd_end_time = ktime_get();
					dma_common_wr(pci->atu_base,
						      BIT(bit),
						      DMA_READ_INT_CLEAR_OFF);
					/* clear status */
					pcie->rd_busy &= ~(BIT(bit));
				}
			}
			if (!pcie->rd_busy)
				break;
			if (time_after(jiffies, now + timeout)) {
				dev_err(dev, "DMA read timed out & poll end\n");
				ret = -ETIMEDOUT;
				/* if timeout, clear the mess, sanitize channel
				 * & return err
				 */
				dma_common_wr(pci->atu_base,
					      DMA_READ_DOORBELL_OFF_RD_STOP |
					      tx->channel,
					      DMA_READ_DOORBELL_OFF);
				goto exit;
			}
		}
		dev_info(dev, "DMA read. Size: %u bytes, Time diff: %lld ns\n",
			 tx->size, ktime_to_ns(pcie->rd_end_time) -
			 ktime_to_ns(pcie->rd_start_time));
	} else {
		/* wait for completion or timeout */
		ret = wait_for_completion_timeout(&pcie->rd_cpl[tx->channel],
						  msecs_to_jiffies(5000));
		if (ret == 0) {
			dev_err(dev, "DMA read timed out and no interrupt\n");
			ret = -ETIMEDOUT;
			/* if timeout, clear the mess, sanitize channel
			 * & return err
			 */
			dma_common_wr(pci->atu_base,
				      DMA_READ_DOORBELL_OFF_RD_STOP |
				      tx->channel,
				      DMA_READ_DOORBELL_OFF);
			goto exit;
		}
	}

exit:
	mutex_unlock(&pcie->rd_lock[tx->channel]);
	return ret;
}
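
Finally, for completeness, the ioctl cases above sit inside a misc-device ioctl handler; its registration is not shown in my excerpts, but presumably looks roughly like this (all names here are assumptions):

/* assumed misc-device plumbing around the code above (all names are guesses) */
static const struct file_operations scidma_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= scidma_ioctl,	/* the switch with DPC_RTE_RDY_DMA / DPC_ETR_INI_DMA shown above */
	.mmap		= scidma_mmap,	/* sketched earlier */
};

static struct miscdevice scidma_miscdev = {
	.minor	= MISC_DYNAMIC_MINOR,
	.name	= "scidma",		/* assumed node name: /dev/scidma */
	.fops	= &scidma_fops,
};

/* in probe():
 *	ret = misc_register(&scidma_miscdev);
 */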

Hello supporters,

Is there anybody at NVIDIA looking into this topic? Please have a look at this issue and let me know what I should do.

I have tried to find out what I should do about this issue, and I found the following article:

but there are no clear answers in it.

Thanks,
Matt

Sorry for the late response. Let me bring this issue to the internal team for investigation and see if we can offer any suggestions. Thanks

Yes, please. I can't make any further progress at the moment.
Thanks,
Matt

Hello mayccc,

Is there any update about this topic?

Does the PCIe DMA have only two read channels?

Sorry for the noise… in the end, this issue was resolved by fixing the FPGA's configuration.
This topic can be closed now.
Matt
