PCIe non-prefetch space size in Xavier

Hi,

We are looking into porting our PCIe clustering and IO-expansion solution to the Xavier platform using a Jetson AGX Xavier Developer Kit. We have now run into a problem caused by the relatively small non-prefetchable memory range of 30 MB per PCIe controller.

Are that size and range hard-wired in the silicon, or could some spare range in the SoC’s System Address Space be used to extend the non-prefetchable memory range of at least one Root Port?

Kind regards,
Friedrich

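Please use the following patches to increase the aperture used to map the non-prefetchable BARs of endpoints from 30MB to up to 3GB (3GB on the controllers that previously had a 16GB prefetchable window, 256MB on the controllers that had a 1GB prefetchable window; the space is carved out of the prefetchable window). You can adjust the values in the device-tree patch to increase it further.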

diff --git a/kernel-dts/tegra194-soc/tegra194-soc-pcie.dtsi b/kernel-dts/tegra194-soc/tegra194-soc-pcie.dtsi
index 2333198..cf2176e 100644
--- a/kernel-dts/tegra194-soc/tegra194-soc-pcie.dtsi
+++ b/kernel-dts/tegra194-soc/tegra194-soc-pcie.dtsi
@@ -572,8 +572,8 @@
 
 		bus-range = <0x0 0xff>;
 		ranges = <0x81000000 0x0 0x38100000 0x0 0x38100000 0x0 0x00100000      /* downstream I/O (1MB) */
-			  0x82000000 0x0 0x38200000 0x0 0x38200000 0x0 0x01E00000      /* non-prefetchable memory (30MB) */
-			  0xc2000000 0x18 0x00000000 0x18 0x00000000 0x4 0x00000000>;  /* prefetchable memory (16GB) */
+			  0x82000000 0x0 0x40000000 0x1B 0x40000000 0x0 0xC0000000     /* non-prefetchable memory (3GB) */
+			  0xc2000000 0x18 0x00000000 0x18 0x00000000 0x3 0x40000000>;  /* prefetchable memory (12GB) */
 
 		nvidia,cfg-link-cap-l1sub = <0x1c4>;
 		nvidia,cap-pl16g-status = <0x174>;
@@ -640,8 +640,8 @@
 
 		bus-range = <0x0 0xff>;
 		ranges = <0x81000000 0x0 0x30100000 0x0 0x30100000 0x0 0x00100000      /* downstream I/O (1MB) */
-			  0x82000000 0x0 0x30200000 0x0 0x30200000 0x0 0x01E00000      /* non-prefetchable memory (30MB) */
-			  0xc2000000 0x12 0x00000000 0x12 0x00000000 0x0 0x40000000>;  /* prefetchable memory (1GB) */
+			  0x82000000 0x0 0x40000000 0x12 0x30000000 0x0 0x10000000     /* non-prefetchable memory (256MB) */
+			  0xc2000000 0x12 0x00000000 0x12 0x00000000 0x0 0x30000000>;  /* prefetchable memory (768MB) */
 
 		nvidia,cfg-link-cap-l1sub = <0x194>;
 		nvidia,cap-pl16g-status = <0x164>;
@@ -707,8 +707,8 @@
 
 		bus-range = <0x0 0xff>;
 		ranges = <0x81000000 0x0 0x32100000 0x0 0x32100000 0x0 0x00100000      /* downstream I/O (1MB) */
-			  0x82000000 0x0 0x32200000 0x0 0x32200000 0x0 0x01E00000      /* non-prefetchable memory (30MB) */
-			  0xc2000000 0x12 0x40000000 0x12 0x40000000 0x0 0x40000000>;  /* prefetchable memory (1GB) */
+			  0x82000000 0x0 0x40000000 0x12 0x70000000 0x0 0x10000000     /* non-prefetchable memory (256MB) */
+			  0xc2000000 0x12 0x40000000 0x12 0x40000000 0x0 0x30000000>;  /* prefetchable memory (768MB) */
 
 		nvidia,cfg-link-cap-l1sub = <0x194>;
 		nvidia,cap-pl16g-status = <0x164>;
@@ -774,8 +774,8 @@
 
 		bus-range = <0x0 0xff>;
 		ranges = <0x81000000 0x0 0x34100000 0x0 0x34100000 0x0 0x00100000      /* downstream I/O (1MB) */
-			  0x82000000 0x0 0x34200000 0x0 0x34200000 0x0 0x01E00000      /* non-prefetchable memory (30MB) */
-			  0xc2000000 0x12 0x80000000 0x12 0x80000000 0x0 0x40000000>;  /* prefetchable memory (1GB) */
+			  0x82000000 0x0 0x40000000 0x12 0xB0000000 0x0 0x10000000     /* non-prefetchable memory (256MB) */
+			  0xc2000000 0x12 0x80000000 0x12 0x80000000 0x0 0x30000000>;  /* prefetchable memory (768MB) */
 
 		nvidia,cfg-link-cap-l1sub = <0x194>;
 		nvidia,cap-pl16g-status = <0x164>;
@@ -841,8 +841,8 @@
 
 		bus-range = <0x0 0xff>;
 		ranges = <0x81000000 0x0 0x36100000 0x0 0x36100000 0x0 0x00100000      /* downstream I/O (1MB) */
-			  0x82000000 0x0 0x36200000 0x0 0x36200000 0x0 0x01E00000      /* non-prefetchable memory (30MB) */
-			  0xc2000000 0x14 0x00000000 0x14 0x00000000 0x4 0x00000000>;  /* prefetchable memory (16GB) */
+			  0x82000000 0x0 0x40000000 0x17 0x40000000 0x0 0xC0000000      /* non-prefetchable memory (3GB) */
+			  0xc2000000 0x14 0x00000000 0x14 0x00000000 0x3 0x40000000>;  /* prefetchable memory (12GB) */
 
 		nvidia,cfg-link-cap-l1sub = <0x1b0>;
 		nvidia,cap-pl16g-status = <0x174>;
@@ -913,8 +913,8 @@
 
 		bus-range = <0x0 0xff>;
 		ranges = <0x81000000 0x0 0x3a100000 0x0 0x3a100000 0x0 0x00100000      /* downstream I/O (1MB) */
-			  0x82000000 0x0 0x3a200000 0x0 0x3a200000 0x0 0x01E00000      /* non-prefetchable memory (30MB) */
-			  0xc2000000 0x1c 0x00000000 0x1c 0x00000000 0x4 0x00000000>;  /* prefetchable memory (16GB) */
+			  0x82000000 0x0 0x40000000 0x1f 0x40000000 0x0 0xC0000000     /* non-prefetchable memory (3GB) */
+			  0xc2000000 0x1c 0x00000000 0x1c 0x00000000 0x3 0x40000000>;  /* prefetchable memory (12GB) */
 
 		nvidia,cfg-link-cap-l1sub = <0x1c4>;
 		nvidia,cap-pl16g-status = <0x174>;

and

diff --git a/drivers/pci/dwc/pcie-tegra.c b/drivers/pci/dwc/pcie-tegra.c
index d118cf9..8593dee 100644
--- a/drivers/pci/dwc/pcie-tegra.c
+++ b/drivers/pci/dwc/pcie-tegra.c
@@ -2959,12 +2959,14 @@
 			/* program iATU for Non-prefetchable MEM mapping */
 			outbound_atu(pp, PCIE_ATU_REGION_INDEX3,
 				     PCIE_ATU_TYPE_MEM, win->res->start,
-				     win->res->start, resource_size(win->res));
+				     win->res->start - win->offset,
+				     resource_size(win->res));
 		} else if (win->res->flags & IORESOURCE_MEM) {
 			/* program iATU for Non-prefetchable MEM mapping */
 			outbound_atu(pp, PCIE_ATU_REGION_INDEX2,
 				     PCIE_ATU_TYPE_MEM, win->res->start,
-				     win->res->start, resource_size(win->res));
+				     win->res->start - win->offset,
+				     resource_size(win->res));
 		}
 	}

Hi,

thank you very much for the prompt reply. We will test the patch as soon as possible and report back.

Kind regards,
Friedrich

Hi again,

the patch is apparently based on something newer than L4T 31.1. Could you please provide a patch for kernel-4.9? Or could we update to a newer (beta) version of L4T?

Kind regards,
Friedrich

You can use the following patch. Only the file name is different (in kernel 4.9 it is drivers/pci/host/pcie-tegra-dw.c).

diff --git a/drivers/pci/host/pcie-tegra-dw.c b/drivers/pci/host/pcie-tegra-dw.c
index 10a2d827be9d..58fb54f86998 100644
--- a/drivers/pci/host/pcie-tegra-dw.c
+++ b/drivers/pci/host/pcie-tegra-dw.c
@@ -2563,12 +2563,14 @@ static void tegra_pcie_dw_scan_bus(struct pcie_port *pp)
                        /* program iATU for Non-prefetchable MEM mapping */
                        outbound_atu(pp, PCIE_ATU_REGION_INDEX3,
                                     PCIE_ATU_TYPE_MEM, win->res->start,
-                                    win->res->start, resource_size(win->res));
+                                    win->res->start - win->offset,
+                                    resource_size(win->res));
                } else if (win->res->flags & IORESOURCE_MEM) {
                        /* program iATU for Non-prefetchable MEM mapping */
                        outbound_atu(pp, PCIE_ATU_REGION_INDEX2,
                                     PCIE_ATU_TYPE_MEM, win->res->start,
-                                    win->res->start, resource_size(win->res));
+                                    win->res->start - win->offset,
+                                    resource_size(win->res));
                }
        }

Hi,

the patch solves the problem and our card is now enumerated correctly. Thanks again.
We do have some performance related questions, but I’ll open another topic for that.

Kind regards,
Friedrich