Increasing size of BAR0 in Endpoint Mode

Hi,

Here are the patches. The first one is for the device tree.

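Increase the aperture used for mapping non-prefetchable endpoint BARs from its current 30MB to 3GB on the controllers that had a 16GB prefetchable window, and to 256MB on the controllers that had a 1GB prefetchable window. The new non-prefetchable window is mapped at PCI bus address 0x40000000 and is carved out of the top of each controller's prefetchable window, which shrinks accordingly: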

diff --git a/kernel-dts/tegra194-soc/tegra194-soc-pcie.dtsi b/kernel-dts/tegra194-soc/tegra194-soc-pcie.dtsi
index 2333198..cf2176e 100644
--- a/kernel-dts/tegra194-soc/tegra194-soc-pcie.dtsi
+++ b/kernel-dts/tegra194-soc/tegra194-soc-pcie.dtsi
@@ -572,8 +572,8 @@
  
        bus-range = <0x0 0xff>;
        ranges = <0x81000000 0x0 0x38100000 0x0 0x38100000 0x0 0x00100000      /* downstream I/O (1MB) */
-             0x82000000 0x0 0x38200000 0x0 0x38200000 0x0 0x01E00000      /* non-prefetchable memory (30MB) */
-             0xc2000000 0x18 0x00000000 0x18 0x00000000 0x4 0x00000000>;  /* prefetchable memory (16GB) */
+             0x82000000 0x0 0x40000000 0x1B 0x40000000 0x0 0xC0000000     /* non-prefetchable memory (3GB) */
+             0xc2000000 0x18 0x00000000 0x18 0x00000000 0x3 0x40000000>;  /* prefetchable memory (13GB) */
  
        nvidia,cfg-link-cap-l1sub = <0x1c4>;
        nvidia,cap-pl16g-status = <0x174>;
@@ -640,8 +640,8 @@
  
        bus-range = <0x0 0xff>;
        ranges = <0x81000000 0x0 0x30100000 0x0 0x30100000 0x0 0x00100000      /* downstream I/O (1MB) */
-             0x82000000 0x0 0x30200000 0x0 0x30200000 0x0 0x01E00000      /* non-prefetchable memory (30MB) */
-             0xc2000000 0x12 0x00000000 0x12 0x00000000 0x0 0x40000000>;  /* prefetchable memory (1GB) */
+             0x82000000 0x0 0x40000000 0x12 0x30000000 0x0 0x10000000     /* non-prefetchable memory (256MB) */
+             0xc2000000 0x12 0x00000000 0x12 0x00000000 0x0 0x30000000>;  /* prefetchable memory (768MB) */
  
        nvidia,cfg-link-cap-l1sub = <0x194>;
        nvidia,cap-pl16g-status = <0x164>;
@@ -707,8 +707,8 @@
  
        bus-range = <0x0 0xff>;
        ranges = <0x81000000 0x0 0x32100000 0x0 0x32100000 0x0 0x00100000      /* downstream I/O (1MB) */
-             0x82000000 0x0 0x32200000 0x0 0x32200000 0x0 0x01E00000      /* non-prefetchable memory (30MB) */
-             0xc2000000 0x12 0x40000000 0x12 0x40000000 0x0 0x40000000>;  /* prefetchable memory (1GB) */
+             0x82000000 0x0 0x40000000 0x12 0x70000000 0x0 0x10000000     /* non-prefetchable memory (256MB) */
+             0xc2000000 0x12 0x40000000 0x12 0x40000000 0x0 0x30000000>;  /* prefetchable memory (768MB) */
  
        nvidia,cfg-link-cap-l1sub = <0x194>;
        nvidia,cap-pl16g-status = <0x164>;
@@ -774,8 +774,8 @@
  
        bus-range = <0x0 0xff>;
        ranges = <0x81000000 0x0 0x34100000 0x0 0x34100000 0x0 0x00100000      /* downstream I/O (1MB) */
-             0x82000000 0x0 0x34200000 0x0 0x34200000 0x0 0x01E00000      /* non-prefetchable memory (30MB) */
-             0xc2000000 0x12 0x80000000 0x12 0x80000000 0x0 0x40000000>;  /* prefetchable memory (1GB) */
+             0x82000000 0x0 0x40000000 0x12 0xB0000000 0x0 0x10000000     /* non-prefetchable memory (256MB) */
+             0xc2000000 0x12 0x80000000 0x12 0x80000000 0x0 0x30000000>;  /* prefetchable memory (768MB) */
  
        nvidia,cfg-link-cap-l1sub = <0x194>;
        nvidia,cap-pl16g-status = <0x164>;
@@ -841,8 +841,8 @@
  
        bus-range = <0x0 0xff>;
        ranges = <0x81000000 0x0 0x36100000 0x0 0x36100000 0x0 0x00100000      /* downstream I/O (1MB) */
-             0x82000000 0x0 0x36200000 0x0 0x36200000 0x0 0x01E00000      /* non-prefetchable memory (30MB) */
-             0xc2000000 0x14 0x00000000 0x14 0x00000000 0x4 0x00000000>;  /* prefetchable memory (16GB) */
+             0x82000000 0x0 0x40000000 0x17 0x40000000 0x0 0xC0000000      /* non-prefetchable memory (3GB) */
+             0xc2000000 0x14 0x00000000 0x14 0x00000000 0x3 0x40000000>;  /* prefetchable memory (13GB) */
  
        nvidia,cfg-link-cap-l1sub = <0x1b0>;
        nvidia,cap-pl16g-status = <0x174>;
@@ -913,8 +913,8 @@
  
        bus-range = <0x0 0xff>;
        ranges = <0x81000000 0x0 0x3a100000 0x0 0x3a100000 0x0 0x00100000      /* downstream I/O (1MB) */
-             0x82000000 0x0 0x3a200000 0x0 0x3a200000 0x0 0x01E00000      /* non-prefetchable memory (30MB) */
-             0xc2000000 0x1c 0x00000000 0x1c 0x00000000 0x4 0x00000000>;  /* prefetchable memory (16GB) */
+             0x82000000 0x0 0x40000000 0x1f 0x40000000 0x0 0xC0000000     /* non-prefetchable memory (3GB) */
+             0xc2000000 0x1c 0x00000000 0x1c 0x00000000 0x3 0x40000000>;  /* prefetchable memory (13GB) */
  
        nvidia,cfg-link-cap-l1sub = <0x1c4>;
        nvidia,cap-pl16g-status = <0x174>;

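and the matching change in the PCIe controller driver, which subtracts the window offset (win->offset) from the resource start when programming the iATU for the memory windows: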

diff --git a/drivers/pci/dwc/pcie-tegra.c b/drivers/pci/dwc/pcie-tegra.c
index d118cf9..8593dee 100644
--- a/drivers/pci/dwc/pcie-tegra.c
+++ b/drivers/pci/dwc/pcie-tegra.c
@@ -2959,12 +2959,14 @@
            /* program iATU for Non-prefetchable MEM mapping */
            outbound_atu(pp, PCIE_ATU_REGION_INDEX3,
                     PCIE_ATU_TYPE_MEM, win->res->start,
-                    win->res->start, resource_size(win->res));
+                    win->res->start - win->offset,
+                    resource_size(win->res));
        } else if (win->res->flags & IORESOURCE_MEM) {
            /* program iATU for Non-prefetchable MEM mapping */
            outbound_atu(pp, PCIE_ATU_REGION_INDEX2,
                     PCIE_ATU_TYPE_MEM, win->res->start,
-                    win->res->start, resource_size(win->res));
+                    win->res->start - win->offset,
+                    resource_size(win->res));
        }
    }

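Increase the endpoint's BAR0 size from 64 KB to 512 MB. The backing buffer is now allocated with dma_alloc_coherent() instead of alloc_pages() plus a manual IOMMU mapping, and the BAR is marked prefetchable: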

diff --git a/drivers/pci/endpoint/functions/pci-epf-nv-test.c b/drivers/pci/endpoint/functions/pci-epf-nv-test.c
index 8b2a1dcecab8..d3496a199842 100644
--- a/drivers/pci/endpoint/functions/pci-epf-nv-test.c
+++ b/drivers/pci/endpoint/functions/pci-epf-nv-test.c
@@ -16,7 +16,7 @@
 #include <linux/pci-epc.h>
 #include <linux/pci-epf.h>
  
-#define BAR0_SIZE SZ_64K
+#define BAR0_SIZE SZ_512M
  
 struct pci_epf_nv_test {
    struct pci_epf_header header;
@@ -30,14 +30,11 @@ static void pci_epf_nv_test_unbind(struct pci_epf *epf)
    struct pci_epf_nv_test *epfnv = epf_get_drvdata(epf);
    struct pci_epc *epc = epf->epc;
    struct device *cdev = epc->dev.parent;
-   struct iommu_domain *domain = iommu_get_domain_for_dev(cdev);
  
    pci_epc_stop(epc);
    pci_epc_clear_bar(epc, BAR_0);
-   vunmap(epfnv->bar0_ram_map);
-   iommu_unmap(domain, epfnv->bar0_iova, PAGE_SIZE);
-   iommu_dma_free_iova(cdev, epfnv->bar0_iova, BAR0_SIZE);
-   __free_pages(epfnv->bar0_ram_page, 1);
+   dma_free_coherent(cdev, BAR0_SIZE, epfnv->bar0_ram_map,
+             epfnv->bar0_iova);
 }
  
 static int pci_epf_nv_test_bind(struct pci_epf *epf)
@@ -47,7 +44,6 @@ static int pci_epf_nv_test_bind(struct pci_epf *epf)
    struct pci_epf_header *header = epf->header;
    struct device *fdev = &epf->dev;
    struct device *cdev = epc->dev.parent;
-   struct iommu_domain *domain = iommu_get_domain_for_dev(cdev);
    int ret;
  
    ret = pci_epc_write_header(epc, header);
@@ -56,60 +52,29 @@ static int pci_epf_nv_test_bind(struct pci_epf *epf)
        return ret;
    }
  
-   epfnv->bar0_ram_page = alloc_pages(GFP_KERNEL, 1);
-   if (!epfnv->bar0_ram_page) {
-       dev_err(fdev, "alloc_pages() failed\n");
-       ret = -ENOMEM;
-       goto fail;
-   }
-   dev_info(fdev, "BAR0 RAM phys: 0x%llx\n",
-        page_to_phys(epfnv->bar0_ram_page));
-
-   epfnv->bar0_iova = iommu_dma_alloc_iova(cdev, BAR0_SIZE,
-                       cdev->coherent_dma_mask);
-   if (!epfnv->bar0_iova) {
-       dev_err(fdev, "iommu_dma_alloc_iova() failed\n");
-       ret = -ENOMEM;
-       goto fail_free_pages;
-   }
-
-   dev_info(fdev, "BAR0 RAM IOVA: 0x%08llx\n", epfnv->bar0_iova);
-
-   ret = iommu_map(domain, epfnv->bar0_iova,
-           page_to_phys(epfnv->bar0_ram_page),
-           PAGE_SIZE, IOMMU_READ | IOMMU_WRITE);
-   if (ret) {
-       dev_err(fdev, "iommu_map(RAM) failed: %d\n", ret);
-       goto fail_free_iova;
-   }
-   epfnv->bar0_ram_map = vmap(&epfnv->bar0_ram_page, 1, VM_MAP,
-                  PAGE_KERNEL);
+   epfnv->bar0_ram_map = dma_alloc_coherent(cdev, BAR0_SIZE,
+                        &epfnv->bar0_iova, GFP_KERNEL);
    if (!epfnv->bar0_ram_map) {
-       dev_err(fdev, "vmap() failed\n");
+       dev_err(fdev, "dma_alloc_coherent() failed\n");
        ret = -ENOMEM;
-       goto fail_unmap_ram_iova;
+       return ret;
    }
-   dev_info(fdev, "BAR0 RAM virt: 0x%p\n", epfnv->bar0_ram_map);
+   dev_info(fdev, "BAR0 RAM IOVA: 0x%llx\n", epfnv->bar0_iova);
  
    ret = pci_epc_set_bar(epc, BAR_0, epfnv->bar0_iova, BAR0_SIZE,
                  PCI_BASE_ADDRESS_SPACE_MEMORY |
-                 PCI_BASE_ADDRESS_MEM_TYPE_32);
+                 PCI_BASE_ADDRESS_MEM_TYPE_32 |
+                 PCI_BASE_ADDRESS_MEM_PREFETCH);
    if (ret) {
        dev_err(fdev, "pci_epc_set_bar() failed: %d\n", ret);
-       goto fail_unmap_ram_virt;
+       goto fail_set_bar;
    }
  
    return 0;
  
-fail_unmap_ram_virt:
-   vunmap(epfnv->bar0_ram_map);
-fail_unmap_ram_iova:
-   iommu_unmap(domain, epfnv->bar0_iova, PAGE_SIZE);
-fail_free_iova:
-   iommu_dma_free_iova(cdev, epfnv->bar0_iova, BAR0_SIZE);
-fail_free_pages:
-   __free_pages(epfnv->bar0_ram_page, 1);
-fail:
+fail_set_bar:
+   dma_free_coherent(cdev, BAR0_SIZE, epfnv->bar0_ram_map,
+             epfnv->bar0_iova);
    return ret;
 }