From e60b53f93b0c4df3f910a535f9e09b359c98092d Mon Sep 17 00:00:00 2001
From: Jiandi An <jan@nvidia.com>
Date: Fri, 6 Mar 2026 02:36:01 -0600
Subject: [PATCH 001/143] Revert "NVIDIA: VR: SAUCE: cxl: add support for cxl
 reset"

This reverts commit f198764ea997285f369d115202a577f6dee55b0a.

The CXL reset implementation is being reverted to allow
"NVIDIA: VR: SAUCE: CXL/PCI: Move CXL DVSEC definitions into
uapi/linux/pci_regs.h" to apply cleanly.  The reset functionality
will be replaced by the version currently being pursued upstream.

Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/cxlpci.h |  40 ++++--------
 drivers/pci/pci.c    | 147 -------------------------------------------
 include/linux/pci.h  |   2 +-
 3 files changed, 14 insertions(+), 175 deletions(-)

diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index 67ad5b007498e..54e219b0049ea 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -16,33 +16,19 @@
 
 /* CXL 2.0 8.1.3: PCIe DVSEC for CXL Device */
 #define CXL_DVSEC_PCIE_DEVICE					0
-#define   CXL_DVSEC_CAP_OFFSET			0xA
-#define     CXL_DVSEC_CACHE_CAPABLE		BIT(0)
-#define     CXL_DVSEC_MEM_CAPABLE		BIT(2)
-#define     CXL_DVSEC_HDM_COUNT_MASK		GENMASK(5, 4)
-#define     CXL_DVSEC_CACHE_WBI_CAPABLE		BIT(6)
-#define     CXL_DVSEC_CXL_RST_CAPABLE		BIT(7)
-#define     CXL_DVSEC_CXL_RST_TIMEOUT_MASK	GENMASK(10, 8)
-#define     CXL_DVSEC_CXL_RST_MEM_CLR_CAPABLE	BIT(11)
-#define   CXL_DVSEC_CTRL_OFFSET			0xC
-#define     CXL_DVSEC_MEM_ENABLE		BIT(2)
-#define   CXL_DVSEC_CTRL2_OFFSET		0x10
-#define     CXL_DVSEC_DISABLE_CACHING		BIT(0)
-#define     CXL_DVSEC_INIT_CACHE_WBI		BIT(1)
-#define     CXL_DVSEC_INIT_CXL_RESET		BIT(2)
-#define     CXL_DVSEC_CXL_RST_MEM_CLR_ENABLE	BIT(3)
-#define   CXL_DVSEC_STATUS2_OFFSET		0x12
-#define     CXL_DVSEC_CACHE_INVALID		BIT(0)
-#define     CXL_DVSEC_CXL_RST_COMPLETE		BIT(1)
-#define     CXL_DVSEC_CXL_RESET_ERR		BIT(2)
-#define   CXL_DVSEC_RANGE_SIZE_HIGH(i)		(0x18 + ((i) * 0x10))
-#define   CXL_DVSEC_RANGE_SIZE_LOW(i)		(0x1C + ((i) * 0x10))
-#define     CXL_DVSEC_MEM_INFO_VALID		BIT(0)
-#define     CXL_DVSEC_MEM_ACTIVE		BIT(1)
-#define     CXL_DVSEC_MEM_SIZE_LOW_MASK		GENMASK(31, 28)
-#define   CXL_DVSEC_RANGE_BASE_HIGH(i)		(0x20 + ((i) * 0x10))
-#define   CXL_DVSEC_RANGE_BASE_LOW(i)		(0x24 + ((i) * 0x10))
-#define     CXL_DVSEC_MEM_BASE_LOW_MASK		GENMASK(31, 28)
+#define   CXL_DVSEC_CAP_OFFSET		0xA
+#define     CXL_DVSEC_MEM_CAPABLE	BIT(2)
+#define     CXL_DVSEC_HDM_COUNT_MASK	GENMASK(5, 4)
+#define   CXL_DVSEC_CTRL_OFFSET		0xC
+#define     CXL_DVSEC_MEM_ENABLE	BIT(2)
+#define   CXL_DVSEC_RANGE_SIZE_HIGH(i)	(0x18 + (i * 0x10))
+#define   CXL_DVSEC_RANGE_SIZE_LOW(i)	(0x1C + (i * 0x10))
+#define     CXL_DVSEC_MEM_INFO_VALID	BIT(0)
+#define     CXL_DVSEC_MEM_ACTIVE	BIT(1)
+#define     CXL_DVSEC_MEM_SIZE_LOW_MASK	GENMASK(31, 28)
+#define   CXL_DVSEC_RANGE_BASE_HIGH(i)	(0x20 + (i * 0x10))
+#define   CXL_DVSEC_RANGE_BASE_LOW(i)	(0x24 + (i * 0x10))
+#define     CXL_DVSEC_MEM_BASE_LOW_MASK	GENMASK(31, 28)
 
 #define CXL_DVSEC_RANGE_MAX		2
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 9a6943688e6db..372de7961d2a6 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -30,7 +30,6 @@
 #include <asm/dma.h>
 #include <linux/aer.h>
 #include <linux/bitfield.h>
-#include "../cxl/cxlpci.h"
 #include "pci.h"
 
 DEFINE_MUTEX(pci_slot_mutex);
@@ -5134,151 +5133,6 @@ static int cxl_reset_bus_function(struct pci_dev *dev, bool probe)
 	return rc;
 }
 
-static int cxl_reset_prepare(struct pci_dev *dev, u16 dvsec)
-{
-	u32 timeout_us = 100, timeout_tot_us = 10000;
-	u16 reg, cap;
-	int rc;
-
-	if (!pci_wait_for_pending_transaction(dev))
-		pci_err(dev, "timed out waiting for pending transaction; performing cxl reset anyway\n");
-
-	/* Check if the device is cache capable. */
-	rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CAP_OFFSET, &cap);
-	if (rc)
-		return rc;
-
-	if (!(cap & CXL_DVSEC_CACHE_CAPABLE))
-		return 0;
-
-	/* Disable cache. WB and invalidate cache if capability is advertised */
-	rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, &reg);
-	if (rc)
-		return rc;
-	reg |= CXL_DVSEC_DISABLE_CACHING;
-	/*
-	 * DEVCTL2 bits are written only once. So check WB+I capability while
-	 * keeping disable caching set.
-	 */
-	if (cap & CXL_DVSEC_CACHE_WBI_CAPABLE)
-		reg |= CXL_DVSEC_INIT_CACHE_WBI;
-	pci_write_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, reg);
-
-	/*
-	 * From Section 9.6: "Software may leverage the cache size reported in
-	 * the DVSEC CXL Capability2 register to compute a suitable timeout
-	 * value".
-	 * Given there is no conversion factor for cache size -> timeout,
-	 * setting timer for default 10ms.
-	 */
-	do {
-		if (timeout_tot_us == 0)
-			return -ETIMEDOUT;
-		usleep_range(timeout_us, timeout_us + 1);
-		timeout_tot_us -= timeout_us;
-		rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET,
-					  &reg);
-		if (rc)
-			return rc;
-	} while (!(reg & CXL_DVSEC_CACHE_INVALID));
-
-	return 0;
-}
-
-static int cxl_reset_init(struct pci_dev *dev, u16 dvsec)
-{
-	/*
-	 * Timeout values ref CXL Spec v3.2 Ch 8 Control and Status Registers,
-	 * under section 8.1.3.1 DVSEC CXL Capability.
-	 */
-	u32 reset_timeouts_ms[] = { 10, 100, 1000, 10000, 100000 };
-	u16 reg;
-	u32 timeout_ms;
-	int rc, ind;
-
-	/* Check if CXL Reset MEM CLR is supported. */
-	rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CAP_OFFSET, &reg);
-	if (rc)
-		return rc;
-
-	if (reg & CXL_DVSEC_CXL_RST_MEM_CLR_CAPABLE) {
-		rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET,
-					  &reg);
-		if (rc)
-			return rc;
-
-		reg |= CXL_DVSEC_CXL_RST_MEM_CLR_ENABLE;
-		pci_write_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, reg);
-	}
-
-	/* Read timeout value. */
-	rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CAP_OFFSET, &reg);
-	if (rc)
-		return rc;
-	ind = FIELD_GET(CXL_DVSEC_CXL_RST_TIMEOUT_MASK, reg);
-	timeout_ms = reset_timeouts_ms[ind];
-
-	/* Write reset config. */
-	rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, &reg);
-	if (rc)
-		return rc;
-
-	reg |= CXL_DVSEC_INIT_CXL_RESET;
-	pci_write_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, reg);
-
-	/* Wait till timeout and then check reset status is complete. */
-	msleep(timeout_ms);
-	rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_STATUS2_OFFSET, &reg);
-	if (rc)
-		return rc;
-	if (reg & CXL_DVSEC_CXL_RESET_ERR ||
-	    ~reg & CXL_DVSEC_CXL_RST_COMPLETE)
-		return -ETIMEDOUT;
-
-	rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, &reg);
-	if (rc)
-		return rc;
-	reg &= (~CXL_DVSEC_DISABLE_CACHING);
-	pci_write_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, reg);
-
-	return 0;
-}
-
-/**
- * cxl_reset - initiate a cxl reset
- * @dev: device to reset
- * @probe: if true, return 0 if device can be reset this way
- *
- * Initiate a cxl reset on @dev.
- */
-static int cxl_reset(struct pci_dev *dev, bool probe)
-{
-	u16 dvsec, reg;
-	int rc;
-
-	dvsec = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_CXL,
-					  CXL_DVSEC_PCIE_DEVICE);
-	if (!dvsec)
-		return -ENOTTY;
-
-	/* Check if CXL Reset is supported. */
-	rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CAP_OFFSET, &reg);
-	if (rc)
-		return -ENOTTY;
-
-	if ((reg & CXL_DVSEC_CXL_RST_CAPABLE) == 0)
-		return -ENOTTY;
-
-	if (probe)
-		return 0;
-
-	rc = cxl_reset_prepare(dev, dvsec);
-	if (rc)
-		return rc;
-
-	return cxl_reset_init(dev, dvsec);
-}
-
 void pci_dev_lock(struct pci_dev *dev)
 {
 	/* block PM suspend, driver probe, etc. */
@@ -5365,7 +5219,6 @@ const struct pci_reset_fn_method pci_reset_fn_methods[] = {
 	{ pci_dev_acpi_reset, .name = "acpi" },
 	{ pcie_reset_flr, .name = "flr" },
 	{ pci_af_flr, .name = "af_flr" },
-	{ cxl_reset, .name = "cxl_reset" },
 	{ pci_pm_reset, .name = "pm" },
 	{ pci_reset_bus_function, .name = "bus" },
 	{ cxl_reset_bus_function, .name = "cxl_bus" },
diff --git a/include/linux/pci.h b/include/linux/pci.h
index a5837cd74faad..1bdfd152eb1f8 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -51,7 +51,7 @@
 			       PCI_STATUS_PARITY)
 
 /* Number of reset methods used in pci_reset_fn_methods array in pci.c */
-#define PCI_NUM_RESET_METHODS 9
+#define PCI_NUM_RESET_METHODS 8
 
 #define PCI_RESET_PROBE		true
 #define PCI_RESET_DO_RESET	false

From 96cada1a358236846eb36504d786303787df59a4 Mon Sep 17 00:00:00 2001
From: Xichao Zhao <zhao.xichao@vivo.com>
Date: Mon, 11 Aug 2025 20:25:19 +0800
Subject: [PATCH 002/143] cxl/hdm: Use str_plural() to simplify the code

Use the string choice helper function str_plural() to simplify the code.

Signed-off-by: Xichao Zhao <zhao.xichao@vivo.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Link: https://patch.msgid.link/20250811122519.543554-1-zhao.xichao@vivo.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 22fb4ad898853323f4943de3e0dc555915547ccc)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/hdm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index e930191057c04..777b8ac0c49c1 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -197,7 +197,7 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
 	 */
 	if (should_emulate_decoders(info)) {
 		dev_dbg(dev, "Fallback map %d range register%s\n", info->ranges,
-			info->ranges > 1 ? "s" : "");
+			str_plural(info->ranges));
 		cxlhdm->decoder_count = info->ranges;
 	}
 

From 72491e4fb4c57977379ca27fb2b51f3a067a7257 Mon Sep 17 00:00:00 2001
From: Nai-Chen Cheng <bleach1827@gmail.com>
Date: Tue, 12 Aug 2025 00:49:46 +0800
Subject: [PATCH 003/143] cxl/region: use str_enabled_disabled() instead of
 ternary operator

Replace ternary operator with str_enabled_disabled() helper to enhance
code readability and consistency.

[dj: Fix spelling in commit log and subject. ]

Signed-off-by: Nai-Chen Cheng <bleach1827@gmail.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Link: https://patch.msgid.link/20250812-cxl-region-string-choices-v1-1-50200b0bc782@gmail.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 733c4e9bcec9c481afee3891218277d9ecd06599)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/region.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index adebbb1db5078..5c581b175013c 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -10,6 +10,7 @@
 #include <linux/sort.h>
 #include <linux/idr.h>
 #include <linux/memory-tiers.h>
+#include <linux/string_choices.h>
 #include <cxlmem.h>
 #include <cxl.h>
 #include "core.h"
@@ -1468,9 +1469,7 @@ static int cxl_port_setup_targets(struct cxl_port *port,
 				dev_name(port->uport_dev), dev_name(&port->dev),
 				__func__, cxld->interleave_ways,
 				cxld->interleave_granularity,
-				(cxld->flags & CXL_DECODER_F_ENABLE) ?
-					"enabled" :
-					"disabled",
+				str_enabled_disabled(cxld->flags & CXL_DECODER_F_ENABLE),
 				cxld->hpa_range.start, cxld->hpa_range.end);
 			return -ENXIO;
 		}

From bcb76b20b4b26516cd3b90087687578a731950e3 Mon Sep 17 00:00:00 2001
From: Alison Schofield <alison.schofield@intel.com>
Date: Mon, 4 Aug 2025 01:00:09 -0700
Subject: [PATCH 004/143] cxl: Move hpa_to_spa callback to a new root decoder
 ops structure

The root decoder's HPA to SPA translation logic was implemented using
a single function pointer. In preparation for additional per-decoder
callbacks, convert this into a struct cxl_rd_ops and move the
hpa_to_spa pointer into it.

To avoid maintaining a static ops instance populated with mostly NULL
pointers, allocate the ops structure dynamically only when a platform
requires overrides (e.g. XOR interleave decoding).

The setup can be extended as additional callbacks are added.

Co-developed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/818530c82c351a9c0d3a204f593068dd2126a5a9.1754290144.git.alison.schofield@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 524b2b76f365fb90a7f894ac17261ea760464e2c)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/acpi.c        | 10 +++++++---
 drivers/cxl/core/port.c   |  1 +
 drivers/cxl/core/region.c | 11 ++++++++---
 drivers/cxl/cxl.h         | 12 +++++++++---
 4 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index 87f0ed3f3f51f..de5f08122aa92 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -20,7 +20,6 @@ static const guid_t acpi_cxl_qtg_id_guid =
 	GUID_INIT(0xF365F9A6, 0xA7DE, 0x4071,
 		  0xA6, 0x6A, 0xB4, 0x0C, 0x0B, 0x4F, 0x8E, 0x52);
 
-
 static u64 cxl_xor_hpa_to_spa(struct cxl_root_decoder *cxlrd, u64 hpa)
 {
 	struct cxl_cxims_data *cximsd = cxlrd->platform_data;
@@ -472,8 +471,13 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws,
 
 	cxlrd->qos_class = cfmws->qtg_id;
 
-	if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR)
-		cxlrd->hpa_to_spa = cxl_xor_hpa_to_spa;
+	if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) {
+		cxlrd->ops = kzalloc(sizeof(*cxlrd->ops), GFP_KERNEL);
+		if (!cxlrd->ops)
+			return -ENOMEM;
+
+		cxlrd->ops->hpa_to_spa = cxl_xor_hpa_to_spa;
+	}
 
 	rc = cxl_decoder_add(cxld, target_map);
 	if (rc)
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 29197376b18e3..8f36ff413f5d5 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -450,6 +450,7 @@ static void cxl_root_decoder_release(struct device *dev)
 	if (atomic_read(&cxlrd->region_id) >= 0)
 		memregion_free(atomic_read(&cxlrd->region_id));
 	__cxl_decoder_release(&cxlrd->cxlsd.cxld);
+	kfree(cxlrd->ops);
 	kfree(cxlrd);
 }
 
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 5c581b175013c..ef1f69ba8899d 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2917,6 +2917,11 @@ static bool cxl_is_hpa_in_chunk(u64 hpa, struct cxl_region *cxlr, int pos)
 	return false;
 }
 
+static bool has_hpa_to_spa(struct cxl_root_decoder *cxlrd)
+{
+	return cxlrd->ops && cxlrd->ops->hpa_to_spa;
+}
+
 u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
 		   u64 dpa)
 {
@@ -2971,8 +2976,8 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
 	hpa = hpa_offset + p->res->start + p->cache_size;
 
 	/* Root decoder translation overrides typical modulo decode */
-	if (cxlrd->hpa_to_spa)
-		hpa = cxlrd->hpa_to_spa(cxlrd, hpa);
+	if (has_hpa_to_spa(cxlrd))
+		hpa = cxlrd->ops->hpa_to_spa(cxlrd, hpa);
 
 	if (!cxl_resource_contains_addr(p->res, hpa)) {
 		dev_dbg(&cxlr->dev,
@@ -2981,7 +2986,7 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
 	}
 
 	/* Simple chunk check, by pos & gran, only applies to modulo decodes */
-	if (!cxlrd->hpa_to_spa && (!cxl_is_hpa_in_chunk(hpa, cxlr, pos)))
+	if (!has_hpa_to_spa(cxlrd) && (!cxl_is_hpa_in_chunk(hpa, cxlr, pos)))
 		return ULLONG_MAX;
 
 	return hpa;
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 847e37be42c47..4b247ab188833 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -419,27 +419,33 @@ struct cxl_switch_decoder {
 };
 
 struct cxl_root_decoder;
-typedef u64 (*cxl_hpa_to_spa_fn)(struct cxl_root_decoder *cxlrd, u64 hpa);
+/**
+ * struct cxl_rd_ops - CXL root decoder callback operations
+ * @hpa_to_spa: Convert host physical address to system physical address
+ */
+struct cxl_rd_ops {
+	u64 (*hpa_to_spa)(struct cxl_root_decoder *cxlrd, u64 hpa);
+};
 
 /**
  * struct cxl_root_decoder - Static platform CXL address decoder
  * @res: host / parent resource for region allocations
  * @cache_size: extended linear cache size if exists, otherwise zero.
  * @region_id: region id for next region provisioning event
- * @hpa_to_spa: translate CXL host-physical-address to Platform system-physical-address
  * @platform_data: platform specific configuration data
  * @range_lock: sync region autodiscovery by address range
  * @qos_class: QoS performance class cookie
+ * @ops: CXL root decoder operations
  * @cxlsd: base cxl switch decoder
  */
 struct cxl_root_decoder {
 	struct resource *res;
 	resource_size_t cache_size;
 	atomic_t region_id;
-	cxl_hpa_to_spa_fn hpa_to_spa;
 	void *platform_data;
 	struct mutex range_lock;
 	int qos_class;
+	struct cxl_rd_ops *ops;
 	struct cxl_switch_decoder cxlsd;
 };
 

From 9adb9f38bc2e1952346c75c12a58db3eda0d3e39 Mon Sep 17 00:00:00 2001
From: Alison Schofield <alison.schofield@intel.com>
Date: Mon, 4 Aug 2025 01:00:10 -0700
Subject: [PATCH 005/143] cxl: Define a SPA->CXL HPA root decoder callback for
 XOR Math

When DPA->SPA translation was introduced, it included a helper that
applied the XOR maps to do the CXL HPA -> SPA translation for XOR
region interleaves. In preparation for adding SPA->DPA address
translation, introduce the reverse callback.

The root decoder callback is defined generically and not all usages
may be self inverting like this XOR function. Add another root decoder
callback that is the spa_to_hpa function.

Rename the existing cxl_xor_hpa_to_spa() to cxl_apply_xor_maps(), a
name that reflects what it does without directionality. Replace its
hpa parameter with the generic addr, and update the code comments to
state that the function supports translation in either direction.

Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/79d9d72230c599cae94d7221781ead6392ae6d3f.1754290144.git.alison.schofield@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit b83ee9614a3ec196111f0ae54335b99700f78b45)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/acpi.c | 27 ++++++++++++++++-----------
 drivers/cxl/cxl.h  |  2 ++
 2 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index de5f08122aa92..b9ba1c33e4d24 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -20,7 +20,7 @@ static const guid_t acpi_cxl_qtg_id_guid =
 	GUID_INIT(0xF365F9A6, 0xA7DE, 0x4071,
 		  0xA6, 0x6A, 0xB4, 0x0C, 0x0B, 0x4F, 0x8E, 0x52);
 
-static u64 cxl_xor_hpa_to_spa(struct cxl_root_decoder *cxlrd, u64 hpa)
+static u64 cxl_apply_xor_maps(struct cxl_root_decoder *cxlrd, u64 addr)
 {
 	struct cxl_cxims_data *cximsd = cxlrd->platform_data;
 	int hbiw = cxlrd->cxlsd.nr_targets;
@@ -29,19 +29,23 @@ static u64 cxl_xor_hpa_to_spa(struct cxl_root_decoder *cxlrd, u64 hpa)
 
 	/* No xormaps for host bridge interleave ways of 1 or 3 */
 	if (hbiw == 1 || hbiw == 3)
-		return hpa;
+		return addr;
 
 	/*
-	 * For root decoders using xormaps (hbiw: 2,4,6,8,12,16) restore
-	 * the position bit to its value before the xormap was applied at
-	 * HPA->DPA translation.
+	 * In regions using XOR interleave arithmetic the CXL HPA may not
+	 * be the same as the SPA. This helper performs the SPA->CXL HPA
+	 * or the CXL HPA->SPA translation. Since XOR is self-inverting,
+	 * so is this function.
+	 *
+	 * For root decoders using xormaps (hbiw: 2,4,6,8,12,16) applying the
+	 * xormaps will toggle a position bit.
 	 *
 	 * pos is the lowest set bit in an XORMAP
-	 * val is the XORALLBITS(HPA & XORMAP)
+	 * val is the XORALLBITS(addr & XORMAP)
 	 *
 	 * XORALLBITS: The CXL spec (3.1 Table 9-22) defines XORALLBITS
 	 * as an operation that outputs a single bit by XORing all the
-	 * bits in the input (hpa & xormap). Implement XORALLBITS using
+	 * bits in the input (addr & xormap). Implement XORALLBITS using
 	 * hweight64(). If the hamming weight is even the XOR of those
 	 * bits results in val==0, if odd the XOR result is val==1.
 	 */
@@ -50,11 +54,11 @@ static u64 cxl_xor_hpa_to_spa(struct cxl_root_decoder *cxlrd, u64 hpa)
 		if (!cximsd->xormaps[i])
 			continue;
 		pos = __ffs(cximsd->xormaps[i]);
-		val = (hweight64(hpa & cximsd->xormaps[i]) & 1);
-		hpa = (hpa & ~(1ULL << pos)) | (val << pos);
+		val = (hweight64(addr & cximsd->xormaps[i]) & 1);
+		addr = (addr & ~(1ULL << pos)) | (val << pos);
 	}
 
-	return hpa;
+	return addr;
 }
 
 struct cxl_cxims_context {
@@ -476,7 +480,8 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws,
 		if (!cxlrd->ops)
 			return -ENOMEM;
 
-		cxlrd->ops->hpa_to_spa = cxl_xor_hpa_to_spa;
+		cxlrd->ops->hpa_to_spa = cxl_apply_xor_maps;
+		cxlrd->ops->spa_to_hpa = cxl_apply_xor_maps;
 	}
 
 	rc = cxl_decoder_add(cxld, target_map);
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 4b247ab188833..4fe3df06f57a3 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -422,9 +422,11 @@ struct cxl_root_decoder;
 /**
  * struct cxl_rd_ops - CXL root decoder callback operations
  * @hpa_to_spa: Convert host physical address to system physical address
+ * @spa_to_hpa: Convert system physical address to host physical address
  */
 struct cxl_rd_ops {
 	u64 (*hpa_to_spa)(struct cxl_root_decoder *cxlrd, u64 hpa);
+	u64 (*spa_to_hpa)(struct cxl_root_decoder *cxlrd, u64 spa);
 };
 
 /**

From 81a3bda34d786fc94199f9cdf2c742e6df096e9f Mon Sep 17 00:00:00 2001
From: Alison Schofield <alison.schofield@intel.com>
Date: Mon, 4 Aug 2025 01:00:11 -0700
Subject: [PATCH 006/143] cxl/region: Introduce SPA to DPA address translation

Add infrastructure to translate System Physical Addresses (SPA) to
Device Physical Addresses (DPA) within CXL regions. This capability
will be used by follow-on patches that add poison inject and clear
operations at the region level.

The SPA-to-DPA translation process follows these steps:
1. Apply root decoder transformations (SPA to HPA) if configured.
2. Extract the position in region interleave from the HPA offset.
3. Extract the DPA offset from the HPA offset.
4. Use position to find endpoint decoder.
5. Use endpoint decoder to find memdev and calculate DPA from offset.
6. Return the result - a memdev and a DPA.

It is Step 1 above that makes this a driver level operation and not
work we can push to user space. Rather than exporting the XOR maps for
root decoders configured with XOR interleave, the driver performs this
complex calculation for the user.

Steps 2 and 3 follow the CXL Spec 3.2 Section 8.2.4.20.13
Implementation Note: Device Decode Logic.

These calculations mirror much of the logic introduced earlier in DPA
to SPA translation, see cxl_dpa_to_hpa(), where the driver needed to
reverse the spec defined 'Device Decode Logic'.

Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/422f0e27742c6ca9a11f7cd83e6ba9fa1a8d0c74.1754290144.git.alison.schofield@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit dc181170491bda9944f95ca39017667fe7fd767d)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/region.c | 101 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 101 insertions(+)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index ef1f69ba8899d..5892de29b470b 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2922,6 +2922,11 @@ static bool has_hpa_to_spa(struct cxl_root_decoder *cxlrd)
 	return cxlrd->ops && cxlrd->ops->hpa_to_spa;
 }
 
+static bool has_spa_to_hpa(struct cxl_root_decoder *cxlrd)
+{
+	return cxlrd->ops && cxlrd->ops->spa_to_hpa;
+}
+
 u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
 		   u64 dpa)
 {
@@ -2992,6 +2997,102 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
 	return hpa;
 }
 
+struct dpa_result {
+	struct cxl_memdev *cxlmd;
+	u64 dpa;
+};
+
+static int __maybe_unused region_offset_to_dpa_result(struct cxl_region *cxlr,
+						      u64 offset,
+						      struct dpa_result *result)
+{
+	struct cxl_region_params *p = &cxlr->params;
+	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
+	struct cxl_endpoint_decoder *cxled;
+	u64 hpa, hpa_offset, dpa_offset;
+	u64 bits_upper, bits_lower;
+	u64 shifted, rem, temp;
+	u16 eig = 0;
+	u8 eiw = 0;
+	int pos;
+
+	lockdep_assert_held(&cxl_rwsem.region);
+	lockdep_assert_held(&cxl_rwsem.dpa);
+
+	/* Input validation ensures valid ways and gran */
+	granularity_to_eig(p->interleave_granularity, &eig);
+	ways_to_eiw(p->interleave_ways, &eiw);
+
+	/*
+	 * If the root decoder has SPA to CXL HPA callback, use it. Otherwise
+	 * CXL HPA is assumed to equal SPA.
+	 */
+	if (has_spa_to_hpa(cxlrd)) {
+		hpa = cxlrd->ops->spa_to_hpa(cxlrd, p->res->start + offset);
+		hpa_offset = hpa - p->res->start;
+	} else {
+		hpa_offset = offset;
+	}
+	/*
+	 * Interleave position: CXL Spec 3.2 Section 8.2.4.20.13
+	 * eiw < 8
+	 *	Position is in the IW bits at HPA_OFFSET[IG+8+IW-1:IG+8].
+	 *	Per spec "remove IW bits starting with bit position IG+8"
+	 * eiw >= 8
+	 *	Position is not explicitly stored in HPA_OFFSET bits. It is
+	 *	derived from the modulo operation of the upper bits using
+	 *	the total number of interleave ways.
+	 */
+	if (eiw < 8) {
+		pos = (hpa_offset >> (eig + 8)) & GENMASK(eiw - 1, 0);
+	} else {
+		shifted = hpa_offset >> (eig + 8);
+		div64_u64_rem(shifted, p->interleave_ways, &rem);
+		pos = rem;
+	}
+	if (pos < 0 || pos >= p->nr_targets) {
+		dev_dbg(&cxlr->dev, "Invalid position %d for %d targets\n",
+			pos, p->nr_targets);
+		return -ENXIO;
+	}
+
+	/*
+	 * DPA offset: CXL Spec 3.2 Section 8.2.4.20.13
+	 * Lower bits [IG+7:0] pass through unchanged
+	 * (eiw < 8)
+	 *	Per spec: DPAOffset[51:IG+8] = (HPAOffset[51:IG+IW+8] >> IW)
+	 *	Clear the position bits to isolate upper section, then
+	 *	reverse the left shift by eiw that occurred during DPA->HPA
+	 * (eiw >= 8)
+	 *	Per spec: DPAOffset[51:IG+8] = HPAOffset[51:IG+IW] / 3
+	 *	Extract upper bits from the correct bit range and divide by 3
+	 *	to recover the original DPA upper bits
+	 */
+	bits_lower = hpa_offset & GENMASK_ULL(eig + 7, 0);
+	if (eiw < 8) {
+		temp = hpa_offset &= ~((u64)GENMASK(eig + eiw + 8 - 1, 0));
+		dpa_offset = temp >> eiw;
+	} else {
+		bits_upper = div64_u64(hpa_offset >> (eig + eiw), 3);
+		dpa_offset = bits_upper << (eig + 8);
+	}
+	dpa_offset |= bits_lower;
+
+	/* Look-up and return the result: a memdev and a DPA */
+	for (int i = 0; i < p->nr_targets; i++) {
+		cxled = p->targets[i];
+		if (cxled->pos != pos)
+			continue;
+		result->cxlmd = cxled_to_memdev(cxled);
+		result->dpa = cxl_dpa_resource_start(cxled) + dpa_offset;
+
+		return 0;
+	}
+	dev_err(&cxlr->dev, "No device found for position %d\n", pos);
+
+	return -ENXIO;
+}
+
 static struct lock_class_key cxl_pmem_region_key;
 
 static int cxl_pmem_region_alloc(struct cxl_region *cxlr)

From 39224db3084bf946efba3b9eece03e5253ce329c Mon Sep 17 00:00:00 2001
From: Alison Schofield <alison.schofield@intel.com>
Date: Mon, 4 Aug 2025 01:00:12 -0700
Subject: [PATCH 007/143] cxl/core: Add locked variants of the poison inject
 and clear funcs

The core functions that validate and send inject and clear commands
to the memdev devices require holding both the dpa_rwsem and the
region_rwsem.

In preparation for another caller of these functions that must hold
the locks upon entry, split the work into a locked and unlocked pair.

Consideration was given to moving the locking into both callers.
However, the existing caller (mem.c) is not in the core and cannot
access the locks.

Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/1d601f586975195733984ca63d1b5789bbe8690f.1754290144.git.alison.schofield@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 25a0207828bc52f1ebb6588f9417eb43ca4960a3)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/memdev.c | 52 +++++++++++++++++++++++++++------------
 drivers/cxl/cxlmem.h      |  2 ++
 2 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index c569e00a511f4..90d3390d9c7c6 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -276,7 +276,7 @@ static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa)
 	return 0;
 }
 
-int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
+int cxl_inject_poison_locked(struct cxl_memdev *cxlmd, u64 dpa)
 {
 	struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
 	struct cxl_mbox_inject_poison inject;
@@ -288,13 +288,8 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
 	if (!IS_ENABLED(CONFIG_DEBUG_FS))
 		return 0;
 
-	ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
-	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
-		return rc;
-
-	ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
-	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
-		return rc;
+	lockdep_assert_held(&cxl_rwsem.dpa);
+	lockdep_assert_held(&cxl_rwsem.region);
 
 	rc = cxl_validate_poison_dpa(cxlmd, dpa);
 	if (rc)
@@ -324,9 +319,24 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
 
 	return 0;
 }
+
+int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
+{
+	int rc;
+
+	ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
+	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
+		return rc;
+
+	ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
+	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
+		return rc;
+
+	return cxl_inject_poison_locked(cxlmd, dpa);
+}
 EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, "CXL");
 
-int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
+int cxl_clear_poison_locked(struct cxl_memdev *cxlmd, u64 dpa)
 {
 	struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
 	struct cxl_mbox_clear_poison clear;
@@ -338,13 +348,8 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
 	if (!IS_ENABLED(CONFIG_DEBUG_FS))
 		return 0;
 
-	ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
-	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
-		return rc;
-
-	ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
-	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
-		return rc;
+	lockdep_assert_held(&cxl_rwsem.dpa);
+	lockdep_assert_held(&cxl_rwsem.region);
 
 	rc = cxl_validate_poison_dpa(cxlmd, dpa);
 	if (rc)
@@ -383,6 +388,21 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
 
 	return 0;
 }
+
+int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
+{
+	int rc;
+
+	ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
+	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
+		return rc;
+
+	ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
+	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
+		return rc;
+
+	return cxl_clear_poison_locked(cxlmd, dpa);
+}
 EXPORT_SYMBOL_NS_GPL(cxl_clear_poison, "CXL");
 
 static struct attribute *cxl_memdev_attributes[] = {
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 751478dfc4106..434031a0c1f74 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -869,6 +869,8 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
 int cxl_trigger_poison_list(struct cxl_memdev *cxlmd);
 int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa);
 int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa);
+int cxl_inject_poison_locked(struct cxl_memdev *cxlmd, u64 dpa);
+int cxl_clear_poison_locked(struct cxl_memdev *cxlmd, u64 dpa);
 
 #ifdef CONFIG_CXL_EDAC_MEM_FEATURES
 int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd);

From 1b68abfb430981aaa26a99b333e7e4894e6bedcc Mon Sep 17 00:00:00 2001
From: Alison Schofield <alison.schofield@intel.com>
Date: Mon, 4 Aug 2025 01:00:13 -0700
Subject: [PATCH 008/143] cxl/region: Add inject and clear poison by region
 offset

Add CXL region debugfs attributes to inject and clear poison based
on an offset into the region. These new interfaces allow users to
operate on poison at the region level without needing to resolve
Device Physical Addresses (DPA) or target individual memdevs.

The implementation uses a new helper, region_offset_to_dpa_result()
that applies decoder interleave logic, including XOR-based address
decoding when applicable. Note that XOR decodes rely on driver
internal xormaps, which are not exposed to userspace. So this support
not only simplifies poison operations that could already be done with
the existing per-memdev operations, but also enables this
functionality for XOR interleaved regions for the first time.

New debugfs attributes are added in /sys/kernel/debug/cxl/regionX/:
inject_poison and clear_poison. These are only exposed if all memdevs
participating in the region support both inject and clear commands,
ensuring consistent and reliable behavior across multi-device regions.

If tracing is enabled, these operations are logged as cxl_poison
events in /sys/kernel/tracing/trace.

The ABI documentation warns users of the significant risks that
come with using these capabilities.

A CXL Maturity Map update shows this user flow is now supported.

Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/f3fd8628ab57ea79704fb2d645902cd499c066af.1754290144.git.alison.schofield@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit c3dd67681c70cc95cc2c889b1b58a1667bb1c48b)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 Documentation/ABI/testing/debugfs-cxl         |  87 ++++++++++++
 Documentation/driver-api/cxl/maturity-map.rst |   2 +-
 drivers/cxl/core/core.h                       |   4 +
 drivers/cxl/core/memdev.c                     |   8 ++
 drivers/cxl/core/region.c                     | 131 +++++++++++++++++-
 5 files changed, 228 insertions(+), 4 deletions(-)

diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl
index e95e21f131e96..2989d4da96c1b 100644
--- a/Documentation/ABI/testing/debugfs-cxl
+++ b/Documentation/ABI/testing/debugfs-cxl
@@ -19,6 +19,20 @@ Description:
 		is returned to the user. The inject_poison attribute is only
 		visible for devices supporting the capability.
 
+		TEST-ONLY INTERFACE: This interface is intended for testing
+		and validation purposes only. It is not a data repair mechanism
+		and should never be used on production systems or live data.
+
+		DATA LOSS RISK: For CXL persistent memory (PMEM) devices,
+		poison injection can result in permanent data loss. Injected
+		poison may render data permanently inaccessible even after
+		clearing, as the clear operation writes zeros and does not
+		recover original data.
+
+		SYSTEM STABILITY RISK: For volatile memory, poison injection
+		can cause kernel crashes, system instability, or unpredictable
+		behavior if the poisoned addresses are accessed by running code
+		or critical kernel structures.
 
 What:		/sys/kernel/debug/cxl/memX/clear_poison
 Date:		April, 2023
@@ -35,6 +49,79 @@ Description:
 		The clear_poison attribute is only visible for devices
 		supporting the capability.
 
+		TEST-ONLY INTERFACE: This interface is intended for testing
+		and validation purposes only. It is not a data repair mechanism
+		and should never be used on production systems or live data.
+
+		CLEAR IS NOT DATA RECOVERY: This operation writes zeros to the
+		specified address range and removes the address from the poison
+		list. It does NOT recover or restore original data that may have
+		been present before poison injection. Any original data at the
+		cleared address is permanently lost and replaced with zeros.
+
+		CLEAR IS NOT A REPAIR MECHANISM: This interface is for testing
+		purposes only and should not be used as a data repair tool.
+		Clearing poison is fundamentally different from data recovery
+		or error correction.
+
+What:		/sys/kernel/debug/cxl/regionX/inject_poison
+Date:		August, 2025
+Contact:	linux-cxl@vger.kernel.org
+Description:
+		(WO) When an offset into the region is written to this
+		attribute, the region driver translates it to a Device
+		Physical Address (DPA) and identifies the corresponding
+		memdev. It then sends an inject poison command to that memdev
+		at the translated DPA. Refer to the memdev ABI entry at:
+		/sys/kernel/debug/cxl/memX/inject_poison for the detailed
+		behavior. This attribute is only visible if all memdevs
+		participating in the region support both inject and clear
+		poison commands.
+
+		TEST-ONLY INTERFACE: This interface is intended for testing
+		and validation purposes only. It is not a data repair mechanism
+		and should never be used on production systems or live data.
+
+		DATA LOSS RISK: For CXL persistent memory (PMEM) devices,
+		poison injection can result in permanent data loss. Injected
+		poison may render data permanently inaccessible even after
+		clearing, as the clear operation writes zeros and does not
+		recover original data.
+
+		SYSTEM STABILITY RISK: For volatile memory, poison injection
+		can cause kernel crashes, system instability, or unpredictable
+		behavior if the poisoned addresses are accessed by running code
+		or critical kernel structures.
+
+What:		/sys/kernel/debug/cxl/regionX/clear_poison
+Date:		August, 2025
+Contact:	linux-cxl@vger.kernel.org
+Description:
+		(WO) When an offset into the region is written to this
+		attribute, the region driver translates it to a Device
+		Physical Address (DPA) and identifies the corresponding
+		memdev. It then sends a clear poison command to that memdev
+		at the translated DPA. Refer to the memdev ABI entry at:
+		/sys/kernel/debug/cxl/memX/clear_poison for the detailed
+		behavior. This attribute is only visible if all memdevs
+		participating in the region support both inject and clear
+		poison commands.
+
+		TEST-ONLY INTERFACE: This interface is intended for testing
+		and validation purposes only. It is not a data repair mechanism
+		and should never be used on production systems or live data.
+
+		CLEAR IS NOT DATA RECOVERY: This operation writes zeros to the
+		specified address range and removes the address from the poison
+		list. It does NOT recover or restore original data that may have
+		been present before poison injection. Any original data at the
+		cleared address is permanently lost and replaced with zeros.
+
+		CLEAR IS NOT A REPAIR MECHANISM: This interface is for testing
+		purposes only and should not be used as a data repair tool.
+		Clearing poison is fundamentally different from data recovery
+		or error correction.
+
 What:		/sys/kernel/debug/cxl/einj_types
 Date:		January, 2024
 KernelVersion:	v6.9
diff --git a/Documentation/driver-api/cxl/maturity-map.rst b/Documentation/driver-api/cxl/maturity-map.rst
index 1330f3f52129a..282c1102dd819 100644
--- a/Documentation/driver-api/cxl/maturity-map.rst
+++ b/Documentation/driver-api/cxl/maturity-map.rst
@@ -173,7 +173,7 @@ Accelerator
 User Flow Support
 -----------------
 
-* [0] Inject & clear poison by HPA
+* [2] Inject & clear poison by region offset
 
 Details
 =======
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 2669f251d6775..eac8cc1bdaa07 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -135,6 +135,10 @@ enum cxl_poison_trace_type {
 	CXL_POISON_TRACE_CLEAR,
 };
 
+enum poison_cmd_enabled_bits;
+bool cxl_memdev_has_poison_cmd(struct cxl_memdev *cxlmd,
+			       enum poison_cmd_enabled_bits cmd);
+
 long cxl_pci_get_latency(struct pci_dev *pdev);
 int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c);
 int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr,
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index 90d3390d9c7c6..e370d733e4400 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -200,6 +200,14 @@ static ssize_t security_erase_store(struct device *dev,
 static struct device_attribute dev_attr_security_erase =
 	__ATTR(erase, 0200, NULL, security_erase_store);
 
+bool cxl_memdev_has_poison_cmd(struct cxl_memdev *cxlmd,
+			       enum poison_cmd_enabled_bits cmd)
+{
+	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+
+	return test_bit(cmd, mds->poison.enabled_cmds);
+}
+
 static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd)
 {
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 5892de29b470b..04d326c274875 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2,6 +2,7 @@
 /* Copyright(c) 2022 Intel Corporation. All rights reserved. */
 #include <linux/memregion.h>
 #include <linux/genalloc.h>
+#include <linux/debugfs.h>
 #include <linux/device.h>
 #include <linux/module.h>
 #include <linux/memory.h>
@@ -3002,9 +3003,8 @@ struct dpa_result {
 	u64 dpa;
 };
 
-static int __maybe_unused region_offset_to_dpa_result(struct cxl_region *cxlr,
-						      u64 offset,
-						      struct dpa_result *result)
+static int region_offset_to_dpa_result(struct cxl_region *cxlr, u64 offset,
+				       struct dpa_result *result)
 {
 	struct cxl_region_params *p = &cxlr->params;
 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
@@ -3652,6 +3652,105 @@ static void shutdown_notifiers(void *_cxlr)
 	unregister_mt_adistance_algorithm(&cxlr->adist_notifier);
 }
 
+static void remove_debugfs(void *dentry)
+{
+	debugfs_remove_recursive(dentry);
+}
+
+static int validate_region_offset(struct cxl_region *cxlr, u64 offset)
+{
+	struct cxl_region_params *p = &cxlr->params;
+	resource_size_t region_size;
+	u64 hpa;
+
+	if (offset < p->cache_size) {
+		dev_err(&cxlr->dev,
+			"Offset %#llx is within extended linear cache %#llx\n",
+			offset, p->cache_size);
+		return -EINVAL;
+	}
+
+	region_size = resource_size(p->res);
+	if (offset >= region_size) {
+		dev_err(&cxlr->dev, "Offset %#llx exceeds region size %#llx\n",
+			offset, region_size);
+		return -EINVAL;
+	}
+
+	hpa = p->res->start + offset;
+	if (hpa < p->res->start || hpa > p->res->end) {
+		dev_err(&cxlr->dev, "HPA %#llx not in region %pr\n", hpa,
+			p->res);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int cxl_region_debugfs_poison_inject(void *data, u64 offset)
+{
+	struct dpa_result result = { .dpa = ULLONG_MAX, .cxlmd = NULL };
+	struct cxl_region *cxlr = data;
+	int rc;
+
+	ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
+	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
+		return rc;
+
+	ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
+	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
+		return rc;
+
+	if (validate_region_offset(cxlr, offset))
+		return -EINVAL;
+
+	rc = region_offset_to_dpa_result(cxlr, offset, &result);
+	if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) {
+		dev_dbg(&cxlr->dev,
+			"Failed to resolve DPA for region offset %#llx rc %d\n",
+			offset, rc);
+
+		return rc ? rc : -EINVAL;
+	}
+
+	return cxl_inject_poison_locked(result.cxlmd, result.dpa);
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_inject_fops, NULL,
+			 cxl_region_debugfs_poison_inject, "%llx\n");
+
+static int cxl_region_debugfs_poison_clear(void *data, u64 offset)
+{
+	struct dpa_result result = { .dpa = ULLONG_MAX, .cxlmd = NULL };
+	struct cxl_region *cxlr = data;
+	int rc;
+
+	ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region);
+	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &region_rwsem)))
+		return rc;
+
+	ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa);
+	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem)))
+		return rc;
+
+	if (validate_region_offset(cxlr, offset))
+		return -EINVAL;
+
+	rc = region_offset_to_dpa_result(cxlr, offset, &result);
+	if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) {
+		dev_dbg(&cxlr->dev,
+			"Failed to resolve DPA for region offset %#llx rc %d\n",
+			offset, rc);
+
+		return rc ? rc : -EINVAL;
+	}
+
+	return cxl_clear_poison_locked(result.cxlmd, result.dpa);
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_clear_fops, NULL,
+			 cxl_region_debugfs_poison_clear, "%llx\n");
+
 static int cxl_region_can_probe(struct cxl_region *cxlr)
 {
 	struct cxl_region_params *p = &cxlr->params;
@@ -3681,6 +3780,7 @@ static int cxl_region_probe(struct device *dev)
 {
 	struct cxl_region *cxlr = to_cxl_region(dev);
 	struct cxl_region_params *p = &cxlr->params;
+	bool poison_supported = true;
 	int rc;
 
 	rc = cxl_region_can_probe(cxlr);
@@ -3704,6 +3804,31 @@ static int cxl_region_probe(struct device *dev)
 	if (rc)
 		return rc;
 
+	/* Create poison attributes if all memdevs support the capabilities */
+	for (int i = 0; i < p->nr_targets; i++) {
+		struct cxl_endpoint_decoder *cxled = p->targets[i];
+		struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+
+		if (!cxl_memdev_has_poison_cmd(cxlmd, CXL_POISON_ENABLED_INJECT) ||
+		    !cxl_memdev_has_poison_cmd(cxlmd, CXL_POISON_ENABLED_CLEAR)) {
+			poison_supported = false;
+			break;
+		}
+	}
+
+	if (poison_supported) {
+		struct dentry *dentry;
+
+		dentry = cxl_debugfs_create_dir(dev_name(dev));
+		debugfs_create_file("inject_poison", 0200, dentry, cxlr,
+				    &cxl_poison_inject_fops);
+		debugfs_create_file("clear_poison", 0200, dentry, cxlr,
+				    &cxl_poison_clear_fops);
+		rc = devm_add_action_or_reset(dev, remove_debugfs, dentry);
+		if (rc)
+			return rc;
+	}
+
 	switch (cxlr->mode) {
 	case CXL_PARTMODE_PMEM:
 		rc = devm_cxl_region_edac_register(cxlr);

From 803e7861416e492ce663cb32d61b70dfb816e458 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Mon, 18 Aug 2025 08:39:53 -0700
Subject: [PATCH 009/143] cxl: Fix emit of type resource_size_t argument for
 validate_region_offset()

0day reported warnings of:
drivers/cxl/core/region.c:3664:25: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 4 has type 'resource_size_t' {aka 'unsigned int'} [-Wformat=]

drivers/cxl/core/region.c:3671:37: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 4 has type 'resource_size_t' {aka 'unsigned int'} [-Wformat=]

Replace %#llx with %pr to emit resource_size_t arguments.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202508160513.NAZ9i9rQ-lkp@intel.com/
Cc: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Link: https://patch.msgid.link/20250818153953.3658952-1-dave.jiang@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit e6a9530b3ee7407b70b60e4df70688db0d239e1a)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/region.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 04d326c274875..d9d65229eb58a 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -3665,15 +3665,15 @@ static int validate_region_offset(struct cxl_region *cxlr, u64 offset)
 
 	if (offset < p->cache_size) {
 		dev_err(&cxlr->dev,
-			"Offset %#llx is within extended linear cache %#llx\n",
-			offset, p->cache_size);
+			"Offset %#llx is within extended linear cache %pr\n",
+			offset, &p->cache_size);
 		return -EINVAL;
 	}
 
 	region_size = resource_size(p->res);
 	if (offset >= region_size) {
-		dev_err(&cxlr->dev, "Offset %#llx exceeds region size %#llx\n",
-			offset, region_size);
+		dev_err(&cxlr->dev, "Offset %#llx exceeds region size %pr\n",
+			offset, &region_size);
 		return -EINVAL;
 	}
 

From a77661f34717e3ad46331341dd5f1c3f61367a7e Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Fri, 29 Aug 2025 15:29:04 -0700
Subject: [PATCH 010/143] mm/memory_hotplug: Update comment for hotplug memory
 callback priorities

Add clarification to comment for memory hotplug callback ordering as the
current comment does not provide clear language on which callback happens
first.

Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20250829222907.1290912-2-dave.jiang@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 65128868bb3b0621d2d8e71f19852675a064b373)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 include/linux/memory.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/memory.h b/include/linux/memory.h
index 2a770e7c6ab1e..d231a2323331a 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -115,8 +115,8 @@ struct notifier_block;
 struct mem_section;
 
 /*
- * Priorities for the hotplug memory callback routines (stored in decreasing
- * order in the callback chain)
+ * Priorities for the hotplug memory callback routines. Invoked from
+ * high to low. Higher priorities correspond to higher numbers.
  */
 #define DEFAULT_CALLBACK_PRI	0
 #define SLAB_CALLBACK_PRI	1

From 7615f21661b24857c82bf91baf50789873879790 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Fri, 29 Aug 2025 15:29:05 -0700
Subject: [PATCH 011/143] drivers/base/node: Add a helper function
 node_update_perf_attrs()

Add helper function node_update_perf_attrs() to allow update of node access
coordinates computed by an external agent such as CXL. The helper allows
updating of coordinates after the attributes have been created by HMAT.

Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20250829222907.1290912-3-dave.jiang@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit b57fc652ca24ada3b0c888327f9944ed21559286)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/base/node.c  | 38 ++++++++++++++++++++++++++++++++++++++
 include/linux/node.h |  8 ++++++++
 2 files changed, 46 insertions(+)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 67b01d5797377..3e2329ccb618d 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -248,6 +248,44 @@ void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord,
 }
 EXPORT_SYMBOL_GPL(node_set_perf_attrs);
 
+/**
+ * node_update_perf_attrs - Update the performance values for given access class
+ * @nid: Node identifier to be updated
+ * @coord: Heterogeneous memory performance coordinates
+ * @access: The access class for the given attributes
+ */
+void node_update_perf_attrs(unsigned int nid, struct access_coordinate *coord,
+			    enum access_coordinate_class access)
+{
+	struct node_access_nodes *access_node;
+	struct node *node;
+	int i;
+
+	if (WARN_ON_ONCE(!node_online(nid)))
+		return;
+
+	node = node_devices[nid];
+	list_for_each_entry(access_node, &node->access_list, list_node) {
+		if (access_node->access != access)
+			continue;
+
+		access_node->coord = *coord;
+		for (i = 0; access_attrs[i]; i++) {
+			sysfs_notify(&access_node->dev.kobj,
+				     NULL, access_attrs[i]->name);
+		}
+		break;
+	}
+
+	/* When setting CPU access coordinates, update mempolicy */
+	if (access != ACCESS_COORDINATE_CPU)
+		return;
+
+	if (mempolicy_set_node_perf(nid, coord))
+		pr_info("failed to set mempolicy attrs for node %d\n", nid);
+}
+EXPORT_SYMBOL_GPL(node_update_perf_attrs);
+
 /**
  * struct node_cache_info - Internal tracking for memory node caches
  * @dev:	Device represeting the cache level
diff --git a/include/linux/node.h b/include/linux/node.h
index 2c7529335b21a..866e3323f1fdc 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -85,6 +85,8 @@ struct node_cache_attrs {
 void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs);
 void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord,
 			 enum access_coordinate_class access);
+void node_update_perf_attrs(unsigned int nid, struct access_coordinate *coord,
+			    enum access_coordinate_class access);
 #else
 static inline void node_add_cache(unsigned int nid,
 				  struct node_cache_attrs *cache_attrs)
@@ -96,6 +98,12 @@ static inline void node_set_perf_attrs(unsigned int nid,
 				       enum access_coordinate_class access)
 {
 }
+
+static inline void node_update_perf_attrs(unsigned int nid,
+					  struct access_coordinate *coord,
+					  enum access_coordinate_class access)
+{
+}
 #endif
 
 struct node {

From 184617310d5312f33d2e527647482af30018bce7 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Fri, 29 Aug 2025 15:29:06 -0700
Subject: [PATCH 012/143] cxl, acpi/hmat: Update CXL access coordinates
 directly instead of through HMAT

The current implementation of CXL memory hotplug notifier gets called
before the HMAT memory hotplug notifier. The CXL driver calculates the
access coordinates (bandwidth and latency values) for the CXL end to
end path (i.e. CPU to endpoint). When the CXL region is onlined, the CXL
memory hotplug notifier writes the access coordinates to the HMAT target
structs. Then the HMAT memory hotplug notifier is called and it creates
the access coordinates for the node sysfs attributes.

During testing on an Intel platform, it was found that although the
newly calculated coordinates were pushed to sysfs, the sysfs attributes for
the access coordinates showed up with the wrong initiator. The system has
4 nodes (0, 1, 2, 3) where node 0 and 1 are CPU nodes and node 2 and 3 are
CXL nodes. The expectation is that node 2 would show up as a target to node
0:
/sys/devices/system/node/node2/access0/initiators/node0

However it was observed that node 2 showed up as a target under node 1:
/sys/devices/system/node/node2/access0/initiators/node1

The original intent of the 'ext_updated' flag in HMAT handling code was to
stop HMAT memory hotplug callback from clobbering the access coordinates
after CXL has injected its calculated coordinates and replaced the generic
target access coordinates provided by the HMAT table in the HMAT target
structs. However the flag is hacky at best and blocks the updates from
other CXL regions that are onlined in the same node later on. Remove the
'ext_updated' flag usage and just update the access coordinates for the
nodes directly without touching HMAT target data.

The hotplug memory callback ordering is changed. Instead of changing CXL,
move HMAT back so there's room for the levels rather than have CXL share
the same level as SLAB_CALLBACK_PRI. The change will result in the CXL
callback being executed after the HMAT callback.

With the change, the CXL hotplug memory notifier runs after the HMAT
callback. The HMAT callback will create the node sysfs attributes for
access coordinates. The CXL callback will write the access coordinates to
the now created node sysfs attributes directly and will not pollute the
HMAT target values.

A nodemask is introduced to keep track of whether a node has been updated
and to prevent further updates.

Fixes: 067353a46d8c ("cxl/region: Add memory hotplug notifier for cxl region")
Cc: stable@vger.kernel.org
Tested-by: Marc Herbert <marc.herbert@linux.intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20250829222907.1290912-4-dave.jiang@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 2e454fb8056df6da4bba7d89a57bf60e217463c0)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/acpi/numa/hmat.c  |  6 ------
 drivers/cxl/core/cdat.c   |  5 -----
 drivers/cxl/core/core.h   |  1 -
 drivers/cxl/core/region.c | 20 ++++++++++++--------
 include/linux/memory.h    |  2 +-
 5 files changed, 13 insertions(+), 21 deletions(-)

diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
index 9085375830605..f153a2c18f1d9 100644
--- a/drivers/acpi/numa/hmat.c
+++ b/drivers/acpi/numa/hmat.c
@@ -74,7 +74,6 @@ struct memory_target {
 	struct node_cache_attrs cache_attrs;
 	u8 gen_port_device_handle[ACPI_SRAT_DEVICE_HANDLE_SIZE];
 	bool registered;
-	bool ext_updated;	/* externally updated */
 };
 
 struct memory_initiator {
@@ -391,7 +390,6 @@ int hmat_update_target_coordinates(int nid, struct access_coordinate *coord,
 				  coord->read_bandwidth, access);
 	hmat_update_target_access(target, ACPI_HMAT_WRITE_BANDWIDTH,
 				  coord->write_bandwidth, access);
-	target->ext_updated = true;
 
 	return 0;
 }
@@ -773,10 +771,6 @@ static void hmat_update_target_attrs(struct memory_target *target,
 	u32 best = 0;
 	int i;
 
-	/* Don't update if an external agent has changed the data.  */
-	if (target->ext_updated)
-		return;
-
 	/* Don't update for generic port if there's no device handle */
 	if ((access == NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL ||
 	     access == NODE_ACCESS_CLASS_GENPORT_SINK_CPU) &&
diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c
index c0af645425f4a..c891fd618cfda 100644
--- a/drivers/cxl/core/cdat.c
+++ b/drivers/cxl/core/cdat.c
@@ -1081,8 +1081,3 @@ int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr,
 {
 	return hmat_update_target_coordinates(nid, &cxlr->coord[access], access);
 }
-
-bool cxl_need_node_perf_attrs_update(int nid)
-{
-	return !acpi_node_backed_by_real_pxm(nid);
-}
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index eac8cc1bdaa07..e5157a328f30c 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -143,7 +143,6 @@ long cxl_pci_get_latency(struct pci_dev *pdev);
 int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c);
 int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr,
 				       enum access_coordinate_class access);
-bool cxl_need_node_perf_attrs_update(int nid);
 int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port,
 					struct access_coordinate *c);
 
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index d9d65229eb58a..238b148768148 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -32,6 +32,12 @@
  * 3. Decoder targets
  */
 
+/*
+ * nodemask that sets per node when the access_coordinates for the node has
+ * been updated by the CXL memory hotplug notifier.
+ */
+static nodemask_t nodemask_region_seen = NODE_MASK_NONE;
+
 static struct cxl_region *to_cxl_region(struct device *dev);
 
 #define __ACCESS_ATTR_RO(_level, _name) {				\
@@ -2442,14 +2448,8 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
 
 	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
 		if (cxlr->coord[i].read_bandwidth) {
-			rc = 0;
-			if (cxl_need_node_perf_attrs_update(nid))
-				node_set_perf_attrs(nid, &cxlr->coord[i], i);
-			else
-				rc = cxl_update_hmat_access_coordinates(nid, cxlr, i);
-
-			if (rc == 0)
-				cset++;
+			node_update_perf_attrs(nid, &cxlr->coord[i], i);
+			cset++;
 		}
 	}
 
@@ -2487,6 +2487,10 @@ static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
 	if (nid != region_nid)
 		return NOTIFY_DONE;
 
+	/* No action needed if node bit already set */
+	if (node_test_and_set(nid, nodemask_region_seen))
+		return NOTIFY_DONE;
+
 	if (!cxl_region_update_coordinates(cxlr, nid))
 		return NOTIFY_DONE;
 
diff --git a/include/linux/memory.h b/include/linux/memory.h
index d231a2323331a..55f0a47c85ebf 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -120,8 +120,8 @@ struct mem_section;
  */
 #define DEFAULT_CALLBACK_PRI	0
 #define SLAB_CALLBACK_PRI	1
-#define HMAT_CALLBACK_PRI	2
 #define CXL_CALLBACK_PRI	5
+#define HMAT_CALLBACK_PRI	6
 #define MM_COMPUTE_BATCH_PRI	10
 #define CPUSET_CALLBACK_PRI	10
 #define MEMTIER_HOTPLUG_PRI	100

From dccc854c5b09521b1a8a2a3d80f87098c65c2ab3 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Fri, 29 Aug 2025 15:29:07 -0700
Subject: [PATCH 013/143] acpi/hmat: Remove now unused
 hmat_update_target_coordinates()

Remove deadcode since CXL no longer calls hmat_update_target_coordinates().

Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20250829222907.1290912-5-dave.jiang@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit e99ecbc4c89adf551cccbbc00b5cb08c50969af6)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/acpi/numa/hmat.c | 28 ----------------------------
 drivers/cxl/core/cdat.c  |  6 ------
 drivers/cxl/core/core.h  |  2 --
 include/linux/acpi.h     | 12 ------------
 4 files changed, 48 deletions(-)

diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
index f153a2c18f1d9..11e4483685c9c 100644
--- a/drivers/acpi/numa/hmat.c
+++ b/drivers/acpi/numa/hmat.c
@@ -367,34 +367,6 @@ static void hmat_update_target_access(struct memory_target *target,
 	}
 }
 
-int hmat_update_target_coordinates(int nid, struct access_coordinate *coord,
-				   enum access_coordinate_class access)
-{
-	struct memory_target *target;
-	int pxm;
-
-	if (nid == NUMA_NO_NODE)
-		return -EINVAL;
-
-	pxm = node_to_pxm(nid);
-	guard(mutex)(&target_lock);
-	target = find_mem_target(pxm);
-	if (!target)
-		return -ENODEV;
-
-	hmat_update_target_access(target, ACPI_HMAT_READ_LATENCY,
-				  coord->read_latency, access);
-	hmat_update_target_access(target, ACPI_HMAT_WRITE_LATENCY,
-				  coord->write_latency, access);
-	hmat_update_target_access(target, ACPI_HMAT_READ_BANDWIDTH,
-				  coord->read_bandwidth, access);
-	hmat_update_target_access(target, ACPI_HMAT_WRITE_BANDWIDTH,
-				  coord->write_bandwidth, access);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(hmat_update_target_coordinates);
-
 static __init void hmat_add_locality(struct acpi_hmat_locality *hmat_loc)
 {
 	struct memory_locality *loc;
diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c
index c891fd618cfda..bca1ec279651d 100644
--- a/drivers/cxl/core/cdat.c
+++ b/drivers/cxl/core/cdat.c
@@ -1075,9 +1075,3 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
 		cxlr->coord[i].write_bandwidth += perf->coord[i].write_bandwidth;
 	}
 }
-
-int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr,
-				       enum access_coordinate_class access)
-{
-	return hmat_update_target_coordinates(nid, &cxlr->coord[access], access);
-}
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index e5157a328f30c..5707cd60a8eb0 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -141,8 +141,6 @@ bool cxl_memdev_has_poison_cmd(struct cxl_memdev *cxlmd,
 
 long cxl_pci_get_latency(struct pci_dev *pdev);
 int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c);
-int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr,
-				       enum access_coordinate_class access);
 int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port,
 					struct access_coordinate *c);
 
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 42cbeaba2a510..0c6087ea979b2 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1637,18 +1637,6 @@ static inline void acpi_use_parent_companion(struct device *dev)
 	ACPI_COMPANION_SET(dev, ACPI_COMPANION(dev->parent));
 }
 
-#ifdef CONFIG_ACPI_HMAT
-int hmat_update_target_coordinates(int nid, struct access_coordinate *coord,
-				   enum access_coordinate_class access);
-#else
-static inline int hmat_update_target_coordinates(int nid,
-						 struct access_coordinate *coord,
-						 enum access_coordinate_class access)
-{
-	return -EOPNOTSUPP;
-}
-#endif
-
 #ifdef CONFIG_ACPI_NUMA
 bool acpi_node_backed_by_real_pxm(int nid);
 #else

From c21080e76843e965ec8736b8e6dd77f0f6678939 Mon Sep 17 00:00:00 2001
From: Rakuram Eswaran <rakuram.e96@gmail.com>
Date: Mon, 18 Aug 2025 23:23:34 +0530
Subject: [PATCH 014/143] Documentation/driver-api: Fix typo error in cxl

Fix the following typo:

intersparsed ==> interspersed
in Documentation/driver-api/cxl/platform/bios-and-efi.rst

Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Gregory Price <gourry@gourry.net>
Link: https://patch.msgid.link/20250818175335.5312-1-rakuram.e96@gmail.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit a414408126d13d6d5b2d2c4e537295771cc256cb)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 Documentation/driver-api/cxl/platform/bios-and-efi.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/driver-api/cxl/platform/bios-and-efi.rst b/Documentation/driver-api/cxl/platform/bios-and-efi.rst
index 645322632cc9b..a9aa0ccd92af7 100644
--- a/Documentation/driver-api/cxl/platform/bios-and-efi.rst
+++ b/Documentation/driver-api/cxl/platform/bios-and-efi.rst
@@ -202,7 +202,7 @@ future and such a configuration should be avoided.
 
 Memory Holes
 ------------
-If your platform includes memory holes intersparsed between your CXL memory, it
+If your platform includes memory holes interspersed between your CXL memory, it
 is recommended to utilize multiple decoders to cover these regions of memory,
 rather than try to program the decoders to accept the entire range and expect
 Linux to manage the overlap.

From feab42d8032e913a466f30c41c1a07b8db37aacc Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Mon, 8 Sep 2025 09:00:34 -0700
Subject: [PATCH 015/143] cxl/acpi: Rename CFMW coherency restrictions

ACPICA commit 710745713ad3a2543dbfb70e84764f31f0e46bdc

This has been renamed in more recent CXL specs, as
type3 (memory expanders) can also use HDM-DB for
device coherent memory.

Link: https://github.com/acpica/acpica/commit/710745713ad3a2543dbfb70e84764f31f0e46bdc
Acked-by: Rafael J. Wysocki (Intel) <rafael@kernel.org>
Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Gregory Price <gourry@gourry.net>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Link: https://patch.msgid.link/20250908160034.86471-1-dave@stgolabs.net
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit c4272905c37930c19b54fa3549b22899122ce69e)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/acpi.c           |  4 ++--
 include/acpi/actbl1.h        |  4 ++--
 tools/testing/cxl/test/cxl.c | 18 +++++++++---------
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index b9ba1c33e4d24..b8f124685f1dc 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -116,9 +116,9 @@ static unsigned long cfmws_to_decoder_flags(int restrictions)
 {
 	unsigned long flags = CXL_DECODER_F_ENABLE;
 
-	if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_TYPE2)
+	if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_DEVMEM)
 		flags |= CXL_DECODER_F_TYPE2;
-	if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_TYPE3)
+	if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM)
 		flags |= CXL_DECODER_F_TYPE3;
 	if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_VOLATILE)
 		flags |= CXL_DECODER_F_RAM;
diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h
index 99fd1588ff382..eb787dfbd2fa0 100644
--- a/include/acpi/actbl1.h
+++ b/include/acpi/actbl1.h
@@ -560,8 +560,8 @@ struct acpi_cedt_cfmws_target_element {
 
 /* Values for Restrictions field above */
 
-#define ACPI_CEDT_CFMWS_RESTRICT_TYPE2      (1)
-#define ACPI_CEDT_CFMWS_RESTRICT_TYPE3      (1<<1)
+#define ACPI_CEDT_CFMWS_RESTRICT_DEVMEM      (1)
+#define ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM (1<<1)
 #define ACPI_CEDT_CFMWS_RESTRICT_VOLATILE   (1<<2)
 #define ACPI_CEDT_CFMWS_RESTRICT_PMEM       (1<<3)
 #define ACPI_CEDT_CFMWS_RESTRICT_FIXED      (1<<4)
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index f4dceecf7e335..8b5b8d17b8b84 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -210,7 +210,7 @@ static struct {
 			},
 			.interleave_ways = 0,
 			.granularity = 4,
-			.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+			.restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
 					ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
 			.qtg_id = FAKE_QTG_ID,
 			.window_size = SZ_256M * 4UL,
@@ -225,7 +225,7 @@ static struct {
 			},
 			.interleave_ways = 1,
 			.granularity = 4,
-			.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+			.restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
 					ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
 			.qtg_id = FAKE_QTG_ID,
 			.window_size = SZ_256M * 8UL,
@@ -240,7 +240,7 @@ static struct {
 			},
 			.interleave_ways = 0,
 			.granularity = 4,
-			.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+			.restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
 					ACPI_CEDT_CFMWS_RESTRICT_PMEM,
 			.qtg_id = FAKE_QTG_ID,
 			.window_size = SZ_256M * 4UL,
@@ -255,7 +255,7 @@ static struct {
 			},
 			.interleave_ways = 1,
 			.granularity = 4,
-			.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+			.restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
 					ACPI_CEDT_CFMWS_RESTRICT_PMEM,
 			.qtg_id = FAKE_QTG_ID,
 			.window_size = SZ_256M * 8UL,
@@ -270,7 +270,7 @@ static struct {
 			},
 			.interleave_ways = 0,
 			.granularity = 4,
-			.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+			.restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
 					ACPI_CEDT_CFMWS_RESTRICT_PMEM,
 			.qtg_id = FAKE_QTG_ID,
 			.window_size = SZ_256M * 4UL,
@@ -285,7 +285,7 @@ static struct {
 			},
 			.interleave_ways = 0,
 			.granularity = 4,
-			.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+			.restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
 					ACPI_CEDT_CFMWS_RESTRICT_VOLATILE,
 			.qtg_id = FAKE_QTG_ID,
 			.window_size = SZ_256M,
@@ -302,7 +302,7 @@ static struct {
 			.interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR,
 			.interleave_ways = 0,
 			.granularity = 4,
-			.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+			.restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
 					ACPI_CEDT_CFMWS_RESTRICT_PMEM,
 			.qtg_id = FAKE_QTG_ID,
 			.window_size = SZ_256M * 8UL,
@@ -318,7 +318,7 @@ static struct {
 			.interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR,
 			.interleave_ways = 1,
 			.granularity = 0,
-			.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+			.restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
 					ACPI_CEDT_CFMWS_RESTRICT_PMEM,
 			.qtg_id = FAKE_QTG_ID,
 			.window_size = SZ_256M * 8UL,
@@ -334,7 +334,7 @@ static struct {
 			.interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR,
 			.interleave_ways = 8,
 			.granularity = 1,
-			.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
+			.restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM |
 					ACPI_CEDT_CFMWS_RESTRICT_PMEM,
 			.qtg_id = FAKE_QTG_ID,
 			.window_size = SZ_512M * 6UL,

From 7c6f7d990111b793bacc0665a486305417fa69c2 Mon Sep 17 00:00:00 2001
From: "Fabio M. De Francesco" <fabio.m.de.francesco@linux.intel.com>
Date: Mon, 15 Sep 2025 16:57:20 +0200
Subject: [PATCH 016/143] cxl: Documentation/driver-api/cxl: Describe the x86
 Low Memory Hole solution

Add documentation on how to resolve conflicts between CXL Fixed Memory
Windows, Platform Low Memory Holes, intermediate Switch and Endpoint
Decoders.

[dj]: Fixed inconsistent spacing after '.'
[dj]: Fixed subject line from Alison.
[dj]: Removed '::' before table from Bagas.

Reviewed-by: Gregory Price <gourry@gourry.net>
Signed-off-by: Fabio M. De Francesco <fabio.m.de.francesco@linux.intel.com>
Reviewed-by: Bagas Sanjaya <bagasdotme@gmail.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit c5dca38633daa1e240144bac453cf9065604a413)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 Documentation/driver-api/cxl/conventions.rst | 135 +++++++++++++++++++
 1 file changed, 135 insertions(+)

diff --git a/Documentation/driver-api/cxl/conventions.rst b/Documentation/driver-api/cxl/conventions.rst
index da347a81a237a..e37336d7b116e 100644
--- a/Documentation/driver-api/cxl/conventions.rst
+++ b/Documentation/driver-api/cxl/conventions.rst
@@ -45,3 +45,138 @@ Detailed Description of the Change
 ----------------------------------
 
 <Propose spec language that corrects the conflict.>
+
+
+Resolve conflict between CFMWS, Platform Memory Holes, and Endpoint Decoders
+============================================================================
+
+Document
+--------
+
+CXL Revision 3.2, Version 1.0
+
+License
+-------
+
+SPDX-License Identifier: CC-BY-4.0
+
+Creator/Contributors
+--------------------
+
+- Fabio M. De Francesco, Intel
+- Dan J. Williams, Intel
+- Mahesh Natu, Intel
+
+Summary of the Change
+---------------------
+
+According to the current Compute Express Link (CXL) Specifications (Revision
+3.2, Version 1.0), the CXL Fixed Memory Window Structure (CFMWS) describes zero
+or more Host Physical Address (HPA) windows associated with each CXL Host
+Bridge. Each window represents a contiguous HPA range that may be interleaved
+across one or more targets, including CXL Host Bridges. Each window has a set
+of restrictions that govern its usage. It is the responsibility of the
+Operating System-directed configuration and Power Management (OSPM) to utilize
+each window for the specified use.
+
+Table 9-22 of the current CXL Specifications states that the Window Size field
+contains the total number of consecutive bytes of HPA this window describes.
+This value must be a multiple of the Number of Interleave Ways (NIW) * 256 MB.
+
+Platform Firmware (BIOS) might reserve physical addresses below 4 GB where a
+memory gap such as the Low Memory Hole (LMH) for PCIe MMIO may exist. In such
+cases, the CFMWS Range Size may not adhere to the NIW * 256 MB rule.
+
+The HPA represents the actual physical memory address space that the CXL devices
+can decode and respond to, while the System Physical Address (SPA), a related
+but distinct concept, represents the system-visible address space that users can
+direct transactions to, and so it excludes reserved regions.
+
+BIOS publishes CFMWS to communicate the active SPA ranges that, on platforms
+with LMHs, map to a strict subset of the HPA. The SPA range trims out the hole,
+resulting in lost capacity in the Endpoints with no SPA to map to that part of
+the HPA range that intersects the hole.
+
+E.g., an x86 platform with two CFMWS and an LMH starting at 2 GB:
+
+ +--------+------------+-------------------+------------------+-------------------+------+
+ | Window | CFMWS Base |    CFMWS Size     | HDM Decoder Base |  HDM Decoder Size | Ways |
+ +========+============+===================+==================+===================+======+
+ |   0    |   0 GB     |       2 GB        |      0 GB        |       3 GB        |  12  |
+ +--------+------------+-------------------+------------------+-------------------+------+
+ |   1    |   4 GB     | NIW*256MB Aligned |      4 GB        | NIW*256MB Aligned |  12  |
+ +--------+------------+-------------------+------------------+-------------------+------+
+
+HDM decoder base and HDM decoder size represent all the 12 Endpoint Decoders of
+a 12 ways region and all the intermediate Switch Decoders. They are configured
+by the BIOS according to the NIW * 256MB rule, resulting in a HPA range size of
+3GB. Instead, the CFMWS Base and CFMWS Size are used to configure the Root
+Decoder HPA range, which ends up smaller (2GB) than that of the Switch and
+Endpoint Decoders in the hierarchy (3GB).
+
+This creates 2 issues which lead to a failure to construct a region:
+
+1) A mismatch in region size between root and any HDM decoder. The root decoders
+   will always be smaller due to the trim.
+
+2) The trim causes the root decoder to violate the (NIW * 256MB) rule.
+
+This change allows a region with a base address of 0GB to bypass these checks to
+allow for region creation with the trimmed root decoder address range.
+
+This change does not allow for any other arbitrary region to violate these
+checks - it is intended exclusively to enable x86 platforms which map CXL memory
+under 4GB.
+
+Despite the HDM decoders covering the PCIe hole HPA region, it is expected that
+the platform will never route address accesses to the CXL complex because the
+root decoder only covers the trimmed region (which excludes this). This is
+outside the ability of Linux to enforce.
+
+On the example platform, only the first 2GB will be potentially usable, but
+Linux, aiming to adhere to the current specifications, fails to construct
+Regions and attach Endpoint and intermediate Switch Decoders to them.
+
+Several points of failure stem from the expectation that the Root Decoder HPA
+size, which equals the CFMWS size it is configured from, is greater than or
+equal to that of the matching Switch and Endpoint HDM Decoders.
+
+In order to succeed with construction and attachment, Linux must construct a
+Region with Root Decoder HPA range size, and then attach to that all the
+intermediate Switch Decoders and Endpoint Decoders that belong to the hierarchy
+regardless of their range sizes.
+
+Benefits of the Change
+----------------------
+
+Without the change, the OSPM wouldn't match intermediate Switch and Endpoint
+Decoders with Root Decoders configured with CFMWS HPA sizes that don't align
+with the NIW * 256MB constraint, and so it leads to lost memdev capacity.
+
+This change allows the OSPM to construct Regions and attach intermediate Switch
+and Endpoint Decoders to them, so that the addressable part of the memory
+devices' total capacity is made available to the users.
+
+References
+----------
+
+Compute Express Link Specification Revision 3.2, Version 1.0
+<https://www.computeexpresslink.org/>
+
+Detailed Description of the Change
+----------------------------------
+
+The description of the Window Size field in table 9-22 needs to account for
+platforms with Low Memory Holes, where SPA ranges might be subsets of the
+endpoints' HPA. Therefore, it has to be changed to the following:
+
+"The total number of consecutive bytes of HPA this window represents. This value
+shall be a multiple of NIW * 256 MB.
+
+On platforms that reserve physical addresses below 4 GB, such as the Low Memory
+Hole for PCIe MMIO on x86, an instance of CFMWS whose Base HPA range is 0 might
+have a size that doesn't align with the NIW * 256 MB constraint.
+
+Note that the matching intermediate Switch Decoders and the Endpoint Decoders
+HPA range sizes must still align to the above-mentioned rule, but the memory
+capacity that exceeds the CFMWS window size won't be accessible."

From cec28eff80d39cb43b14c94a176107cb13aea1b7 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Fri, 29 Aug 2025 11:09:19 -0700
Subject: [PATCH 017/143] cxl: Add helper to detect top of CXL device topology

Add a helper to replace the open-coded detection of the CXL device hierarchy
root, or the host bridge. The helper will be used for delayed downstream
port (dport) creation.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Li Ming <ming.li@zohomail.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Robert Richter <rrichter@amd.com>
Tested-by: Robert Richter <rrichter@amd.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 4fde89539a18d39169a511fda00db65eeba1a8e0)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/port.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 8f36ff413f5d5..66c0c849c4a0d 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -33,6 +33,15 @@
 static DEFINE_IDA(cxl_port_ida);
 static DEFINE_XARRAY(cxl_root_buses);
 
+/*
+ * The terminal device in PCI is NULL and @platform_bus
+ * for platform devices (for cxl_test)
+ */
+static bool is_cxl_host_bridge(struct device *dev)
+{
+	return (!dev || dev == &platform_bus);
+}
+
 int cxl_num_decoders_committed(struct cxl_port *port)
 {
 	lockdep_assert_held(&cxl_rwsem.region);
@@ -1542,7 +1551,7 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd,
 	resource_size_t component_reg_phys;
 	int rc;
 
-	if (!dparent) {
+	if (is_cxl_host_bridge(dparent)) {
 		/*
 		 * The iteration reached the topology root without finding the
 		 * CXL-root 'cxl_port' on a previous iteration, fail for now to
@@ -1630,11 +1639,7 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd)
 		struct device *uport_dev;
 		struct cxl_dport *dport;
 
-		/*
-		 * The terminal "grandparent" in PCI is NULL and @platform_bus
-		 * for platform devices
-		 */
-		if (!dport_dev || dport_dev == &platform_bus)
+		if (is_cxl_host_bridge(dport_dev))
 			return 0;
 
 		uport_dev = dport_dev->parent;

From 05e634c07a82cc42ce2e96f4247a7225516e578c Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Fri, 29 Aug 2025 11:09:20 -0700
Subject: [PATCH 018/143] cxl: Add helper to delete dport

Refactor the code in reap_dports() out to provide a helper function that
reaps a single dport. This will be used later in the cleanup path for
allocating a dport. Rename to del_dport() and del_dports() to mirror
devm_cxl_add_dport().

[dj] Fixed up subject per Robert

Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Li Ming <ming.li@zohomail.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Robert Richter <rrichter@amd.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 8330671c57c7056ef5e1e8dccfcdda7d5fe6d0b0)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/port.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 66c0c849c4a0d..dbea9feacdddf 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -1433,7 +1433,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, "CXL");
  * through ->remove(). This "bottom-up" removal selectively removes individual
  * child ports manually. This depends on devm_cxl_add_port() to not change is
  * devm action registration order, and for dports to have already been
- * destroyed by reap_dports().
+ * destroyed by del_dports().
  */
 static void delete_switch_port(struct cxl_port *port)
 {
@@ -1442,18 +1442,24 @@ static void delete_switch_port(struct cxl_port *port)
 	devm_release_action(port->dev.parent, unregister_port, port);
 }
 
-static void reap_dports(struct cxl_port *port)
+static void del_dport(struct cxl_dport *dport)
+{
+	struct cxl_port *port = dport->port;
+
+	devm_release_action(&port->dev, cxl_dport_unlink, dport);
+	devm_release_action(&port->dev, cxl_dport_remove, dport);
+	devm_kfree(&port->dev, dport);
+}
+
+static void del_dports(struct cxl_port *port)
 {
 	struct cxl_dport *dport;
 	unsigned long index;
 
 	device_lock_assert(&port->dev);
 
-	xa_for_each(&port->dports, index, dport) {
-		devm_release_action(&port->dev, cxl_dport_unlink, dport);
-		devm_release_action(&port->dev, cxl_dport_remove, dport);
-		devm_kfree(&port->dev, dport);
-	}
+	xa_for_each(&port->dports, index, dport)
+		del_dport(dport);
 }
 
 struct detach_ctx {
@@ -1511,7 +1517,7 @@ static void cxl_detach_ep(void *data)
 			 */
 			died = true;
 			port->dead = true;
-			reap_dports(port);
+			del_dports(port);
 		}
 		device_unlock(&port->dev);
 

From d56872375583099739bfc4d4ba8f59bcd1a1a76f Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Fri, 29 Aug 2025 11:09:21 -0700
Subject: [PATCH 019/143] cxl: Add a cached copy of target_map to cxl_decoder

Add a cached copy of the hardware port-id list that is available at init
before all @dport objects have been instantiated. Change is in preparation
of delayed dport instantiation.

Reviewed-by: Robert Richter <rrichter@amd.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Tested-by: Robert Richter <rrichter@amd.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 02edab6ceefaaf8cb917e864d8c26dbac0ea9686)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/acpi.c           |  7 +++----
 drivers/cxl/core/hdm.c       | 20 ++++++++------------
 drivers/cxl/core/port.c      | 22 +++++++---------------
 drivers/cxl/core/region.c    |  4 +++-
 drivers/cxl/cxl.h            |  8 ++++++--
 tools/testing/cxl/test/cxl.c |  8 ++++----
 6 files changed, 31 insertions(+), 38 deletions(-)

diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index b8f124685f1dc..bd2e282ca93a0 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -401,7 +401,6 @@ DEFINE_FREE(del_cxl_resource, struct resource *, if (_T) del_cxl_resource(_T))
 static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws,
 			     struct cxl_cfmws_context *ctx)
 {
-	int target_map[CXL_DECODER_MAX_INTERLEAVE];
 	struct cxl_port *root_port = ctx->root_port;
 	struct cxl_cxims_context cxims_ctx;
 	struct device *dev = ctx->dev;
@@ -419,8 +418,6 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws,
 	rc = eig_to_granularity(cfmws->granularity, &ig);
 	if (rc)
 		return rc;
-	for (i = 0; i < ways; i++)
-		target_map[i] = cfmws->interleave_targets[i];
 
 	struct resource *res __free(del_cxl_resource) = alloc_cxl_resource(
 		cfmws->base_hpa, cfmws->window_size, ctx->id++);
@@ -446,6 +443,8 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws,
 		.end = cfmws->base_hpa + cfmws->window_size - 1,
 	};
 	cxld->interleave_ways = ways;
+	for (i = 0; i < ways; i++)
+		cxld->target_map[i] = cfmws->interleave_targets[i];
 	/*
 	 * Minimize the x1 granularity to advertise support for any
 	 * valid region granularity
@@ -484,7 +483,7 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws,
 		cxlrd->ops->spa_to_hpa = cxl_apply_xor_maps;
 	}
 
-	rc = cxl_decoder_add(cxld, target_map);
+	rc = cxl_decoder_add(cxld);
 	if (rc)
 		return rc;
 
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 777b8ac0c49c1..13c53b9c17d13 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -21,12 +21,11 @@ struct cxl_rwsem cxl_rwsem = {
 	.dpa = __RWSEM_INITIALIZER(cxl_rwsem.dpa),
 };
 
-static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
-			   int *target_map)
+static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld)
 {
 	int rc;
 
-	rc = cxl_decoder_add_locked(cxld, target_map);
+	rc = cxl_decoder_add_locked(cxld);
 	if (rc) {
 		put_device(&cxld->dev);
 		dev_err(&port->dev, "Failed to add decoder\n");
@@ -54,7 +53,6 @@ int devm_cxl_add_passthrough_decoder(struct cxl_port *port)
 {
 	struct cxl_switch_decoder *cxlsd;
 	struct cxl_dport *dport = NULL;
-	int single_port_map[1];
 	unsigned long index;
 	struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
 
@@ -73,9 +71,9 @@ int devm_cxl_add_passthrough_decoder(struct cxl_port *port)
 
 	xa_for_each(&port->dports, index, dport)
 		break;
-	single_port_map[0] = dport->port_id;
+	cxlsd->cxld.target_map[0] = dport->port_id;
 
-	return add_hdm_decoder(port, &cxlsd->cxld, single_port_map);
+	return add_hdm_decoder(port, &cxlsd->cxld);
 }
 EXPORT_SYMBOL_NS_GPL(devm_cxl_add_passthrough_decoder, "CXL");
 
@@ -984,7 +982,7 @@ static int cxl_setup_hdm_decoder_from_dvsec(
 }
 
 static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
-			    int *target_map, void __iomem *hdm, int which,
+			    void __iomem *hdm, int which,
 			    u64 *dpa_base, struct cxl_endpoint_dvsec_info *info)
 {
 	struct cxl_endpoint_decoder *cxled = NULL;
@@ -1104,7 +1102,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
 		hi = readl(hdm + CXL_HDM_DECODER0_TL_HIGH(which));
 		target_list.value = (hi << 32) + lo;
 		for (i = 0; i < cxld->interleave_ways; i++)
-			target_map[i] = target_list.target_id[i];
+			cxld->target_map[i] = target_list.target_id[i];
 
 		return 0;
 	}
@@ -1180,7 +1178,6 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
 	cxl_settle_decoders(cxlhdm);
 
 	for (i = 0; i < cxlhdm->decoder_count; i++) {
-		int target_map[CXL_DECODER_MAX_INTERLEAVE] = { 0 };
 		int rc, target_count = cxlhdm->target_count;
 		struct cxl_decoder *cxld;
 
@@ -1208,8 +1205,7 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
 			cxld = &cxlsd->cxld;
 		}
 
-		rc = init_hdm_decoder(port, cxld, target_map, hdm, i,
-				      &dpa_base, info);
+		rc = init_hdm_decoder(port, cxld, hdm, i, &dpa_base, info);
 		if (rc) {
 			if (rc == -ENOSPC)
 				continue;
@@ -1219,7 +1215,7 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
 			put_device(&cxld->dev);
 			return rc;
 		}
-		rc = add_hdm_decoder(port, cxld, target_map);
+		rc = add_hdm_decoder(port, cxld);
 		if (rc) {
 			dev_warn(&port->dev,
 				 "Failed to add decoder%d.%d\n", port->id, i);
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index dbea9feacdddf..c36e089e53990 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -1716,13 +1716,11 @@ struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd,
 EXPORT_SYMBOL_NS_GPL(cxl_mem_find_port, "CXL");
 
 static int decoder_populate_targets(struct cxl_switch_decoder *cxlsd,
-				    struct cxl_port *port, int *target_map)
+				    struct cxl_port *port)
 {
+	struct cxl_decoder *cxld = &cxlsd->cxld;
 	int i;
 
-	if (!target_map)
-		return 0;
-
 	device_lock_assert(&port->dev);
 
 	if (xa_empty(&port->dports))
@@ -1730,7 +1728,7 @@ static int decoder_populate_targets(struct cxl_switch_decoder *cxlsd,
 
 	guard(rwsem_write)(&cxl_rwsem.region);
 	for (i = 0; i < cxlsd->cxld.interleave_ways; i++) {
-		struct cxl_dport *dport = find_dport(port, target_map[i]);
+		struct cxl_dport *dport = find_dport(port, cxld->target_map[i]);
 
 		if (!dport)
 			return -ENXIO;
@@ -1922,9 +1920,6 @@ EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_alloc, "CXL");
 /**
  * cxl_decoder_add_locked - Add a decoder with targets
  * @cxld: The cxl decoder allocated by cxl_<type>_decoder_alloc()
- * @target_map: A list of downstream ports that this decoder can direct memory
- *              traffic to. These numbers should correspond with the port number
- *              in the PCIe Link Capabilities structure.
  *
  * Certain types of decoders may not have any targets. The main example of this
  * is an endpoint device. A more awkward example is a hostbridge whose root
@@ -1938,7 +1933,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_alloc, "CXL");
  * Return: Negative error code if the decoder wasn't properly configured; else
  *	   returns 0.
  */
-int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map)
+int cxl_decoder_add_locked(struct cxl_decoder *cxld)
 {
 	struct cxl_port *port;
 	struct device *dev;
@@ -1959,7 +1954,7 @@ int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map)
 	if (!is_endpoint_decoder(dev)) {
 		struct cxl_switch_decoder *cxlsd = to_cxl_switch_decoder(dev);
 
-		rc = decoder_populate_targets(cxlsd, port, target_map);
+		rc = decoder_populate_targets(cxlsd, port);
 		if (rc && (cxld->flags & CXL_DECODER_F_ENABLE)) {
 			dev_err(&port->dev,
 				"Failed to populate active decoder targets\n");
@@ -1978,9 +1973,6 @@ EXPORT_SYMBOL_NS_GPL(cxl_decoder_add_locked, "CXL");
 /**
  * cxl_decoder_add - Add a decoder with targets
  * @cxld: The cxl decoder allocated by cxl_<type>_decoder_alloc()
- * @target_map: A list of downstream ports that this decoder can direct memory
- *              traffic to. These numbers should correspond with the port number
- *              in the PCIe Link Capabilities structure.
  *
  * This is the unlocked variant of cxl_decoder_add_locked().
  * See cxl_decoder_add_locked().
@@ -1988,7 +1980,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_decoder_add_locked, "CXL");
  * Context: Process context. Takes and releases the device lock of the port that
  *	    owns the @cxld.
  */
-int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map)
+int cxl_decoder_add(struct cxl_decoder *cxld)
 {
 	struct cxl_port *port;
 
@@ -2001,7 +1993,7 @@ int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map)
 	port = to_cxl_port(cxld->dev.parent);
 
 	guard(device)(&port->dev);
-	return cxl_decoder_add_locked(cxld, target_map);
+	return cxl_decoder_add_locked(cxld);
 }
 EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, "CXL");
 
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 238b148768148..32675a70cadf9 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -1516,8 +1516,10 @@ static int cxl_port_setup_targets(struct cxl_port *port,
 				cxl_rr->nr_targets_set);
 			return -ENXIO;
 		}
-	} else
+	} else {
 		cxlsd->target[cxl_rr->nr_targets_set] = ep->dport;
+		cxlsd->cxld.target_map[cxl_rr->nr_targets_set] = ep->dport->port_id;
+	}
 	inc = 1;
 out_target_set:
 	cxl_rr->nr_targets_set += inc;
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 4fe3df06f57a3..5be51b6abecd7 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -357,6 +357,9 @@ enum cxl_decoder_type {
  * @target_type: accelerator vs expander (type2 vs type3) selector
  * @region: currently assigned region for this decoder
  * @flags: memory type capabilities and locking
+ * @target_map: cached copy of hardware port-id list, available at init
+ *              before all @dport objects have been instantiated. While
+ *              dport id is 8bit, CFMWS interleave targets are 32bits.
  * @commit: device/decoder-type specific callback to commit settings to hw
  * @reset: device/decoder-type specific callback to reset hw settings
 */
@@ -369,6 +372,7 @@ struct cxl_decoder {
 	enum cxl_decoder_type target_type;
 	struct cxl_region *region;
 	unsigned long flags;
+	u32 target_map[CXL_DECODER_MAX_INTERLEAVE];
 	int (*commit)(struct cxl_decoder *cxld);
 	void (*reset)(struct cxl_decoder *cxld);
 };
@@ -789,9 +793,9 @@ struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
 						unsigned int nr_targets);
 struct cxl_switch_decoder *cxl_switch_decoder_alloc(struct cxl_port *port,
 						    unsigned int nr_targets);
-int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map);
+int cxl_decoder_add(struct cxl_decoder *cxld);
 struct cxl_endpoint_decoder *cxl_endpoint_decoder_alloc(struct cxl_port *port);
-int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map);
+int cxl_decoder_add_locked(struct cxl_decoder *cxld);
 int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld);
 static inline int cxl_root_decoder_autoremove(struct device *host,
 					      struct cxl_root_decoder *cxlrd)
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index 8b5b8d17b8b84..306c5cbc24187 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -651,7 +651,7 @@ static int mock_cxl_add_passthrough_decoder(struct cxl_port *port)
 
 
 struct target_map_ctx {
-	int *target_map;
+	u32 *target_map;
 	int index;
 	int target_count;
 };
@@ -955,9 +955,7 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
 		target_count = NR_CXL_SWITCH_PORTS;
 
 	for (i = 0; i < NR_CXL_PORT_DECODERS; i++) {
-		int target_map[CXL_DECODER_MAX_INTERLEAVE] = { 0 };
 		struct target_map_ctx ctx = {
-			.target_map = target_map,
 			.target_count = target_count,
 		};
 		struct cxl_decoder *cxld;
@@ -986,6 +984,8 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
 			cxld = &cxled->cxld;
 		}
 
+		ctx.target_map = cxld->target_map;
+
 		mock_init_hdm_decoder(cxld);
 
 		if (target_count) {
@@ -997,7 +997,7 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
 			}
 		}
 
-		rc = cxl_decoder_add_locked(cxld, target_map);
+		rc = cxl_decoder_add_locked(cxld);
 		if (rc) {
 			put_device(&cxld->dev);
 			dev_err(&port->dev, "Failed to add decoder\n");

From f0daa5e8205d7dc7d400be0f5ebfef8049fa911e Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Fri, 29 Aug 2025 11:09:23 -0700
Subject: [PATCH 020/143] cxl/test: Refactor decoder setup to reduce cxl_test
 burden

Group the decoder setup code in switch and endpoint port probe into a
single function for each to reduce the number of functions to be mocked
in cxl_test. Introduce devm_cxl_switch_port_decoders_setup() and
devm_cxl_endpoint_decoders_setup(). These two functions will be mocked
instead with some functions optimized out since the mock version does
not do anything. Remove devm_cxl_setup_hdm(),
devm_cxl_add_passthrough_decoder(), and devm_cxl_enumerate_decoders() in
cxl_test mock code. In turn, mock_cxl_add_passthrough_decoder() can be
removed since cxl_test does not set up passthrough decoders.
__wrap_cxl_hdm_decode_init() and __wrap_cxl_dvsec_rr_decode() can be
removed as well since they only return 0 when called.

[dj: drop 'struct cxl_port' forward declaration (Robert)]

Suggested-by: Robert Richter <rrichter@amd.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Robert Richter <rrichter@amd.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 68d5d9734c12fce20ad493fe24738ab2019108c0)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/core.h       |  5 +++
 drivers/cxl/core/hdm.c        | 81 +++++++++++++++++++++++++++++++----
 drivers/cxl/core/pci.c        | 42 ++++++++++++++++++
 drivers/cxl/cxl.h             |  9 ++--
 drivers/cxl/cxlpci.h          |  2 -
 drivers/cxl/port.c            | 38 +---------------
 tools/testing/cxl/Kbuild      |  7 +--
 tools/testing/cxl/test/cxl.c  | 42 +++++++++++++-----
 tools/testing/cxl/test/mock.c | 69 ++++-------------------------
 tools/testing/cxl/test/mock.h |  7 +--
 10 files changed, 169 insertions(+), 133 deletions(-)

diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 5707cd60a8eb0..1fb66132b7777 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -148,6 +148,11 @@ int cxl_ras_init(void);
 void cxl_ras_exit(void);
 int cxl_gpf_port_setup(struct cxl_dport *dport);
 
+struct cxl_hdm;
+int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
+			struct cxl_endpoint_dvsec_info *info);
+int cxl_port_get_possible_dports(struct cxl_port *port);
+
 #ifdef CONFIG_CXL_FEATURES
 struct cxl_feat_entry *
 cxl_feature_info(struct cxl_features_state *cxlfs, const uuid_t *uuid);
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 13c53b9c17d13..d435178f63b82 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -49,7 +49,7 @@ static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld)
  * are claimed and passed to the single dport. Disable the range until the first
  * CXL region is enumerated / activated.
  */
-int devm_cxl_add_passthrough_decoder(struct cxl_port *port)
+static int devm_cxl_add_passthrough_decoder(struct cxl_port *port)
 {
 	struct cxl_switch_decoder *cxlsd;
 	struct cxl_dport *dport = NULL;
@@ -75,7 +75,6 @@ int devm_cxl_add_passthrough_decoder(struct cxl_port *port)
 
 	return add_hdm_decoder(port, &cxlsd->cxld);
 }
-EXPORT_SYMBOL_NS_GPL(devm_cxl_add_passthrough_decoder, "CXL");
 
 static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm)
 {
@@ -145,8 +144,8 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
  * @port: cxl_port to map
  * @info: cached DVSEC range register info
  */
-struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
-				   struct cxl_endpoint_dvsec_info *info)
+static struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
+					  struct cxl_endpoint_dvsec_info *info)
 {
 	struct cxl_register_map *reg_map = &port->reg_map;
 	struct device *dev = &port->dev;
@@ -201,7 +200,6 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
 
 	return cxlhdm;
 }
-EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_hdm, "CXL");
 
 static void __cxl_dpa_debug(struct seq_file *file, struct resource *r, int depth)
 {
@@ -1167,8 +1165,8 @@ static void cxl_settle_decoders(struct cxl_hdm *cxlhdm)
  * @cxlhdm: Structure to populate with HDM capabilities
  * @info: cached DVSEC range register info
  */
-int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
-				struct cxl_endpoint_dvsec_info *info)
+static int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
+				       struct cxl_endpoint_dvsec_info *info)
 {
 	void __iomem *hdm = cxlhdm->regs.hdm_decoder;
 	struct cxl_port *port = cxlhdm->port;
@@ -1225,4 +1223,71 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
 
 	return 0;
 }
-EXPORT_SYMBOL_NS_GPL(devm_cxl_enumerate_decoders, "CXL");
+
+/**
+ * devm_cxl_switch_port_decoders_setup - allocate and setup switch decoders
+ * @port: CXL port context
+ *
+ * Return 0 or -errno on error
+ */
+int devm_cxl_switch_port_decoders_setup(struct cxl_port *port)
+{
+	struct cxl_hdm *cxlhdm;
+
+	if (is_cxl_root(port) || is_cxl_endpoint(port))
+		return -EOPNOTSUPP;
+
+	cxlhdm = devm_cxl_setup_hdm(port, NULL);
+	if (!IS_ERR(cxlhdm))
+		return devm_cxl_enumerate_decoders(cxlhdm, NULL);
+
+	if (PTR_ERR(cxlhdm) != -ENODEV) {
+		dev_err(&port->dev, "Failed to map HDM decoder capability\n");
+		return PTR_ERR(cxlhdm);
+	}
+
+	if (cxl_port_get_possible_dports(port) == 1) {
+		dev_dbg(&port->dev, "Fallback to passthrough decoder\n");
+		return devm_cxl_add_passthrough_decoder(port);
+	}
+
+	dev_err(&port->dev, "HDM decoder capability not found\n");
+	return -ENXIO;
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_switch_port_decoders_setup, "CXL");
+
+/**
+ * devm_cxl_endpoint_decoders_setup - allocate and setup endpoint decoders
+ * @port: CXL port context
+ *
+ * Return 0 or -errno on error
+ */
+int devm_cxl_endpoint_decoders_setup(struct cxl_port *port)
+{
+	struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev);
+	struct cxl_endpoint_dvsec_info info = { .port = port };
+	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	struct cxl_hdm *cxlhdm;
+	int rc;
+
+	if (!is_cxl_endpoint(port))
+		return -EOPNOTSUPP;
+
+	rc = cxl_dvsec_rr_decode(cxlds, &info);
+	if (rc < 0)
+		return rc;
+
+	cxlhdm = devm_cxl_setup_hdm(port, &info);
+	if (IS_ERR(cxlhdm)) {
+		if (PTR_ERR(cxlhdm) == -ENODEV)
+			dev_err(&port->dev, "HDM decoder registers not found\n");
+		return PTR_ERR(cxlhdm);
+	}
+
+	rc = cxl_hdm_decode_init(cxlds, cxlhdm, &info);
+	if (rc)
+		return rc;
+
+	return devm_cxl_enumerate_decoders(cxlhdm, &info);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_endpoint_decoders_setup, "CXL");
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index b50551601c2e4..fa02366d35f2d 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -1169,3 +1169,45 @@ int cxl_gpf_port_setup(struct cxl_dport *dport)
 
 	return 0;
 }
+
+static int count_dports(struct pci_dev *pdev, void *data)
+{
+	struct cxl_walk_context *ctx = data;
+	int type = pci_pcie_type(pdev);
+
+	if (pdev->bus != ctx->bus)
+		return 0;
+	if (!pci_is_pcie(pdev))
+		return 0;
+	if (type != ctx->type)
+		return 0;
+
+	ctx->count++;
+	return 0;
+}
+
+int cxl_port_get_possible_dports(struct cxl_port *port)
+{
+	struct pci_bus *bus = cxl_port_to_pci_bus(port);
+	struct cxl_walk_context ctx;
+	int type;
+
+	if (!bus) {
+		dev_err(&port->dev, "No PCI bus found for port %s\n",
+			dev_name(&port->dev));
+		return -ENXIO;
+	}
+
+	if (pci_is_root_bus(bus))
+		type = PCI_EXP_TYPE_ROOT_PORT;
+	else
+		type = PCI_EXP_TYPE_DOWNSTREAM;
+
+	ctx = (struct cxl_walk_context) {
+		.bus = bus,
+		.type = type,
+	};
+	pci_walk_bus(bus, count_dports, &ctx);
+
+	return ctx.count;
+}
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 5be51b6abecd7..e4f37c143c1ef 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -818,12 +818,9 @@ struct cxl_endpoint_dvsec_info {
 	struct range dvsec_range[2];
 };
 
-struct cxl_hdm;
-struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
-				   struct cxl_endpoint_dvsec_info *info);
-int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
-				struct cxl_endpoint_dvsec_info *info);
-int devm_cxl_add_passthrough_decoder(struct cxl_port *port);
+int devm_cxl_switch_port_decoders_setup(struct cxl_port *port);
+int devm_cxl_endpoint_decoders_setup(struct cxl_port *port);
+
 struct cxl_dev_state;
 int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds,
 			struct cxl_endpoint_dvsec_info *info);
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index 54e219b0049ea..7ae621e618e79 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -129,8 +129,6 @@ static inline bool cxl_pci_flit_256(struct pci_dev *pdev)
 
 int devm_cxl_port_enumerate_dports(struct cxl_port *port);
 struct cxl_dev_state;
-int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
-			struct cxl_endpoint_dvsec_info *info);
 void read_cdat_data(struct cxl_port *port);
 void cxl_cor_error_detected(struct pci_dev *pdev);
 pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index cf32dc50b7a61..d8cae2b5bac6c 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -59,7 +59,6 @@ static int discover_region(struct device *dev, void *unused)
 
 static int cxl_switch_port_probe(struct cxl_port *port)
 {
-	struct cxl_hdm *cxlhdm;
 	int rc;
 
 	/* Cache the data early to ensure is_visible() works */
@@ -71,43 +70,14 @@ static int cxl_switch_port_probe(struct cxl_port *port)
 
 	cxl_switch_parse_cdat(port);
 
-	cxlhdm = devm_cxl_setup_hdm(port, NULL);
-	if (!IS_ERR(cxlhdm))
-		return devm_cxl_enumerate_decoders(cxlhdm, NULL);
-
-	if (PTR_ERR(cxlhdm) != -ENODEV) {
-		dev_err(&port->dev, "Failed to map HDM decoder capability\n");
-		return PTR_ERR(cxlhdm);
-	}
-
-	if (rc == 1) {
-		dev_dbg(&port->dev, "Fallback to passthrough decoder\n");
-		return devm_cxl_add_passthrough_decoder(port);
-	}
-
-	dev_err(&port->dev, "HDM decoder capability not found\n");
-	return -ENXIO;
+	return devm_cxl_switch_port_decoders_setup(port);
 }
 
 static int cxl_endpoint_port_probe(struct cxl_port *port)
 {
-	struct cxl_endpoint_dvsec_info info = { .port = port };
 	struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev);
-	struct cxl_dev_state *cxlds = cxlmd->cxlds;
-	struct cxl_hdm *cxlhdm;
 	int rc;
 
-	rc = cxl_dvsec_rr_decode(cxlds, &info);
-	if (rc < 0)
-		return rc;
-
-	cxlhdm = devm_cxl_setup_hdm(port, &info);
-	if (IS_ERR(cxlhdm)) {
-		if (PTR_ERR(cxlhdm) == -ENODEV)
-			dev_err(&port->dev, "HDM decoder registers not found\n");
-		return PTR_ERR(cxlhdm);
-	}
-
 	/* Cache the data early to ensure is_visible() works */
 	read_cdat_data(port);
 	cxl_endpoint_parse_cdat(port);
@@ -117,11 +87,7 @@ static int cxl_endpoint_port_probe(struct cxl_port *port)
 	if (rc)
 		return rc;
 
-	rc = cxl_hdm_decode_init(cxlds, cxlhdm, &info);
-	if (rc)
-		return rc;
-
-	rc = devm_cxl_enumerate_decoders(cxlhdm, &info);
+	rc = devm_cxl_endpoint_decoders_setup(port);
 	if (rc)
 		return rc;
 
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index d07f14cb7aa45..51b8ab289eae9 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -5,16 +5,13 @@ ldflags-y += --wrap=acpi_evaluate_integer
 ldflags-y += --wrap=acpi_pci_find_root
 ldflags-y += --wrap=nvdimm_bus_register
 ldflags-y += --wrap=devm_cxl_port_enumerate_dports
-ldflags-y += --wrap=devm_cxl_setup_hdm
-ldflags-y += --wrap=devm_cxl_add_passthrough_decoder
-ldflags-y += --wrap=devm_cxl_enumerate_decoders
 ldflags-y += --wrap=cxl_await_media_ready
-ldflags-y += --wrap=cxl_hdm_decode_init
-ldflags-y += --wrap=cxl_dvsec_rr_decode
 ldflags-y += --wrap=devm_cxl_add_rch_dport
 ldflags-y += --wrap=cxl_rcd_component_reg_phys
 ldflags-y += --wrap=cxl_endpoint_parse_cdat
 ldflags-y += --wrap=cxl_dport_init_ras_reporting
+ldflags-y += --wrap=devm_cxl_switch_port_decoders_setup
+ldflags-y += --wrap=devm_cxl_endpoint_decoders_setup
 
 DRIVERS := ../../../drivers
 CXL_SRC := $(DRIVERS)/cxl
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index 306c5cbc24187..36dff58275a25 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -643,13 +643,6 @@ static struct cxl_hdm *mock_cxl_setup_hdm(struct cxl_port *port,
 	return cxlhdm;
 }
 
-static int mock_cxl_add_passthrough_decoder(struct cxl_port *port)
-{
-	dev_err(&port->dev, "unexpected passthrough decoder for cxl_test\n");
-	return -EOPNOTSUPP;
-}
-
-
 struct target_map_ctx {
 	u32 *target_map;
 	int index;
@@ -1013,6 +1006,36 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
 	return 0;
 }
 
+static int __mock_cxl_decoders_setup(struct cxl_port *port)
+{
+	struct cxl_hdm *cxlhdm;
+
+	cxlhdm = mock_cxl_setup_hdm(port, NULL);
+	if (IS_ERR(cxlhdm)) {
+		if (PTR_ERR(cxlhdm) != -ENODEV)
+			dev_err(&port->dev, "Failed to map HDM decoder capability\n");
+		return PTR_ERR(cxlhdm);
+	}
+
+	return mock_cxl_enumerate_decoders(cxlhdm, NULL);
+}
+
+static int mock_cxl_switch_port_decoders_setup(struct cxl_port *port)
+{
+	if (is_cxl_root(port) || is_cxl_endpoint(port))
+		return -EOPNOTSUPP;
+
+	return __mock_cxl_decoders_setup(port);
+}
+
+static int mock_cxl_endpoint_decoders_setup(struct cxl_port *port)
+{
+	if (!is_cxl_endpoint(port))
+		return -EOPNOTSUPP;
+
+	return __mock_cxl_decoders_setup(port);
+}
+
 static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
 {
 	struct platform_device **array;
@@ -1127,10 +1150,9 @@ static struct cxl_mock_ops cxl_mock_ops = {
 	.acpi_table_parse_cedt = mock_acpi_table_parse_cedt,
 	.acpi_evaluate_integer = mock_acpi_evaluate_integer,
 	.acpi_pci_find_root = mock_acpi_pci_find_root,
+	.devm_cxl_switch_port_decoders_setup = mock_cxl_switch_port_decoders_setup,
+	.devm_cxl_endpoint_decoders_setup = mock_cxl_endpoint_decoders_setup,
 	.devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports,
-	.devm_cxl_setup_hdm = mock_cxl_setup_hdm,
-	.devm_cxl_add_passthrough_decoder = mock_cxl_add_passthrough_decoder,
-	.devm_cxl_enumerate_decoders = mock_cxl_enumerate_decoders,
 	.cxl_endpoint_parse_cdat = mock_cxl_endpoint_parse_cdat,
 	.list = LIST_HEAD_INIT(cxl_mock_ops.list),
 };
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index 1989ae020df3d..f335889b7756a 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -131,55 +131,35 @@ __wrap_nvdimm_bus_register(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(__wrap_nvdimm_bus_register);
 
-struct cxl_hdm *__wrap_devm_cxl_setup_hdm(struct cxl_port *port,
-					  struct cxl_endpoint_dvsec_info *info)
-
-{
-	int index;
-	struct cxl_hdm *cxlhdm;
-	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
-	if (ops && ops->is_mock_port(port->uport_dev))
-		cxlhdm = ops->devm_cxl_setup_hdm(port, info);
-	else
-		cxlhdm = devm_cxl_setup_hdm(port, info);
-	put_cxl_mock_ops(index);
-
-	return cxlhdm;
-}
-EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_setup_hdm, "CXL");
-
-int __wrap_devm_cxl_add_passthrough_decoder(struct cxl_port *port)
+int __wrap_devm_cxl_switch_port_decoders_setup(struct cxl_port *port)
 {
 	int rc, index;
 	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
 
 	if (ops && ops->is_mock_port(port->uport_dev))
-		rc = ops->devm_cxl_add_passthrough_decoder(port);
+		rc = ops->devm_cxl_switch_port_decoders_setup(port);
 	else
-		rc = devm_cxl_add_passthrough_decoder(port);
+		rc = devm_cxl_switch_port_decoders_setup(port);
 	put_cxl_mock_ops(index);
 
 	return rc;
 }
-EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_passthrough_decoder, "CXL");
+EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_switch_port_decoders_setup, "CXL");
 
-int __wrap_devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
-				       struct cxl_endpoint_dvsec_info *info)
+int __wrap_devm_cxl_endpoint_decoders_setup(struct cxl_port *port)
 {
 	int rc, index;
-	struct cxl_port *port = cxlhdm->port;
 	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
 
 	if (ops && ops->is_mock_port(port->uport_dev))
-		rc = ops->devm_cxl_enumerate_decoders(cxlhdm, info);
+		rc = ops->devm_cxl_endpoint_decoders_setup(port);
 	else
-		rc = devm_cxl_enumerate_decoders(cxlhdm, info);
+		rc = devm_cxl_endpoint_decoders_setup(port);
 	put_cxl_mock_ops(index);
 
 	return rc;
 }
-EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_enumerate_decoders, "CXL");
+EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_endpoint_decoders_setup, "CXL");
 
 int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port)
 {
@@ -211,39 +191,6 @@ int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds)
 }
 EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, "CXL");
 
-int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
-			       struct cxl_hdm *cxlhdm,
-			       struct cxl_endpoint_dvsec_info *info)
-{
-	int rc = 0, index;
-	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
-	if (ops && ops->is_mock_dev(cxlds->dev))
-		rc = 0;
-	else
-		rc = cxl_hdm_decode_init(cxlds, cxlhdm, info);
-	put_cxl_mock_ops(index);
-
-	return rc;
-}
-EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, "CXL");
-
-int __wrap_cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds,
-			       struct cxl_endpoint_dvsec_info *info)
-{
-	int rc = 0, index;
-	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
-	if (ops && ops->is_mock_dev(cxlds->dev))
-		rc = 0;
-	else
-		rc = cxl_dvsec_rr_decode(cxlds, info);
-	put_cxl_mock_ops(index);
-
-	return rc;
-}
-EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dvsec_rr_decode, "CXL");
-
 struct cxl_dport *__wrap_devm_cxl_add_rch_dport(struct cxl_port *port,
 						struct device *dport_dev,
 						int port_id,
diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h
index d1b0271d28220..9d5ad3fd55ecc 100644
--- a/tools/testing/cxl/test/mock.h
+++ b/tools/testing/cxl/test/mock.h
@@ -20,11 +20,8 @@ struct cxl_mock_ops {
 	bool (*is_mock_port)(struct device *dev);
 	bool (*is_mock_dev)(struct device *dev);
 	int (*devm_cxl_port_enumerate_dports)(struct cxl_port *port);
-	struct cxl_hdm *(*devm_cxl_setup_hdm)(
-		struct cxl_port *port, struct cxl_endpoint_dvsec_info *info);
-	int (*devm_cxl_add_passthrough_decoder)(struct cxl_port *port);
-	int (*devm_cxl_enumerate_decoders)(
-		struct cxl_hdm *hdm, struct cxl_endpoint_dvsec_info *info);
+	int (*devm_cxl_switch_port_decoders_setup)(struct cxl_port *port);
+	int (*devm_cxl_endpoint_decoders_setup)(struct cxl_port *port);
 	void (*cxl_endpoint_parse_cdat)(struct cxl_port *port);
 };
 

From 11b6f7c7f7c6543bff7e9a3e2b67579bd0481ac9 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Fri, 29 Aug 2025 11:09:24 -0700
Subject: [PATCH 021/143] cxl: Defer dport allocation for switch ports

The current implementation enumerates the dports during the cxl_port
driver probe. Without an endpoint connected, the dport may not be
active during port probe. This scheme may prevent a valid hardware
dport id from being retrieved and MMIO registers from being read when
an endpoint is hot-plugged. Defer the dport allocation and setup until
memdev probe so the endpoint is guaranteed to be connected.

In the original enumeration behavior, there are 3 phases (or 2 if no CXL
switches) for port creation. cxl_acpi() creates a Root Port (RP) from the
ACPI0017.N device. Through that it enumerates downstream ports composed
of ACPI0016.N devices through add_host_bridge_dport(). Once done, it
uses add_host_bridge_uport() to create the ports that enumerate the PCI
RPs as the dports of these ports. Every time a port is created, the port
driver is attached, cxl_switch_port_probe() is called and
devm_cxl_port_enumerate_dports() is invoked to enumerate and probe
the dports.

The second phase applies if there are any CXL switches. When the pci
endpoint device driver (cxl_pci) calls probe, it adds a mem device and
triggers cxl_mem_probe(). cxl_mem_probe() calls
devm_cxl_enumerate_ports() and attempts to discover and create all the
ports that represent CXL switches. During this phase, a port is created
per switch and the attached dports are also enumerated and probed.

The last phase is creating endpoint port which happens for all endpoint
devices.

In the new sequence, instead of creating all possible dports at initial
port creation, port instantiation is deferred until a memdev beneath
that dport arrives. Introduce devm_cxl_create_or_extend_port() to
centralize the creation and extension of ports with new dports as memory
devices arrive. As part of this rework, the switch decoder target list
is amended at runtime as dports show up.

While the decoders are allocated during the port driver probe, the
decoders must also be updated, since previously they were set up when
all the dports were set up. Now every time a dport is set up per
endpoint, the switch target listing needs to be updated with the new
dport. A guard(rwsem_write) is used to update decoder targets. This is
similar to when decoder_populate_targets() is called and the decoder
programming must be protected.

Also the port registers are probed the first time when the first dport
shows up. This ensures that the CXL link is established when the port
registers are probed.

[dj] Use ERR_CAST() (Jonathan)

Link: https://lore.kernel.org/linux-cxl/20250305100123.3077031-1-rrichter@amd.com/
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 4f06d81e7c6a02f850bfe9812295b1e859ab2db0)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/cdat.c |   2 +-
 drivers/cxl/core/core.h |   2 +
 drivers/cxl/core/hdm.c  |   6 -
 drivers/cxl/core/pci.c  |  46 ++++++++
 drivers/cxl/core/port.c | 240 ++++++++++++++++++++++++++++++++--------
 drivers/cxl/port.c      |  11 +-
 6 files changed, 247 insertions(+), 60 deletions(-)

diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c
index bca1ec279651d..44c1c778b7cce 100644
--- a/drivers/cxl/core/cdat.c
+++ b/drivers/cxl/core/cdat.c
@@ -338,7 +338,7 @@ static int match_cxlrd_hb(struct device *dev, void *data)
 
 	guard(rwsem_read)(&cxl_rwsem.region);
 	for (int i = 0; i < cxlsd->nr_targets; i++) {
-		if (host_bridge == cxlsd->target[i]->dport_dev)
+		if (cxlsd->target[i] && host_bridge == cxlsd->target[i]->dport_dev)
 			return 1;
 	}
 
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 1fb66132b7777..c7c314a372a95 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -147,6 +147,8 @@ int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port,
 int cxl_ras_init(void);
 void cxl_ras_exit(void);
 int cxl_gpf_port_setup(struct cxl_dport *dport);
+struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port,
+					    struct device *dport_dev);
 
 struct cxl_hdm;
 int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index d435178f63b82..4ecbf1d23bc59 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -52,8 +52,6 @@ static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld)
 static int devm_cxl_add_passthrough_decoder(struct cxl_port *port)
 {
 	struct cxl_switch_decoder *cxlsd;
-	struct cxl_dport *dport = NULL;
-	unsigned long index;
 	struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
 
 	/*
@@ -69,10 +67,6 @@ static int devm_cxl_add_passthrough_decoder(struct cxl_port *port)
 
 	device_lock_assert(&port->dev);
 
-	xa_for_each(&port->dports, index, dport)
-		break;
-	cxlsd->cxld.target_map[0] = dport->port_id;
-
 	return add_hdm_decoder(port, &cxlsd->cxld);
 }
 
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index fa02366d35f2d..9ec288ed39aea 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -24,6 +24,52 @@ static unsigned short media_ready_timeout = 60;
 module_param(media_ready_timeout, ushort, 0644);
 MODULE_PARM_DESC(media_ready_timeout, "seconds to wait for media ready");
 
+static int pci_get_port_num(struct pci_dev *pdev)
+{
+	u32 lnkcap;
+	int type;
+
+	type = pci_pcie_type(pdev);
+	if (type != PCI_EXP_TYPE_DOWNSTREAM && type != PCI_EXP_TYPE_ROOT_PORT)
+		return -EINVAL;
+
+	if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP,
+				  &lnkcap))
+		return -ENXIO;
+
+	return FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
+}
+
+/**
+ * devm_cxl_add_dport_by_dev - allocate a dport by the dport device
+ * @port: cxl_port that hosts the dport
+ * @dport_dev: 'struct device' of the dport
+ *
+ * Returns the allocated dport on success or ERR_PTR() of -errno on error
+ */
+struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port,
+					    struct device *dport_dev)
+{
+	struct cxl_register_map map;
+	struct pci_dev *pdev;
+	int port_num, rc;
+
+	if (!dev_is_pci(dport_dev))
+		return ERR_PTR(-EINVAL);
+
+	pdev = to_pci_dev(dport_dev);
+	port_num = pci_get_port_num(pdev);
+	if (port_num < 0)
+		return ERR_PTR(port_num);
+
+	rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
+	if (rc)
+		return ERR_PTR(rc);
+
+	device_lock_assert(&port->dev);
+	return devm_cxl_add_dport(port, dport_dev, port_num, map.resource);
+}
+
 struct cxl_walk_context {
 	struct pci_bus *bus;
 	struct cxl_port *port;
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index c36e089e53990..c016eaa1e91b0 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -1358,21 +1358,6 @@ static struct cxl_port *find_cxl_port(struct device *dport_dev,
 	return port;
 }
 
-static struct cxl_port *find_cxl_port_at(struct cxl_port *parent_port,
-					 struct device *dport_dev,
-					 struct cxl_dport **dport)
-{
-	struct cxl_find_port_ctx ctx = {
-		.dport_dev = dport_dev,
-		.parent_port = parent_port,
-		.dport = dport,
-	};
-	struct cxl_port *port;
-
-	port = __find_cxl_port(&ctx);
-	return port;
-}
-
 /*
  * All users of grandparent() are using it to walk PCIe-like switch port
  * hierarchy. A PCIe switch is comprised of a bridge device representing the
@@ -1548,13 +1533,154 @@ static resource_size_t find_component_registers(struct device *dev)
 	return map.resource;
 }
 
+static int match_port_by_uport(struct device *dev, const void *data)
+{
+	const struct device *uport_dev = data;
+	struct cxl_port *port;
+
+	if (!is_cxl_port(dev))
+		return 0;
+
+	port = to_cxl_port(dev);
+	return uport_dev == port->uport_dev;
+}
+
+/*
+ * Function takes a device reference on the port device. Caller should do a
+ * put_device() when done.
+ */
+static struct cxl_port *find_cxl_port_by_uport(struct device *uport_dev)
+{
+	struct device *dev;
+
+	dev = bus_find_device(&cxl_bus_type, NULL, uport_dev, match_port_by_uport);
+	if (dev)
+		return to_cxl_port(dev);
+	return NULL;
+}
+
+static int update_decoder_targets(struct device *dev, void *data)
+{
+	struct cxl_dport *dport = data;
+	struct cxl_switch_decoder *cxlsd;
+	struct cxl_decoder *cxld;
+	int i;
+
+	if (!is_switch_decoder(dev))
+		return 0;
+
+	cxlsd = to_cxl_switch_decoder(dev);
+	cxld = &cxlsd->cxld;
+	guard(rwsem_write)(&cxl_rwsem.region);
+
+	for (i = 0; i < cxld->interleave_ways; i++) {
+		if (cxld->target_map[i] == dport->port_id) {
+			cxlsd->target[i] = dport;
+			dev_dbg(dev, "dport%d found in target list, index %d\n",
+				dport->port_id, i);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+DEFINE_FREE(del_cxl_dport, struct cxl_dport *, if (!IS_ERR_OR_NULL(_T)) del_dport(_T))
+static struct cxl_dport *cxl_port_add_dport(struct cxl_port *port,
+					    struct device *dport_dev)
+{
+	struct cxl_dport *dport;
+	int rc;
+
+	device_lock_assert(&port->dev);
+	if (!port->dev.driver)
+		return ERR_PTR(-ENXIO);
+
+	dport = cxl_find_dport_by_dev(port, dport_dev);
+	if (dport) {
+		dev_dbg(&port->dev, "dport%d:%s already exists\n",
+			dport->port_id, dev_name(dport_dev));
+		return ERR_PTR(-EBUSY);
+	}
+
+	struct cxl_dport *new_dport __free(del_cxl_dport) =
+		devm_cxl_add_dport_by_dev(port, dport_dev);
+	if (IS_ERR(new_dport))
+		return new_dport;
+
+	cxl_switch_parse_cdat(port);
+
+	if (ida_is_empty(&port->decoder_ida)) {
+		rc = devm_cxl_switch_port_decoders_setup(port);
+		if (rc)
+			return ERR_PTR(rc);
+		dev_dbg(&port->dev, "first dport%d:%s added with decoders\n",
+			new_dport->port_id, dev_name(dport_dev));
+		return no_free_ptr(new_dport);
+	}
+
+	/* New dport added, update the decoder targets */
+	device_for_each_child(&port->dev, new_dport, update_decoder_targets);
+
+	dev_dbg(&port->dev, "dport%d:%s added\n", new_dport->port_id,
+		dev_name(dport_dev));
+
+	return no_free_ptr(new_dport);
+}
+
+static struct cxl_dport *devm_cxl_create_port(struct device *ep_dev,
+					      struct cxl_port *parent_port,
+					      struct cxl_dport *parent_dport,
+					      struct device *uport_dev,
+					      struct device *dport_dev)
+{
+	resource_size_t component_reg_phys;
+
+	device_lock_assert(&parent_port->dev);
+	if (!parent_port->dev.driver) {
+		dev_warn(ep_dev,
+			 "port %s:%s:%s disabled, failed to enumerate CXL.mem\n",
+			 dev_name(&parent_port->dev), dev_name(uport_dev),
+			 dev_name(dport_dev));
+	}
+
+	struct cxl_port *port __free(put_cxl_port) =
+		find_cxl_port_by_uport(uport_dev);
+	if (!port) {
+		component_reg_phys = find_component_registers(uport_dev);
+		port = devm_cxl_add_port(&parent_port->dev, uport_dev,
+					 component_reg_phys, parent_dport);
+		if (IS_ERR(port))
+			return ERR_CAST(port);
+
+		/*
+		 * retry to make sure a port is found. a port device
+		 * reference is taken.
+		 */
+		port = find_cxl_port_by_uport(uport_dev);
+		if (!port)
+			return ERR_PTR(-ENODEV);
+
+		dev_dbg(ep_dev, "created port %s:%s\n",
+			dev_name(&port->dev), dev_name(port->uport_dev));
+	} else {
+		/*
+		 * Port was created right before this function is
+		 * called. Signal the caller to deal with it.
+		 */
+		return ERR_PTR(-EAGAIN);
+	}
+
+	guard(device)(&port->dev);
+	return cxl_port_add_dport(port, dport_dev);
+}
+
 static int add_port_attach_ep(struct cxl_memdev *cxlmd,
 			      struct device *uport_dev,
 			      struct device *dport_dev)
 {
 	struct device *dparent = grandparent(dport_dev);
 	struct cxl_dport *dport, *parent_dport;
-	resource_size_t component_reg_phys;
 	int rc;
 
 	if (is_cxl_host_bridge(dparent)) {
@@ -1569,42 +1695,31 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd,
 	}
 
 	struct cxl_port *parent_port __free(put_cxl_port) =
-		find_cxl_port(dparent, &parent_dport);
+		find_cxl_port_by_uport(dparent->parent);
 	if (!parent_port) {
 		/* iterate to create this parent_port */
 		return -EAGAIN;
 	}
 
-	/*
-	 * Definition with __free() here to keep the sequence of
-	 * dereferencing the device of the port before the parent_port releasing.
-	 */
-	struct cxl_port *port __free(put_cxl_port) = NULL;
 	scoped_guard(device, &parent_port->dev) {
-		if (!parent_port->dev.driver) {
-			dev_warn(&cxlmd->dev,
-				 "port %s:%s disabled, failed to enumerate CXL.mem\n",
-				 dev_name(&parent_port->dev), dev_name(uport_dev));
-			return -ENXIO;
+		parent_dport = cxl_find_dport_by_dev(parent_port, dparent);
+		if (!parent_dport) {
+			parent_dport = cxl_port_add_dport(parent_port, dparent);
+			if (IS_ERR(parent_dport))
+				return PTR_ERR(parent_dport);
 		}
 
-		port = find_cxl_port_at(parent_port, dport_dev, &dport);
-		if (!port) {
-			component_reg_phys = find_component_registers(uport_dev);
-			port = devm_cxl_add_port(&parent_port->dev, uport_dev,
-						 component_reg_phys, parent_dport);
-			if (IS_ERR(port))
-				return PTR_ERR(port);
-
-			/* retry find to pick up the new dport information */
-			port = find_cxl_port_at(parent_port, dport_dev, &dport);
-			if (!port)
-				return -ENXIO;
+		dport = devm_cxl_create_port(&cxlmd->dev, parent_port,
+					     parent_dport, uport_dev,
+					     dport_dev);
+		if (IS_ERR(dport)) {
+			/* Port already exists, restart iteration */
+			if (PTR_ERR(dport) == -EAGAIN)
+				return 0;
+			return PTR_ERR(dport);
 		}
 	}
 
-	dev_dbg(&cxlmd->dev, "add to new port %s:%s\n",
-		dev_name(&port->dev), dev_name(port->uport_dev));
 	rc = cxl_add_ep(dport, &cxlmd->dev);
 	if (rc == -EBUSY) {
 		/*
@@ -1617,6 +1732,25 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd,
 	return rc;
 }
 
+static struct cxl_dport *find_or_add_dport(struct cxl_port *port,
+					   struct device *dport_dev)
+{
+	struct cxl_dport *dport;
+
+	device_lock_assert(&port->dev);
+	dport = cxl_find_dport_by_dev(port, dport_dev);
+	if (!dport) {
+		dport = cxl_port_add_dport(port, dport_dev);
+		if (IS_ERR(dport))
+			return dport;
+
+		/* New dport added, restart iteration */
+		return ERR_PTR(-EAGAIN);
+	}
+
+	return dport;
+}
+
 int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd)
 {
 	struct device *dev = &cxlmd->dev;
@@ -1659,12 +1793,26 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd)
 			dev_name(iter), dev_name(dport_dev),
 			dev_name(uport_dev));
 		struct cxl_port *port __free(put_cxl_port) =
-			find_cxl_port(dport_dev, &dport);
+			find_cxl_port_by_uport(uport_dev);
 		if (port) {
 			dev_dbg(&cxlmd->dev,
 				"found already registered port %s:%s\n",
 				dev_name(&port->dev),
 				dev_name(port->uport_dev));
+
+			/*
+			 * RP port enumerated by cxl_acpi without dport will
+			 * have the dport added here.
+			 */
+			scoped_guard(device, &port->dev) {
+				dport = find_or_add_dport(port, dport_dev);
+				if (IS_ERR(dport)) {
+					if (PTR_ERR(dport) == -EAGAIN)
+						goto retry;
+					return PTR_ERR(dport);
+				}
+			}
+
 			rc = cxl_add_ep(dport, &cxlmd->dev);
 
 			/*
@@ -1724,14 +1872,16 @@ static int decoder_populate_targets(struct cxl_switch_decoder *cxlsd,
 	device_lock_assert(&port->dev);
 
 	if (xa_empty(&port->dports))
-		return -EINVAL;
+		return 0;
 
 	guard(rwsem_write)(&cxl_rwsem.region);
 	for (i = 0; i < cxlsd->cxld.interleave_ways; i++) {
 		struct cxl_dport *dport = find_dport(port, cxld->target_map[i]);
 
-		if (!dport)
-			return -ENXIO;
+		if (!dport) {
+			/* dport may be activated later */
+			continue;
+		}
 		cxlsd->target[i] = dport;
 	}
 
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index d8cae2b5bac6c..51c8f2f84717a 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -59,18 +59,13 @@ static int discover_region(struct device *dev, void *unused)
 
 static int cxl_switch_port_probe(struct cxl_port *port)
 {
-	int rc;
+	/* Reset nr_dports for rebind of driver */
+	port->nr_dports = 0;
 
 	/* Cache the data early to ensure is_visible() works */
 	read_cdat_data(port);
 
-	rc = devm_cxl_port_enumerate_dports(port);
-	if (rc < 0)
-		return rc;
-
-	cxl_switch_parse_cdat(port);
-
-	return devm_cxl_switch_port_decoders_setup(port);
+	return 0;
 }
 
 static int cxl_endpoint_port_probe(struct cxl_port *port)

From 6762d6e771ab8acf14568b4554ebab9f70cc41e8 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Fri, 29 Aug 2025 11:09:25 -0700
Subject: [PATCH 022/143] cxl/test: Add mock version of
 devm_cxl_add_dport_by_dev()

devm_cxl_add_dport_by_dev() outside of cxl_test is done through PCI
hierarchy. However with cxl_test, it needs to be done through the
platform device hierarchy. Add the mock function for
devm_cxl_add_dport_by_dev().

When cxl_core calls a cxl_core exported function and that function is
mocked by cxl_test, the call chain causes a circular dependency issue. Dan
provided a workaround to avoid this issue. Apply the method to changes from
the late dport allocation changes in order to enable cxl-test.

In cxl_core the mocked functions are defined with "__" prefixed to the
function name. A macro maps the original function names onto the "__"
versions when a non-test version of the kernel is built. A few macros and
typedefs are used to allow mocking of those functions in cxl_test.

Co-developed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Li Ming <ming.li@zohomail.com>
Tested-by: Alison Schofield <alison.schofield@intel.com>
Tested-by: Robert Richter <rrichter@amd.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit d96eb90d9ca6e4652c8a23d48c94364aa061fdc4)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/core.h              |  2 --
 drivers/cxl/core/pci.c               |  7 ++--
 drivers/cxl/cxl.h                    | 20 +++++++++++
 tools/testing/cxl/Kbuild             |  1 +
 tools/testing/cxl/cxl_core_exports.c | 12 +++++++
 tools/testing/cxl/exports.h          | 10 ++++++
 tools/testing/cxl/test/cxl.c         | 53 ++++++++++++++++++++++++++--
 tools/testing/cxl/test/mock.c        | 23 ++++++++++++
 tools/testing/cxl/test/mock.h        |  2 ++
 9 files changed, 123 insertions(+), 7 deletions(-)
 create mode 100644 tools/testing/cxl/exports.h

diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index c7c314a372a95..1fb66132b7777 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -147,8 +147,6 @@ int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port,
 int cxl_ras_init(void);
 void cxl_ras_exit(void);
 int cxl_gpf_port_setup(struct cxl_dport *dport);
-struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port,
-					    struct device *dport_dev);
 
 struct cxl_hdm;
 int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 9ec288ed39aea..18825e1505d6a 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -41,14 +41,14 @@ static int pci_get_port_num(struct pci_dev *pdev)
 }
 
 /**
- * devm_cxl_add_dport_by_dev - allocate a dport by the dport device
+ * __devm_cxl_add_dport_by_dev - allocate a dport by dport device
  * @port: cxl_port that hosts the dport
  * @dport_dev: 'struct device' of the dport
  *
  * Returns the allocated dport on success or ERR_PTR() of -errno on error
  */
-struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port,
-					    struct device *dport_dev)
+struct cxl_dport *__devm_cxl_add_dport_by_dev(struct cxl_port *port,
+					      struct device *dport_dev)
 {
 	struct cxl_register_map map;
 	struct pci_dev *pdev;
@@ -69,6 +69,7 @@ struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port,
 	device_lock_assert(&port->dev);
 	return devm_cxl_add_dport(port, dport_dev, port_num, map.resource);
 }
+EXPORT_SYMBOL_NS_GPL(__devm_cxl_add_dport_by_dev, "CXL");
 
 struct cxl_walk_context {
 	struct pci_bus *bus;
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index e4f37c143c1ef..ed0df7db628ac 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -914,6 +914,10 @@ void cxl_coordinates_combine(struct access_coordinate *out,
 			     struct access_coordinate *c2);
 
 bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port);
+struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port,
+					    struct device *dport_dev);
+struct cxl_dport *__devm_cxl_add_dport_by_dev(struct cxl_port *port,
+					      struct device *dport_dev);
 
 /*
  * Unit test builds overrides this to __weak, find the 'strong' version
@@ -924,4 +928,20 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port);
 #endif
 
 u16 cxl_gpf_get_dvsec(struct device *dev);
+
+/*
+ * Declaration for functions that are mocked by cxl_test that are called by
+ * cxl_core. The respective functions are defined as __foo() and called by
+ * cxl_core as foo(). The macros below ensures that those functions would
+ * exist as foo(). See tools/testing/cxl/cxl_core_exports.c and
+ * tools/testing/cxl/exports.h for setting up the mock functions. The dance
+ * is done to avoid a circular dependency where cxl_core calls a function that
+ * ends up being a mock function and goes to * cxl_test where it calls a
+ * cxl_core function.
+ */
+#ifndef CXL_TEST_ENABLE
+#define DECLARE_TESTABLE(x) __##x
+#define devm_cxl_add_dport_by_dev DECLARE_TESTABLE(devm_cxl_add_dport_by_dev)
+#endif
+
 #endif /* __CXL_H__ */
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 51b8ab289eae9..81e3795673c5a 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -18,6 +18,7 @@ CXL_SRC := $(DRIVERS)/cxl
 CXL_CORE_SRC := $(DRIVERS)/cxl/core
 ccflags-y := -I$(srctree)/drivers/cxl/
 ccflags-y += -D__mock=__weak
+ccflags-y += -DCXL_TEST_ENABLE=1
 ccflags-y += -DTRACE_INCLUDE_PATH=$(CXL_CORE_SRC) -I$(srctree)/drivers/cxl/core/
 
 obj-m += cxl_acpi.o
diff --git a/tools/testing/cxl/cxl_core_exports.c b/tools/testing/cxl/cxl_core_exports.c
index f088792a8925f..0d18abc1f5a31 100644
--- a/tools/testing/cxl/cxl_core_exports.c
+++ b/tools/testing/cxl/cxl_core_exports.c
@@ -2,6 +2,18 @@
 /* Copyright(c) 2022 Intel Corporation. All rights reserved. */
 
 #include "cxl.h"
+#include "exports.h"
 
 /* Exporting of cxl_core symbols that are only used by cxl_test */
 EXPORT_SYMBOL_NS_GPL(cxl_num_decoders_committed, "CXL");
+
+cxl_add_dport_by_dev_fn _devm_cxl_add_dport_by_dev =
+	__devm_cxl_add_dport_by_dev;
+EXPORT_SYMBOL_NS_GPL(_devm_cxl_add_dport_by_dev, "CXL");
+
+struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port,
+					    struct device *dport_dev)
+{
+	return _devm_cxl_add_dport_by_dev(port, dport_dev);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_add_dport_by_dev, "CXL");
diff --git a/tools/testing/cxl/exports.h b/tools/testing/cxl/exports.h
new file mode 100644
index 0000000000000..9261ce6f11973
--- /dev/null
+++ b/tools/testing/cxl/exports.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2025 Intel Corporation */
+#ifndef __MOCK_CXL_EXPORTS_H_
+#define __MOCK_CXL_EXPORTS_H_
+
+typedef struct cxl_dport *(*cxl_add_dport_by_dev_fn)(struct cxl_port *port,
+							  struct device *dport_dev);
+extern cxl_add_dport_by_dev_fn _devm_cxl_add_dport_by_dev;
+
+#endif
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index 36dff58275a25..b10434236590f 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -1036,10 +1036,12 @@ static int mock_cxl_endpoint_decoders_setup(struct cxl_port *port)
 	return __mock_cxl_decoders_setup(port);
 }
 
-static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
+static int get_port_array(struct cxl_port *port,
+			  struct platform_device ***port_array,
+			  int *port_array_size)
 {
 	struct platform_device **array;
-	int i, array_size;
+	int array_size;
 
 	if (port->depth == 1) {
 		if (is_multi_bridge(port->uport_dev)) {
@@ -1073,6 +1075,22 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
 		return -ENXIO;
 	}
 
+	*port_array = array;
+	*port_array_size = array_size;
+
+	return 0;
+}
+
+static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
+{
+	struct platform_device **array;
+	int i, array_size;
+	int rc;
+
+	rc = get_port_array(port, &array, &array_size);
+	if (rc)
+		return rc;
+
 	for (i = 0; i < array_size; i++) {
 		struct platform_device *pdev = array[i];
 		struct cxl_dport *dport;
@@ -1094,6 +1112,36 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
 	return 0;
 }
 
+static struct cxl_dport *mock_cxl_add_dport_by_dev(struct cxl_port *port,
+						   struct device *dport_dev)
+{
+	struct platform_device **array;
+	int rc, i, array_size;
+
+	rc = get_port_array(port, &array, &array_size);
+	if (rc)
+		return ERR_PTR(rc);
+
+	for (i = 0; i < array_size; i++) {
+		struct platform_device *pdev = array[i];
+
+		if (pdev->dev.parent != port->uport_dev) {
+			dev_dbg(&port->dev, "%s: mismatch parent %s\n",
+				dev_name(port->uport_dev),
+				dev_name(pdev->dev.parent));
+			continue;
+		}
+
+		if (&pdev->dev != dport_dev)
+			continue;
+
+		return devm_cxl_add_dport(port, &pdev->dev, pdev->id,
+					  CXL_RESOURCE_NONE);
+	}
+
+	return ERR_PTR(-ENODEV);
+}
+
 /*
  * Faking the cxl_dpa_perf for the memdev when appropriate.
  */
@@ -1154,6 +1202,7 @@ static struct cxl_mock_ops cxl_mock_ops = {
 	.devm_cxl_endpoint_decoders_setup = mock_cxl_endpoint_decoders_setup,
 	.devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports,
 	.cxl_endpoint_parse_cdat = mock_cxl_endpoint_parse_cdat,
+	.devm_cxl_add_dport_by_dev = mock_cxl_add_dport_by_dev,
 	.list = LIST_HEAD_INIT(cxl_mock_ops.list),
 };
 
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index f335889b7756a..e98101f083cd3 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -10,12 +10,18 @@
 #include <cxlmem.h>
 #include <cxlpci.h>
 #include "mock.h"
+#include "../exports.h"
 
 static LIST_HEAD(mock);
 
+static struct cxl_dport *
+redirect_devm_cxl_add_dport_by_dev(struct cxl_port *port,
+				   struct device *dport_dev);
+
 void register_cxl_mock_ops(struct cxl_mock_ops *ops)
 {
 	list_add_rcu(&ops->list, &mock);
+	_devm_cxl_add_dport_by_dev = redirect_devm_cxl_add_dport_by_dev;
 }
 EXPORT_SYMBOL_GPL(register_cxl_mock_ops);
 
@@ -23,6 +29,7 @@ DEFINE_STATIC_SRCU(cxl_mock_srcu);
 
 void unregister_cxl_mock_ops(struct cxl_mock_ops *ops)
 {
+	_devm_cxl_add_dport_by_dev = __devm_cxl_add_dport_by_dev;
 	list_del_rcu(&ops->list);
 	synchronize_srcu(&cxl_mock_srcu);
 }
@@ -258,6 +265,22 @@ void __wrap_cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device
 }
 EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dport_init_ras_reporting, "CXL");
 
+struct cxl_dport *redirect_devm_cxl_add_dport_by_dev(struct cxl_port *port,
+						     struct device *dport_dev)
+{
+	int index;
+	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+	struct cxl_dport *dport;
+
+	if (ops && ops->is_mock_port(port->uport_dev))
+		dport = ops->devm_cxl_add_dport_by_dev(port, dport_dev);
+	else
+		dport = __devm_cxl_add_dport_by_dev(port, dport_dev);
+	put_cxl_mock_ops(index);
+
+	return dport;
+}
+
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("cxl_test: emulation module");
 MODULE_IMPORT_NS("ACPI");
diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h
index 9d5ad3fd55ecc..4ed932e76aae8 100644
--- a/tools/testing/cxl/test/mock.h
+++ b/tools/testing/cxl/test/mock.h
@@ -23,6 +23,8 @@ struct cxl_mock_ops {
 	int (*devm_cxl_switch_port_decoders_setup)(struct cxl_port *port);
 	int (*devm_cxl_endpoint_decoders_setup)(struct cxl_port *port);
 	void (*cxl_endpoint_parse_cdat)(struct cxl_port *port);
+	struct cxl_dport *(*devm_cxl_add_dport_by_dev)(struct cxl_port *port,
+						       struct device *dport_dev);
 };
 
 void register_cxl_mock_ops(struct cxl_mock_ops *ops);

From 9982139286a0498afb1ec68094d037351d200ce6 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Fri, 29 Aug 2025 11:09:26 -0700
Subject: [PATCH 023/143] cxl/test: Adjust the mock version of
 devm_cxl_switch_port_decoders_setup()

With devm_cxl_switch_port_decoders_setup() being called within cxl_core
instead of by the port driver probe, adjustments are needed to deal with
circular symbol dependency when this function is being mock'd. Add the
appropriate changes to get around the circular dependency.

Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 644685abc16b58b3afcc2feb0ac14e86476ca2ed)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/hdm.c               |  6 +++---
 drivers/cxl/cxl.h                    |  2 ++
 tools/testing/cxl/Kbuild             |  1 -
 tools/testing/cxl/cxl_core_exports.c | 10 ++++++++++
 tools/testing/cxl/exports.h          |  3 +++
 tools/testing/cxl/test/mock.c        | 10 +++++++---
 6 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 4ecbf1d23bc59..de78601821e60 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -1219,12 +1219,12 @@ static int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm,
 }
 
 /**
- * devm_cxl_switch_port_decoders_setup - allocate and setup switch decoders
+ * __devm_cxl_switch_port_decoders_setup - allocate and setup switch decoders
  * @port: CXL port context
  *
  * Return 0 or -errno on error
  */
-int devm_cxl_switch_port_decoders_setup(struct cxl_port *port)
+int __devm_cxl_switch_port_decoders_setup(struct cxl_port *port)
 {
 	struct cxl_hdm *cxlhdm;
 
@@ -1248,7 +1248,7 @@ int devm_cxl_switch_port_decoders_setup(struct cxl_port *port)
 	dev_err(&port->dev, "HDM decoder capability not found\n");
 	return -ENXIO;
 }
-EXPORT_SYMBOL_NS_GPL(devm_cxl_switch_port_decoders_setup, "CXL");
+EXPORT_SYMBOL_NS_GPL(__devm_cxl_switch_port_decoders_setup, "CXL");
 
 /**
  * devm_cxl_endpoint_decoders_setup - allocate and setup endpoint decoders
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index ed0df7db628ac..7374c81f55f44 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -819,6 +819,7 @@ struct cxl_endpoint_dvsec_info {
 };
 
 int devm_cxl_switch_port_decoders_setup(struct cxl_port *port);
+int __devm_cxl_switch_port_decoders_setup(struct cxl_port *port);
 int devm_cxl_endpoint_decoders_setup(struct cxl_port *port);
 
 struct cxl_dev_state;
@@ -942,6 +943,7 @@ u16 cxl_gpf_get_dvsec(struct device *dev);
 #ifndef CXL_TEST_ENABLE
 #define DECLARE_TESTABLE(x) __##x
 #define devm_cxl_add_dport_by_dev DECLARE_TESTABLE(devm_cxl_add_dport_by_dev)
+#define devm_cxl_switch_port_decoders_setup DECLARE_TESTABLE(devm_cxl_switch_port_decoders_setup)
 #endif
 
 #endif /* __CXL_H__ */
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 81e3795673c5a..0d5ce4b74b9f7 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -10,7 +10,6 @@ ldflags-y += --wrap=devm_cxl_add_rch_dport
 ldflags-y += --wrap=cxl_rcd_component_reg_phys
 ldflags-y += --wrap=cxl_endpoint_parse_cdat
 ldflags-y += --wrap=cxl_dport_init_ras_reporting
-ldflags-y += --wrap=devm_cxl_switch_port_decoders_setup
 ldflags-y += --wrap=devm_cxl_endpoint_decoders_setup
 
 DRIVERS := ../../../drivers
diff --git a/tools/testing/cxl/cxl_core_exports.c b/tools/testing/cxl/cxl_core_exports.c
index 0d18abc1f5a31..6754de35598d5 100644
--- a/tools/testing/cxl/cxl_core_exports.c
+++ b/tools/testing/cxl/cxl_core_exports.c
@@ -17,3 +17,13 @@ struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port,
 	return _devm_cxl_add_dport_by_dev(port, dport_dev);
 }
 EXPORT_SYMBOL_NS_GPL(devm_cxl_add_dport_by_dev, "CXL");
+
+cxl_switch_decoders_setup_fn _devm_cxl_switch_port_decoders_setup =
+	__devm_cxl_switch_port_decoders_setup;
+EXPORT_SYMBOL_NS_GPL(_devm_cxl_switch_port_decoders_setup, "CXL");
+
+int devm_cxl_switch_port_decoders_setup(struct cxl_port *port)
+{
+	return _devm_cxl_switch_port_decoders_setup(port);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_switch_port_decoders_setup, "CXL");
diff --git a/tools/testing/cxl/exports.h b/tools/testing/cxl/exports.h
index 9261ce6f11973..7ebee7c0bd67e 100644
--- a/tools/testing/cxl/exports.h
+++ b/tools/testing/cxl/exports.h
@@ -7,4 +7,7 @@ typedef struct cxl_dport *(*cxl_add_dport_by_dev_fn)(struct cxl_port *port,
 							  struct device *dport_dev);
 extern cxl_add_dport_by_dev_fn _devm_cxl_add_dport_by_dev;
 
+typedef int(*cxl_switch_decoders_setup_fn)(struct cxl_port *port);
+extern cxl_switch_decoders_setup_fn _devm_cxl_switch_port_decoders_setup;
+
 #endif
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index e98101f083cd3..995269a75cbd1 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -17,11 +17,14 @@ static LIST_HEAD(mock);
 static struct cxl_dport *
 redirect_devm_cxl_add_dport_by_dev(struct cxl_port *port,
 				   struct device *dport_dev);
+static int redirect_devm_cxl_switch_port_decoders_setup(struct cxl_port *port);
 
 void register_cxl_mock_ops(struct cxl_mock_ops *ops)
 {
 	list_add_rcu(&ops->list, &mock);
 	_devm_cxl_add_dport_by_dev = redirect_devm_cxl_add_dport_by_dev;
+	_devm_cxl_switch_port_decoders_setup =
+		redirect_devm_cxl_switch_port_decoders_setup;
 }
 EXPORT_SYMBOL_GPL(register_cxl_mock_ops);
 
@@ -29,6 +32,8 @@ DEFINE_STATIC_SRCU(cxl_mock_srcu);
 
 void unregister_cxl_mock_ops(struct cxl_mock_ops *ops)
 {
+	_devm_cxl_switch_port_decoders_setup =
+		__devm_cxl_switch_port_decoders_setup;
 	_devm_cxl_add_dport_by_dev = __devm_cxl_add_dport_by_dev;
 	list_del_rcu(&ops->list);
 	synchronize_srcu(&cxl_mock_srcu);
@@ -138,7 +143,7 @@ __wrap_nvdimm_bus_register(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(__wrap_nvdimm_bus_register);
 
-int __wrap_devm_cxl_switch_port_decoders_setup(struct cxl_port *port)
+int redirect_devm_cxl_switch_port_decoders_setup(struct cxl_port *port)
 {
 	int rc, index;
 	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
@@ -146,12 +151,11 @@ int __wrap_devm_cxl_switch_port_decoders_setup(struct cxl_port *port)
 	if (ops && ops->is_mock_port(port->uport_dev))
 		rc = ops->devm_cxl_switch_port_decoders_setup(port);
 	else
-		rc = devm_cxl_switch_port_decoders_setup(port);
+		rc = __devm_cxl_switch_port_decoders_setup(port);
 	put_cxl_mock_ops(index);
 
 	return rc;
 }
-EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_switch_port_decoders_setup, "CXL");
 
 int __wrap_devm_cxl_endpoint_decoders_setup(struct cxl_port *port)
 {

From 95be66117d66c0bc98c1926dbeb1346d9000d524 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Fri, 29 Aug 2025 11:09:27 -0700
Subject: [PATCH 024/143] cxl/test: Setup target_map for cxl_test decoder
 initialization

cxl_test uses mock functions for decoder enumeration. Add initialization
of the cxld->target_map[] for cxl_test based decoders in the mock
functions.

Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Tested-by: Robert Richter <rrichter@amd.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 87439b598ad962ffc5744e2e0a8b461e78d8d32f)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 tools/testing/cxl/test/cxl.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index b10434236590f..cb18ee41a7cf8 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -887,15 +887,21 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
 		 */
 		if (WARN_ON(!dev))
 			continue;
+
 		cxlsd = to_cxl_switch_decoder(dev);
 		if (i == 0) {
 			/* put cxl_mem.4 second in the decode order */
-			if (pdev->id == 4)
+			if (pdev->id == 4) {
 				cxlsd->target[1] = dport;
-			else
+				cxld->target_map[1] = dport->port_id;
+			} else {
 				cxlsd->target[0] = dport;
-		} else
+				cxld->target_map[0] = dport->port_id;
+			}
+		} else {
 			cxlsd->target[0] = dport;
+			cxld->target_map[0] = dport->port_id;
+		}
 		cxld = &cxlsd->cxld;
 		cxld->target_type = CXL_DECODER_HOSTONLYMEM;
 		cxld->flags = CXL_DECODER_F_ENABLE;

From a7610893815f428abc4108df22421f3390dd4b0a Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Fri, 29 Aug 2025 11:09:28 -0700
Subject: [PATCH 025/143] cxl: Change sslbis handler to only handle single
 dport

While cxl_switch_parse_cdat() is harmless to be run multiple times, it is
not efficient in the current scheme where one dport is being updated at
a time by the memdev probe path. Change the input parameter to the
specific dport being updated to pick up the SSLBIS information for just
that dport.

Reviewed-by: Gregory Price <gourry@gourry.net>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Li Ming <ming.li@zohomail.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Robert Richter <rrichter@amd.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit d64035a5a37741b25712fb9c2f6aca535c2967ea)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/cdat.c | 23 ++++++++++-------------
 drivers/cxl/core/port.c |  2 +-
 drivers/cxl/cxl.h       |  2 +-
 3 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c
index 44c1c778b7cce..c4bd6e8a0cf03 100644
--- a/drivers/cxl/core/cdat.c
+++ b/drivers/cxl/core/cdat.c
@@ -440,8 +440,8 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg,
 	} *tbl = (struct acpi_cdat_sslbis_table *)header;
 	int size = sizeof(header->cdat) + sizeof(tbl->sslbis_header);
 	struct acpi_cdat_sslbis *sslbis;
-	struct cxl_port *port = arg;
-	struct device *dev = &port->dev;
+	struct cxl_dport *dport = arg;
+	struct device *dev = &dport->port->dev;
 	int remain, entries, i;
 	u16 len;
 
@@ -467,8 +467,6 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg,
 		u16 y = le16_to_cpu((__force __le16)tbl->entries[i].porty_id);
 		__le64 le_base;
 		__le16 le_val;
-		struct cxl_dport *dport;
-		unsigned long index;
 		u16 dsp_id;
 		u64 val;
 
@@ -499,28 +497,27 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg,
 		val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base),
 				     sslbis->data_type);
 
-		xa_for_each(&port->dports, index, dport) {
-			if (dsp_id == ACPI_CDAT_SSLBIS_ANY_PORT ||
-			    dsp_id == dport->port_id) {
-				cxl_access_coordinate_set(dport->coord,
-							  sslbis->data_type,
-							  val);
-			}
+		if (dsp_id == ACPI_CDAT_SSLBIS_ANY_PORT ||
+		    dsp_id == dport->port_id) {
+			cxl_access_coordinate_set(dport->coord,
+						  sslbis->data_type, val);
+			return 0;
 		}
 	}
 
 	return 0;
 }
 
-void cxl_switch_parse_cdat(struct cxl_port *port)
+void cxl_switch_parse_cdat(struct cxl_dport *dport)
 {
+	struct cxl_port *port = dport->port;
 	int rc;
 
 	if (!port->cdat.table)
 		return;
 
 	rc = cdat_table_parse(ACPI_CDAT_TYPE_SSLBIS, cdat_sslbis_handler,
-			      port, port->cdat.table, port->cdat.length);
+			      dport, port->cdat.table, port->cdat.length);
 	rc = cdat_table_parse_output(rc);
 	if (rc)
 		dev_dbg(&port->dev, "Failed to parse SSLBIS: %d\n", rc);
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index c016eaa1e91b0..960d8eb6275e5 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -1608,7 +1608,7 @@ static struct cxl_dport *cxl_port_add_dport(struct cxl_port *port,
 	if (IS_ERR(new_dport))
 		return new_dport;
 
-	cxl_switch_parse_cdat(port);
+	cxl_switch_parse_cdat(new_dport);
 
 	if (ida_is_empty(&port->decoder_ida)) {
 		rc = devm_cxl_switch_port_decoders_setup(port);
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 7374c81f55f44..0e0e518031fb0 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -900,7 +900,7 @@ static inline u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint,
 #endif
 
 void cxl_endpoint_parse_cdat(struct cxl_port *port);
-void cxl_switch_parse_cdat(struct cxl_port *port);
+void cxl_switch_parse_cdat(struct cxl_dport *dport);
 
 int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
 				      struct access_coordinate *coord);

From fe39189d5dce7f1847811cb4ba2aeeab298c2168 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Thu, 14 Aug 2025 15:21:44 -0700
Subject: [PATCH 026/143] cxl: Move port register setup to when first dport
 appear

This patch moves the port register setup to when the first dport appears
via the memdev probe path. At this point, the CXL link should be
established and the register access is expected to succeed. This change
addresses an error message observed when PCIe hotplug is enabled on
an Intel platform. The error message "cxl portN: Couldn't locate the
CXL.cache and CXL.mem capability array header" is observed for the
host bridge (CHBCR) during cxl_acpi driver probe. If the cxl_acpi module
probe is running before the CXL link between the endpoint device and the
RP is established, then the platform may not have exposed DVSEC ID 3
and/or DVSEC ID 7 blocks which will trigger the error message. This
behavior is defined by the CXL spec r3.2 9.12.3 for RPs and DSPs, however
the Intel platform also added this behavior to the host bridge.

This change also needs the dport enumeration to be moved to the memdev
probe path in order to address the issue. This change is not a wholly
contained solution by itself.

[dj: Add missing var init during port alloc]

Suggested-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Tested-by: Robert Richter <rrichter@amd.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit f6ee24913de24dbda8d49213e1a27f5e1a5204cc)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/port.c | 17 ++++++++++++++---
 drivers/cxl/cxl.h       |  2 ++
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 960d8eb6275e5..d5f71eb1ade85 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -750,6 +750,7 @@ static struct cxl_port *cxl_port_alloc(struct device *uport_dev,
 	xa_init(&port->dports);
 	xa_init(&port->endpoints);
 	xa_init(&port->regions);
+	port->component_reg_phys = CXL_RESOURCE_NONE;
 
 	device_initialize(dev);
 	lockdep_set_class_and_subclass(&dev->mutex, &cxl_port_key, port->depth);
@@ -868,9 +869,7 @@ static int cxl_port_add(struct cxl_port *port,
 		if (rc)
 			return rc;
 
-		rc = cxl_port_setup_regs(port, component_reg_phys);
-		if (rc)
-			return rc;
+		port->component_reg_phys = component_reg_phys;
 	} else {
 		rc = dev_set_name(dev, "root%d", port->id);
 		if (rc)
@@ -1201,6 +1200,18 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev,
 
 	cxl_debugfs_create_dport_dir(dport);
 
+	/*
+	 * Setup port register if this is the first dport showed up. Having
+	 * a dport also means that there is at least 1 active link.
+	 */
+	if (port->nr_dports == 1 &&
+	    port->component_reg_phys != CXL_RESOURCE_NONE) {
+		rc = cxl_port_setup_regs(port, port->component_reg_phys);
+		if (rc)
+			return ERR_PTR(rc);
+		port->component_reg_phys = CXL_RESOURCE_NONE;
+	}
+
 	return dport;
 }
 
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 0e0e518031fb0..231ddccf89773 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -607,6 +607,7 @@ struct cxl_dax_region {
  * @cdat: Cached CDAT data
  * @cdat_available: Should a CDAT attribute be available in sysfs
  * @pci_latency: Upstream latency in picoseconds
+ * @component_reg_phys: Physical address of component register
  */
 struct cxl_port {
 	struct device dev;
@@ -630,6 +631,7 @@ struct cxl_port {
 	} cdat;
 	bool cdat_available;
 	long pci_latency;
+	resource_size_t component_reg_phys;
 };
 
 /**

From 7922d5cf26b682f614985a6df83bae7d4ccc4aa6 Mon Sep 17 00:00:00 2001
From: Li Ming <ming.li@zohomail.com>
Date: Wed, 1 Oct 2025 14:03:37 +0800
Subject: [PATCH 027/143] cxl/port: Avoid missing port component registers
 setup

port->nr_dports represents how many dports have been added to the cxl
port. It is incremented in add_dport() when a new dport is added to
the cxl port, but it is not decremented when a dport is removed from
the cxl port.

Currently, adding the first dport to a cxl port triggers component
register setup on the cxl port. The implementation uses port->nr_dports
to determine whether the dport is the first dport.

A corner case here is that adding a dport could fail after port->nr_dports
is updated and before port->nr_dports is checked for component register
setup. If the failure happens while attaching the first dport, the CXL
subsystem has no chance to execute component register setup for the cxl
port. The failure flow is shown below:

port->nr_dports = 0
dport 1 adding to the port:
	add_dport()	# port->nr_dports: 1
	failed on devm_add_action_or_reset() or sysfs_create_link()
	return error	# port->nr_dports: 1
dport 2 adding to the port:
	add_dport()	# port->nr_dports: 2
	no failure
	skip component registers setup because of port->nr_dports is 2

The solution is to move component register setup closer to
add_dport(), so that if add_dport() succeeds for the first dport,
component register setup on the port is executed immediately after
that.

Fixes: f6ee24913de2 ("cxl: Move port register setup to when first dport appear")
Signed-off-by: Li Ming <ming.li@zohomail.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 02e7567f5da023524476053a38c54f4f19130959)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/port.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index d5f71eb1ade85..8128fd2b5b317 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -1182,6 +1182,20 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev,
 	if (rc)
 		return ERR_PTR(rc);
 
+	/*
+	 * Setup port register if this is the first dport showed up. Having
+	 * a dport also means that there is at least 1 active link.
+	 */
+	if (port->nr_dports == 1 &&
+	    port->component_reg_phys != CXL_RESOURCE_NONE) {
+		rc = cxl_port_setup_regs(port, port->component_reg_phys);
+		if (rc) {
+			xa_erase(&port->dports, (unsigned long)dport->dport_dev);
+			return ERR_PTR(rc);
+		}
+		port->component_reg_phys = CXL_RESOURCE_NONE;
+	}
+
 	get_device(dport_dev);
 	rc = devm_add_action_or_reset(host, cxl_dport_remove, dport);
 	if (rc)
@@ -1200,18 +1214,6 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev,
 
 	cxl_debugfs_create_dport_dir(dport);
 
-	/*
-	 * Setup port register if this is the first dport showed up. Having
-	 * a dport also means that there is at least 1 active link.
-	 */
-	if (port->nr_dports == 1 &&
-	    port->component_reg_phys != CXL_RESOURCE_NONE) {
-		rc = cxl_port_setup_regs(port, port->component_reg_phys);
-		if (rc)
-			return ERR_PTR(rc);
-		port->component_reg_phys = CXL_RESOURCE_NONE;
-	}
-
 	return dport;
 }
 

From a7e9e0ce8262a0e2640a830073d2fb9a862b6e6f Mon Sep 17 00:00:00 2001
From: Alison Schofield <alison.schofield@intel.com>
Date: Tue, 14 Oct 2025 00:31:04 -0700
Subject: [PATCH 028/143] cxl/region: Use %pa printk format to emit
 resource_size_t

KASAN reports a stack-out-of-bounds access in validate_region_offset()
while running the cxl-poison.sh unit test because the printk format
specifier, %pr format, is not a match for the resource_size_t type of
the variables. %pr expects struct resource pointers and attempts to
dereference the structure fields, reading beyond the bounds of the
stack variables.

Since these messages emit an 'A exceeds B' type of message, keep
the resource_size_t's and use the %pa specifier to be architecture
safe.

BUG: KASAN: stack-out-of-bounds in resource_string.isra.0+0xe9a/0x1690
[] Read of size 8 at addr ffff88800a7afb40 by task bash/1397
...
[] The buggy address belongs to stack of task bash/1397
[]  and is located at offset 56 in frame:
[]  validate_region_offset+0x0/0x1c0 [cxl_core]

Fixes: c3dd67681c70 ("cxl/region: Add inject and clear poison by region offset")
Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 257c4b03a2f7d8c15f79c79b09a561af9734f6c4)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/region.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 32675a70cadf9..149f9bdabbb40 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -3671,14 +3671,14 @@ static int validate_region_offset(struct cxl_region *cxlr, u64 offset)
 
 	if (offset < p->cache_size) {
 		dev_err(&cxlr->dev,
-			"Offset %#llx is within extended linear cache %pr\n",
+			"Offset %#llx is within extended linear cache %pa\n",
 			offset, &p->cache_size);
 		return -EINVAL;
 	}
 
 	region_size = resource_size(p->res);
 	if (offset >= region_size) {
-		dev_err(&cxlr->dev, "Offset %#llx exceeds region size %pr\n",
+		dev_err(&cxlr->dev, "Offset %#llx exceeds region size %pa\n",
 			offset, &region_size);
 		return -EINVAL;
 	}

From 4fd81efb72528ee14ed47890b97e04ab397f7845 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Fri, 31 Oct 2025 10:32:24 -0700
Subject: [PATCH 029/143] cxl: Adjust offset calculation for poison injection

The HPA to DPA translation for poison injection assumes that the
base address starts from where the CXL region begins. When the
extended linear cache is active, the offset can be within the DRAM
region. Adjust the offset so that it correctly reflects the offset
within the CXL region.

[ dj: Add fixes tag from Alison ]

Fixes: c3dd67681c70 ("cxl/region: Add inject and clear poison by region offset")
Link: https://patch.msgid.link/20251031173224.3537030-5-dave.jiang@intel.com
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit b6cfddd26ec55e865b4715f73e9bbb17a15091ed)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/region.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 149f9bdabbb40..cc18f2672ee7b 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -3710,6 +3710,7 @@ static int cxl_region_debugfs_poison_inject(void *data, u64 offset)
 	if (validate_region_offset(cxlr, offset))
 		return -EINVAL;
 
+	offset -= cxlr->params.cache_size;
 	rc = region_offset_to_dpa_result(cxlr, offset, &result);
 	if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) {
 		dev_dbg(&cxlr->dev,
@@ -3742,6 +3743,7 @@ static int cxl_region_debugfs_poison_clear(void *data, u64 offset)
 	if (validate_region_offset(cxlr, offset))
 		return -EINVAL;
 
+	offset -= cxlr->params.cache_size;
 	rc = region_offset_to_dpa_result(cxlr, offset, &result);
 	if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) {
 		dev_dbg(&cxlr->dev,

From 80192f42359001ae681cb5d771d923b221dde0d1 Mon Sep 17 00:00:00 2001
From: Gregory Price <gourry@gourry.net>
Date: Fri, 3 Oct 2025 10:32:32 -0400
Subject: [PATCH 030/143] Documentation/driver-api/cxl: remove page-allocator
 quirk section

The node/zone quirk section of the cxl documentation is incorrect.
The actual reason for fallback allocation misbehavior in the
described configuration is due to a kswapd/reclaim thrashing scenario
fixed by the linked patch.  Remove this section.

Link: https://lore.kernel.org/linux-mm/20250919162134.1098208-1-hannes@cmpxchg.org/
Signed-off-by: Gregory Price <gourry@gourry.net>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 82b5d7e30b24b7df5dbf10aea97292be38daf88d)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 .../cxl/allocation/page-allocator.rst         | 31 -------------------
 1 file changed, 31 deletions(-)

diff --git a/Documentation/driver-api/cxl/allocation/page-allocator.rst b/Documentation/driver-api/cxl/allocation/page-allocator.rst
index 7b8fe1b8d5bbb..3fa584a248bdd 100644
--- a/Documentation/driver-api/cxl/allocation/page-allocator.rst
+++ b/Documentation/driver-api/cxl/allocation/page-allocator.rst
@@ -41,37 +41,6 @@ To simplify this, the page allocator will prefer :code:`ZONE_MOVABLE` over
 will fallback to allocate from :code:`ZONE_NORMAL`.
 
 
-Zone and Node Quirks
-====================
-Let's consider a configuration where the local DRAM capacity is largely onlined
-into :code:`ZONE_NORMAL`, with no :code:`ZONE_MOVABLE` capacity present. The
-CXL capacity has the opposite configuration - all onlined in
-:code:`ZONE_MOVABLE`.
-
-Under the default allocation policy, the page allocator will completely skip
-:code:`ZONE_MOVABLE` as a valid allocation target.  This is because, as of
-Linux v6.15, the page allocator does (approximately) the following: ::
-
-  for (each zone in local_node):
-
-    for (each node in fallback_order):
-
-      attempt_allocation(gfp_flags);
-
-Because the local node does not have :code:`ZONE_MOVABLE`, the CXL node is
-functionally unreachable for direct allocation.  As a result, the only way
-for CXL capacity to be used is via `demotion` in the reclaim path.
-
-This configuration also means that if the DRAM ndoe has :code:`ZONE_MOVABLE`
-capacity - when that capacity is depleted, the page allocator will actually
-prefer CXL :code:`ZONE_MOVABLE` pages over DRAM :code:`ZONE_NORMAL` pages.
-
-We may wish to invert this priority in future Linux versions.
-
-If `demotion` and `swap` are disabled, Linux will begin to cause OOM crashes
-when the DRAM nodes are depleted. See the reclaim section for more details.
-
-
 CGroups and CPUSets
 ===================
 Finally, assuming CXL memory is reachable via the page allocation (i.e. onlined

From 2d3a899e50bfd9b941aa8a1f1c1285470fbcb63f Mon Sep 17 00:00:00 2001
From: Li Ming <ming.li@zohomail.com>
Date: Sat, 27 Sep 2025 18:07:09 +0800
Subject: [PATCH 031/143] cxl/port: Remove devm_cxl_port_enumerate_dports()

devm_cxl_port_enumerate_dports() is no longer used after the commit below:
commit 4f06d81e7c6a ("cxl: Defer dport allocation for switch ports")

Delete it and the relevant interface implemented in cxl_test.

Signed-off-by: Li Ming <ming.li@zohomail.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 3f5b8f7f34f6d8e63c02d177341e43ebee4c2d36)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/pci.c        | 87 ++++-------------------------------
 drivers/cxl/cxlpci.h          |  1 -
 tools/testing/cxl/Kbuild      |  1 -
 tools/testing/cxl/test/cxl.c  | 32 -------------
 tools/testing/cxl/test/mock.c | 15 ------
 tools/testing/cxl/test/mock.h |  1 -
 6 files changed, 8 insertions(+), 129 deletions(-)

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 18825e1505d6a..5b023a0178a47 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -71,85 +71,6 @@ struct cxl_dport *__devm_cxl_add_dport_by_dev(struct cxl_port *port,
 }
 EXPORT_SYMBOL_NS_GPL(__devm_cxl_add_dport_by_dev, "CXL");
 
-struct cxl_walk_context {
-	struct pci_bus *bus;
-	struct cxl_port *port;
-	int type;
-	int error;
-	int count;
-};
-
-static int match_add_dports(struct pci_dev *pdev, void *data)
-{
-	struct cxl_walk_context *ctx = data;
-	struct cxl_port *port = ctx->port;
-	int type = pci_pcie_type(pdev);
-	struct cxl_register_map map;
-	struct cxl_dport *dport;
-	u32 lnkcap, port_num;
-	int rc;
-
-	if (pdev->bus != ctx->bus)
-		return 0;
-	if (!pci_is_pcie(pdev))
-		return 0;
-	if (type != ctx->type)
-		return 0;
-	if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP,
-				  &lnkcap))
-		return 0;
-
-	rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
-	if (rc)
-		dev_dbg(&port->dev, "failed to find component registers\n");
-
-	port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
-	dport = devm_cxl_add_dport(port, &pdev->dev, port_num, map.resource);
-	if (IS_ERR(dport)) {
-		ctx->error = PTR_ERR(dport);
-		return PTR_ERR(dport);
-	}
-	ctx->count++;
-
-	return 0;
-}
-
-/**
- * devm_cxl_port_enumerate_dports - enumerate downstream ports of the upstream port
- * @port: cxl_port whose ->uport_dev is the upstream of dports to be enumerated
- *
- * Returns a positive number of dports enumerated or a negative error
- * code.
- */
-int devm_cxl_port_enumerate_dports(struct cxl_port *port)
-{
-	struct pci_bus *bus = cxl_port_to_pci_bus(port);
-	struct cxl_walk_context ctx;
-	int type;
-
-	if (!bus)
-		return -ENXIO;
-
-	if (pci_is_root_bus(bus))
-		type = PCI_EXP_TYPE_ROOT_PORT;
-	else
-		type = PCI_EXP_TYPE_DOWNSTREAM;
-
-	ctx = (struct cxl_walk_context) {
-		.port = port,
-		.bus = bus,
-		.type = type,
-	};
-	pci_walk_bus(bus, match_add_dports, &ctx);
-
-	if (ctx.count == 0)
-		return -ENODEV;
-	if (ctx.error)
-		return ctx.error;
-	return ctx.count;
-}
-EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, "CXL");
-
 static int cxl_dvsec_mem_range_valid(struct cxl_dev_state *cxlds, int id)
 {
 	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
@@ -1217,6 +1138,14 @@ int cxl_gpf_port_setup(struct cxl_dport *dport)
 	return 0;
 }
 
+struct cxl_walk_context {
+	struct pci_bus *bus;
+	struct cxl_port *port;
+	int type;
+	int error;
+	int count;
+};
+
 static int count_dports(struct pci_dev *pdev, void *data)
 {
 	struct cxl_walk_context *ctx = data;
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index 7ae621e618e79..1d526bea84312 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -127,7 +127,6 @@ static inline bool cxl_pci_flit_256(struct pci_dev *pdev)
 	return lnksta2 & PCI_EXP_LNKSTA2_FLIT;
 }
 
-int devm_cxl_port_enumerate_dports(struct cxl_port *port);
 struct cxl_dev_state;
 void read_cdat_data(struct cxl_port *port);
 void cxl_cor_error_detected(struct pci_dev *pdev);
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 0d5ce4b74b9f7..3dae06ac7fba5 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -4,7 +4,6 @@ ldflags-y += --wrap=is_acpi_device_node
 ldflags-y += --wrap=acpi_evaluate_integer
 ldflags-y += --wrap=acpi_pci_find_root
 ldflags-y += --wrap=nvdimm_bus_register
-ldflags-y += --wrap=devm_cxl_port_enumerate_dports
 ldflags-y += --wrap=cxl_await_media_ready
 ldflags-y += --wrap=devm_cxl_add_rch_dport
 ldflags-y += --wrap=cxl_rcd_component_reg_phys
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index cb18ee41a7cf8..fc271561827b6 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -1087,37 +1087,6 @@ static int get_port_array(struct cxl_port *port,
 	return 0;
 }
 
-static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
-{
-	struct platform_device **array;
-	int i, array_size;
-	int rc;
-
-	rc = get_port_array(port, &array, &array_size);
-	if (rc)
-		return rc;
-
-	for (i = 0; i < array_size; i++) {
-		struct platform_device *pdev = array[i];
-		struct cxl_dport *dport;
-
-		if (pdev->dev.parent != port->uport_dev) {
-			dev_dbg(&port->dev, "%s: mismatch parent %s\n",
-				dev_name(port->uport_dev),
-				dev_name(pdev->dev.parent));
-			continue;
-		}
-
-		dport = devm_cxl_add_dport(port, &pdev->dev, pdev->id,
-					   CXL_RESOURCE_NONE);
-
-		if (IS_ERR(dport))
-			return PTR_ERR(dport);
-	}
-
-	return 0;
-}
-
 static struct cxl_dport *mock_cxl_add_dport_by_dev(struct cxl_port *port,
 						   struct device *dport_dev)
 {
@@ -1206,7 +1175,6 @@ static struct cxl_mock_ops cxl_mock_ops = {
 	.acpi_pci_find_root = mock_acpi_pci_find_root,
 	.devm_cxl_switch_port_decoders_setup = mock_cxl_switch_port_decoders_setup,
 	.devm_cxl_endpoint_decoders_setup = mock_cxl_endpoint_decoders_setup,
-	.devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports,
 	.cxl_endpoint_parse_cdat = mock_cxl_endpoint_parse_cdat,
 	.devm_cxl_add_dport_by_dev = mock_cxl_add_dport_by_dev,
 	.list = LIST_HEAD_INIT(cxl_mock_ops.list),
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index 995269a75cbd1..6fd4edb9215c4 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -172,21 +172,6 @@ int __wrap_devm_cxl_endpoint_decoders_setup(struct cxl_port *port)
 }
 EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_endpoint_decoders_setup, "CXL");
 
-int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port)
-{
-	int rc, index;
-	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
-	if (ops && ops->is_mock_port(port->uport_dev))
-		rc = ops->devm_cxl_port_enumerate_dports(port);
-	else
-		rc = devm_cxl_port_enumerate_dports(port);
-	put_cxl_mock_ops(index);
-
-	return rc;
-}
-EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_port_enumerate_dports, "CXL");
-
 int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds)
 {
 	int rc, index;
diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h
index 4ed932e76aae8..580f383862245 100644
--- a/tools/testing/cxl/test/mock.h
+++ b/tools/testing/cxl/test/mock.h
@@ -19,7 +19,6 @@ struct cxl_mock_ops {
 	bool (*is_mock_bus)(struct pci_bus *bus);
 	bool (*is_mock_port)(struct device *dev);
 	bool (*is_mock_dev)(struct device *dev);
-	int (*devm_cxl_port_enumerate_dports)(struct cxl_port *port);
 	int (*devm_cxl_switch_port_decoders_setup)(struct cxl_port *port);
 	int (*devm_cxl_endpoint_decoders_setup)(struct cxl_port *port);
 	void (*cxl_endpoint_parse_cdat)(struct cxl_port *port);

From 8ba94b80a9a7648abca24e6f8d8d59fed130d4e1 Mon Sep 17 00:00:00 2001
From: Alok Tiwari <alok.a.tiwari@oracle.com>
Date: Sat, 11 Oct 2025 11:30:44 -0700
Subject: [PATCH 032/143] cxl: fix typos in cdat.c comments

- Corrected spelling of "bandwdith" -> "bandwidth"
- Fixed "wht" -> "with"

Signed-off-by: Alok Tiwari <alok.a.tiwari@oracle.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 040acb49bf862dd851144bfc0872555d4ac4ffd5)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/cdat.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c
index c4bd6e8a0cf03..7120b5f2e31fe 100644
--- a/drivers/cxl/core/cdat.c
+++ b/drivers/cxl/core/cdat.c
@@ -826,7 +826,7 @@ static struct xarray *cxl_switch_gather_bandwidth(struct cxl_region *cxlr,
 		cxl_coordinates_combine(coords, coords, ctx->coord);
 
 		/*
-		 * Take the min of the calculated bandwdith and the upstream
+		 * Take the min of the calculated bandwidth and the upstream
 		 * switch SSLBIS bandwidth if there's a parent switch
 		 */
 		if (!is_root)
@@ -949,7 +949,7 @@ static struct xarray *cxl_hb_gather_bandwidth(struct xarray *xa)
 /**
  * cxl_region_update_bandwidth - Update the bandwidth access coordinates of a region
  * @cxlr: The region being operated on
- * @input_xa: xarray holds cxl_perf_ctx wht calculated bandwidth per ACPI0017 instance
+ * @input_xa: xarray holds cxl_perf_ctx with calculated bandwidth per ACPI0017 instance
  */
 static void cxl_region_update_bandwidth(struct cxl_region *cxlr,
 					struct xarray *input_xa)

From 866acc18eb466ce51702311277c570bb5d0cdbb7 Mon Sep 17 00:00:00 2001
From: Marco Crivellari <marco.crivellari@suse.com>
Date: Thu, 30 Oct 2025 17:38:39 +0100
Subject: [PATCH 033/143] cxl/pci: replace use of system_wq with
 system_percpu_wq

Currently if a user enqueues a work item using schedule_delayed_work() the
wq used is "system_wq" (per-cpu wq), while queue_delayed_work() uses
WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to
schedule_work(), which uses system_wq, and queue_work(), which again
makes use of WORK_CPU_UNBOUND.

This lack of consistency cannot be addressed without refactoring the API.

system_wq should be the per-cpu workqueue, yet in this name nothing makes
that clear, so replace system_wq with system_percpu_wq.

The old wq (system_wq) will be kept for a few release cycles.

See 128ea9f6ccfb ("workqueue: Add system_percpu_wq and system_dfl_wq")
for cause of changes.

[ dj: Add reference to commit that initiated the change. ]

Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Marco Crivellari <marco.crivellari@suse.com>
Acked-by: Davidlohr Bueso <dave@stgolabs.net>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Link: https://patch.msgid.link/20251030163839.307752-1-marco.crivellari@suse.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 952e9057e66c17a9718232664368ffdaca468f93)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index bd100ac31672d..0be4e508affe7 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -136,7 +136,7 @@ static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
 	if (opcode == CXL_MBOX_OP_SANITIZE) {
 		mutex_lock(&cxl_mbox->mbox_mutex);
 		if (mds->security.sanitize_node)
-			mod_delayed_work(system_wq, &mds->security.poll_dwork, 0);
+			mod_delayed_work(system_percpu_wq, &mds->security.poll_dwork, 0);
 		mutex_unlock(&cxl_mbox->mbox_mutex);
 	} else {
 		/* short-circuit the wait in __cxl_pci_mbox_send_cmd() */

From d385082c2cdb90d7c004aadb133e051100aaeb43 Mon Sep 17 00:00:00 2001
From: Alison Schofield <alison.schofield@intel.com>
Date: Tue, 14 Oct 2025 01:24:30 -0700
Subject: [PATCH 034/143] cxl/region: Refactor address translation funcs for
 testing

In preparation for adding a test module that exercises the address
translation calculations, extract the core calculations into stand-
alone functions that operate on base parameters without dependencies
on struct cxl_region.

Perform additional parameter validation to protect against a test
module sending bad parameters. Export the validation function, as
well as the three core translation functions for use by test module
cxl_translate only.

This refactoring enables unit testing of the address translation logic
with controlled inputs, while preserving identical functionality in
the existing code paths.

Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit b78b9e7b7979f86c7838f1ab7d084ca35a17702d)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/region.c | 202 +++++++++++++++++++++++++++-----------
 drivers/cxl/cxl.h         |   6 ++
 2 files changed, 148 insertions(+), 60 deletions(-)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index cc18f2672ee7b..3af7561e2973a 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2934,28 +2934,119 @@ static bool has_spa_to_hpa(struct cxl_root_decoder *cxlrd)
 	return cxlrd->ops && cxlrd->ops->spa_to_hpa;
 }
 
-u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
-		   u64 dpa)
+#define CXL_POS_ZERO 0
+/**
+ * cxl_validate_translation_params
+ * @eiw: encoded interleave ways
+ * @eig: encoded interleave granularity
+ * @pos: position in interleave
+ *
+ * Callers pass CXL_POS_ZERO when no position parameter needs validating.
+ *
+ * Returns: 0 on success, -EINVAL on first invalid parameter
+ */
+int cxl_validate_translation_params(u8 eiw, u16 eig, int pos)
 {
-	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
-	u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa;
-	struct cxl_region_params *p = &cxlr->params;
-	struct cxl_endpoint_decoder *cxled = NULL;
-	u16 eig = 0;
-	u8 eiw = 0;
-	int pos;
+	int ways, gran;
 
-	for (int i = 0; i < p->nr_targets; i++) {
-		cxled = p->targets[i];
-		if (cxlmd == cxled_to_memdev(cxled))
-			break;
+	if (eiw_to_ways(eiw, &ways)) {
+		pr_debug("%s: invalid eiw=%u\n", __func__, eiw);
+		return -EINVAL;
+	}
+	if (eig_to_granularity(eig, &gran)) {
+		pr_debug("%s: invalid eig=%u\n", __func__, eig);
+		return -EINVAL;
 	}
-	if (!cxled || cxlmd != cxled_to_memdev(cxled))
+	if (pos < 0 || pos >= ways) {
+		pr_debug("%s: invalid pos=%d for ways=%u\n", __func__, pos,
+			 ways);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_FOR_MODULES(cxl_validate_translation_params, "cxl_translate");
+
+u64 cxl_calculate_dpa_offset(u64 hpa_offset, u8 eiw, u16 eig)
+{
+	u64 dpa_offset, bits_lower, bits_upper, temp;
+	int ret;
+
+	ret = cxl_validate_translation_params(eiw, eig, CXL_POS_ZERO);
+	if (ret)
 		return ULLONG_MAX;
 
-	pos = cxled->pos;
-	ways_to_eiw(p->interleave_ways, &eiw);
-	granularity_to_eig(p->interleave_granularity, &eig);
+	/*
+	 * DPA offset: CXL Spec 3.2 Section 8.2.4.20.13
+	 * Lower bits [IG+7:0] pass through unchanged
+	 * (eiw < 8)
+	 *	Per spec: DPAOffset[51:IG+8] = (HPAOffset[51:IG+IW+8] >> IW)
+	 *	Clear the position bits to isolate upper section, then
+	 *	reverse the left shift by eiw that occurred during DPA->HPA
+	 * (eiw >= 8)
+	 *	Per spec: DPAOffset[51:IG+8] = HPAOffset[51:IG+IW] / 3
+	 *	Extract upper bits from the correct bit range and divide by 3
+	 *	to recover the original DPA upper bits
+	 */
+	bits_lower = hpa_offset & GENMASK_ULL(eig + 7, 0);
+	if (eiw < 8) {
+		temp = hpa_offset &= ~GENMASK_ULL(eig + eiw + 8 - 1, 0);
+		dpa_offset = temp >> eiw;
+	} else {
+		bits_upper = div64_u64(hpa_offset >> (eig + eiw), 3);
+		dpa_offset = bits_upper << (eig + 8);
+	}
+	dpa_offset |= bits_lower;
+
+	return dpa_offset;
+}
+EXPORT_SYMBOL_FOR_MODULES(cxl_calculate_dpa_offset, "cxl_translate");
+
+int cxl_calculate_position(u64 hpa_offset, u8 eiw, u16 eig)
+{
+	unsigned int ways = 0;
+	u64 shifted, rem;
+	int pos, ret;
+
+	ret = cxl_validate_translation_params(eiw, eig, CXL_POS_ZERO);
+	if (ret)
+		return ret;
+
+	if (!eiw)
+		/* position is 0 if no interleaving */
+		return 0;
+
+	/*
+	 * Interleave position: CXL Spec 3.2 Section 8.2.4.20.13
+	 * eiw < 8
+	 *	Position is in the IW bits at HPA_OFFSET[IG+8+IW-1:IG+8].
+	 *	Per spec "remove IW bits starting with bit position IG+8"
+	 * eiw >= 8
+	 *	Position is not explicitly stored in HPA_OFFSET bits. It is
+	 *	derived from the modulo operation of the upper bits using
+	 *	the total number of interleave ways.
+	 */
+	if (eiw < 8) {
+		pos = (hpa_offset >> (eig + 8)) & GENMASK(eiw - 1, 0);
+	} else {
+		shifted = hpa_offset >> (eig + 8);
+		eiw_to_ways(eiw, &ways);
+		div64_u64_rem(shifted, ways, &rem);
+		pos = rem;
+	}
+
+	return pos;
+}
+EXPORT_SYMBOL_FOR_MODULES(cxl_calculate_position, "cxl_translate");
+
+u64 cxl_calculate_hpa_offset(u64 dpa_offset, int pos, u8 eiw, u16 eig)
+{
+	u64 mask_upper, hpa_offset, bits_upper;
+	int ret;
+
+	ret = cxl_validate_translation_params(eiw, eig, pos);
+	if (ret)
+		return ULLONG_MAX;
 
 	/*
 	 * The device position in the region interleave set was removed
@@ -2967,9 +3058,6 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
 	 * 8.2.4.19.13 Implementation Note: Device Decode Logic
 	 */
 
-	/* Remove the dpa base */
-	dpa_offset = dpa - cxl_dpa_resource_start(cxled);
-
 	mask_upper = GENMASK_ULL(51, eig + 8);
 
 	if (eiw < 8) {
@@ -2984,6 +3072,37 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
 	/* The lower bits remain unchanged */
 	hpa_offset |= dpa_offset & GENMASK_ULL(eig + 7, 0);
 
+	return hpa_offset;
+}
+EXPORT_SYMBOL_FOR_MODULES(cxl_calculate_hpa_offset, "cxl_translate");
+
+u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
+		   u64 dpa)
+{
+	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
+	struct cxl_region_params *p = &cxlr->params;
+	struct cxl_endpoint_decoder *cxled = NULL;
+	u64 dpa_offset, hpa_offset, hpa;
+	u16 eig = 0;
+	u8 eiw = 0;
+	int pos;
+
+	for (int i = 0; i < p->nr_targets; i++) {
+		if (cxlmd == cxled_to_memdev(p->targets[i])) {
+			cxled = p->targets[i];
+			break;
+		}
+	}
+	if (!cxled)
+		return ULLONG_MAX;
+
+	pos = cxled->pos;
+	ways_to_eiw(p->interleave_ways, &eiw);
+	granularity_to_eig(p->interleave_granularity, &eig);
+
+	dpa_offset = dpa - cxl_dpa_resource_start(cxled);
+	hpa_offset = cxl_calculate_hpa_offset(dpa_offset, pos, eiw, eig);
+
 	/* Apply the hpa_offset to the region base address */
 	hpa = hpa_offset + p->res->start + p->cache_size;
 
@@ -3016,8 +3135,6 @@ static int region_offset_to_dpa_result(struct cxl_region *cxlr, u64 offset,
 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
 	struct cxl_endpoint_decoder *cxled;
 	u64 hpa, hpa_offset, dpa_offset;
-	u64 bits_upper, bits_lower;
-	u64 shifted, rem, temp;
 	u16 eig = 0;
 	u8 eiw = 0;
 	int pos;
@@ -3039,50 +3156,15 @@ static int region_offset_to_dpa_result(struct cxl_region *cxlr, u64 offset,
 	} else {
 		hpa_offset = offset;
 	}
-	/*
-	 * Interleave position: CXL Spec 3.2 Section 8.2.4.20.13
-	 * eiw < 8
-	 *	Position is in the IW bits at HPA_OFFSET[IG+8+IW-1:IG+8].
-	 *	Per spec "remove IW bits starting with bit position IG+8"
-	 * eiw >= 8
-	 *	Position is not explicitly stored in HPA_OFFSET bits. It is
-	 *	derived from the modulo operation of the upper bits using
-	 *	the total number of interleave ways.
-	 */
-	if (eiw < 8) {
-		pos = (hpa_offset >> (eig + 8)) & GENMASK(eiw - 1, 0);
-	} else {
-		shifted = hpa_offset >> (eig + 8);
-		div64_u64_rem(shifted, p->interleave_ways, &rem);
-		pos = rem;
-	}
+
+	pos = cxl_calculate_position(hpa_offset, eiw, eig);
 	if (pos < 0 || pos >= p->nr_targets) {
 		dev_dbg(&cxlr->dev, "Invalid position %d for %d targets\n",
 			pos, p->nr_targets);
 		return -ENXIO;
 	}
 
-	/*
-	 * DPA offset: CXL Spec 3.2 Section 8.2.4.20.13
-	 * Lower bits [IG+7:0] pass through unchanged
-	 * (eiw < 8)
-	 *	Per spec: DPAOffset[51:IG+8] = (HPAOffset[51:IG+IW+8] >> IW)
-	 *	Clear the position bits to isolate upper section, then
-	 *	reverse the left shift by eiw that occurred during DPA->HPA
-	 * (eiw >= 8)
-	 *	Per spec: DPAOffset[51:IG+8] = HPAOffset[51:IG+IW] / 3
-	 *	Extract upper bits from the correct bit range and divide by 3
-	 *	to recover the original DPA upper bits
-	 */
-	bits_lower = hpa_offset & GENMASK_ULL(eig + 7, 0);
-	if (eiw < 8) {
-		temp = hpa_offset &= ~((u64)GENMASK(eig + eiw + 8 - 1, 0));
-		dpa_offset = temp >> eiw;
-	} else {
-		bits_upper = div64_u64(hpa_offset >> (eig + eiw), 3);
-		dpa_offset = bits_upper << (eig + 8);
-	}
-	dpa_offset |= bits_lower;
+	dpa_offset = cxl_calculate_dpa_offset(hpa_offset, eiw, eig);
 
 	/* Look-up and return the result: a memdev and a DPA */
 	for (int i = 0; i < p->nr_targets; i++) {
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 231ddccf89773..10bee9aaa943e 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -738,6 +738,12 @@ static inline bool is_cxl_root(struct cxl_port *port)
 	return port->uport_dev == port->dev.parent;
 }
 
+/* Address translation functions exported to cxl_translate test module only */
+int cxl_validate_translation_params(u8 eiw, u16 eig, int pos);
+u64 cxl_calculate_hpa_offset(u64 dpa_offset, int pos, u8 eiw, u16 eig);
+u64 cxl_calculate_dpa_offset(u64 hpa_offset, u8 eiw, u16 eig);
+int cxl_calculate_position(u64 hpa_offset, u8 eiw, u16 eig);
+
 int cxl_num_decoders_committed(struct cxl_port *port);
 bool is_cxl_port(const struct device *dev);
 struct cxl_port *to_cxl_port(const struct device *dev);

From 293cba361c3194bb6c8f4198be170f1da85b9084 Mon Sep 17 00:00:00 2001
From: Alison Schofield <alison.schofield@intel.com>
Date: Tue, 14 Oct 2025 01:24:31 -0700
Subject: [PATCH 035/143] cxl/acpi: Make the XOR calculations available for
 testing

In preparation for adding a test module that can exercise the address
translation functions performed by the CXL Driver, refactor the XOR
implementation like this:

- Extract the core calculation into a standalone helper function,
- Export the new function for use by test module cxl_translate only,
- Enhance the parameter validation since this new function will be
  called from a test module with no guarantee of valid parameters,
- Move the define of struct cxl_cxims_data to cxl.h so the test module
  can build xormaps.

Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 4fe516d2ad1a6b827694db134fa2a0af97917b41)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/acpi.c | 41 ++++++++++++++++++++++++++++++-----------
 drivers/cxl/cxl.h  | 13 +++++++++++++
 2 files changed, 43 insertions(+), 11 deletions(-)

diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index bd2e282ca93a0..a8069278cb565 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -11,25 +11,36 @@
 #include "cxlpci.h"
 #include "cxl.h"
 
-struct cxl_cxims_data {
-	int nr_maps;
-	u64 xormaps[] __counted_by(nr_maps);
-};
-
 static const guid_t acpi_cxl_qtg_id_guid =
 	GUID_INIT(0xF365F9A6, 0xA7DE, 0x4071,
 		  0xA6, 0x6A, 0xB4, 0x0C, 0x0B, 0x4F, 0x8E, 0x52);
 
-static u64 cxl_apply_xor_maps(struct cxl_root_decoder *cxlrd, u64 addr)
+#define HBIW_TO_NR_MAPS_SIZE (CXL_DECODER_MAX_INTERLEAVE + 1)
+static const int hbiw_to_nr_maps[HBIW_TO_NR_MAPS_SIZE] = {
+	[1] = 0, [2] = 1, [3] = 0, [4] = 2, [6] = 1, [8] = 3, [12] = 2, [16] = 4
+};
+
+static const int valid_hbiw[] = { 1, 2, 3, 4, 6, 8, 12, 16 };
+
+u64 cxl_do_xormap_calc(struct cxl_cxims_data *cximsd, u64 addr, int hbiw)
 {
-	struct cxl_cxims_data *cximsd = cxlrd->platform_data;
-	int hbiw = cxlrd->cxlsd.nr_targets;
+	int nr_maps_to_apply = -1;
 	u64 val;
 	int pos;
 
-	/* No xormaps for host bridge interleave ways of 1 or 3 */
-	if (hbiw == 1 || hbiw == 3)
-		return addr;
+	/*
+	 * Strictly validate hbiw since this function is used for testing and
+	 * that nullifies any expectation of trusted parameters from the CXL
+	 * Region Driver.
+	 */
+	for (int i = 0; i < ARRAY_SIZE(valid_hbiw); i++) {
+		if (valid_hbiw[i] == hbiw) {
+			nr_maps_to_apply = hbiw_to_nr_maps[hbiw];
+			break;
+		}
+	}
+	if (nr_maps_to_apply == -1 || nr_maps_to_apply > cximsd->nr_maps)
+		return ULLONG_MAX;
 
 	/*
 	 * In regions using XOR interleave arithmetic the CXL HPA may not
@@ -60,6 +71,14 @@ static u64 cxl_apply_xor_maps(struct cxl_root_decoder *cxlrd, u64 addr)
 
 	return addr;
 }
+EXPORT_SYMBOL_FOR_MODULES(cxl_do_xormap_calc, "cxl_translate");
+
+static u64 cxl_apply_xor_maps(struct cxl_root_decoder *cxlrd, u64 addr)
+{
+	struct cxl_cxims_data *cximsd = cxlrd->platform_data;
+
+	return cxl_do_xormap_calc(cximsd, addr, cxlrd->cxlsd.nr_targets);
+}
 
 struct cxl_cxims_context {
 	struct device *dev;
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 10bee9aaa943e..e8931b626fc62 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -743,6 +743,19 @@ int cxl_validate_translation_params(u8 eiw, u16 eig, int pos);
 u64 cxl_calculate_hpa_offset(u64 dpa_offset, int pos, u8 eiw, u16 eig);
 u64 cxl_calculate_dpa_offset(u64 hpa_offset, u8 eiw, u16 eig);
 int cxl_calculate_position(u64 hpa_offset, u8 eiw, u16 eig);
+struct cxl_cxims_data {
+	int nr_maps;
+	u64 xormaps[] __counted_by(nr_maps);
+};
+
+#if IS_ENABLED(CONFIG_CXL_ACPI)
+u64 cxl_do_xormap_calc(struct cxl_cxims_data *cximsd, u64 addr, int hbiw);
+#else
+static inline u64 cxl_do_xormap_calc(struct cxl_cxims_data *cximsd, u64 addr, int hbiw)
+{
+	return ULLONG_MAX;
+}
+#endif
 
 int cxl_num_decoders_committed(struct cxl_port *port);
 bool is_cxl_port(const struct device *dev);

From a6c82f917f6bfb1a73e77efd8065831dc40e2377 Mon Sep 17 00:00:00 2001
From: Alison Schofield <alison.schofield@intel.com>
Date: Tue, 14 Oct 2025 01:24:32 -0700
Subject: [PATCH 036/143] cxl/test: Add cxl_translate module for address
 translation testing

Add a loadable test module that validates CXL address translation
calculations using parameterized test vectors. The module tests both
host-to-device and device-to-host address translations for Modulo and
XOR interleave arithmetic.

Two types of testing are provided:

1. Parameterized test vectors:
   Test vectors are passed as module parameters in the format:
	"dpa pos r_eiw r_eig hb_ways math expected_spa".
   Round-trip validation is performed:
   - Translate a DPA and position to a SPA
   - Verify the result matches expected SPA
   - Translate that SPA back to a DPA and position
   - Verify round-trip consistency

2. Internal validation testing:
   When no test vectors are provided, the module performs validation
   of the translation functions by checking parameter boundaries and
   running 10,000 iterations of randomly generated valid parameters
   to exercise the core calculation functions.

The module uses the CXL Driver translation functions through symbols
exported exclusively for cxl_translate.

Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 06377c54a133621d61fa76cdcea85077c5b958f4)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 tools/testing/cxl/test/Kbuild          |   1 +
 tools/testing/cxl/test/cxl_translate.c | 445 +++++++++++++++++++++++++
 2 files changed, 446 insertions(+)
 create mode 100644 tools/testing/cxl/test/cxl_translate.c

diff --git a/tools/testing/cxl/test/Kbuild b/tools/testing/cxl/test/Kbuild
index 6b19278978561..af50972c8b6d3 100644
--- a/tools/testing/cxl/test/Kbuild
+++ b/tools/testing/cxl/test/Kbuild
@@ -4,6 +4,7 @@ ccflags-y := -I$(srctree)/drivers/cxl/ -I$(srctree)/drivers/cxl/core
 obj-m += cxl_test.o
 obj-m += cxl_mock.o
 obj-m += cxl_mock_mem.o
+obj-m += cxl_translate.o
 
 cxl_test-y := cxl.o
 cxl_mock-y := mock.o
diff --git a/tools/testing/cxl/test/cxl_translate.c b/tools/testing/cxl/test/cxl_translate.c
new file mode 100644
index 0000000000000..2200ae21795c7
--- /dev/null
+++ b/tools/testing/cxl/test/cxl_translate.c
@@ -0,0 +1,445 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright(c) 2025 Intel Corporation. All rights reserved.
+
+/* Preface all log entries with "cxl_translate" */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/moduleparam.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/acpi.h>
+#include <cxlmem.h>
+#include <cxl.h>
+
+/* Maximum number of test vectors and entry length */
+#define MAX_TABLE_ENTRIES 128
+#define MAX_ENTRY_LEN 128
+
+/* Expected number of parameters in each test vector */
+#define EXPECTED_PARAMS 7
+
+/* Module parameters for test vectors */
+static char *table[MAX_TABLE_ENTRIES];
+static int table_num;
+
+/* Interleave Arithmetic */
+#define MODULO_MATH 0
+#define XOR_MATH 1
+
+/*
+ * XOR mapping configuration
+ * The test data sets all use the same set of xormaps. When additional
+ * data sets arrive for validation, this static setup will need to
+ * be changed to accept xormaps as additional parameters.
+ */
+struct cxl_cxims_data *cximsd;
+static u64 xormaps[] = {
+	0x2020900,
+	0x4041200,
+	0x1010400,
+	0x800,
+};
+
+static int nr_maps = ARRAY_SIZE(xormaps);
+
+#define HBIW_TO_NR_MAPS_SIZE (CXL_DECODER_MAX_INTERLEAVE + 1)
+static const int hbiw_to_nr_maps[HBIW_TO_NR_MAPS_SIZE] = {
+	[1] = 0, [2] = 1, [3] = 0, [4] = 2, [6] = 1, [8] = 3, [12] = 2, [16] = 4
+};
+
+/**
+ * to_hpa - calculate an HPA offset from a DPA offset and position
+ *
+ * @dpa_offset: device physical address offset
+ * @pos: device's position in the region interleave
+ * @r_eiw: region encoded interleave ways
+ * @r_eig: region encoded interleave granularity
+ * @hb_ways: host bridge interleave ways
+ * @math: interleave arithmetic (MODULO_MATH or XOR_MATH)
+ *
+ * Returns: host physical address offset
+ */
+static u64 to_hpa(u64 dpa_offset, int pos, u8 r_eiw, u16 r_eig, u8 hb_ways,
+		  u8 math)
+{
+	u64 hpa_offset;
+
+	/* Calculate base HPA offset from DPA and position */
+	hpa_offset = cxl_calculate_hpa_offset(dpa_offset, pos, r_eiw, r_eig);
+
+	if (math == XOR_MATH) {
+		cximsd->nr_maps = hbiw_to_nr_maps[hb_ways];
+		if (cximsd->nr_maps)
+			return cxl_do_xormap_calc(cximsd, hpa_offset, hb_ways);
+	}
+	return hpa_offset;
+}
+
+/**
+ * to_dpa - translate an HPA offset to a DPA offset
+ *
+ * @hpa_offset: host physical address offset
+ * @r_eiw: region encoded interleave ways
+ * @r_eig: region encoded interleave granularity
+ * @hb_ways: host bridge interleave ways
+ * @math: interleave arithmetic (MODULO_MATH or XOR_MATH)
+ *
+ * Returns: device physical address offset
+ */
+static u64 to_dpa(u64 hpa_offset, u8 r_eiw, u16 r_eig, u8 hb_ways, u8 math)
+{
+	u64 offset = hpa_offset;
+
+	if (math == XOR_MATH) {
+		cximsd->nr_maps = hbiw_to_nr_maps[hb_ways];
+		if (cximsd->nr_maps)
+			offset =
+				cxl_do_xormap_calc(cximsd, hpa_offset, hb_ways);
+	}
+	return cxl_calculate_dpa_offset(offset, r_eiw, r_eig);
+}
+
+/**
+ * to_pos - extract an interleave position from an HPA offset
+ *
+ * @hpa_offset: host physical address offset
+ * @r_eiw: region encoded interleave ways
+ * @r_eig: region encoded interleave granularity
+ * @hb_ways: host bridge interleave ways
+ * @math: interleave arithmetic (MODULO_MATH or XOR_MATH)
+ *
+ * Returns: device's position in the region interleave
+ */
+static u64 to_pos(u64 hpa_offset, u8 r_eiw, u16 r_eig, u8 hb_ways, u8 math)
+{
+	u64 offset = hpa_offset;
+
+	/* Reverse XOR mapping if specified */
+	if (math == XOR_MATH)
+		offset = cxl_do_xormap_calc(cximsd, hpa_offset, hb_ways);
+
+	return cxl_calculate_position(offset, r_eiw, r_eig);
+}
+
+/**
+ * run_translation_test - execute forward and reverse translations
+ *
+ * @dpa: device physical address
+ * @pos: expected position in region interleave
+ * @r_eiw: region encoded interleave ways
+ * @r_eig: region encoded interleave granularity
+ * @hb_ways: host bridge interleave ways
+ * @math: interleave arithmetic (MODULO_MATH or XOR_MATH)
+ * @expect_hpa: expected translated address (expected_spa in the test vector)
+ *
+ * Returns: 0 on success, -1 on failure
+ */
+static int run_translation_test(u64 dpa, int pos, u8 r_eiw, u16 r_eig,
+				u8 hb_ways, int math, u64 expect_hpa)
+{
+	u64 translated_spa, reverse_dpa;
+	int reverse_pos;
+
+	/* Test Device to Host translation: DPA + POS -> SPA */
+	translated_spa = to_hpa(dpa, pos, r_eiw, r_eig, hb_ways, math);
+	if (translated_spa != expect_hpa) {
+		pr_err("Device to host failed: expected HPA %llu, got %llu\n",
+		       expect_hpa, translated_spa);
+		return -1;
+	}
+
+	/* Test Host to Device DPA translation: SPA -> DPA */
+	reverse_dpa = to_dpa(translated_spa, r_eiw, r_eig, hb_ways, math);
+	if (reverse_dpa != dpa) {
+		pr_err("Host to Device DPA failed: expected %llu, got %llu\n",
+		       dpa, reverse_dpa);
+		return -1;
+	}
+
+	/* Test Host to Device Position translation: SPA -> POS */
+	reverse_pos = to_pos(translated_spa, r_eiw, r_eig, hb_ways, math);
+	if (reverse_pos != pos) {
+		pr_err("Position lookup failed: expected %d, got %d\n", pos,
+		       reverse_pos);
+		return -1;
+	}
+
+	return 0;
+}
+
+/**
+ * parse_test_vector - parse a single test vector string
+ *
+ * @entry: test vector string to parse
+ * @dpa: output, device physical address
+ * @pos: output, expected position in region interleave
+ * @r_eiw: output, region encoded interleave ways
+ * @r_eig: output, region encoded interleave granularity
+ * @hb_ways: output, host bridge interleave ways
+ * @math: output, interleave arithmetic (MODULO_MATH or XOR_MATH)
+ * @expect_hpa: output, expected address (expected_spa in the test vector)
+ *
+ * Returns: 0 on success, negative error code on failure
+ */
+static int parse_test_vector(const char *entry, u64 *dpa, int *pos, u8 *r_eiw,
+			     u16 *r_eig, u8 *hb_ways, int *math,
+			     u64 *expect_hpa)
+{
+	unsigned int tmp_r_eiw, tmp_r_eig, tmp_hb_ways;
+	int parsed;
+
+	parsed = sscanf(entry, "%llu %d %u %u %u %d %llu", dpa, pos, &tmp_r_eiw,
+			&tmp_r_eig, &tmp_hb_ways, math, expect_hpa);
+
+	if (parsed != EXPECTED_PARAMS) {
+		pr_err("Parse error: expected %d parameters, got %d in '%s'\n",
+		       EXPECTED_PARAMS, parsed, entry);
+		return -EINVAL;
+	}
+	if (tmp_r_eiw > U8_MAX || tmp_r_eig > U16_MAX || tmp_hb_ways > U8_MAX) {
+		pr_err("Parameter overflow in entry: '%s'\n", entry);
+		return -ERANGE;
+	}
+	if (*math != MODULO_MATH && *math != XOR_MATH) {
+		pr_err("Invalid math type %d in entry: '%s'\n", *math, entry);
+		return -EINVAL;
+	}
+	*r_eiw = tmp_r_eiw;
+	*r_eig = tmp_r_eig;
+	*hb_ways = tmp_hb_ways;
+
+	return 0;
+}
+
+/*
+ * setup_xor_mapping - Initialize XOR mapping data structure
+ *
+ * The test data sets all use the same HBIG so we can use one set
+ * of xormaps, and set the number to apply based on HBIW before
+ * calling cxl_do_xormap_calc().
+ *
+ * When additional data sets arrive for validation with different
+ * HBIG's this static setup will need to be updated.
+ *
+ * Returns: 0 on success, negative error code on failure
+ */
+static int setup_xor_mapping(void)
+{
+	if (nr_maps <= 0)
+		return -EINVAL;
+
+	cximsd = kzalloc(struct_size(cximsd, xormaps, nr_maps), GFP_KERNEL);
+	if (!cximsd)
+		return -ENOMEM;
+
+	memcpy(cximsd->xormaps, xormaps, nr_maps * sizeof(*cximsd->xormaps));
+	cximsd->nr_maps = nr_maps;
+
+	return 0;
+}
+
+static int test_random_params(void)
+{
+	u8 valid_eiws[] = { 0, 1, 2, 3, 4, 8, 9, 10 };
+	u16 valid_eigs[] = { 0, 1, 2, 3, 4, 5, 6 };
+	int i, ways, pos, reverse_pos;
+	u64 dpa, hpa, reverse_dpa;
+	int iterations = 10000;
+	int failures = 0;
+
+	for (i = 0; i < iterations; i++) {
+		/* Generate valid random parameters for eiw, eig, pos, dpa */
+		u8 eiw = valid_eiws[get_random_u32() % ARRAY_SIZE(valid_eiws)];
+		u16 eig = valid_eigs[get_random_u32() % ARRAY_SIZE(valid_eigs)];
+
+		eiw_to_ways(eiw, &ways);
+		pos = get_random_u32() % ways;
+		dpa = get_random_u64() >> 12;
+
+		hpa = cxl_calculate_hpa_offset(dpa, pos, eiw, eig);
+		reverse_dpa = cxl_calculate_dpa_offset(hpa, eiw, eig);
+		reverse_pos = cxl_calculate_position(hpa, eiw, eig);
+
+		if (reverse_dpa != dpa || reverse_pos != pos) {
+			pr_err("test random iter %d FAIL hpa=%llu, dpa=%llu reverse_dpa=%llu, pos=%d reverse_pos=%d eiw=%u eig=%u\n",
+			       i, hpa, dpa, reverse_dpa, pos, reverse_pos, eiw,
+			       eig);
+
+			if (failures++ > 10) {
+				pr_err("test random too many failures, stop\n");
+				break;
+			}
+		}
+	}
+	pr_info("..... test random: PASS %d FAIL %d\n", i - failures, failures);
+
+	if (failures)
+		return -EINVAL;
+
+	return 0;
+}
+
+struct param_test {
+	u8 eiw;
+	u16 eig;
+	int pos;
+	bool expect; /* true: expect pass, false: expect fail */
+	const char *desc;
+};
+
+static struct param_test param_tests[] = {
+	{ 0x0, 0, 0, true, "1-way, min eig=0, pos=0" },
+	{ 0x0, 3, 0, true, "1-way, mid eig=3, pos=0" },
+	{ 0x0, 6, 0, true, "1-way, max eig=6, pos=0" },
+	{ 0x1, 0, 0, true, "2-way, eig=0, pos=0" },
+	{ 0x1, 3, 1, true, "2-way, eig=3, max pos=1" },
+	{ 0x1, 6, 1, true, "2-way, eig=6, max pos=1" },
+	{ 0x2, 0, 0, true, "4-way, eig=0, pos=0" },
+	{ 0x2, 3, 3, true, "4-way, eig=3, max pos=3" },
+	{ 0x2, 6, 3, true, "4-way, eig=6, max pos=3" },
+	{ 0x3, 0, 0, true, "8-way, eig=0, pos=0" },
+	{ 0x3, 3, 7, true, "8-way, eig=3, max pos=7" },
+	{ 0x3, 6, 7, true, "8-way, eig=6, max pos=7" },
+	{ 0x4, 0, 0, true, "16-way, eig=0, pos=0" },
+	{ 0x4, 3, 15, true, "16-way, eig=3, max pos=15" },
+	{ 0x4, 6, 15, true, "16-way, eig=6, max pos=15" },
+	{ 0x8, 0, 0, true, "3-way, eig=0, pos=0" },
+	{ 0x8, 3, 2, true, "3-way, eig=3, max pos=2" },
+	{ 0x8, 6, 2, true, "3-way, eig=6, max pos=2" },
+	{ 0x9, 0, 0, true, "6-way, eig=0, pos=0" },
+	{ 0x9, 3, 5, true, "6-way, eig=3, max pos=5" },
+	{ 0x9, 6, 5, true, "6-way, eig=6, max pos=5" },
+	{ 0xA, 0, 0, true, "12-way, eig=0, pos=0" },
+	{ 0xA, 3, 11, true, "12-way, eig=3, max pos=11" },
+	{ 0xA, 6, 11, true, "12-way, eig=6, max pos=11" },
+	{ 0x5, 0, 0, false, "invalid eiw=5" },
+	{ 0x7, 0, 0, false, "invalid eiw=7" },
+	{ 0xB, 0, 0, false, "invalid eiw=0xB" },
+	{ 0xFF, 0, 0, false, "invalid eiw=0xFF" },
+	{ 0x1, 7, 0, false, "invalid eig=7 (out of range)" },
+	{ 0x2, 0x10, 0, false, "invalid eig=0x10" },
+	{ 0x3, 0xFFFF, 0, false, "invalid eig=0xFFFF" },
+	{ 0x1, 0, -1, false, "pos < 0" },
+	{ 0x1, 0, 2, false, "2-way, pos=2 (>= ways)" },
+	{ 0x2, 0, 4, false, "4-way, pos=4 (>= ways)" },
+	{ 0x3, 0, 8, false, "8-way, pos=8 (>= ways)" },
+	{ 0x4, 0, 16, false, "16-way, pos=16 (>= ways)" },
+	{ 0x8, 0, 3, false, "3-way, pos=3 (>= ways)" },
+	{ 0x9, 0, 6, false, "6-way, pos=6 (>= ways)" },
+	{ 0xA, 0, 12, false, "12-way, pos=12 (>= ways)" },
+};
+
+static int test_cxl_validate_translation_params(void)
+{
+	int i, rc, failures = 0;
+	bool valid;
+
+	for (i = 0; i < ARRAY_SIZE(param_tests); i++) {
+		struct param_test *t = &param_tests[i];
+
+		rc = cxl_validate_translation_params(t->eiw, t->eig, t->pos);
+		valid = (rc == 0);
+
+		if (valid != t->expect) {
+			pr_err("test params failed: %s\n", t->desc);
+			failures++;
+		}
+	}
+	pr_info("..... test params: PASS %d FAIL %d\n", i - failures, failures);
+
+	if (failures)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * cxl_translate_init
+ *
+ * Run the internal validation tests when no params are passed.
+ * Otherwise, parse the parameters (test vectors), and kick off
+ * the translation test.
+ *
+ * Returns: 0 on success, negative error code on failure
+ */
+static int __init cxl_translate_init(void)
+{
+	int rc, i;
+
+	/* With no test vectors passed, run only the internal validation */
+	if (table_num == 0) {
+		pr_info("Internal validation test start...\n");
+		rc = test_cxl_validate_translation_params();
+		if (rc)
+			return rc;
+
+		rc = test_random_params();
+		if (rc)
+			return rc;
+
+		pr_info("Internal validation test completed successfully\n");
+
+		return 0;
+	}
+
+	pr_info("CXL translate test module loaded with %d test vectors\n",
+		table_num);
+
+	rc = setup_xor_mapping();
+	if (rc)
+		return rc;
+
+	/* Process each test vector */
+	for (i = 0; i < table_num; i++) {
+		u64 dpa, expect_spa;
+		int pos, math;
+		u8 r_eiw, hb_ways;
+		u16 r_eig;
+
+		pr_debug("Processing test vector %d: '%s'\n", i, table[i]);
+
+		/* Parse the test vector */
+		rc = parse_test_vector(table[i], &dpa, &pos, &r_eiw, &r_eig,
+				       &hb_ways, &math, &expect_spa);
+		if (rc) {
+			pr_err("CXL Translate Test %d: FAIL\n"
+			       "    Failed to parse test vector '%s'\n",
+			       i, table[i]);
+			continue;
+		}
+		/* Run the translation test */
+		rc = run_translation_test(dpa, pos, r_eiw, r_eig, hb_ways, math,
+					  expect_spa);
+		if (rc) {
+			pr_err("CXL Translate Test %d: FAIL\n"
+			       "    dpa=%llu pos=%d r_eiw=%u r_eig=%u hb_ways=%u math=%s expect_spa=%llu\n",
+			       i, dpa, pos, r_eiw, r_eig, hb_ways,
+			       (math == XOR_MATH) ? "XOR" : "MODULO",
+			       expect_spa);
+		} else {
+			pr_info("CXL Translate Test %d: PASS\n", i);
+		}
+	}
+
+	kfree(cximsd);
+	pr_info("CXL translate test completed\n");
+
+	return 0;
+}
+
+static void __exit cxl_translate_exit(void)
+{
+	pr_info("CXL translate test module unloaded\n");
+}
+
+module_param_array(table, charp, &table_num, 0444);
+MODULE_PARM_DESC(table, "Test vectors as space-separated decimal strings");
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("cxl_test: cxl address translation test module");
+MODULE_IMPORT_NS("CXL");
+
+module_init(cxl_translate_init);
+module_exit(cxl_translate_exit);

From 322aae9a659b97bce59c5655d69aba3ec2422fac Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Fri, 3 Oct 2025 11:55:09 -0700
Subject: [PATCH 037/143] cxl: Adjust extended linear cache failure emission in
 cxl_acpi

The cxl_acpi module spams "Extended linear cache calculation failed"
when the hmat memory target is not found for a node. This is normal
when the memory target does not contain extended linear cache
attributes. Adjust cxl_acpi_set_cache_size() to just return 0 if error
is returned from hmat_get_extended_linear_cache_size(). That is the
only error returned from hmat_get_extended_linear_cache_size() as
-ENOENT.

Also remove the check for -EOPNOTSUPP in cxl_setup_extended_linear_cache()
since that errno is never returned by cxl_acpi_set_cache_size().

[dj: Flipped minor return logic suggested by Jonathan ]
Suggested-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20251003185509.3215900-1-dave.jiang@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit f0c5d3bc2830f04a72087f45d15807943eabfa10)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/acpi.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index a8069278cb565..1a64e5c71fbd8 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -372,7 +372,7 @@ static int cxl_acpi_set_cache_size(struct cxl_root_decoder *cxlrd)
 
 	rc = hmat_get_extended_linear_cache_size(&res, nid, &cache_size);
 	if (rc)
-		return rc;
+		return 0;
 
 	/*
 	 * The cache range is expected to be within the CFMWS.
@@ -397,21 +397,18 @@ static void cxl_setup_extended_linear_cache(struct cxl_root_decoder *cxlrd)
 	int rc;
 
 	rc = cxl_acpi_set_cache_size(cxlrd);
-	if (!rc)
-		return;
-
-	if (rc != -EOPNOTSUPP) {
+	if (rc) {
 		/*
-		 * Failing to support extended linear cache region resize does not
+		 * Failing to retrieve extended linear cache region resize does not
 		 * prevent the region from functioning. Only causes cxl list showing
 		 * incorrect region size.
 		 */
 		dev_warn(cxlrd->cxlsd.cxld.dev.parent,
-			 "Extended linear cache calculation failed rc:%d\n", rc);
-	}
+			 "Extended linear cache retrieval failed rc:%d\n", rc);
 
-	/* Ignoring return code */
-	cxlrd->cache_size = 0;
+		/* Ignoring return code */
+		cxlrd->cache_size = 0;
+	}
 }
 
 DEFINE_FREE(put_cxlrd, struct cxl_root_decoder *,

From 72872e60142d5b8ffe786215d2810a80f3d8cd5d Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Wed, 22 Oct 2025 13:30:52 -0700
Subject: [PATCH 038/143] cxl/region: Add support to indicate region has
 extended linear cache

Add a region sysfs attribute to show the size of the extended linear
cache if there is any. The attribute is invisible when the cache
size is 0, which indicates it does not exist.

Moved the cxl_region_visible() location in order to pick up the
new sysfs attribute definition.

[ dj: Fixed spelling errors noted by Benjamin ]

Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Ben Cheatham <benjamin.cheatham@amd.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20251022203052.4078527-1-dave.jiang@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit d6602e25819dea2c239972e98e09ba5db4aebd22)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 Documentation/ABI/testing/sysfs-bus-cxl | 11 ++++-
 drivers/cxl/core/region.c               | 59 ++++++++++++++++++-------
 2 files changed, 54 insertions(+), 16 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl
index 6b4e8c7a963da..c80a1b5a03dba 100644
--- a/Documentation/ABI/testing/sysfs-bus-cxl
+++ b/Documentation/ABI/testing/sysfs-bus-cxl
@@ -496,8 +496,17 @@ Description:
 		changed, only freed by writing 0. The kernel makes no guarantees
 		that data is maintained over an address space freeing event, and
 		there is no guarantee that a free followed by an allocate
-		results in the same address being allocated.
+		results in the same address being allocated. If extended linear
+		cache is present, the size indicates extended linear cache size
+		plus the CXL region size.
 
+What:		/sys/bus/cxl/devices/regionZ/extended_linear_cache_size
+Date:		October, 2025
+KernelVersion:	v6.19
+Contact:	linux-cxl@vger.kernel.org
+Description:
+		(RO) The size of extended linear cache, if there is an extended
+		linear cache. Otherwise the attribute will not be visible.
 
 What:		/sys/bus/cxl/devices/regionZ/mode
 Date:		January, 2023
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 3af7561e2973a..d8f34cb5e8129 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -461,21 +461,6 @@ static ssize_t commit_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RW(commit);
 
-static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a,
-				  int n)
-{
-	struct device *dev = kobj_to_dev(kobj);
-	struct cxl_region *cxlr = to_cxl_region(dev);
-
-	/*
-	 * Support tooling that expects to find a 'uuid' attribute for all
-	 * regions regardless of mode.
-	 */
-	if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_PARTMODE_PMEM)
-		return 0444;
-	return a->mode;
-}
-
 static ssize_t interleave_ways_show(struct device *dev,
 				    struct device_attribute *attr, char *buf)
 {
@@ -754,6 +739,21 @@ static ssize_t size_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RW(size);
 
+static ssize_t extended_linear_cache_size_show(struct device *dev,
+					       struct device_attribute *attr,
+					       char *buf)
+{
+	struct cxl_region *cxlr = to_cxl_region(dev);
+	struct cxl_region_params *p = &cxlr->params;
+	ssize_t rc;
+
+	ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region);
+	if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem)))
+		return rc;
+	return sysfs_emit(buf, "%#llx\n", p->cache_size);
+}
+static DEVICE_ATTR_RO(extended_linear_cache_size);
+
 static struct attribute *cxl_region_attrs[] = {
 	&dev_attr_uuid.attr,
 	&dev_attr_commit.attr,
@@ -762,9 +762,34 @@ static struct attribute *cxl_region_attrs[] = {
 	&dev_attr_resource.attr,
 	&dev_attr_size.attr,
 	&dev_attr_mode.attr,
+	&dev_attr_extended_linear_cache_size.attr,
 	NULL,
 };
 
+static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a,
+				  int n)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct cxl_region *cxlr = to_cxl_region(dev);
+
+	/*
+	 * Support tooling that expects to find a 'uuid' attribute for all
+	 * regions regardless of mode.
+	 */
+	if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_PARTMODE_PMEM)
+		return 0444;
+
+	/*
+	 * Don't display extended linear cache attribute if there is no
+	 * extended linear cache.
+	 */
+	if (a == &dev_attr_extended_linear_cache_size.attr &&
+	    cxlr->params.cache_size == 0)
+		return 0;
+
+	return a->mode;
+}
+
 static const struct attribute_group cxl_region_group = {
 	.attrs = cxl_region_attrs,
 	.is_visible = cxl_region_visible,
@@ -3561,6 +3586,10 @@ static int __construct_region(struct cxl_region *cxlr,
 			 "Extended linear cache calculation failed rc:%d\n", rc);
 	}
 
+	rc = sysfs_update_group(&cxlr->dev.kobj, &cxl_region_group);
+	if (rc)
+		return rc;
+
 	rc = insert_resource(cxlrd->res, res);
 	if (rc) {
 		/*

From 6afdd2e2a5fcd20abd6580089e2ac98e9fd33980 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Wed, 5 Nov 2025 13:18:26 -0700
Subject: [PATCH 039/143] cxl: Add handling of locked CXL decoder

When a decoder is locked, it means that its configuration cannot be
changed. CXL spec r3.2 8.2.4.20.13 discusses the details regarding
locked decoders. Locking happens when bit 8 of the decoder control
register is set and then the decoder is committed afterwards (CXL
spec r3.2 8.2.4.20.7).

Given that the driver creates a virtual decoder for each CFMWS, the
Fixed Device Configuration (bit 4) of the Window Restriction field is
considered as locking for the virtual decoder by the driver.

The current driver code disregards the locked status and a region can
be destroyed regardless of the locking state.

Add a region flag to indicate the region is in a locked configuration.
The driver will consider a region locked if the CFMWS or any decoder
is configured as locked. The consideration is all or nothing regarding
the locked state. It is reasonable to determine the region "locked"
status while the region is being assembled based on the decoders.

Add a check in region commit_store() to intercept when a 0 is written
to the commit sysfs attribute in order to prevent the destruction of a
region when in locked state. This should be the only entry point from user
space to destroy a region.

Add a check to cxl_decoder_reset() to prevent resetting a locked
decoder within the kernel driver.

Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20251105201826.2901915-1-dave.jiang@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 2230c4bdc4120417799c74326ade3123da226d54)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/hdm.c    |  3 +++
 drivers/cxl/core/region.c | 19 +++++++++++++++++++
 drivers/cxl/cxl.h         |  8 ++++++++
 3 files changed, 30 insertions(+)

diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index de78601821e60..aff166798e353 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -905,6 +905,9 @@ static void cxl_decoder_reset(struct cxl_decoder *cxld)
 	if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)
 		return;
 
+	if (test_bit(CXL_DECODER_F_LOCK, &cxld->flags))
+		return;
+
 	if (port->commit_end == id)
 		cxl_port_commit_reap(cxld);
 	else
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index d8f34cb5e8129..4056d75d27d08 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -245,6 +245,9 @@ static void cxl_region_decode_reset(struct cxl_region *cxlr, int count)
 	struct cxl_region_params *p = &cxlr->params;
 	int i;
 
+	if (test_bit(CXL_REGION_F_LOCK, &cxlr->flags))
+		return;
+
 	/*
 	 * Before region teardown attempt to flush, evict any data cached for
 	 * this region, or scream loudly about missing arch / platform support
@@ -419,6 +422,9 @@ static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
 		return len;
 	}
 
+	if (test_bit(CXL_REGION_F_LOCK, &cxlr->flags))
+		return -EPERM;
+
 	rc = queue_reset(cxlr);
 	if (rc)
 		return rc;
@@ -1084,6 +1090,16 @@ static int cxl_rr_assign_decoder(struct cxl_port *port, struct cxl_region *cxlr,
 	return 0;
 }
 
+static void cxl_region_set_lock(struct cxl_region *cxlr,
+				struct cxl_decoder *cxld)
+{
+	if (!test_bit(CXL_DECODER_F_LOCK, &cxld->flags))
+		return;
+
+	set_bit(CXL_REGION_F_LOCK, &cxlr->flags);
+	clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
+}
+
 /**
  * cxl_port_attach_region() - track a region's interest in a port by endpoint
  * @port: port to add a new region reference 'struct cxl_region_ref'
@@ -1195,6 +1211,8 @@ static int cxl_port_attach_region(struct cxl_port *port,
 		}
 	}
 
+	cxl_region_set_lock(cxlr, cxld);
+
 	rc = cxl_rr_ep_add(cxl_rr, cxled);
 	if (rc) {
 		dev_dbg(&cxlr->dev,
@@ -2464,6 +2482,7 @@ static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int i
 	dev->bus = &cxl_bus_type;
 	dev->type = &cxl_region_type;
 	cxlr->id = id;
+	cxl_region_set_lock(cxlr, &cxlrd->cxlsd.cxld);
 
 	return cxlr;
 }
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index e8931b626fc62..6cfe65a35c95a 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -517,6 +517,14 @@ enum cxl_partition_mode {
  */
 #define CXL_REGION_F_NEEDS_RESET 1
 
+/*
+ * Indicate whether this region is locked due to 1 or more decoders that have
+ * been locked. The approach of all or nothing is taken with regard to the
+ * locked attribute. CXL_REGION_F_NEEDS_RESET should not be set if this flag is
+ * set.
+ */
+#define CXL_REGION_F_LOCK 2
+
 /**
  * struct cxl_region - CXL region
  * @dev: This region's device

From 7992ac79bfc6452c2559db0a84f09bc6928d477c Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Wed, 5 Nov 2025 16:51:14 -0700
Subject: [PATCH 040/143] acpi/hmat: Return when generic target is updated

With the current code flow, once the generic target is updated
target->registered is set and the remaining code is skipped.
So return immediately instead of going through the checks and
then skip.

Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20251105235115.85062-2-dave.jiang@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 15e14267889bde30b7b82bc03432483222b4b42c)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/acpi/numa/hmat.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
index 11e4483685c9c..77a81627aaefd 100644
--- a/drivers/acpi/numa/hmat.c
+++ b/drivers/acpi/numa/hmat.c
@@ -910,12 +910,13 @@ static void hmat_register_target(struct memory_target *target)
 	 * Register generic port perf numbers. The nid may not be
 	 * initialized and is still NUMA_NO_NODE.
 	 */
-	mutex_lock(&target_lock);
-	if (*(u16 *)target->gen_port_device_handle) {
-		hmat_update_generic_target(target);
-		target->registered = true;
+	scoped_guard(mutex, &target_lock) {
+		if (*(u16 *)target->gen_port_device_handle) {
+			hmat_update_generic_target(target);
+			target->registered = true;
+			return;
+		}
 	}
-	mutex_unlock(&target_lock);
 
 	hmat_hotplug_target(target);
 }

From 24366091ed5b99b5367b9a1938b38a4338848066 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Thu, 6 Nov 2025 10:01:07 -0700
Subject: [PATCH 041/143] cxl: Rename region_res_match_cxl_range() to
 spa_maps_hpa()

The function name region_res_match_cxl_range() does not accurately
convey the operation of address comparison with cache size. Rename
to spa_maps_hpa() to provide a better function name.

Suggested-by: Dan Williams <dan.j.williams@intel.com>
Link: https://lore.kernel.org/linux-cxl/68eea19c7e67e_2f899100a8@dwillia2-mobl4.notmuch/
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Gregory Price <gourry@gourry.net>
Link: https://patch.msgid.link/20251106170108.1468304-2-dave.jiang@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit c43521b9db7f5ed481cfdfb04ad2e7fe0cb9dcf5)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/region.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 4056d75d27d08..88dbd83379587 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -869,8 +869,8 @@ static int match_free_decoder(struct device *dev, const void *data)
 	return 1;
 }
 
-static bool region_res_match_cxl_range(const struct cxl_region_params *p,
-				       const struct range *range)
+static bool spa_maps_hpa(const struct cxl_region_params *p,
+			 const struct range *range)
 {
 	if (!p->res)
 		return false;
@@ -896,7 +896,7 @@ static int match_auto_decoder(struct device *dev, const void *data)
 	cxld = to_cxl_decoder(dev);
 	r = &cxld->hpa_range;
 
-	if (region_res_match_cxl_range(p, r))
+	if (spa_maps_hpa(p, r))
 		return 1;
 
 	return 0;
@@ -1508,7 +1508,7 @@ static int cxl_port_setup_targets(struct cxl_port *port,
 	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
 		if (cxld->interleave_ways != iw ||
 		    (iw > 1 && cxld->interleave_granularity != ig) ||
-		    !region_res_match_cxl_range(p, &cxld->hpa_range) ||
+		    !spa_maps_hpa(p, &cxld->hpa_range) ||
 		    ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) {
 			dev_err(&cxlr->dev,
 				"%s:%s %s expected iw: %d ig: %d %pr\n",
@@ -3524,7 +3524,7 @@ static int match_region_by_range(struct device *dev, const void *data)
 	p = &cxlr->params;
 
 	guard(rwsem_read)(&cxl_rwsem.region);
-	return region_res_match_cxl_range(p, r);
+	return spa_maps_hpa(p, r);
 }
 
 static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr,

From c0154b151982efd450c5766fd5a3d1e17d8c8c31 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Thu, 6 Nov 2025 10:01:08 -0700
Subject: [PATCH 042/143] cxl: Clarify comment in spa_maps_hpa()

Update the comment in spa_maps_hpa() to clearly convey the construction
of extended linear cache.

Suggested-by: Dan Williams <dan.j.williams@intel.com>
Link: https://lore.kernel.org/linux-cxl/68eea19c7e67e_2f899100a8@dwillia2-mobl4.notmuch/
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Gregory Price <gourry@gourry.net>
Link: https://patch.msgid.link/20251106170108.1468304-3-dave.jiang@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 8d27dd0b219f00fc1e0548ae5008abd7bb350611)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/region.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 88dbd83379587..70e7bf809c08b 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -876,9 +876,9 @@ static bool spa_maps_hpa(const struct cxl_region_params *p,
 		return false;
 
 	/*
-	 * If an extended linear cache region then the CXL range is assumed
-	 * to be fronted by the DRAM range in current known implementation.
-	 * This assumption will be made until a variant implementation exists.
+	 * The extended linear cache region is constructed by a 1:1 ratio
+	 * where the SPA maps equal amounts of DRAM and CXL HPA capacity with
+	 * CXL decoders at the high end of the SPA range.
 	 */
 	return p->res->start + p->cache_size == range->start &&
 		p->res->end == range->end;

From b46ca0eebf111acfb12b3e3721b1e899c400fca1 Mon Sep 17 00:00:00 2001
From: Robert Richter <rrichter@amd.com>
Date: Fri, 14 Nov 2025 08:58:41 +0100
Subject: [PATCH 043/143] cxl: Simplify cxl_rd_ops allocation and handling

A root decoder's callback handlers are collected in struct cxl_rd_ops.
The structure is dynamically allocated, though it contains only a few
pointers in it. This also requires checking two pointers to test for
the existence of a callback.

Simplify the allocation, release and handler check by embedding the
ops statically in struct cxl_root_decoder.

Implementation is equivalent to how struct cxl_root_ops handles the
callbacks.

[ dj: Fix spelling error in commit log. ]

Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: Robert Richter <rrichter@amd.com>
Link: https://patch.msgid.link/20251114075844.1315805-2-rrichter@amd.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(backported from commit 6123133ee90fc55a5437364d442dd5876648628d)
[jan: Resolve minor conflict due to code lines shift]
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/acpi.c        |  8 ++------
 drivers/cxl/core/port.c   |  1 -
 drivers/cxl/core/region.c | 20 +++++---------------
 drivers/cxl/cxl.h         |  2 +-
 4 files changed, 8 insertions(+), 23 deletions(-)

diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index 1a64e5c71fbd8..908a5e460a2db 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -491,12 +491,8 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws,
 	cxlrd->qos_class = cfmws->qtg_id;
 
 	if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) {
-		cxlrd->ops = kzalloc(sizeof(*cxlrd->ops), GFP_KERNEL);
-		if (!cxlrd->ops)
-			return -ENOMEM;
-
-		cxlrd->ops->hpa_to_spa = cxl_apply_xor_maps;
-		cxlrd->ops->spa_to_hpa = cxl_apply_xor_maps;
+		cxlrd->ops.hpa_to_spa = cxl_apply_xor_maps;
+		cxlrd->ops.spa_to_hpa = cxl_apply_xor_maps;
 	}
 
 	rc = cxl_decoder_add(cxld);
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 8128fd2b5b317..fef3aa0c6680c 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -459,7 +459,6 @@ static void cxl_root_decoder_release(struct device *dev)
 	if (atomic_read(&cxlrd->region_id) >= 0)
 		memregion_free(atomic_read(&cxlrd->region_id));
 	__cxl_decoder_release(&cxlrd->cxlsd.cxld);
-	kfree(cxlrd->ops);
 	kfree(cxlrd);
 }
 
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 70e7bf809c08b..d2143b79d17b8 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2968,16 +2968,6 @@ static bool cxl_is_hpa_in_chunk(u64 hpa, struct cxl_region *cxlr, int pos)
 	return false;
 }
 
-static bool has_hpa_to_spa(struct cxl_root_decoder *cxlrd)
-{
-	return cxlrd->ops && cxlrd->ops->hpa_to_spa;
-}
-
-static bool has_spa_to_hpa(struct cxl_root_decoder *cxlrd)
-{
-	return cxlrd->ops && cxlrd->ops->spa_to_hpa;
-}
-
 #define CXL_POS_ZERO 0
 /**
  * cxl_validate_translation_params
@@ -3151,8 +3141,8 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
 	hpa = hpa_offset + p->res->start + p->cache_size;
 
 	/* Root decoder translation overrides typical modulo decode */
-	if (has_hpa_to_spa(cxlrd))
-		hpa = cxlrd->ops->hpa_to_spa(cxlrd, hpa);
+	if (cxlrd->ops.hpa_to_spa)
+		hpa = cxlrd->ops.hpa_to_spa(cxlrd, hpa);
 
 	if (!cxl_resource_contains_addr(p->res, hpa)) {
 		dev_dbg(&cxlr->dev,
@@ -3161,7 +3151,7 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
 	}
 
 	/* Simple chunk check, by pos & gran, only applies to modulo decodes */
-	if (!has_hpa_to_spa(cxlrd) && (!cxl_is_hpa_in_chunk(hpa, cxlr, pos)))
+	if (!cxlrd->ops.hpa_to_spa && !cxl_is_hpa_in_chunk(hpa, cxlr, pos))
 		return ULLONG_MAX;
 
 	return hpa;
@@ -3194,8 +3184,8 @@ static int region_offset_to_dpa_result(struct cxl_region *cxlr, u64 offset,
 	 * If the root decoder has SPA to CXL HPA callback, use it. Otherwise
 	 * CXL HPA is assumed to equal SPA.
 	 */
-	if (has_spa_to_hpa(cxlrd)) {
-		hpa = cxlrd->ops->spa_to_hpa(cxlrd, p->res->start + offset);
+	if (cxlrd->ops.spa_to_hpa) {
+		hpa = cxlrd->ops.spa_to_hpa(cxlrd, p->res->start + offset);
 		hpa_offset = hpa - p->res->start;
 	} else {
 		hpa_offset = offset;
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 6cfe65a35c95a..ba17fa86d249e 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -451,7 +451,7 @@ struct cxl_root_decoder {
 	void *platform_data;
 	struct mutex range_lock;
 	int qos_class;
-	struct cxl_rd_ops *ops;
+	struct cxl_rd_ops ops;
 	struct cxl_switch_decoder cxlsd;
 };
 

From 19b248c812baff03009fcbc9983160cc48df604f Mon Sep 17 00:00:00 2001
From: Robert Richter <rrichter@amd.com>
Date: Fri, 14 Nov 2025 08:58:42 +0100
Subject: [PATCH 044/143] cxl/acpi: Group xor arithmetic setup code in a
 single block

Simplify the xor arithmetic setup code by grouping it in a single
block. No need to split the block for QoS setup.

It is safe to reorder the call of cxl_setup_extended_linear_cache()
because there are no dependencies.

Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: Robert Richter <rrichter@amd.com>
Tested-by: Gregory Price <gourry@gourry.net>
Link: https://patch.msgid.link/20251114075844.1315805-3-rrichter@amd.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit c42a4d2ee3b2c432ada9080e29343f4b27ad72bf)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/acpi.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index 908a5e460a2db..77ac940e30138 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -469,8 +469,6 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws,
 		ig = CXL_DECODER_MIN_GRANULARITY;
 	cxld->interleave_granularity = ig;
 
-	cxl_setup_extended_linear_cache(cxlrd);
-
 	if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) {
 		if (ways != 1 && ways != 3) {
 			cxims_ctx = (struct cxl_cxims_context) {
@@ -486,15 +484,14 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws,
 				return -EINVAL;
 			}
 		}
-	}
-
-	cxlrd->qos_class = cfmws->qtg_id;
-
-	if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) {
 		cxlrd->ops.hpa_to_spa = cxl_apply_xor_maps;
 		cxlrd->ops.spa_to_hpa = cxl_apply_xor_maps;
 	}
 
+	cxl_setup_extended_linear_cache(cxlrd);
+
+	cxlrd->qos_class = cfmws->qtg_id;
+
 	rc = cxl_decoder_add(cxld);
 	if (rc)
 		return rc;

From 515662c3d66f9d71463f66a3c18305b9b62215f1 Mon Sep 17 00:00:00 2001
From: Robert Richter <rrichter@amd.com>
Date: Fri, 14 Nov 2025 08:58:43 +0100
Subject: [PATCH 045/143] cxl/region: Remove local variable @inc in
 cxl_port_setup_targets()

Simplify the code by removing local variable @inc. The variable is not
used elsewhere, remove it and directly increment the target number.

Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: Robert Richter <rrichter@amd.com>
Link: https://patch.msgid.link/20251114075844.1315805-4-rrichter@amd.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 7e71fa6e015e46275efd900a728a42d5fcd75179)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/region.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index d2143b79d17b8..b251cb998892a 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -1371,7 +1371,7 @@ static int cxl_port_setup_targets(struct cxl_port *port,
 				  struct cxl_endpoint_decoder *cxled)
 {
 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
-	int parent_iw, parent_ig, ig, iw, rc, inc = 0, pos = cxled->pos;
+	int parent_iw, parent_ig, ig, iw, rc, pos = cxled->pos;
 	struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
 	struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
@@ -1563,9 +1563,8 @@ static int cxl_port_setup_targets(struct cxl_port *port,
 		cxlsd->target[cxl_rr->nr_targets_set] = ep->dport;
 		cxlsd->cxld.target_map[cxl_rr->nr_targets_set] = ep->dport->port_id;
 	}
-	inc = 1;
+	cxl_rr->nr_targets_set++;
 out_target_set:
-	cxl_rr->nr_targets_set += inc;
 	dev_dbg(&cxlr->dev, "%s:%s target[%d] = %s for %s:%s @ %d\n",
 		dev_name(port->uport_dev), dev_name(&port->dev),
 		cxl_rr->nr_targets_set - 1, dev_name(ep->dport->dport_dev),

From d2a6cf577da207e237c1fb57a03890a49c24a782 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Mon, 17 Nov 2025 07:46:09 -0700
Subject: [PATCH 046/143] cxl/test: Standardize CXL auto region size

Create a global define for the size of the mock CXL auto region used
in cxl_test. Remove the declared size in mock_init_hdm_decoder()
function.

Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Tested-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Fabio M. De Francesco <fabio.m.de.francesco@linux.intel.com>
Link: https://patch.msgid.link/20251117144611.903692-2-dave.jiang@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit fa59c35167afdba043efcc80cf460863868141e7)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 tools/testing/cxl/test/cxl.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index fc271561827b6..4cfb3b39b4c68 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -26,6 +26,9 @@ static int interleave_arithmetic;
 #define NR_CXL_PORT_DECODERS 8
 #define NR_BRIDGES (NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST + NR_CXL_RCH)
 
+#define MOCK_AUTO_REGION_SIZE_DEFAULT SZ_512M
+static int mock_auto_region_size = MOCK_AUTO_REGION_SIZE_DEFAULT;
+
 static struct platform_device *cxl_acpi;
 static struct platform_device *cxl_host_bridge[NR_CXL_HOST_BRIDGES];
 #define NR_MULTI_ROOT (NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS)
@@ -801,7 +804,6 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
 	struct cxl_endpoint_decoder *cxled;
 	struct cxl_switch_decoder *cxlsd;
 	struct cxl_port *port, *iter;
-	const int size = SZ_512M;
 	struct cxl_memdev *cxlmd;
 	struct cxl_dport *dport;
 	struct device *dev;
@@ -859,7 +861,7 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
 
 	cxld->hpa_range = (struct range) {
 		.start = base,
-		.end = base + size - 1,
+		.end = base + mock_auto_region_size - 1,
 	};
 
 	cxld->interleave_ways = 2;
@@ -868,7 +870,8 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
 	cxld->flags = CXL_DECODER_F_ENABLE;
 	cxled->state = CXL_DECODER_STATE_AUTO;
 	port->commit_end = cxld->id;
-	devm_cxl_dpa_reserve(cxled, 0, size / cxld->interleave_ways, 0);
+	devm_cxl_dpa_reserve(cxled, 0,
+			     mock_auto_region_size / cxld->interleave_ways, 0);
 	cxld->commit = mock_decoder_commit;
 	cxld->reset = mock_decoder_reset;
 
@@ -917,7 +920,7 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
 		cxld->interleave_granularity = 4096;
 		cxld->hpa_range = (struct range) {
 			.start = base,
-			.end = base + size - 1,
+			.end = base + mock_auto_region_size - 1,
 		};
 		put_device(dev);
 

From 8c693dc67a2ebdc1441aff20ab8dc1eec7626edb Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Mon, 17 Nov 2025 07:46:10 -0700
Subject: [PATCH 047/143] cxl/test: Add cxl_test CFMWS support for extended
 linear cache

Add a module parameter to allow activation of extended linear cache
on the auto region for cxl_test. The current platform implementation
for extended linear cache is 1:1 of DRAM and CXL memory. A CFMWS is
created with the size of both memory together where DRAM takes the
first part of the memory range and CXL covers the second part. The
current CXL auto region on cxl_test consists of 2 256M devices that
creates a 512M region. The new extended linear cache setup will have
512M DRAM and 512M CXL memory for a total of 1G CFMWS. The hardware
decoders must have their starting offset moved to after the DRAM region
to handle the CXL regions.

[ dj: Fixup commenting style. (Jonathan) ]

Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Tested-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Fabio M. De Francesco <fabio.m.de.francesco@linux.intel.com>
Link: https://patch.msgid.link/20251117144611.903692-3-dave.jiang@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(backported from commit 4b1c0466c8fbe23d688a1f54584670a9d1dceabd)
[jan: Resolve minor conflict due to code line "base = window->base_hpa" being moved]
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 tools/testing/cxl/test/cxl.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index 4cfb3b39b4c68..79c462a15c99c 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -15,6 +15,7 @@
 #include "mock.h"
 
 static int interleave_arithmetic;
+static bool extended_linear_cache;
 
 #define FAKE_QTG_ID	42
 
@@ -429,6 +430,22 @@ static struct cxl_mock_res *alloc_mock_res(resource_size_t size, int align)
 	return res;
 }
 
+/* Only update CFMWS0 as this is used by the auto region. */
+static void cfmws_elc_update(struct acpi_cedt_cfmws *window, int index)
+{
+	if (!extended_linear_cache)
+		return;
+
+	if (index != 0)
+		return;
+
+	/*
+	 * The window size should be 2x of the CXL region size where half is
+	 * DRAM and half is CXL
+	 */
+	window->window_size = mock_auto_region_size * 2;
+}
+
 static int populate_cedt(void)
 {
 	struct cxl_mock_res *res;
@@ -453,6 +470,7 @@ static int populate_cedt(void)
 	for (i = cfmws_start; i <= cfmws_end; i++) {
 		struct acpi_cedt_cfmws *window = mock_cfmws[i];
 
+		cfmws_elc_update(window, i);
 		res = alloc_mock_res(window->window_size, SZ_256M);
 		if (!res)
 			return -ENOMEM;
@@ -859,6 +877,9 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld)
 		return;
 	}
 
+	if (extended_linear_cache)
+		base += mock_auto_region_size;
+
 	cxld->hpa_range = (struct range) {
 		.start = base,
 		.end = base + mock_auto_region_size - 1,
@@ -1669,6 +1690,8 @@ static __exit void cxl_test_exit(void)
 
 module_param(interleave_arithmetic, int, 0444);
 MODULE_PARM_DESC(interleave_arithmetic, "Modulo:0, XOR:1");
+module_param(extended_linear_cache, bool, 0444);
+MODULE_PARM_DESC(extended_linear_cache, "Enable extended linear cache support");
 module_init(cxl_test_init);
 module_exit(cxl_test_exit);
 MODULE_LICENSE("GPL v2");

From c371e8708871d172698cfde589c6959398816534 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Mon, 17 Nov 2025 07:46:11 -0700
Subject: [PATCH 048/143] cxl/test: Add support for acpi extended linear cache

Add the mock wrappers for hmat_get_extended_linear_cache_size() in order
to emulate the ACPI helper function for the regions that are mock'd by
cxl_test.

Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Tested-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Fabio M. De Francesco <fabio.m.de.francesco@linux.intel.com>
Link: https://patch.msgid.link/20251117144611.903692-4-dave.jiang@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 68f4a852e18329e84bb5d36168a45b0a52cdf236)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 tools/testing/cxl/Kbuild      |  1 +
 tools/testing/cxl/test/cxl.c  | 21 +++++++++++++++++++++
 tools/testing/cxl/test/mock.c | 20 ++++++++++++++++++++
 tools/testing/cxl/test/mock.h |  3 +++
 4 files changed, 45 insertions(+)

diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 3dae06ac7fba5..68b38863605b2 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -10,6 +10,7 @@ ldflags-y += --wrap=cxl_rcd_component_reg_phys
 ldflags-y += --wrap=cxl_endpoint_parse_cdat
 ldflags-y += --wrap=cxl_dport_init_ras_reporting
 ldflags-y += --wrap=devm_cxl_endpoint_decoders_setup
+ldflags-y += --wrap=hmat_get_extended_linear_cache_size
 
 DRIVERS := ../../../drivers
 CXL_SRC := $(DRIVERS)/cxl
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index 79c462a15c99c..e68bf64460996 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -612,6 +612,25 @@ mock_acpi_evaluate_integer(acpi_handle handle, acpi_string pathname,
 	return AE_OK;
 }
 
+static int
+mock_hmat_get_extended_linear_cache_size(struct resource *backing_res,
+					 int nid, resource_size_t *cache_size)
+{
+	struct acpi_cedt_cfmws *window = mock_cfmws[0];
+	struct resource cfmws0_res =
+		DEFINE_RES_MEM(window->base_hpa, window->window_size);
+
+	if (!extended_linear_cache ||
+	    !resource_contains(&cfmws0_res, backing_res)) {
+		return hmat_get_extended_linear_cache_size(backing_res,
+							   nid, cache_size);
+	}
+
+	*cache_size = mock_auto_region_size;
+
+	return 0;
+}
+
 static struct pci_bus mock_pci_bus[NR_BRIDGES];
 static struct acpi_pci_root mock_pci_root[ARRAY_SIZE(mock_pci_bus)] = {
 	[0] = {
@@ -1201,6 +1220,8 @@ static struct cxl_mock_ops cxl_mock_ops = {
 	.devm_cxl_endpoint_decoders_setup = mock_cxl_endpoint_decoders_setup,
 	.cxl_endpoint_parse_cdat = mock_cxl_endpoint_parse_cdat,
 	.devm_cxl_add_dport_by_dev = mock_cxl_add_dport_by_dev,
+	.hmat_get_extended_linear_cache_size =
+		mock_hmat_get_extended_linear_cache_size,
 	.list = LIST_HEAD_INIT(cxl_mock_ops.list),
 };
 
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index 6fd4edb9215c4..6eb15991a414b 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -111,6 +111,26 @@ acpi_status __wrap_acpi_evaluate_integer(acpi_handle handle,
 }
 EXPORT_SYMBOL(__wrap_acpi_evaluate_integer);
 
+int __wrap_hmat_get_extended_linear_cache_size(struct resource *backing_res,
+					       int nid,
+					       resource_size_t *cache_size)
+{
+	int index, rc;
+	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+	if (ops)
+		rc = ops->hmat_get_extended_linear_cache_size(backing_res, nid,
+							      cache_size);
+	else
+		rc = hmat_get_extended_linear_cache_size(backing_res, nid,
+							 cache_size);
+
+	put_cxl_mock_ops(index);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(__wrap_hmat_get_extended_linear_cache_size);
+
 struct acpi_pci_root *__wrap_acpi_pci_find_root(acpi_handle handle)
 {
 	int index;
diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h
index 580f383862245..2684b89c8aa2d 100644
--- a/tools/testing/cxl/test/mock.h
+++ b/tools/testing/cxl/test/mock.h
@@ -24,6 +24,9 @@ struct cxl_mock_ops {
 	void (*cxl_endpoint_parse_cdat)(struct cxl_port *port);
 	struct cxl_dport *(*devm_cxl_add_dport_by_dev)(struct cxl_port *port,
 						       struct device *dport_dev);
+	int (*hmat_get_extended_linear_cache_size)(struct resource *backing_res,
+						   int nid,
+						   resource_size_t *cache_size);
 };
 
 void register_cxl_mock_ops(struct cxl_mock_ops *ops);

From c3f092072f7f454800b5027f7cc7a8c87737583d Mon Sep 17 00:00:00 2001
From: Alejandro Lucero <alucerop@amd.com>
Date: Tue, 18 Nov 2025 18:22:02 +0000
Subject: [PATCH 049/143] cxl/test: remove unused mock function for
 cxl_rcd_component_reg_phys()

Since commit 733b57f262b0 ("cxl/pci: Early setup RCH dport component registers from RCRB"),
the mock wrapper for cxl_rcd_component_reg_phys() is no longer necessary
under mocking tests.

[ dj: Fixup commit representation flagged by checkpatch. ]
[ dj: Amend subject line to indicate which function. ]

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Link: https://patch.msgid.link/20251118182202.2083244-1-alejandro.lucero-palau@amd.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 26c5b0d9c080ff753c66de0b19d6e3e014a24877)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 tools/testing/cxl/Kbuild      |  1 -
 tools/testing/cxl/test/mock.c | 17 -----------------
 2 files changed, 18 deletions(-)

diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 68b38863605b2..0e151d0572d1f 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -6,7 +6,6 @@ ldflags-y += --wrap=acpi_pci_find_root
 ldflags-y += --wrap=nvdimm_bus_register
 ldflags-y += --wrap=cxl_await_media_ready
 ldflags-y += --wrap=devm_cxl_add_rch_dport
-ldflags-y += --wrap=cxl_rcd_component_reg_phys
 ldflags-y += --wrap=cxl_endpoint_parse_cdat
 ldflags-y += --wrap=cxl_dport_init_ras_reporting
 ldflags-y += --wrap=devm_cxl_endpoint_decoders_setup
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index 6eb15991a414b..44bce80ef3ff5 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -231,23 +231,6 @@ struct cxl_dport *__wrap_devm_cxl_add_rch_dport(struct cxl_port *port,
 }
 EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_rch_dport, "CXL");
 
-resource_size_t __wrap_cxl_rcd_component_reg_phys(struct device *dev,
-						  struct cxl_dport *dport)
-{
-	int index;
-	resource_size_t component_reg_phys;
-	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
-	if (ops && ops->is_mock_port(dev))
-		component_reg_phys = CXL_RESOURCE_NONE;
-	else
-		component_reg_phys = cxl_rcd_component_reg_phys(dev, dport);
-	put_cxl_mock_ops(index);
-
-	return component_reg_phys;
-}
-EXPORT_SYMBOL_NS_GPL(__wrap_cxl_rcd_component_reg_phys, "CXL");
-
 void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port)
 {
 	int index;

From 544c132bad87690e1305f770280710788e303ff9 Mon Sep 17 00:00:00 2001
From: Alison Schofield <alison.schofield@intel.com>
Date: Sat, 15 Nov 2025 17:37:49 -0800
Subject: [PATCH 050/143] cxl/test: Remove ret_limit race condition in
 mock_get_event()

Commit 364ee9f3265e ("cxl/test: Enhance event testing") changed the
loop iterator in mock_get_event() from a static constant,
CXL_TEST_EVENT_CNT, to a dynamic global variable, ret_limit. The
intent was to vary the number of events returned per call to simulate
events occurring while logs are being read.

However, ret_limit is modified without synchronization. When multiple
threads call mock_get_event() concurrently, one thread may read
ret_limit, another thread may increment it, and the first thread's
loop condition and size calculation see and use the updated value.

This is visible during cxl_test module load when all memdevs are
initializing simultaneously, which includes getting event records. It
is not tied to the cxl-events.sh unit test specifically, as that
operates on a single memdev.

While no actual harm results (the buffer is always large enough and
the record count fields correctly reflect what was written), this is
a correctness issue. The race creates an inconsistent state within
mock_get_event() and adding variability based on a race appears
unintended.

Make ret_limit a local variable populated from an atomic counter. Each
call gets a stable value that won't change during execution. That
preserves the intended behavior of varying the return counts across
calls while eliminating the race condition.

This implementation uses "+ 1" to produce the full range of 1 to
CXL_TEST_EVENT_RET_MAX (4) records. Previously only 1, 2, 3 were
produced.

Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Link: https://patch.msgid.link/20251116013819.1713780-1-alison.schofield@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit b6369daf0d6a96db5048edd26b07fc1aaed77dd1)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 tools/testing/cxl/test/mem.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index d533481672b78..6809c4a26f5e4 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -250,22 +250,22 @@ static void mes_add_event(struct mock_event_store *mes,
  * Vary the number of events returned to simulate events occuring while the
  * logs are being read.
  */
-static int ret_limit = 0;
+static atomic_t event_counter = ATOMIC_INIT(0);
 
 static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
 {
 	struct cxl_get_event_payload *pl;
 	struct mock_event_log *log;
 	u16 nr_overflow;
+	int ret_limit;
 	u8 log_type;
 	int i;
 
 	if (cmd->size_in != sizeof(log_type))
 		return -EINVAL;
 
-	ret_limit = (ret_limit + 1) % CXL_TEST_EVENT_RET_MAX;
-	if (!ret_limit)
-		ret_limit = 1;
+	/* Vary return limit from 1 to CXL_TEST_EVENT_RET_MAX */
+	ret_limit = (atomic_inc_return(&event_counter) % CXL_TEST_EVENT_RET_MAX) + 1;
 
 	if (cmd->size_out < struct_size(pl, records, ret_limit))
 		return -EINVAL;

From 33b4ed1c58e5be33e8b57ac162bcc84d85bdd388 Mon Sep 17 00:00:00 2001
From: Alison Schofield <alison.schofield@intel.com>
Date: Sat, 15 Nov 2025 17:30:32 -0800
Subject: [PATCH 051/143] cxl/test: Assign overflow_err_count from
 log->nr_overflow

mock_get_event() uses an uninitialized local variable, nr_overflow, to
populate the overflow_err_count field. That results in incorrect
overflow_err_count values in mocked cxl_overflow trace events, such as
this case where the records are reported as 0 and should be non-zero:

[] cxl_overflow: memdev=mem7 host=cxl_mem.6 serial=7: log=Failure : 0 records from 1763228189130895685 to 1763228193130896180

Fix by using log->nr_overflow and remove the unused local variable.

A follow-up change was considered in cxl_mem_get_records_log() to
confirm that the overflow_err_count is non-zero when the overflow flag
is set [1]. Since the driver has no functional dependency on this
constraint, and a device that violates this specific requirement does
not cause incorrect driver behavior, no validation check is added.

[1] CXL 3.2, Table 8-65 Get Event Records Output Payload

Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Link: https://patch.msgid.link/20251116013036.1713313-1-alison.schofield@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit f1840efdb2bf4f8d0e698eebec8f676c6d745c6d)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 tools/testing/cxl/test/mem.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index 6809c4a26f5e4..176dcde570cdd 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -256,7 +256,6 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
 {
 	struct cxl_get_event_payload *pl;
 	struct mock_event_log *log;
-	u16 nr_overflow;
 	int ret_limit;
 	u8 log_type;
 	int i;
@@ -299,7 +298,7 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
 		u64 ns;
 
 		pl->flags |= CXL_GET_EVENT_FLAG_OVERFLOW;
-		pl->overflow_err_count = cpu_to_le16(nr_overflow);
+		pl->overflow_err_count = cpu_to_le16(log->nr_overflow);
 		ns = ktime_get_real_ns();
 		ns -= 5000000000; /* 5s ago */
 		pl->first_overflow_timestamp = cpu_to_le64(ns);

From e0655d0381feeec4ff1d88fa720d5ece58344704 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 26 Sep 2025 16:31:31 +0200
Subject: [PATCH 052/143] soc: apple: mailbox: fix device leak on lookup

Make sure to drop the reference taken to the mbox platform device when
looking up its driver data.

Note that holding a reference to a device does not prevent its driver
data from going away so there is no point in keeping the reference.

Fixes: 6e1457fcad3f ("soc: apple: mailbox: Add ASC/M3 mailbox driver")
Cc: stable@vger.kernel.org	# 6.8
Signed-off-by: Johan Hovold <johan@kernel.org>
Reviewed-by: Neal Gompa <neal@gompa.dev>
Signed-off-by: Sven Peter <sven@kernel.org>
(cherry picked from commit f401671e90ccc26b3022f177c4156a429c024f6c)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/soc/apple/mailbox.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/soc/apple/mailbox.c b/drivers/soc/apple/mailbox.c
index 49a0955e82d6c..1685da1da23d0 100644
--- a/drivers/soc/apple/mailbox.c
+++ b/drivers/soc/apple/mailbox.c
@@ -299,11 +299,18 @@ struct apple_mbox *apple_mbox_get(struct device *dev, int index)
 		return ERR_PTR(-EPROBE_DEFER);
 
 	mbox = platform_get_drvdata(pdev);
-	if (!mbox)
-		return ERR_PTR(-EPROBE_DEFER);
+	if (!mbox) {
+		mbox = ERR_PTR(-EPROBE_DEFER);
+		goto out_put_pdev;
+	}
+
+	if (!device_link_add(dev, &pdev->dev, DL_FLAG_AUTOREMOVE_CONSUMER)) {
+		mbox = ERR_PTR(-ENODEV);
+		goto out_put_pdev;
+	}
 
-	if (!device_link_add(dev, &pdev->dev, DL_FLAG_AUTOREMOVE_CONSUMER))
-		return ERR_PTR(-ENODEV);
+out_put_pdev:
+	put_device(&pdev->dev);
 
 	return mbox;
 }

From 3522058bef8de1cfb73f5cd68223a5e69e010ac1 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 26 Sep 2025 16:31:32 +0200
Subject: [PATCH 053/143] soc: apple: sart: drop device reference after lookup

Holding a reference to a device does not prevent its driver data from
going away so there is no point in keeping the reference after looking
up the sart device.

Signed-off-by: Johan Hovold <johan@kernel.org>
Reviewed-by: Neal Gompa <neal@gompa.dev>
Signed-off-by: Sven Peter <sven@kernel.org>
(cherry picked from commit f95f3bceade25914cca30c871187b2d33db23f34)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/soc/apple/sart.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/drivers/soc/apple/sart.c b/drivers/soc/apple/sart.c
index afa1117368997..6952afc41308a 100644
--- a/drivers/soc/apple/sart.c
+++ b/drivers/soc/apple/sart.c
@@ -164,17 +164,11 @@ static int apple_sart_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static void apple_sart_put_device(void *dev)
-{
-	put_device(dev);
-}
-
 struct apple_sart *devm_apple_sart_get(struct device *dev)
 {
 	struct device_node *sart_node;
 	struct platform_device *sart_pdev;
 	struct apple_sart *sart;
-	int ret;
 
 	sart_node = of_parse_phandle(dev->of_node, "apple,sart", 0);
 	if (!sart_node)
@@ -192,14 +186,11 @@ struct apple_sart *devm_apple_sart_get(struct device *dev)
 		return ERR_PTR(-EPROBE_DEFER);
 	}
 
-	ret = devm_add_action_or_reset(dev, apple_sart_put_device,
-				       &sart_pdev->dev);
-	if (ret)
-		return ERR_PTR(ret);
-
 	device_link_add(dev, &sart_pdev->dev,
 			DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
 
+	put_device(&sart_pdev->dev);
+
 	return sart;
 }
 EXPORT_SYMBOL_GPL(devm_apple_sart_get);

From 028cb3bc8c0bb4ac85816f1c3d31d1931ea776bf Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 26 Sep 2025 16:24:53 +0200
Subject: [PATCH 054/143] soc: amlogic: canvas: fix device leak on lookup

Make sure to drop the reference taken to the canvas platform device when
looking up its driver data.

Note that holding a reference to a device does not prevent its driver
data from going away so there is no point in keeping the reference.

Also note that commit 28f851e6afa8 ("soc: amlogic: canvas: add missing
put_device() call in meson_canvas_get()") fixed the leak in a lookup
error path, but the reference is still leaking on success.

Fixes: d4983983d987 ("soc: amlogic: add meson-canvas driver")
Cc: stable@vger.kernel.org	# 4.20: 28f851e6afa8
Cc: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Reviewed-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Link: https://patch.msgid.link/20250926142454.5929-2-johan@kernel.org
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
(cherry picked from commit 32200f4828de9d7e6db379909898e718747f4e18)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/soc/amlogic/meson-canvas.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/soc/amlogic/meson-canvas.c b/drivers/soc/amlogic/meson-canvas.c
index b6e06c4d2117f..0711088da5dcd 100644
--- a/drivers/soc/amlogic/meson-canvas.c
+++ b/drivers/soc/amlogic/meson-canvas.c
@@ -73,10 +73,9 @@ struct meson_canvas *meson_canvas_get(struct device *dev)
 	 * current state, this driver probe cannot return -EPROBE_DEFER
 	 */
 	canvas = dev_get_drvdata(&canvas_pdev->dev);
-	if (!canvas) {
-		put_device(&canvas_pdev->dev);
+	put_device(&canvas_pdev->dev);
+	if (!canvas)
 		return ERR_PTR(-EINVAL);
-	}
 
 	return canvas;
 }

From f97cbe15045c4ebe14f8d60f8237d8df919b8956 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 26 Sep 2025 16:24:54 +0200
Subject: [PATCH 055/143] soc: amlogic: canvas: simplify lookup error handling

Simplify the canvas lookup error handling by dropping the OF node
reference sooner.

Signed-off-by: Johan Hovold <johan@kernel.org>
Reviewed-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Link: https://patch.msgid.link/20250926142454.5929-3-johan@kernel.org
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
(cherry picked from commit 075daf22641870e435a16ec2129bfd3b3134c487)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/soc/amlogic/meson-canvas.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/soc/amlogic/meson-canvas.c b/drivers/soc/amlogic/meson-canvas.c
index 0711088da5dcd..79681afea8c61 100644
--- a/drivers/soc/amlogic/meson-canvas.c
+++ b/drivers/soc/amlogic/meson-canvas.c
@@ -60,12 +60,9 @@ struct meson_canvas *meson_canvas_get(struct device *dev)
 		return ERR_PTR(-ENODEV);
 
 	canvas_pdev = of_find_device_by_node(canvas_node);
-	if (!canvas_pdev) {
-		of_node_put(canvas_node);
-		return ERR_PTR(-EPROBE_DEFER);
-	}
-
 	of_node_put(canvas_node);
+	if (!canvas_pdev)
+		return ERR_PTR(-EPROBE_DEFER);
 
 	/*
 	 * If priv is NULL, it's probably because the canvas hasn't

From 13c26c0fc20f75d9123ece927099cd8d8d750210 Mon Sep 17 00:00:00 2001
From: Conor Dooley <conor.dooley@microchip.com>
Date: Mon, 13 Oct 2025 18:45:33 +0100
Subject: [PATCH 056/143] dt-bindings: soc: microchip: document the simple-mfd
 syscon on PolarFire SoC

"mss-top-sysreg" contains clocks, pinctrl, resets, an interrupt controller
and more. At this point, only the reset controller child is described as
that's all that is described by the existing bindings.
The clock controller already has a dedicated node, and will retain it as
there are other clock regions, so like the mailbox, a compatible-based
lookup of the syscon is sufficient to keep the clock driver working as
before, so no child is needed. There's also an interrupt multiplexing
service provided by this syscon, for which there is work in progress at
[1].

Link: https://lore.kernel.org/linux-gpio/20240723-uncouple-enforcer-7c48e4a4fefe@wendy/ [1]
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
(cherry picked from commit feaa716adc514fb5fbcb60b3e1620ac5dcf8505a)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 .../microchip,mpfs-mss-top-sysreg.yaml        | 47 +++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/microchip/microchip,mpfs-mss-top-sysreg.yaml

diff --git a/Documentation/devicetree/bindings/soc/microchip/microchip,mpfs-mss-top-sysreg.yaml b/Documentation/devicetree/bindings/soc/microchip/microchip,mpfs-mss-top-sysreg.yaml
new file mode 100644
index 0000000000000..1ab691db87950
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/microchip/microchip,mpfs-mss-top-sysreg.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/soc/microchip/microchip,mpfs-mss-top-sysreg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip PolarFire SoC Microprocessor Subsystem (MSS) sysreg register region
+
+maintainers:
+  - Conor Dooley <conor.dooley@microchip.com>
+
+description:
+  A wide assortment of registers that control elements of the MSS on PolarFire
+  SoC, including pinmuxing, resets and clocks among others.
+
+properties:
+  compatible:
+    items:
+      - const: microchip,mpfs-mss-top-sysreg
+      - const: syscon
+
+  reg:
+    maxItems: 1
+
+  '#reset-cells':
+    description:
+      The AHB/AXI peripherals on the PolarFire SoC have reset support, so
+      from CLK_ENVM to CLK_CFM. The reset consumer should specify the
+      desired peripheral via the clock ID in its "resets" phandle cell.
+      See include/dt-bindings/clock/microchip,mpfs-clock.h for the full list
+      of PolarFire clock/reset IDs.
+    const: 1
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    syscon@20002000 {
+      compatible = "microchip,mpfs-mss-top-sysreg", "syscon";
+      reg = <0x20002000 0x1000>;
+      #reset-cells = <1>;
+    };
+

From aff6a6491e965c387edf236329325bcdd3526059 Mon Sep 17 00:00:00 2001
From: Conor Dooley <conor.dooley@microchip.com>
Date: Mon, 13 Oct 2025 18:45:34 +0100
Subject: [PATCH 057/143] soc: microchip: add mfd drivers for two syscon
 regions on PolarFire SoC

The control-scb and mss-top-sysreg regions on PolarFire SoC both fulfill
multiple purposes. The former is used for mailbox functions in addition
to the temperature & voltage sensor while the latter is used for clocks,
resets, interrupt muxing and pinctrl.

Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
(cherry picked from commit 4aac11c9a6e72efc025113e1ed62a1f084294300)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/soc/microchip/Kconfig               | 12 ++++++
 drivers/soc/microchip/Makefile              |  1 +
 drivers/soc/microchip/mpfs-control-scb.c    | 38 ++++++++++++++++++
 drivers/soc/microchip/mpfs-mss-top-sysreg.c | 44 +++++++++++++++++++++
 4 files changed, 95 insertions(+)
 create mode 100644 drivers/soc/microchip/mpfs-control-scb.c
 create mode 100644 drivers/soc/microchip/mpfs-mss-top-sysreg.c

diff --git a/drivers/soc/microchip/Kconfig b/drivers/soc/microchip/Kconfig
index 19f4b576f822b..bcf5546025610 100644
--- a/drivers/soc/microchip/Kconfig
+++ b/drivers/soc/microchip/Kconfig
@@ -9,3 +9,15 @@ config POLARFIRE_SOC_SYS_CTRL
 	  module will be called mpfs_system_controller.
 
 	  If unsure, say N.
+
+config POLARFIRE_SOC_SYSCONS
+	bool "PolarFire SoC (MPFS) syscon drivers"
+	default y
+	depends on ARCH_MICROCHIP
+	select MFD_CORE
+	help
+	  These drivers add support for the syscons on PolarFire SoC (MPFS).
+	  Without these drivers core parts of the kernel such as clocks
+	  and resets will not function correctly.
+
+	  If unsure, and on a PolarFire SoC, say y.
diff --git a/drivers/soc/microchip/Makefile b/drivers/soc/microchip/Makefile
index 14489919fe4b3..1a3a1594b089b 100644
--- a/drivers/soc/microchip/Makefile
+++ b/drivers/soc/microchip/Makefile
@@ -1 +1,2 @@
 obj-$(CONFIG_POLARFIRE_SOC_SYS_CTRL)	+= mpfs-sys-controller.o
+obj-$(CONFIG_POLARFIRE_SOC_SYSCONS)	+= mpfs-control-scb.o mpfs-mss-top-sysreg.o
diff --git a/drivers/soc/microchip/mpfs-control-scb.c b/drivers/soc/microchip/mpfs-control-scb.c
new file mode 100644
index 0000000000000..f0b84b1f49cbc
--- /dev/null
+++ b/drivers/soc/microchip/mpfs-control-scb.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/array_size.h>
+#include <linux/of.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/syscon.h>
+#include <linux/platform_device.h>
+
+static const struct mfd_cell mpfs_control_scb_devs[] = {
+	MFD_CELL_NAME("mpfs-tvs"),
+};
+
+static int mpfs_control_scb_probe(struct platform_device *pdev)
+{
+	/* Register the child cells; devm-managed so they go away on unbind. */
+	return devm_mfd_add_devices(&pdev->dev, PLATFORM_DEVID_NONE,
+				    mpfs_control_scb_devs,
+				    ARRAY_SIZE(mpfs_control_scb_devs), NULL, 0, NULL);
+}
+
+static const struct of_device_id mpfs_control_scb_of_match[] = {
+	{ .compatible = "microchip,mpfs-control-scb", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, mpfs_control_scb_of_match);
+
+static struct platform_driver mpfs_control_scb_driver = {
+	.driver = {
+		.name = "mpfs-control-scb",
+		.of_match_table = mpfs_control_scb_of_match,
+	},
+	.probe = mpfs_control_scb_probe,
+};
+module_platform_driver(mpfs_control_scb_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Conor Dooley <conor.dooley@microchip.com>");
+MODULE_DESCRIPTION("PolarFire SoC control scb driver");
diff --git a/drivers/soc/microchip/mpfs-mss-top-sysreg.c b/drivers/soc/microchip/mpfs-mss-top-sysreg.c
new file mode 100644
index 0000000000000..b2244e44ff0fa
--- /dev/null
+++ b/drivers/soc/microchip/mpfs-mss-top-sysreg.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/array_size.h>
+#include <linux/of.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+
+static const struct mfd_cell mpfs_mss_top_sysreg_devs[] = {
+	MFD_CELL_NAME("mpfs-reset"),
+};
+
+static int mpfs_mss_top_sysreg_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	int ret;
+
+	/* devm: cells are removed if the populate below fails, or on unbind */
+	ret = devm_mfd_add_devices(dev, PLATFORM_DEVID_NONE, mpfs_mss_top_sysreg_devs,
+				   ARRAY_SIZE(mpfs_mss_top_sysreg_devs), NULL, 0, NULL);
+	if (ret)
+		return ret;
+	return devm_of_platform_populate(dev);
+}
+
+static const struct of_device_id mpfs_mss_top_sysreg_of_match[] = {
+	{ .compatible = "microchip,mpfs-mss-top-sysreg", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, mpfs_mss_top_sysreg_of_match);
+
+static struct platform_driver mpfs_mss_top_sysreg_driver = {
+	.driver = {
+		.name = "mpfs-mss-top-sysreg",
+		.of_match_table = mpfs_mss_top_sysreg_of_match,
+	},
+	.probe = mpfs_mss_top_sysreg_probe,
+};
+module_platform_driver(mpfs_mss_top_sysreg_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Conor Dooley <conor.dooley@microchip.com>");
+MODULE_DESCRIPTION("PolarFire SoC mss top sysreg driver");

From 1af94efafea207512f56cca1ce834d04248ee403 Mon Sep 17 00:00:00 2001
From: Conor Dooley <conor.dooley@microchip.com>
Date: Mon, 10 Nov 2025 11:23:53 +0000
Subject: [PATCH 058/143] MAINTAINERS: add new soc drivers to Microchip RISC-V
 entry

Add the two new syscon drivers to the RISC-V entry for Microchip
platforms.

Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
(cherry picked from commit 587c0a5e810b72c93fa44ee06d60dd555f52360b)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 9a83ca44c7d29..da0c686bc7994 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -21748,6 +21748,8 @@ F:	drivers/pci/controller/plda/pcie-microchip-host.c
 F:	drivers/pwm/pwm-microchip-core.c
 F:	drivers/reset/reset-mpfs.c
 F:	drivers/rtc/rtc-mpfs.c
+F:	drivers/soc/microchip/mpfs-control-scb.c
+F:	drivers/soc/microchip/mpfs-mss-top-sysreg.c
 F:	drivers/soc/microchip/mpfs-sys-controller.c
 F:	drivers/spi/spi-microchip-core-qspi.c
 F:	drivers/spi/spi-microchip-core.c

From ed7e3decfd152b60633d7c849e91e433ac1395fc Mon Sep 17 00:00:00 2001
From: Conor Dooley <conor.dooley@microchip.com>
Date: Mon, 10 Nov 2025 11:23:54 +0000
Subject: [PATCH 059/143] MAINTAINERS: rename Microchip RISC-V entry

There's now non-FPGA RISC-V SoCs from Microchip, so rename the entry
to reflect that.

Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
(cherry picked from commit 66c6ceb41ed375773491c5d024167a2cbe6fe944)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index da0c686bc7994..0dda21bf8d21d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -21722,7 +21722,7 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git
 F:	Documentation/devicetree/bindings/iommu/riscv,iommu.yaml
 F:	drivers/iommu/riscv/
 
-RISC-V MICROCHIP FPGA SUPPORT
+RISC-V MICROCHIP SUPPORT
 M:	Conor Dooley <conor.dooley@microchip.com>
 M:	Daire McNamara <daire.mcnamara@microchip.com>
 L:	linux-riscv@lists.infradead.org

From 6e5191437c9a035636ae988b11eded02ad6e4530 Mon Sep 17 00:00:00 2001
From: Pierre-Henry Moussay <pierre-henry.moussay@microchip.com>
Date: Mon, 17 Nov 2025 14:24:37 +0000
Subject: [PATCH 060/143] dt-bindings: cache: sifive,ccache0: add a pic64gx
 compatible

The pic64gx uses the same IP as mpfs, therefore add compatibility with
mpfs as a fallback.

Signed-off-by: Pierre-Henry Moussay <pierre-henry.moussay@microchip.com>
Acked-by: Rob Herring (Arm) <robh@kernel.org>
Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
(cherry picked from commit d52341da4db0cd993d3549aa20cbdf063b412c3b)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 Documentation/devicetree/bindings/cache/sifive,ccache0.yaml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Documentation/devicetree/bindings/cache/sifive,ccache0.yaml b/Documentation/devicetree/bindings/cache/sifive,ccache0.yaml
index 579bacb66f348..c0e5ebb1fa4c7 100644
--- a/Documentation/devicetree/bindings/cache/sifive,ccache0.yaml
+++ b/Documentation/devicetree/bindings/cache/sifive,ccache0.yaml
@@ -48,6 +48,11 @@ properties:
           - const: microchip,mpfs-ccache
           - const: sifive,fu540-c000-ccache
           - const: cache
+      - items:
+          - const: microchip,pic64gx-ccache
+          - const: microchip,mpfs-ccache
+          - const: sifive,fu540-c000-ccache
+          - const: cache
 
   cache-block-size:
     const: 64

From 6485a3d7e1155d54bd32651adf5db3beff31f14c Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Mon, 17 Nov 2025 10:47:54 +0000
Subject: [PATCH 061/143] memregion: Drop unused IORES_DESC_* parameter from
 cpu_cache_invalidate_memregion()

The res_desc parameter was originally introduced for documentation purposes
and with the idea that with HDM-DB CXL invalidation could be triggered from
the device. That has not come to pass and the continued existence of the
option is confusing when we add a range in the following patch which might
not be a strict subset of the res_desc. So avoid that confusion by dropping
the parameter.

Link: https://lore.kernel.org/linux-mm/686eedb25ed02_24471002e@dwillia2-xfh.jf.intel.com.notmuch/
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Suggested-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
(cherry picked from commit f49ae86483c494ddc793d889f6df5ea68d138569)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 arch/x86/mm/pat/set_memory.c | 2 +-
 drivers/cxl/core/region.c    | 2 +-
 drivers/nvdimm/region.c      | 2 +-
 drivers/nvdimm/region_devs.c | 2 +-
 include/linux/memregion.h    | 7 +++----
 5 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 8834c76f91c9e..4019b17fb65ed 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -368,7 +368,7 @@ bool cpu_cache_has_invalidate_memregion(void)
 }
 EXPORT_SYMBOL_NS_GPL(cpu_cache_has_invalidate_memregion, "DEVMEM");
 
-int cpu_cache_invalidate_memregion(int res_desc)
+int cpu_cache_invalidate_memregion(void)
 {
 	if (WARN_ON_ONCE(!cpu_cache_has_invalidate_memregion()))
 		return -ENXIO;
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index b251cb998892a..ffab8efa3d66d 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -236,7 +236,7 @@ static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
 		return -ENXIO;
 	}
 
-	cpu_cache_invalidate_memregion(IORES_DESC_CXL);
+	cpu_cache_invalidate_memregion();
 	return 0;
 }
 
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
index 88dc062af5f84..c43506448edf8 100644
--- a/drivers/nvdimm/region.c
+++ b/drivers/nvdimm/region.c
@@ -110,7 +110,7 @@ static void nd_region_remove(struct device *dev)
 	 * here is ok.
 	 */
 	if (cpu_cache_has_invalidate_memregion())
-		cpu_cache_invalidate_memregion(IORES_DESC_PERSISTENT_MEMORY);
+		cpu_cache_invalidate_memregion();
 }
 
 static int child_notify(struct device *dev, void *data)
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index de1ee5ebc8516..3cdd93d40997f 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -90,7 +90,7 @@ static int nd_region_invalidate_memregion(struct nd_region *nd_region)
 		}
 	}
 
-	cpu_cache_invalidate_memregion(IORES_DESC_PERSISTENT_MEMORY);
+	cpu_cache_invalidate_memregion();
 out:
 	for (i = 0; i < nd_region->ndr_mappings; i++) {
 		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
diff --git a/include/linux/memregion.h b/include/linux/memregion.h
index c013214677897..945646bde825c 100644
--- a/include/linux/memregion.h
+++ b/include/linux/memregion.h
@@ -26,8 +26,7 @@ static inline void memregion_free(int id)
 
 /**
  * cpu_cache_invalidate_memregion - drop any CPU cached data for
- *     memregions described by @res_desc
- * @res_desc: one of the IORES_DESC_* types
+ *     memregion
  *
  * Perform cache maintenance after a memory event / operation that
  * changes the contents of physical memory in a cache-incoherent manner.
@@ -46,7 +45,7 @@ static inline void memregion_free(int id)
  * the cache maintenance.
  */
 #ifdef CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
-int cpu_cache_invalidate_memregion(int res_desc);
+int cpu_cache_invalidate_memregion(void);
 bool cpu_cache_has_invalidate_memregion(void);
 #else
 static inline bool cpu_cache_has_invalidate_memregion(void)
@@ -54,7 +53,7 @@ static inline bool cpu_cache_has_invalidate_memregion(void)
 	return false;
 }
 
-static inline int cpu_cache_invalidate_memregion(int res_desc)
+static inline int cpu_cache_invalidate_memregion(void)
 {
 	WARN_ON_ONCE("CPU cache invalidation required");
 	return -ENXIO;

From cb2c6f8d76b26fa3017df1c671db3f2efe8d9fa0 Mon Sep 17 00:00:00 2001
From: Yicong Yang <yangyicong@hisilicon.com>
Date: Mon, 17 Nov 2025 10:47:55 +0000
Subject: [PATCH 062/143] memregion: Support fine grained invalidate by
 cpu_cache_invalidate_memregion()

Extend cpu_cache_invalidate_memregion() to support invalidating a
particular range of memory by introducing start and length parameters.
Control of types of invalidation is left for when use cases turn up. For
now everything is Clean and Invalidate.

Where the range is unknown, use the provided cpu_cache_invalidate_all()
helper to act as documentation of intent in a fashion that is clearer than
passing (0, -1) to cpu_cache_invalidate_memregion().

Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
(cherry picked from commit b43652d867cf2a5f31b14e3d9a320ad01fca0992)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 arch/x86/mm/pat/set_memory.c |  2 +-
 drivers/cxl/core/region.c    |  5 ++++-
 drivers/nvdimm/region.c      |  2 +-
 drivers/nvdimm/region_devs.c |  2 +-
 include/linux/memregion.h    | 13 +++++++++++--
 5 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 4019b17fb65ed..292c7202faed9 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -368,7 +368,7 @@ bool cpu_cache_has_invalidate_memregion(void)
 }
 EXPORT_SYMBOL_NS_GPL(cpu_cache_has_invalidate_memregion, "DEVMEM");
 
-int cpu_cache_invalidate_memregion(void)
+int cpu_cache_invalidate_memregion(phys_addr_t start, size_t len)
 {
 	if (WARN_ON_ONCE(!cpu_cache_has_invalidate_memregion()))
 		return -ENXIO;
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index ffab8efa3d66d..2ef7ac530f4d8 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -236,7 +236,10 @@ static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
 		return -ENXIO;
 	}
 
-	cpu_cache_invalidate_memregion();
+	if (!cxlr->params.res)
+		return -ENXIO;
+	cpu_cache_invalidate_memregion(cxlr->params.res->start,
+				       resource_size(cxlr->params.res));
 	return 0;
 }
 
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
index c43506448edf8..42e982db5b049 100644
--- a/drivers/nvdimm/region.c
+++ b/drivers/nvdimm/region.c
@@ -110,7 +110,7 @@ static void nd_region_remove(struct device *dev)
 	 * here is ok.
 	 */
 	if (cpu_cache_has_invalidate_memregion())
-		cpu_cache_invalidate_memregion();
+		cpu_cache_invalidate_all();
 }
 
 static int child_notify(struct device *dev, void *data)
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 3cdd93d40997f..e27fc380f6c0b 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -90,7 +90,7 @@ static int nd_region_invalidate_memregion(struct nd_region *nd_region)
 		}
 	}
 
-	cpu_cache_invalidate_memregion();
+	cpu_cache_invalidate_all();
 out:
 	for (i = 0; i < nd_region->ndr_mappings; i++) {
 		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
diff --git a/include/linux/memregion.h b/include/linux/memregion.h
index 945646bde825c..a55f62cc52660 100644
--- a/include/linux/memregion.h
+++ b/include/linux/memregion.h
@@ -27,6 +27,9 @@ static inline void memregion_free(int id)
 /**
  * cpu_cache_invalidate_memregion - drop any CPU cached data for
  *     memregion
+ * @start: start physical address of the target memory region.
+ * @len: length of the target memory region. -1 for all the regions of
+ *       the target type.
  *
  * Perform cache maintenance after a memory event / operation that
  * changes the contents of physical memory in a cache-incoherent manner.
@@ -45,7 +48,7 @@ static inline void memregion_free(int id)
  * the cache maintenance.
  */
 #ifdef CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
-int cpu_cache_invalidate_memregion(void);
+int cpu_cache_invalidate_memregion(phys_addr_t start, size_t len);
 bool cpu_cache_has_invalidate_memregion(void);
 #else
 static inline bool cpu_cache_has_invalidate_memregion(void)
@@ -53,10 +56,16 @@ static inline bool cpu_cache_has_invalidate_memregion(void)
 	return false;
 }
 
-static inline int cpu_cache_invalidate_memregion(void)
+static inline int cpu_cache_invalidate_memregion(phys_addr_t start, size_t len)
 {
 	WARN_ON_ONCE("CPU cache invalidation required");
 	return -ENXIO;
 }
 #endif
+
+static inline int cpu_cache_invalidate_all(void)
+{
+	return cpu_cache_invalidate_memregion(0, -1);
+}
+
 #endif /* _MEMREGION_H_ */

From 7ac957b1917c29e39effa2a24967a7717341fdc3 Mon Sep 17 00:00:00 2001
From: Xianwei Zhao <xianwei.zhao@amlogic.com>
Date: Wed, 19 Nov 2025 10:52:22 +0800
Subject: [PATCH 063/143] dt-bindings: arm: amlogic: meson-gx-ao-secure:
 support more SoCs

Add new compatibles for ao-secure of Amlogic SoCs (S6, S7, S7D).

Acked-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: Xianwei Zhao <xianwei.zhao@amlogic.com>
Link: https://patch.msgid.link/20251119-soc-info-s6-s7-s7d-v3-1-1764c1995c04@amlogic.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
(cherry picked from commit 1d80bed4e35710287c584f998e51980a34fb3a4e)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 .../bindings/arm/amlogic/amlogic,meson-gx-ao-secure.yaml       | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-gx-ao-secure.yaml b/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-gx-ao-secure.yaml
index b4f6695a60152..fa7c403c874a6 100644
--- a/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-gx-ao-secure.yaml
+++ b/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-gx-ao-secure.yaml
@@ -34,6 +34,9 @@ properties:
               - amlogic,a4-ao-secure
               - amlogic,c3-ao-secure
               - amlogic,s4-ao-secure
+              - amlogic,s6-ao-secure
+              - amlogic,s7-ao-secure
+              - amlogic,s7d-ao-secure
               - amlogic,t7-ao-secure
           - const: amlogic,meson-gx-ao-secure
           - const: syscon

From 86f030df5e131ffb24b562a37e7450ac4bb9a689 Mon Sep 17 00:00:00 2001
From: Xianwei Zhao <xianwei.zhao@amlogic.com>
Date: Wed, 19 Nov 2025 10:52:23 +0800
Subject: [PATCH 064/143] soc: amlogic: meson-gx-socinfo: add new SoCs id

Add new definitions for Amlogic SoCs, including S6, S7 and S7D.

Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Reviewed-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: Xianwei Zhao <xianwei.zhao@amlogic.com>
Link: https://patch.msgid.link/20251119-soc-info-s6-s7-s7d-v3-2-1764c1995c04@amlogic.com
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
(cherry picked from commit ba8abbdfd09e64f51ead8b86afc6b586505919b4)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/soc/amlogic/meson-gx-socinfo.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/soc/amlogic/meson-gx-socinfo.c b/drivers/soc/amlogic/meson-gx-socinfo.c
index 7549f1644e5ea..2a54ca43cd13e 100644
--- a/drivers/soc/amlogic/meson-gx-socinfo.c
+++ b/drivers/soc/amlogic/meson-gx-socinfo.c
@@ -46,6 +46,9 @@ static const struct meson_gx_soc_id {
 	{ "A5", 0x3c },
 	{ "C3", 0x3d },
 	{ "A4", 0x40 },
+	{ "S7", 0x46 },
+	{ "S7D", 0x47 },
+	{ "S6", 0x48 },
 };
 
 static const struct meson_gx_package_id {
@@ -86,6 +89,9 @@ static const struct meson_gx_package_id {
 	{ "A311D2", 0x36, 0x1, 0xf },
 	{ "A113X2", 0x3c, 0x1, 0xf },
 	{ "A113L2", 0x40, 0x1, 0xf },
+	{ "S805X3", 0x46, 0x3, 0xf },
+	{ "S905X5M", 0x47, 0x1, 0xf },
+	{ "S905X5", 0x48, 0x1, 0xf },
 };
 
 static inline unsigned int socinfo_to_major(u32 socinfo)

From e4291ec0e847fb5603071ba2aac11274349fe733 Mon Sep 17 00:00:00 2001
From: Yicong Yang <yangyicong@hisilicon.com>
Date: Mon, 17 Nov 2025 10:47:56 +0000
Subject: [PATCH 065/143] lib: Support ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION

ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION provides the mechanism for
invalidating certain memory regions in a cache-incoherent manner. Currently
this is used by NVDIMM and CXL memory drivers in cases where it is
necessary to flush all data from caches by physical address range.
The operations in question are effectively memory hotplug, where stale
data might otherwise remain in the caches.

This is separate from the invalidates done to enable use of non-coherent
DMA masters, primarily in terms of when it is needed (not related to DMA
mappings) and how deep the flush must push data. The flushes done for
non-coherent DMA only need to reach the Point of Coherence of a single host
(which is often nearer CPUs and DMA masters than the physical storage).
This operation must push the data out of non architectural caches
(memory-side caches, write buffers etc) and typically all the way to the
memory device.

In some architectures these operations are supported by system components
that may become available only later in boot as they are either present
on a discoverable bus, or via a firmware description of an MMIO interface
(e.g. ACPI DSDT). Provide a framework to handle this case.

Architectures can opt in for this support via
CONFIG_GENERIC_CPU_CACHE_MAINTENANCE

Add a registration framework. Each driver provides an ops structure and
the first op is Write Back and Invalidate by PA Range. The driver may
over invalidate.

For systems that can perform this operation asynchronously an optional
completion check operation is also provided. If present that must be called
to ensure that the action has finished. This provides a considerable
performance advantage if multiple agents are involved in the maintenance
operation.

When multiple agents are present in the system each should register with
this framework and the core code will issue the invalidate to all of them
before checking for completion on each. This is done to avoid the need for
filtering in the core code, which can become complex when interleaving,
potentially across different cache coherency hardware, is in use; it is
easier to tell everyone and let those who don't care do nothing.

Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
Co-developed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
(cherry picked from commit c460697d3472d4252917fba9bbc1d1a23eafc124)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 include/linux/cache_coherency.h |  61 ++++++++++++++
 lib/Kconfig                     |   3 +
 lib/Makefile                    |   2 +
 lib/cache_maint.c               | 138 ++++++++++++++++++++++++++++++++
 4 files changed, 204 insertions(+)
 create mode 100644 include/linux/cache_coherency.h
 create mode 100644 lib/cache_maint.c

diff --git a/include/linux/cache_coherency.h b/include/linux/cache_coherency.h
new file mode 100644
index 0000000000000..cc81c5733e316
--- /dev/null
+++ b/include/linux/cache_coherency.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Cache coherency maintenance operation device drivers
+ *
+ * Copyright Huawei 2025
+ */
+#ifndef _LINUX_CACHE_COHERENCY_H_
+#define _LINUX_CACHE_COHERENCY_H_
+
+#include <linux/list.h>
+#include <linux/kref.h>
+#include <linux/types.h>
+
+struct cc_inval_params {
+	phys_addr_t addr;
+	size_t size;
+};
+
+struct cache_coherency_ops_inst;
+
+struct cache_coherency_ops {
+	int (*wbinv)(struct cache_coherency_ops_inst *cci,
+		     struct cc_inval_params *invp);
+	int (*done)(struct cache_coherency_ops_inst *cci);
+};
+
+struct cache_coherency_ops_inst {
+	struct kref kref;
+	struct list_head node;
+	const struct cache_coherency_ops *ops;
+};
+
+int cache_coherency_ops_instance_register(struct cache_coherency_ops_inst *cci);
+void cache_coherency_ops_instance_unregister(struct cache_coherency_ops_inst *cci);
+
+struct cache_coherency_ops_inst *
+_cache_coherency_ops_instance_alloc(const struct cache_coherency_ops *ops,
+				    size_t size);
+/**
+ * cache_coherency_ops_instance_alloc - Allocate cache coherency ops instance
+ * @ops: Cache maintenance operations
+ * @drv_struct: structure that contains the struct cache_coherency_ops_inst
+ * @member: Name of the struct cache_coherency_ops_inst member in @drv_struct.
+ *
+ * This allocates a driver specific structure and initializes the
+ * cache_coherency_ops_inst embedded in the drv_struct. Upon success the
+ * pointer must be freed via cache_coherency_ops_instance_put().
+ *
+ * Returns a &drv_struct * on success, %NULL on error.
+ */
+#define cache_coherency_ops_instance_alloc(ops, drv_struct, member)	    \
+	({								    \
+		static_assert(__same_type(struct cache_coherency_ops_inst,  \
+					  ((drv_struct *)NULL)->member));   \
+		static_assert(offsetof(drv_struct, member) == 0);	    \
+		(drv_struct *)_cache_coherency_ops_instance_alloc(ops,	    \
+			sizeof(drv_struct));				    \
+	})
+void cache_coherency_ops_instance_put(struct cache_coherency_ops_inst *cci);
+
+#endif
diff --git a/lib/Kconfig b/lib/Kconfig
index c483951b624ff..cd8e5844f9bb6 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -543,6 +543,9 @@ config MEMREGION
 config ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
 	bool
 
+config GENERIC_CPU_CACHE_MAINTENANCE
+	bool
+
 config ARCH_HAS_MEMREMAP_COMPAT_ALIGN
 	bool
 
diff --git a/lib/Makefile b/lib/Makefile
index 392ff808c9b90..eed20c50f358b 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -130,6 +130,8 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
 obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
 obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
 
+obj-$(CONFIG_GENERIC_CPU_CACHE_MAINTENANCE) += cache_maint.o
+
 lib-y += logic_pio.o
 
 lib-$(CONFIG_INDIRECT_IOMEM) += logic_iomem.o
diff --git a/lib/cache_maint.c b/lib/cache_maint.c
new file mode 100644
index 0000000000000..9256a9ffc34c7
--- /dev/null
+++ b/lib/cache_maint.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generic support for Memory System Cache Maintenance operations.
+ *
+ * Coherency maintenance drivers register with this simple framework that will
+ * iterate over each registered instance to first kick off invalidation and
+ * then to wait until it is complete.
+ *
+ * If no implementations are registered yet cpu_cache_has_invalidate_memregion()
+ * will return false. If this runs concurrently with unregistration then a
+ * race exists but this is no worse than the case where the operations instance
+ * responsible for a given memory region has not yet registered.
+ */
+#include <linux/cache_coherency.h>
+#include <linux/cleanup.h>
+#include <linux/container_of.h>
+#include <linux/export.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/memregion.h>
+#include <linux/module.h>
+#include <linux/rwsem.h>
+#include <linux/slab.h>
+
+static LIST_HEAD(cache_ops_instance_list);
+static DECLARE_RWSEM(cache_ops_instance_list_lock);
+
+static void __cache_coherency_ops_instance_free(struct kref *kref)
+{
+	struct cache_coherency_ops_inst *cci =
+		container_of(kref, struct cache_coherency_ops_inst, kref);
+	kfree(cci);
+}
+
+void cache_coherency_ops_instance_put(struct cache_coherency_ops_inst *cci)
+{
+	kref_put(&cci->kref, __cache_coherency_ops_instance_free);
+}
+EXPORT_SYMBOL_GPL(cache_coherency_ops_instance_put);
+
+static int cache_inval_one(struct cache_coherency_ops_inst *cci, void *data)
+{
+	if (!cci->ops)
+		return -EINVAL;
+
+	return cci->ops->wbinv(cci, data);
+}
+
+static int cache_inval_done_one(struct cache_coherency_ops_inst *cci)
+{
+	if (!cci->ops)
+		return -EINVAL;
+
+	if (!cci->ops->done)
+		return 0;
+
+	return cci->ops->done(cci);
+}
+
+static int cache_invalidate_memregion(phys_addr_t addr, size_t size)
+{
+	int ret;
+	struct cache_coherency_ops_inst *cci;
+	struct cc_inval_params params = {
+		.addr = addr,
+		.size = size,
+	};
+
+	guard(rwsem_read)(&cache_ops_instance_list_lock);
+	list_for_each_entry(cci, &cache_ops_instance_list, node) {
+		ret = cache_inval_one(cci, &params);
+		if (ret)
+			return ret;
+	}
+	list_for_each_entry(cci, &cache_ops_instance_list, node) {
+		ret = cache_inval_done_one(cci);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+struct cache_coherency_ops_inst *
+_cache_coherency_ops_instance_alloc(const struct cache_coherency_ops *ops,
+				    size_t size)
+{
+	struct cache_coherency_ops_inst *cci;
+
+	if (!ops || !ops->wbinv)
+		return NULL;
+
+	cci = kzalloc(size, GFP_KERNEL);
+	if (!cci)
+		return NULL;
+
+	cci->ops = ops;
+	INIT_LIST_HEAD(&cci->node);
+	kref_init(&cci->kref);
+
+	return cci;
+}
+EXPORT_SYMBOL_NS_GPL(_cache_coherency_ops_instance_alloc, "CACHE_COHERENCY");
+
+int cache_coherency_ops_instance_register(struct cache_coherency_ops_inst *cci)
+{
+	guard(rwsem_write)(&cache_ops_instance_list_lock);
+	list_add(&cci->node, &cache_ops_instance_list);
+
+	return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cache_coherency_ops_instance_register, "CACHE_COHERENCY");
+
+void cache_coherency_ops_instance_unregister(struct cache_coherency_ops_inst *cci)
+{
+	guard(rwsem_write)(&cache_ops_instance_list_lock);
+	list_del(&cci->node);
+}
+EXPORT_SYMBOL_NS_GPL(cache_coherency_ops_instance_unregister, "CACHE_COHERENCY");
+
+int cpu_cache_invalidate_memregion(phys_addr_t start, size_t len)
+{
+	return cache_invalidate_memregion(start, len);
+}
+EXPORT_SYMBOL_NS_GPL(cpu_cache_invalidate_memregion, "DEVMEM");
+
+/*
+ * Used for optimization / debug purposes only as removal can race
+ *
+ * Machines that do not support invalidation, e.g. VMs, will not have any
+ * operations instance to register and so this will always return false.
+ */
+bool cpu_cache_has_invalidate_memregion(void)
+{
+	guard(rwsem_read)(&cache_ops_instance_list_lock);
+	return !list_empty(&cache_ops_instance_list);
+}
+EXPORT_SYMBOL_NS_GPL(cpu_cache_has_invalidate_memregion, "DEVMEM");

From d76476aeb05d8f363dd3e5cfb058a37ee42a2247 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Mon, 17 Nov 2025 10:47:57 +0000
Subject: [PATCH 066/143] arm64: Select GENERIC_CPU_CACHE_MAINTENANCE

The generic CPU cache maintenance framework provides a way to register
drivers for devices implementing the underlying support for
cpu_cache_has_invalidate_memregion(). Enable it for arm64 by selecting
GENERIC_CPU_CACHE_MAINTENANCE which provides the implementation for,
and in turn selects, ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
(cherry picked from commit 4d873c5dc3ed5a189a39fcbddad8bcd2bd2a1785)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 arch/arm64/Kconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1684ec2454369..70f023a98af99 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -21,6 +21,7 @@ config ARM64
 	select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
 	select ARCH_HAS_CACHE_LINE_SIZE
 	select ARCH_HAS_CC_PLATFORM
+	select ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
 	select ARCH_HAS_CURRENT_STACK_POINTER
 	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEBUG_VM_PGTABLE
@@ -146,6 +147,7 @@ config ARM64
 	select GENERIC_ARCH_TOPOLOGY
 	select GENERIC_CLOCKEVENTS_BROADCAST
 	select GENERIC_CPU_AUTOPROBE
+	select GENERIC_CPU_CACHE_MAINTENANCE
 	select GENERIC_CPU_DEVICES
 	select GENERIC_CPU_VULNERABILITIES
 	select GENERIC_EARLY_IOREMAP

From 8a2c90f2b4852e3abacce200db8b981e0c305153 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Mon, 17 Nov 2025 10:47:58 +0000
Subject: [PATCH 067/143] MAINTAINERS: Add Jonathan Cameron to drivers/cache
 and add lib/cache_maint.c + header

Seems unfair to inflict the cache-coherency drivers on Conor without also
stepping up as a second maintainer for drivers/cache.

Include the library support for cache-coherency maintenance drivers to the
existing entry.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
(cherry picked from commit 9b9de5a56a62c86472848ec7d48ca939411511e6)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 MAINTAINERS | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 0dda21bf8d21d..13e62f538099b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -24050,10 +24050,13 @@ F:	drivers/staging/
 
 STANDALONE CACHE CONTROLLER DRIVERS
 M:	Conor Dooley <conor@kernel.org>
+M:	Jonathan Cameron <jonathan.cameron@huawei.com>
 S:	Maintained
 T:	git https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/
 F:	Documentation/devicetree/bindings/cache/
 F:	drivers/cache
+F:	include/cache_coherency.h
+F:	lib/cache_maint.c
 
 STARFIRE/DURALAN NETWORK DRIVER
 M:	Ion Badulescu <ionut@badula.org>

From 809b667fd62a01f19ce494f252d33e3a66206835 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Mon, 17 Nov 2025 10:47:59 +0000
Subject: [PATCH 068/143] cache: Make top level Kconfig menu a boolean
 dependent on RISCV

The next patch will add a new type of cache maintenance driver responsible
for flushing deeper than is necessary for non coherent DMA (current
use case of drivers/cache drivers), as needed when performing operations
such as memory hotplug and security unlocking of persistent memory. The two
types of operation are similar enough to share a drivers/cache directory
and MAINTAINERS but are otherwise currently unrelated.

To avoid confusion have two separate menus. Each has dependencies that are
implemented by making them boolean symbols, here CACHEMAINT_FOR_DMA
which is dependent on RISCV as all drivers are currently for platforms of
that architecture. Set new symbol default to y to avoid breaking existing
configs. This has no effect on actual code built, just visibility of the
menu.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
(cherry picked from commit 4d1608d0ab3365d1ef9447bdbc0cb4c0962f1774)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cache/Kconfig | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/cache/Kconfig b/drivers/cache/Kconfig
index db51386c663a6..59a79df4c0ce7 100644
--- a/drivers/cache/Kconfig
+++ b/drivers/cache/Kconfig
@@ -1,9 +1,17 @@
 # SPDX-License-Identifier: GPL-2.0
-menu "Cache Drivers"
+
+menuconfig CACHEMAINT_FOR_DMA
+	bool "Cache management for noncoherent DMA"
+	depends on RISCV
+	default y
+	help
+	  These drivers implement support for noncoherent DMA master devices
+	  on platforms that lack the standard CPU interfaces for this.
+
+if CACHEMAINT_FOR_DMA
 
 config AX45MP_L2_CACHE
 	bool "Andes Technology AX45MP L2 Cache controller"
-	depends on RISCV
 	select RISCV_NONSTANDARD_CACHE_OPS
 	help
 	  Support for the L2 cache controller on Andes Technology AX45MP platforms.
@@ -16,7 +24,6 @@ config SIFIVE_CCACHE
 
 config STARFIVE_STARLINK_CACHE
 	bool "StarFive StarLink Cache controller"
-	depends on RISCV
 	depends on ARCH_STARFIVE
 	depends on 64BIT
 	select RISCV_DMA_NONCOHERENT
@@ -24,4 +31,4 @@ config STARFIVE_STARLINK_CACHE
 	help
 	  Support for the StarLink cache controller IP from StarFive.
 
-endmenu
+endif #CACHEMAINT_FOR_DMA

From 1cbb3506ec12ab30ae278f01c0769e4f3e5dc6cc Mon Sep 17 00:00:00 2001
From: Yushan Wang <wangyushan12@huawei.com>
Date: Mon, 17 Nov 2025 10:48:00 +0000
Subject: [PATCH 069/143] cache: Support cache maintenance for HiSilicon SoC
 Hydra Home Agent

Hydra Home Agent is a device used to maintain cache coherency. Add support
for explicit cache maintenance operations using it. A system has multiple
of these agents. Whilst only one agent is responsible for a given cache
line, interleave means that for a range operation, responsibility for the
cache lines making up the range will typically be spread across multiple
instances.

Put this driver on a new Kconfig menu under drivers/cache. The short
description as memory hotplug like operations is intended to cover
the somewhat complex set of cases where this unit applies and differentiate
it clearly from typical non coherent DMA flows.

Co-developed-by: Yicong Yang <yangyicong@hisilicon.com>
Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
Signed-off-by: Yushan Wang <wangyushan12@huawei.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
(cherry picked from commit 2ec3b54a6ff04046c07b7050d02321e406c4dcd1)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cache/Kconfig        |  22 ++++
 drivers/cache/Makefile       |   2 +
 drivers/cache/hisi_soc_hha.c | 194 +++++++++++++++++++++++++++++++++++
 3 files changed, 218 insertions(+)
 create mode 100644 drivers/cache/hisi_soc_hha.c

diff --git a/drivers/cache/Kconfig b/drivers/cache/Kconfig
index 59a79df4c0ce7..1518449d47b51 100644
--- a/drivers/cache/Kconfig
+++ b/drivers/cache/Kconfig
@@ -32,3 +32,25 @@ config STARFIVE_STARLINK_CACHE
 	  Support for the StarLink cache controller IP from StarFive.
 
 endif #CACHEMAINT_FOR_DMA
+
+menuconfig CACHEMAINT_FOR_HOTPLUG
+	bool "Cache management for memory hot plug like operations"
+	depends on GENERIC_CPU_CACHE_MAINTENANCE
+	help
+	  These drivers implement cache management for flows where it is necessary
+	  to flush data from all host caches.
+
+if CACHEMAINT_FOR_HOTPLUG
+
+config HISI_SOC_HHA
+	tristate "HiSilicon Hydra Home Agent (HHA) device driver"
+	depends on (ARM64 && ACPI) || COMPILE_TEST
+	help
+	  The Hydra Home Agent (HHA) is responsible for cache coherency
+	  on the SoC. This driver enables the cache maintenance functions of
+	  the HHA.
+
+	  This driver can be built as a module. If so, the module will be
+	  called hisi_soc_hha.
+
+endif #CACHEMAINT_FOR_HOTPLUG
diff --git a/drivers/cache/Makefile b/drivers/cache/Makefile
index 55c5e851034da..b3362b15d6c15 100644
--- a/drivers/cache/Makefile
+++ b/drivers/cache/Makefile
@@ -3,3 +3,5 @@
 obj-$(CONFIG_AX45MP_L2_CACHE)		+= ax45mp_cache.o
 obj-$(CONFIG_SIFIVE_CCACHE)		+= sifive_ccache.o
 obj-$(CONFIG_STARFIVE_STARLINK_CACHE)	+= starfive_starlink_cache.o
+
+obj-$(CONFIG_HISI_SOC_HHA)		+= hisi_soc_hha.o
diff --git a/drivers/cache/hisi_soc_hha.c b/drivers/cache/hisi_soc_hha.c
new file mode 100644
index 0000000000000..25ff0f5ae79b3
--- /dev/null
+++ b/drivers/cache/hisi_soc_hha.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for HiSilicon Hydra Home Agent (HHA).
+ *
+ * Copyright (c) 2025 HiSilicon Technologies Co., Ltd.
+ * Author: Yicong Yang <yangyicong@hisilicon.com>
+ *         Yushan Wang <wangyushan12@huawei.com>
+ *
+ * A system typically contains multiple HHAs. Each is responsible for a subset
+ * of the physical addresses in the system, but interleave can make the mapping
+ * from a particular cache line to a responsible HHA complex. As such no
+ * filtering is done in the driver, with the hardware being responsible for
+ * responding with success even if it was not responsible for any addresses
+ * in the range on which the operation was requested.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/cache_coherency.h>
+#include <linux/dev_printk.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/memregion.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+
+#define HISI_HHA_CTRL		0x5004
+#define   HISI_HHA_CTRL_EN	BIT(0)
+#define   HISI_HHA_CTRL_RANGE	BIT(1)
+#define   HISI_HHA_CTRL_TYPE	GENMASK(3, 2)
+#define HISI_HHA_START_L	0x5008
+#define HISI_HHA_START_H	0x500c
+#define HISI_HHA_LEN_L		0x5010
+#define HISI_HHA_LEN_H		0x5014
+
+/* Maintenance operations are performed at 128 byte granularity */
+#define HISI_HHA_MAINT_ALIGN	128
+
+#define HISI_HHA_POLL_GAP_US		10
+#define HISI_HHA_POLL_TIMEOUT_US	50000
+
+struct hisi_soc_hha {
+	/* Must be first element */
+	struct cache_coherency_ops_inst cci;
+	/* Locks HHA instance to forbid overlapping access. */
+	struct mutex lock;
+	void __iomem *base;
+};
+
+static bool hisi_hha_cache_maintain_wait_finished(struct hisi_soc_hha *soc_hha)
+{
+	u32 val;
+
+	return !readl_poll_timeout_atomic(soc_hha->base + HISI_HHA_CTRL, val,
+					  !(val & HISI_HHA_CTRL_EN),
+					  HISI_HHA_POLL_GAP_US,
+					  HISI_HHA_POLL_TIMEOUT_US);
+}
+
+static int hisi_soc_hha_wbinv(struct cache_coherency_ops_inst *cci,
+			struct cc_inval_params *invp)
+{
+	struct hisi_soc_hha *soc_hha = container_of(cci, struct hisi_soc_hha, cci);
+	phys_addr_t top, addr = invp->addr;
+	size_t size = invp->size;
+	u32 reg;
+
+	if (!size)
+		return -EINVAL;
+
+	/* Round the end up from the unaligned start so the tail is not lost. */
+	top = ALIGN(addr + size, HISI_HHA_MAINT_ALIGN);
+	addr = ALIGN_DOWN(addr, HISI_HHA_MAINT_ALIGN);
+	size = top - addr;
+
+	guard(mutex)(&soc_hha->lock);
+
+	if (!hisi_hha_cache_maintain_wait_finished(soc_hha))
+		return -EBUSY;
+
+	/*
+	 * Hardware will search for addresses ranging [addr, addr + size - 1],
+	 * last byte included, and perform maintenance in 128 byte granules
+	 * on those cachelines which contain the addresses. If a given instance
+	 * is either not responsible for a cacheline or that cacheline is not
+	 * currently present then the search will fail, no operation will be
+	 * necessary and the device will report success.
+	 */
+	size -= 1;
+
+	writel(lower_32_bits(addr), soc_hha->base + HISI_HHA_START_L);
+	writel(upper_32_bits(addr), soc_hha->base + HISI_HHA_START_H);
+	writel(lower_32_bits(size), soc_hha->base + HISI_HHA_LEN_L);
+	writel(upper_32_bits(size), soc_hha->base + HISI_HHA_LEN_H);
+
+	reg = FIELD_PREP(HISI_HHA_CTRL_TYPE, 1); /* Clean Invalid */
+	reg |= HISI_HHA_CTRL_RANGE | HISI_HHA_CTRL_EN;
+	writel(reg, soc_hha->base + HISI_HHA_CTRL);
+
+	return 0;
+}
+
+static int hisi_soc_hha_done(struct cache_coherency_ops_inst *cci)
+{
+	struct hisi_soc_hha *soc_hha =
+		container_of(cci, struct hisi_soc_hha, cci);
+
+	guard(mutex)(&soc_hha->lock);
+	if (!hisi_hha_cache_maintain_wait_finished(soc_hha))
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+static const struct cache_coherency_ops hha_ops = {
+	.wbinv = hisi_soc_hha_wbinv,
+	.done = hisi_soc_hha_done,
+};
+
+static int hisi_soc_hha_probe(struct platform_device *pdev)
+{
+	struct hisi_soc_hha *soc_hha;
+	struct resource *mem;
+	int ret;
+
+	soc_hha = cache_coherency_ops_instance_alloc(&hha_ops,
+						     struct hisi_soc_hha, cci);
+	if (!soc_hha)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, soc_hha);
+
+	mutex_init(&soc_hha->lock);
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!mem) {
+		ret = -ENOMEM;
+		goto err_free_cci;
+	}
+
+	soc_hha->base = ioremap(mem->start, resource_size(mem));
+	if (!soc_hha->base) {
+		ret = dev_err_probe(&pdev->dev, -ENOMEM,
+				    "failed to remap io memory");
+		goto err_free_cci;
+	}
+
+	ret = cache_coherency_ops_instance_register(&soc_hha->cci);
+	if (ret)
+		goto err_iounmap;
+
+	return 0;
+
+err_iounmap:
+	iounmap(soc_hha->base);
+err_free_cci:
+	cache_coherency_ops_instance_put(&soc_hha->cci);
+	return ret;
+}
+
+static void hisi_soc_hha_remove(struct platform_device *pdev)
+{
+	struct hisi_soc_hha *soc_hha = platform_get_drvdata(pdev);
+
+	cache_coherency_ops_instance_unregister(&soc_hha->cci);
+	iounmap(soc_hha->base);
+	cache_coherency_ops_instance_put(&soc_hha->cci);
+}
+
+static const struct acpi_device_id hisi_soc_hha_ids[] = {
+	{ "HISI0511", },
+	{ }
+};
+MODULE_DEVICE_TABLE(acpi, hisi_soc_hha_ids);
+
+static struct platform_driver hisi_soc_hha_driver = {
+	.driver = {
+		.name = "hisi_soc_hha",
+		.acpi_match_table = hisi_soc_hha_ids,
+	},
+	.probe = hisi_soc_hha_probe,
+	.remove = hisi_soc_hha_remove,
+};
+
+module_platform_driver(hisi_soc_hha_driver);
+
+MODULE_IMPORT_NS("CACHE_COHERENCY");
+MODULE_DESCRIPTION("HiSilicon Hydra Home Agent driver supporting cache maintenance");
+MODULE_AUTHOR("Yicong Yang <yangyicong@hisilicon.com>");
+MODULE_AUTHOR("Yushan Wang <wangyushan12@huawei.com>");
+MODULE_LICENSE("GPL");

From 9fa4cc68151995fa46691a0df17ddf4c10f9dbd1 Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@redhat.com>
Date: Mon, 17 Nov 2025 11:53:11 +0100
Subject: [PATCH 070/143] MAINTAINERS: refer to intended file in STANDALONE
 CACHE CONTROLLER DRIVERS

Commit 23db6eed72bd ("MAINTAINERS: Add Jonathan Cameron to drivers/cache
and add lib/cache_maint.c + header") intends to add a file entry pointing
to the cache_coherency.h file, but messes up to name the right path.

Update the entry to the intended file.

Signed-off-by: Lukas Bulwahn <lukas.bulwahn@redhat.com>
Acked-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
(cherry picked from commit 055bcc552b5181da208038c1de9437e9cca69380)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 13e62f538099b..3be1319994827 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -24055,7 +24055,7 @@ S:	Maintained
 T:	git https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/
 F:	Documentation/devicetree/bindings/cache/
 F:	drivers/cache
-F:	include/cache_coherency.h
+F:	include/linux/cache_coherency.h
 F:	lib/cache_maint.c
 
 STARFIRE/DURALAN NETWORK DRIVER

From bac9db58a0a7d1b68725fe693db3a420a6b3780d Mon Sep 17 00:00:00 2001
From: "Christophe Leroy (CS GROUP)" <chleroy@kernel.org>
Date: Wed, 26 Nov 2025 12:26:57 +0100
Subject: [PATCH 071/143] MAINTAINERS: Update email address for Christophe
 Leroy

My address at csgroup.eu is redirected to the new one at
cs-soprasteria.com which is a Professional Microsoft account without
SMTP gateway. We still have the SMTP gateway for csgroup.eu but it is
not maintained anymore and might stop working at any time. In addition
the DKIM signature is not performed although the domain has DMARC
set up.

Switch to kernel.org email address and add entries in mailmap.

Link: https://lore.kernel.org/r/d9b6758297d7dcddf79feb4459ceaedd7d6f1f2e.1764155757.git.chleroy@kernel.org
Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
(cherry picked from commit 3fca89b7756c5bb885e3a41df1443aa39f35951b)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 .mailmap    |  3 +++
 MAINTAINERS | 10 +++++-----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/.mailmap b/.mailmap
index 8db24be50158d..d64a8ed369b29 100644
--- a/.mailmap
+++ b/.mailmap
@@ -182,6 +182,9 @@ Christian Brauner <brauner@kernel.org> <christian@brauner.io>
 Christian Brauner <brauner@kernel.org> <christian.brauner@canonical.com>
 Christian Brauner <brauner@kernel.org> <christian.brauner@ubuntu.com>
 Christian Marangi <ansuelsmth@gmail.com>
+Christophe Leroy <chleroy@kernel.org> <christophe.leroy@c-s.fr>
+Christophe Leroy <chleroy@kernel.org> <christophe.leroy@csgroup.eu>
+Christophe Leroy <chleroy@kernel.org> <christophe.leroy2@cs-soprasteria.com>
 Christophe Ricard <christophe.ricard@gmail.com>
 Christopher Obbard <christopher.obbard@linaro.org> <chris.obbard@collabora.com>
 Christoph Hellwig <hch@lst.de>
diff --git a/MAINTAINERS b/MAINTAINERS
index 3be1319994827..1c7561495148e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4498,7 +4498,7 @@ F:	drivers/net/ethernet/netronome/nfp/bpf/
 
 BPF JIT for POWERPC (32-BIT AND 64-BIT)
 M:	Hari Bathini <hbathini@linux.ibm.com>
-M:	Christophe Leroy <christophe.leroy@csgroup.eu>
+M:	Christophe Leroy (CS GROUP) <chleroy@kernel.org>
 R:	Naveen N Rao <naveen@kernel.org>
 L:	bpf@vger.kernel.org
 S:	Supported
@@ -9871,7 +9871,7 @@ F:	drivers/spi/spi-fsl-qspi.c
 
 FREESCALE QUICC ENGINE LIBRARY
 M:	Qiang Zhao <qiang.zhao@nxp.com>
-M:	Christophe Leroy <christophe.leroy@csgroup.eu>
+M:	Christophe Leroy (CS GROUP) <chleroy@kernel.org>
 L:	linuxppc-dev@lists.ozlabs.org
 S:	Maintained
 F:	drivers/soc/fsl/qe/
@@ -9924,7 +9924,7 @@ S:	Maintained
 F:	drivers/tty/serial/ucc_uart.c
 
 FREESCALE SOC DRIVERS
-M:	Christophe Leroy <christophe.leroy@csgroup.eu>
+M:	Christophe Leroy (CS GROUP) <chleroy@kernel.org>
 L:	linuxppc-dev@lists.ozlabs.org
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
@@ -14117,7 +14117,7 @@ LINUX FOR POWERPC (32-BIT AND 64-BIT)
 M:	Madhavan Srinivasan <maddy@linux.ibm.com>
 M:	Michael Ellerman <mpe@ellerman.id.au>
 R:	Nicholas Piggin <npiggin@gmail.com>
-R:	Christophe Leroy <christophe.leroy@csgroup.eu>
+R:	Christophe Leroy (CS GROUP) <chleroy@kernel.org>
 L:	linuxppc-dev@lists.ozlabs.org
 S:	Supported
 W:	https://github.com/linuxppc/wiki/wiki
@@ -14173,7 +14173,7 @@ F:	Documentation/devicetree/bindings/powerpc/fsl/
 F:	arch/powerpc/platforms/85xx/
 
 LINUX FOR POWERPC EMBEDDED PPC8XX AND PPC83XX
-M:	Christophe Leroy <christophe.leroy@csgroup.eu>
+M:	Christophe Leroy (CS GROUP) <chleroy@kernel.org>
 L:	linuxppc-dev@lists.ozlabs.org
 S:	Maintained
 F:	arch/powerpc/platforms/8xx/

From 49109306411668d981ee0f67b4b2aec782511afa Mon Sep 17 00:00:00 2001
From: Marco Crivellari <marco.crivellari@suse.com>
Date: Fri, 7 Nov 2025 16:29:50 +0100
Subject: [PATCH 072/143] soc: fsl: qbman: add WQ_PERCPU to alloc_workqueue
 users
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently if a user enqueues a work item using schedule_delayed_work() the
used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use
WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to
schedule_work() that is using system_wq and queue_work(), that makes use
again of WORK_CPU_UNBOUND.
This lack of consistency cannot be addressed without refactoring the API.

alloc_workqueue() treats all queues as per-CPU by default, while unbound
workqueues must opt-in via WQ_UNBOUND.

This default is suboptimal: most workloads benefit from unbound queues,
allowing the scheduler to place worker threads where they’re needed and
reducing noise when CPUs are isolated.

This continues the effort to refactor workqueue APIs, which began with
the introduction of new workqueues and a new alloc_workqueue flag in:

commit 128ea9f6ccfb ("workqueue: Add system_percpu_wq and system_dfl_wq")
commit 930c2ea566af ("workqueue: Add new WQ_PERCPU flag")

This change adds a new WQ_PERCPU flag to explicitly request
alloc_workqueue() to be per-cpu when WQ_UNBOUND has not been specified.

With the introduction of the WQ_PERCPU flag (equivalent to !WQ_UNBOUND),
any alloc_workqueue() caller that doesn’t explicitly specify WQ_UNBOUND
must now use WQ_PERCPU.

Once migration is complete, WQ_UNBOUND can be removed and unbound will
become the implicit default.

Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Marco Crivellari <marco.crivellari@suse.com>
Link: https://lore.kernel.org/r/20251107152950.293899-1-marco.crivellari@suse.com
Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
(cherry picked from commit c181703a290a13c088ca2ac7b984ec8e676acb2b)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/soc/fsl/qbman/qman.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c
index 9be240999f877..6b392b3ad4b15 100644
--- a/drivers/soc/fsl/qbman/qman.c
+++ b/drivers/soc/fsl/qbman/qman.c
@@ -1073,7 +1073,7 @@ EXPORT_SYMBOL(qman_portal_set_iperiod);
 
 int qman_wq_alloc(void)
 {
-	qm_portal_wq = alloc_workqueue("qman_portal_wq", 0, 1);
+	qm_portal_wq = alloc_workqueue("qman_portal_wq", WQ_PERCPU, 1);
 	if (!qm_portal_wq)
 		return -ENOMEM;
 	return 0;

From 72964cb0fb65c9197929dc7a8bd547e141e342c6 Mon Sep 17 00:00:00 2001
From: Gongwei Li <ligongwei@kylinos.cn>
Date: Fri, 21 Nov 2025 14:10:22 +0800
Subject: [PATCH 073/143] soc: fsl: qbman: use kmalloc_array() instead of
 kmalloc()

Replace kmalloc() with kmalloc_array() to prevent potential
overflow, as recommended in Documentation/process/deprecated.rst.

Signed-off-by: Gongwei Li <ligongwei@kylinos.cn>
Reviewed-by: Fushuai Wang <wangfushuai@baidu.com>
Link: https://lore.kernel.org/r/20251121061022.114609-1-13875017792@163.com
Signed-off-by: Christophe Leroy (CS GROUP) <chleroy@kernel.org>
(cherry picked from commit 760b8eec2cf861c5b013f62c4af8ee06c959853e)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/soc/fsl/qbman/qman_test_stash.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/soc/fsl/qbman/qman_test_stash.c b/drivers/soc/fsl/qbman/qman_test_stash.c
index f4d3c2146f4f0..18131a5d5979a 100644
--- a/drivers/soc/fsl/qbman/qman_test_stash.c
+++ b/drivers/soc/fsl/qbman/qman_test_stash.c
@@ -219,7 +219,7 @@ static int allocate_frame_data(void)
 
 	pcfg = qman_get_qm_portal_config(qman_dma_portal);
 
-	__frame_ptr = kmalloc(4 * HP_NUM_WORDS, GFP_KERNEL);
+	__frame_ptr = kmalloc_array(4, HP_NUM_WORDS, GFP_KERNEL);
 	if (!__frame_ptr)
 		return -ENOMEM;
 

From 2f317e54cd68f9c546d40d6cac25a643a63dff81 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 15 Dec 2025 16:56:11 -0800
Subject: [PATCH 074/143] cxl/mem: Fix devm_cxl_memdev_edac_release() confusion

A device release method is only for undoing allocations on the path to
preparing the device for device_add(). In contrast, devm allocations are
post device_add(), are acquired during / after ->probe() and are released
synchronous with ->remove().

So, a "devm" helper in a "release" method is a clear anti-pattern.

Move this devm release action where it belongs, an action created at edac
object creation time. Otherwise, this leaks resources until
cxl_memdev_release() time which may be long after these xarray and error
record caches have gone idle.

Note, this also fixes up the type of @cxlmd->err_rec_array which needlessly
dropped type-safety.

Fixes: 0b5ccb0de1e2 ("cxl/edac: Support for finding memory operation attributes from the current boot")
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Shiju Jose <shiju.jose@huawei.com>
Cc: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Ben Cheatham <benjamin.cheatham@amd.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Tested-by: Shiju Jose <shiju.jose@huawei.com>
Reviewed-by: Shiju Jose <shiju.jose@huawei.com>
Tested-by: Alejandro Lucero <alucerop@amd.com>
Link: https://patch.msgid.link/20251216005616.3090129-2-dan.j.williams@intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 10016118b6fade907143a32a7aeaa777063dc79c)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/edac.c   | 64 ++++++++++++++++++++++-----------------
 drivers/cxl/core/memdev.c |  1 -
 drivers/cxl/cxlmem.h      |  5 +--
 3 files changed, 38 insertions(+), 32 deletions(-)

diff --git a/drivers/cxl/core/edac.c b/drivers/cxl/core/edac.c
index 79994ca9bc9f3..81160260e26b7 100644
--- a/drivers/cxl/core/edac.c
+++ b/drivers/cxl/core/edac.c
@@ -1988,6 +1988,40 @@ static int cxl_memdev_soft_ppr_init(struct cxl_memdev *cxlmd,
 	return 0;
 }
 
+static void err_rec_free(void *_cxlmd)
+{
+	struct cxl_memdev *cxlmd = _cxlmd;
+	struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
+	struct cxl_event_gen_media *rec_gen_media;
+	struct cxl_event_dram *rec_dram;
+	unsigned long index;
+
+	cxlmd->err_rec_array = NULL;
+	xa_for_each(&array_rec->rec_dram, index, rec_dram)
+		kfree(rec_dram);
+	xa_destroy(&array_rec->rec_dram);
+
+	xa_for_each(&array_rec->rec_gen_media, index, rec_gen_media)
+		kfree(rec_gen_media);
+	xa_destroy(&array_rec->rec_gen_media);
+	kfree(array_rec);
+}
+
+static int devm_cxl_memdev_setup_err_rec(struct cxl_memdev *cxlmd)
+{
+	struct cxl_mem_err_rec *array_rec =
+		kzalloc(sizeof(*array_rec), GFP_KERNEL);
+
+	if (!array_rec)
+		return -ENOMEM;
+
+	xa_init(&array_rec->rec_gen_media);
+	xa_init(&array_rec->rec_dram);
+	cxlmd->err_rec_array = array_rec;
+
+	return devm_add_action_or_reset(&cxlmd->dev, err_rec_free, cxlmd);
+}
+
 int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd)
 {
 	struct edac_dev_feature ras_features[CXL_NR_EDAC_DEV_FEATURES];
@@ -2038,15 +2072,9 @@ int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd)
 		}
 
 		if (repair_inst) {
-			struct cxl_mem_err_rec *array_rec =
-				devm_kzalloc(&cxlmd->dev, sizeof(*array_rec),
-					     GFP_KERNEL);
-			if (!array_rec)
-				return -ENOMEM;
-
-			xa_init(&array_rec->rec_gen_media);
-			xa_init(&array_rec->rec_dram);
-			cxlmd->err_rec_array = array_rec;
+			rc = devm_cxl_memdev_setup_err_rec(cxlmd);
+			if (rc)
+				return rc;
 		}
 	}
 
@@ -2088,22 +2116,4 @@ int devm_cxl_region_edac_register(struct cxl_region *cxlr)
 }
 EXPORT_SYMBOL_NS_GPL(devm_cxl_region_edac_register, "CXL");
 
-void devm_cxl_memdev_edac_release(struct cxl_memdev *cxlmd)
-{
-	struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array;
-	struct cxl_event_gen_media *rec_gen_media;
-	struct cxl_event_dram *rec_dram;
-	unsigned long index;
-
-	if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !array_rec)
-		return;
-
-	xa_for_each(&array_rec->rec_dram, index, rec_dram)
-		kfree(rec_dram);
-	xa_destroy(&array_rec->rec_dram);
 
-	xa_for_each(&array_rec->rec_gen_media, index, rec_gen_media)
-		kfree(rec_gen_media);
-	xa_destroy(&array_rec->rec_gen_media);
-}
-EXPORT_SYMBOL_NS_GPL(devm_cxl_memdev_edac_release, "CXL");
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index e370d733e4400..4dff7f44d908e 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -27,7 +27,6 @@ static void cxl_memdev_release(struct device *dev)
 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
 
 	ida_free(&cxl_memdev_ida, cxlmd->id);
-	devm_cxl_memdev_edac_release(cxlmd);
 	kfree(cxlmd);
 }
 
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 434031a0c1f74..c12ab4fc95123 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -63,7 +63,7 @@ struct cxl_memdev {
 	int depth;
 	u8 scrub_cycle;
 	int scrub_region_id;
-	void *err_rec_array;
+	struct cxl_mem_err_rec *err_rec_array;
 };
 
 static inline struct cxl_memdev *to_cxl_memdev(struct device *dev)
@@ -877,7 +877,6 @@ int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd);
 int devm_cxl_region_edac_register(struct cxl_region *cxlr);
 int cxl_store_rec_gen_media(struct cxl_memdev *cxlmd, union cxl_event *evt);
 int cxl_store_rec_dram(struct cxl_memdev *cxlmd, union cxl_event *evt);
-void devm_cxl_memdev_edac_release(struct cxl_memdev *cxlmd);
 #else
 static inline int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd)
 { return 0; }
@@ -889,8 +888,6 @@ static inline int cxl_store_rec_gen_media(struct cxl_memdev *cxlmd,
 static inline int cxl_store_rec_dram(struct cxl_memdev *cxlmd,
 				     union cxl_event *evt)
 { return 0; }
-static inline void devm_cxl_memdev_edac_release(struct cxl_memdev *cxlmd)
-{ return; }
 #endif
 
 #ifdef CONFIG_CXL_SUSPEND

From 604d26b32a6cffc33262c685baa80317a5ce2b5e Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 15 Dec 2025 16:56:12 -0800
Subject: [PATCH 075/143] cxl/mem: Arrange for always-synchronous memdev attach

In preparation for CXL accelerator drivers that have a hard dependency on
CXL capability initialization, arrange for cxl_mem_probe() to always run
synchronous with the device_add() of cxl_memdev instances. I.e.
cxl_mem_driver registration is always complete before the first memdev
creation event.

At present, cxl_pci does not care about the attach state of the cxl_memdev
because all generic memory expansion functionality can be handled by the
cxl_core. For accelerators, however, that driver needs to perform driver
specific initialization if CXL is available, or execute a fallback to PCIe
only operation.

This synchronous attach guarantee is also needed for Soft Reserve Recovery,
which is an effort that needs to assert that devices have had a chance to
attach before making a go / no-go decision on proceeding with CXL subsystem
initialization.

By moving devm_cxl_add_memdev() to cxl_mem.ko it removes async module
loading as one reason that a memdev may not be attached upon return from
devm_cxl_add_memdev().

Cc: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
Cc: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Tested-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Ben Cheatham <benjamin.cheatham@amd.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Tested-by: Alejandro Lucero <alucerop@amd.com>
Link: https://patch.msgid.link/20251216005616.3090129-3-dan.j.williams@intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 1f1cb7f0c25574cf51501f8c8cece0047d7e8848)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/Kconfig       |  2 +-
 drivers/cxl/core/memdev.c | 10 +++++++---
 drivers/cxl/cxlmem.h      |  2 ++
 drivers/cxl/mem.c         | 17 +++++++++++++++++
 4 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig
index 48b7314afdb88..f1361ed6a0d48 100644
--- a/drivers/cxl/Kconfig
+++ b/drivers/cxl/Kconfig
@@ -22,6 +22,7 @@ if CXL_BUS
 config CXL_PCI
 	tristate "PCI manageability"
 	default CXL_BUS
+	select CXL_MEM
 	help
 	  The CXL specification defines a "CXL memory device" sub-class in the
 	  PCI "memory controller" base class of devices. Device's identified by
@@ -89,7 +90,6 @@ config CXL_PMEM
 
 config CXL_MEM
 	tristate "CXL: Memory Expansion"
-	depends on CXL_PCI
 	default CXL_BUS
 	help
 	  The CXL.mem protocol allows a device to act as a provider of "System
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index 4dff7f44d908e..7a4153e1c6a78 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -1050,8 +1050,12 @@ static const struct file_operations cxl_memdev_fops = {
 	.llseek = noop_llseek,
 };
 
-struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
-				       struct cxl_dev_state *cxlds)
+/*
+ * Core helper for devm_cxl_add_memdev() that wants to both create a device and
+ * assert to the caller that upon return cxl_mem::probe() has been invoked.
+ */
+struct cxl_memdev *__devm_cxl_add_memdev(struct device *host,
+					 struct cxl_dev_state *cxlds)
 {
 	struct cxl_memdev *cxlmd;
 	struct device *dev;
@@ -1093,7 +1097,7 @@ struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
 	put_device(dev);
 	return ERR_PTR(rc);
 }
-EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, "CXL");
+EXPORT_SYMBOL_FOR_MODULES(__devm_cxl_add_memdev, "cxl_mem");
 
 static void sanitize_teardown_notifier(void *data)
 {
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index c12ab4fc95123..012e68acad342 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -95,6 +95,8 @@ static inline bool is_cxl_endpoint(struct cxl_port *port)
 	return is_cxl_memdev(port->uport_dev);
 }
 
+struct cxl_memdev *__devm_cxl_add_memdev(struct device *host,
+					 struct cxl_dev_state *cxlds);
 struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
 				       struct cxl_dev_state *cxlds);
 int devm_cxl_sanitize_setup_notifier(struct device *host,
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 6e6777b7bafb5..55883797ab2db 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -201,6 +201,22 @@ static int cxl_mem_probe(struct device *dev)
 	return devm_add_action_or_reset(dev, enable_suspend, NULL);
 }
 
+/**
+ * devm_cxl_add_memdev - Add a CXL memory device
+ * @host: devres alloc/release context and parent for the memdev
+ * @cxlds: CXL device state to associate with the memdev
+ *
+ * Upon return the device will have had a chance to attach to the
+ * cxl_mem driver, but may fail if the CXL topology is not ready
+ * (hardware CXL link down, or software platform CXL root not attached)
+ */
+struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
+				       struct cxl_dev_state *cxlds)
+{
+	return __devm_cxl_add_memdev(host, cxlds);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, "CXL");
+
 static ssize_t trigger_poison_list_store(struct device *dev,
 					 struct device_attribute *attr,
 					 const char *buf, size_t len)
@@ -248,6 +264,7 @@ static struct cxl_driver cxl_mem_driver = {
 	.probe = cxl_mem_probe,
 	.id = CXL_DEVICE_MEMORY_EXPANDER,
 	.drv = {
+		.probe_type = PROBE_FORCE_SYNCHRONOUS,
 		.dev_groups = cxl_mem_groups,
 	},
 };

From 61c9f04507984a7b15c67deefad4911fff1350f5 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 15 Dec 2025 16:56:13 -0800
Subject: [PATCH 076/143] cxl/port: Arrange for always synchronous endpoint
 attach

Make it so that, upon return from devm_cxl_add_endpoint(),
cxl_mem_probe() can assume that the endpoint has had a chance to complete
cxl_port_probe().  I.e. cxl_port module loading has completed prior to
device registration.

Delete the MODULE_SOFTDEP() as it is not sufficient for this purpose, but a
hard link-time dependency is reliable. Specifically MODULE_SOFTDEP() does
not guarantee that the module loading has completed prior to the completion
of the current module's init.

Cc: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
Cc: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Tested-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Ben Cheatham <benjamin.cheatham@amd.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Tested-by: Alejandro Lucero <alucerop@amd.com>
Link: https://patch.msgid.link/20251216005616.3090129-4-dan.j.williams@intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit ae201a0092362ffdec7206efa1ec85e260fab8d2)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/cxl.h  |  2 ++
 drivers/cxl/mem.c  | 43 -------------------------------------------
 drivers/cxl/port.c | 40 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 42 insertions(+), 43 deletions(-)

diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index ba17fa86d249e..c796c3db36e0b 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -780,6 +780,8 @@ struct cxl_port *devm_cxl_add_port(struct device *host,
 				   struct cxl_dport *parent_dport);
 struct cxl_root *devm_cxl_add_root(struct device *host,
 				   const struct cxl_root_ops *ops);
+int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd,
+			  struct cxl_dport *parent_dport);
 struct cxl_root *find_cxl_root(struct cxl_port *port);
 
 DEFINE_FREE(put_cxl_root, struct cxl_root *, if (_T) put_device(&_T->port.dev))
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 55883797ab2db..d62931526fd41 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -45,44 +45,6 @@ static int cxl_mem_dpa_show(struct seq_file *file, void *data)
 	return 0;
 }
 
-static int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd,
-				 struct cxl_dport *parent_dport)
-{
-	struct cxl_port *parent_port = parent_dport->port;
-	struct cxl_port *endpoint, *iter, *down;
-	int rc;
-
-	/*
-	 * Now that the path to the root is established record all the
-	 * intervening ports in the chain.
-	 */
-	for (iter = parent_port, down = NULL; !is_cxl_root(iter);
-	     down = iter, iter = to_cxl_port(iter->dev.parent)) {
-		struct cxl_ep *ep;
-
-		ep = cxl_ep_load(iter, cxlmd);
-		ep->next = down;
-	}
-
-	/* Note: endpoint port component registers are derived from @cxlds */
-	endpoint = devm_cxl_add_port(host, &cxlmd->dev, CXL_RESOURCE_NONE,
-				     parent_dport);
-	if (IS_ERR(endpoint))
-		return PTR_ERR(endpoint);
-
-	rc = cxl_endpoint_autoremove(cxlmd, endpoint);
-	if (rc)
-		return rc;
-
-	if (!endpoint->dev.driver) {
-		dev_err(&cxlmd->dev, "%s failed probe\n",
-			dev_name(&endpoint->dev));
-		return -ENXIO;
-	}
-
-	return 0;
-}
-
 static int cxl_debugfs_poison_inject(void *data, u64 dpa)
 {
 	struct cxl_memdev *cxlmd = data;
@@ -275,8 +237,3 @@ MODULE_DESCRIPTION("CXL: Memory Expansion");
 MODULE_LICENSE("GPL v2");
 MODULE_IMPORT_NS("CXL");
 MODULE_ALIAS_CXL(CXL_DEVICE_MEMORY_EXPANDER);
-/*
- * create_endpoint() wants to validate port driver attach immediately after
- * endpoint registration.
- */
-MODULE_SOFTDEP("pre: cxl_port");
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index 51c8f2f84717a..7937e7e53797c 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -156,10 +156,50 @@ static struct cxl_driver cxl_port_driver = {
 	.probe = cxl_port_probe,
 	.id = CXL_DEVICE_PORT,
 	.drv = {
+		.probe_type = PROBE_FORCE_SYNCHRONOUS,
 		.dev_groups = cxl_port_attribute_groups,
 	},
 };
 
+int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd,
+			  struct cxl_dport *parent_dport)
+{
+	struct cxl_port *parent_port = parent_dport->port;
+	struct cxl_port *endpoint, *iter, *down;
+	int rc;
+
+	/*
+	 * Now that the path to the root is established record all the
+	 * intervening ports in the chain.
+	 */
+	for (iter = parent_port, down = NULL; !is_cxl_root(iter);
+	     down = iter, iter = to_cxl_port(iter->dev.parent)) {
+		struct cxl_ep *ep;
+
+		ep = cxl_ep_load(iter, cxlmd);
+		ep->next = down;
+	}
+
+	/* Note: endpoint port component registers are derived from @cxlds */
+	endpoint = devm_cxl_add_port(host, &cxlmd->dev, CXL_RESOURCE_NONE,
+				     parent_dport);
+	if (IS_ERR(endpoint))
+		return PTR_ERR(endpoint);
+
+	rc = cxl_endpoint_autoremove(cxlmd, endpoint);
+	if (rc)
+		return rc;
+
+	if (!endpoint->dev.driver) {
+		dev_err(&cxlmd->dev, "%s failed probe\n",
+			dev_name(&endpoint->dev));
+		return -ENXIO;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_FOR_MODULES(devm_cxl_add_endpoint, "cxl_mem");
+
 static int __init cxl_port_init(void)
 {
 	return cxl_driver_register(&cxl_port_driver);

From eb57cbc51ec38886dc1e88d48c6ec5dd134a3b69 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 15 Dec 2025 16:56:14 -0800
Subject: [PATCH 077/143] cxl/mem: Convert devm_cxl_add_memdev() to
 scope-based-cleanup

In preparation for adding more setup steps, convert the current
implementation to scope-based cleanup.

The cxl_memdev_shutdown() is only required after cdev_device_add(). With
that moved to a helper function, there is no need to add a scope-based
handler for that cleanup if devm_add_action_or_reset() fails.

Cc: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Ben Cheatham <benjamin.cheatham@amd.com>
Tested-by: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20251216005616.3090129-5-dan.j.williams@intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 6e1d21903ff213f1384ce43daa279c0965904116)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/memdev.c | 70 ++++++++++++++++++++++++---------------
 1 file changed, 44 insertions(+), 26 deletions(-)

diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index 7a4153e1c6a78..92aea95859fb6 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -1050,6 +1050,45 @@ static const struct file_operations cxl_memdev_fops = {
 	.llseek = noop_llseek,
 };
 
+/*
+ * Activate ioctl operations, no cxl_memdev_rwsem manipulation needed as this is
+ * ordered with cdev_add() publishing the device.
+ */
+static int cxlmd_add(struct cxl_memdev *cxlmd, struct cxl_dev_state *cxlds)
+{
+	int rc;
+
+	cxlmd->cxlds = cxlds;
+	cxlds->cxlmd = cxlmd;
+
+	rc = cdev_device_add(&cxlmd->cdev, &cxlmd->dev);
+	if (rc) {
+		/*
+		 * The cdev was briefly live, shutdown any ioctl operations that
+		 * saw that state.
+		 */
+		cxl_memdev_shutdown(&cxlmd->dev);
+		return rc;
+	}
+
+	return 0;
+}
+
+DEFINE_FREE(put_cxlmd, struct cxl_memdev *,
+	    if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev))
+
+static struct cxl_memdev *cxl_memdev_autoremove(struct cxl_memdev *cxlmd)
+{
+	int rc;
+
+	rc = devm_add_action_or_reset(cxlmd->cxlds->dev, cxl_memdev_unregister,
+				      cxlmd);
+	if (rc)
+		return ERR_PTR(rc);
+
+	return cxlmd;
+}
+
 /*
  * Core helper for devm_cxl_add_memdev() that wants to both create a device and
  * assert to the caller that upon return cxl_mem::probe() has been invoked.
@@ -1057,45 +1096,24 @@ static const struct file_operations cxl_memdev_fops = {
 struct cxl_memdev *__devm_cxl_add_memdev(struct device *host,
 					 struct cxl_dev_state *cxlds)
 {
-	struct cxl_memdev *cxlmd;
 	struct device *dev;
-	struct cdev *cdev;
 	int rc;
 
-	cxlmd = cxl_memdev_alloc(cxlds, &cxl_memdev_fops);
+	struct cxl_memdev *cxlmd __free(put_cxlmd) =
+		cxl_memdev_alloc(cxlds, &cxl_memdev_fops);
 	if (IS_ERR(cxlmd))
 		return cxlmd;
 
 	dev = &cxlmd->dev;
 	rc = dev_set_name(dev, "mem%d", cxlmd->id);
 	if (rc)
-		goto err;
-
-	/*
-	 * Activate ioctl operations, no cxl_memdev_rwsem manipulation
-	 * needed as this is ordered with cdev_add() publishing the device.
-	 */
-	cxlmd->cxlds = cxlds;
-	cxlds->cxlmd = cxlmd;
-
-	cdev = &cxlmd->cdev;
-	rc = cdev_device_add(cdev, dev);
-	if (rc)
-		goto err;
+		return ERR_PTR(rc);
 
-	rc = devm_add_action_or_reset(host, cxl_memdev_unregister, cxlmd);
+	rc = cxlmd_add(cxlmd, cxlds);
 	if (rc)
 		return ERR_PTR(rc);
-	return cxlmd;
 
-err:
-	/*
-	 * The cdev was briefly live, shutdown any ioctl operations that
-	 * saw that state.
-	 */
-	cxl_memdev_shutdown(dev);
-	put_device(dev);
-	return ERR_PTR(rc);
+	return cxl_memdev_autoremove(no_free_ptr(cxlmd));
 }
 EXPORT_SYMBOL_FOR_MODULES(__devm_cxl_add_memdev, "cxl_mem");
 

From 18ff2a23cb3f0484f9f8efb8cc7d41af995a3d97 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 15 Dec 2025 16:56:15 -0800
Subject: [PATCH 078/143] cxl/mem: Drop @host argument to devm_cxl_add_memdev()

In all cases the device that created the 'struct cxl_dev_state' instance is
also the device to host the devm cleanup of devm_cxl_add_memdev(). This
simplifies the function prototype, and limits a degree of freedom of the
API.

Cc: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Ben Cheatham <benjamin.cheatham@amd.com>
Tested-by: Alejandro Lucero <alucerop@amd.com>
Link: https://patch.msgid.link/20251216005616.3090129-6-dan.j.williams@intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit f2546eba53bbe38c4bb950f78625ccf4b1a2cbc8)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/memdev.c    | 3 +--
 drivers/cxl/cxlmem.h         | 6 ++----
 drivers/cxl/mem.c            | 9 +++++----
 drivers/cxl/pci.c            | 2 +-
 tools/testing/cxl/test/mem.c | 2 +-
 5 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index 92aea95859fb6..935a163f1527d 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -1093,8 +1093,7 @@ static struct cxl_memdev *cxl_memdev_autoremove(struct cxl_memdev *cxlmd)
  * Core helper for devm_cxl_add_memdev() that wants to both create a device and
  * assert to the caller that upon return cxl_mem::probe() has been invoked.
  */
-struct cxl_memdev *__devm_cxl_add_memdev(struct device *host,
-					 struct cxl_dev_state *cxlds)
+struct cxl_memdev *__devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
 {
 	struct device *dev;
 	int rc;
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 012e68acad342..9db31c7993c48 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -95,10 +95,8 @@ static inline bool is_cxl_endpoint(struct cxl_port *port)
 	return is_cxl_memdev(port->uport_dev);
 }
 
-struct cxl_memdev *__devm_cxl_add_memdev(struct device *host,
-					 struct cxl_dev_state *cxlds);
-struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
-				       struct cxl_dev_state *cxlds);
+struct cxl_memdev *__devm_cxl_add_memdev(struct cxl_dev_state *cxlds);
+struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds);
 int devm_cxl_sanitize_setup_notifier(struct device *host,
 				     struct cxl_memdev *cxlmd);
 struct cxl_memdev_state;
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index d62931526fd41..677996c652724 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -165,17 +165,18 @@ static int cxl_mem_probe(struct device *dev)
 
 /**
  * devm_cxl_add_memdev - Add a CXL memory device
- * @host: devres alloc/release context and parent for the memdev
  * @cxlds: CXL device state to associate with the memdev
  *
  * Upon return the device will have had a chance to attach to the
  * cxl_mem driver, but may fail if the CXL topology is not ready
  * (hardware CXL link down, or software platform CXL root not attached)
+ *
+ * The parent of the resulting device and the devm context for allocations is
+ * @cxlds->dev.
  */
-struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
-				       struct cxl_dev_state *cxlds)
+struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
 {
-	return __devm_cxl_add_memdev(host, cxlds);
+	return __devm_cxl_add_memdev(cxlds);
 }
 EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, "CXL");
 
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 0be4e508affe7..1c6fc53348069 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -1006,7 +1006,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (rc)
 		dev_dbg(&pdev->dev, "No CXL Features discovered\n");
 
-	cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds);
+	cxlmd = devm_cxl_add_memdev(cxlds);
 	if (IS_ERR(cxlmd))
 		return PTR_ERR(cxlmd);
 
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index 176dcde570cdd..8a22b76016273 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -1767,7 +1767,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
 
 	cxl_mock_add_event_logs(&mdata->mes);
 
-	cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds);
+	cxlmd = devm_cxl_add_memdev(cxlds);
 	if (IS_ERR(cxlmd))
 		return PTR_ERR(cxlmd);
 

From 14289aa0bc9c7b88982ae27af756c6fbc476f9e4 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 15 Dec 2025 16:56:16 -0800
Subject: [PATCH 079/143] cxl/mem: Introduce cxl_memdev_attach for
 CXL-dependent operation

Unlike the cxl_pci class driver that opportunistically enables memory
expansion with no other dependent functionality, CXL accelerator drivers
have distinct PCIe-only and CXL-enhanced operation states. If CXL is
available some additional coherent memory/cache operations can be enabled,
otherwise traditional DMA+MMIO over PCIe/CXL.io is a fallback.

This constitutes a new mode of operation where the caller of
devm_cxl_add_memdev() wants to make a "go/no-go" decision about running
in CXL accelerated mode or falling back to PCIe-only operation. Part of
that decision making process likely also includes additional
CXL-acceleration-specific resource setup. Encapsulate both of those
requirements into 'struct cxl_memdev_attach' that provides a ->probe()
callback. The probe callback runs in cxl_mem_probe() context, after the
port topology is successfully attached for the given memdev. It supports
a contract where, upon successful return from devm_cxl_add_memdev(),
everything needed for CXL accelerated operation has been enabled.

Additionally, the presence of @cxlmd->attach indicates that the accelerator
driver should be detached when CXL operation ends. This conceptually makes
a CXL link loss event mirror a PCIe link loss event, which results in
triggering the ->remove() callback of affected devices+drivers. A driver
can re-attach to recover back to PCIe-only operation. Live recovery, i.e.
without a ->remove()/->probe() cycle, is left as a future consideration.

[ dj: Replace with updated commit log from Dan ]

Cc: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
Reviewed-by: Ben Cheatham <benjamin.cheatham@amd.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Tested-by: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20251216005616.3090129-7-dan.j.williams@intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 29317f8dc6ed601ec54575689c2cd55cc470bcce)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/memdev.c    | 33 +++++++++++++++++++++++++++++----
 drivers/cxl/cxlmem.h         | 12 ++++++++++--
 drivers/cxl/mem.c            | 20 ++++++++++++++++----
 drivers/cxl/pci.c            |  2 +-
 tools/testing/cxl/test/mem.c |  2 +-
 5 files changed, 57 insertions(+), 12 deletions(-)

diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index 935a163f1527d..af3d0cc651387 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -641,14 +641,24 @@ static void detach_memdev(struct work_struct *work)
 	struct cxl_memdev *cxlmd;
 
 	cxlmd = container_of(work, typeof(*cxlmd), detach_work);
-	device_release_driver(&cxlmd->dev);
+
+	/*
+	 * When the creator of @cxlmd sets ->attach it indicates CXL operation
+	 * is required. In that case, @cxlmd detach escalates to parent device
+	 * detach.
+	 */
+	if (cxlmd->attach)
+		device_release_driver(cxlmd->dev.parent);
+	else
+		device_release_driver(&cxlmd->dev);
 	put_device(&cxlmd->dev);
 }
 
 static struct lock_class_key cxl_memdev_key;
 
 static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
-					   const struct file_operations *fops)
+					   const struct file_operations *fops,
+					   const struct cxl_memdev_attach *attach)
 {
 	struct cxl_memdev *cxlmd;
 	struct device *dev;
@@ -664,6 +674,8 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
 		goto err;
 	cxlmd->id = rc;
 	cxlmd->depth = -1;
+	cxlmd->attach = attach;
+	cxlmd->endpoint = ERR_PTR(-ENXIO);
 
 	dev = &cxlmd->dev;
 	device_initialize(dev);
@@ -1081,6 +1093,18 @@ static struct cxl_memdev *cxl_memdev_autoremove(struct cxl_memdev *cxlmd)
 {
 	int rc;
 
+	/*
+	 * If @attach is provided fail if the driver is not attached upon
+	 * return. Note that failure here could be the result of a race to
+	 * teardown the CXL port topology. I.e. cxl_mem_probe() could have
+	 * succeeded and then cxl_mem unbound before the lock is acquired.
+	 */
+	guard(device)(&cxlmd->dev);
+	if (cxlmd->attach && !cxlmd->dev.driver) {
+		cxl_memdev_unregister(cxlmd);
+		return ERR_PTR(-ENXIO);
+	}
+
 	rc = devm_add_action_or_reset(cxlmd->cxlds->dev, cxl_memdev_unregister,
 				      cxlmd);
 	if (rc)
@@ -1093,13 +1117,14 @@ static struct cxl_memdev *cxl_memdev_autoremove(struct cxl_memdev *cxlmd)
  * Core helper for devm_cxl_add_memdev() that wants to both create a device and
  * assert to the caller that upon return cxl_mem::probe() has been invoked.
  */
-struct cxl_memdev *__devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
+struct cxl_memdev *__devm_cxl_add_memdev(struct cxl_dev_state *cxlds,
+					 const struct cxl_memdev_attach *attach)
 {
 	struct device *dev;
 	int rc;
 
 	struct cxl_memdev *cxlmd __free(put_cxlmd) =
-		cxl_memdev_alloc(cxlds, &cxl_memdev_fops);
+		cxl_memdev_alloc(cxlds, &cxl_memdev_fops, attach);
 	if (IS_ERR(cxlmd))
 		return cxlmd;
 
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 9db31c7993c48..ef202b34e5ea4 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -34,6 +34,10 @@
 	(FIELD_GET(CXLMDEV_RESET_NEEDED_MASK, status) !=                       \
 	 CXLMDEV_RESET_NEEDED_NOT)
 
+struct cxl_memdev_attach {
+	int (*probe)(struct cxl_memdev *cxlmd);
+};
+
 /**
  * struct cxl_memdev - CXL bus object representing a Type-3 Memory Device
  * @dev: driver core device object
@@ -43,6 +47,7 @@
  * @cxl_nvb: coordinate removal of @cxl_nvd if present
  * @cxl_nvd: optional bridge to an nvdimm if the device supports pmem
  * @endpoint: connection to the CXL port topology for this memory device
+ * @attach: creator of this memdev depends on CXL link attach to operate
  * @id: id number of this memdev instance.
  * @depth: endpoint port depth
  * @scrub_cycle: current scrub cycle set for this device
@@ -59,6 +64,7 @@ struct cxl_memdev {
 	struct cxl_nvdimm_bridge *cxl_nvb;
 	struct cxl_nvdimm *cxl_nvd;
 	struct cxl_port *endpoint;
+	const struct cxl_memdev_attach *attach;
 	int id;
 	int depth;
 	u8 scrub_cycle;
@@ -95,8 +101,10 @@ static inline bool is_cxl_endpoint(struct cxl_port *port)
 	return is_cxl_memdev(port->uport_dev);
 }
 
-struct cxl_memdev *__devm_cxl_add_memdev(struct cxl_dev_state *cxlds);
-struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds);
+struct cxl_memdev *__devm_cxl_add_memdev(struct cxl_dev_state *cxlds,
+					 const struct cxl_memdev_attach *attach);
+struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds,
+				       const struct cxl_memdev_attach *attach);
 int devm_cxl_sanitize_setup_notifier(struct device *host,
 				     struct cxl_memdev *cxlmd);
 struct cxl_memdev_state;
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 677996c652724..333c366b69e76 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -142,6 +142,12 @@ static int cxl_mem_probe(struct device *dev)
 			return rc;
 	}
 
+	if (cxlmd->attach) {
+		rc = cxlmd->attach->probe(cxlmd);
+		if (rc)
+			return rc;
+	}
+
 	rc = devm_cxl_memdev_edac_register(cxlmd);
 	if (rc)
 		dev_dbg(dev, "CXL memdev EDAC registration failed rc=%d\n", rc);
@@ -166,17 +172,23 @@ static int cxl_mem_probe(struct device *dev)
 /**
  * devm_cxl_add_memdev - Add a CXL memory device
  * @cxlds: CXL device state to associate with the memdev
+ * @attach: Caller depends on CXL topology attachment
  *
  * Upon return the device will have had a chance to attach to the
- * cxl_mem driver, but may fail if the CXL topology is not ready
- * (hardware CXL link down, or software platform CXL root not attached)
+ * cxl_mem driver, but may fail to attach if the CXL topology is not ready
+ * (hardware CXL link down, or software platform CXL root not attached).
+ *
+ * When @attach is NULL it indicates the caller wants the memdev to remain
+ * registered even if it does not immediately attach to the CXL hierarchy. When
+ * @attach is provided a cxl_mem_probe() failure leads to failure of this routine.
  *
  * The parent of the resulting device and the devm context for allocations is
  * @cxlds->dev.
  */
-struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
+struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds,
+				       const struct cxl_memdev_attach *attach)
 {
-	return __devm_cxl_add_memdev(cxlds);
+	return __devm_cxl_add_memdev(cxlds, attach);
 }
 EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, "CXL");
 
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 1c6fc53348069..549368a9c868f 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -1006,7 +1006,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (rc)
 		dev_dbg(&pdev->dev, "No CXL Features discovered\n");
 
-	cxlmd = devm_cxl_add_memdev(cxlds);
+	cxlmd = devm_cxl_add_memdev(cxlds, NULL);
 	if (IS_ERR(cxlmd))
 		return PTR_ERR(cxlmd);
 
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index 8a22b76016273..cb87e8c0e63c0 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -1767,7 +1767,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
 
 	cxl_mock_add_event_logs(&mdata->mes);
 
-	cxlmd = devm_cxl_add_memdev(cxlds);
+	cxlmd = devm_cxl_add_memdev(cxlds, NULL);
 	if (IS_ERR(cxlmd))
 		return PTR_ERR(cxlmd);
 

From 91c8344a5d164078eaf354646c84bda3ace826ce Mon Sep 17 00:00:00 2001
From: Terry Bowman <terry.bowman@amd.com>
Date: Wed, 14 Jan 2026 12:20:22 -0600
Subject: [PATCH 080/143] PCI: Move CXL DVSEC definitions into
 uapi/linux/pci_regs.h

The CXL DVSECs are currently defined in drivers/cxl/cxlpci.h, where they
are not accessible to other subsystems. Move them to uapi/linux/pci_regs.h.

The CXL DVSEC definitions will be renamed and reformatted to fit better
with existing defines.

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://patch.msgid.link/20260114182055.46029-2-terry.bowman@amd.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(backported from commit 0f7afd80d81b739c4a9a6e4e24109ba1030c9c56)
[jan: Resolve minor conflict due to common anchor not existing in 6.17]
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/cxlpci.h          | 53 ---------------------------------
 include/uapi/linux/pci_regs.h | 56 +++++++++++++++++++++++++++++++++--
 2 files changed, 53 insertions(+), 56 deletions(-)

diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index 1d526bea84312..cdb7cf3dbcb43 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -7,59 +7,6 @@
 
 #define CXL_MEMORY_PROGIF	0x10
 
-/*
- * See section 8.1 Configuration Space Registers in the CXL 2.0
- * Specification. Names are taken straight from the specification with "CXL" and
- * "DVSEC" redundancies removed. When obvious, abbreviations may be used.
- */
-#define PCI_DVSEC_HEADER1_LENGTH_MASK	GENMASK(31, 20)
-
-/* CXL 2.0 8.1.3: PCIe DVSEC for CXL Device */
-#define CXL_DVSEC_PCIE_DEVICE					0
-#define   CXL_DVSEC_CAP_OFFSET		0xA
-#define     CXL_DVSEC_MEM_CAPABLE	BIT(2)
-#define     CXL_DVSEC_HDM_COUNT_MASK	GENMASK(5, 4)
-#define   CXL_DVSEC_CTRL_OFFSET		0xC
-#define     CXL_DVSEC_MEM_ENABLE	BIT(2)
-#define   CXL_DVSEC_RANGE_SIZE_HIGH(i)	(0x18 + (i * 0x10))
-#define   CXL_DVSEC_RANGE_SIZE_LOW(i)	(0x1C + (i * 0x10))
-#define     CXL_DVSEC_MEM_INFO_VALID	BIT(0)
-#define     CXL_DVSEC_MEM_ACTIVE	BIT(1)
-#define     CXL_DVSEC_MEM_SIZE_LOW_MASK	GENMASK(31, 28)
-#define   CXL_DVSEC_RANGE_BASE_HIGH(i)	(0x20 + (i * 0x10))
-#define   CXL_DVSEC_RANGE_BASE_LOW(i)	(0x24 + (i * 0x10))
-#define     CXL_DVSEC_MEM_BASE_LOW_MASK	GENMASK(31, 28)
-
-#define CXL_DVSEC_RANGE_MAX		2
-
-/* CXL 2.0 8.1.4: Non-CXL Function Map DVSEC */
-#define CXL_DVSEC_FUNCTION_MAP					2
-
-/* CXL 2.0 8.1.5: CXL 2.0 Extensions DVSEC for Ports */
-#define CXL_DVSEC_PORT_EXTENSIONS				3
-
-/* CXL 2.0 8.1.6: GPF DVSEC for CXL Port */
-#define CXL_DVSEC_PORT_GPF					4
-#define   CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET		0x0C
-#define     CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK		GENMASK(3, 0)
-#define     CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK		GENMASK(11, 8)
-#define   CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET		0xE
-#define     CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK		GENMASK(3, 0)
-#define     CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK		GENMASK(11, 8)
-
-/* CXL 2.0 8.1.7: GPF DVSEC for CXL Device */
-#define CXL_DVSEC_DEVICE_GPF					5
-
-/* CXL 2.0 8.1.8: PCIe DVSEC for Flex Bus Port */
-#define CXL_DVSEC_PCIE_FLEXBUS_PORT				7
-
-/* CXL 2.0 8.1.9: Register Locator DVSEC */
-#define CXL_DVSEC_REG_LOCATOR					8
-#define   CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET			0xC
-#define     CXL_DVSEC_REG_LOCATOR_BIR_MASK			GENMASK(2, 0)
-#define	    CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK			GENMASK(15, 8)
-#define     CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK		GENMASK(31, 16)
-
 /*
  * NOTE: Currently all the functions which are enabled for CXL require their
  * vectors to be in the first 16.  Use this as the default max.
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index bfa9ada355c9b..8ea78d7e08a3d 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -1239,9 +1239,59 @@
 #define PCI_DVSEC_CXL_PORT_CTL				0x0c
 #define PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR		0x00000001
 
-/* CXL 2.0 8.1.3: PCIe DVSEC for CXL Device */
+/*
+ * Compute Express Link (CXL r3.2, sec 8.1)
+ *
+ * Note that CXL DVSEC id 3 and 7 to be ignored when the CXL link state
+ * is "disconnected" (CXL r3.2, sec 9.12.3). Re-enumerate these
+ * registers on downstream link-up events.
+ */
+#define PCI_DVSEC_HEADER1_LENGTH_MASK  __GENMASK(31, 20)
+
+/* CXL 3.2 8.1.3: PCIe DVSEC for CXL Device */
 #define CXL_DVSEC_PCIE_DEVICE				0
-#define   CXL_DVSEC_CAP_OFFSET				0xA
-#define     CXL_DVSEC_CACHE_CAPABLE			BIT(0)
+#define  CXL_DVSEC_CAP_OFFSET				0xA
+#define     CXL_DVSEC_CACHE_CAPABLE			_BITUL(0)
+#define   CXL_DVSEC_MEM_CAPABLE				_BITUL(2)
+#define   CXL_DVSEC_HDM_COUNT_MASK			__GENMASK(5, 4)
+#define  CXL_DVSEC_CTRL_OFFSET				0xC
+#define   CXL_DVSEC_MEM_ENABLE				_BITUL(2)
+#define  CXL_DVSEC_RANGE_SIZE_HIGH(i)			(0x18 + (i * 0x10))
+#define  CXL_DVSEC_RANGE_SIZE_LOW(i)			(0x1C + (i * 0x10))
+#define   CXL_DVSEC_MEM_INFO_VALID			_BITUL(0)
+#define   CXL_DVSEC_MEM_ACTIVE				_BITUL(1)
+#define   CXL_DVSEC_MEM_SIZE_LOW_MASK			__GENMASK(31, 28)
+#define  CXL_DVSEC_RANGE_BASE_HIGH(i)			(0x20 + (i * 0x10))
+#define  CXL_DVSEC_RANGE_BASE_LOW(i)			(0x24 + (i * 0x10))
+#define   CXL_DVSEC_MEM_BASE_LOW_MASK			__GENMASK(31, 28)
+
+#define CXL_DVSEC_RANGE_MAX				2
+
+/* CXL 3.2 8.1.4: Non-CXL Function Map DVSEC */
+#define CXL_DVSEC_FUNCTION_MAP				2
+
+/* CXL 3.2 8.1.5: Extensions DVSEC for Ports */
+#define CXL_DVSEC_PORT					3
+#define   CXL_DVSEC_PORT_CTL				0x0c
+#define    CXL_DVSEC_PORT_CTL_UNMASK_SBR		0x00000001
+
+/* CXL 3.2 8.1.6: GPF DVSEC for CXL Port */
+#define CXL_DVSEC_PORT_GPF				4
+#define  CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET	0x0C
+#define   CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK	__GENMASK(3, 0)
+#define   CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK	__GENMASK(11, 8)
+#define  CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET	0xE
+#define   CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK	__GENMASK(3, 0)
+#define   CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK	__GENMASK(11, 8)
+
+/* CXL 3.2 8.1.7: GPF DVSEC for CXL Device */
+#define CXL_DVSEC_DEVICE_GPF				5
+
+/* CXL 3.2 8.1.9: Register Locator DVSEC */
+#define CXL_DVSEC_REG_LOCATOR				8
+#define  CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET		0xC
+#define   CXL_DVSEC_REG_LOCATOR_BIR_MASK		__GENMASK(2, 0)
+#define   CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK		__GENMASK(15, 8)
+#define   CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK	__GENMASK(31, 16)
 
 #endif /* LINUX_PCI_REGS_H */

From ed38046d9db5cc8dc07647503d9ddf1d2482833c Mon Sep 17 00:00:00 2001
From: Terry Bowman <terry.bowman@amd.com>
Date: Wed, 14 Jan 2026 12:20:23 -0600
Subject: [PATCH 081/143] PCI: Update CXL DVSEC definitions

CXL DVSEC definitions were recently moved into uapi/pci_regs.h, but the
newly added macros do not follow the file's existing naming conventions.
The current format uses CXL_DVSEC_XYZ, while the new CXL entries must
instead use the PCI_DVSEC_CXL_XYZ prefix to match the conventions already
established in pci_regs.h.

The new CXL DVSEC macros also introduce _MASK and _OFFSET suffixes, which
are not used anywhere else in the file. These suffixes lengthen the
identifiers and reduce readability. Remove _MASK and _OFFSET from the
recently added definitions.

Additionally, remove PCI_DVSEC_HEADER1_LENGTH_MASK, as it duplicates the
existing PCI_DVSEC_HEADER1_LEN() macro.

Update all existing references to use the new macro names.

Finally, update the inline documentation to reference the latest revision
of the CXL specification.

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://patch.msgid.link/20260114182055.46029-3-terry.bowman@amd.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 6612bd9ff0b1001cff5f5d79db6ce44427d2e99c)
[jan: Resolve conflict due to commit ancestor anchor missing in 6.17 and CXL_DVSEC_CACHE_CAPABLE introduced by 72bd823fb4f1]
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/pci.c        | 58 ++++++++++-----------
 drivers/cxl/core/regs.c       | 14 ++---
 drivers/cxl/pci.c             |  2 +-
 drivers/pci/ats.c             |  6 +--
 include/uapi/linux/pci_regs.h | 96 ++++++++++++++++-------------------
 5 files changed, 85 insertions(+), 91 deletions(-)

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 5b023a0178a47..077b386e0c8d6 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -86,12 +86,12 @@ static int cxl_dvsec_mem_range_valid(struct cxl_dev_state *cxlds, int id)
 	i = 1;
 	do {
 		rc = pci_read_config_dword(pdev,
-					   d + CXL_DVSEC_RANGE_SIZE_LOW(id),
+					   d + PCI_DVSEC_CXL_RANGE_SIZE_LOW(id),
 					   &temp);
 		if (rc)
 			return rc;
 
-		valid = FIELD_GET(CXL_DVSEC_MEM_INFO_VALID, temp);
+		valid = FIELD_GET(PCI_DVSEC_CXL_MEM_INFO_VALID, temp);
 		if (valid)
 			break;
 		msleep(1000);
@@ -121,11 +121,11 @@ static int cxl_dvsec_mem_range_active(struct cxl_dev_state *cxlds, int id)
 	/* Check MEM ACTIVE bit, up to 60s timeout by default */
 	for (i = media_ready_timeout; i; i--) {
 		rc = pci_read_config_dword(
-			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(id), &temp);
+			pdev, d + PCI_DVSEC_CXL_RANGE_SIZE_LOW(id), &temp);
 		if (rc)
 			return rc;
 
-		active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp);
+		active = FIELD_GET(PCI_DVSEC_CXL_MEM_ACTIVE, temp);
 		if (active)
 			break;
 		msleep(1000);
@@ -154,11 +154,11 @@ int cxl_await_media_ready(struct cxl_dev_state *cxlds)
 	u16 cap;
 
 	rc = pci_read_config_word(pdev,
-				  d + CXL_DVSEC_CAP_OFFSET, &cap);
+				  d + PCI_DVSEC_CXL_CAP, &cap);
 	if (rc)
 		return rc;
 
-	hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap);
+	hdm_count = FIELD_GET(PCI_DVSEC_CXL_HDM_COUNT, cap);
 	for (i = 0; i < hdm_count; i++) {
 		rc = cxl_dvsec_mem_range_valid(cxlds, i);
 		if (rc)
@@ -186,16 +186,16 @@ static int cxl_set_mem_enable(struct cxl_dev_state *cxlds, u16 val)
 	u16 ctrl;
 	int rc;
 
-	rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl);
+	rc = pci_read_config_word(pdev, d + PCI_DVSEC_CXL_CTRL, &ctrl);
 	if (rc < 0)
 		return rc;
 
-	if ((ctrl & CXL_DVSEC_MEM_ENABLE) == val)
+	if ((ctrl & PCI_DVSEC_CXL_MEM_ENABLE) == val)
 		return 1;
-	ctrl &= ~CXL_DVSEC_MEM_ENABLE;
+	ctrl &= ~PCI_DVSEC_CXL_MEM_ENABLE;
 	ctrl |= val;
 
-	rc = pci_write_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, ctrl);
+	rc = pci_write_config_word(pdev, d + PCI_DVSEC_CXL_CTRL, ctrl);
 	if (rc < 0)
 		return rc;
 
@@ -211,7 +211,7 @@ static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds)
 {
 	int rc;
 
-	rc = cxl_set_mem_enable(cxlds, CXL_DVSEC_MEM_ENABLE);
+	rc = cxl_set_mem_enable(cxlds, PCI_DVSEC_CXL_MEM_ENABLE);
 	if (rc < 0)
 		return rc;
 	if (rc > 0)
@@ -273,11 +273,11 @@ int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds,
 		return -ENXIO;
 	}
 
-	rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap);
+	rc = pci_read_config_word(pdev, d + PCI_DVSEC_CXL_CAP, &cap);
 	if (rc)
 		return rc;
 
-	if (!(cap & CXL_DVSEC_MEM_CAPABLE)) {
+	if (!(cap & PCI_DVSEC_CXL_MEM_CAPABLE)) {
 		dev_dbg(dev, "Not MEM Capable\n");
 		return -ENXIO;
 	}
@@ -288,7 +288,7 @@ int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds,
 	 * driver is for a spec defined class code which must be CXL.mem
 	 * capable, there is no point in continuing to enable CXL.mem.
 	 */
-	hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap);
+	hdm_count = FIELD_GET(PCI_DVSEC_CXL_HDM_COUNT, cap);
 	if (!hdm_count || hdm_count > 2)
 		return -EINVAL;
 
@@ -297,11 +297,11 @@ int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds,
 	 * disabled, and they will remain moot after the HDM Decoder
 	 * capability is enabled.
 	 */
-	rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl);
+	rc = pci_read_config_word(pdev, d + PCI_DVSEC_CXL_CTRL, &ctrl);
 	if (rc)
 		return rc;
 
-	info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl);
+	info->mem_enabled = FIELD_GET(PCI_DVSEC_CXL_MEM_ENABLE, ctrl);
 	if (!info->mem_enabled)
 		return 0;
 
@@ -314,35 +314,35 @@ int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds,
 			return rc;
 
 		rc = pci_read_config_dword(
-			pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp);
+			pdev, d + PCI_DVSEC_CXL_RANGE_SIZE_HIGH(i), &temp);
 		if (rc)
 			return rc;
 
 		size = (u64)temp << 32;
 
 		rc = pci_read_config_dword(
-			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp);
+			pdev, d + PCI_DVSEC_CXL_RANGE_SIZE_LOW(i), &temp);
 		if (rc)
 			return rc;
 
-		size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK;
+		size |= temp & PCI_DVSEC_CXL_MEM_SIZE_LOW;
 		if (!size) {
 			continue;
 		}
 
 		rc = pci_read_config_dword(
-			pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp);
+			pdev, d + PCI_DVSEC_CXL_RANGE_BASE_HIGH(i), &temp);
 		if (rc)
 			return rc;
 
 		base = (u64)temp << 32;
 
 		rc = pci_read_config_dword(
-			pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp);
+			pdev, d + PCI_DVSEC_CXL_RANGE_BASE_LOW(i), &temp);
 		if (rc)
 			return rc;
 
-		base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK;
+		base |= temp & PCI_DVSEC_CXL_MEM_BASE_LOW;
 
 		info->dvsec_range[ranges++] = (struct range) {
 			.start = base,
@@ -1068,7 +1068,7 @@ u16 cxl_gpf_get_dvsec(struct device *dev)
 		is_port = false;
 
 	dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
-			is_port ? CXL_DVSEC_PORT_GPF : CXL_DVSEC_DEVICE_GPF);
+			is_port ? PCI_DVSEC_CXL_PORT_GPF : PCI_DVSEC_CXL_DEVICE_GPF);
 	if (!dvsec)
 		dev_warn(dev, "%s GPF DVSEC not present\n",
 			 is_port ? "Port" : "Device");
@@ -1084,14 +1084,14 @@ static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase)
 
 	switch (phase) {
 	case 1:
-		offset = CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET;
-		base = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK;
-		scale = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK;
+		offset = PCI_DVSEC_CXL_PORT_GPF_PHASE_1_CONTROL;
+		base = PCI_DVSEC_CXL_PORT_GPF_PHASE_1_TMO_BASE;
+		scale = PCI_DVSEC_CXL_PORT_GPF_PHASE_1_TMO_SCALE;
 		break;
 	case 2:
-		offset = CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET;
-		base = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK;
-		scale = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK;
+		offset = PCI_DVSEC_CXL_PORT_GPF_PHASE_2_CONTROL;
+		base = PCI_DVSEC_CXL_PORT_GPF_PHASE_2_TMO_BASE;
+		scale = PCI_DVSEC_CXL_PORT_GPF_PHASE_2_TMO_SCALE;
 		break;
 	default:
 		return -EINVAL;
diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
index 5ca7b0eed568b..a010b32143422 100644
--- a/drivers/cxl/core/regs.c
+++ b/drivers/cxl/core/regs.c
@@ -271,10 +271,10 @@ EXPORT_SYMBOL_NS_GPL(cxl_map_device_regs, "CXL");
 static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi,
 				struct cxl_register_map *map)
 {
-	u8 reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo);
-	int bar = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo);
+	u8 reg_type = FIELD_GET(PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_ID, reg_lo);
+	int bar = FIELD_GET(PCI_DVSEC_CXL_REG_LOCATOR_BIR, reg_lo);
 	u64 offset = ((u64)reg_hi << 32) |
-		     (reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK);
+		     (reg_lo & PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_OFF_LOW);
 
 	if (offset > pci_resource_len(pdev, bar)) {
 		dev_warn(&pdev->dev,
@@ -311,15 +311,15 @@ static int __cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_ty
 	};
 
 	regloc = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
-					   CXL_DVSEC_REG_LOCATOR);
+					   PCI_DVSEC_CXL_REG_LOCATOR);
 	if (!regloc)
 		return -ENXIO;
 
 	pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, &regloc_size);
-	regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size);
+	regloc_size = PCI_DVSEC_HEADER1_LEN(regloc_size);
 
-	regloc += CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET;
-	regblocks = (regloc_size - CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET) / 8;
+	regloc += PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1;
+	regblocks = (regloc_size - PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1) / 8;
 
 	for (i = 0; i < regblocks; i++, regloc += 8) {
 		u32 reg_lo, reg_hi;
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 549368a9c868f..d03292e7b9b99 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -933,7 +933,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	cxlds->rcd = is_cxl_restricted(pdev);
 	cxlds->serial = pci_get_dsn(pdev);
 	cxlds->cxl_dvsec = pci_find_dvsec_capability(
-		pdev, PCI_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE);
+		pdev, PCI_VENDOR_ID_CXL, PCI_DVSEC_CXL_DEVICE);
 	if (!cxlds->cxl_dvsec)
 		dev_warn(&pdev->dev,
 			 "Device DVSEC not present, skip CXL.mem init\n");
diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index 6db45ae2cc8e3..ae3152be018a7 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -218,12 +218,12 @@ static bool pci_cxl_ats_always_on(struct pci_dev *pdev)
 	u16 cap;
 
 	offset = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
-					   CXL_DVSEC_PCIE_DEVICE);
+					   PCI_DVSEC_CXL_DEVICE);
 	if (!offset)
 		return false;
 
-	pci_read_config_word(pdev, offset + CXL_DVSEC_CAP_OFFSET, &cap);
-	if (cap & CXL_DVSEC_CACHE_CAPABLE)
+	pci_read_config_word(pdev, offset + PCI_DVSEC_CXL_CAP, &cap);
+	if (cap & PCI_DVSEC_CXL_CACHE_CAPABLE)
 		return true;
 
 	return false;
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 8ea78d7e08a3d..22e22cea2c4f2 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -1234,64 +1234,58 @@
 /* Deprecated old name, replaced with PCI_DOE_DATA_OBJECT_DISC_RSP_3_TYPE */
 #define PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL		PCI_DOE_DATA_OBJECT_DISC_RSP_3_TYPE
 
-/* Compute Express Link (CXL r3.1, sec 8.1.5) */
-#define PCI_DVSEC_CXL_PORT				3
-#define PCI_DVSEC_CXL_PORT_CTL				0x0c
-#define PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR		0x00000001
-
 /*
- * Compute Express Link (CXL r3.2, sec 8.1)
+ * Compute Express Link (CXL r4.0, sec 8.1)
  *
  * Note that CXL DVSEC id 3 and 7 to be ignored when the CXL link state
- * is "disconnected" (CXL r3.2, sec 9.12.3). Re-enumerate these
+ * is "disconnected" (CXL r4.0, sec 9.12.3). Re-enumerate these
  * registers on downstream link-up events.
  */
-#define PCI_DVSEC_HEADER1_LENGTH_MASK  __GENMASK(31, 20)
-
-/* CXL 3.2 8.1.3: PCIe DVSEC for CXL Device */
-#define CXL_DVSEC_PCIE_DEVICE				0
-#define  CXL_DVSEC_CAP_OFFSET				0xA
-#define     CXL_DVSEC_CACHE_CAPABLE			_BITUL(0)
-#define   CXL_DVSEC_MEM_CAPABLE				_BITUL(2)
-#define   CXL_DVSEC_HDM_COUNT_MASK			__GENMASK(5, 4)
-#define  CXL_DVSEC_CTRL_OFFSET				0xC
-#define   CXL_DVSEC_MEM_ENABLE				_BITUL(2)
-#define  CXL_DVSEC_RANGE_SIZE_HIGH(i)			(0x18 + (i * 0x10))
-#define  CXL_DVSEC_RANGE_SIZE_LOW(i)			(0x1C + (i * 0x10))
-#define   CXL_DVSEC_MEM_INFO_VALID			_BITUL(0)
-#define   CXL_DVSEC_MEM_ACTIVE				_BITUL(1)
-#define   CXL_DVSEC_MEM_SIZE_LOW_MASK			__GENMASK(31, 28)
-#define  CXL_DVSEC_RANGE_BASE_HIGH(i)			(0x20 + (i * 0x10))
-#define  CXL_DVSEC_RANGE_BASE_LOW(i)			(0x24 + (i * 0x10))
-#define   CXL_DVSEC_MEM_BASE_LOW_MASK			__GENMASK(31, 28)
+
+/* CXL r4.0, 8.1.3: PCIe DVSEC for CXL Device */
+#define PCI_DVSEC_CXL_DEVICE				0
+#define  PCI_DVSEC_CXL_CAP				0xA
+#define     PCI_DVSEC_CXL_CACHE_CAPABLE			_BITUL(0)
+#define   PCI_DVSEC_CXL_MEM_CAPABLE			_BITUL(2)
+#define   PCI_DVSEC_CXL_HDM_COUNT			__GENMASK(5, 4)
+#define  PCI_DVSEC_CXL_CTRL				0xC
+#define   PCI_DVSEC_CXL_MEM_ENABLE			_BITUL(2)
+#define  PCI_DVSEC_CXL_RANGE_SIZE_HIGH(i)		(0x18 + (i * 0x10))
+#define  PCI_DVSEC_CXL_RANGE_SIZE_LOW(i)		(0x1C + (i * 0x10))
+#define   PCI_DVSEC_CXL_MEM_INFO_VALID			_BITUL(0)
+#define   PCI_DVSEC_CXL_MEM_ACTIVE			_BITUL(1)
+#define   PCI_DVSEC_CXL_MEM_SIZE_LOW			__GENMASK(31, 28)
+#define  PCI_DVSEC_CXL_RANGE_BASE_HIGH(i)		(0x20 + (i * 0x10))
+#define  PCI_DVSEC_CXL_RANGE_BASE_LOW(i)		(0x24 + (i * 0x10))
+#define   PCI_DVSEC_CXL_MEM_BASE_LOW			__GENMASK(31, 28)
 
 #define CXL_DVSEC_RANGE_MAX				2
 
-/* CXL 3.2 8.1.4: Non-CXL Function Map DVSEC */
-#define CXL_DVSEC_FUNCTION_MAP				2
-
-/* CXL 3.2 8.1.5: Extensions DVSEC for Ports */
-#define CXL_DVSEC_PORT					3
-#define   CXL_DVSEC_PORT_CTL				0x0c
-#define    CXL_DVSEC_PORT_CTL_UNMASK_SBR		0x00000001
-
-/* CXL 3.2 8.1.6: GPF DVSEC for CXL Port */
-#define CXL_DVSEC_PORT_GPF				4
-#define  CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET	0x0C
-#define   CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK	__GENMASK(3, 0)
-#define   CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK	__GENMASK(11, 8)
-#define  CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET	0xE
-#define   CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK	__GENMASK(3, 0)
-#define   CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK	__GENMASK(11, 8)
-
-/* CXL 3.2 8.1.7: GPF DVSEC for CXL Device */
-#define CXL_DVSEC_DEVICE_GPF				5
-
-/* CXL 3.2 8.1.9: Register Locator DVSEC */
-#define CXL_DVSEC_REG_LOCATOR				8
-#define  CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET		0xC
-#define   CXL_DVSEC_REG_LOCATOR_BIR_MASK		__GENMASK(2, 0)
-#define   CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK		__GENMASK(15, 8)
-#define   CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK	__GENMASK(31, 16)
+/* CXL r4.0, 8.1.4: Non-CXL Function Map DVSEC */
+#define PCI_DVSEC_CXL_FUNCTION_MAP			2
+
+/* CXL r4.0, 8.1.5: Extensions DVSEC for Ports */
+#define PCI_DVSEC_CXL_PORT				3
+#define  PCI_DVSEC_CXL_PORT_CTL				0x0c
+#define   PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR		0x00000001
+
+/* CXL r4.0, 8.1.6: GPF DVSEC for CXL Port */
+#define PCI_DVSEC_CXL_PORT_GPF				4
+#define  PCI_DVSEC_CXL_PORT_GPF_PHASE_1_CONTROL		0x0C
+#define   PCI_DVSEC_CXL_PORT_GPF_PHASE_1_TMO_BASE	__GENMASK(3, 0)
+#define   PCI_DVSEC_CXL_PORT_GPF_PHASE_1_TMO_SCALE	__GENMASK(11, 8)
+#define  PCI_DVSEC_CXL_PORT_GPF_PHASE_2_CONTROL		0xE
+#define   PCI_DVSEC_CXL_PORT_GPF_PHASE_2_TMO_BASE	__GENMASK(3, 0)
+#define   PCI_DVSEC_CXL_PORT_GPF_PHASE_2_TMO_SCALE	__GENMASK(11, 8)
+
+/* CXL r4.0, 8.1.7: GPF DVSEC for CXL Device */
+#define PCI_DVSEC_CXL_DEVICE_GPF			5
+
+/* CXL r4.0, 8.1.9: Register Locator DVSEC */
+#define PCI_DVSEC_CXL_REG_LOCATOR			8
+#define  PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1		0xC
+#define   PCI_DVSEC_CXL_REG_LOCATOR_BIR			__GENMASK(2, 0)
+#define   PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_ID		__GENMASK(15, 8)
+#define   PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_OFF_LOW	__GENMASK(31, 16)
 
 #endif /* LINUX_PCI_REGS_H */

From b9b8e4556e3da9fcc8bf5d9e3658010de6634a6c Mon Sep 17 00:00:00 2001
From: Terry Bowman <terry.bowman@amd.com>
Date: Wed, 14 Jan 2026 12:20:25 -0600
Subject: [PATCH 082/143] cxl/pci: Remove unnecessary CXL Endpoint handling
 helper functions

The CXL driver's cxl_handle_endpoint_cor_ras()/cxl_handle_endpoint_ras()
are unnecessary helper functions used only for Endpoints. Remove these
functions as they are not common for all CXL devices and do not provide
value for EP handling.

Rename __cxl_handle_ras() to cxl_handle_ras() and __cxl_handle_cor_ras()
to cxl_handle_cor_ras().

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Tested-by: Joshua Hahn <joshua.hahnjy@gmail.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Link: https://patch.msgid.link/20260114182055.46029-5-terry.bowman@amd.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit ca3d1a53e62093d17436abd447463da9c0f4e56b)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/pci.c | 26 ++++++++------------------
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 077b386e0c8d6..3ec7407f0c5da 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -632,8 +632,8 @@ void read_cdat_data(struct cxl_port *port)
 }
 EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL");
 
-static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds,
-				 void __iomem *ras_base)
+static void cxl_handle_cor_ras(struct cxl_dev_state *cxlds,
+			       void __iomem *ras_base)
 {
 	void __iomem *addr;
 	u32 status;
@@ -649,11 +649,6 @@ static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds,
 	}
 }
 
-static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds)
-{
-	return __cxl_handle_cor_ras(cxlds, cxlds->regs.ras);
-}
-
 /* CXL spec rev3.0 8.2.4.16.1 */
 static void header_log_copy(void __iomem *ras_base, u32 *log)
 {
@@ -675,8 +670,8 @@ static void header_log_copy(void __iomem *ras_base, u32 *log)
  * Log the state of the RAS status registers and prepare them to log the
  * next error status. Return 1 if reset needed.
  */
-static bool __cxl_handle_ras(struct cxl_dev_state *cxlds,
-				  void __iomem *ras_base)
+static bool cxl_handle_ras(struct cxl_dev_state *cxlds,
+			   void __iomem *ras_base)
 {
 	u32 hl[CXL_HEADERLOG_SIZE_U32];
 	void __iomem *addr;
@@ -709,11 +704,6 @@ static bool __cxl_handle_ras(struct cxl_dev_state *cxlds,
 	return true;
 }
 
-static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds)
-{
-	return __cxl_handle_ras(cxlds, cxlds->regs.ras);
-}
-
 #ifdef CONFIG_PCIEAER_CXL
 
 static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
@@ -792,13 +782,13 @@ EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL");
 static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds,
 					  struct cxl_dport *dport)
 {
-	return __cxl_handle_cor_ras(cxlds, dport->regs.ras);
+	return cxl_handle_cor_ras(cxlds, dport->regs.ras);
 }
 
 static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds,
 				       struct cxl_dport *dport)
 {
-	return __cxl_handle_ras(cxlds, dport->regs.ras);
+	return cxl_handle_ras(cxlds, dport->regs.ras);
 }
 
 /*
@@ -895,7 +885,7 @@ void cxl_cor_error_detected(struct pci_dev *pdev)
 		if (cxlds->rcd)
 			cxl_handle_rdport_errors(cxlds);
 
-		cxl_handle_endpoint_cor_ras(cxlds);
+		cxl_handle_cor_ras(cxlds, cxlds->regs.ras);
 	}
 }
 EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL");
@@ -924,7 +914,7 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
 		 * chance the situation is recoverable dump the status of the RAS
 		 * capability registers and bounce the active state of the memdev.
 		 */
-		ue = cxl_handle_endpoint_ras(cxlds);
+		ue = cxl_handle_ras(cxlds, cxlds->regs.ras);
 	}
 
 

From e0c700e71cf38b7a07f510b585b97beeaba67d5f Mon Sep 17 00:00:00 2001
From: Terry Bowman <terry.bowman@amd.com>
Date: Wed, 14 Jan 2026 12:20:26 -0600
Subject: [PATCH 083/143] cxl/pci: Remove unnecessary CXL RCH handling helper
 functions

cxl_handle_rdport_cor_ras() and cxl_handle_rdport_ras() are specific
to Restricted CXL Host (RCH) handling. Improve readability and
maintainability by replacing them with the common
cxl_handle_cor_ras() and cxl_handle_ras() functions.

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Reviewed-by: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Link: https://patch.msgid.link/20260114182055.46029-6-terry.bowman@amd.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit eb78ef4d6f0e51243c1ee117f801dbc503e886ab)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/pci.c | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 3ec7407f0c5da..51bb0f372e40d 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -779,18 +779,6 @@ void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
 }
 EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL");
 
-static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds,
-					  struct cxl_dport *dport)
-{
-	return cxl_handle_cor_ras(cxlds, dport->regs.ras);
-}
-
-static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds,
-				       struct cxl_dport *dport)
-{
-	return cxl_handle_ras(cxlds, dport->regs.ras);
-}
-
 /*
  * Copy the AER capability registers using 32 bit read accesses.
  * This is necessary because RCRB AER capability is MMIO mapped. Clear the
@@ -860,9 +848,9 @@ static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
 	pci_print_aer(pdev, severity, &aer_regs);
 
 	if (severity == AER_CORRECTABLE)
-		cxl_handle_rdport_cor_ras(cxlds, dport);
+		cxl_handle_cor_ras(cxlds, dport->regs.ras);
 	else
-		cxl_handle_rdport_ras(cxlds, dport);
+		cxl_handle_ras(cxlds, dport->regs.ras);
 }
 
 #else

From df7634c649cee0352c72b17948ef16dd0033de25 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Wed, 14 Jan 2026 12:20:28 -0600
Subject: [PATCH 084/143] cxl/pci: Remove CXL VH handling in CONFIG_PCIEAER_CXL
 conditional blocks from core/pci.c

Create a new config, CONFIG_CXL_RAS, and put all CXL RAS items behind it.
The config depends on CPER support (ACPI_APEI_GHES), PCIe AER (PCIEAER)
and CXL_PCI to build. Move the related VH RAS code from core/pci.c to
core/ras.c.

Restricted CXL host (RCH) RAS functions will be moved in a future patch.

Cc: Robert Richter <rrichter@amd.com>
Reviewed-by: Joshua Hahn <joshua.hahnjy@gmail.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Co-developed-by: Terry Bowman <terry.bowman@amd.com>
Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Link: https://patch.msgid.link/20260114182055.46029-8-terry.bowman@amd.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 7ff8b1d60881c5f97b5ae426e14d2822917d3b69)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/Kconfig       |   4 +
 drivers/cxl/core/Makefile |   2 +-
 drivers/cxl/core/core.h   |  31 +++++++
 drivers/cxl/core/pci.c    | 189 +-------------------------------------
 drivers/cxl/core/ras.c    | 176 +++++++++++++++++++++++++++++++++++
 drivers/cxl/cxl.h         |   8 --
 drivers/cxl/cxlpci.h      |  16 ++++
 tools/testing/cxl/Kbuild  |   2 +-
 8 files changed, 233 insertions(+), 195 deletions(-)

diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig
index f1361ed6a0d48..6b8fb3284e7e4 100644
--- a/drivers/cxl/Kconfig
+++ b/drivers/cxl/Kconfig
@@ -233,4 +233,8 @@ config CXL_MCE
 	def_bool y
 	depends on X86_MCE && MEMORY_FAILURE
 
+config CXL_RAS
+	def_bool y
+	depends on ACPI_APEI_GHES && PCIEAER && CXL_PCI
+
 endif
diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
index 5ad8fef210b5c..b2930cc54f8ba 100644
--- a/drivers/cxl/core/Makefile
+++ b/drivers/cxl/core/Makefile
@@ -14,9 +14,9 @@ cxl_core-y += pci.o
 cxl_core-y += hdm.o
 cxl_core-y += pmu.o
 cxl_core-y += cdat.o
-cxl_core-y += ras.o
 cxl_core-$(CONFIG_TRACING) += trace.o
 cxl_core-$(CONFIG_CXL_REGION) += region.o
 cxl_core-$(CONFIG_CXL_MCE) += mce.o
 cxl_core-$(CONFIG_CXL_FEATURES) += features.o
 cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o
+cxl_core-$(CONFIG_CXL_RAS) += ras.o
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 1fb66132b7777..bc818de87cccc 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -144,8 +144,39 @@ int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c);
 int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port,
 					struct access_coordinate *c);
 
+#ifdef CONFIG_CXL_RAS
 int cxl_ras_init(void);
 void cxl_ras_exit(void);
+bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base);
+void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base);
+#else
+static inline int cxl_ras_init(void)
+{
+	return 0;
+}
+
+static inline void cxl_ras_exit(void)
+{
+}
+
+static inline bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base)
+{
+	return false;
+}
+static inline void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) { }
+#endif /* CONFIG_CXL_RAS */
+
+/* Restricted CXL Host specific RAS functions */
+#ifdef CONFIG_CXL_RAS
+void cxl_dport_map_rch_aer(struct cxl_dport *dport);
+void cxl_disable_rch_root_ints(struct cxl_dport *dport);
+void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds);
+#else
+static inline void cxl_dport_map_rch_aer(struct cxl_dport *dport) { }
+static inline void cxl_disable_rch_root_ints(struct cxl_dport *dport) { }
+static inline void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { }
+#endif /* CONFIG_CXL_RAS */
+
 int cxl_gpf_port_setup(struct cxl_dport *dport);
 
 struct cxl_hdm;
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 51bb0f372e40d..e132fff809792 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -632,81 +632,8 @@ void read_cdat_data(struct cxl_port *port)
 }
 EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL");
 
-static void cxl_handle_cor_ras(struct cxl_dev_state *cxlds,
-			       void __iomem *ras_base)
-{
-	void __iomem *addr;
-	u32 status;
-
-	if (!ras_base)
-		return;
-
-	addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
-	status = readl(addr);
-	if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
-		writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
-		trace_cxl_aer_correctable_error(cxlds->cxlmd, status);
-	}
-}
-
-/* CXL spec rev3.0 8.2.4.16.1 */
-static void header_log_copy(void __iomem *ras_base, u32 *log)
-{
-	void __iomem *addr;
-	u32 *log_addr;
-	int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);
-
-	addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET;
-	log_addr = log;
-
-	for (i = 0; i < log_u32_size; i++) {
-		*log_addr = readl(addr);
-		log_addr++;
-		addr += sizeof(u32);
-	}
-}
-
-/*
- * Log the state of the RAS status registers and prepare them to log the
- * next error status. Return 1 if reset needed.
- */
-static bool cxl_handle_ras(struct cxl_dev_state *cxlds,
-			   void __iomem *ras_base)
-{
-	u32 hl[CXL_HEADERLOG_SIZE_U32];
-	void __iomem *addr;
-	u32 status;
-	u32 fe;
-
-	if (!ras_base)
-		return false;
-
-	addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
-	status = readl(addr);
-	if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
-		return false;
-
-	/* If multiple errors, log header points to first error from ctrl reg */
-	if (hweight32(status) > 1) {
-		void __iomem *rcc_addr =
-			ras_base + CXL_RAS_CAP_CONTROL_OFFSET;
-
-		fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
-				   readl(rcc_addr)));
-	} else {
-		fe = status;
-	}
-
-	header_log_copy(ras_base, hl);
-	trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl);
-	writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
-
-	return true;
-}
-
-#ifdef CONFIG_PCIEAER_CXL
-
-static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
+#ifdef CONFIG_CXL_RAS
+void cxl_dport_map_rch_aer(struct cxl_dport *dport)
 {
 	resource_size_t aer_phys;
 	struct device *host;
@@ -721,19 +648,7 @@ static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
 	}
 }
 
-static void cxl_dport_map_ras(struct cxl_dport *dport)
-{
-	struct cxl_register_map *map = &dport->reg_map;
-	struct device *dev = dport->dport_dev;
-
-	if (!map->component_map.ras.valid)
-		dev_dbg(dev, "RAS registers not found\n");
-	else if (cxl_map_component_regs(map, &dport->regs.component,
-					BIT(CXL_CM_CAP_CAP_ID_RAS)))
-		dev_dbg(dev, "Failed to map RAS capability.\n");
-}
-
-static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
+void cxl_disable_rch_root_ints(struct cxl_dport *dport)
 {
 	void __iomem *aer_base = dport->regs.dport_aer;
 	u32 aer_cmd_mask, aer_cmd;
@@ -757,28 +672,6 @@ static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
 	writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND);
 }
 
-/**
- * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport
- * @dport: the cxl_dport that needs to be initialized
- * @host: host device for devm operations
- */
-void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
-{
-	dport->reg_map.host = host;
-	cxl_dport_map_ras(dport);
-
-	if (dport->rch) {
-		struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev);
-
-		if (!host_bridge->native_aer)
-			return;
-
-		cxl_dport_map_rch_aer(dport);
-		cxl_disable_rch_root_ints(dport);
-	}
-}
-EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL");
-
 /*
  * Copy the AER capability registers using 32 bit read accesses.
  * This is necessary because RCRB AER capability is MMIO mapped. Clear the
@@ -827,7 +720,7 @@ static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs,
 	return false;
 }
 
-static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
+void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
 {
 	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
 	struct aer_capability_regs aer_regs;
@@ -852,82 +745,8 @@ static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
 	else
 		cxl_handle_ras(cxlds, dport->regs.ras);
 }
-
-#else
-static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { }
 #endif
 
-void cxl_cor_error_detected(struct pci_dev *pdev)
-{
-	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
-	struct device *dev = &cxlds->cxlmd->dev;
-
-	scoped_guard(device, dev) {
-		if (!dev->driver) {
-			dev_warn(&pdev->dev,
-				 "%s: memdev disabled, abort error handling\n",
-				 dev_name(dev));
-			return;
-		}
-
-		if (cxlds->rcd)
-			cxl_handle_rdport_errors(cxlds);
-
-		cxl_handle_cor_ras(cxlds, cxlds->regs.ras);
-	}
-}
-EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL");
-
-pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
-				    pci_channel_state_t state)
-{
-	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
-	struct cxl_memdev *cxlmd = cxlds->cxlmd;
-	struct device *dev = &cxlmd->dev;
-	bool ue;
-
-	scoped_guard(device, dev) {
-		if (!dev->driver) {
-			dev_warn(&pdev->dev,
-				 "%s: memdev disabled, abort error handling\n",
-				 dev_name(dev));
-			return PCI_ERS_RESULT_DISCONNECT;
-		}
-
-		if (cxlds->rcd)
-			cxl_handle_rdport_errors(cxlds);
-		/*
-		 * A frozen channel indicates an impending reset which is fatal to
-		 * CXL.mem operation, and will likely crash the system. On the off
-		 * chance the situation is recoverable dump the status of the RAS
-		 * capability registers and bounce the active state of the memdev.
-		 */
-		ue = cxl_handle_ras(cxlds, cxlds->regs.ras);
-	}
-
-
-	switch (state) {
-	case pci_channel_io_normal:
-		if (ue) {
-			device_release_driver(dev);
-			return PCI_ERS_RESULT_NEED_RESET;
-		}
-		return PCI_ERS_RESULT_CAN_RECOVER;
-	case pci_channel_io_frozen:
-		dev_warn(&pdev->dev,
-			 "%s: frozen state error detected, disable CXL.mem\n",
-			 dev_name(dev));
-		device_release_driver(dev);
-		return PCI_ERS_RESULT_NEED_RESET;
-	case pci_channel_io_perm_failure:
-		dev_warn(&pdev->dev,
-			 "failure state error detected, request disconnect\n");
-		return PCI_ERS_RESULT_DISCONNECT;
-	}
-	return PCI_ERS_RESULT_NEED_RESET;
-}
-EXPORT_SYMBOL_NS_GPL(cxl_error_detected, "CXL");
-
 static int cxl_flit_size(struct pci_dev *pdev)
 {
 	if (cxl_pci_flit_256(pdev))
diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
index 2731ba3a07993..b933030b8e1e7 100644
--- a/drivers/cxl/core/ras.c
+++ b/drivers/cxl/core/ras.c
@@ -5,6 +5,7 @@
 #include <linux/aer.h>
 #include <cxl/event.h>
 #include <cxlmem.h>
+#include <cxlpci.h>
 #include "trace.h"
 
 static void cxl_cper_trace_corr_port_prot_err(struct pci_dev *pdev,
@@ -124,3 +125,178 @@ void cxl_ras_exit(void)
 	cxl_cper_unregister_prot_err_work(&cxl_cper_prot_err_work);
 	cancel_work_sync(&cxl_cper_prot_err_work);
 }
+
+static void cxl_dport_map_ras(struct cxl_dport *dport)
+{
+	struct cxl_register_map *map = &dport->reg_map;
+	struct device *dev = dport->dport_dev;
+
+	if (!map->component_map.ras.valid)
+		dev_dbg(dev, "RAS registers not found\n");
+	else if (cxl_map_component_regs(map, &dport->regs.component,
+					BIT(CXL_CM_CAP_CAP_ID_RAS)))
+		dev_dbg(dev, "Failed to map RAS capability.\n");
+}
+
+/**
+ * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport
+ * @dport: the cxl_dport that needs to be initialized
+ * @host: host device for devm operations
+ */
+void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
+{
+	dport->reg_map.host = host;
+	cxl_dport_map_ras(dport);
+
+	if (dport->rch) {
+		struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev);
+
+		if (!host_bridge->native_aer)
+			return;
+
+		cxl_dport_map_rch_aer(dport);
+		cxl_disable_rch_root_ints(dport);
+	}
+}
+EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL");
+
+void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base)
+{
+	void __iomem *addr;
+	u32 status;
+
+	if (!ras_base)
+		return;
+
+	addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
+	status = readl(addr);
+	if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
+		writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
+		trace_cxl_aer_correctable_error(cxlds->cxlmd, status);
+	}
+}
+
+/* CXL spec rev3.0 8.2.4.16.1 */
+static void header_log_copy(void __iomem *ras_base, u32 *log)
+{
+	void __iomem *addr;
+	u32 *log_addr;
+	int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);
+
+	addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET;
+	log_addr = log;
+
+	for (i = 0; i < log_u32_size; i++) {
+		*log_addr = readl(addr);
+		log_addr++;
+		addr += sizeof(u32);
+	}
+}
+
+/*
+ * Log the state of the RAS status registers and prepare them to log the
+ * next error status. Return 1 if reset needed.
+ */
+bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base)
+{
+	u32 hl[CXL_HEADERLOG_SIZE_U32];
+	void __iomem *addr;
+	u32 status;
+	u32 fe;
+
+	if (!ras_base)
+		return false;
+
+	addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
+	status = readl(addr);
+	if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
+		return false;
+
+	/* If multiple errors, log header points to first error from ctrl reg */
+	if (hweight32(status) > 1) {
+		void __iomem *rcc_addr =
+			ras_base + CXL_RAS_CAP_CONTROL_OFFSET;
+
+		fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
+				   readl(rcc_addr)));
+	} else {
+		fe = status;
+	}
+
+	header_log_copy(ras_base, hl);
+	trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl);
+	writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
+
+	return true;
+}
+
+void cxl_cor_error_detected(struct pci_dev *pdev)
+{
+	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+	struct device *dev = &cxlds->cxlmd->dev;
+
+	scoped_guard(device, dev) {
+		if (!dev->driver) {
+			dev_warn(&pdev->dev,
+				 "%s: memdev disabled, abort error handling\n",
+				 dev_name(dev));
+			return;
+		}
+
+		if (cxlds->rcd)
+			cxl_handle_rdport_errors(cxlds);
+
+		cxl_handle_cor_ras(cxlds, cxlds->regs.ras);
+	}
+}
+EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL");
+
+pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
+				    pci_channel_state_t state)
+{
+	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+	struct cxl_memdev *cxlmd = cxlds->cxlmd;
+	struct device *dev = &cxlmd->dev;
+	bool ue;
+
+	scoped_guard(device, dev) {
+		if (!dev->driver) {
+			dev_warn(&pdev->dev,
+				 "%s: memdev disabled, abort error handling\n",
+				 dev_name(dev));
+			return PCI_ERS_RESULT_DISCONNECT;
+		}
+
+		if (cxlds->rcd)
+			cxl_handle_rdport_errors(cxlds);
+		/*
+		 * A frozen channel indicates an impending reset which is fatal to
+		 * CXL.mem operation, and will likely crash the system. On the off
+		 * chance the situation is recoverable dump the status of the RAS
+		 * capability registers and bounce the active state of the memdev.
+		 */
+		ue = cxl_handle_ras(cxlds, cxlds->regs.ras);
+	}
+
+
+	switch (state) {
+	case pci_channel_io_normal:
+		if (ue) {
+			device_release_driver(dev);
+			return PCI_ERS_RESULT_NEED_RESET;
+		}
+		return PCI_ERS_RESULT_CAN_RECOVER;
+	case pci_channel_io_frozen:
+		dev_warn(&pdev->dev,
+			 "%s: frozen state error detected, disable CXL.mem\n",
+			 dev_name(dev));
+		device_release_driver(dev);
+		return PCI_ERS_RESULT_NEED_RESET;
+	case pci_channel_io_perm_failure:
+		dev_warn(&pdev->dev,
+			 "failure state error detected, request disconnect\n");
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_error_detected, "CXL");
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index c796c3db36e0b..2301dd42f4f2d 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -805,14 +805,6 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
 					 struct device *dport_dev, int port_id,
 					 resource_size_t rcrb);
 
-#ifdef CONFIG_PCIEAER_CXL
-void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport);
-void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host);
-#else
-static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport,
-						struct device *host) { }
-#endif
-
 struct cxl_decoder *to_cxl_decoder(struct device *dev);
 struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev);
 struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev);
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index cdb7cf3dbcb43..6f9c78886fd9a 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -76,7 +76,23 @@ static inline bool cxl_pci_flit_256(struct pci_dev *pdev)
 
 struct cxl_dev_state;
 void read_cdat_data(struct cxl_port *port);
+
+#ifdef CONFIG_CXL_RAS
 void cxl_cor_error_detected(struct pci_dev *pdev);
 pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
 				    pci_channel_state_t state);
+void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host);
+#else
+static inline void cxl_cor_error_detected(struct pci_dev *pdev) { }
+
+static inline pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
+						  pci_channel_state_t state)
+{
+	return PCI_ERS_RESULT_NONE;
+}
+
+static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport,
+						struct device *host) { }
+#endif
+
 #endif /* __CXL_PCI_H__ */
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 0e151d0572d1f..b7ea66382f3b1 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -57,12 +57,12 @@ cxl_core-y += $(CXL_CORE_SRC)/pci.o
 cxl_core-y += $(CXL_CORE_SRC)/hdm.o
 cxl_core-y += $(CXL_CORE_SRC)/pmu.o
 cxl_core-y += $(CXL_CORE_SRC)/cdat.o
-cxl_core-y += $(CXL_CORE_SRC)/ras.o
 cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o
 cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o
 cxl_core-$(CONFIG_CXL_MCE) += $(CXL_CORE_SRC)/mce.o
 cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o
 cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += $(CXL_CORE_SRC)/edac.o
+cxl_core-$(CONFIG_CXL_RAS) += $(CXL_CORE_SRC)/ras.o
 cxl_core-y += config_check.o
 cxl_core-y += cxl_core_test.o
 cxl_core-y += cxl_core_exports.o

From 8891ca7c6e4937945336887a8986ff4cfd91cc0a Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 14 Jan 2026 12:20:34 -0600
Subject: [PATCH 085/143] PCI/AER: Replace PCIEAER_CXL symbol with CXL_RAS

One of the primary reasons for the CXL driver to exist is to perform error
handling. If both PCIEAER and CXL are enabled then light up CXL error
handling as well. Now that all RAS handling is moved under the CXL_RAS
symbol, drop the previous PCIEAER_CXL symbol.

Reviewed-by: Terry Bowman <terry.bowman@amd.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20260114182055.46029-14-terry.bowman@amd.com
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit d18f1b7beadf1af1cd334ff789ba5a07ce285bbc)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/Kconfig      | 2 +-
 drivers/pci/pcie/Kconfig | 9 ---------
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig
index 6b8fb3284e7e4..5b5aa941ad2fb 100644
--- a/drivers/cxl/Kconfig
+++ b/drivers/cxl/Kconfig
@@ -235,6 +235,6 @@ config CXL_MCE
 
 config CXL_RAS
 	def_bool y
-	depends on ACPI_APEI_GHES && PCIEAER && CXL_PCI
+	depends on ACPI_APEI_GHES && PCIEAER && CXL_BUS
 
 endif
diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig
index 17919b99fa66a..207c2deae35ff 100644
--- a/drivers/pci/pcie/Kconfig
+++ b/drivers/pci/pcie/Kconfig
@@ -49,15 +49,6 @@ config PCIEAER_INJECT
 	  gotten from:
 	     https://github.com/intel/aer-inject.git
 
-config PCIEAER_CXL
-	bool "PCI Express CXL RAS support"
-	default y
-	depends on PCIEAER && CXL_PCI
-	help
-	  Enables CXL error handling.
-
-	  If unsure, say Y.
-
 #
 # PCI Express ECRC
 #

From 50a3017a3638240657dd58d047d50ce3c6623024 Mon Sep 17 00:00:00 2001
From: Terry Bowman <terry.bowman@amd.com>
Date: Wed, 14 Jan 2026 12:20:29 -0600
Subject: [PATCH 086/143] cxl/pci: Move CXL driver's RCH error handling into
 core/ras_rch.c

Restricted CXL Host (RCH) protocol error handling uses a procedure distinct
from the CXL Virtual Hierarchy (VH) handling. This is because of the
differences between the RCH and VH topologies. Improve maintainability and
add the ability to enable/disable RCH handling.

Move and combine the RCH handling code into a single file, core/ras_rch.c,
conditionally compiled with the CONFIG_CXL_RAS kernel config.

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Link: https://patch.msgid.link/20260114182055.46029-9-terry.bowman@amd.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 0ff60f2ec3e4043a442e805f80f8a2445113ec8f)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/Makefile  |   1 +
 drivers/cxl/core/core.h    |  11 +---
 drivers/cxl/core/pci.c     | 115 -----------------------------------
 drivers/cxl/core/ras_rch.c | 121 +++++++++++++++++++++++++++++++++++++
 tools/testing/cxl/Kbuild   |   1 +
 5 files changed, 126 insertions(+), 123 deletions(-)
 create mode 100644 drivers/cxl/core/ras_rch.c

diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
index b2930cc54f8ba..b37f38d502d8c 100644
--- a/drivers/cxl/core/Makefile
+++ b/drivers/cxl/core/Makefile
@@ -20,3 +20,4 @@ cxl_core-$(CONFIG_CXL_MCE) += mce.o
 cxl_core-$(CONFIG_CXL_FEATURES) += features.o
 cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o
 cxl_core-$(CONFIG_CXL_RAS) += ras.o
+cxl_core-$(CONFIG_CXL_RAS) += ras_rch.o
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index bc818de87cccc..724361195057e 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -149,6 +149,9 @@ int cxl_ras_init(void);
 void cxl_ras_exit(void);
 bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base);
 void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base);
+void cxl_dport_map_rch_aer(struct cxl_dport *dport);
+void cxl_disable_rch_root_ints(struct cxl_dport *dport);
+void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds);
 #else
 static inline int cxl_ras_init(void)
 {
@@ -164,14 +167,6 @@ static inline bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras
 	return false;
 }
 static inline void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) { }
-#endif /* CONFIG_CXL_RAS */
-
-/* Restricted CXL Host specific RAS functions */
-#ifdef CONFIG_CXL_RAS
-void cxl_dport_map_rch_aer(struct cxl_dport *dport);
-void cxl_disable_rch_root_ints(struct cxl_dport *dport);
-void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds);
-#else
 static inline void cxl_dport_map_rch_aer(struct cxl_dport *dport) { }
 static inline void cxl_disable_rch_root_ints(struct cxl_dport *dport) { }
 static inline void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { }
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index e132fff809792..b838c59d7a3c0 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -632,121 +632,6 @@ void read_cdat_data(struct cxl_port *port)
 }
 EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL");
 
-#ifdef CONFIG_CXL_RAS
-void cxl_dport_map_rch_aer(struct cxl_dport *dport)
-{
-	resource_size_t aer_phys;
-	struct device *host;
-	u16 aer_cap;
-
-	aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base);
-	if (aer_cap) {
-		host = dport->reg_map.host;
-		aer_phys = aer_cap + dport->rcrb.base;
-		dport->regs.dport_aer = devm_cxl_iomap_block(host, aer_phys,
-						sizeof(struct aer_capability_regs));
-	}
-}
-
-void cxl_disable_rch_root_ints(struct cxl_dport *dport)
-{
-	void __iomem *aer_base = dport->regs.dport_aer;
-	u32 aer_cmd_mask, aer_cmd;
-
-	if (!aer_base)
-		return;
-
-	/*
-	 * Disable RCH root port command interrupts.
-	 * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors
-	 *
-	 * This sequence may not be necessary. CXL spec states disabling
-	 * the root cmd register's interrupts is required. But, PCI spec
-	 * shows these are disabled by default on reset.
-	 */
-	aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN |
-			PCI_ERR_ROOT_CMD_NONFATAL_EN |
-			PCI_ERR_ROOT_CMD_FATAL_EN);
-	aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND);
-	aer_cmd &= ~aer_cmd_mask;
-	writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND);
-}
-
-/*
- * Copy the AER capability registers using 32 bit read accesses.
- * This is necessary because RCRB AER capability is MMIO mapped. Clear the
- * status after copying.
- *
- * @aer_base: base address of AER capability block in RCRB
- * @aer_regs: destination for copying AER capability
- */
-static bool cxl_rch_get_aer_info(void __iomem *aer_base,
-				 struct aer_capability_regs *aer_regs)
-{
-	int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32);
-	u32 *aer_regs_buf = (u32 *)aer_regs;
-	int n;
-
-	if (!aer_base)
-		return false;
-
-	/* Use readl() to guarantee 32-bit accesses */
-	for (n = 0; n < read_cnt; n++)
-		aer_regs_buf[n] = readl(aer_base + n * sizeof(u32));
-
-	writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS);
-	writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS);
-
-	return true;
-}
-
-/* Get AER severity. Return false if there is no error. */
-static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs,
-				     int *severity)
-{
-	if (aer_regs->uncor_status & ~aer_regs->uncor_mask) {
-		if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV)
-			*severity = AER_FATAL;
-		else
-			*severity = AER_NONFATAL;
-		return true;
-	}
-
-	if (aer_regs->cor_status & ~aer_regs->cor_mask) {
-		*severity = AER_CORRECTABLE;
-		return true;
-	}
-
-	return false;
-}
-
-void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
-{
-	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
-	struct aer_capability_regs aer_regs;
-	struct cxl_dport *dport;
-	int severity;
-
-	struct cxl_port *port __free(put_cxl_port) =
-		cxl_pci_find_port(pdev, &dport);
-	if (!port)
-		return;
-
-	if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs))
-		return;
-
-	if (!cxl_rch_get_aer_severity(&aer_regs, &severity))
-		return;
-
-	pci_print_aer(pdev, severity, &aer_regs);
-
-	if (severity == AER_CORRECTABLE)
-		cxl_handle_cor_ras(cxlds, dport->regs.ras);
-	else
-		cxl_handle_ras(cxlds, dport->regs.ras);
-}
-#endif
-
 static int cxl_flit_size(struct pci_dev *pdev)
 {
 	if (cxl_pci_flit_256(pdev))
diff --git a/drivers/cxl/core/ras_rch.c b/drivers/cxl/core/ras_rch.c
new file mode 100644
index 0000000000000..ed58afd18ecc9
--- /dev/null
+++ b/drivers/cxl/core/ras_rch.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2025 AMD Corporation. All rights reserved. */
+
+#include <linux/types.h>
+#include <linux/aer.h>
+#include "cxl.h"
+#include "core.h"
+#include "cxlmem.h"
+
+void cxl_dport_map_rch_aer(struct cxl_dport *dport)
+{
+	resource_size_t aer_phys;
+	struct device *host;
+	u16 aer_cap;
+
+	aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base);
+	if (aer_cap) {
+		host = dport->reg_map.host;
+		aer_phys = aer_cap + dport->rcrb.base;
+		dport->regs.dport_aer =
+			devm_cxl_iomap_block(host, aer_phys,
+					     sizeof(struct aer_capability_regs));
+	}
+}
+
+void cxl_disable_rch_root_ints(struct cxl_dport *dport)
+{
+	void __iomem *aer_base = dport->regs.dport_aer;
+	u32 aer_cmd_mask, aer_cmd;
+
+	if (!aer_base)
+		return;
+
+	/*
+	 * Disable RCH root port command interrupts.
+	 * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors
+	 *
+	 * This sequence may not be necessary. CXL spec states disabling
+	 * the root cmd register's interrupts is required. But, PCI spec
+	 * shows these are disabled by default on reset.
+	 */
+	aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN |
+			PCI_ERR_ROOT_CMD_NONFATAL_EN |
+			PCI_ERR_ROOT_CMD_FATAL_EN);
+	aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND);
+	aer_cmd &= ~aer_cmd_mask;
+	writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND);
+}
+
+/*
+ * Copy the AER capability registers using 32 bit read accesses.
+ * This is necessary because RCRB AER capability is MMIO mapped. Clear the
+ * status after copying.
+ *
+ * @aer_base: base address of AER capability block in RCRB
+ * @aer_regs: destination for copying AER capability
+ */
+static bool cxl_rch_get_aer_info(void __iomem *aer_base,
+				 struct aer_capability_regs *aer_regs)
+{
+	int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32);
+	u32 *aer_regs_buf = (u32 *)aer_regs;
+	int n;
+
+	if (!aer_base)
+		return false;
+
+	/* Use readl() to guarantee 32-bit accesses */
+	for (n = 0; n < read_cnt; n++)
+		aer_regs_buf[n] = readl(aer_base + n * sizeof(u32));
+
+	writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS);
+	writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS);
+
+	return true;
+}
+
+/* Get AER severity. Return false if there is no error. */
+static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs,
+				     int *severity)
+{
+	if (aer_regs->uncor_status & ~aer_regs->uncor_mask) {
+		if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV)
+			*severity = AER_FATAL;
+		else
+			*severity = AER_NONFATAL;
+		return true;
+	}
+
+	if (aer_regs->cor_status & ~aer_regs->cor_mask) {
+		*severity = AER_CORRECTABLE;
+		return true;
+	}
+
+	return false;
+}
+
+void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
+{
+	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+	struct aer_capability_regs aer_regs;
+	struct cxl_dport *dport;
+	int severity;
+
+	struct cxl_port *port __free(put_cxl_port) =
+		cxl_pci_find_port(pdev, &dport);
+	if (!port)
+		return;
+
+	if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs))
+		return;
+
+	if (!cxl_rch_get_aer_severity(&aer_regs, &severity))
+		return;
+
+	pci_print_aer(pdev, severity, &aer_regs);
+	if (severity == AER_CORRECTABLE)
+		cxl_handle_cor_ras(cxlds, dport->regs.ras);
+	else
+		cxl_handle_ras(cxlds, dport->regs.ras);
+}
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index b7ea66382f3b1..6eceefefb0e04 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -63,6 +63,7 @@ cxl_core-$(CONFIG_CXL_MCE) += $(CXL_CORE_SRC)/mce.o
 cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o
 cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += $(CXL_CORE_SRC)/edac.o
 cxl_core-$(CONFIG_CXL_RAS) += $(CXL_CORE_SRC)/ras.o
+cxl_core-$(CONFIG_CXL_RAS) += $(CXL_CORE_SRC)/ras_rch.o
 cxl_core-y += config_check.o
 cxl_core-y += cxl_core_test.o
 cxl_core-y += cxl_core_exports.o

From 9228e0c331f6a8074a83eaf2a81a5e09dcee125b Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 14 Jan 2026 12:20:37 -0600
Subject: [PATCH 087/143] cxl/mem: Clarify @host for devm_cxl_add_nvdimm()

The convention for devm_ helpers in the CXL driver is that the first
argument is the @host for the operation (locked driver::probe() context).

Reviewed-by: Terry Bowman <terry.bowman@amd.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20260114182055.46029-17-terry.bowman@amd.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit f953b7d5e19a1310dd5d92b86bafc5957847b4d6)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/pmem.c | 13 +++++++------
 drivers/cxl/cxl.h       |  3 ++-
 drivers/cxl/mem.c       |  2 +-
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c
index 8853415c106a9..e7b1e6fa0ea09 100644
--- a/drivers/cxl/core/pmem.c
+++ b/drivers/cxl/core/pmem.c
@@ -237,12 +237,13 @@ static void cxlmd_release_nvdimm(void *_cxlmd)
 
 /**
  * devm_cxl_add_nvdimm() - add a bridge between a cxl_memdev and an nvdimm
- * @parent_port: parent port for the (to be added) @cxlmd endpoint port
- * @cxlmd: cxl_memdev instance that will perform LIBNVDIMM operations
+ * @host: host device for devm operations
+ * @port: any port in the CXL topology to find the nvdimm-bridge device
+ * @cxlmd: parent of the to be created cxl_nvdimm device
  *
  * Return: 0 on success negative error code on failure.
  */
-int devm_cxl_add_nvdimm(struct cxl_port *parent_port,
+int devm_cxl_add_nvdimm(struct device *host, struct cxl_port *port,
 			struct cxl_memdev *cxlmd)
 {
 	struct cxl_nvdimm_bridge *cxl_nvb;
@@ -250,7 +251,7 @@ int devm_cxl_add_nvdimm(struct cxl_port *parent_port,
 	struct device *dev;
 	int rc;
 
-	cxl_nvb = cxl_find_nvdimm_bridge(parent_port);
+	cxl_nvb = cxl_find_nvdimm_bridge(port);
 	if (!cxl_nvb)
 		return -ENODEV;
 
@@ -270,10 +271,10 @@ int devm_cxl_add_nvdimm(struct cxl_port *parent_port,
 	if (rc)
 		goto err;
 
-	dev_dbg(&cxlmd->dev, "register %s\n", dev_name(dev));
+	dev_dbg(host, "register %s\n", dev_name(dev));
 
 	/* @cxlmd carries a reference on @cxl_nvb until cxlmd_release_nvdimm */
-	return devm_add_action_or_reset(&cxlmd->dev, cxlmd_release_nvdimm, cxlmd);
+	return devm_add_action_or_reset(host, cxlmd_release_nvdimm, cxlmd);
 
 err:
 	put_device(dev);
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 2301dd42f4f2d..e1d47062e1d3d 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -889,7 +889,8 @@ struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host,
 						     struct cxl_port *port);
 struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev);
 bool is_cxl_nvdimm(struct device *dev);
-int devm_cxl_add_nvdimm(struct cxl_port *parent_port, struct cxl_memdev *cxlmd);
+int devm_cxl_add_nvdimm(struct device *host, struct cxl_port *port,
+			struct cxl_memdev *cxlmd);
 struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_port *port);
 
 #ifdef CONFIG_CXL_REGION
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 333c366b69e76..0958bea915acb 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -115,7 +115,7 @@ static int cxl_mem_probe(struct device *dev)
 	}
 
 	if (cxl_pmem_size(cxlds) && IS_ENABLED(CONFIG_CXL_PMEM)) {
-		rc = devm_cxl_add_nvdimm(parent_port, cxlmd);
+		rc = devm_cxl_add_nvdimm(dev, parent_port, cxlmd);
 		if (rc) {
 			if (rc == -ENODEV)
 				dev_info(dev, "PMEM disabled by platform\n");

From e8681540c92364df59f59839dcc4caf0497c6aea Mon Sep 17 00:00:00 2001
From: Terry Bowman <terry.bowman@amd.com>
Date: Wed, 14 Jan 2026 12:20:38 -0600
Subject: [PATCH 088/143] cxl: Update RAS handler interfaces to also support
 CXL Ports

CXL PCIe Port Protocol Error handling support will be added to the
CXL drivers in the future. In preparation, rename the existing
interfaces to support handling all CXL PCIe Port Protocol Errors.

The driver's RAS support functions currently rely on a 'struct
cxl_dev_state' type parameter, which is not available for CXL Port
devices. However, since the same CXL RAS capability structure [1] is
needed across most CXL components and devices, a common handling
approach should be adopted.

To accommodate this, update the cxl_handle_cor_ras() and
cxl_handle_ras() functions to take a 'struct device' instead of a
'struct cxl_dev_state'.

No functional changes are introduced.

[1] CXL 3.1 Spec, 8.2.4 CXL.cache and CXL.mem Registers

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Reviewed-by: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Gregory Price <gourry@gourry.net>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Reviewed-by: Ben Cheatham <benjamin.cheatham@amd.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Link: https://patch.msgid.link/20260114182055.46029-18-terry.bowman@amd.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 9a8920ca8ebfb99604f639e7fbc681d0d04518a0)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/core.h    | 14 +++++---------
 drivers/cxl/core/ras.c     | 12 ++++++------
 drivers/cxl/core/ras_rch.c |  4 ++--
 3 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 724361195057e..422531799af2f 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -147,8 +147,8 @@ int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port,
 #ifdef CONFIG_CXL_RAS
 int cxl_ras_init(void);
 void cxl_ras_exit(void);
-bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base);
-void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base);
+bool cxl_handle_ras(struct device *dev, void __iomem *ras_base);
+void cxl_handle_cor_ras(struct device *dev, void __iomem *ras_base);
 void cxl_dport_map_rch_aer(struct cxl_dport *dport);
 void cxl_disable_rch_root_ints(struct cxl_dport *dport);
 void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds);
@@ -157,16 +157,12 @@ static inline int cxl_ras_init(void)
 {
 	return 0;
 }
-
-static inline void cxl_ras_exit(void)
-{
-}
-
-static inline bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base)
+static inline void cxl_ras_exit(void) { }
+static inline bool cxl_handle_ras(struct device *dev, void __iomem *ras_base)
 {
 	return false;
 }
-static inline void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) { }
+static inline void cxl_handle_cor_ras(struct device *dev, void __iomem *ras_base) { }
 static inline void cxl_dport_map_rch_aer(struct cxl_dport *dport) { }
 static inline void cxl_disable_rch_root_ints(struct cxl_dport *dport) { }
 static inline void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { }
diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
index b933030b8e1e7..72908f3ced775 100644
--- a/drivers/cxl/core/ras.c
+++ b/drivers/cxl/core/ras.c
@@ -160,7 +160,7 @@ void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
 }
 EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL");
 
-void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base)
+void cxl_handle_cor_ras(struct device *dev, void __iomem *ras_base)
 {
 	void __iomem *addr;
 	u32 status;
@@ -172,7 +172,7 @@ void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base)
 	status = readl(addr);
 	if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
 		writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
-		trace_cxl_aer_correctable_error(cxlds->cxlmd, status);
+		trace_cxl_aer_correctable_error(to_cxl_memdev(dev), status);
 	}
 }
 
@@ -197,7 +197,7 @@ static void header_log_copy(void __iomem *ras_base, u32 *log)
  * Log the state of the RAS status registers and prepare them to log the
  * next error status. Return 1 if reset needed.
  */
-bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base)
+bool cxl_handle_ras(struct device *dev, void __iomem *ras_base)
 {
 	u32 hl[CXL_HEADERLOG_SIZE_U32];
 	void __iomem *addr;
@@ -224,7 +224,7 @@ bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base)
 	}
 
 	header_log_copy(ras_base, hl);
-	trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl);
+	trace_cxl_aer_uncorrectable_error(to_cxl_memdev(dev), status, fe, hl);
 	writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
 
 	return true;
@@ -246,7 +246,7 @@ void cxl_cor_error_detected(struct pci_dev *pdev)
 		if (cxlds->rcd)
 			cxl_handle_rdport_errors(cxlds);
 
-		cxl_handle_cor_ras(cxlds, cxlds->regs.ras);
+		cxl_handle_cor_ras(&cxlds->cxlmd->dev, cxlds->regs.ras);
 	}
 }
 EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL");
@@ -275,7 +275,7 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
 		 * chance the situation is recoverable dump the status of the RAS
 		 * capability registers and bounce the active state of the memdev.
 		 */
-		ue = cxl_handle_ras(cxlds, cxlds->regs.ras);
+		ue = cxl_handle_ras(&cxlds->cxlmd->dev, cxlds->regs.ras);
 	}
 
 
diff --git a/drivers/cxl/core/ras_rch.c b/drivers/cxl/core/ras_rch.c
index ed58afd18ecc9..0a8b3b9b63884 100644
--- a/drivers/cxl/core/ras_rch.c
+++ b/drivers/cxl/core/ras_rch.c
@@ -115,7 +115,7 @@ void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
 
 	pci_print_aer(pdev, severity, &aer_regs);
 	if (severity == AER_CORRECTABLE)
-		cxl_handle_cor_ras(cxlds, dport->regs.ras);
+		cxl_handle_cor_ras(&cxlds->cxlmd->dev, dport->regs.ras);
 	else
-		cxl_handle_ras(cxlds, dport->regs.ras);
+		cxl_handle_ras(&cxlds->cxlmd->dev, dport->regs.ras);
 }

From 51906b0621ccea4d4175785736f95ca53c4097b9 Mon Sep 17 00:00:00 2001
From: Samasth Norway Ananda <samasth.norway.ananda@oracle.com>
Date: Mon, 5 Jan 2026 12:38:33 -0800
Subject: [PATCH 089/143] cxl/pci: Remove outdated FIXME comment and
 BUILD_BUG_ON

Remove the outdated FIXME comment about switching to struct_group() and
the associated BUILD_BUG_ON check. This work was already completed in
commit 301e68dd9b9b ("cxl/core: Replace unions with struct_group()")
which converted struct cxl_regs to use struct_group_tagged().

The BUILD_BUG_ON was checking that anonymous union layout was preserved,
but since struct_group() now handles this correctly, the compile-time
check is no longer necessary.

Signed-off-by: Samasth Norway Ananda <samasth.norway.ananda@oracle.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Link: https://patch.msgid.link/20260105203833.1604585-1-samasth.norway.ananda@oracle.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 4dd05f02f1d618da610e7d3bd479c47a96b4fc3f)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/pci.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index d03292e7b9b99..1cf2322208735 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -912,13 +912,6 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	unsigned int i;
 	bool irq_avail;
 
-	/*
-	 * Double check the anonymous union trickery in struct cxl_regs
-	 * FIXME switch to struct_group()
-	 */
-	BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) !=
-		     offsetof(struct cxl_regs, device_regs.memdev));
-
 	rc = pcim_enable_device(pdev);
 	if (rc)
 		return rc;

From 89e9eef4367bbffc5bbce069e75c85bd966323b1 Mon Sep 17 00:00:00 2001
From: Robert Richter <rrichter@amd.com>
Date: Fri, 9 Jan 2026 13:29:51 +0100
Subject: [PATCH 090/143] cxl/hdm: Fix newline character in dev_err() messages

The newline character is not placed at the end of the string. This
causes unintended line wraps, broken log level and unterminated log
messages. Fix that for all messages.

Note that the messages now use a colon before the error code instead of
parentheses, which is the more common style.

Fixes: 24b18197184a ("cxl/hdm: Extend DVSEC range register emulation for region enumeration")
Fixes: 9c57cde0dcbd ("cxl/hdm: Enumerate allocated DPA")
Signed-off-by: Robert Richter <rrichter@amd.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20260109122952.639231-1-rrichter@amd.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit e5b1887619403c2da25a5899cad3e1ab34e7717f)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/hdm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index aff166798e353..35b34b8c50763 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -966,7 +966,7 @@ static int cxl_setup_hdm_decoder_from_dvsec(
 	rc = devm_cxl_dpa_reserve(cxled, *dpa_base, len, 0);
 	if (rc) {
 		dev_err(&port->dev,
-			"decoder%d.%d: Failed to reserve DPA range %#llx - %#llx\n (%d)",
+			"decoder%d.%d: Failed to reserve DPA range %#llx - %#llx: %d\n",
 			port->id, cxld->id, *dpa_base, *dpa_base + len - 1, rc);
 		return rc;
 	}
@@ -1118,7 +1118,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
 	rc = devm_cxl_dpa_reserve(cxled, *dpa_base + skip, dpa_size, skip);
 	if (rc) {
 		dev_err(&port->dev,
-			"decoder%d.%d: Failed to reserve DPA range %#llx - %#llx\n (%d)",
+			"decoder%d.%d: Failed to reserve DPA range %#llx - %#llx: %d\n",
 			port->id, cxld->id, *dpa_base,
 			*dpa_base + dpa_size + skip - 1, rc);
 		return rc;

From fa29f9690432bae4e9f571eb9be1ecff1a1d9190 Mon Sep 17 00:00:00 2001
From: Li Ming <ming.li@zohomail.com>
Date: Fri, 9 Jan 2026 23:40:42 +0800
Subject: [PATCH 091/143] cxl/acpi: Remove cxl_acpi_set_cache_size()

cxl_acpi_set_cache_size() returns an error only when the size of the
cache range does not match the size of the CXL address range. Almost all
of the cache size setup is implemented in cxl_acpi_set_cache_size();
cxl_setup_extended_linear_cache() does nothing except print a warning
in that error case, and cxl_acpi_set_cache_size() already prints a
warning at the same time. Consolidate the two functions into one and
keep the name cxl_setup_extended_linear_cache().

Signed-off-by: Li Ming <ming.li@zohomail.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Link: https://patch.msgid.link/20260109154042.331296-1-ming.li@zohomail.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 99698e70148fbce4410799570adac8456204fa37)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/acpi.c | 29 +++++------------------------
 1 file changed, 5 insertions(+), 24 deletions(-)

diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index 77ac940e30138..e65dfae42bded 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -357,7 +357,7 @@ static int add_or_reset_cxl_resource(struct resource *parent, struct resource *r
 	return rc;
 }
 
-static int cxl_acpi_set_cache_size(struct cxl_root_decoder *cxlrd)
+static void cxl_setup_extended_linear_cache(struct cxl_root_decoder *cxlrd)
 {
 	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
 	struct range *hpa = &cxld->hpa_range;
@@ -367,12 +367,14 @@ static int cxl_acpi_set_cache_size(struct cxl_root_decoder *cxlrd)
 	struct resource res;
 	int nid, rc;
 
+	/* Explicitly initialize cache size to 0 at the beginning */
+	cxlrd->cache_size = 0;
 	res = DEFINE_RES_MEM(start, size);
 	nid = phys_to_target_node(start);
 
 	rc = hmat_get_extended_linear_cache_size(&res, nid, &cache_size);
 	if (rc)
-		return 0;
+		return;
 
 	/*
 	 * The cache range is expected to be within the CFMWS.
@@ -384,31 +386,10 @@ static int cxl_acpi_set_cache_size(struct cxl_root_decoder *cxlrd)
 		dev_warn(&cxld->dev,
 			 "Extended Linear Cache size %pa != CXL size %pa. No Support!",
 			 &cache_size, &size);
-		return -ENXIO;
+		return;
 	}
 
 	cxlrd->cache_size = cache_size;
-
-	return 0;
-}
-
-static void cxl_setup_extended_linear_cache(struct cxl_root_decoder *cxlrd)
-{
-	int rc;
-
-	rc = cxl_acpi_set_cache_size(cxlrd);
-	if (rc) {
-		/*
-		 * Failing to retrieve extended linear cache region resize does not
-		 * prevent the region from functioning. Only causes cxl list showing
-		 * incorrect region size.
-		 */
-		dev_warn(cxlrd->cxlsd.cxld.dev.parent,
-			 "Extended linear cache retrieval failed rc:%d\n", rc);
-
-		/* Ignoring return code */
-		cxlrd->cache_size = 0;
-	}
 }
 
 DEFINE_FREE(put_cxlrd, struct cxl_root_decoder *,

From 78dccb8b6ed30fff42600cd7920d8be2181f67b9 Mon Sep 17 00:00:00 2001
From: "Cheatham, Benjamin" <benjamin.cheatham@amd.com>
Date: Fri, 9 Jan 2026 07:57:38 -0600
Subject: [PATCH 092/143] cxl/core: Fix cxl_dport debugfs EINJ entries

Protocol error injection is only valid for CXL 2.0+ root ports and CXL
1.1 memory-mapped downstream ports as per the ACPI v6.5 spec (Table
8-31). The core code currently creates an 'einj_inject' file in CXL debugfs
for all CXL 1.1 downstream ports and all PCI CXL 2.0+ downstream ports.
This results in debugfs EINJ files that won't work due to platform/spec
restrictions.

Fix by limiting 'einj_inject' file creation to only CXL 1.1 dports and
CXL 2.0+ root ports. Update the comment above the check to more accurately
represent the requirements expected by the EINJ module and ACPI spec.

Fixes: 8039804cfa73 ("cxl/core: Add CXL EINJ debugfs files")
Signed-off-by: Ben Cheatham <Benjamin.Cheatham@amd.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Link: https://patch.msgid.link/6e9fb657-8264-4028-92e2-5428e2695bf1@amd.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 4ed7952b9e87cf731ebc8251874416e60eb15230)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/port.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index fef3aa0c6680c..54f72452fb062 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -822,16 +822,18 @@ DEFINE_DEBUGFS_ATTRIBUTE(cxl_einj_inject_fops, NULL, cxl_einj_inject,
 
 static void cxl_debugfs_create_dport_dir(struct cxl_dport *dport)
 {
+	struct cxl_port *parent = parent_port_of(dport->port);
 	struct dentry *dir;
 
 	if (!einj_cxl_is_initialized())
 		return;
 
 	/*
-	 * dport_dev needs to be a PCIe port for CXL 2.0+ ports because
-	 * EINJ expects a dport SBDF to be specified for 2.0 error injection.
+	 * Protocol error injection is only available for CXL 2.0+ root ports
+	 * and CXL 1.1 downstream ports
 	 */
-	if (!dport->rch && !dev_is_pci(dport->dport_dev))
+	if (!dport->rch &&
+	    !(dev_is_pci(dport->dport_dev) && parent && is_cxl_root(parent)))
 		return;
 
 	dir = cxl_debugfs_create_dir(dev_name(dport->dport_dev));

From 7ab30cf2502cd8ac35744bb3a3e2b3a06211e3d2 Mon Sep 17 00:00:00 2001
From: Alison Schofield <alison.schofield@intel.com>
Date: Thu, 15 Jan 2026 20:58:36 -0800
Subject: [PATCH 093/143] cxl/region: Translate DPA->HPA in unaligned MOD3
 regions

The CXL driver implementation of DPA->HPA address translation depends
on a region's starting address always being aligned to Host Bridge
Interleave Ways * 256MB. The driver follows the decode methods
defined in the CXL Spec[1] and expanded upon in the CXL Driver Writers
Guide[2], which describe bit manipulations based on power-of-2
alignment to translate a DPA to an HPA.

With the introduction of MOD3 interleave way support, platforms may
create regions at starting addresses that are not power-of-2 aligned.
This allows platforms to avoid gaps in the memory map, but addresses
within those regions cannot be translated using the existing bit
manipulation method.

Introduce an unaligned translation method for DPA->HPA that
reconstructs an HPA by restoring the address first at the port level
and then at the host bridge level.

[1] CXL Spec 4.0 8.2.4.20.13 Implementation Note Device Decoder Logic
[2] CXL Type 3 Memory Software Guide 1.1 2.13.25 DPA to HPA Translation

Suggested-by: Qing Huang <qing.huang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Link: https://patch.msgid.link/e7c53215bf69f2ff1ae7e58bcc49ca387b7b0299.1768538962.git.alison.schofield@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit e639055f1f30311db91cafb36e408cc727c7d445)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/region.c | 160 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 155 insertions(+), 5 deletions(-)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 2ef7ac530f4d8..7d5c9ee6bfc2b 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -3112,13 +3112,146 @@ u64 cxl_calculate_hpa_offset(u64 dpa_offset, int pos, u8 eiw, u16 eig)
 }
 EXPORT_SYMBOL_FOR_MODULES(cxl_calculate_hpa_offset, "cxl_translate");
 
+static int decode_pos(int region_ways, int hb_ways, int pos, int *pos_port,
+		      int *pos_hb)
+{
+	int devices_per_hb;
+
+	/*
+	 * Decode for 3-6-12 way interleaves as defined in the CXL
+	 * Spec 4.0 9.13.1.1 Legal Interleaving Configurations.
+	 * Region creation should prevent invalid combinations but
+	 * sanity check here to avoid a silent bad decode.
+	 */
+	switch (hb_ways) {
+	case 3:
+		if (region_ways != 3 && region_ways != 6 && region_ways != 12)
+			return -EINVAL;
+		break;
+	case 6:
+		if (region_ways != 6 && region_ways != 12)
+			return -EINVAL;
+		break;
+	case 12:
+		if (region_ways != 12)
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+	/*
+	 * Each host bridge contributes an equal number of endpoints
+	 * that are laid out contiguously per host bridge. Modulo
+	 * selects the port within a host bridge and division selects
+	 * the host bridge position.
+	 */
+	devices_per_hb = region_ways / hb_ways;
+	*pos_port = pos % devices_per_hb;
+	*pos_hb = pos / devices_per_hb;
+
+	return 0;
+}
+
+/*
+ * restore_parent() reconstruct the address in parent
+ *
+ * This math, specifically the bitmask creation 'mask = gran - 1' relies
+ * on the CXL Spec requirement that interleave granularity is always a
+ * power of two.
+ *
+ * [mask]		isolate the offset with the granularity
+ * [addr & ~mask]	remove the offset leaving the aligned portion
+ * [* ways]		distribute across all interleave ways
+ * [+ (pos * gran)]	add the positional offset
+ * [+ (addr & mask)]	restore the masked offset
+ */
+static u64 restore_parent(u64 addr, u64 pos, u64 gran, u64 ways)
+{
+	u64 mask = gran - 1;
+
+	return ((addr & ~mask) * ways) + (pos * gran) + (addr & mask);
+}
+
+/*
+ * unaligned_dpa_to_hpa() translates a DPA to HPA when the region resource
+ * start address is not aligned at Host Bridge Interleave Ways * 256MB.
+ *
+ * Unaligned start addresses only occur with MOD3 interleaves. All power-
+ * of-two interleaves are guaranteed aligned.
+ */
+static u64 unaligned_dpa_to_hpa(struct cxl_decoder *cxld,
+				struct cxl_region_params *p, int pos, u64 dpa)
+{
+	int ways_port = p->interleave_ways / cxld->interleave_ways;
+	int gran_port = p->interleave_granularity;
+	int gran_hb = cxld->interleave_granularity;
+	int ways_hb = cxld->interleave_ways;
+	int pos_port, pos_hb, gran_shift;
+	u64 hpa_port = 0;
+
+	/* Decode an endpoint 'pos' into port and host-bridge components */
+	if (decode_pos(p->interleave_ways, ways_hb, pos, &pos_port, &pos_hb)) {
+		dev_dbg(&cxld->dev, "not supported for region ways:%d\n",
+			p->interleave_ways);
+		return ULLONG_MAX;
+	}
+
+	/* Restore the port parent address if needed */
+	if (gran_hb != gran_port)
+		hpa_port = restore_parent(dpa, pos_port, gran_port, ways_port);
+	else
+		hpa_port = dpa;
+
+	/*
+	 * Complete the HPA reconstruction by restoring the address as if
+	 * each HB position is a candidate. Test against expected pos_hb
+	 * to confirm match.
+	 */
+	gran_shift = ilog2(gran_hb);
+	for (int position = 0; position < ways_hb; position++) {
+		u64 shifted, hpa;
+
+		hpa = restore_parent(hpa_port, position, gran_hb, ways_hb);
+		hpa += p->res->start;
+
+		shifted = hpa >> gran_shift;
+		if (do_div(shifted, ways_hb) == pos_hb)
+			return hpa;
+	}
+
+	dev_dbg(&cxld->dev, "fail dpa:%#llx region:%pr pos:%d\n", dpa, p->res,
+		pos);
+	dev_dbg(&cxld->dev, "     port-w/g/p:%d/%d/%d hb-w/g/p:%d/%d/%d\n",
+		ways_port, gran_port, pos_port, ways_hb, gran_hb, pos_hb);
+
+	return ULLONG_MAX;
+}
+
+static bool region_is_unaligned_mod3(struct cxl_region *cxlr)
+{
+	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
+	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
+	struct cxl_region_params *p = &cxlr->params;
+	int hbiw = cxld->interleave_ways;
+	u64 rem;
+
+	if (is_power_of_2(hbiw))
+		return false;
+
+	div64_u64_rem(p->res->start, (u64)hbiw * SZ_256M, &rem);
+
+	return (rem != 0);
+}
+
 u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
 		   u64 dpa)
 {
 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
+	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
 	struct cxl_region_params *p = &cxlr->params;
 	struct cxl_endpoint_decoder *cxled = NULL;
 	u64 dpa_offset, hpa_offset, hpa;
+	bool unaligned = false;
 	u16 eig = 0;
 	u8 eiw = 0;
 	int pos;
@@ -3132,15 +3265,32 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
 	if (!cxled)
 		return ULLONG_MAX;
 
+	dpa_offset = dpa - cxl_dpa_resource_start(cxled);
+
+	/* Unaligned calc for MOD3 interleaves not hbiw * 256MB aligned */
+	unaligned = region_is_unaligned_mod3(cxlr);
+	if (unaligned) {
+		hpa = unaligned_dpa_to_hpa(cxld, p, cxled->pos, dpa_offset);
+		if (hpa == ULLONG_MAX)
+			return ULLONG_MAX;
+
+		goto skip_aligned;
+	}
+	/*
+	 * Aligned calc for all power-of-2 interleaves and for MOD3
+	 * interleaves that are aligned at hbiw * 256MB
+	 */
 	pos = cxled->pos;
 	ways_to_eiw(p->interleave_ways, &eiw);
 	granularity_to_eig(p->interleave_granularity, &eig);
 
-	dpa_offset = dpa - cxl_dpa_resource_start(cxled);
 	hpa_offset = cxl_calculate_hpa_offset(dpa_offset, pos, eiw, eig);
 
 	/* Apply the hpa_offset to the region base address */
-	hpa = hpa_offset + p->res->start + p->cache_size;
+	hpa = hpa_offset + p->res->start;
+
+skip_aligned:
+	hpa += p->cache_size;
 
 	/* Root decoder translation overrides typical modulo decode */
 	if (cxlrd->ops.hpa_to_spa)
@@ -3151,9 +3301,9 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
 			"Addr trans fail: hpa 0x%llx not in region\n", hpa);
 		return ULLONG_MAX;
 	}
-
-	/* Simple chunk check, by pos & gran, only applies to modulo decodes */
-	if (!cxlrd->ops.hpa_to_spa && !cxl_is_hpa_in_chunk(hpa, cxlr, pos))
+	/* Chunk check applies to aligned modulo decodes only */
+	if (!unaligned && !cxlrd->ops.hpa_to_spa &&
+	    !cxl_is_hpa_in_chunk(hpa, cxlr, pos))
 		return ULLONG_MAX;
 
 	return hpa;

From c210b5788d5b72adcf1ea5feae4e6f49de4ec748 Mon Sep 17 00:00:00 2001
From: Alison Schofield <alison.schofield@intel.com>
Date: Thu, 15 Jan 2026 20:58:37 -0800
Subject: [PATCH 094/143] cxl/region: Translate HPA to DPA and memdev in
 unaligned regions

The CXL driver supports an expert user debugfs interface to inject and
clear poison by a region offset. That feature requires translating a
HPA (the region address) to a DPA and a memdev to perform the poison
operation.

Unaligned regions do not have an algebraically invertible mapping
from HPA to DPA due to the region offset skew. The region base is not
aligned to a full interleave. Add a helper to perform the unaligned
translations that first calculates the DPA offset and then tests it
against each candidate endpoint decoder.

Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Link: https://patch.msgid.link/f338b7aff7e4574fcc525b1a0d4f09786bfb6489.1768538962.git.alison.schofield@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit b51792fd9168e581e51be98e22df5f79454e22de)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/region.c | 46 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 7d5c9ee6bfc2b..8bacef7a4d11b 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -3314,6 +3314,48 @@ struct dpa_result {
 	u64 dpa;
 };
 
+static int unaligned_region_offset_to_dpa_result(struct cxl_region *cxlr,
+						 u64 offset,
+						 struct dpa_result *result)
+{
+	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
+	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
+	struct cxl_region_params *p = &cxlr->params;
+	u64 interleave_width, interleave_index;
+	u64 gran, gran_offset, dpa_offset;
+	u64 hpa = p->res->start + offset;
+
+	/*
+	 * Unaligned addresses are not algebraically invertible. Calculate
+	 * a dpa_offset independent of the target device and then enumerate
+	 * and test that dpa_offset against each candidate endpoint decoder.
+	 */
+	gran = cxld->interleave_granularity;
+	interleave_width = gran * cxld->interleave_ways;
+	interleave_index = div64_u64(offset, interleave_width);
+	gran_offset = div64_u64_rem(offset, gran, NULL);
+
+	dpa_offset = interleave_index * gran + gran_offset;
+
+	for (int i = 0; i < p->nr_targets; i++) {
+		struct cxl_endpoint_decoder *cxled = p->targets[i];
+		int pos = cxled->pos;
+		u64 test_hpa;
+
+		test_hpa = unaligned_dpa_to_hpa(cxld, p, pos, dpa_offset);
+		if (test_hpa == hpa) {
+			result->cxlmd = cxled_to_memdev(cxled);
+			result->dpa =
+				cxl_dpa_resource_start(cxled) + dpa_offset;
+			return 0;
+		}
+	}
+	dev_err(&cxlr->dev,
+		"failed to resolve HPA %#llx in unaligned MOD3 region\n", hpa);
+
+	return -ENXIO;
+}
+
 static int region_offset_to_dpa_result(struct cxl_region *cxlr, u64 offset,
 				       struct dpa_result *result)
 {
@@ -3343,6 +3385,10 @@ static int region_offset_to_dpa_result(struct cxl_region *cxlr, u64 offset,
 		hpa_offset = offset;
 	}
 
+	if (region_is_unaligned_mod3(cxlr))
+		return unaligned_region_offset_to_dpa_result(cxlr, offset,
+							     result);
+
 	pos = cxl_calculate_position(hpa_offset, eiw, eig);
 	if (pos < 0 || pos >= p->nr_targets) {
 		dev_dbg(&cxlr->dev, "Invalid position %d for %d targets\n",

From 84743e0d681d2c796964445f6ec18c1fce157b44 Mon Sep 17 00:00:00 2001
From: Alison Schofield <alison.schofield@intel.com>
Date: Fri, 16 Jan 2026 20:47:30 -0800
Subject: [PATCH 095/143] cxl/region: Use do_div() for 64-bit modulo operation

div64_u64_rem() was the wrong choice for doing a modulo operation
and it was used incorrectly, causing a kernel oops by passing NULL
as the remainder parameter. Replace it with the do_div() helper
that does the intended math (gran_offset = offset % gran) and is
architecture safe.

This bug appeared during testing of unaligned address translations.
The visibility to userspace would be limited to folks doing poison
injection or clear by HPA on unaligned regions.

Fixes: 78b50b598462 ("cxl/region: Translate HPA to DPA and memdev in unaligned regions")
Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Link: https://patch.msgid.link/20260117044732.567831-1-alison.schofield@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 064c098790944fa44f6aa704eb55a5c3ed65a2fa)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/region.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 8bacef7a4d11b..dee25d90b3e49 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -3324,6 +3324,7 @@ static int unaligned_region_offset_to_dpa_result(struct cxl_region *cxlr,
 	u64 interleave_width, interleave_index;
 	u64 gran, gran_offset, dpa_offset;
 	u64 hpa = p->res->start + offset;
+	u64 tmp = offset;
 
 	/*
 	 * Unaligned addresses are not algebraically invertible. Calculate
@@ -3333,7 +3334,7 @@ static int unaligned_region_offset_to_dpa_result(struct cxl_region *cxlr,
 	gran = cxld->interleave_granularity;
 	interleave_width = gran * cxld->interleave_ways;
 	interleave_index = div64_u64(offset, interleave_width);
-	gran_offset = div64_u64_rem(offset, gran, NULL);
+	gran_offset = do_div(tmp, gran);
 
 	dpa_offset = interleave_index * gran + gran_offset;
 

From 90497a70c95764d37378914e517db51420d72d01 Mon Sep 17 00:00:00 2001
From: Yuxiong Wang <yuxiong.wang@linux.alibaba.com>
Date: Thu, 29 Jan 2026 14:45:52 +0800
Subject: [PATCH 096/143] cxl: Fix premature commit_end increment on decoder
 commit failure

In cxl_decoder_commit(), commit_end is incremented before verifying
whether the commit succeeded, and the CXL_DECODER_F_ENABLE bit in
cxld->flags is only set after a successful commit. As a result, if the
commit fails, commit_end has been incremented and cxld->reset() has no
effect since the flag is not set, so commit_end remains incorrectly
incremented. The inconsistency between commit_end and CXL_DECODER_F_ENABLE
causes subsequent commit or reset operations to fail.

Fix this by incrementing commit_end only after confirming the commit
succeeded. Also, remove the ineffective cxld->reset() call. According to
CXL Spec r4.0 8.2.4.20.12 Committing Decoder Programming, since
cxld_await_commit() has cleared the decoder commit bit on failure, no
additional reset is required.

[dj: Fixed commit log 80 char wrapping. ]
[dj: Fix "Fixes" tag to correct hash length. ]
[dj: Change spec to r4.0. ]

Fixes: 176baefb2eb5 ("cxl/hdm: Commit decoder state to hardware")
Signed-off-by: Yuxiong Wang <yuxiong.wang@linux.alibaba.com>
Acked-by: Huang Ying <ying.huang@linux.alibaba.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Link: https://patch.msgid.link/20260129064552.31180-1-yuxiong.wang@linux.alibaba.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 7b6f9d9b1ea05c9c22570126547c780e8c6c3f62)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/hdm.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 35b34b8c50763..061f364cc9a00 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -844,14 +844,13 @@ static int cxl_decoder_commit(struct cxl_decoder *cxld)
 	scoped_guard(rwsem_read, &cxl_rwsem.dpa)
 		setup_hw_decoder(cxld, hdm);
 
-	port->commit_end++;
 	rc = cxld_await_commit(hdm, cxld->id);
 	if (rc) {
 		dev_dbg(&port->dev, "%s: error %d committing decoder\n",
 			dev_name(&cxld->dev), rc);
-		cxld->reset(cxld);
 		return rc;
 	}
+	port->commit_end++;
 	cxld->flags |= CXL_DECODER_F_ENABLE;
 
 	return 0;

From c46d21a21708aaa521f496b225649c1ee867ccc3 Mon Sep 17 00:00:00 2001
From: Terry Bowman <terry.bowman@amd.com>
Date: Wed, 14 Jan 2026 12:20:24 -0600
Subject: [PATCH 097/143] PCI: Introduce pcie_is_cxl()

CXL is a protocol that runs on top of PCIe electricals. Its error model
also runs on top of the PCIe AER error model by standardizing "internal"
errors as "CXL" errors. Linux has historically ignored internal errors.

CXL protocol error handling is then a task of enhancing the PCIe AER
core to understand that PCIe ports (upstream and downstream) and
endpoints may throw internal errors that represent standard CXL protocol
errors.

The proposed method to make that determination is to teach 'struct
pci_dev' to cache when its link has trained the CXL.mem and/or CXL.cache
protocols and then treat all internal errors as CXL errors. A design
goal is to not burden the PCIe AER core with CXL knowledge beyond just
enough to forward error notifications to the CXL RAS core. The forwarded
notification looks up a 'struct cxl_port' or 'struct cxl_dport'
companion device to the PCI device.

Introduce set_pcie_cxl() with logic checking for CXL.mem or CXL.cache
status in the CXL Flex Bus DVSEC status register. The CXL Flex Bus DVSEC
presence is used because it is required for all the CXL PCIe devices.[1]

[1] CXL 3.1 Spec, 8.1.1 PCIe Designated Vendor-Specific Extended
    Capability (DVSEC) ID Assignment, Table 8-2

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Ben Cheatham <benjamin.cheatham@amd.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://patch.msgid.link/20260114182055.46029-4-terry.bowman@amd.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 7c29ba02210c6e4570cdce53813a1ae68fb6d049)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/pci/probe.c           | 31 +++++++++++++++++++++++++++++++
 include/linux/pci.h           |  6 ++++++
 include/uapi/linux/pci_regs.h |  6 ++++++
 3 files changed, 43 insertions(+)

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index eb084877bb043..06cb9081d4ac4 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1692,6 +1692,35 @@ static void set_pcie_thunderbolt(struct pci_dev *dev)
 		dev->is_thunderbolt = 1;
 }
 
+static void set_pcie_cxl(struct pci_dev *dev)
+{
+	struct pci_dev *bridge;
+	u16 dvsec, cap;
+
+	if (!pci_is_pcie(dev))
+		return;
+
+	/*
+	 * Update parent's CXL state because alternate protocol training
+	 * may have changed
+	 */
+	bridge = pci_upstream_bridge(dev);
+	if (bridge)
+		set_pcie_cxl(bridge);
+
+	dvsec = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_CXL,
+					  PCI_DVSEC_CXL_FLEXBUS_PORT);
+	if (!dvsec)
+		return;
+
+	pci_read_config_word(dev, dvsec + PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS,
+			     &cap);
+
+	dev->is_cxl = FIELD_GET(PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS_CACHE, cap) ||
+		FIELD_GET(PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS_MEM, cap);
+
+}
+
 static void set_pcie_untrusted(struct pci_dev *dev)
 {
 	struct pci_dev *parent = pci_upstream_bridge(dev);
@@ -2022,6 +2051,8 @@ int pci_setup_device(struct pci_dev *dev)
 	/* Need to have dev->cfg_size ready */
 	set_pcie_thunderbolt(dev);
 
+	set_pcie_cxl(dev);
+
 	set_pcie_untrusted(dev);
 
 	if (pci_is_pcie(dev))
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 1bdfd152eb1f8..a03cdd8c96122 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -466,6 +466,7 @@ struct pci_dev {
 	unsigned int	is_pciehp:1;
 	unsigned int	shpc_managed:1;		/* SHPC owned by shpchp */
 	unsigned int	is_thunderbolt:1;	/* Thunderbolt controller */
+	unsigned int	is_cxl:1;               /* Compute Express Link (CXL) */
 	/*
 	 * Devices marked being untrusted are the ones that can potentially
 	 * execute DMA attacks and similar. They are typically connected
@@ -773,6 +774,11 @@ static inline bool pci_is_display(struct pci_dev *pdev)
 	return (pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY;
 }
 
+static inline bool pcie_is_cxl(struct pci_dev *pci_dev)
+{
+	return pci_dev->is_cxl;
+}
+
 #define for_each_pci_bridge(dev, bus)				\
 	list_for_each_entry(dev, &bus->devices, bus_list)	\
 		if (!pci_is_bridge(dev)) {} else
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 22e22cea2c4f2..49848c6765270 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -1281,6 +1281,12 @@
 /* CXL r4.0, 8.1.7: GPF DVSEC for CXL Device */
 #define PCI_DVSEC_CXL_DEVICE_GPF			5
 
+/* CXL r4.0, 8.1.8: Flex Bus DVSEC */
+#define PCI_DVSEC_CXL_FLEXBUS_PORT			7
+#define  PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS		0xE
+#define   PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS_CACHE	_BITUL(0)
+#define   PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS_MEM		_BITUL(2)
+
 /* CXL r4.0, 8.1.9: Register Locator DVSEC */
 #define PCI_DVSEC_CXL_REG_LOCATOR			8
 #define  PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1		0xC

From 55cdd3eb04e827b4aedeabc00ef364f82312119c Mon Sep 17 00:00:00 2001
From: Terry Bowman <terry.bowman@amd.com>
Date: Wed, 14 Jan 2026 12:20:27 -0600
Subject: [PATCH 098/143] PCI: Replace cxl_error_is_native() with
 pcie_aer_is_native()

The AER driver includes a CXL support function cxl_error_is_native(). This
function adds no value beyond pcie_aer_is_native().

Simplify the codebase by removing cxl_error_is_native() and replace
occurrences of cxl_error_is_native() with pcie_aer_is_native().

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://patch.msgid.link/20260114182055.46029-7-terry.bowman@amd.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit bcfa289932a703dd189466ea5947212e8dddd399)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/pci/pcie/aer.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 3dba9c0c6ae11..9f4985fba50b9 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -1140,13 +1140,6 @@ static bool is_cxl_mem_dev(struct pci_dev *dev)
 	return true;
 }
 
-static bool cxl_error_is_native(struct pci_dev *dev)
-{
-	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
-
-	return (pcie_ports_native || host->native_aer);
-}
-
 static bool is_internal_error(struct aer_err_info *info)
 {
 	if (info->severity == AER_CORRECTABLE)
@@ -1160,7 +1153,7 @@ static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data)
 	struct aer_err_info *info = (struct aer_err_info *)data;
 	const struct pci_error_handlers *err_handler;
 
-	if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev))
+	if (!is_cxl_mem_dev(dev) || !pcie_aer_is_native(dev))
 		return 0;
 
 	/* Protect dev->driver */
@@ -1201,7 +1194,7 @@ static int handles_cxl_error_iter(struct pci_dev *dev, void *data)
 	bool *handles_cxl = data;
 
 	if (!*handles_cxl)
-		*handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev);
+		*handles_cxl = is_cxl_mem_dev(dev) && pcie_aer_is_native(dev);
 
 	/* Non-zero terminates iteration */
 	return *handles_cxl;

From bf964029c3486009a54ef9d50329360b73c73732 Mon Sep 17 00:00:00 2001
From: Terry Bowman <terry.bowman@amd.com>
Date: Wed, 14 Jan 2026 12:20:30 -0600
Subject: [PATCH 099/143] PCI/AER: Export pci_aer_unmask_internal_errors()

Internal PCIe errors are not enabled by default during initialization
because their behavior is too device-specific and there is no standard way
to reason about them. However, for CXL an internal error is the standard
mechanism for conveying CXL protocol errors.

Export pci_aer_unmask_internal_errors() for CXL, but make it clear that it
is only meant for CXL and that the status quo of leaving internal errors
masked for PCIe in general remains.

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20260114182055.46029-10-terry.bowman@amd.com
Co-developed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 6dc5fe212e74e6880a1da0093f627387d0a658bb)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/pci/pcie/aer.c | 11 ++++++++---
 include/linux/aer.h    |  2 ++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 9f4985fba50b9..1ec0193ab1cc9 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -1094,8 +1094,6 @@ static bool find_source_device(struct pci_dev *parent,
 	return true;
 }
 
-#ifdef CONFIG_PCIEAER_CXL
-
 /**
  * pci_aer_unmask_internal_errors - unmask internal errors
  * @dev: pointer to the pci_dev data structure
@@ -1106,7 +1104,7 @@ static bool find_source_device(struct pci_dev *parent,
  * Note: AER must be enabled and supported by the device which must be
  * checked in advance, e.g. with pcie_aer_is_native().
  */
-static void pci_aer_unmask_internal_errors(struct pci_dev *dev)
+void pci_aer_unmask_internal_errors(struct pci_dev *dev)
 {
 	int aer = dev->aer_cap;
 	u32 mask;
@@ -1120,6 +1118,13 @@ static void pci_aer_unmask_internal_errors(struct pci_dev *dev)
 	pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, mask);
 }
 
+/*
+ * Internal errors are too device-specific to enable generally, however for CXL
+ * their behavior is standardized for conveying CXL protocol errors.
+ */
+EXPORT_SYMBOL_FOR_MODULES(pci_aer_unmask_internal_errors, "cxl_core");
+
+#ifdef CONFIG_PCIEAER_CXL
 static bool is_cxl_mem_dev(struct pci_dev *dev)
 {
 	/*
diff --git a/include/linux/aer.h b/include/linux/aer.h
index 02940be66324e..df0f5c382286f 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -56,12 +56,14 @@ struct aer_capability_regs {
 #if defined(CONFIG_PCIEAER)
 int pci_aer_clear_nonfatal_status(struct pci_dev *dev);
 int pcie_aer_is_native(struct pci_dev *dev);
+void pci_aer_unmask_internal_errors(struct pci_dev *dev);
 #else
 static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev)
 {
 	return -EINVAL;
 }
 static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; }
+static inline void pci_aer_unmask_internal_errors(struct pci_dev *dev) { }
 #endif
 
 void pci_print_aer(struct pci_dev *dev, int aer_severity,

From 7d34b727c1a1c9b9e0bf9e876fb09ce29fcfac69 Mon Sep 17 00:00:00 2001
From: Terry Bowman <terry.bowman@amd.com>
Date: Wed, 14 Jan 2026 12:20:31 -0600
Subject: [PATCH 100/143] PCI/AER: Update is_internal_error() to be non-static
 is_aer_internal_error()

The AER driver includes significant logic for handling CXL protocol errors.
The AER driver will be updated in the future to separate the AER and CXL
logic.

Rename the is_internal_error() function to is_aer_internal_error() as it
gives a more precise indication of the purpose. Make
is_aer_internal_error() non-static to allow for the 2 different CXL
topology error model implementations (RCH and VH) to share this helper.

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Link: https://patch.msgid.link/20260114182055.46029-11-terry.bowman@amd.com
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 51ce56b1a5d6f7263739d4766ae445463c74b689)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/pci/pcie/aer.c     | 4 ++--
 drivers/pci/pcie/portdrv.h | 9 +++++++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 1ec0193ab1cc9..8bb894f9b152c 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -1145,7 +1145,7 @@ static bool is_cxl_mem_dev(struct pci_dev *dev)
 	return true;
 }
 
-static bool is_internal_error(struct aer_err_info *info)
+bool is_aer_internal_error(struct aer_err_info *info)
 {
 	if (info->severity == AER_CORRECTABLE)
 		return info->status & PCI_ERR_COR_INTERNAL;
@@ -1190,7 +1190,7 @@ static void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info)
 	 * device driver.
 	 */
 	if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC &&
-	    is_internal_error(info))
+	    is_aer_internal_error(info))
 		pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info);
 }
 
diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h
index bd29d1cc7b8bd..e7a0a2cffea93 100644
--- a/drivers/pci/pcie/portdrv.h
+++ b/drivers/pci/pcie/portdrv.h
@@ -123,4 +123,13 @@ static inline void pcie_pme_interrupt_enable(struct pci_dev *dev, bool en) {}
 #endif /* !CONFIG_PCIE_PME */
 
 struct device *pcie_port_find_device(struct pci_dev *dev, u32 service);
+
+struct aer_err_info;
+
+#ifdef CONFIG_PCIEAER_CXL
+bool is_aer_internal_error(struct aer_err_info *info);
+#else
+static inline bool is_aer_internal_error(struct aer_err_info *info) { return false; }
+#endif /* CONFIG_PCIEAER_CXL */
+
 #endif /* _PORTDRV_H_ */

From 115b1e171dbe7847829c4ce3059ad7566d3e887e Mon Sep 17 00:00:00 2001
From: Terry Bowman <terry.bowman@amd.com>
Date: Wed, 14 Jan 2026 12:20:32 -0600
Subject: [PATCH 101/143] PCI/AER: Move CXL RCH error handling to aer_cxl_rch.c

The Restricted CXL Host (RCH) AER error handling logic currently resides
in the AER driver file, aer.c. The CXL-specific changes are conditionally
compiled using #ifdefs.

Improve the AER driver maintainability by separating the RCH specific logic
from the AER driver's core functionality and removing the ifdefs. Introduce
drivers/pci/pcie/aer_cxl_rch.c to hold the RCH AER logic. Conditionally
compile the file using the CONFIG_CXL_RAS Kconfig.

Move the CXL logic into the new file but leave the CXL helper function
is_aer_internal_error() in aer.c for now, as it will be moved in a future
patch for CXL Virtual Hierarchy handling.

To maintain compilation after the move, other changes are required. Make
cxl_rch_handle_error() and cxl_rch_enable_rcec() non-static and declare them
alongside is_aer_internal_error() in portdrv.h so that they can be called
from the AER driver.

Update the new file with the SPDX and 2023 AMD copyright notations because
the RCH bits were initially contributed in 2023 by AMD. See
commit 0a867568bb0d ("PCI/AER: Forward RCH downstream port-detected errors to the CXL.mem dev handler")

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Ben Cheatham <benjamin.cheatham@amd.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Link: https://patch.msgid.link/20260114182055.46029-12-terry.bowman@amd.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 59010029faf27c82d1e786dfd1fb83b09f478d1b)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/pci/pcie/Makefile      |   1 +
 drivers/pci/pcie/aer.c         |  99 +-----------------------------
 drivers/pci/pcie/aer_cxl_rch.c | 106 +++++++++++++++++++++++++++++++++
 drivers/pci/pcie/portdrv.h     |   9 ++-
 4 files changed, 114 insertions(+), 101 deletions(-)
 create mode 100644 drivers/pci/pcie/aer_cxl_rch.c

diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile
index 173829aa02e60..b0b43a18c304b 100644
--- a/drivers/pci/pcie/Makefile
+++ b/drivers/pci/pcie/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_PCIEPORTBUS)	+= pcieportdrv.o bwctrl.o
 
 obj-y				+= aspm.o
 obj-$(CONFIG_PCIEAER)		+= aer.o err.o tlp.o
+obj-$(CONFIG_CXL_RAS)		+= aer_cxl_rch.o
 obj-$(CONFIG_PCIEAER_INJECT)	+= aer_inject.o
 obj-$(CONFIG_PCIE_PME)		+= pme.o
 obj-$(CONFIG_PCIE_DPC)		+= dpc.o
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 8bb894f9b152c..95a829b6c0889 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -1124,27 +1124,7 @@ void pci_aer_unmask_internal_errors(struct pci_dev *dev)
  */
 EXPORT_SYMBOL_FOR_MODULES(pci_aer_unmask_internal_errors, "cxl_core");
 
-#ifdef CONFIG_PCIEAER_CXL
-static bool is_cxl_mem_dev(struct pci_dev *dev)
-{
-	/*
-	 * The capability, status, and control fields in Device 0,
-	 * Function 0 DVSEC control the CXL functionality of the
-	 * entire device (CXL 3.0, 8.1.3).
-	 */
-	if (dev->devfn != PCI_DEVFN(0, 0))
-		return false;
-
-	/*
-	 * CXL Memory Devices must have the 502h class code set (CXL
-	 * 3.0, 8.1.12.1).
-	 */
-	if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL)
-		return false;
-
-	return true;
-}
-
+#ifdef CONFIG_CXL_RAS
 bool is_aer_internal_error(struct aer_err_info *info)
 {
 	if (info->severity == AER_CORRECTABLE)
@@ -1152,83 +1132,6 @@ bool is_aer_internal_error(struct aer_err_info *info)
 
 	return info->status & PCI_ERR_UNC_INTN;
 }
-
-static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data)
-{
-	struct aer_err_info *info = (struct aer_err_info *)data;
-	const struct pci_error_handlers *err_handler;
-
-	if (!is_cxl_mem_dev(dev) || !pcie_aer_is_native(dev))
-		return 0;
-
-	/* Protect dev->driver */
-	device_lock(&dev->dev);
-
-	err_handler = dev->driver ? dev->driver->err_handler : NULL;
-	if (!err_handler)
-		goto out;
-
-	if (info->severity == AER_CORRECTABLE) {
-		if (err_handler->cor_error_detected)
-			err_handler->cor_error_detected(dev);
-	} else if (err_handler->error_detected) {
-		if (info->severity == AER_NONFATAL)
-			err_handler->error_detected(dev, pci_channel_io_normal);
-		else if (info->severity == AER_FATAL)
-			err_handler->error_detected(dev, pci_channel_io_frozen);
-	}
-out:
-	device_unlock(&dev->dev);
-	return 0;
-}
-
-static void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info)
-{
-	/*
-	 * Internal errors of an RCEC indicate an AER error in an
-	 * RCH's downstream port. Check and handle them in the CXL.mem
-	 * device driver.
-	 */
-	if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC &&
-	    is_aer_internal_error(info))
-		pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info);
-}
-
-static int handles_cxl_error_iter(struct pci_dev *dev, void *data)
-{
-	bool *handles_cxl = data;
-
-	if (!*handles_cxl)
-		*handles_cxl = is_cxl_mem_dev(dev) && pcie_aer_is_native(dev);
-
-	/* Non-zero terminates iteration */
-	return *handles_cxl;
-}
-
-static bool handles_cxl_errors(struct pci_dev *rcec)
-{
-	bool handles_cxl = false;
-
-	if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC &&
-	    pcie_aer_is_native(rcec))
-		pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl);
-
-	return handles_cxl;
-}
-
-static void cxl_rch_enable_rcec(struct pci_dev *rcec)
-{
-	if (!handles_cxl_errors(rcec))
-		return;
-
-	pci_aer_unmask_internal_errors(rcec);
-	pci_info(rcec, "CXL: Internal errors unmasked");
-}
-
-#else
-static inline void cxl_rch_enable_rcec(struct pci_dev *dev) { }
-static inline void cxl_rch_handle_error(struct pci_dev *dev,
-					struct aer_err_info *info) { }
 #endif
 
 /**
diff --git a/drivers/pci/pcie/aer_cxl_rch.c b/drivers/pci/pcie/aer_cxl_rch.c
new file mode 100644
index 0000000000000..6b515edb12c15
--- /dev/null
+++ b/drivers/pci/pcie/aer_cxl_rch.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2023 AMD Corporation. All rights reserved. */
+
+#include <linux/pci.h>
+#include <linux/aer.h>
+#include <linux/bitfield.h>
+#include "../pci.h"
+#include "portdrv.h"
+
+static bool is_cxl_mem_dev(struct pci_dev *dev)
+{
+	/*
+	 * The capability, status, and control fields in Device 0,
+	 * Function 0 DVSEC control the CXL functionality of the
+	 * entire device (CXL 3.0, 8.1.3).
+	 */
+	if (dev->devfn != PCI_DEVFN(0, 0))
+		return false;
+
+	/*
+	 * CXL Memory Devices must have the 502h class code set (CXL
+	 * 3.0, 8.1.12.1).
+	 */
+	if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL)
+		return false;
+
+	return true;
+}
+
+static bool cxl_error_is_native(struct pci_dev *dev)
+{
+	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
+
+	return (pcie_ports_native || host->native_aer);
+}
+
+static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data)
+{
+	struct aer_err_info *info = (struct aer_err_info *)data;
+	const struct pci_error_handlers *err_handler;
+
+	if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev))
+		return 0;
+
+	device_lock(&dev->dev);
+
+	err_handler = dev->driver ? dev->driver->err_handler : NULL;
+	if (!err_handler)
+		goto out;
+
+	if (info->severity == AER_CORRECTABLE) {
+		if (err_handler->cor_error_detected)
+			err_handler->cor_error_detected(dev);
+	} else if (err_handler->error_detected) {
+		if (info->severity == AER_NONFATAL)
+			err_handler->error_detected(dev, pci_channel_io_normal);
+		else if (info->severity == AER_FATAL)
+			err_handler->error_detected(dev, pci_channel_io_frozen);
+	}
+out:
+	device_unlock(&dev->dev);
+	return 0;
+}
+
+void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info)
+{
+	/*
+	 * Internal errors of an RCEC indicate an AER error in an
+	 * RCH's downstream port. Check and handle them in the CXL.mem
+	 * device driver.
+	 */
+	if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC &&
+	    is_aer_internal_error(info))
+		pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info);
+}
+
+static int handles_cxl_error_iter(struct pci_dev *dev, void *data)
+{
+	bool *handles_cxl = data;
+
+	if (!*handles_cxl)
+		*handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev);
+
+	/* Non-zero terminates iteration */
+	return *handles_cxl;
+}
+
+static bool handles_cxl_errors(struct pci_dev *rcec)
+{
+	bool handles_cxl = false;
+
+	if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC &&
+	    pcie_aer_is_native(rcec))
+		pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl);
+
+	return handles_cxl;
+}
+
+void cxl_rch_enable_rcec(struct pci_dev *rcec)
+{
+	if (!handles_cxl_errors(rcec))
+		return;
+
+	pci_aer_unmask_internal_errors(rcec);
+	pci_info(rcec, "CXL: Internal errors unmasked");
+}
diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h
index e7a0a2cffea93..cc58bf2f2c844 100644
--- a/drivers/pci/pcie/portdrv.h
+++ b/drivers/pci/pcie/portdrv.h
@@ -126,10 +126,13 @@ struct device *pcie_port_find_device(struct pci_dev *dev, u32 service);
 
 struct aer_err_info;
 
-#ifdef CONFIG_PCIEAER_CXL
+#ifdef CONFIG_CXL_RAS
 bool is_aer_internal_error(struct aer_err_info *info);
+void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info);
+void cxl_rch_enable_rcec(struct pci_dev *rcec);
 #else
 static inline bool is_aer_internal_error(struct aer_err_info *info) { return false; }
-#endif /* CONFIG_PCIEAER_CXL */
-
+static inline void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) { }
+static inline void cxl_rch_enable_rcec(struct pci_dev *rcec) { }
+#endif /* CONFIG_CXL_RAS */
 #endif /* _PORTDRV_H_ */

From fa00d6812c94691961f0b52143748f536649d340 Mon Sep 17 00:00:00 2001
From: Terry Bowman <terry.bowman@amd.com>
Date: Wed, 14 Jan 2026 12:20:33 -0600
Subject: [PATCH 102/143] PCI/AER: Use guard() in cxl_rch_handle_error_iter()

cxl_rch_handle_error_iter() includes a call to device_lock() using a goto
for multiple return paths. Improve readability and maintainability by
using the guard() lock variant.

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://patch.msgid.link/20260114182055.46029-13-terry.bowman@amd.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit da71bd360ded15626dabd59dd1d6939de38cab39)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/pci/pcie/aer_cxl_rch.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/pcie/aer_cxl_rch.c b/drivers/pci/pcie/aer_cxl_rch.c
index 6b515edb12c15..e471eefec9c40 100644
--- a/drivers/pci/pcie/aer_cxl_rch.c
+++ b/drivers/pci/pcie/aer_cxl_rch.c
@@ -42,11 +42,11 @@ static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data)
 	if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev))
 		return 0;
 
-	device_lock(&dev->dev);
+	guard(device)(&dev->dev);
 
 	err_handler = dev->driver ? dev->driver->err_handler : NULL;
 	if (!err_handler)
-		goto out;
+		return 0;
 
 	if (info->severity == AER_CORRECTABLE) {
 		if (err_handler->cor_error_detected)
@@ -57,8 +57,6 @@ static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data)
 		else if (info->severity == AER_FATAL)
 			err_handler->error_detected(dev, pci_channel_io_frozen);
 	}
-out:
-	device_unlock(&dev->dev);
 	return 0;
 }
 

From 4c19ee278cd408d47446812388d75d905372a3d1 Mon Sep 17 00:00:00 2001
From: Terry Bowman <terry.bowman@amd.com>
Date: Wed, 14 Jan 2026 12:20:35 -0600
Subject: [PATCH 103/143] PCI/AER: Report CXL or PCIe bus type in AER trace
 logging

The AER service driver and aer_event tracing currently log 'PCIe Bus Type'
for all errors. Update the driver and aer_event tracing to log 'CXL Bus
Type' for CXL device errors.

This requires that AER can identify and distinguish between PCIe errors and
CXL errors.

Introduce boolean 'is_cxl' to 'struct aer_err_info'. Add assignment in
aer_get_device_error_info() and pci_print_aer().

Update the aer_event trace routine to accept a bus type string parameter.

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Co-developed-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20260114182055.46029-15-terry.bowman@amd.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit 83cba5b31e6b0aeb32f41b9c954fe97b60db2817)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/pci/pci.h       |  8 +++++++-
 drivers/pci/pcie/aer.c  | 20 +++++++++++++-------
 include/ras/ras_event.h | 12 ++++++++----
 3 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index ee7b515125826..7ed929cfd45dd 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -621,7 +621,8 @@ struct aer_err_info {
 	unsigned int multi_error_valid:1;
 
 	unsigned int first_error:5;
-	unsigned int __pad2:2;
+	unsigned int __pad2:1;
+	unsigned int is_cxl:1;
 	unsigned int tlp_header_valid:1;
 
 	unsigned int status;		/* COR/UNCOR Error Status */
@@ -632,6 +633,11 @@ struct aer_err_info {
 int aer_get_device_error_info(struct aer_err_info *info, int i);
 void aer_print_error(struct aer_err_info *info, int i);
 
+static inline const char *aer_err_bus(struct aer_err_info *info)
+{
+	return info->is_cxl ? "CXL" : "PCIe";
+}
+
 int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2,
 		      unsigned int tlp_len, bool flit,
 		      struct pcie_tlp_log *log);
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 95a829b6c0889..5331a1c908375 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -844,6 +844,7 @@ void aer_print_error(struct aer_err_info *info, int i)
 	struct pci_dev *dev;
 	int layer, agent, id;
 	const char *level = info->level;
+	const char *bus_type = aer_err_bus(info);
 
 	if (WARN_ON_ONCE(i >= AER_MAX_MULTI_ERR_DEVICES))
 		return;
@@ -853,22 +854,22 @@ void aer_print_error(struct aer_err_info *info, int i)
 
 	pci_dev_aer_stats_incr(dev, info);
 	trace_aer_event(pci_name(dev), (info->status & ~info->mask),
-			info->severity, info->tlp_header_valid, &info->tlp);
+			info->severity, info->tlp_header_valid, &info->tlp, bus_type);
 
 	if (!info->ratelimit_print[i])
 		return;
 
 	if (!info->status) {
-		pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
-			aer_error_severity_string[info->severity]);
+		pci_err(dev, "%s Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
+			bus_type, aer_error_severity_string[info->severity]);
 		goto out;
 	}
 
 	layer = AER_GET_LAYER_ERROR(info->severity, info->status);
 	agent = AER_GET_AGENT(info->severity, info->status);
 
-	aer_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
-		   aer_error_severity_string[info->severity],
+	aer_printk(level, dev, "%s Bus Error: severity=%s, type=%s, (%s)\n",
+		   bus_type, aer_error_severity_string[info->severity],
 		   aer_error_layer[layer], aer_agent_string[agent]);
 
 	aer_printk(level, dev, "  device [%04x:%04x] error status/mask=%08x/%08x\n",
@@ -902,6 +903,7 @@ EXPORT_SYMBOL_GPL(cper_severity_to_aer);
 void pci_print_aer(struct pci_dev *dev, int aer_severity,
 		   struct aer_capability_regs *aer)
 {
+	const char *bus_type;
 	int layer, agent, tlp_header_valid = 0;
 	u32 status, mask;
 	struct aer_err_info info = {
@@ -922,10 +924,13 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
 
 	info.status = status;
 	info.mask = mask;
+	info.is_cxl = pcie_is_cxl(dev);
+
+	bus_type = aer_err_bus(&info);
 
 	pci_dev_aer_stats_incr(dev, &info);
-	trace_aer_event(pci_name(dev), (status & ~mask),
-			aer_severity, tlp_header_valid, &aer->header_log);
+	trace_aer_event(pci_name(dev), (status & ~mask), aer_severity,
+			tlp_header_valid, &aer->header_log, bus_type);
 
 	if (!aer_ratelimit(dev, info.severity))
 		return;
@@ -1280,6 +1285,7 @@ int aer_get_device_error_info(struct aer_err_info *info, int i)
 	/* Must reset in this function */
 	info->status = 0;
 	info->tlp_header_valid = 0;
+	info->is_cxl = pcie_is_cxl(dev);
 
 	/* The device might not support AER */
 	if (!aer)
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index fecfeb7c8be7f..3523cc8597612 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -301,9 +301,11 @@ TRACE_EVENT(aer_event,
 		 const u32 status,
 		 const u8 severity,
 		 const u8 tlp_header_valid,
-		 struct pcie_tlp_log *tlp),
+		 struct pcie_tlp_log *tlp,
+		 const char *bus_type),
 
-	TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp),
+
+	TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp, bus_type),
 
 	TP_STRUCT__entry(
 		__string(	dev_name,	dev_name	)
@@ -311,10 +313,12 @@ TRACE_EVENT(aer_event,
 		__field(	u8,		severity	)
 		__field(	u8, 		tlp_header_valid)
 		__array(	u32, 		tlp_header, PCIE_STD_MAX_TLP_HEADERLOG)
+		__string(	bus_type,	bus_type	)
 	),
 
 	TP_fast_assign(
 		__assign_str(dev_name);
+		__assign_str(bus_type);
 		__entry->status		= status;
 		__entry->severity	= severity;
 		__entry->tlp_header_valid = tlp_header_valid;
@@ -326,8 +330,8 @@ TRACE_EVENT(aer_event,
 		}
 	),
 
-	TP_printk("%s PCIe Bus Error: severity=%s, %s, TLP Header=%s\n",
-		__get_str(dev_name),
+	TP_printk("%s %s Bus Error: severity=%s, %s, TLP Header=%s\n",
+		__get_str(dev_name), __get_str(bus_type),
 		__entry->severity == AER_CORRECTABLE ? "Corrected" :
 			__entry->severity == AER_FATAL ?
 			"Fatal" : "Uncorrected, non-fatal",

From 6b3be6b7a150f84a77bfffed07bdeba4b5558436 Mon Sep 17 00:00:00 2001
From: Terry Bowman <terry.bowman@amd.com>
Date: Mon, 19 Jan 2026 18:40:58 -0800
Subject: [PATCH 104/143] PCI/AER: Update struct aer_err_info with kernel-doc
 formatting

Update the existing 'struct aer_err_info' definition to use kernel-doc
formatting. Remove the inline comments to reduce noise and do not introduce
functional changes. This will improve readability and maintainability.

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Link: https://patch.msgid.link/20260114182055.46029-16-terry.bowman@amd.com
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
(cherry picked from commit fda78d848178fb2b4eea74d96218c6c98fbe8562)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/pci/pci.h | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 7ed929cfd45dd..d4ae4eef89975 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -607,16 +607,35 @@ static inline bool pci_dev_binding_disallowed(struct pci_dev *dev)
 
 #define AER_MAX_MULTI_ERR_DEVICES	5	/* Not likely to have more */
 
+/**
+ * struct aer_err_info - AER Error Information
+ * @dev: Devices reporting error
+ * @ratelimit_print: Flag to log or not log the devices' error. 0=NotLog/1=Log
+ * @__pad1: Padding for alignment
+ * @error_dev_num: Number of devices reporting an error
+ * @level: printk level to use in logging
+ * @id: Value from register PCI_ERR_ROOT_ERR_SRC
+ * @severity: AER severity, 0-UNCOR Non-fatal, 1-UNCOR fatal, 2-COR
+ * @root_ratelimit_print: Flag to log or not log the root's error. 0=NotLog/1=Log
+ * @multi_error_valid: If multiple errors are reported
+ * @first_error: First reported error
+ * @__pad2: Padding for alignment
+ * @is_cxl: Bus type error: 0-PCI Bus error, 1-CXL Bus error
+ * @tlp_header_valid: Indicates if TLP field contains error information
+ * @status: COR/UNCOR error status
+ * @mask: COR/UNCOR mask
+ * @tlp: Transaction packet information
+ */
 struct aer_err_info {
 	struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES];
 	int ratelimit_print[AER_MAX_MULTI_ERR_DEVICES];
 	int error_dev_num;
-	const char *level;		/* printk level */
+	const char *level;
 
 	unsigned int id:16;
 
-	unsigned int severity:2;	/* 0:NONFATAL | 1:FATAL | 2:COR */
-	unsigned int root_ratelimit_print:1;	/* 0=skip, 1=print */
+	unsigned int severity:2;
+	unsigned int root_ratelimit_print:1;
 	unsigned int __pad1:4;
 	unsigned int multi_error_valid:1;
 
@@ -625,9 +644,9 @@ struct aer_err_info {
 	unsigned int is_cxl:1;
 	unsigned int tlp_header_valid:1;
 
-	unsigned int status;		/* COR/UNCOR Error Status */
-	unsigned int mask;		/* COR/UNCOR Error Mask */
-	struct pcie_tlp_log tlp;	/* TLP Header */
+	unsigned int status;
+	unsigned int mask;
+	struct pcie_tlp_log tlp;
 };
 
 int aer_get_device_error_info(struct aer_err_info *info, int i);

From dd9ca3b59da228165eb8383119a54603261fb837 Mon Sep 17 00:00:00 2001
From: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
Date: Tue, 10 Feb 2026 06:44:55 +0000
Subject: [PATCH 105/143] NVIDIA: VR: SAUCE: cxl/region: Skip decoder reset on
 detach for autodiscovered regions

__cxl_decoder_detach() currently resets decoder programming whenever a
region is detached if cxl_config_state is beyond CXL_CONFIG_ACTIVE. For
autodiscovered regions, this can incorrectly tear down decoder state
that may be relied upon by other consumers or by subsequent ownership
decisions.

Skip cxl_region_decode_reset() during detach when CXL_REGION_F_AUTO is
set.

Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Alejandro Lucero <alucerop@amd.com>
Tested-by: Tomasz Wolski <tomasz.wolski@fujitsu.com>
(backported from https://lore.kernel.org/linux-cxl/20260210064501.157591-4-Smita.KoralahalliChannabasappa@amd.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/region.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index dee25d90b3e49..50df9afac20ff 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2178,7 +2178,9 @@ __cxl_decoder_detach(struct cxl_region *cxlr,
 		cxled->part = -1;
 
 	if (p->state > CXL_CONFIG_ACTIVE) {
-		cxl_region_decode_reset(cxlr, p->interleave_ways);
+		if (!test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
+			cxl_region_decode_reset(cxlr, p->interleave_ways);
+
 		p->state = CXL_CONFIG_ACTIVE;
 	}
 

From adc3833ba27c3f26e88f1427a92f6b138a452552 Mon Sep 17 00:00:00 2001
From: Alejandro Lucero <alucerop@amd.com>
Date: Sun, 1 Feb 2026 15:54:17 +0000
Subject: [PATCH 106/143] NVIDIA: VR: SAUCE: cxl: Add type2 device basic
 support

Differentiate CXL memory expanders (type 3) from CXL device accelerators
(type 2) with a new function for initializing cxl_dev_state and a macro
for helping accel drivers to embed cxl_dev_state inside a private
struct.

Move structs to include/cxl, as an accel driver needs to know the size of
cxl_dev_state in order to embed it inside its private struct.

Use the same new initialization in the type3 pci driver.

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Ben Cheatham <benjamin.cheatham@amd.com>
(backported from https://lore.kernel.org/linux-cxl/20260201155438.2664640-1-alejandro.lucero-palau@amd.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/mbox.c      |  12 +-
 drivers/cxl/core/memdev.c    |  32 +++++
 drivers/cxl/cxl.h            |  97 +--------------
 drivers/cxl/cxlmem.h         |  86 +------------
 drivers/cxl/pci.c            |  14 +--
 include/cxl/cxl.h            | 226 +++++++++++++++++++++++++++++++++++
 tools/testing/cxl/test/mem.c |   3 +-
 7 files changed, 274 insertions(+), 196 deletions(-)
 create mode 100644 include/cxl/cxl.h

diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index fa6dd0c94656f..bee84d0101d1a 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -1514,23 +1514,21 @@ int cxl_mailbox_init(struct cxl_mailbox *cxl_mbox, struct device *host)
 }
 EXPORT_SYMBOL_NS_GPL(cxl_mailbox_init, "CXL");
 
-struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
+struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev, u64 serial,
+						 u16 dvsec)
 {
 	struct cxl_memdev_state *mds;
 	int rc;
 
-	mds = devm_kzalloc(dev, sizeof(*mds), GFP_KERNEL);
+	mds = devm_cxl_dev_state_create(dev, CXL_DEVTYPE_CLASSMEM, serial,
+					dvsec, struct cxl_memdev_state, cxlds,
+					true);
 	if (!mds) {
 		dev_err(dev, "No memory available\n");
 		return ERR_PTR(-ENOMEM);
 	}
 
 	mutex_init(&mds->event.log_lock);
-	mds->cxlds.dev = dev;
-	mds->cxlds.reg_map.host = dev;
-	mds->cxlds.cxl_mbox.host = dev;
-	mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE;
-	mds->cxlds.type = CXL_DEVTYPE_CLASSMEM;
 
 	rc = devm_cxl_register_mce_notifier(dev, &mds->mce_notifier);
 	if (rc == -EOPNOTSUPP)
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index af3d0cc651387..22d156f25305d 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -656,6 +656,38 @@ static void detach_memdev(struct work_struct *work)
 
 static struct lock_class_key cxl_memdev_key;
 
+static void cxl_dev_state_init(struct cxl_dev_state *cxlds, struct device *dev,
+			       enum cxl_devtype type, u64 serial, u16 dvsec,
+			       bool has_mbox)
+{
+	*cxlds = (struct cxl_dev_state) {
+		.dev = dev,
+		.type = type,
+		.serial = serial,
+		.cxl_dvsec = dvsec,
+		.reg_map.host = dev,
+		.reg_map.resource = CXL_RESOURCE_NONE,
+	};
+
+	if (has_mbox)
+		cxlds->cxl_mbox.host = dev;
+}
+
+struct cxl_dev_state *_devm_cxl_dev_state_create(struct device *dev,
+						 enum cxl_devtype type,
+						 u64 serial, u16 dvsec,
+						 size_t size, bool has_mbox)
+{
+	struct cxl_dev_state *cxlds = devm_kzalloc(dev, size, GFP_KERNEL);
+
+	if (!cxlds)
+		return NULL;
+
+	cxl_dev_state_init(cxlds, dev, type, serial, dvsec, has_mbox);
+	return cxlds;
+}
+EXPORT_SYMBOL_NS_GPL(_devm_cxl_dev_state_create, "CXL");
+
 static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
 					   const struct file_operations *fops,
 					   const struct cxl_memdev_attach *attach)
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index e1d47062e1d3d..3eaa353e430b8 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -12,6 +12,7 @@
 #include <linux/node.h>
 #include <linux/io.h>
 #include <linux/range.h>
+#include <cxl/cxl.h>
 
 extern const struct nvdimm_security_ops *cxl_security_ops;
 
@@ -201,97 +202,6 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw)
 #define   CXLDEV_MBOX_BG_CMD_COMMAND_VENDOR_MASK GENMASK_ULL(63, 48)
 #define CXLDEV_MBOX_PAYLOAD_OFFSET 0x20
 
-/*
- * Using struct_group() allows for per register-block-type helper routines,
- * without requiring block-type agnostic code to include the prefix.
- */
-struct cxl_regs {
-	/*
-	 * Common set of CXL Component register block base pointers
-	 * @hdm_decoder: CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure
-	 * @ras: CXL 2.0 8.2.5.9 CXL RAS Capability Structure
-	 */
-	struct_group_tagged(cxl_component_regs, component,
-		void __iomem *hdm_decoder;
-		void __iomem *ras;
-	);
-	/*
-	 * Common set of CXL Device register block base pointers
-	 * @status: CXL 2.0 8.2.8.3 Device Status Registers
-	 * @mbox: CXL 2.0 8.2.8.4 Mailbox Registers
-	 * @memdev: CXL 2.0 8.2.8.5 Memory Device Registers
-	 */
-	struct_group_tagged(cxl_device_regs, device_regs,
-		void __iomem *status, *mbox, *memdev;
-	);
-
-	struct_group_tagged(cxl_pmu_regs, pmu_regs,
-		void __iomem *pmu;
-	);
-
-	/*
-	 * RCH downstream port specific RAS register
-	 * @aer: CXL 3.0 8.2.1.1 RCH Downstream Port RCRB
-	 */
-	struct_group_tagged(cxl_rch_regs, rch_regs,
-		void __iomem *dport_aer;
-	);
-
-	/*
-	 * RCD upstream port specific PCIe cap register
-	 * @pcie_cap: CXL 3.0 8.2.1.2 RCD Upstream Port RCRB
-	 */
-	struct_group_tagged(cxl_rcd_regs, rcd_regs,
-		void __iomem *rcd_pcie_cap;
-	);
-};
-
-struct cxl_reg_map {
-	bool valid;
-	int id;
-	unsigned long offset;
-	unsigned long size;
-};
-
-struct cxl_component_reg_map {
-	struct cxl_reg_map hdm_decoder;
-	struct cxl_reg_map ras;
-};
-
-struct cxl_device_reg_map {
-	struct cxl_reg_map status;
-	struct cxl_reg_map mbox;
-	struct cxl_reg_map memdev;
-};
-
-struct cxl_pmu_reg_map {
-	struct cxl_reg_map pmu;
-};
-
-/**
- * struct cxl_register_map - DVSEC harvested register block mapping parameters
- * @host: device for devm operations and logging
- * @base: virtual base of the register-block-BAR + @block_offset
- * @resource: physical resource base of the register block
- * @max_size: maximum mapping size to perform register search
- * @reg_type: see enum cxl_regloc_type
- * @component_map: cxl_reg_map for component registers
- * @device_map: cxl_reg_maps for device registers
- * @pmu_map: cxl_reg_maps for CXL Performance Monitoring Units
- */
-struct cxl_register_map {
-	struct device *host;
-	void __iomem *base;
-	resource_size_t resource;
-	resource_size_t max_size;
-	u8 reg_type;
-	union {
-		struct cxl_component_reg_map component_map;
-		struct cxl_device_reg_map device_map;
-		struct cxl_pmu_reg_map pmu_map;
-	};
-};
-
 void cxl_probe_component_regs(struct device *dev, void __iomem *base,
 			      struct cxl_component_reg_map *map);
 void cxl_probe_device_regs(struct device *dev, void __iomem *base,
@@ -497,11 +407,6 @@ struct cxl_region_params {
 	resource_size_t cache_size;
 };
 
-enum cxl_partition_mode {
-	CXL_PARTMODE_RAM,
-	CXL_PARTMODE_PMEM,
-};
-
 /*
  * Indicate whether this region has been assembled by autodetection or
  * userspace assembly. Prevent endpoint decoders outside of automatic
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index ef202b34e5ea4..281546de426e4 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -113,8 +113,6 @@ int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
 			 resource_size_t base, resource_size_t len,
 			 resource_size_t skipped);
 
-#define CXL_NR_PARTITIONS_MAX 2
-
 struct cxl_dpa_info {
 	u64 size;
 	struct cxl_dpa_part_info {
@@ -373,87 +371,6 @@ struct cxl_security_state {
 	struct kernfs_node *sanitize_node;
 };
 
-/*
- * enum cxl_devtype - delineate type-2 from a generic type-3 device
- * @CXL_DEVTYPE_DEVMEM - Vendor specific CXL Type-2 device implementing HDM-D or
- *			 HDM-DB, no requirement that this device implements a
- *			 mailbox, or other memory-device-standard manageability
- *			 flows.
- * @CXL_DEVTYPE_CLASSMEM - Common class definition of a CXL Type-3 device with
- *			   HDM-H and class-mandatory memory device registers
- */
-enum cxl_devtype {
-	CXL_DEVTYPE_DEVMEM,
-	CXL_DEVTYPE_CLASSMEM,
-};
-
-/**
- * struct cxl_dpa_perf - DPA performance property entry
- * @dpa_range: range for DPA address
- * @coord: QoS performance data (i.e. latency, bandwidth)
- * @cdat_coord: raw QoS performance data from CDAT
- * @qos_class: QoS Class cookies
- */
-struct cxl_dpa_perf {
-	struct range dpa_range;
-	struct access_coordinate coord[ACCESS_COORDINATE_MAX];
-	struct access_coordinate cdat_coord[ACCESS_COORDINATE_MAX];
-	int qos_class;
-};
-
-/**
- * struct cxl_dpa_partition - DPA partition descriptor
- * @res: shortcut to the partition in the DPA resource tree (cxlds->dpa_res)
- * @perf: performance attributes of the partition from CDAT
- * @mode: operation mode for the DPA capacity, e.g. ram, pmem, dynamic...
- */
-struct cxl_dpa_partition {
-	struct resource res;
-	struct cxl_dpa_perf perf;
-	enum cxl_partition_mode mode;
-};
-
-/**
- * struct cxl_dev_state - The driver device state
- *
- * cxl_dev_state represents the CXL driver/device state.  It provides an
- * interface to mailbox commands as well as some cached data about the device.
- * Currently only memory devices are represented.
- *
- * @dev: The device associated with this CXL state
- * @cxlmd: The device representing the CXL.mem capabilities of @dev
- * @reg_map: component and ras register mapping parameters
- * @regs: Parsed register blocks
- * @cxl_dvsec: Offset to the PCIe device DVSEC
- * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH)
- * @media_ready: Indicate whether the device media is usable
- * @dpa_res: Overall DPA resource tree for the device
- * @part: DPA partition array
- * @nr_partitions: Number of DPA partitions
- * @serial: PCIe Device Serial Number
- * @type: Generic Memory Class device or Vendor Specific Memory device
- * @cxl_mbox: CXL mailbox context
- * @cxlfs: CXL features context
- */
-struct cxl_dev_state {
-	struct device *dev;
-	struct cxl_memdev *cxlmd;
-	struct cxl_register_map reg_map;
-	struct cxl_regs regs;
-	int cxl_dvsec;
-	bool rcd;
-	bool media_ready;
-	struct resource dpa_res;
-	struct cxl_dpa_partition part[CXL_NR_PARTITIONS_MAX];
-	unsigned int nr_partitions;
-	u64 serial;
-	enum cxl_devtype type;
-	struct cxl_mailbox cxl_mbox;
-#ifdef CONFIG_CXL_FEATURES
-	struct cxl_features_state *cxlfs;
-#endif
-};
-
 static inline resource_size_t cxl_pmem_size(struct cxl_dev_state *cxlds)
 {
 	/*
@@ -858,7 +775,8 @@ int cxl_dev_state_identify(struct cxl_memdev_state *mds);
 int cxl_await_media_ready(struct cxl_dev_state *cxlds);
 int cxl_enumerate_cmds(struct cxl_memdev_state *mds);
 int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info);
-struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev);
+struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev, u64 serial,
+						 u16 dvsec);
 void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
 				unsigned long *cmds);
 void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds,
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 1cf2322208735..24179cc702bfc 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -911,25 +911,25 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	int rc, pmu_count;
 	unsigned int i;
 	bool irq_avail;
+	u16 dvsec;
 
 	rc = pcim_enable_device(pdev);
 	if (rc)
 		return rc;
 	pci_set_master(pdev);
 
-	mds = cxl_memdev_state_create(&pdev->dev);
+	dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
+					  PCI_DVSEC_CXL_DEVICE);
+	if (!dvsec)
+		pci_warn(pdev, "Device DVSEC not present, skip CXL.mem init\n");
+
+	mds = cxl_memdev_state_create(&pdev->dev, pci_get_dsn(pdev), dvsec);
 	if (IS_ERR(mds))
 		return PTR_ERR(mds);
 	cxlds = &mds->cxlds;
 	pci_set_drvdata(pdev, cxlds);
 
 	cxlds->rcd = is_cxl_restricted(pdev);
-	cxlds->serial = pci_get_dsn(pdev);
-	cxlds->cxl_dvsec = pci_find_dvsec_capability(
-		pdev, PCI_VENDOR_ID_CXL, PCI_DVSEC_CXL_DEVICE);
-	if (!cxlds->cxl_dvsec)
-		dev_warn(&pdev->dev,
-			 "Device DVSEC not present, skip CXL.mem init\n");
 
 	rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map);
 	if (rc)
diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h
new file mode 100644
index 0000000000000..13d448686189c
--- /dev/null
+++ b/include/cxl/cxl.h
@@ -0,0 +1,226 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2020 Intel Corporation. */
+/* Copyright(c) 2025 Advanced Micro Devices, Inc. */
+
+#ifndef __CXL_CXL_H__
+#define __CXL_CXL_H__
+
+#include <linux/node.h>
+#include <linux/ioport.h>
+#include <cxl/mailbox.h>
+
+/**
+ * enum cxl_devtype - delineate type-2 from a generic type-3 device
+ * @CXL_DEVTYPE_DEVMEM: Vendor specific CXL Type-2 device implementing HDM-D or
+ *			 HDM-DB, no requirement that this device implements a
+ *			 mailbox, or other memory-device-standard manageability
+ *			 flows.
+ * @CXL_DEVTYPE_CLASSMEM: Common class definition of a CXL Type-3 device with
+ *			   HDM-H and class-mandatory memory device registers
+ */
+enum cxl_devtype {
+	CXL_DEVTYPE_DEVMEM,
+	CXL_DEVTYPE_CLASSMEM,
+};
+
+struct device;
+
+/*
+ * Using struct_group() allows for per register-block-type helper routines,
+ * without requiring block-type agnostic code to include the prefix.
+ */
+struct cxl_regs {
+	/*
+	 * Common set of CXL Component register block base pointers
+	 * @hdm_decoder: CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure
+	 * @ras: CXL 2.0 8.2.5.9 CXL RAS Capability Structure
+	 */
+	struct_group_tagged(cxl_component_regs, component,
+		void __iomem *hdm_decoder;
+		void __iomem *ras;
+	);
+	/*
+	 * Common set of CXL Device register block base pointers
+	 * @status: CXL 2.0 8.2.8.3 Device Status Registers
+	 * @mbox: CXL 2.0 8.2.8.4 Mailbox Registers
+	 * @memdev: CXL 2.0 8.2.8.5 Memory Device Registers
+	 */
+	struct_group_tagged(cxl_device_regs, device_regs,
+		void __iomem *status, *mbox, *memdev;
+	);
+
+	struct_group_tagged(cxl_pmu_regs, pmu_regs,
+		void __iomem *pmu;
+	);
+
+	/*
+	 * RCH downstream port specific RAS register
+	 * @aer: CXL 3.0 8.2.1.1 RCH Downstream Port RCRB
+	 */
+	struct_group_tagged(cxl_rch_regs, rch_regs,
+		void __iomem *dport_aer;
+	);
+
+	/*
+	 * RCD upstream port specific PCIe cap register
+	 * @pcie_cap: CXL 3.0 8.2.1.2 RCD Upstream Port RCRB
+	 */
+	struct_group_tagged(cxl_rcd_regs, rcd_regs,
+		void __iomem *rcd_pcie_cap;
+	);
+};
+
+struct cxl_reg_map {
+	bool valid;
+	int id;
+	unsigned long offset;
+	unsigned long size;
+};
+
+struct cxl_component_reg_map {
+	struct cxl_reg_map hdm_decoder;
+	struct cxl_reg_map ras;
+};
+
+struct cxl_device_reg_map {
+	struct cxl_reg_map status;
+	struct cxl_reg_map mbox;
+	struct cxl_reg_map memdev;
+};
+
+struct cxl_pmu_reg_map {
+	struct cxl_reg_map pmu;
+};
+
+/**
+ * struct cxl_register_map - DVSEC harvested register block mapping parameters
+ * @host: device for devm operations and logging
+ * @base: virtual base of the register-block-BAR + @block_offset
+ * @resource: physical resource base of the register block
+ * @max_size: maximum mapping size to perform register search
+ * @reg_type: see enum cxl_regloc_type
+ * @component_map: cxl_reg_map for component registers
+ * @device_map: cxl_reg_maps for device registers
+ * @pmu_map: cxl_reg_maps for CXL Performance Monitoring Units
+ */
+struct cxl_register_map {
+	struct device *host;
+	void __iomem *base;
+	resource_size_t resource;
+	resource_size_t max_size;
+	u8 reg_type;
+	union {
+		struct cxl_component_reg_map component_map;
+		struct cxl_device_reg_map device_map;
+		struct cxl_pmu_reg_map pmu_map;
+	};
+};
+
+/**
+ * struct cxl_dpa_perf - DPA performance property entry
+ * @dpa_range: range for DPA address
+ * @coord: QoS performance data (i.e. latency, bandwidth)
+ * @cdat_coord: raw QoS performance data from CDAT
+ * @qos_class: QoS Class cookies
+ */
+struct cxl_dpa_perf {
+	struct range dpa_range;
+	struct access_coordinate coord[ACCESS_COORDINATE_MAX];
+	struct access_coordinate cdat_coord[ACCESS_COORDINATE_MAX];
+	int qos_class;
+};
+
+enum cxl_partition_mode {
+	CXL_PARTMODE_RAM,
+	CXL_PARTMODE_PMEM,
+};
+
+/**
+ * struct cxl_dpa_partition - DPA partition descriptor
+ * @res: shortcut to the partition in the DPA resource tree (cxlds->dpa_res)
+ * @perf: performance attributes of the partition from CDAT
+ * @mode: operation mode for the DPA capacity, e.g. ram, pmem, dynamic...
+ */
+struct cxl_dpa_partition {
+	struct resource res;
+	struct cxl_dpa_perf perf;
+	enum cxl_partition_mode mode;
+};
+
+#define CXL_NR_PARTITIONS_MAX 2
+
+/**
+ * struct cxl_dev_state - The driver device state
+ *
+ * cxl_dev_state represents the CXL driver/device state.  It provides an
+ * interface to mailbox commands as well as some cached data about the device.
+ * Currently only memory devices are represented.
+ *
+ * @dev: The device associated with this CXL state
+ * @cxlmd: The device representing the CXL.mem capabilities of @dev
+ * @reg_map: component and ras register mapping parameters
+ * @regs: Parsed register blocks
+ * @cxl_dvsec: Offset to the PCIe device DVSEC
+ * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH)
+ * @media_ready: Indicate whether the device media is usable
+ * @dpa_res: Overall DPA resource tree for the device
+ * @part: DPA partition array
+ * @nr_partitions: Number of DPA partitions
+ * @serial: PCIe Device Serial Number
+ * @type: Generic Memory Class device or Vendor Specific Memory device
+ * @cxl_mbox: CXL mailbox context
+ * @cxlfs: CXL features context
+ */
+struct cxl_dev_state {
+	/* public for Type2 drivers */
+	struct device *dev;
+	struct cxl_memdev *cxlmd;
+
+	/* private for Type2 drivers */
+	struct cxl_register_map reg_map;
+	struct cxl_regs regs;
+	int cxl_dvsec;
+	bool rcd;
+	bool media_ready;
+	struct resource dpa_res;
+	struct cxl_dpa_partition part[CXL_NR_PARTITIONS_MAX];
+	unsigned int nr_partitions;
+	u64 serial;
+	enum cxl_devtype type;
+	struct cxl_mailbox cxl_mbox;
+#ifdef CONFIG_CXL_FEATURES
+	struct cxl_features_state *cxlfs;
+#endif
+};
+
+struct cxl_dev_state *_devm_cxl_dev_state_create(struct device *dev,
+						 enum cxl_devtype type,
+						 u64 serial, u16 dvsec,
+						 size_t size, bool has_mbox);
+
+/**
+ * devm_cxl_dev_state_create - safely create and cast a cxl dev state embedded
+ * in a driver specific struct.
+ *
+ * @parent: device behind the request
+ * @type: CXL device type
+ * @serial: device identification
+ * @dvsec: dvsec capability offset
+ * @drv_struct: driver struct embedding a cxl_dev_state struct
+ * @member: drv_struct member as cxl_dev_state
+ * @mbox: true if mailbox supported
+ *
+ * Returns a pointer to the drv_struct allocated and embedding a cxl_dev_state
+ * struct initialized.
+ *
+ * Introduced for Type2 driver support.
+ */
+#define devm_cxl_dev_state_create(parent, type, serial, dvsec, drv_struct, member, mbox)	\
+	({										\
+		static_assert(__same_type(struct cxl_dev_state,				\
+			      ((drv_struct *)NULL)->member));				\
+		static_assert(offsetof(drv_struct, member) == 0);			\
+		(drv_struct *)_devm_cxl_dev_state_create(parent, type, serial, dvsec,	\
+						      sizeof(drv_struct), mbox);	\
+	})
+#endif /* __CXL_CXL_H__ */
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index cb87e8c0e63c0..79f42f4474d47 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -1716,7 +1716,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
 	if (rc)
 		return rc;
 
-	mds = cxl_memdev_state_create(dev);
+	mds = cxl_memdev_state_create(dev, pdev->id + 1, 0);
 	if (IS_ERR(mds))
 		return PTR_ERR(mds);
 
@@ -1732,7 +1732,6 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
 	mds->event.buf = (struct cxl_get_event_payload *) mdata->event_buf;
 	INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mockmem_sanitize_work);
 
-	cxlds->serial = pdev->id + 1;
 	if (is_rcd(pdev))
 		cxlds->rcd = true;
 

From 25de561ceb1c37778055869229a1227a7949945b Mon Sep 17 00:00:00 2001
From: Alejandro Lucero <alucerop@amd.com>
Date: Sun, 1 Feb 2026 15:54:18 +0000
Subject: [PATCH 107/143] NVIDIA: VR: SAUCE: sfc: add cxl support

Add CXL initialization based on new CXL API for accel drivers and make
it dependent on kernel CXL configuration.

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: Edward Cree <ecree.xilinx@gmail.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
(backported from https://lore.kernel.org/linux-cxl/20260201155438.2664640-1-alejandro.lucero-palau@amd.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/net/ethernet/sfc/Kconfig      |  9 +++++
 drivers/net/ethernet/sfc/Makefile     |  1 +
 drivers/net/ethernet/sfc/efx.c        | 15 ++++++-
 drivers/net/ethernet/sfc/efx_cxl.c    | 56 +++++++++++++++++++++++++++
 drivers/net/ethernet/sfc/efx_cxl.h    | 40 +++++++++++++++++++
 drivers/net/ethernet/sfc/net_driver.h | 10 +++++
 6 files changed, 130 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/sfc/efx_cxl.c
 create mode 100644 drivers/net/ethernet/sfc/efx_cxl.h

diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig
index c4c43434f3143..979f2801e2a8e 100644
--- a/drivers/net/ethernet/sfc/Kconfig
+++ b/drivers/net/ethernet/sfc/Kconfig
@@ -66,6 +66,15 @@ config SFC_MCDI_LOGGING
 	  Driver-Interface) commands and responses, allowing debugging of
 	  driver/firmware interaction.  The tracing is actually enabled by
 	  a sysfs file 'mcdi_logging' under the PCI device.
+config SFC_CXL
+	bool "Solarflare SFC9100-family CXL support"
+	depends on SFC && CXL_BUS >= SFC
+	default SFC
+	help
+	  This enables SFC CXL support if the kernel is configured with CXL,
+	  allowing CTPIO to be used with CXL.mem. An SFC device with CXL
+	  support and CXL-aware firmware can use it to minimize latency
+	  when sending through CTPIO.
 
 source "drivers/net/ethernet/sfc/falcon/Kconfig"
 source "drivers/net/ethernet/sfc/siena/Kconfig"
diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile
index d99039ec468d6..bb0f1891cde65 100644
--- a/drivers/net/ethernet/sfc/Makefile
+++ b/drivers/net/ethernet/sfc/Makefile
@@ -13,6 +13,7 @@ sfc-$(CONFIG_SFC_SRIOV)	+= sriov.o ef10_sriov.o ef100_sriov.o ef100_rep.o \
                            mae.o tc.o tc_bindings.o tc_counters.o \
                            tc_encap_actions.o tc_conntrack.o
 
+sfc-$(CONFIG_SFC_CXL)	+= efx_cxl.o
 obj-$(CONFIG_SFC)	+= sfc.o
 
 obj-$(CONFIG_SFC_FALCON) += falcon/
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 112e55b98ed3b..537668278375b 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -34,6 +34,7 @@
 #include "selftest.h"
 #include "sriov.h"
 #include "efx_devlink.h"
+#include "efx_cxl.h"
 
 #include "mcdi_port_common.h"
 #include "mcdi_pcol.h"
@@ -981,12 +982,15 @@ static void efx_pci_remove(struct pci_dev *pci_dev)
 	efx_pci_remove_main(efx);
 
 	efx_fini_io(efx);
+
+	probe_data = container_of(efx, struct efx_probe_data, efx);
+	efx_cxl_exit(probe_data);
+
 	pci_dbg(efx->pci_dev, "shutdown successful\n");
 
 	efx_fini_devlink_and_unlock(efx);
 	efx_fini_struct(efx);
 	free_netdev(efx->net_dev);
-	probe_data = container_of(efx, struct efx_probe_data, efx);
 	kfree(probe_data);
 };
 
@@ -1190,6 +1194,15 @@ static int efx_pci_probe(struct pci_dev *pci_dev,
 	if (rc)
 		goto fail2;
 
+	/* A successful cxl initialization implies a CXL region created to be
+	 * used for PIO buffers. If there is no CXL support, or initialization
+	 * fails, cxl_pio_initialised will be false and legacy PIO buffers
+	 * defined at specific PCI BAR regions will be used.
+	 */
+	rc = efx_cxl_init(probe_data);
+	if (rc)
+		pci_err(pci_dev, "CXL initialization failed with error %d\n", rc);
+
 	rc = efx_pci_probe_post_io(efx);
 	if (rc) {
 		/* On failure, retry once immediately.
diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c
new file mode 100644
index 0000000000000..8e0481d8dced6
--- /dev/null
+++ b/drivers/net/ethernet/sfc/efx_cxl.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ *
+ * Driver for AMD network controllers and boards
+ * Copyright (C) 2025, Advanced Micro Devices, Inc.
+ */
+
+#include <linux/pci.h>
+
+#include "net_driver.h"
+#include "efx_cxl.h"
+
+#define EFX_CTPIO_BUFFER_SIZE	SZ_256M
+
+int efx_cxl_init(struct efx_probe_data *probe_data)
+{
+	struct efx_nic *efx = &probe_data->efx;
+	struct pci_dev *pci_dev = efx->pci_dev;
+	struct efx_cxl *cxl;
+	u16 dvsec;
+
+	probe_data->cxl_pio_initialised = false;
+
+	/* Is the device configured with and using CXL? */
+	if (!pcie_is_cxl(pci_dev))
+		return 0;
+
+	dvsec = pci_find_dvsec_capability(pci_dev, PCI_VENDOR_ID_CXL,
+					  PCI_DVSEC_CXL_DEVICE);
+	if (!dvsec) {
+		pci_err(pci_dev, "CXL_DVSEC_PCIE_DEVICE capability not found\n");
+		return 0;
+	}
+
+	pci_dbg(pci_dev, "CXL_DVSEC_PCIE_DEVICE capability found\n");
+
+	/* Create a cxl_dev_state embedded in the cxl struct using cxl core api
+	 * specifying no mbox available.
+	 */
+	cxl = devm_cxl_dev_state_create(&pci_dev->dev, CXL_DEVTYPE_DEVMEM,
+					pci_dev->dev.id, dvsec, struct efx_cxl,
+					cxlds, false);
+
+	if (!cxl)
+		return -ENOMEM;
+
+	probe_data->cxl = cxl;
+
+	return 0;
+}
+
+void efx_cxl_exit(struct efx_probe_data *probe_data)
+{
+}
+
+MODULE_IMPORT_NS("CXL");
diff --git a/drivers/net/ethernet/sfc/efx_cxl.h b/drivers/net/ethernet/sfc/efx_cxl.h
new file mode 100644
index 0000000000000..961639cef692e
--- /dev/null
+++ b/drivers/net/ethernet/sfc/efx_cxl.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for AMD network controllers and boards
+ * Copyright (C) 2025, Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_CXL_H
+#define EFX_CXL_H
+
+#ifdef CONFIG_SFC_CXL
+
+#include <cxl/cxl.h>
+
+struct cxl_root_decoder;
+struct cxl_port;
+struct cxl_endpoint_decoder;
+struct cxl_region;
+struct efx_probe_data;
+
+struct efx_cxl {
+	struct cxl_dev_state cxlds;
+	struct cxl_memdev *cxlmd;
+	struct cxl_root_decoder *cxlrd;
+	struct cxl_port *endpoint;
+	struct cxl_endpoint_decoder *cxled;
+	struct cxl_region *efx_region;
+	void __iomem *ctpio_cxl;
+};
+
+int efx_cxl_init(struct efx_probe_data *probe_data);
+void efx_cxl_exit(struct efx_probe_data *probe_data);
+#else
+static inline int efx_cxl_init(struct efx_probe_data *probe_data) { return 0; }
+static inline void efx_cxl_exit(struct efx_probe_data *probe_data) {}
+#endif
+#endif
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index b98c259f672db..3964b2c56609c 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -1197,14 +1197,24 @@ struct efx_nic {
 	atomic_t n_rx_noskb_drops;
 };
 
+#ifdef CONFIG_SFC_CXL
+struct efx_cxl;
+#endif
+
 /**
  * struct efx_probe_data - State after hardware probe
  * @pci_dev: The PCI device
  * @efx: Efx NIC details
+ * @cxl: details of related cxl objects
+ * @cxl_pio_initialised: cxl initialization outcome.
  */
 struct efx_probe_data {
 	struct pci_dev *pci_dev;
 	struct efx_nic efx;
+#ifdef CONFIG_SFC_CXL
+	struct efx_cxl *cxl;
+	bool cxl_pio_initialised;
+#endif
 };
 
 static inline struct efx_nic *efx_netdev_priv(struct net_device *dev)

From 98d5c84f1b8c77896e4e39dff2c973e8015a2902 Mon Sep 17 00:00:00 2001
From: Alejandro Lucero <alucerop@amd.com>
Date: Sun, 1 Feb 2026 15:54:19 +0000
Subject: [PATCH 108/143] NVIDIA: VR: SAUCE: cxl: Move pci generic code

cxl/core/pci.c holds the helpers for CXL PCIe initialization, while
cxl/pci.c implements the initialization of a Type3 device.

Move the register setup helpers from cxl/pci.c to cxl/core/pci.c so that
they can be exported and shared with CXL Type2 device initialization.

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Ben Cheatham <benjamin.cheatham@amd.com>
Reviewed-by: Fan Ni <fan.ni@samsung.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
(backported from https://lore.kernel.org/linux-cxl/20260201155438.2664640-1-alejandro.lucero-palau@amd.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/core.h |  3 +-
 drivers/cxl/core/pci.c  | 62 ++++++++++++++++++++++++++++++++++++
 drivers/cxl/core/regs.c |  1 -
 drivers/cxl/cxl.h       |  2 --
 drivers/cxl/cxlpci.h    | 13 ++++++++
 drivers/cxl/pci.c       | 70 -----------------------------------------
 6 files changed, 77 insertions(+), 74 deletions(-)

diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 422531799af2f..256799d393616 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -187,5 +187,6 @@ int cxl_set_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid,
 		    size_t feat_data_size, u32 feat_flag, u16 offset,
 		    u16 *return_code);
 #endif
-
+resource_size_t cxl_rcd_component_reg_phys(struct device *dev,
+					   struct cxl_dport *dport);
 #endif /* __CXL_CORE_H__ */
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index b838c59d7a3c0..6b7e50858d56d 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -696,6 +696,68 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port)
 }
 EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_reset_detected, "CXL");
 
+static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev,
+				  struct cxl_register_map *map,
+				  struct cxl_dport *dport)
+{
+	resource_size_t component_reg_phys;
+
+	*map = (struct cxl_register_map) {
+		.host = &pdev->dev,
+		.resource = CXL_RESOURCE_NONE,
+	};
+
+	struct cxl_port *port __free(put_cxl_port) =
+		cxl_pci_find_port(pdev, &dport);
+	if (!port)
+		return -EPROBE_DEFER;
+
+	component_reg_phys = cxl_rcd_component_reg_phys(&pdev->dev, dport);
+	if (component_reg_phys == CXL_RESOURCE_NONE)
+		return -ENXIO;
+
+	map->resource = component_reg_phys;
+	map->reg_type = CXL_REGLOC_RBI_COMPONENT;
+	map->max_size = CXL_COMPONENT_REG_BLOCK_SIZE;
+
+	return 0;
+}
+
+int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
+			struct cxl_register_map *map)
+{
+	int rc;
+
+	rc = cxl_find_regblock(pdev, type, map);
+
+	/*
+	 * If the Register Locator DVSEC does not exist, check if it
+	 * is an RCH and try to extract the Component Registers from
+	 * an RCRB.
+	 */
+	if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) {
+		struct cxl_dport *dport;
+		struct cxl_port *port __free(put_cxl_port) =
+			cxl_pci_find_port(pdev, &dport);
+		if (!port)
+			return -EPROBE_DEFER;
+
+		rc = cxl_rcrb_get_comp_regs(pdev, map, dport);
+		if (rc)
+			return rc;
+
+		rc = cxl_dport_map_rcd_linkcap(pdev, dport);
+		if (rc)
+			return rc;
+
+	} else if (rc) {
+		return rc;
+	}
+
+	return cxl_setup_regs(map);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_pci_setup_regs, "CXL");
+
 int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c)
 {
 	int speed, bw;
diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
index a010b32143422..93710cf4f0a69 100644
--- a/drivers/cxl/core/regs.c
+++ b/drivers/cxl/core/regs.c
@@ -641,4 +641,3 @@ resource_size_t cxl_rcd_component_reg_phys(struct device *dev,
 		return CXL_RESOURCE_NONE;
 	return __rcrb_to_component(dev, &dport->rcrb, CXL_RCRB_UPSTREAM);
 }
-EXPORT_SYMBOL_NS_GPL(cxl_rcd_component_reg_phys, "CXL");
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 3eaa353e430b8..5d111980d879d 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -222,8 +222,6 @@ int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
 		      struct cxl_register_map *map);
 int cxl_setup_regs(struct cxl_register_map *map);
 struct cxl_dport;
-resource_size_t cxl_rcd_component_reg_phys(struct device *dev,
-					   struct cxl_dport *dport);
 int cxl_dport_map_rcd_linkcap(struct pci_dev *pdev, struct cxl_dport *dport);
 
 #define CXL_RESOURCE_NONE ((resource_size_t) -1)
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index 6f9c78886fd9a..d879120b27800 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -74,6 +74,17 @@ static inline bool cxl_pci_flit_256(struct pci_dev *pdev)
 	return lnksta2 & PCI_EXP_LNKSTA2_FLIT;
 }
 
+/*
+ * Assume that the caller has already validated that @pdev has CXL
+ * capabilities, any RCiEP with CXL capabilities is treated as a
+ * Restricted CXL Device (RCD) and finds upstream port and endpoint
+ * registers in a Root Complex Register Block (RCRB).
+ */
+static inline bool is_cxl_restricted(struct pci_dev *pdev)
+{
+	return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END;
+}
+
 struct cxl_dev_state;
 void read_cdat_data(struct cxl_port *port);
 
@@ -95,4 +106,6 @@ static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport,
 						struct device *host) { }
 #endif
 
+int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
+		       struct cxl_register_map *map);
 #endif /* __CXL_PCI_H__ */
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 24179cc702bfc..668d44eb1bf5c 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -465,76 +465,6 @@ static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail)
 	return 0;
 }
 
-/*
- * Assume that any RCIEP that emits the CXL memory expander class code
- * is an RCD
- */
-static bool is_cxl_restricted(struct pci_dev *pdev)
-{
-	return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END;
-}
-
-static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev,
-				  struct cxl_register_map *map,
-				  struct cxl_dport *dport)
-{
-	resource_size_t component_reg_phys;
-
-	*map = (struct cxl_register_map) {
-		.host = &pdev->dev,
-		.resource = CXL_RESOURCE_NONE,
-	};
-
-	struct cxl_port *port __free(put_cxl_port) =
-		cxl_pci_find_port(pdev, &dport);
-	if (!port)
-		return -EPROBE_DEFER;
-
-	component_reg_phys = cxl_rcd_component_reg_phys(&pdev->dev, dport);
-	if (component_reg_phys == CXL_RESOURCE_NONE)
-		return -ENXIO;
-
-	map->resource = component_reg_phys;
-	map->reg_type = CXL_REGLOC_RBI_COMPONENT;
-	map->max_size = CXL_COMPONENT_REG_BLOCK_SIZE;
-
-	return 0;
-}
-
-static int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
-			      struct cxl_register_map *map)
-{
-	int rc;
-
-	rc = cxl_find_regblock(pdev, type, map);
-
-	/*
-	 * If the Register Locator DVSEC does not exist, check if it
-	 * is an RCH and try to extract the Component Registers from
-	 * an RCRB.
-	 */
-	if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) {
-		struct cxl_dport *dport;
-		struct cxl_port *port __free(put_cxl_port) =
-			cxl_pci_find_port(pdev, &dport);
-		if (!port)
-			return -EPROBE_DEFER;
-
-		rc = cxl_rcrb_get_comp_regs(pdev, map, dport);
-		if (rc)
-			return rc;
-
-		rc = cxl_dport_map_rcd_linkcap(pdev, dport);
-		if (rc)
-			return rc;
-
-	} else if (rc) {
-		return rc;
-	}
-
-	return cxl_setup_regs(map);
-}
-
 static int cxl_pci_ras_unmask(struct pci_dev *pdev)
 {
 	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);

From 17c0f738323147a3fcac101b00a62cee8a9c9ad8 Mon Sep 17 00:00:00 2001
From: Alejandro Lucero <alucerop@amd.com>
Date: Sun, 1 Feb 2026 15:54:20 +0000
Subject: [PATCH 109/143] NVIDIA: VR: SAUCE: cxl/sfc: Map cxl component regs

Export cxl core functions so that a Type2 driver is able to discover and
map the device component registers.

Use it in sfc driver cxl initialization.

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Ben Cheatham <benjamin.cheatham@amd.com>
(backported from https://lore.kernel.org/linux-cxl/20260201155438.2664640-1-alejandro.lucero-palau@amd.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/pci.c             |  1 +
 drivers/cxl/core/port.c            |  1 +
 drivers/cxl/core/regs.c            |  1 +
 drivers/cxl/cxl.h                  |  7 ------
 drivers/cxl/cxlpci.h               | 12 ----------
 drivers/cxl/pci.c                  |  1 +
 drivers/net/ethernet/sfc/efx_cxl.c | 35 ++++++++++++++++++++++++++++++
 include/cxl/cxl.h                  | 19 ++++++++++++++++
 include/cxl/pci.h                  | 21 ++++++++++++++++++
 9 files changed, 79 insertions(+), 19 deletions(-)
 create mode 100644 include/cxl/pci.h

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 6b7e50858d56d..ba2d393c540af 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -6,6 +6,7 @@
 #include <linux/delay.h>
 #include <linux/pci.h>
 #include <linux/pci-doe.h>
+#include <cxl/pci.h>
 #include <linux/aer.h>
 #include <cxlpci.h>
 #include <cxlmem.h>
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 54f72452fb062..385588b8b30b5 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -11,6 +11,7 @@
 #include <linux/idr.h>
 #include <linux/node.h>
 #include <cxl/einj.h>
+#include <cxl/pci.h>
 #include <cxlmem.h>
 #include <cxlpci.h>
 #include <cxl.h>
diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
index 93710cf4f0a69..20c2d9fbcfe7d 100644
--- a/drivers/cxl/core/regs.c
+++ b/drivers/cxl/core/regs.c
@@ -4,6 +4,7 @@
 #include <linux/device.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
+#include <cxl/pci.h>
 #include <cxlmem.h>
 #include <cxlpci.h>
 #include <pmu.h>
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 5d111980d879d..944c5d1cccebe 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -39,10 +39,6 @@ extern const struct nvdimm_security_ops *cxl_security_ops;
 #define   CXL_CM_CAP_HDR_ARRAY_SIZE_MASK GENMASK(31, 24)
 #define CXL_CM_CAP_PTR_MASK GENMASK(31, 20)
 
-#define   CXL_CM_CAP_CAP_ID_RAS 0x2
-#define   CXL_CM_CAP_CAP_ID_HDM 0x5
-#define   CXL_CM_CAP_CAP_HDM_VERSION 1
-
 /* HDM decoders CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure */
 #define CXL_HDM_DECODER_CAP_OFFSET 0x0
 #define   CXL_HDM_DECODER_COUNT_MASK GENMASK(3, 0)
@@ -206,9 +202,6 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base,
 			      struct cxl_component_reg_map *map);
 void cxl_probe_device_regs(struct device *dev, void __iomem *base,
 			   struct cxl_device_reg_map *map);
-int cxl_map_component_regs(const struct cxl_register_map *map,
-			   struct cxl_component_regs *regs,
-			   unsigned long map_mask);
 int cxl_map_device_regs(const struct cxl_register_map *map,
 			struct cxl_device_regs *regs);
 int cxl_map_pmu_regs(struct cxl_register_map *map, struct cxl_pmu_regs *regs);
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index d879120b27800..93df1b1fa3268 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -13,16 +13,6 @@
  */
 #define CXL_PCI_DEFAULT_MAX_VECTORS 16
 
-/* Register Block Identifier (RBI) */
-enum cxl_regloc_type {
-	CXL_REGLOC_RBI_EMPTY = 0,
-	CXL_REGLOC_RBI_COMPONENT,
-	CXL_REGLOC_RBI_VIRT,
-	CXL_REGLOC_RBI_MEMDEV,
-	CXL_REGLOC_RBI_PMU,
-	CXL_REGLOC_RBI_TYPES
-};
-
 /*
  * Table Access DOE, CDAT Read Entry Response
  *
@@ -106,6 +96,4 @@ static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport,
 						struct device *host) { }
 #endif
 
-int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
-		       struct cxl_register_map *map);
 #endif /* __CXL_PCI_H__ */
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 668d44eb1bf5c..7b4699fb88709 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -11,6 +11,7 @@
 #include <linux/pci.h>
 #include <linux/aer.h>
 #include <linux/io.h>
+#include <cxl/pci.h>
 #include <cxl/mailbox.h>
 #include "cxlmem.h"
 #include "cxlpci.h"
diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c
index 8e0481d8dced6..34126bc4826c8 100644
--- a/drivers/net/ethernet/sfc/efx_cxl.c
+++ b/drivers/net/ethernet/sfc/efx_cxl.c
@@ -7,6 +7,8 @@
 
 #include <linux/pci.h>
 
+#include <cxl/cxl.h>
+#include <cxl/pci.h>
 #include "net_driver.h"
 #include "efx_cxl.h"
 
@@ -18,6 +20,7 @@ int efx_cxl_init(struct efx_probe_data *probe_data)
 	struct pci_dev *pci_dev = efx->pci_dev;
 	struct efx_cxl *cxl;
 	u16 dvsec;
+	int rc;
 
 	probe_data->cxl_pio_initialised = false;
 
@@ -44,6 +47,38 @@ int efx_cxl_init(struct efx_probe_data *probe_data)
 	if (!cxl)
 		return -ENOMEM;
 
+	rc = cxl_pci_setup_regs(pci_dev, CXL_REGLOC_RBI_COMPONENT,
+				&cxl->cxlds.reg_map);
+	if (rc) {
+		pci_err(pci_dev, "No component registers\n");
+		return rc;
+	}
+
+	if (!cxl->cxlds.reg_map.component_map.hdm_decoder.valid) {
+		pci_err(pci_dev, "Expected HDM component register not found\n");
+		return -ENODEV;
+	}
+
+	if (!cxl->cxlds.reg_map.component_map.ras.valid) {
+		pci_err(pci_dev, "Expected RAS component register not found\n");
+		return -ENODEV;
+	}
+
+	rc = cxl_map_component_regs(&cxl->cxlds.reg_map,
+				    &cxl->cxlds.regs.component,
+				    BIT(CXL_CM_CAP_CAP_ID_RAS));
+	if (rc) {
+		pci_err(pci_dev, "Failed to map RAS capability.\n");
+		return rc;
+	}
+
+	/*
+	 * Set media ready explicitly as there are neither mailbox for checking
+	 * this state nor the CXL register involved, both not mandatory for
+	 * type2.
+	 */
+	cxl->cxlds.media_ready = true;
+
 	probe_data->cxl = cxl;
 
 	return 0;
diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h
index 13d448686189c..7f2e23bce1f78 100644
--- a/include/cxl/cxl.h
+++ b/include/cxl/cxl.h
@@ -70,6 +70,10 @@ struct cxl_regs {
 	);
 };
 
+#define   CXL_CM_CAP_CAP_ID_RAS 0x2
+#define   CXL_CM_CAP_CAP_ID_HDM 0x5
+#define   CXL_CM_CAP_CAP_HDM_VERSION 1
+
 struct cxl_reg_map {
 	bool valid;
 	int id;
@@ -223,4 +227,19 @@ struct cxl_dev_state *_devm_cxl_dev_state_create(struct device *dev,
 		(drv_struct *)_devm_cxl_dev_state_create(parent, type, serial, dvsec,	\
 						      sizeof(drv_struct), mbox);	\
 	})
+
+/**
+ * cxl_map_component_regs - map cxl component registers
+ *
+ * @map: cxl register map to update with the mappings
+ * @regs: cxl component registers to work with
+ * @map_mask: cxl component regs to map
+ *
+ * Returns integer: success (0) or error (-ENOMEM)
+ *
+ * Made public for Type2 driver support.
+ */
+int cxl_map_component_regs(const struct cxl_register_map *map,
+			   struct cxl_component_regs *regs,
+			   unsigned long map_mask);
 #endif /* __CXL_CXL_H__ */
diff --git a/include/cxl/pci.h b/include/cxl/pci.h
new file mode 100644
index 0000000000000..a172439f08c60
--- /dev/null
+++ b/include/cxl/pci.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
+
+#ifndef __CXL_CXL_PCI_H__
+#define __CXL_CXL_PCI_H__
+
+/* Register Block Identifier (RBI) */
+enum cxl_regloc_type {
+	CXL_REGLOC_RBI_EMPTY = 0,
+	CXL_REGLOC_RBI_COMPONENT,
+	CXL_REGLOC_RBI_VIRT,
+	CXL_REGLOC_RBI_MEMDEV,
+	CXL_REGLOC_RBI_PMU,
+	CXL_REGLOC_RBI_TYPES
+};
+
+struct cxl_register_map;
+
+int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
+		       struct cxl_register_map *map);
+#endif

From ac5bfc99d6b8a2792c6870ed591dc69b258a9294 Mon Sep 17 00:00:00 2001
From: Alejandro Lucero <alucerop@amd.com>
Date: Sun, 1 Feb 2026 15:54:21 +0000
Subject: [PATCH 110/143] NVIDIA: VR: SAUCE: cxl/sfc: Initialize dpa without a
 mailbox

Type3 relies on mailbox CXL_MBOX_OP_IDENTIFY command for initializing
memdev state params which end up being used for DPA initialization.

Allow a Type2 driver to initialize DPA simply by giving the size of its
volatile hardware partition.

Move related functions to memdev.

Add sfc driver as the client.

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Ben Cheatham <benjamin.cheatham@amd.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
(backported from https://lore.kernel.org/linux-cxl/20260201155438.2664640-1-alejandro.lucero-palau@amd.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/core.h            |  2 +
 drivers/cxl/core/mbox.c            | 51 +----------------------
 drivers/cxl/core/memdev.c          | 66 ++++++++++++++++++++++++++++++
 drivers/net/ethernet/sfc/efx_cxl.c |  5 +++
 include/cxl/cxl.h                  |  1 +
 5 files changed, 75 insertions(+), 50 deletions(-)

diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 256799d393616..e3c85ceda2485 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -89,6 +89,8 @@ void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
 struct dentry *cxl_debugfs_create_dir(const char *dir);
 int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled,
 		     enum cxl_partition_mode mode);
+struct cxl_memdev_state;
+int cxl_mem_get_partition_info(struct cxl_memdev_state *mds);
 int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size);
 int cxl_dpa_free(struct cxl_endpoint_decoder *cxled);
 resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled);
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index bee84d0101d1a..d57a0c2d39fb6 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -1144,7 +1144,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, "CXL");
  *
  * See CXL @8.2.9.5.2.1 Get Partition Info
  */
-static int cxl_mem_get_partition_info(struct cxl_memdev_state *mds)
+int cxl_mem_get_partition_info(struct cxl_memdev_state *mds)
 {
 	struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
 	struct cxl_mbox_get_partition_info pi;
@@ -1300,55 +1300,6 @@ int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd)
 	return -EBUSY;
 }
 
-static void add_part(struct cxl_dpa_info *info, u64 start, u64 size, enum cxl_partition_mode mode)
-{
-	int i = info->nr_partitions;
-
-	if (size == 0)
-		return;
-
-	info->part[i].range = (struct range) {
-		.start = start,
-		.end = start + size - 1,
-	};
-	info->part[i].mode = mode;
-	info->nr_partitions++;
-}
-
-int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info)
-{
-	struct cxl_dev_state *cxlds = &mds->cxlds;
-	struct device *dev = cxlds->dev;
-	int rc;
-
-	if (!cxlds->media_ready) {
-		info->size = 0;
-		return 0;
-	}
-
-	info->size = mds->total_bytes;
-
-	if (mds->partition_align_bytes == 0) {
-		add_part(info, 0, mds->volatile_only_bytes, CXL_PARTMODE_RAM);
-		add_part(info, mds->volatile_only_bytes,
-			 mds->persistent_only_bytes, CXL_PARTMODE_PMEM);
-		return 0;
-	}
-
-	rc = cxl_mem_get_partition_info(mds);
-	if (rc) {
-		dev_err(dev, "Failed to query partition information\n");
-		return rc;
-	}
-
-	add_part(info, 0, mds->active_volatile_bytes, CXL_PARTMODE_RAM);
-	add_part(info, mds->active_volatile_bytes, mds->active_persistent_bytes,
-		 CXL_PARTMODE_PMEM);
-
-	return 0;
-}
-EXPORT_SYMBOL_NS_GPL(cxl_mem_dpa_fetch, "CXL");
-
 int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count)
 {
 	struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index 22d156f25305d..2c5dd72f43ca4 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -582,6 +582,72 @@ bool is_cxl_memdev(const struct device *dev)
 }
 EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, "CXL");
 
+static void add_part(struct cxl_dpa_info *info, u64 start, u64 size, enum cxl_partition_mode mode)
+{
+	int i = info->nr_partitions;
+
+	if (size == 0)
+		return;
+
+	info->part[i].range = (struct range) {
+		.start = start,
+		.end = start + size - 1,
+	};
+	info->part[i].mode = mode;
+	info->nr_partitions++;
+}
+
+int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info)
+{
+	struct cxl_dev_state *cxlds = &mds->cxlds;
+	struct device *dev = cxlds->dev;
+	int rc;
+
+	if (!cxlds->media_ready) {
+		info->size = 0;
+		return 0;
+	}
+
+	info->size = mds->total_bytes;
+
+	if (mds->partition_align_bytes == 0) {
+		add_part(info, 0, mds->volatile_only_bytes, CXL_PARTMODE_RAM);
+		add_part(info, mds->volatile_only_bytes,
+			 mds->persistent_only_bytes, CXL_PARTMODE_PMEM);
+		return 0;
+	}
+
+	rc = cxl_mem_get_partition_info(mds);
+	if (rc) {
+		dev_err(dev, "Failed to query partition information\n");
+		return rc;
+	}
+
+	add_part(info, 0, mds->active_volatile_bytes, CXL_PARTMODE_RAM);
+	add_part(info, mds->active_volatile_bytes, mds->active_persistent_bytes,
+		 CXL_PARTMODE_PMEM);
+
+	return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_mem_dpa_fetch, "CXL");
+
+/**
+ * cxl_set_capacity: initialize dpa by a driver without a mailbox.
+ *
+ * @cxlds: pointer to cxl_dev_state
+ * @capacity: device volatile memory size
+ */
+int cxl_set_capacity(struct cxl_dev_state *cxlds, u64 capacity)
+{
+	struct cxl_dpa_info range_info = {
+		.size = capacity,
+	};
+
+	add_part(&range_info, 0, capacity, CXL_PARTMODE_RAM);
+	return cxl_dpa_setup(cxlds, &range_info);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_set_capacity, "CXL");
+
 /**
  * set_exclusive_cxl_commands() - atomically disable user cxl commands
  * @mds: The device state to operate on
diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c
index 34126bc4826c8..0b10a2e6aceb6 100644
--- a/drivers/net/ethernet/sfc/efx_cxl.c
+++ b/drivers/net/ethernet/sfc/efx_cxl.c
@@ -79,6 +79,11 @@ int efx_cxl_init(struct efx_probe_data *probe_data)
 	 */
 	cxl->cxlds.media_ready = true;
 
+	if (cxl_set_capacity(&cxl->cxlds, EFX_CTPIO_BUFFER_SIZE)) {
+		pci_err(pci_dev, "dpa capacity setup failed\n");
+		return -ENODEV;
+	}
+
 	probe_data->cxl = cxl;
 
 	return 0;
diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h
index 7f2e23bce1f78..fb2f8f2395d50 100644
--- a/include/cxl/cxl.h
+++ b/include/cxl/cxl.h
@@ -242,4 +242,5 @@ struct cxl_dev_state *_devm_cxl_dev_state_create(struct device *dev,
 int cxl_map_component_regs(const struct cxl_register_map *map,
 			   struct cxl_component_regs *regs,
 			   unsigned long map_mask);
+int cxl_set_capacity(struct cxl_dev_state *cxlds, u64 capacity);
 #endif /* __CXL_CXL_H__ */

From 34564978cea4888a1627260f4eb564ad865e34d4 Mon Sep 17 00:00:00 2001
From: Alejandro Lucero <alucerop@amd.com>
Date: Sun, 1 Feb 2026 15:54:22 +0000
Subject: [PATCH 111/143] NVIDIA: VR: SAUCE: cxl: Prepare memdev creation for
 type2

Current cxl core is relying on a CXL_DEVTYPE_CLASSMEM type device when
creating a memdev leading to problems when obtaining cxl_memdev_state
references from a CXL_DEVTYPE_DEVMEM type.

Modify check for obtaining cxl_memdev_state adding CXL_DEVTYPE_DEVMEM
support.

Make devm_cxl_add_memdev accessible from an accel driver.

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Ben Cheatham <benjamin.cheatham@amd.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
(backported from https://lore.kernel.org/linux-cxl/20260201155438.2664640-1-alejandro.lucero-palau@amd.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/memdev.c | 15 +++++++++++--
 drivers/cxl/cxlmem.h      |  6 ------
 drivers/cxl/mem.c         | 45 +++++++++++++++++++++++++++++----------
 include/cxl/cxl.h         |  6 ++++++
 4 files changed, 53 insertions(+), 19 deletions(-)

diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index 2c5dd72f43ca4..1b43763b8e20e 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -7,6 +7,7 @@
 #include <linux/slab.h>
 #include <linux/idr.h>
 #include <linux/pci.h>
+#include <cxl/cxl.h>
 #include <cxlmem.h>
 #include "trace.h"
 #include "core.h"
@@ -576,9 +577,16 @@ static const struct device_type cxl_memdev_type = {
 	.groups = cxl_memdev_attribute_groups,
 };
 
+static const struct device_type cxl_accel_memdev_type = {
+	.name = "cxl_accel_memdev",
+	.release = cxl_memdev_release,
+	.devnode = cxl_memdev_devnode,
+};
+
 bool is_cxl_memdev(const struct device *dev)
 {
-	return dev->type == &cxl_memdev_type;
+	return (dev->type == &cxl_memdev_type ||
+		dev->type == &cxl_accel_memdev_type);
 }
 EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, "CXL");
 
@@ -781,7 +789,10 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
 	dev->parent = cxlds->dev;
 	dev->bus = &cxl_bus_type;
 	dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
-	dev->type = &cxl_memdev_type;
+	if (cxlds->type == CXL_DEVTYPE_DEVMEM)
+		dev->type = &cxl_accel_memdev_type;
+	else
+		dev->type = &cxl_memdev_type;
 	device_set_pm_not_required(dev);
 	INIT_WORK(&cxlmd->detach_work, detach_memdev);
 
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 281546de426e4..c98db6f18aa29 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -34,10 +34,6 @@
 	(FIELD_GET(CXLMDEV_RESET_NEEDED_MASK, status) !=                       \
 	 CXLMDEV_RESET_NEEDED_NOT)
 
-struct cxl_memdev_attach {
-	int (*probe)(struct cxl_memdev *cxlmd);
-};
-
 /**
  * struct cxl_memdev - CXL bus object representing a Type-3 Memory Device
  * @dev: driver core device object
@@ -103,8 +99,6 @@ static inline bool is_cxl_endpoint(struct cxl_port *port)
 
 struct cxl_memdev *__devm_cxl_add_memdev(struct cxl_dev_state *cxlds,
 					 const struct cxl_memdev_attach *attach);
-struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds,
-				       const struct cxl_memdev_attach *attach);
 int devm_cxl_sanitize_setup_notifier(struct device *host,
 				     struct cxl_memdev *cxlmd);
 struct cxl_memdev_state;
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 0958bea915acb..39687baedd1a9 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -65,6 +65,26 @@ static int cxl_debugfs_poison_clear(void *data, u64 dpa)
 DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_clear_fops, NULL,
 			 cxl_debugfs_poison_clear, "%llx\n");
 
+static void cxl_memdev_poison_enable(struct cxl_memdev_state *mds,
+				     struct cxl_memdev *cxlmd,
+				     struct dentry *dentry)
+{
+	/*
+	 * Skip the poison debugfs files when there is no cxl_memdev_state
+	 * (DEVMEM aka accelerators); the checks below dereference @mds.
+	 */
+	if (!mds)
+		return;
+
+	if (test_bit(CXL_POISON_ENABLED_INJECT, mds->poison.enabled_cmds))
+		debugfs_create_file("inject_poison", 0200, dentry, cxlmd,
+				    &cxl_poison_inject_fops);
+
+	if (test_bit(CXL_POISON_ENABLED_CLEAR, mds->poison.enabled_cmds))
+		debugfs_create_file("clear_poison", 0200, dentry, cxlmd,
+				    &cxl_poison_clear_fops);
+}
+
 static int cxl_mem_probe(struct device *dev)
 {
 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
@@ -92,12 +112,7 @@ static int cxl_mem_probe(struct device *dev)
 	dentry = cxl_debugfs_create_dir(dev_name(dev));
 	debugfs_create_devm_seqfile(dev, "dpamem", dentry, cxl_mem_dpa_show);
 
-	if (test_bit(CXL_POISON_ENABLED_INJECT, mds->poison.enabled_cmds))
-		debugfs_create_file("inject_poison", 0200, dentry, cxlmd,
-				    &cxl_poison_inject_fops);
-	if (test_bit(CXL_POISON_ENABLED_CLEAR, mds->poison.enabled_cmds))
-		debugfs_create_file("clear_poison", 0200, dentry, cxlmd,
-				    &cxl_poison_clear_fops);
+	cxl_memdev_poison_enable(mds, cxlmd, dentry);
 
 	rc = devm_add_action_or_reset(dev, remove_debugfs, dentry);
 	if (rc)
@@ -208,16 +223,24 @@ static ssize_t trigger_poison_list_store(struct device *dev,
 }
 static DEVICE_ATTR_WO(trigger_poison_list);
 
-static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n)
+static bool cxl_poison_attr_visible(struct kobject *kobj, struct attribute *a)
 {
 	struct device *dev = kobj_to_dev(kobj);
 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
 	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
 
-	if (a == &dev_attr_trigger_poison_list.attr)
-		if (!test_bit(CXL_POISON_ENABLED_LIST,
-			      mds->poison.enabled_cmds))
-			return 0;
+	if (!mds ||
+	    !test_bit(CXL_POISON_ENABLED_LIST, mds->poison.enabled_cmds))
+		return false;
+
+	return true;
+}
+
+static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+	if (a == &dev_attr_trigger_poison_list.attr &&
+	    !cxl_poison_attr_visible(kobj, a))
+		return 0;
 
 	return a->mode;
 }
diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h
index fb2f8f2395d50..6f8d365067af7 100644
--- a/include/cxl/cxl.h
+++ b/include/cxl/cxl.h
@@ -153,6 +153,10 @@ struct cxl_dpa_partition {
 
 #define CXL_NR_PARTITIONS_MAX 2
 
+struct cxl_memdev_attach {
+	int (*probe)(struct cxl_memdev *cxlmd);
+};
+
 /**
  * struct cxl_dev_state - The driver device state
  *
@@ -243,4 +247,6 @@ int cxl_map_component_regs(const struct cxl_register_map *map,
 			   struct cxl_component_regs *regs,
 			   unsigned long map_mask);
 int cxl_set_capacity(struct cxl_dev_state *cxlds, u64 capacity);
+struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds,
+				       const struct cxl_memdev_attach *attach);
 #endif /* __CXL_CXL_H__ */

From e3e4d2429cc1de170daba4de15209b665512ee32 Mon Sep 17 00:00:00 2001
From: Alejandro Lucero <alucerop@amd.com>
Date: Sun, 1 Feb 2026 15:54:23 +0000
Subject: [PATCH 112/143] NVIDIA: VR: SAUCE: sfc: create type2 cxl memdev

Use cxl API for creating a cxl memory device using the type2
cxl_dev_state struct.

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Martin Habets <habetsm.xilinx@gmail.com>
Reviewed-by: Fan Ni <fan.ni@samsung.com>
Acked-by: Edward Cree <ecree.xilinx@gmail.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
(backported from https://lore.kernel.org/linux-cxl/20260201155438.2664640-1-alejandro.lucero-palau@amd.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/net/ethernet/sfc/efx_cxl.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c
index 0b10a2e6aceb6..a77ef4783fcb8 100644
--- a/drivers/net/ethernet/sfc/efx_cxl.c
+++ b/drivers/net/ethernet/sfc/efx_cxl.c
@@ -84,6 +84,12 @@ int efx_cxl_init(struct efx_probe_data *probe_data)
 		return -ENODEV;
 	}
 
+	cxl->cxlmd = devm_cxl_add_memdev(&cxl->cxlds, NULL);
+	if (IS_ERR(cxl->cxlmd)) {
+		pci_err(pci_dev, "CXL accel memdev creation failed");
+		return PTR_ERR(cxl->cxlmd);
+	}
+
 	probe_data->cxl = cxl;
 
 	return 0;

From 7222243b64891c1bb8f482fc877026ebe79bbd18 Mon Sep 17 00:00:00 2001
From: Alejandro Lucero <alucerop@amd.com>
Date: Sun, 1 Feb 2026 15:54:24 +0000
Subject: [PATCH 113/143] NVIDIA: VR: SAUCE: cxl/hdm: Add support for getting
 region from committed decoder

A Type2 device configured by the BIOS can already have its HDM
committed. Add a cxl_get_committed_decoder() function for checking
this after memdev creation. A CXL region should have been created
during memdev initialization, therefore a Type2 driver can ask for
such a region for working with the HPA. If the HDM is not committed,
a Type2 driver will create the region after obtaining proper HPA
and DPA space.

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
(backported from https://lore.kernel.org/linux-cxl/20260201155438.2664640-1-alejandro.lucero-palau@amd.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/hdm.c | 39 +++++++++++++++++++++++++++++++++++++++
 include/cxl/cxl.h      |  3 +++
 2 files changed, 42 insertions(+)

diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 061f364cc9a00..b4bd3d91f1cfb 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -686,6 +686,45 @@ int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size)
 	return devm_add_action_or_reset(&port->dev, cxl_dpa_release, cxled);
 }
 
+static int find_committed_endpoint_decoder(struct device *dev, const void *data)
+{
+	struct cxl_endpoint_decoder *cxled;
+	struct cxl_port *port;
+
+	if (!is_endpoint_decoder(dev))
+		return 0;
+
+	cxled = to_cxl_endpoint_decoder(dev);
+	port = cxled_to_port(cxled);
+
+	return cxled->cxld.id == port->hdm_end;
+}
+
+struct cxl_endpoint_decoder *cxl_get_committed_decoder(struct cxl_memdev *cxlmd,
+						       struct cxl_region **cxlr)
+{
+	struct cxl_port *endpoint = cxlmd->endpoint;
+	struct cxl_endpoint_decoder *cxled;
+	struct device *cxled_dev;
+
+	if (!endpoint)
+		return NULL;
+
+	guard(rwsem_read)(&cxl_rwsem.dpa);
+	cxled_dev = device_find_child(&endpoint->dev, NULL,
+				      find_committed_endpoint_decoder);
+
+	if (!cxled_dev)
+		return NULL;
+
+	cxled = to_cxl_endpoint_decoder(cxled_dev);
+	*cxlr = cxled->cxld.region;
+
+	put_device(cxled_dev);
+	return cxled;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_get_committed_decoder, "CXL");
+
 static void cxld_set_interleave(struct cxl_decoder *cxld, u32 *ctrl)
 {
 	u16 eig;
diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h
index 6f8d365067af7..928276dba9526 100644
--- a/include/cxl/cxl.h
+++ b/include/cxl/cxl.h
@@ -249,4 +249,7 @@ int cxl_map_component_regs(const struct cxl_register_map *map,
 int cxl_set_capacity(struct cxl_dev_state *cxlds, u64 capacity);
 struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds,
 				       const struct cxl_memdev_attach *attach);
+struct cxl_region;
+struct cxl_endpoint_decoder *cxl_get_committed_decoder(struct cxl_memdev *cxlmd,
+						       struct cxl_region **cxlr);
 #endif /* __CXL_CXL_H__ */

From 8834e8542c5cc5453936864a68d664bbe633ae43 Mon Sep 17 00:00:00 2001
From: Alejandro Lucero <alucerop@amd.com>
Date: Sun, 1 Feb 2026 15:54:25 +0000
Subject: [PATCH 114/143] NVIDIA: VR: SAUCE: cxl: Add function for obtaining
 region range

A CXL region struct contains the physical address to work with.

Type2 drivers can create a CXL region but have no access to the
related struct as it is defined as private by the kernel CXL core.
Add a function for getting the cxl region range to be used for mapping
such memory range by a Type2 driver.

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Zhi Wang <zhiw@nvidia.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
(backported from https://lore.kernel.org/linux-cxl/20260201155438.2664640-1-alejandro.lucero-palau@amd.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/region.c | 23 +++++++++++++++++++++++
 include/cxl/cxl.h         |  2 ++
 2 files changed, 25 insertions(+)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 50df9afac20ff..faac07bb80c83 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2623,6 +2623,29 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
 	return ERR_PTR(rc);
 }
 
+/**
+ * cxl_get_region_range - obtain range linked to a CXL region
+ *
+ * @region: a pointer to struct cxl_region
+ * @range: a pointer to a struct range to be set
+ *
+ * Returns 0 or error.
+ */
+int cxl_get_region_range(struct cxl_region *region, struct range *range)
+{
+	if (WARN_ON_ONCE(!region))
+		return -ENODEV;
+
+	if (!region->params.res)
+		return -ENOSPC;
+
+	range->start = region->params.res->start;
+	range->end = region->params.res->end;
+
+	return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_get_region_range, "CXL");
+
 static ssize_t __create_region_show(struct cxl_root_decoder *cxlrd, char *buf)
 {
 	return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id));
diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h
index 928276dba9526..906065e0d2a69 100644
--- a/include/cxl/cxl.h
+++ b/include/cxl/cxl.h
@@ -252,4 +252,6 @@ struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds,
 struct cxl_region;
 struct cxl_endpoint_decoder *cxl_get_committed_decoder(struct cxl_memdev *cxlmd,
 						       struct cxl_region **cxlr);
+struct range;
+int cxl_get_region_range(struct cxl_region *region, struct range *range);
 #endif /* __CXL_CXL_H__ */

From 564c150f82c94a2b3c2a413011395a227a64818a Mon Sep 17 00:00:00 2001
From: Alejandro Lucero <alucerop@amd.com>
Date: Sun, 1 Feb 2026 15:54:26 +0000
Subject: [PATCH 115/143] NVIDIA: VR: SAUCE: cxl: Export function for unwinding
 cxl by accelerators

Add cxl_unregister_region() to the accelerator driver API
for a clean exit.

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
(backported from https://lore.kernel.org/linux-cxl/20260201155438.2664640-1-alejandro.lucero-palau@amd.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/region.c | 17 ++++++++++++-----
 include/cxl/cxl.h         |  1 +
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index faac07bb80c83..b145b69e70bb6 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2440,9 +2440,8 @@ static struct cxl_region *to_cxl_region(struct device *dev)
 	return container_of(dev, struct cxl_region, dev);
 }
 
-static void unregister_region(void *_cxlr)
+void cxl_unregister_region(struct cxl_region *cxlr)
 {
-	struct cxl_region *cxlr = _cxlr;
 	struct cxl_region_params *p = &cxlr->params;
 	int i;
 
@@ -2459,6 +2458,14 @@ static void unregister_region(void *_cxlr)
 	cxl_region_iomem_release(cxlr);
 	put_device(&cxlr->dev);
 }
+EXPORT_SYMBOL_NS_GPL(cxl_unregister_region, "CXL");
+
+static void __unregister_region(void *_cxlr)
+{
+	struct cxl_region *cxlr = _cxlr;
+
+	return cxl_unregister_region(cxlr);
+}
 
 static struct lock_class_key cxl_region_key;
 
@@ -2610,7 +2617,7 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
 	if (rc)
 		goto err;
 
-	rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr);
+	rc = devm_add_action_or_reset(port->uport_dev, __unregister_region, cxlr);
 	if (rc)
 		return ERR_PTR(rc);
 
@@ -2764,7 +2771,7 @@ static ssize_t delete_region_store(struct device *dev,
 	if (IS_ERR(cxlr))
 		return PTR_ERR(cxlr);
 
-	devm_release_action(port->uport_dev, unregister_region, cxlr);
+	devm_release_action(port->uport_dev, __unregister_region, cxlr);
 	put_device(&cxlr->dev);
 
 	return len;
@@ -3888,7 +3895,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
 
 	rc = __construct_region(cxlr, cxlrd, cxled);
 	if (rc) {
-		devm_release_action(port->uport_dev, unregister_region, cxlr);
+		devm_release_action(port->uport_dev, __unregister_region, cxlr);
 		return ERR_PTR(rc);
 	}
 
diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h
index 906065e0d2a69..92880c26b2d52 100644
--- a/include/cxl/cxl.h
+++ b/include/cxl/cxl.h
@@ -254,4 +254,5 @@ struct cxl_endpoint_decoder *cxl_get_committed_decoder(struct cxl_memdev *cxlmd,
 						       struct cxl_region **cxlr);
 struct range;
 int cxl_get_region_range(struct cxl_region *region, struct range *range);
+void cxl_unregister_region(struct cxl_region *cxlr);
 #endif /* __CXL_CXL_H__ */

From 5b1e9dc8ea4afbc302237f51f9701ce172c8f50f Mon Sep 17 00:00:00 2001
From: Alejandro Lucero <alucerop@amd.com>
Date: Sun, 1 Feb 2026 15:54:27 +0000
Subject: [PATCH 116/143] NVIDIA: VR: SAUCE: sfc: obtain decoder and region if
 committed by firmware

Check if device HDM is already committed during firmware/BIOS
initialization.

If so, a CXL region should exist after memdev allocation/initialization.
Get HPA from region and map it.

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
(backported from https://lore.kernel.org/linux-cxl/20260201155438.2664640-1-alejandro.lucero-palau@amd.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/net/ethernet/sfc/efx_cxl.c | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c
index a77ef4783fcb8..3536eccf1b2aa 100644
--- a/drivers/net/ethernet/sfc/efx_cxl.c
+++ b/drivers/net/ethernet/sfc/efx_cxl.c
@@ -19,6 +19,7 @@ int efx_cxl_init(struct efx_probe_data *probe_data)
 	struct efx_nic *efx = &probe_data->efx;
 	struct pci_dev *pci_dev = efx->pci_dev;
 	struct efx_cxl *cxl;
+	struct range range;
 	u16 dvsec;
 	int rc;
 
@@ -90,13 +91,38 @@ int efx_cxl_init(struct efx_probe_data *probe_data)
 		return PTR_ERR(cxl->cxlmd);
 	}
 
-	probe_data->cxl = cxl;
+	cxl->cxled = cxl_get_committed_decoder(cxl->cxlmd, &cxl->efx_region);
+	if (cxl->cxled) {
+		if (!cxl->efx_region) {
+			pci_err(pci_dev, "CXL found committed decoder without a region");
+			return -ENODEV;
+		}
+		rc = cxl_get_region_range(cxl->efx_region, &range);
+		if (rc) {
+			pci_err(pci_dev,
+				"CXL getting regions params from a committed decoder failed");
+			return rc;
+		}
+
+		cxl->ctpio_cxl = ioremap(range.start, range.end - range.start + 1);
+		if (!cxl->ctpio_cxl) {
+			pci_err(pci_dev, "CXL ioremap region (%pra) failed", &range);
+			return -ENOMEM;
+		}
+
+		probe_data->cxl = cxl;
+	}
 
 	return 0;
 }
 
 void efx_cxl_exit(struct efx_probe_data *probe_data)
 {
+	if (!probe_data->cxl)
+		return;
+
+	iounmap(probe_data->cxl->ctpio_cxl);
+	cxl_unregister_region(probe_data->cxl->efx_region);
 }
 
 MODULE_IMPORT_NS("CXL");

From 2cdf3a5021c1cc5747197c37705eec37c21e55fc Mon Sep 17 00:00:00 2001
From: Alejandro Lucero <alucerop@amd.com>
Date: Sun, 1 Feb 2026 15:54:28 +0000
Subject: [PATCH 117/143] NVIDIA: VR: SAUCE: cxl: Define a driver interface for
 HPA free space enumeration

CXL region creation involves allocating capacity from Device Physical
Address (DPA) and assigning it to decode a given Host Physical Address
(HPA). Before determining how much DPA to allocate the amount of available
HPA must be determined. Also, not all HPA is created equal, some HPA
targets RAM, some targets PMEM, some is prepared for device-memory flows
like HDM-D and HDM-DB, and some is HDM-H (host-only).

In order to support Type2 CXL devices, wrap all of those concerns into
an API that retrieves a root decoder (platform CXL window) that fits the
specified constraints and the capacity available for a new region.

Add a complementary function for releasing the reference to such root
decoder.

Based on https://lore.kernel.org/linux-cxl/168592159290.1948938.13522227102445462976.stgit@dwillia2-xfh.jf.intel.com/

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
(backported from https://lore.kernel.org/linux-cxl/20260201155438.2664640-1-alejandro.lucero-palau@amd.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/region.c | 164 ++++++++++++++++++++++++++++++++++++++
 drivers/cxl/cxl.h         |   3 +
 include/cxl/cxl.h         |   6 ++
 3 files changed, 173 insertions(+)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index b145b69e70bb6..400e5cadc3135 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -705,6 +705,170 @@ static int free_hpa(struct cxl_region *cxlr)
 	return 0;
 }
 
+struct cxlrd_max_context {
+	struct device * const *host_bridges;
+	int interleave_ways;
+	unsigned long flags;
+	resource_size_t max_hpa;
+	struct cxl_root_decoder *cxlrd;
+};
+
+static int find_max_hpa(struct device *dev, void *data)
+{
+	struct cxlrd_max_context *ctx = data;
+	struct cxl_switch_decoder *cxlsd;
+	struct cxl_root_decoder *cxlrd;
+	struct resource *res, *prev;
+	struct cxl_decoder *cxld;
+	resource_size_t free = 0;
+	resource_size_t max;
+	int found = 0;
+
+	if (!is_root_decoder(dev))
+		return 0;
+
+	cxlrd = to_cxl_root_decoder(dev);
+	cxlsd = &cxlrd->cxlsd;
+	cxld = &cxlsd->cxld;
+
+	if ((cxld->flags & ctx->flags) != ctx->flags) {
+		dev_dbg(dev, "flags not matching: %08lx vs %08lx\n",
+			cxld->flags, ctx->flags);
+		return 0;
+	}
+
+	for (int i = 0; i < ctx->interleave_ways; i++) {
+		for (int j = 0; j < ctx->interleave_ways; j++) {
+			if (ctx->host_bridges[i] == cxlsd->target[j]->dport_dev) {
+				found++;
+				break;
+			}
+		}
+	}
+
+	if (found != ctx->interleave_ways) {
+		dev_dbg(dev,
+			"Not enough host bridges. Found %d for %d interleave ways requested\n",
+			found, ctx->interleave_ways);
+		return 0;
+	}
+
+	/*
+	 * Walk the root decoder resource range relying on cxl_rwsem.region to
+	 * preclude sibling arrival/departure and find the largest free space
+	 * gap.
+	 */
+	lockdep_assert_held_read(&cxl_rwsem.region);
+	res = cxlrd->res->child;
+
+	/* With no resource child the whole parent resource is available */
+	if (!res)
+		max = resource_size(cxlrd->res);
+	else
+		max = 0;
+
+	for (prev = NULL; res; prev = res, res = res->sibling) {
+		if (!prev && res->start == cxlrd->res->start &&
+		    res->end == cxlrd->res->end) {
+			max = resource_size(cxlrd->res);
+			break;
+		}
+		/*
+		 * Sanity check for preventing arithmetic problems below as a
+		 * resource with size 0 could imply using the end field below
+		 * when set to unsigned zero - 1 or all f in hex.
+		 */
+		if (prev && !resource_size(prev))
+			continue;
+
+		if (!prev && res->start > cxlrd->res->start) {
+			free = res->start - cxlrd->res->start;
+			max = max(free, max);
+		}
+		if (prev && res->start > prev->end + 1) {
+			free = res->start - prev->end - 1;
+			max = max(free, max);
+		}
+	}
+
+	if (prev && prev->end + 1 < cxlrd->res->end + 1) {
+		free = cxlrd->res->end - prev->end;
+		max = max(free, max);
+	}
+
+	dev_dbg(cxlrd_dev(cxlrd), "found %pa bytes of free space\n", &max);
+	if (max > ctx->max_hpa) {
+		if (ctx->cxlrd)
+			put_device(cxlrd_dev(ctx->cxlrd));
+		get_device(cxlrd_dev(cxlrd));
+		ctx->cxlrd = cxlrd;
+		ctx->max_hpa = max;
+	}
+	return 0;
+}
+
+/**
+ * cxl_get_hpa_freespace - find a root decoder with free capacity per constraints
+ * @cxlmd: the mem device requiring the HPA
+ * @interleave_ways: number of entries in @host_bridges
+ * @flags: CXL_DECODER_F flags for selecting RAM vs PMEM, and Type2 device
+ * @max_avail_contig: output parameter of max contiguous bytes available in the
+ *		      returned decoder
+ *
+ * Returns a pointer to a struct cxl_root_decoder
+ *
+ * The return tuple of a 'struct cxl_root_decoder' and 'bytes available given
+ * in (@max_avail_contig))' is a point in time snapshot. If by the time the
+ * caller goes to use this decoder and its capacity is reduced then caller needs
+ * to loop and retry.
+ *
+ * The returned root decoder has an elevated reference count that needs to be
+ * put with cxl_put_root_decoder(cxlrd).
+ */
+struct cxl_root_decoder *cxl_get_hpa_freespace(struct cxl_memdev *cxlmd,
+					       int interleave_ways,
+					       unsigned long flags,
+					       resource_size_t *max_avail_contig)
+{
+	struct cxlrd_max_context ctx = {
+		.flags = flags,
+		.interleave_ways = interleave_ways,
+	};
+	struct cxl_port *root_port;
+	struct cxl_port *endpoint;
+
+	endpoint = cxlmd->endpoint;
+	if (!endpoint) {
+		dev_dbg(&cxlmd->dev, "endpoint not linked to memdev\n");
+		return ERR_PTR(-ENXIO);
+	}
+
+	ctx.host_bridges = &endpoint->host_bridge;
+
+	struct cxl_root *root __free(put_cxl_root) = find_cxl_root(endpoint);
+	if (!root) {
+		dev_dbg(&endpoint->dev, "endpoint is not related to a root port\n");
+		return ERR_PTR(-ENXIO);
+	}
+
+	root_port = &root->port;
+	scoped_guard(rwsem_read, &cxl_rwsem.region)
+		device_for_each_child(&root_port->dev, &ctx, find_max_hpa);
+
+	if (!ctx.cxlrd)
+		return ERR_PTR(-ENOMEM);
+
+	*max_avail_contig = ctx.max_hpa;
+	return ctx.cxlrd;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_get_hpa_freespace, "CXL");
+
+void cxl_put_root_decoder(struct cxl_root_decoder *cxlrd)
+{
+	put_device(cxlrd_dev(cxlrd));
+}
+EXPORT_SYMBOL_NS_GPL(cxl_put_root_decoder, "CXL");
+
 static ssize_t size_store(struct device *dev, struct device_attribute *attr,
 			  const char *buf, size_t len)
 {
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 944c5d1cccebe..c7d9b2c2908f8 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -706,6 +706,9 @@ struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev);
 struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev);
 struct cxl_endpoint_decoder *to_cxl_endpoint_decoder(struct device *dev);
 bool is_root_decoder(struct device *dev);
+
+#define cxlrd_dev(cxlrd) (&(cxlrd)->cxlsd.cxld.dev)
+
 bool is_switch_decoder(struct device *dev);
 bool is_endpoint_decoder(struct device *dev);
 struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h
index 92880c26b2d52..834dc7e789347 100644
--- a/include/cxl/cxl.h
+++ b/include/cxl/cxl.h
@@ -255,4 +255,10 @@ struct cxl_endpoint_decoder *cxl_get_committed_decoder(struct cxl_memdev *cxlmd,
 struct range;
 int cxl_get_region_range(struct cxl_region *region, struct range *range);
 void cxl_unregister_region(struct cxl_region *cxlr);
+struct cxl_port;
+struct cxl_root_decoder *cxl_get_hpa_freespace(struct cxl_memdev *cxlmd,
+					       int interleave_ways,
+					       unsigned long flags,
+					       resource_size_t *max);
+void cxl_put_root_decoder(struct cxl_root_decoder *cxlrd);
 #endif /* __CXL_CXL_H__ */

From a4b6f622bf3ac889d558add2d4350104d82222d0 Mon Sep 17 00:00:00 2001
From: Alejandro Lucero <alucerop@amd.com>
Date: Sun, 1 Feb 2026 15:54:29 +0000
Subject: [PATCH 118/143] NVIDIA: VR: SAUCE: sfc: get root decoder

Use cxl api for getting HPA (Host Physical Address) to use from a
CXL root decoder.

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Martin Habets <habetsm.xilinx@gmail.com>
Acked-by: Edward Cree <ecree.xilinx@gmail.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Ben Cheatham <benjamin.cheatham@amd.com>
(backported from https://lore.kernel.org/linux-cxl/20260201155438.2664640-1-alejandro.lucero-palau@amd.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/cxl.h                  | 15 ---------------
 drivers/net/ethernet/sfc/Kconfig   |  1 +
 drivers/net/ethernet/sfc/efx_cxl.c | 26 +++++++++++++++++++++++---
 drivers/net/ethernet/sfc/efx_cxl.h |  1 +
 include/cxl/cxl.h                  | 15 +++++++++++++++
 5 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index c7d9b2c2908f8..d1b010e5e1d07 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -220,21 +220,6 @@ int cxl_dport_map_rcd_linkcap(struct pci_dev *pdev, struct cxl_dport *dport);
 #define CXL_RESOURCE_NONE ((resource_size_t) -1)
 #define CXL_TARGET_STRLEN 20
 
-/*
- * cxl_decoder flags that define the type of memory / devices this
- * decoder supports as well as configuration lock status See "CXL 2.0
- * 8.2.5.12.7 CXL HDM Decoder 0 Control Register" for details.
- * Additionally indicate whether decoder settings were autodetected,
- * user customized.
- */
-#define CXL_DECODER_F_RAM   BIT(0)
-#define CXL_DECODER_F_PMEM  BIT(1)
-#define CXL_DECODER_F_TYPE2 BIT(2)
-#define CXL_DECODER_F_TYPE3 BIT(3)
-#define CXL_DECODER_F_LOCK  BIT(4)
-#define CXL_DECODER_F_ENABLE    BIT(5)
-#define CXL_DECODER_F_MASK  GENMASK(5, 0)
-
 enum cxl_decoder_type {
 	CXL_DECODER_DEVMEM = 2,
 	CXL_DECODER_HOSTONLYMEM = 3,
diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig
index 979f2801e2a8e..e959d9b4f4cef 100644
--- a/drivers/net/ethernet/sfc/Kconfig
+++ b/drivers/net/ethernet/sfc/Kconfig
@@ -69,6 +69,7 @@ config SFC_MCDI_LOGGING
 config SFC_CXL
 	bool "Solarflare SFC9100-family CXL support"
 	depends on SFC && CXL_BUS >= SFC
+	depends on CXL_REGION
 	default SFC
 	help
 	  This enables SFC CXL support if the kernel is configuring CXL for
diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c
index 3536eccf1b2aa..1a4c1097c3152 100644
--- a/drivers/net/ethernet/sfc/efx_cxl.c
+++ b/drivers/net/ethernet/sfc/efx_cxl.c
@@ -18,6 +18,7 @@ int efx_cxl_init(struct efx_probe_data *probe_data)
 {
 	struct efx_nic *efx = &probe_data->efx;
 	struct pci_dev *pci_dev = efx->pci_dev;
+	resource_size_t max_size;
 	struct efx_cxl *cxl;
 	struct range range;
 	u16 dvsec;
@@ -110,9 +111,24 @@ int efx_cxl_init(struct efx_probe_data *probe_data)
 			return -ENOMEM;
 		}
 
-		probe_data->cxl = cxl;
+		cxl->hdm_was_committed = true;
+	} else {
+		cxl->cxlrd = cxl_get_hpa_freespace(cxl->cxlmd, 1, CXL_DECODER_F_RAM |
+						   CXL_DECODER_F_TYPE2, &max_size);
+		if (IS_ERR(cxl->cxlrd)) {
+			dev_err(&pci_dev->dev, "cxl_get_hpa_freespace failed\n");
+			return PTR_ERR(cxl->cxlrd);
+		}
+
+		if (max_size < EFX_CTPIO_BUFFER_SIZE) {
+			dev_err(&pci_dev->dev, "%s: not enough free HPA space %pap < %u\n",
+				__func__, &max_size, EFX_CTPIO_BUFFER_SIZE);
+			cxl_put_root_decoder(cxl->cxlrd);
+			return -ENOSPC;
+		}
 	}
 
+	probe_data->cxl = cxl;
 	return 0;
 }
 
@@ -121,8 +137,12 @@ void efx_cxl_exit(struct efx_probe_data *probe_data)
 	if (!probe_data->cxl)
 		return;
 
-	iounmap(probe_data->cxl->ctpio_cxl);
-	cxl_unregister_region(probe_data->cxl->efx_region);
+	if (probe_data->cxl->hdm_was_committed) {
+		iounmap(probe_data->cxl->ctpio_cxl);
+		cxl_unregister_region(probe_data->cxl->efx_region);
+	} else {
+		cxl_put_root_decoder(probe_data->cxl->cxlrd);
+	}
 }
 
 MODULE_IMPORT_NS("CXL");
diff --git a/drivers/net/ethernet/sfc/efx_cxl.h b/drivers/net/ethernet/sfc/efx_cxl.h
index 961639cef692e..9a92e386695bb 100644
--- a/drivers/net/ethernet/sfc/efx_cxl.h
+++ b/drivers/net/ethernet/sfc/efx_cxl.h
@@ -27,6 +27,7 @@ struct efx_cxl {
 	struct cxl_root_decoder *cxlrd;
 	struct cxl_port *endpoint;
 	struct cxl_endpoint_decoder *cxled;
+	bool hdm_was_committed;
 	struct cxl_region *efx_region;
 	void __iomem *ctpio_cxl;
 };
diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h
index 834dc7e789347..783ad570a6ebe 100644
--- a/include/cxl/cxl.h
+++ b/include/cxl/cxl.h
@@ -153,6 +153,21 @@ struct cxl_dpa_partition {
 
 #define CXL_NR_PARTITIONS_MAX 2
 
+/*
+ * cxl_decoder flags that define the type of memory / devices this
+ * decoder supports as well as configuration lock status See "CXL 2.0
+ * 8.2.5.12.7 CXL HDM Decoder 0 Control Register" for details.
+ * Additionally indicate whether decoder settings were autodetected,
+ * user customized.
+ */
+#define CXL_DECODER_F_RAM   BIT(0)
+#define CXL_DECODER_F_PMEM  BIT(1)
+#define CXL_DECODER_F_TYPE2 BIT(2)
+#define CXL_DECODER_F_TYPE3 BIT(3)
+#define CXL_DECODER_F_LOCK  BIT(4)
+#define CXL_DECODER_F_ENABLE    BIT(5)
+#define CXL_DECODER_F_MASK  GENMASK(5, 0)
+
 struct cxl_memdev_attach {
 	int (*probe)(struct cxl_memdev *cxlmd);
 };

From 50ebf24971214621627b609f429dd80d2cc7eaaa Mon Sep 17 00:00:00 2001
From: Alejandro Lucero <alucerop@amd.com>
Date: Sun, 1 Feb 2026 15:54:30 +0000
Subject: [PATCH 119/143] NVIDIA: VR: SAUCE: cxl: Define a driver interface for
 DPA allocation

Region creation involves finding available DPA (device-physical-address)
capacity to map into HPA (host-physical-address) space.

In order to support CXL Type2 devices, define an API, cxl_request_dpa(),
that tries to allocate the DPA memory the driver requires to operate. The
memory requested should not be bigger than the max available HPA obtained
previously with cxl_get_hpa_freespace().

Based on https://lore.kernel.org/linux-cxl/168592158743.1948938.7622563891193802610.stgit@dwillia2-xfh.jf.intel.com/

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Ben Cheatham <benjamin.cheatham@amd.com>
(backported from https://lore.kernel.org/linux-cxl/20260201155438.2664640-1-alejandro.lucero-palau@amd.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/hdm.c | 84 ++++++++++++++++++++++++++++++++++++++++++
 drivers/cxl/cxl.h      |  1 +
 include/cxl/cxl.h      |  5 +++
 3 files changed, 90 insertions(+)

diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index b4bd3d91f1cfb..70da3daac3178 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -3,6 +3,7 @@
 #include <linux/seq_file.h>
 #include <linux/device.h>
 #include <linux/delay.h>
+#include <cxl/cxl.h>
 
 #include "cxlmem.h"
 #include "core.h"
@@ -546,6 +547,12 @@ bool cxl_resource_contains_addr(const struct resource *res, const resource_size_
 	return resource_contains(res, &_addr);
 }
 
+/**
+ * cxl_dpa_free - release DPA (Device Physical Address)
+ * @cxled: endpoint decoder linked to the DPA
+ *
+ * Returns 0 or error.
+ */
 int cxl_dpa_free(struct cxl_endpoint_decoder *cxled)
 {
 	struct cxl_port *port = cxled_to_port(cxled);
@@ -572,6 +579,7 @@ int cxl_dpa_free(struct cxl_endpoint_decoder *cxled)
 	devm_cxl_dpa_release(cxled);
 	return 0;
 }
+EXPORT_SYMBOL_NS_GPL(cxl_dpa_free, "CXL");
 
 int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled,
 		     enum cxl_partition_mode mode)
@@ -603,6 +611,82 @@ int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled,
 	return 0;
 }
 
+static int find_free_decoder(struct device *dev, const void *data)
+{
+	struct cxl_endpoint_decoder *cxled;
+	struct cxl_port *port;
+
+	if (!is_endpoint_decoder(dev))
+		return 0;
+
+	cxled = to_cxl_endpoint_decoder(dev);
+	port = cxled_to_port(cxled);
+
+	return cxled->cxld.id == (port->hdm_end + 1);
+}
+
+static struct cxl_endpoint_decoder *
+cxl_find_free_decoder(struct cxl_memdev *cxlmd)
+{
+	struct cxl_port *endpoint = cxlmd->endpoint;
+	struct device *dev;
+
+	guard(rwsem_read)(&cxl_rwsem.dpa);
+	dev = device_find_child(&endpoint->dev, NULL,
+				find_free_decoder);
+	if (!dev)
+		return NULL;
+
+	return to_cxl_endpoint_decoder(dev);
+}
+
+/**
+ * cxl_request_dpa - search and reserve DPA given input constraints
+ * @cxlmd: memdev with an endpoint port with available decoders
+ * @mode: CXL partition mode (ram vs pmem)
+ * @alloc: dpa size required
+ *
+ * Returns a pointer to a 'struct cxl_endpoint_decoder' on success or
+ * an errno encoded pointer on failure.
+ *
+ * Given that a region needs to allocate from limited HPA capacity it
+ * may be the case that a device has more mappable DPA capacity than
+ * available HPA. The expectation is that @alloc is a driver known
+ * value based on the device capacity but which could not be fully
+ * available due to HPA constraints.
+ *
+ * Returns a pinned cxl_decoder with at least @alloc bytes of capacity
+ * reserved, or an error pointer. The caller is also expected to own the
+ * lifetime of the memdev registration associated with the endpoint to
+ * pin the decoder registered as well.
+ */
+struct cxl_endpoint_decoder *cxl_request_dpa(struct cxl_memdev *cxlmd,
+					     enum cxl_partition_mode mode,
+					     resource_size_t alloc)
+{
+	int rc;
+
+	if (!IS_ALIGNED(alloc, SZ_256M))
+		return ERR_PTR(-EINVAL);
+
+	struct cxl_endpoint_decoder *cxled __free(put_cxled) =
+		cxl_find_free_decoder(cxlmd);
+
+	if (!cxled)
+		return ERR_PTR(-ENODEV);
+
+	rc = cxl_dpa_set_part(cxled, mode);
+	if (rc)
+		return ERR_PTR(rc);
+
+	rc = cxl_dpa_alloc(cxled, alloc);
+	if (rc)
+		return ERR_PTR(rc);
+
+	return no_free_ptr(cxled);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_request_dpa, "CXL");
+
 static int __cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size)
 {
 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index d1b010e5e1d07..2b1f7d687a0e8 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -667,6 +667,7 @@ struct cxl_root *find_cxl_root(struct cxl_port *port);
 
 DEFINE_FREE(put_cxl_root, struct cxl_root *, if (_T) put_device(&_T->port.dev))
 DEFINE_FREE(put_cxl_port, struct cxl_port *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev))
+DEFINE_FREE(put_cxled, struct cxl_endpoint_decoder *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->cxld.dev))
 DEFINE_FREE(put_cxl_root_decoder, struct cxl_root_decoder *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->cxlsd.cxld.dev))
 DEFINE_FREE(put_cxl_region, struct cxl_region *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev))
 
diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h
index 783ad570a6ebe..4802371db00e0 100644
--- a/include/cxl/cxl.h
+++ b/include/cxl/cxl.h
@@ -7,6 +7,7 @@
 
 #include <linux/node.h>
 #include <linux/ioport.h>
+#include <linux/range.h>
 #include <cxl/mailbox.h>
 
 /**
@@ -276,4 +277,8 @@ struct cxl_root_decoder *cxl_get_hpa_freespace(struct cxl_memdev *cxlmd,
 					       unsigned long flags,
 					       resource_size_t *max);
 void cxl_put_root_decoder(struct cxl_root_decoder *cxlrd);
+struct cxl_endpoint_decoder *cxl_request_dpa(struct cxl_memdev *cxlmd,
+					     enum cxl_partition_mode mode,
+					     resource_size_t alloc);
+int cxl_dpa_free(struct cxl_endpoint_decoder *cxled);
 #endif /* __CXL_CXL_H__ */

From f5deab37ee296dd3c28b9755adb3a92e674819a1 Mon Sep 17 00:00:00 2001
From: Alejandro Lucero <alucerop@amd.com>
Date: Sun, 1 Feb 2026 15:54:31 +0000
Subject: [PATCH 120/143] NVIDIA: VR: SAUCE: sfc: get endpoint decoder

Use cxl api for getting DPA (Device Physical Address) to use through an
endpoint decoder.

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Martin Habets <habetsm.xilinx@gmail.com>
Acked-by: Edward Cree <ecree.xilinx@gmail.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Ben Cheatham <benjamin.cheatham@amd.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
(backported from https://lore.kernel.org/linux-cxl/20260201155438.2664640-1-alejandro.lucero-palau@amd.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/net/ethernet/sfc/efx_cxl.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c
index 1a4c1097c3152..2cfd0a46225f9 100644
--- a/drivers/net/ethernet/sfc/efx_cxl.c
+++ b/drivers/net/ethernet/sfc/efx_cxl.c
@@ -126,6 +126,14 @@ int efx_cxl_init(struct efx_probe_data *probe_data)
 			cxl_put_root_decoder(cxl->cxlrd);
 			return -ENOSPC;
 		}
+
+		cxl->cxled = cxl_request_dpa(cxl->cxlmd, CXL_PARTMODE_RAM,
+					     EFX_CTPIO_BUFFER_SIZE);
+		if (IS_ERR(cxl->cxled)) {
+			pci_err(pci_dev, "CXL accel request DPA failed");
+			cxl_put_root_decoder(cxl->cxlrd);
+			return PTR_ERR(cxl->cxled);
+		}
 	}
 
 	probe_data->cxl = cxl;
@@ -141,6 +149,7 @@ void efx_cxl_exit(struct efx_probe_data *probe_data)
 		iounmap(probe_data->cxl->ctpio_cxl);
 		cxl_unregister_region(probe_data->cxl->efx_region);
 	} else {
+		cxl_dpa_free(probe_data->cxl->cxled);
 		cxl_put_root_decoder(probe_data->cxl->cxlrd);
 	}
 }

From 26af67c48ae74bf40b0b3cf63675ca6386bdef75 Mon Sep 17 00:00:00 2001
From: Alejandro Lucero <alucerop@amd.com>
Date: Sun, 1 Feb 2026 15:54:32 +0000
Subject: [PATCH 121/143] NVIDIA: VR: SAUCE: cxl: Make region type based on
 endpoint type

Current code is expecting Type3 or CXL_DECODER_HOSTONLYMEM devices only.
Support for Type2 implies region type needs to be based on the endpoint
type HDM-D[B] instead.

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Zhi Wang <zhiw@nvidia.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Ben Cheatham <benjamin.cheatham@amd.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Davidlohr Bueso <daves@stgolabs.net>
Reviewed-by: Gregory Price <gourry@gourry.net>
Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
(backported from https://lore.kernel.org/linux-cxl/20260201155438.2664640-1-alejandro.lucero-palau@amd.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/region.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 400e5cadc3135..f72988b355ec9 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2835,7 +2835,8 @@ static ssize_t create_ram_region_show(struct device *dev,
 }
 
 static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
-					  enum cxl_partition_mode mode, int id)
+					  enum cxl_partition_mode mode, int id,
+					  enum cxl_decoder_type target_type)
 {
 	int rc;
 
@@ -2857,7 +2858,7 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
 		return ERR_PTR(-EBUSY);
 	}
 
-	return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM);
+	return devm_cxl_add_region(cxlrd, id, mode, target_type);
 }
 
 static ssize_t create_region_store(struct device *dev, const char *buf,
@@ -2871,7 +2872,7 @@ static ssize_t create_region_store(struct device *dev, const char *buf,
 	if (rc != 1)
 		return -EINVAL;
 
-	cxlr = __create_region(cxlrd, mode, id);
+	cxlr = __create_region(cxlrd, mode, id, CXL_DECODER_HOSTONLYMEM);
 	if (IS_ERR(cxlr))
 		return PTR_ERR(cxlr);
 
@@ -4046,7 +4047,8 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
 
 	do {
 		cxlr = __create_region(cxlrd, cxlds->part[part].mode,
-				       atomic_read(&cxlrd->region_id));
+				       atomic_read(&cxlrd->region_id),
+				       cxled->cxld.target_type);
 	} while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
 
 	if (IS_ERR(cxlr)) {

From d86e720921aa53b83a3f9da2caa938b022060e89 Mon Sep 17 00:00:00 2001
From: Alejandro Lucero <alucerop@amd.com>
Date: Sun, 1 Feb 2026 15:54:33 +0000
Subject: [PATCH 122/143] NVIDIA: VR: SAUCE: cxl/region: Factor out interleave
 ways setup

Region creation based on Type3 devices is triggered from user space
allowing memory combination through interleaving.

In preparation for kernel driven region creation, that is Type2 drivers
triggering region creation backed with its advertised CXL memory, factor
out a common helper from the user-sysfs region setup for interleave ways.

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Zhi Wang <zhiw@nvidia.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Ben Cheatham <benjamin.cheatham@amd.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
(backported from https://lore.kernel.org/linux-cxl/20260201155438.2664640-1-alejandro.lucero-palau@amd.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/region.c | 43 ++++++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 16 deletions(-)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index f72988b355ec9..637914e320c7a 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -485,22 +485,14 @@ static ssize_t interleave_ways_show(struct device *dev,
 
 static const struct attribute_group *get_cxl_region_target_group(void);
 
-static ssize_t interleave_ways_store(struct device *dev,
-				     struct device_attribute *attr,
-				     const char *buf, size_t len)
+static int set_interleave_ways(struct cxl_region *cxlr, int val)
 {
-	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
+	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
 	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
-	struct cxl_region *cxlr = to_cxl_region(dev);
 	struct cxl_region_params *p = &cxlr->params;
-	unsigned int val, save;
-	int rc;
+	int save, rc;
 	u8 iw;
 
-	rc = kstrtouint(buf, 0, &val);
-	if (rc)
-		return rc;
-
 	rc = ways_to_eiw(val, &iw);
 	if (rc)
 		return rc;
@@ -515,9 +507,7 @@ static ssize_t interleave_ways_store(struct device *dev,
 		return -EINVAL;
 	}
 
-	ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
-	if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
-		return rc;
+	lockdep_assert_held_write(&cxl_rwsem.region);
 
 	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
 		return -EBUSY;
@@ -525,10 +515,31 @@ static ssize_t interleave_ways_store(struct device *dev,
 	save = p->interleave_ways;
 	p->interleave_ways = val;
 	rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
-	if (rc) {
+	if (rc)
 		p->interleave_ways = save;
+
+	return rc;
+}
+
+static ssize_t interleave_ways_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t len)
+{
+	struct cxl_region *cxlr = to_cxl_region(dev);
+	unsigned int val;
+	int rc;
+
+	rc = kstrtouint(buf, 0, &val);
+	if (rc)
+		return rc;
+
+	ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
+	if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
+		return rc;
+
+	rc = set_interleave_ways(cxlr, val);
+	if (rc)
 		return rc;
-	}
 
 	return len;
 }

From e738dac445cc8e3ef0070976ffacd2d0cd5bcc71 Mon Sep 17 00:00:00 2001
From: Alejandro Lucero <alucerop@amd.com>
Date: Sun, 1 Feb 2026 15:54:34 +0000
Subject: [PATCH 123/143] NVIDIA: VR: SAUCE: cxl/region: Factor out interleave
 granularity setup

Region creation based on Type3 devices is triggered from user space
allowing memory combination through interleaving.

In preparation for kernel driven region creation, that is Type2 drivers
triggering region creation backed with its advertised CXL memory, factor
out a common helper from the user-sysfs region setup for interleave
granularity.

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Zhi Wang <zhiw@nvidia.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Ben Cheatham <benjamin.cheatham@amd.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
(backported from https://lore.kernel.org/linux-cxl/20260201155438.2664640-1-alejandro.lucero-palau@amd.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/region.c | 39 +++++++++++++++++++++++++--------------
 1 file changed, 25 insertions(+), 14 deletions(-)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 637914e320c7a..a1003994a5564 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -559,21 +559,14 @@ static ssize_t interleave_granularity_show(struct device *dev,
 	return sysfs_emit(buf, "%d\n", p->interleave_granularity);
 }
 
-static ssize_t interleave_granularity_store(struct device *dev,
-					    struct device_attribute *attr,
-					    const char *buf, size_t len)
+static int set_interleave_granularity(struct cxl_region *cxlr, int val)
 {
-	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
+	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
 	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
-	struct cxl_region *cxlr = to_cxl_region(dev);
 	struct cxl_region_params *p = &cxlr->params;
-	int rc, val;
+	int rc;
 	u16 ig;
 
-	rc = kstrtoint(buf, 0, &val);
-	if (rc)
-		return rc;
-
 	rc = granularity_to_eig(val, &ig);
 	if (rc)
 		return rc;
@@ -589,14 +582,32 @@ static ssize_t interleave_granularity_store(struct device *dev,
 	if (cxld->interleave_ways > 1 && val != cxld->interleave_granularity)
 		return -EINVAL;
 
-	ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
-	if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
-		return rc;
-
+	lockdep_assert_held_write(&cxl_rwsem.region);
 	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
 		return -EBUSY;
 
 	p->interleave_granularity = val;
+	return 0;
+}
+
+static ssize_t interleave_granularity_store(struct device *dev,
+					    struct device_attribute *attr,
+					    const char *buf, size_t len)
+{
+	struct cxl_region *cxlr = to_cxl_region(dev);
+	int rc, val;
+
+	rc = kstrtoint(buf, 0, &val);
+	if (rc)
+		return rc;
+
+	ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
+	if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
+		return rc;
+
+	rc = set_interleave_granularity(cxlr, val);
+	if (rc)
+		return rc;
 
 	return len;
 }

From 5fff05702af15078130e886097b63c1e606c4ec7 Mon Sep 17 00:00:00 2001
From: Alejandro Lucero <alucerop@amd.com>
Date: Sun, 1 Feb 2026 15:54:35 +0000
Subject: [PATCH 124/143] NVIDIA: VR: SAUCE: cxl: Allow region creation by
 type2 drivers

Creating a CXL region requires userspace intervention through the cxl
sysfs files. Type2 support should allow accelerator drivers to create
such a cxl region from kernel code.

Add that functionality and integrate it with the current support for
memory expanders.

Based on https://lore.kernel.org/linux-cxl/168592159835.1948938.1647215579839222774.stgit@dwillia2-xfh.jf.intel.com/

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
(backported from https://lore.kernel.org/linux-cxl/20260201155438.2664640-1-alejandro.lucero-palau@amd.com/)
[jan: Resolve minor conflict due to code lines shift]
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/region.c | 131 ++++++++++++++++++++++++++++++++++++--
 include/cxl/cxl.h         |   3 +
 2 files changed, 127 insertions(+), 7 deletions(-)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index a1003994a5564..b8450895e8d15 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2946,6 +2946,14 @@ cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name)
 	return to_cxl_region(region_dev);
 }
 
+static void drop_region(struct cxl_region *cxlr)
+{
+	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
+	struct cxl_port *port = cxlrd_to_port(cxlrd);
+
+	devm_release_action(port->uport_dev, __unregister_region, cxlr);
+}
+
 static ssize_t delete_region_store(struct device *dev,
 				   struct device_attribute *attr,
 				   const char *buf, size_t len)
@@ -4049,14 +4057,12 @@ static int __construct_region(struct cxl_region *cxlr,
 	return 0;
 }
 
-/* Establish an empty region covering the given HPA range */
-static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
-					   struct cxl_endpoint_decoder *cxled)
+static struct cxl_region *construct_region_begin(struct cxl_root_decoder *cxlrd,
+						 struct cxl_endpoint_decoder *cxled)
 {
 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
-	struct cxl_port *port = cxlrd_to_port(cxlrd);
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
-	int rc, part = READ_ONCE(cxled->part);
+	int part = READ_ONCE(cxled->part);
 	struct cxl_region *cxlr;
 
 	if (part < 0 || part >= cxlds->nr_partitions) {
@@ -4073,13 +4079,26 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
 				       cxled->cxld.target_type);
 	} while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
 
-	if (IS_ERR(cxlr)) {
+	if (IS_ERR(cxlr))
 		dev_err(cxlmd->dev.parent,
 			"%s:%s: %s failed assign region: %ld\n",
 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
 			__func__, PTR_ERR(cxlr));
+
+	return cxlr;
+}
+
+/* Establish an empty region covering the given HPA range */
+static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
+					   struct cxl_endpoint_decoder *cxled)
+{
+	struct cxl_port *port = cxlrd_to_port(cxlrd);
+	struct cxl_region *cxlr;
+	int rc;
+
+	cxlr = construct_region_begin(cxlrd, cxled);
+	if (IS_ERR(cxlr))
 		return cxlr;
-	}
 
 	rc = __construct_region(cxlr, cxlrd, cxled);
 	if (rc) {
@@ -4090,6 +4109,104 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
 	return cxlr;
 }
 
+DEFINE_FREE(cxl_region_drop, struct cxl_region *, if (_T) drop_region(_T))
+
+static struct cxl_region *
+__construct_new_region(struct cxl_root_decoder *cxlrd,
+		       struct cxl_endpoint_decoder **cxled, int ways)
+{
+	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled[0]);
+	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
+	struct cxl_region_params *p;
+	resource_size_t size = 0;
+	int rc, i;
+
+	struct cxl_region *cxlr __free(cxl_region_drop) =
+		construct_region_begin(cxlrd, cxled[0]);
+	if (IS_ERR(cxlr))
+		return cxlr;
+
+	guard(rwsem_write)(&cxl_rwsem.region);
+
+	/*
+	 * Sanity check. This should not happen with an accel driver handling
+	 * the region creation.
+	 */
+	p = &cxlr->params;
+	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
+		dev_err(cxlmd->dev.parent,
+			"%s:%s: %s  unexpected region state\n",
+			dev_name(&cxlmd->dev), dev_name(&cxled[0]->cxld.dev),
+			__func__);
+		return ERR_PTR(-EBUSY);
+	}
+
+	rc = set_interleave_ways(cxlr, ways);
+	if (rc)
+		return ERR_PTR(rc);
+
+	rc = set_interleave_granularity(cxlr, cxld->interleave_granularity);
+	if (rc)
+		return ERR_PTR(rc);
+
+	scoped_guard(rwsem_read, &cxl_rwsem.dpa) {
+		for (i = 0; i < ways; i++) {
+			if (!cxled[i]->dpa_res)
+				return ERR_PTR(-EINVAL);
+			size += resource_size(cxled[i]->dpa_res);
+		}
+
+		rc = alloc_hpa(cxlr, size);
+		if (rc)
+			return ERR_PTR(rc);
+
+		for (i = 0; i < ways; i++) {
+			rc = cxl_region_attach(cxlr, cxled[i], 0);
+			if (rc)
+				return ERR_PTR(rc);
+		}
+	}
+
+	rc = cxl_region_decode_commit(cxlr);
+	if (rc)
+		return ERR_PTR(rc);
+
+	p->state = CXL_CONFIG_COMMIT;
+
+	return no_free_ptr(cxlr);
+}
+
+/**
+ * cxl_create_region - Establish a region given an endpoint decoder
+ * @cxlrd: root decoder to allocate HPA
+ * @cxled: endpoint decoders with reserved DPA capacity
+ * @ways: interleave ways required
+ *
+ * Returns a fully formed region in the commit state and attached to the
+ * cxl_region driver.
+ */
+struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
+				     struct cxl_endpoint_decoder **cxled,
+				     int ways)
+{
+	struct cxl_region *cxlr;
+
+	mutex_lock(&cxlrd->range_lock);
+	cxlr = __construct_new_region(cxlrd, cxled, ways);
+	mutex_unlock(&cxlrd->range_lock);
+	if (IS_ERR(cxlr))
+		return cxlr;
+
+	if (device_attach(&cxlr->dev) <= 0) {
+		dev_err(&cxlr->dev, "failed to create region\n");
+		drop_region(cxlr);
+		return ERR_PTR(-ENODEV);
+	}
+
+	return cxlr;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_create_region, "CXL");
+
 static struct cxl_region *
 cxl_find_region_by_range(struct cxl_root_decoder *cxlrd, struct range *hpa)
 {
diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h
index 4802371db00e0..50acbd13bcf85 100644
--- a/include/cxl/cxl.h
+++ b/include/cxl/cxl.h
@@ -281,4 +281,7 @@ struct cxl_endpoint_decoder *cxl_request_dpa(struct cxl_memdev *cxlmd,
 					     enum cxl_partition_mode mode,
 					     resource_size_t alloc);
 int cxl_dpa_free(struct cxl_endpoint_decoder *cxled);
+struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
+				     struct cxl_endpoint_decoder **cxled,
+				     int ways);
 #endif /* __CXL_CXL_H__ */

From 6750cb7eb68749baa3fed1c64c5c1b5e1dad89d0 Mon Sep 17 00:00:00 2001
From: Alejandro Lucero <alucerop@amd.com>
Date: Sun, 1 Feb 2026 15:54:36 +0000
Subject: [PATCH 125/143] NVIDIA: VR: SAUCE: cxl: Avoid dax creation for
 accelerators

By definition a type2 cxl device will use the host managed memory for
specific functionality, therefore it should not be available to other
uses.

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Davidlohr Bueso <daves@stgolabs.net>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Ben Cheatham <benjamin.cheatham@amd.com>
(backported from https://lore.kernel.org/linux-cxl/20260201155438.2664640-1-alejandro.lucero-palau@amd.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/region.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index b8450895e8d15..5a9857ec226d1 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -4451,6 +4451,13 @@ static int cxl_region_probe(struct device *dev)
 	if (rc)
 		return rc;
 
+	/*
+	 * HDM-D[B] (device-memory) regions have accelerator specific usage.
+	 * Skip device-dax registration.
+	 */
+	if (cxlr->type == CXL_DECODER_DEVMEM)
+		return 0;
+
 	/*
 	 * From this point on any path that changes the region's state away from
 	 * CXL_CONFIG_COMMIT is also responsible for releasing the driver.

From 0231daf04c229c2d6a2ebdcc33b21b13d9d5de01 Mon Sep 17 00:00:00 2001
From: Alejandro Lucero <alucerop@amd.com>
Date: Sun, 1 Feb 2026 15:54:37 +0000
Subject: [PATCH 126/143] NVIDIA: VR: SAUCE: sfc: create cxl region

Use cxl api for creating a region using the endpoint decoder related to
a DPA range.

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
(backported from https://lore.kernel.org/linux-cxl/20260201155438.2664640-1-alejandro.lucero-palau@amd.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/net/ethernet/sfc/efx_cxl.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c
index 2cfd0a46225f9..4d5f3974e51dc 100644
--- a/drivers/net/ethernet/sfc/efx_cxl.c
+++ b/drivers/net/ethernet/sfc/efx_cxl.c
@@ -134,6 +134,14 @@ int efx_cxl_init(struct efx_probe_data *probe_data)
 			cxl_put_root_decoder(cxl->cxlrd);
 			return PTR_ERR(cxl->cxled);
 		}
+
+		cxl->efx_region = cxl_create_region(cxl->cxlrd, &cxl->cxled, 1);
+		if (IS_ERR(cxl->efx_region)) {
+			pci_err(pci_dev, "CXL accel create region failed");
+			cxl_put_root_decoder(cxl->cxlrd);
+			cxl_dpa_free(cxl->cxled);
+			return PTR_ERR(cxl->efx_region);
+		}
 	}
 
 	probe_data->cxl = cxl;
@@ -147,11 +155,11 @@ void efx_cxl_exit(struct efx_probe_data *probe_data)
 
 	if (probe_data->cxl->hdm_was_committed) {
 		iounmap(probe_data->cxl->ctpio_cxl);
-		cxl_unregister_region(probe_data->cxl->efx_region);
 	} else {
 		cxl_dpa_free(probe_data->cxl->cxled);
 		cxl_put_root_decoder(probe_data->cxl->cxlrd);
 	}
+	cxl_unregister_region(probe_data->cxl->efx_region);
 }
 
 MODULE_IMPORT_NS("CXL");

From 36d7e3e41b178fb95d612397ff954c353131e128 Mon Sep 17 00:00:00 2001
From: Alejandro Lucero <alucerop@amd.com>
Date: Sun, 1 Feb 2026 15:54:38 +0000
Subject: [PATCH 127/143] NVIDIA: VR: SAUCE: sfc: support pio mapping based on
 cxl

A PIO buffer is a region of device memory to which the driver can write a
packet for TX, with the device handling the transmit doorbell without
requiring a DMA for getting the packet data, which helps reduce latency
in certain exchanges. With CXL mem protocol this latency can be lowered
further.

With a device supporting CXL and successfully initialised, use the cxl
region to map the memory range and use this mapping for PIO buffers.

Also disable those CXL-based PIO buffers when the CXL code invokes the
callback for potential cxl endpoint removal.

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
(backported from https://lore.kernel.org/linux-cxl/20260201155438.2664640-1-alejandro.lucero-palau@amd.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/net/ethernet/sfc/ef10.c       | 50 +++++++++++++++++++++++----
 drivers/net/ethernet/sfc/efx_cxl.c    | 33 ++++++++++++++----
 drivers/net/ethernet/sfc/net_driver.h |  2 ++
 drivers/net/ethernet/sfc/nic.h        |  3 ++
 4 files changed, 75 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index fcec81f862ec5..2bb6d3136c7c3 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -24,6 +24,7 @@
 #include <linux/wait.h>
 #include <linux/workqueue.h>
 #include <net/udp_tunnel.h>
+#include "efx_cxl.h"
 
 /* Hardware control for EF10 architecture including 'Huntington'. */
 
@@ -106,7 +107,7 @@ static int efx_ef10_get_vf_index(struct efx_nic *efx)
 
 static int efx_ef10_init_datapath_caps(struct efx_nic *efx)
 {
-	MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CAPABILITIES_V4_OUT_LEN);
+	MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CAPABILITIES_V7_OUT_LEN);
 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
 	size_t outlen;
 	int rc;
@@ -177,6 +178,12 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx)
 			  efx->num_mac_stats);
 	}
 
+	if (outlen < MC_CMD_GET_CAPABILITIES_V7_OUT_LEN)
+		nic_data->datapath_caps3 = 0;
+	else
+		nic_data->datapath_caps3 = MCDI_DWORD(outbuf,
+						      GET_CAPABILITIES_V7_OUT_FLAGS3);
+
 	return 0;
 }
 
@@ -919,6 +926,9 @@ static void efx_ef10_forget_old_piobufs(struct efx_nic *efx)
 static void efx_ef10_remove(struct efx_nic *efx)
 {
 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
+#ifdef CONFIG_SFC_CXL
+	struct efx_probe_data *probe_data;
+#endif
 	int rc;
 
 #ifdef CONFIG_SFC_SRIOV
@@ -949,7 +959,12 @@ static void efx_ef10_remove(struct efx_nic *efx)
 
 	efx_mcdi_rx_free_indir_table(efx);
 
+#ifdef CONFIG_SFC_CXL
+	probe_data = container_of(efx, struct efx_probe_data, efx);
+	if (nic_data->wc_membase && !probe_data->cxl_pio_in_use)
+#else
 	if (nic_data->wc_membase)
+#endif
 		iounmap(nic_data->wc_membase);
 
 	rc = efx_mcdi_free_vis(efx);
@@ -1140,6 +1155,9 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx)
 	unsigned int channel_vis, pio_write_vi_base, max_vis;
 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
 	unsigned int uc_mem_map_size, wc_mem_map_size;
+#ifdef CONFIG_SFC_CXL
+	struct efx_probe_data *probe_data;
+#endif
 	void __iomem *membase;
 	int rc;
 
@@ -1263,8 +1281,25 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx)
 	iounmap(efx->membase);
 	efx->membase = membase;
 
-	/* Set up the WC mapping if needed */
-	if (wc_mem_map_size) {
+	if (!wc_mem_map_size)
+		goto skip_pio;
+
+	/* Set up the WC mapping */
+
+#ifdef CONFIG_SFC_CXL
+	probe_data = container_of(efx, struct efx_probe_data, efx);
+	if ((nic_data->datapath_caps3 &
+	    (1 << MC_CMD_GET_CAPABILITIES_V7_OUT_CXL_CONFIG_ENABLE_LBN)) &&
+	    probe_data->cxl_pio_initialised) {
+		/* Using PIO through CXL mapping? */
+		nic_data->pio_write_base = probe_data->cxl->ctpio_cxl +
+					   (pio_write_vi_base * efx->vi_stride +
+					    ER_DZ_TX_PIOBUF - uc_mem_map_size);
+		probe_data->cxl_pio_in_use = true;
+	} else
+#endif
+	{
+		/* Using legacy PIO BAR mapping */
 		nic_data->wc_membase = ioremap_wc(efx->membase_phys +
 						  uc_mem_map_size,
 						  wc_mem_map_size);
@@ -1279,12 +1314,13 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx)
 			nic_data->wc_membase +
 			(pio_write_vi_base * efx->vi_stride + ER_DZ_TX_PIOBUF -
 			 uc_mem_map_size);
-
-		rc = efx_ef10_link_piobufs(efx);
-		if (rc)
-			efx_ef10_free_piobufs(efx);
 	}
 
+	rc = efx_ef10_link_piobufs(efx);
+	if (rc)
+		efx_ef10_free_piobufs(efx);
+
+skip_pio:
 	netif_dbg(efx, probe, efx->net_dev,
 		  "memory BAR at %pa (virtual %p+%x UC, %p+%x WC)\n",
 		  &efx->membase_phys, efx->membase, uc_mem_map_size,
diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c
index 4d5f3974e51dc..c13e1f2bf7eaf 100644
--- a/drivers/net/ethernet/sfc/efx_cxl.c
+++ b/drivers/net/ethernet/sfc/efx_cxl.c
@@ -11,6 +11,7 @@
 #include <cxl/pci.h>
 #include "net_driver.h"
 #include "efx_cxl.h"
+#include "efx.h"
 
 #define EFX_CTPIO_BUFFER_SIZE	SZ_256M
 
@@ -138,14 +139,34 @@ int efx_cxl_init(struct efx_probe_data *probe_data)
 		cxl->efx_region = cxl_create_region(cxl->cxlrd, &cxl->cxled, 1);
 		if (IS_ERR(cxl->efx_region)) {
 			pci_err(pci_dev, "CXL accel create region failed");
-			cxl_put_root_decoder(cxl->cxlrd);
-			cxl_dpa_free(cxl->cxled);
-			return PTR_ERR(cxl->efx_region);
+			rc = PTR_ERR(cxl->efx_region);
+			goto err_region;
+		}
+
+		rc = cxl_get_region_range(cxl->efx_region, &range);
+		if (rc) {
+			pci_err(pci_dev, "CXL getting regions params failed");
+			goto err_map;
+		}
+
+		cxl->ctpio_cxl = ioremap(range.start, range.end - range.start + 1);
+		if (!cxl->ctpio_cxl) {
+			pci_err(pci_dev, "CXL ioremap region (%pra) failed", &range);
+			rc = -ENOMEM;
+			goto err_map;
 		}
 	}
 
 	probe_data->cxl = cxl;
+	probe_data->cxl_pio_initialised = true;
 	return 0;
+
+err_map:
+	cxl_unregister_region(cxl->efx_region);
+err_region:
+	cxl_put_root_decoder(cxl->cxlrd);
+	cxl_dpa_free(cxl->cxled);
+	return rc;
 }
 
 void efx_cxl_exit(struct efx_probe_data *probe_data)
@@ -153,9 +174,9 @@ void efx_cxl_exit(struct efx_probe_data *probe_data)
 	if (!probe_data->cxl)
 		return;
 
-	if (probe_data->cxl->hdm_was_committed) {
-		iounmap(probe_data->cxl->ctpio_cxl);
-	} else {
+	iounmap(probe_data->cxl->ctpio_cxl);
+
+	if (!probe_data->cxl->hdm_was_committed) {
 		cxl_dpa_free(probe_data->cxl->cxled);
 		cxl_put_root_decoder(probe_data->cxl->cxlrd);
 	}
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 3964b2c56609c..bea4eecdf842d 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -1207,6 +1207,7 @@ struct efx_cxl;
  * @efx: Efx NIC details
  * @cxl: details of related cxl objects
  * @cxl_pio_initialised: cxl initialization outcome.
+ * @cxl_pio_in_use: PIO using CXL mapping
  */
 struct efx_probe_data {
 	struct pci_dev *pci_dev;
@@ -1214,6 +1215,7 @@ struct efx_probe_data {
 #ifdef CONFIG_SFC_CXL
 	struct efx_cxl *cxl;
 	bool cxl_pio_initialised;
+	bool cxl_pio_in_use;
 #endif
 };
 
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index 9fa5c4c713abd..c87cc9214690b 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -152,6 +152,8 @@ enum {
  *	%MC_CMD_GET_CAPABILITIES response)
  * @datapath_caps2: Further Capabilities of datapath firmware (FLAGS2 field of
  * %MC_CMD_GET_CAPABILITIES response)
+ * @datapath_caps3: Further Capabilities of datapath firmware (FLAGS3 field of
+ * %MC_CMD_GET_CAPABILITIES response)
  * @rx_dpcpu_fw_id: Firmware ID of the RxDPCPU
  * @tx_dpcpu_fw_id: Firmware ID of the TxDPCPU
  * @must_probe_vswitching: Flag: vswitching has yet to be setup after MC reboot
@@ -186,6 +188,7 @@ struct efx_ef10_nic_data {
 	bool must_check_datapath_caps;
 	u32 datapath_caps;
 	u32 datapath_caps2;
+	u32 datapath_caps3;
 	unsigned int rx_dpcpu_fw_id;
 	unsigned int tx_dpcpu_fw_id;
 	bool must_probe_vswitching;

From 7460ed22b18dd9f22512081460045b518aa88a08 Mon Sep 17 00:00:00 2001
From: Robert Richter <rrichter@amd.com>
Date: Tue, 28 Oct 2025 10:47:53 +0100
Subject: [PATCH 128/143] NVIDIA: VR: SAUCE: cxl/region: Support multi-level
 interleaving with smaller granularities for lower levels

The CXL specification supports multi-level interleaving "as long as
all the levels use different, but consecutive, HPA bits to select the
target and no Interleave Set has more than 8 devices" (from 3.2).

Currently the kernel expects that a decoder's "interleave granularity
is a multiple of @parent_port granularity". That is, the granularity
of a lower level is bigger than those of the parent and uses the outer
HPA bits as selector. It works e.g. for the following 8-way config:

 * cross-link (cross-hostbridge config in CFMWS):
   * 4-way
   * 256 granularity
   * Selector: HPA[8:9]
 * sub-link (CXL Host bridge config of the HDM):
   * 2-way
   * 1024 granularity
   * Selector: HPA[10]

Now, if the outer HPA bits are used for the cross-hostbridge, an 8-way
config could look like this:

 * cross-link (cross-hostbridge config in CFMWS):
   * 4-way
   * 512 granularity
   * Selector: HPA[9:10]
 * sub-link (CXL Host bridge config of the HDM):
   * 2-way
   * 256 granularity
   * Selector: HPA[8]

The enumeration of decoders for this configuration then fails with the
following error:

 cxl region0: pci0000:00:port1 cxl_port_setup_targets expected iw: 2 ig: 1024 [mem 0x10000000000-0x1ffffffffff flags 0x200]
 cxl region0: pci0000:00:port1 cxl_port_setup_targets got iw: 2 ig: 256 state: enabled 0x10000000000:0x1ffffffffff
 cxl_port endpoint12: failed to attach decoder12.0 to region0: -6

Note that this happens only if firmware is setting up the decoders
(CXL_REGION_F_AUTO). For userspace region assembly the granularities
are chosen to increase from root down to the lower levels. That is,
outer HPA bits are always used for lower interleaving levels.

Rework the implementation to also support multi-level interleaving
with smaller granularities for lower levels. Determine the interleave
set of autodetected decoders. Check that it is a subset of the root
interleave.

The HPA selector bits are extracted for all decoders of the set and
checked to ensure that they do not overlap and are consecutive. All
decoders can be programmed now to use any bit range within the
region's target selector.

Signed-off-by: Robert Richter <rrichter@amd.com>
(backported from https://lore.kernel.org/all/20251028094754.72816-1-rrichter@amd.com/)
[jan: Resolved minor conflicts]
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/region.c | 201 ++++++++++++++++++++------------------
 1 file changed, 108 insertions(+), 93 deletions(-)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 5a9857ec226d1..b66d663deb8fd 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -1555,57 +1555,119 @@ static int check_interleave_cap(struct cxl_decoder *cxld, int iw, int ig)
 	return 0;
 }
 
+static inline u64 get_selector(u64 ways, u64 gran)
+{
+	if (!is_power_of_2(ways))
+		ways /= 3;
+
+	if (!is_power_of_2(ways) || !is_power_of_2(gran))
+		return 0;
+
+	return (ways - 1) * gran;
+}
+
 static int cxl_port_setup_targets(struct cxl_port *port,
 				  struct cxl_region *cxlr,
 				  struct cxl_endpoint_decoder *cxled)
 {
 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
-	int parent_iw, parent_ig, ig, iw, rc, pos = cxled->pos;
 	struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
 	struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
 	struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
 	struct cxl_region_params *p = &cxlr->params;
 	struct cxl_decoder *cxld = cxl_rr->decoder;
-	struct cxl_switch_decoder *cxlsd;
+	struct cxl_switch_decoder *cxlsd = to_cxl_switch_decoder(&cxld->dev);
 	struct cxl_port *iter = port;
-	u16 eig, peig;
-	u8 eiw, peiw;
+	int ig, iw = cxl_rr->nr_targets, rc, pos = cxled->pos;
+	int distance, parent_distance;
+	u64 selector, cxlr_sel;
+	u16 eig;
+	u8 eiw;
 
 	/*
 	 * While root level decoders support x3, x6, x12, switch level
 	 * decoders only support powers of 2 up to x16.
 	 */
-	if (!is_power_of_2(cxl_rr->nr_targets)) {
+	if (!is_power_of_2(iw)) {
 		dev_dbg(&cxlr->dev, "%s:%s: invalid target count %d\n",
-			dev_name(port->uport_dev), dev_name(&port->dev),
-			cxl_rr->nr_targets);
+			dev_name(port->uport_dev), dev_name(&port->dev), iw);
 		return -EINVAL;
 	}
 
-	cxlsd = to_cxl_switch_decoder(&cxld->dev);
-	if (cxl_rr->nr_targets_set) {
-		int i, distance = 1;
-		struct cxl_region_ref *cxl_rr_iter;
+	if (iw > 8 || iw > cxlsd->nr_targets) {
+		dev_dbg(&cxlr->dev,
+			"%s:%s:%s: ways: %d overflows targets: %d\n",
+			dev_name(port->uport_dev), dev_name(&port->dev),
+			dev_name(&cxld->dev), iw, cxlsd->nr_targets);
+		return -ENXIO;
+	}
 
-		/*
-		 * The "distance" between peer downstream ports represents which
-		 * endpoint positions in the region interleave a given port can
-		 * host.
-		 *
-		 * For example, at the root of a hierarchy the distance is
-		 * always 1 as every index targets a different host-bridge. At
-		 * each subsequent switch level those ports map every Nth region
-		 * position where N is the width of the switch == distance.
-		 */
-		do {
-			cxl_rr_iter = cxl_rr_load(iter, cxlr);
-			distance *= cxl_rr_iter->nr_targets;
-			iter = to_cxl_port(iter->dev.parent);
-		} while (!is_cxl_root(iter));
-		distance *= cxlrd->cxlsd.cxld.interleave_ways;
+	/*
+	 * Calculate the effective granularity and ways to determine
+	 * HPA bits used as target selectors of the interleave set.
+	 * Use this to check if the root decoder and all subsequent
+	 * HDM decoders only use bits from that range as selectors.
+	 *
+	 * The "distance" between peer downstream ports represents which
+	 * endpoint positions in the region interleave a given port can
+	 * host.
+	 *
+	 * For example, at the root of a hierarchy the distance is
+	 * always 1 as every index targets a different host-bridge. At
+	 * each subsequent switch level those ports map every Nth region
+	 * position where N is the width of the switch == distance.
+	 */
+
+	/* Start with the root decoders selector and distance. */
+	selector = get_selector(cxlrd->cxlsd.cxld.interleave_ways,
+				cxlrd->cxlsd.cxld.interleave_granularity);
+	distance = cxlrd->cxlsd.cxld.interleave_ways;
+	if (!is_power_of_2(distance))
+		distance /= 3;
+
+	for (iter = parent_port; !is_cxl_root(iter);
+	     iter = to_cxl_port(iter->dev.parent)) {
+		struct cxl_region_ref *cxl_rr_iter = cxl_rr_load(iter, cxlr);
+		struct cxl_decoder *cxld_iter = cxl_rr_iter->decoder;
+		u64 cxld_sel;
+
+		if (cxld_iter->interleave_ways == 1)
+			continue;
+
+		cxld_sel = get_selector(cxld_iter->interleave_ways,
+					cxld_iter->interleave_granularity);
+
+		if (cxld_sel & selector) {
+			dev_dbg(&cxlr->dev, "%s:%s: overlapping selectors: %#llx:%#llx\n",
+				dev_name(iter->uport_dev),
+				dev_name(&iter->dev), cxld_sel, selector);
+			return -ENXIO;
+		}
 
-		for (i = 0; i < cxl_rr->nr_targets_set; i++)
+		selector |= cxld_sel;
+		distance *= cxl_rr_iter->nr_targets;
+	}
+
+	parent_distance = distance;
+	distance *= iw;
+
+	/* The combined selector bits must fit the region selector. */
+	cxlr_sel = get_selector(p->interleave_ways,
+				p->interleave_granularity);
+
+	if ((cxlr_sel & selector) != selector) {
+		dev_dbg(&cxlr->dev, "%s:%s: invalid selectors: %#llx:%#llx\n",
+			dev_name(iter->uport_dev),
+			dev_name(&iter->dev), cxlr_sel, selector);
+		return -ENXIO;
+	}
+
+	/* Calculate remaining selector bits available for use. */
+	selector = cxlr_sel & ~selector;
+
+	if (cxl_rr->nr_targets_set) {
+		for (int i = 0; i < cxl_rr->nr_targets_set; i++)
 			if (ep->dport == cxlsd->target[i]) {
 				rc = check_last_peer(cxled, ep, cxl_rr,
 						     distance);
@@ -1616,87 +1678,40 @@ static int cxl_port_setup_targets(struct cxl_port *port,
 		goto add_target;
 	}
 
-	if (is_cxl_root(parent_port)) {
+	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
+		ig = cxld->interleave_granularity;
+	else
 		/*
+		 * Set the interleave granularity with each interleave
+		 * level to a multiple of its parent port interleave
+		 * ways. Beginning with the granularity of the root
+		 * decoder set to the region granularity (starting
+		 * with the inner selector bits of the HPA), the
+		 * granularity is increased with each level. Calculate
+		 * this using the parent distance and region
+		 * granularity.
+		 *
 		 * Root decoder IG is always set to value in CFMWS which
 		 * may be different than this region's IG.  We can use the
 		 * region's IG here since interleave_granularity_store()
 		 * does not allow interleaved host-bridges with
 		 * root IG != region IG.
 		 */
-		parent_ig = p->interleave_granularity;
-		parent_iw = cxlrd->cxlsd.cxld.interleave_ways;
-		/*
-		 * For purposes of address bit routing, use power-of-2 math for
-		 * switch ports.
-		 */
-		if (!is_power_of_2(parent_iw))
-			parent_iw /= 3;
-	} else {
-		struct cxl_region_ref *parent_rr;
-		struct cxl_decoder *parent_cxld;
-
-		parent_rr = cxl_rr_load(parent_port, cxlr);
-		parent_cxld = parent_rr->decoder;
-		parent_ig = parent_cxld->interleave_granularity;
-		parent_iw = parent_cxld->interleave_ways;
-	}
-
-	rc = granularity_to_eig(parent_ig, &peig);
-	if (rc) {
-		dev_dbg(&cxlr->dev, "%s:%s: invalid parent granularity: %d\n",
-			dev_name(parent_port->uport_dev),
-			dev_name(&parent_port->dev), parent_ig);
-		return rc;
-	}
-
-	rc = ways_to_eiw(parent_iw, &peiw);
-	if (rc) {
-		dev_dbg(&cxlr->dev, "%s:%s: invalid parent interleave: %d\n",
-			dev_name(parent_port->uport_dev),
-			dev_name(&parent_port->dev), parent_iw);
-		return rc;
-	}
+		ig = p->interleave_granularity * parent_distance;
 
-	iw = cxl_rr->nr_targets;
 	rc = ways_to_eiw(iw, &eiw);
-	if (rc) {
-		dev_dbg(&cxlr->dev, "%s:%s: invalid port interleave: %d\n",
-			dev_name(port->uport_dev), dev_name(&port->dev), iw);
-		return rc;
-	}
-
-	/*
-	 * Interleave granularity is a multiple of @parent_port granularity.
-	 * Multiplier is the parent port interleave ways.
-	 */
-	rc = granularity_to_eig(parent_ig * parent_iw, &eig);
-	if (rc) {
-		dev_dbg(&cxlr->dev,
-			"%s: invalid granularity calculation (%d * %d)\n",
-			dev_name(&parent_port->dev), parent_ig, parent_iw);
-		return rc;
-	}
-
-	rc = eig_to_granularity(eig, &ig);
-	if (rc) {
-		dev_dbg(&cxlr->dev, "%s:%s: invalid interleave: %d\n",
-			dev_name(port->uport_dev), dev_name(&port->dev),
-			256 << eig);
-		return rc;
-	}
+	if (!rc)
+		rc = granularity_to_eig(ig, &eig);
 
-	if (iw > 8 || iw > cxlsd->nr_targets) {
-		dev_dbg(&cxlr->dev,
-			"%s:%s:%s: ways: %d overflows targets: %d\n",
+	if (rc || (iw > 1 && ~selector & get_selector(iw, ig))) {
+		dev_dbg(&cxlr->dev, "%s:%s: invalid port interleave: %d:%d:%#llx\n",
 			dev_name(port->uport_dev), dev_name(&port->dev),
-			dev_name(&cxld->dev), iw, cxlsd->nr_targets);
+			iw, ig, selector);
 		return -ENXIO;
 	}
 
 	if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
 		if (cxld->interleave_ways != iw ||
-		    (iw > 1 && cxld->interleave_granularity != ig) ||
 		    !spa_maps_hpa(p, &cxld->hpa_range) ||
 		    ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) {
 			dev_err(&cxlr->dev,

From 2ed6a4b939e31cf0dea4c2dd535a46098c2eedfd Mon Sep 17 00:00:00 2001
From: Srirangan Madhavan <smadhavan@nvidia.com>
Date: Fri, 6 Mar 2026 08:00:15 +0000
Subject: [PATCH 129/143] NVIDIA: VR: SAUCE: PCI: Add CXL DVSEC control, lock,
 and range register definitions

PCI: Add CXL DVSEC control, lock, and range register definitions

Add register offset and field definitions for CXL DVSEC registers needed
by CXL state save/restore across resets:

  - CTRL2 (offset 0x10) and LOCK (offset 0x14) registers
  - CONFIG_LOCK bit in the LOCK register
  - RWL (read-write-when-locked) field masks for CTRL and range base
    registers.

Signed-off-by: Srirangan Madhavan <smadhavan@nvidia.com>
(backported from https://lore.kernel.org/linux-cxl/20260306080026.116789-1-smadhavan@nvidia.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 include/uapi/linux/pci_regs.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 49848c6765270..68aec848510d5 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -1250,14 +1250,20 @@
 #define   PCI_DVSEC_CXL_HDM_COUNT			__GENMASK(5, 4)
 #define  PCI_DVSEC_CXL_CTRL				0xC
 #define   PCI_DVSEC_CXL_MEM_ENABLE			_BITUL(2)
+#define   PCI_DVSEC_CXL_CTRL_RWL			0x5FED
+#define  PCI_DVSEC_CXL_CTRL2				0x10
+#define  PCI_DVSEC_CXL_LOCK				0x14
+#define   PCI_DVSEC_CXL_LOCK_CONFIG			_BITUL(0)
 #define  PCI_DVSEC_CXL_RANGE_SIZE_HIGH(i)		(0x18 + (i * 0x10))
 #define  PCI_DVSEC_CXL_RANGE_SIZE_LOW(i)		(0x1C + (i * 0x10))
 #define   PCI_DVSEC_CXL_MEM_INFO_VALID			_BITUL(0)
 #define   PCI_DVSEC_CXL_MEM_ACTIVE			_BITUL(1)
 #define   PCI_DVSEC_CXL_MEM_SIZE_LOW			__GENMASK(31, 28)
 #define  PCI_DVSEC_CXL_RANGE_BASE_HIGH(i)		(0x20 + (i * 0x10))
+#define   PCI_DVSEC_CXL_RANGE_BASE_HI_RWL		0xFFFFFFFF
 #define  PCI_DVSEC_CXL_RANGE_BASE_LOW(i)		(0x24 + (i * 0x10))
 #define   PCI_DVSEC_CXL_MEM_BASE_LOW			__GENMASK(31, 28)
+#define   PCI_DVSEC_CXL_RANGE_BASE_LO_RWL		0xF0000000
 
 #define CXL_DVSEC_RANGE_MAX				2
 

From 004d5d97613cad6e06d5a1facb1058114cc93b09 Mon Sep 17 00:00:00 2001
From: Srirangan Madhavan <smadhavan@nvidia.com>
Date: Fri, 6 Mar 2026 08:00:16 +0000
Subject: [PATCH 130/143] NVIDIA: VR: SAUCE: cxl: Move HDM decoder and register
 map definitions to include/cxl/cxl.h

Move CXL HDM decoder register defines, register map structs
(cxl_reg_map, cxl_component_reg_map, cxl_device_reg_map,
cxl_pmu_reg_map, cxl_register_map), cxl_hdm_decoder_count(),
enum cxl_regloc_type, and cxl_find_regblock()/cxl_setup_regs()
declarations from internal CXL headers to include/cxl/pci.h.

This makes them accessible to code outside the CXL subsystem, in
particular the PCI core CXL state save/restore support added in a
subsequent patch.

No functional change.

Signed-off-by: Srirangan Madhavan <smadhavan@nvidia.com>
(backported from https://lore.kernel.org/linux-cxl/20260306080026.116789-1-smadhavan@nvidia.com/)
[jan: Resolve conflicts by moving certain definitions to include/cxl/cxl.h instead of to include/cxl/pci.h to align with its dependency of Alejandro's series]
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/cxl.h | 57 -----------------------------------------------
 include/cxl/cxl.h | 54 ++++++++++++++++++++++++++++++++++++++++++++
 include/cxl/pci.h |  4 ++++
 3 files changed, 58 insertions(+), 57 deletions(-)

diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 2b1f7d687a0e8..f84910ba7fa2b 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -24,59 +24,6 @@ extern const struct nvdimm_security_ops *cxl_security_ops;
  * (port-driver, region-driver, nvdimm object-drivers... etc).
  */
 
-/* CXL 2.0 8.2.4 CXL Component Register Layout and Definition */
-#define CXL_COMPONENT_REG_BLOCK_SIZE SZ_64K
-
-/* CXL 2.0 8.2.5 CXL.cache and CXL.mem Registers*/
-#define CXL_CM_OFFSET 0x1000
-#define CXL_CM_CAP_HDR_OFFSET 0x0
-#define   CXL_CM_CAP_HDR_ID_MASK GENMASK(15, 0)
-#define     CM_CAP_HDR_CAP_ID 1
-#define   CXL_CM_CAP_HDR_VERSION_MASK GENMASK(19, 16)
-#define     CM_CAP_HDR_CAP_VERSION 1
-#define   CXL_CM_CAP_HDR_CACHE_MEM_VERSION_MASK GENMASK(23, 20)
-#define     CM_CAP_HDR_CACHE_MEM_VERSION 1
-#define   CXL_CM_CAP_HDR_ARRAY_SIZE_MASK GENMASK(31, 24)
-#define CXL_CM_CAP_PTR_MASK GENMASK(31, 20)
-
-/* HDM decoders CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure */
-#define CXL_HDM_DECODER_CAP_OFFSET 0x0
-#define   CXL_HDM_DECODER_COUNT_MASK GENMASK(3, 0)
-#define   CXL_HDM_DECODER_TARGET_COUNT_MASK GENMASK(7, 4)
-#define   CXL_HDM_DECODER_INTERLEAVE_11_8 BIT(8)
-#define   CXL_HDM_DECODER_INTERLEAVE_14_12 BIT(9)
-#define   CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY BIT(11)
-#define   CXL_HDM_DECODER_INTERLEAVE_16_WAY BIT(12)
-#define CXL_HDM_DECODER_CTRL_OFFSET 0x4
-#define   CXL_HDM_DECODER_ENABLE BIT(1)
-#define CXL_HDM_DECODER0_BASE_LOW_OFFSET(i) (0x20 * (i) + 0x10)
-#define CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i) (0x20 * (i) + 0x14)
-#define CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i) (0x20 * (i) + 0x18)
-#define CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i) (0x20 * (i) + 0x1c)
-#define CXL_HDM_DECODER0_CTRL_OFFSET(i) (0x20 * (i) + 0x20)
-#define   CXL_HDM_DECODER0_CTRL_IG_MASK GENMASK(3, 0)
-#define   CXL_HDM_DECODER0_CTRL_IW_MASK GENMASK(7, 4)
-#define   CXL_HDM_DECODER0_CTRL_LOCK BIT(8)
-#define   CXL_HDM_DECODER0_CTRL_COMMIT BIT(9)
-#define   CXL_HDM_DECODER0_CTRL_COMMITTED BIT(10)
-#define   CXL_HDM_DECODER0_CTRL_COMMIT_ERROR BIT(11)
-#define   CXL_HDM_DECODER0_CTRL_HOSTONLY BIT(12)
-#define CXL_HDM_DECODER0_TL_LOW(i) (0x20 * (i) + 0x24)
-#define CXL_HDM_DECODER0_TL_HIGH(i) (0x20 * (i) + 0x28)
-#define CXL_HDM_DECODER0_SKIP_LOW(i) CXL_HDM_DECODER0_TL_LOW(i)
-#define CXL_HDM_DECODER0_SKIP_HIGH(i) CXL_HDM_DECODER0_TL_HIGH(i)
-
-/* HDM decoder control register constants CXL 3.0 8.2.5.19.7 */
-#define CXL_DECODER_MIN_GRANULARITY 256
-#define CXL_DECODER_MAX_ENCODED_IG 6
-
-static inline int cxl_hdm_decoder_count(u32 cap_hdr)
-{
-	int val = FIELD_GET(CXL_HDM_DECODER_COUNT_MASK, cap_hdr);
-
-	return val ? val * 2 : 1;
-}
-
 /* Encode defined in CXL 2.0 8.2.5.12.7 HDM Decoder Control Register */
 static inline int eig_to_granularity(u16 eig, unsigned int *granularity)
 {
@@ -207,13 +154,9 @@ int cxl_map_device_regs(const struct cxl_register_map *map,
 int cxl_map_pmu_regs(struct cxl_register_map *map, struct cxl_pmu_regs *regs);
 
 #define CXL_INSTANCES_COUNT -1
-enum cxl_regloc_type;
 int cxl_count_regblock(struct pci_dev *pdev, enum cxl_regloc_type type);
 int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type,
 			       struct cxl_register_map *map, unsigned int index);
-int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
-		      struct cxl_register_map *map);
-int cxl_setup_regs(struct cxl_register_map *map);
 struct cxl_dport;
 int cxl_dport_map_rcd_linkcap(struct pci_dev *pdev, struct cxl_dport *dport);
 
diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h
index 50acbd13bcf85..7d0b09ff57681 100644
--- a/include/cxl/cxl.h
+++ b/include/cxl/cxl.h
@@ -5,6 +5,7 @@
 #ifndef __CXL_CXL_H__
 #define __CXL_CXL_H__
 
+#include <linux/bitfield.h>
 #include <linux/node.h>
 #include <linux/ioport.h>
 #include <linux/range.h>
@@ -75,6 +76,59 @@ struct cxl_regs {
 #define   CXL_CM_CAP_CAP_ID_HDM 0x5
 #define   CXL_CM_CAP_CAP_HDM_VERSION 1
 
+/* CXL 2.0 8.2.4 CXL Component Register Layout and Definition */
+#define CXL_COMPONENT_REG_BLOCK_SIZE SZ_64K
+
+/* CXL 2.0 8.2.5 CXL.cache and CXL.mem Registers */
+#define CXL_CM_OFFSET 0x1000
+#define CXL_CM_CAP_HDR_OFFSET 0x0
+#define   CXL_CM_CAP_HDR_ID_MASK GENMASK(15, 0)
+#define     CM_CAP_HDR_CAP_ID 1
+#define   CXL_CM_CAP_HDR_VERSION_MASK GENMASK(19, 16)
+#define     CM_CAP_HDR_CAP_VERSION 1
+#define   CXL_CM_CAP_HDR_CACHE_MEM_VERSION_MASK GENMASK(23, 20)
+#define     CM_CAP_HDR_CACHE_MEM_VERSION 1
+#define   CXL_CM_CAP_HDR_ARRAY_SIZE_MASK GENMASK(31, 24)
+#define CXL_CM_CAP_PTR_MASK GENMASK(31, 20)
+
+/* HDM decoders CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure */
+#define CXL_HDM_DECODER_CAP_OFFSET 0x0
+#define   CXL_HDM_DECODER_COUNT_MASK GENMASK(3, 0)
+#define   CXL_HDM_DECODER_TARGET_COUNT_MASK GENMASK(7, 4)
+#define   CXL_HDM_DECODER_INTERLEAVE_11_8 BIT(8)
+#define   CXL_HDM_DECODER_INTERLEAVE_14_12 BIT(9)
+#define   CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY BIT(11)
+#define   CXL_HDM_DECODER_INTERLEAVE_16_WAY BIT(12)
+#define CXL_HDM_DECODER_CTRL_OFFSET 0x4
+#define   CXL_HDM_DECODER_ENABLE BIT(1)
+#define CXL_HDM_DECODER0_BASE_LOW_OFFSET(i) (0x20 * (i) + 0x10)
+#define CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i) (0x20 * (i) + 0x14)
+#define CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i) (0x20 * (i) + 0x18)
+#define CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i) (0x20 * (i) + 0x1c)
+#define CXL_HDM_DECODER0_CTRL_OFFSET(i) (0x20 * (i) + 0x20)
+#define   CXL_HDM_DECODER0_CTRL_IG_MASK GENMASK(3, 0)
+#define   CXL_HDM_DECODER0_CTRL_IW_MASK GENMASK(7, 4)
+#define   CXL_HDM_DECODER0_CTRL_LOCK BIT(8)
+#define   CXL_HDM_DECODER0_CTRL_COMMIT BIT(9)
+#define   CXL_HDM_DECODER0_CTRL_COMMITTED BIT(10)
+#define   CXL_HDM_DECODER0_CTRL_COMMIT_ERROR BIT(11)
+#define   CXL_HDM_DECODER0_CTRL_HOSTONLY BIT(12)
+#define CXL_HDM_DECODER0_TL_LOW(i) (0x20 * (i) + 0x24)
+#define CXL_HDM_DECODER0_TL_HIGH(i) (0x20 * (i) + 0x28)
+#define CXL_HDM_DECODER0_SKIP_LOW(i) CXL_HDM_DECODER0_TL_LOW(i)
+#define CXL_HDM_DECODER0_SKIP_HIGH(i) CXL_HDM_DECODER0_TL_HIGH(i)
+
+/* HDM decoder control register constants CXL 3.0 8.2.5.19.7 */
+#define CXL_DECODER_MIN_GRANULARITY 256
+#define CXL_DECODER_MAX_ENCODED_IG 6
+
+static inline int cxl_hdm_decoder_count(u32 cap_hdr)
+{
+	int val = FIELD_GET(CXL_HDM_DECODER_COUNT_MASK, cap_hdr);
+
+	return val ? val * 2 : 1;
+}
+
 struct cxl_reg_map {
 	bool valid;
 	int id;
diff --git a/include/cxl/pci.h b/include/cxl/pci.h
index a172439f08c60..edbf980c283f1 100644
--- a/include/cxl/pci.h
+++ b/include/cxl/pci.h
@@ -14,8 +14,12 @@ enum cxl_regloc_type {
 	CXL_REGLOC_RBI_TYPES
 };
 
+struct pci_dev;
 struct cxl_register_map;
 
 int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
 		       struct cxl_register_map *map);
+int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
+		      struct cxl_register_map *map);
+int cxl_setup_regs(struct cxl_register_map *map);
 #endif

From a0c071f697b1f08b5d707080c8662f75717c5d29 Mon Sep 17 00:00:00 2001
From: Srirangan Madhavan <smadhavan@nvidia.com>
Date: Fri, 6 Mar 2026 08:00:17 +0000
Subject: [PATCH 131/143] NVIDIA: VR: SAUCE: PCI: Add virtual extended cap save
 buffer for CXL state

Add pci_add_virtual_ext_cap_save_buffer() to allocate save buffers
using virtual cap IDs (above PCI_EXT_CAP_ID_MAX) that don't require
a real capability in config space.

The existing pci_add_ext_cap_save_buffer() cannot be used for
CXL DVSEC state because it calls pci_find_ext_capability(),
which searches for a matching capability in PCI config space.
The CXL state saved here is a synthetic snapshot (DVSEC+HDM)
and should not be tied to a real extended-cap instance. A
virtual extended-cap save buffer API (cap IDs above
PCI_EXT_CAP_ID_MAX) allows PCI to track this state without
a backing config space capability.

Signed-off-by: Srirangan Madhavan <smadhavan@nvidia.com>
(backported from https://lore.kernel.org/linux-cxl/20260306080026.116789-1-smadhavan@nvidia.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/pci/pci.c | 20 ++++++++++++++++++++
 drivers/pci/pci.h | 18 ++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 372de7961d2a6..81733831e248a 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -3528,6 +3528,26 @@ int pci_add_ext_cap_save_buffer(struct pci_dev *dev, u16 cap, unsigned int size)
 	return _pci_add_cap_save_buffer(dev, cap, true, size);
 }
 
+int pci_add_virtual_ext_cap_save_buffer(struct pci_dev *dev, u16 cap,
+					unsigned int size)
+{
+	struct pci_cap_saved_state *save_state;
+
+	if (cap <= PCI_EXT_CAP_ID_MAX)
+		return -EINVAL;
+
+	save_state = kzalloc(sizeof(*save_state) + size, GFP_KERNEL);
+	if (!save_state)
+		return -ENOMEM;
+
+	save_state->cap.cap_nr = cap;
+	save_state->cap.cap_extended = true;
+	save_state->cap.size = size;
+	pci_add_saved_cap(dev, save_state);
+
+	return 0;
+}
+
 /**
  * pci_allocate_cap_save_buffers - allocate buffers for saving capabilities
  * @dev: the PCI device
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index d4ae4eef89975..6167e0e204ade 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -119,15 +119,33 @@ struct pci_cap_saved_state {
 	struct pci_cap_saved_data	cap;
 };
 
+/*
+ * Virtual extended cap ID for CXL DVSEC state in the cap save chain.
+ */
+#define PCI_EXT_CAP_ID_CXL_DVSEC_VIRTUAL	0xFFFF
+static_assert(PCI_EXT_CAP_ID_MAX < PCI_EXT_CAP_ID_CXL_DVSEC_VIRTUAL);
+
 void pci_allocate_cap_save_buffers(struct pci_dev *dev);
 void pci_free_cap_save_buffers(struct pci_dev *dev);
 int pci_add_cap_save_buffer(struct pci_dev *dev, char cap, unsigned int size);
 int pci_add_ext_cap_save_buffer(struct pci_dev *dev,
 				u16 cap, unsigned int size);
+int pci_add_virtual_ext_cap_save_buffer(struct pci_dev *dev, u16 cap,
+					unsigned int size);
 struct pci_cap_saved_state *pci_find_saved_cap(struct pci_dev *dev, char cap);
 struct pci_cap_saved_state *pci_find_saved_ext_cap(struct pci_dev *dev,
 						   u16 cap);
 
+#ifdef CONFIG_PCI_CXL
+void pci_allocate_cxl_save_buffer(struct pci_dev *dev);
+void pci_save_cxl_state(struct pci_dev *dev);
+void pci_restore_cxl_state(struct pci_dev *dev);
+#else
+static inline void pci_allocate_cxl_save_buffer(struct pci_dev *dev) { }
+static inline void pci_save_cxl_state(struct pci_dev *dev) { }
+static inline void pci_restore_cxl_state(struct pci_dev *dev) { }
+#endif
+
 #define PCI_PM_D2_DELAY         200	/* usec; see PCIe r4.0, sec 5.9.1 */
 #define PCI_PM_D3HOT_WAIT       10	/* msec */
 #define PCI_PM_D3COLD_WAIT      100	/* msec */

From 0ce70d718d954531e8197201594da6787ab709fa Mon Sep 17 00:00:00 2001
From: Srirangan Madhavan <smadhavan@nvidia.com>
Date: Fri, 6 Mar 2026 08:00:18 +0000
Subject: [PATCH 132/143] NVIDIA: VR: SAUCE: PCI: Add cxl DVSEC state
 save/restore across resets

Save and restore CXL DVSEC control registers (CTRL, CTRL2), range
base registers, and lock state across PCI resets.

When the DVSEC CONFIG_LOCK bit is set, certain DVSEC fields
become read-only and hardware may have updated them. Blindly
restoring saved values would be silently ignored or conflict
with hardware state. Instead, a read-merge-write approach is
used: current hardware values are read for the RWL
(read-write-when-locked) fields and merged with saved state,
so only writable bits are restored while locked bits retain
their hardware values.

Hooked into pci_save_state()/pci_restore_state() so all PCI reset
paths automatically preserve CXL DVSEC configuration.

Signed-off-by: Srirangan Madhavan <smadhavan@nvidia.com>
(backported from https://lore.kernel.org/linux-cxl/20260306080026.116789-1-smadhavan@nvidia.com/)
[jan: Resolve minor conflict in drivers/pci/Makefile due to code line shifts]
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/pci/Kconfig  |   4 +
 drivers/pci/Makefile |   1 +
 drivers/pci/cxl.c    | 177 +++++++++++++++++++++++++++++++++++++++++++
 drivers/pci/pci.c    |   3 +
 4 files changed, 185 insertions(+)
 create mode 100644 drivers/pci/cxl.c

diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 9a249c65aedcd..d094f9532b74f 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -119,6 +119,10 @@ config XEN_PCIDEV_FRONTEND
 	  The PCI device frontend driver allows the kernel to import arbitrary
 	  PCI devices from a PCI backend to support PCI driver domains.
 
+config PCI_CXL
+	bool
+	default y if CXL_BUS
+
 config PCI_ATS
 	bool
 
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 67647f1880fb8..8d39e070c6ec0 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -37,6 +37,7 @@ obj-$(CONFIG_PCI_DOE)		+= doe.o
 obj-$(CONFIG_PCI_DYNAMIC_OF_NODES) += of_property.o
 obj-$(CONFIG_PCI_NPEM)		+= npem.o
 obj-$(CONFIG_PCIE_TPH)		+= tph.o
+obj-$(CONFIG_PCI_CXL)		+= cxl.o
 
 # Endpoint library must be initialized before its users
 obj-$(CONFIG_PCI_ENDPOINT)	+= endpoint/
diff --git a/drivers/pci/cxl.c b/drivers/pci/cxl.c
new file mode 100644
index 0000000000000..abcf70de91715
--- /dev/null
+++ b/drivers/pci/cxl.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CXL PCI state save/restore support.
+ *
+ * Saves and restores CXL DVSEC registers across PCI resets and link
+ * disable/enable transitions. Hooked into pci_save_state() /
+ * pci_restore_state() via the PCI capability save chain.
+ */
+#include <linux/pci.h>
+#include <cxl/pci.h>
+#include "pci.h"
+
+struct cxl_pci_state {
+	u16 dvsec;
+	u16 dvsec_ctrl;
+	u16 dvsec_ctrl2;
+	u32 range_base_hi[CXL_DVSEC_RANGE_MAX];
+	u32 range_base_lo[CXL_DVSEC_RANGE_MAX];
+	u16 dvsec_lock;
+	bool dvsec_valid;
+};
+
+static void cxl_save_dvsec(struct pci_dev *pdev, struct cxl_pci_state *state)
+{
+	int rc_ctrl, rc_ctrl2;
+	u16 dvsec;
+	int i;
+
+	dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
+					  PCI_DVSEC_CXL_DEVICE);
+	if (!dvsec)
+		return;
+
+	state->dvsec = dvsec;
+	rc_ctrl = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL,
+				       &state->dvsec_ctrl);
+	rc_ctrl2 = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2,
+					&state->dvsec_ctrl2);
+	if (rc_ctrl || rc_ctrl2) {
+		pci_warn(pdev,
+			 "CXL: DVSEC read failed (ctrl rc=%d, ctrl2 rc=%d)\n",
+			 rc_ctrl, rc_ctrl2);
+		return;
+	}
+
+	for (i = 0; i < CXL_DVSEC_RANGE_MAX; i++) {
+		pci_read_config_dword(pdev,
+			dvsec + PCI_DVSEC_CXL_RANGE_BASE_HIGH(i),
+			&state->range_base_hi[i]);
+		pci_read_config_dword(pdev,
+			dvsec + PCI_DVSEC_CXL_RANGE_BASE_LOW(i),
+			&state->range_base_lo[i]);
+	}
+
+	pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_LOCK,
+			     &state->dvsec_lock);
+
+	state->dvsec_valid = true;
+}
+
+static u32 cxl_merge_rwl(u32 saved, u32 current_hw, u32 rwl_mask)
+{
+	return (current_hw & rwl_mask) | (saved & ~rwl_mask);
+}
+
+static void cxl_restore_dvsec(struct pci_dev *pdev,
+			      const struct cxl_pci_state *state)
+{
+	u16 lock_reg = 0;
+	int i;
+
+	if (!state->dvsec_valid)
+		return;
+
+	pci_read_config_word(pdev, state->dvsec + PCI_DVSEC_CXL_LOCK,
+			     &lock_reg);
+
+	if (lock_reg & PCI_DVSEC_CXL_LOCK_CONFIG) {
+		u16 hw_ctrl;
+		u32 hw_range_hi, hw_range_lo;
+
+		pci_read_config_word(pdev,
+				     state->dvsec + PCI_DVSEC_CXL_CTRL,
+				     &hw_ctrl);
+		pci_write_config_word(pdev,
+			state->dvsec + PCI_DVSEC_CXL_CTRL,
+			cxl_merge_rwl(state->dvsec_ctrl, hw_ctrl,
+				      PCI_DVSEC_CXL_CTRL_RWL));
+
+		pci_write_config_word(pdev,
+			state->dvsec + PCI_DVSEC_CXL_CTRL2,
+			state->dvsec_ctrl2);
+
+		for (i = 0; i < CXL_DVSEC_RANGE_MAX; i++) {
+			pci_read_config_dword(pdev,
+				state->dvsec + PCI_DVSEC_CXL_RANGE_BASE_HIGH(i),
+				&hw_range_hi);
+			pci_write_config_dword(pdev,
+				state->dvsec + PCI_DVSEC_CXL_RANGE_BASE_HIGH(i),
+				cxl_merge_rwl(state->range_base_hi[i],
+					      hw_range_hi,
+					      PCI_DVSEC_CXL_RANGE_BASE_HI_RWL));
+
+			pci_read_config_dword(pdev,
+				state->dvsec + PCI_DVSEC_CXL_RANGE_BASE_LOW(i),
+				&hw_range_lo);
+			pci_write_config_dword(pdev,
+				state->dvsec + PCI_DVSEC_CXL_RANGE_BASE_LOW(i),
+				cxl_merge_rwl(state->range_base_lo[i],
+					      hw_range_lo,
+					      PCI_DVSEC_CXL_RANGE_BASE_LO_RWL));
+		}
+	} else {
+		pci_write_config_word(pdev,
+				      state->dvsec + PCI_DVSEC_CXL_CTRL,
+				      state->dvsec_ctrl);
+		pci_write_config_word(pdev,
+				      state->dvsec + PCI_DVSEC_CXL_CTRL2,
+				      state->dvsec_ctrl2);
+		for (i = 0; i < CXL_DVSEC_RANGE_MAX; i++) {
+			pci_write_config_dword(pdev,
+				state->dvsec + PCI_DVSEC_CXL_RANGE_BASE_HIGH(i),
+				state->range_base_hi[i]);
+			pci_write_config_dword(pdev,
+				state->dvsec + PCI_DVSEC_CXL_RANGE_BASE_LOW(i),
+				state->range_base_lo[i]);
+		}
+
+		pci_write_config_word(pdev,
+			state->dvsec + PCI_DVSEC_CXL_LOCK,
+			state->dvsec_lock);
+	}
+}
+
+void pci_allocate_cxl_save_buffer(struct pci_dev *dev)
+{
+	if (!pcie_is_cxl(dev))
+		return;
+
+	if (pci_add_virtual_ext_cap_save_buffer(dev,
+			PCI_EXT_CAP_ID_CXL_DVSEC_VIRTUAL,
+			sizeof(struct cxl_pci_state)))
+		pci_err(dev, "unable to allocate CXL save buffer\n");
+}
+
+void pci_save_cxl_state(struct pci_dev *pdev)
+{
+	struct pci_cap_saved_state *save_state;
+	struct cxl_pci_state *state;
+
+	save_state = pci_find_saved_ext_cap(pdev,
+					    PCI_EXT_CAP_ID_CXL_DVSEC_VIRTUAL);
+	if (!save_state)
+		return;
+
+	state = (struct cxl_pci_state *)save_state->cap.data;
+	state->dvsec_valid = false;
+
+	cxl_save_dvsec(pdev, state);
+}
+
+void pci_restore_cxl_state(struct pci_dev *pdev)
+{
+	struct pci_cap_saved_state *save_state;
+	struct cxl_pci_state *state;
+
+	save_state = pci_find_saved_ext_cap(pdev,
+					    PCI_EXT_CAP_ID_CXL_DVSEC_VIRTUAL);
+	if (!save_state)
+		return;
+
+	state = (struct cxl_pci_state *)save_state->cap.data;
+	if (!state->dvsec_valid)
+		return;
+
+	cxl_restore_dvsec(pdev, state);
+}
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 81733831e248a..193bbb6347566 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1817,6 +1817,7 @@ int pci_save_state(struct pci_dev *dev)
 	pci_save_aer_state(dev);
 	pci_save_ptm_state(dev);
 	pci_save_tph_state(dev);
+	pci_save_cxl_state(dev);
 	return pci_save_vc_state(dev);
 }
 EXPORT_SYMBOL(pci_save_state);
@@ -1928,6 +1929,7 @@ void pci_restore_state(struct pci_dev *dev)
 	pci_restore_aer_state(dev);
 
 	pci_restore_config_space(dev);
+	pci_restore_cxl_state(dev);
 
 	pci_restore_pcix_state(dev);
 	pci_restore_msi_state(dev);
@@ -3571,6 +3573,7 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev)
 		pci_err(dev, "unable to allocate suspend buffer for LTR\n");
 
 	pci_allocate_vc_save_buffers(dev);
+	pci_allocate_cxl_save_buffer(dev);
 }
 
 void pci_free_cap_save_buffers(struct pci_dev *dev)

From 2eb6dee0c0e67a32c0c9ac7b60e766712e6e5914 Mon Sep 17 00:00:00 2001
From: Srirangan Madhavan <smadhavan@nvidia.com>
Date: Fri, 6 Mar 2026 08:00:19 +0000
Subject: [PATCH 133/143] NVIDIA: VR: SAUCE: PCI: Add HDM decoder state
 save/restore

Save and restore CXL HDM decoder registers (global control,
per-decoder base/size/target-list, and commit state) across PCI
resets. On restore, decoders that were committed are reprogrammed
and recommitted with a 10ms timeout. Locked decoders that are
already committed are skipped, since their state is protected by
hardware and reprogramming them would fail.

The Register Locator DVSEC is parsed directly via PCI config space
reads rather than calling cxl_find_regblock()/cxl_setup_regs(),
since this code lives in the PCI core and must not depend on CXL
module symbols.

MSE is temporarily enabled during save/restore to allow MMIO
access to the HDM decoder register block.

Signed-off-by: Srirangan Madhavan <smadhavan@nvidia.com>
(backported from https://lore.kernel.org/linux-cxl/20260306080026.116789-1-smadhavan@nvidia.com/)
[jan: Include <cxl/cxl.h> in drivers/pci/cxl.c due to conflict resolution in "4acbc27592b8 NVIDIA: VR: SAUCE: cxl: Move HDM decoder and register map definitions to include/cxl/cxl.h"]
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/pci/cxl.c | 298 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 295 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/cxl.c b/drivers/pci/cxl.c
index abcf70de91715..900d0316f38d0 100644
--- a/drivers/pci/cxl.c
+++ b/drivers/pci/cxl.c
@@ -2,15 +2,32 @@
 /*
  * CXL PCI state save/restore support.
  *
- * Saves and restores CXL DVSEC registers across PCI resets and link
- * disable/enable transitions. Hooked into pci_save_state() /
+ * Saves and restores CXL DVSEC and HDM decoder registers across PCI resets
+ * and link disable/enable transitions. Hooked into pci_save_state() /
  * pci_restore_state() via the PCI capability save chain.
  */
 #include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/jiffies.h>
+#include <linux/cleanup.h>
+#include <cxl/cxl.h>
 #include <cxl/pci.h>
 #include "pci.h"
 
+#define CXL_HDM_MAX_DECODERS 32
+
+struct cxl_hdm_decoder_snapshot {
+	u32 base_lo;
+	u32 base_hi;
+	u32 size_lo;
+	u32 size_hi;
+	u32 ctrl;
+	u32 tl_lo;
+	u32 tl_hi;
+};
+
 struct cxl_pci_state {
+	/* DVSEC saved state */
 	u16 dvsec;
 	u16 dvsec_ctrl;
 	u16 dvsec_ctrl2;
@@ -18,6 +35,15 @@ struct cxl_pci_state {
 	u32 range_base_lo[CXL_DVSEC_RANGE_MAX];
 	u16 dvsec_lock;
 	bool dvsec_valid;
+
+	/* HDM decoder saved state */
+	int hdm_bar;
+	unsigned long hdm_bar_offset;
+	unsigned long hdm_map_size;
+	u32 hdm_global_ctrl;
+	int hdm_count;
+	struct cxl_hdm_decoder_snapshot decoders[CXL_HDM_MAX_DECODERS];
+	bool hdm_valid;
 };
 
 static void cxl_save_dvsec(struct pci_dev *pdev, struct cxl_pci_state *state)
@@ -132,6 +158,269 @@ static void cxl_restore_dvsec(struct pci_dev *pdev,
 	}
 }
 
+struct pci_cmd_saved {	/* cleanup state consumed by restore_pci_cmd */
+	struct pci_dev *pdev;
+	u16 cmd;	/* original PCI_COMMAND; written back only if MEMORY was clear */
+};
+
+DEFINE_FREE(restore_pci_cmd, struct pci_cmd_saved,	/* runs at scope exit */
+	    if (!(_T.cmd & PCI_COMMAND_MEMORY))	/* memory decode was off */
+		    pci_write_config_word(_T.pdev, PCI_COMMAND, _T.cmd))
+
+/**
+ * cxl_find_component_regblock - Find the Component Register Block via
+ *                               the Register Locator DVSEC
+ * @pdev: PCI device to scan
+ * @bir: output BAR index
+ * @offset: output offset within the BAR
+ *
+ * Parses the Register Locator DVSEC (ID 8) directly via PCI config space
+ * reads.  No dependency on CXL module symbols.
+ *
+ * Return: 0 on success, -ENODEV if not found or the DVSEC length is too short.
+ */
+static int cxl_find_component_regblock(struct pci_dev *pdev,
+				       int *bir, u64 *offset)
+{
+	u32 regloc_size, regblocks, i;
+	u16 regloc;
+
+	regloc = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
+					   PCI_DVSEC_CXL_REG_LOCATOR);
+	if (!regloc)
+		return -ENODEV;
+
+	pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, &regloc_size);
+	regloc_size = PCI_DVSEC_HEADER1_LEN(regloc_size);
+	if (regloc_size < PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1)
+		return -ENODEV;	/* unsigned subtraction below would wrap */
+	regblocks = (regloc_size - PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1) / 8;
+	for (i = 0; i < regblocks; i++) {
+		u32 reg_lo, reg_hi;
+		unsigned int off;
+
+		off = regloc + PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1 + i * 8;
+		pci_read_config_dword(pdev, off, &reg_lo);
+		pci_read_config_dword(pdev, off + 4, &reg_hi);
+
+		if (FIELD_GET(PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_ID, reg_lo) !=
+		    CXL_REGLOC_RBI_COMPONENT)
+			continue;
+
+		*bir = FIELD_GET(PCI_DVSEC_CXL_REG_LOCATOR_BIR, reg_lo);
+		*offset = ((u64)reg_hi << 32) |
+			  (reg_lo & PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_OFF_LOW);
+		return 0;
+	}
+
+	return -ENODEV;
+}
+
+/*
+ * Find the HDM decoder capability and map it; on success fill the BAR/offset/
+ * size outputs. Returns NULL on failure, else the caller must pci_iounmap() it.
+ */
+static void __iomem *cxl_hdm_map(struct pci_dev *pdev, int *bar_out,
+				  unsigned long *offset_out,
+				  unsigned long *size_out)
+{
+	int bir;
+	u64 reg_offset;
+	void __iomem *comp_base, *cm_base;
+	u32 cap_hdr;
+	int cap, cap_count;
+	unsigned long hdm_offset = 0, hdm_size = 0;
+	void __iomem *hdm;
+
+	if (cxl_find_component_regblock(pdev, &bir, &reg_offset))
+		return NULL;
+
+	comp_base = pci_iomap_range(pdev, bir, reg_offset,
+				    CXL_CM_OFFSET + SZ_4K);
+	if (!comp_base)
+		return NULL;
+
+	cm_base = comp_base + CXL_CM_OFFSET;
+	cap_hdr = readl(cm_base);
+
+	if (FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, cap_hdr) != CM_CAP_HDR_CAP_ID) {
+		pci_iounmap(pdev, comp_base);
+		return NULL;
+	}
+
+	cap_count = FIELD_GET(CXL_CM_CAP_HDR_ARRAY_SIZE_MASK, cap_hdr);
+
+	for (cap = 1; cap <= cap_count; cap++) {	/* one 32-bit header per entry */
+		u32 hdr = readl(cm_base + cap * 4);
+		u16 cap_id = FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, hdr);
+		u32 cap_off = FIELD_GET(CXL_CM_CAP_PTR_MASK, hdr);
+
+		if (cap_id != CXL_CM_CAP_CAP_ID_HDM)
+			continue;
+
+		hdr = readl(cm_base + cap_off);	/* first register of the HDM block */
+		hdm_offset = CXL_CM_OFFSET + cap_off;
+		hdm_size = 0x20 * cxl_hdm_decoder_count(hdr) + 0x10; /* presumably 0x10 of globals + 0x20 per decoder -- confirm */
+		break;
+	}
+
+	pci_iounmap(pdev, comp_base);	/* discovery mapping is no longer needed */
+
+	if (!hdm_size)	/* no HDM capability entry was found */
+		return NULL;
+
+	hdm = pci_iomap_range(pdev, bir, reg_offset + hdm_offset, hdm_size);
+	if (!hdm)
+		return NULL;
+
+	*bar_out = bir;
+	*offset_out = reg_offset + hdm_offset;	/* NOTE(review): u64 narrowed to unsigned long */
+	*size_out = hdm_size;
+	return hdm;
+}
+
+static void cxl_save_hdm(struct pci_dev *pdev, void __iomem *hdm,
+			  struct cxl_pci_state *state, int count)
+{
+	int i;
+
+	state->hdm_count = min_t(int, count, CXL_HDM_MAX_DECODERS);
+	state->hdm_global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
+
+	for (i = 0; i < state->hdm_count; i++) {
+		struct cxl_hdm_decoder_snapshot *d = &state->decoders[i];
+
+		d->base_lo = readl(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(i));
+		d->base_hi = readl(hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i));
+		d->size_lo = readl(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i));
+		d->size_hi = readl(hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i));
+		d->ctrl    = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i));
+		d->tl_lo   = readl(hdm + CXL_HDM_DECODER0_TL_LOW(i));
+		d->tl_hi   = readl(hdm + CXL_HDM_DECODER0_TL_HIGH(i));
+	}
+}
+
+static void cxl_restore_hdm(struct pci_dev *pdev, void __iomem *hdm,
+			     const struct cxl_pci_state *state)
+{
+	int i;
+
+	writel(state->hdm_global_ctrl, hdm + CXL_HDM_DECODER_CTRL_OFFSET);	/* global control first */
+
+	for (i = 0; i < state->hdm_count; i++) {
+		const struct cxl_hdm_decoder_snapshot *d = &state->decoders[i];
+		unsigned long timeout;
+		u32 ctrl;
+
+		if (!(d->ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED))	/* not committed at save time */
+			continue;
+
+		ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i));
+		if ((ctrl & CXL_HDM_DECODER0_CTRL_LOCK) &&
+		    (ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED))
+			continue;	/* still locked and committed: leave untouched */
+
+		if (ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED) {	/* clear COMMIT before reprogramming */
+			ctrl &= ~CXL_HDM_DECODER0_CTRL_COMMIT;
+			writel(ctrl, hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i));
+		}
+
+		writel(d->base_lo, hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(i));
+		writel(d->base_hi, hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i));
+		writel(d->size_lo, hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i));
+		writel(d->size_hi, hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i));
+		writel(d->tl_lo, hdm + CXL_HDM_DECODER0_TL_LOW(i));
+		writel(d->tl_hi, hdm + CXL_HDM_DECODER0_TL_HIGH(i));
+
+		wmb();	/* order the programming writes before the commit write */
+
+		ctrl = d->ctrl & ~(CXL_HDM_DECODER0_CTRL_COMMITTED |
+				   CXL_HDM_DECODER0_CTRL_COMMIT_ERROR);	/* drop saved status bits */
+		ctrl |= CXL_HDM_DECODER0_CTRL_COMMIT;
+		writel(ctrl, hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i));
+
+		timeout = jiffies + msecs_to_jiffies(10);	/* bounded busy-poll, no sleeping */
+		for (;;) {
+			ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i));
+			if (ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED)
+				break;
+			if (ctrl & CXL_HDM_DECODER0_CTRL_COMMIT_ERROR) {
+				pci_warn(pdev,
+					 "HDM decoder %d commit error on restore\n",
+					 i);
+				break;	/* warn only; continue with the next decoder */
+			}
+			if (time_after(jiffies, timeout)) {
+				pci_warn(pdev,
+					 "HDM decoder %d commit timeout on restore\n",
+					 i);
+				break;
+			}
+			cpu_relax();
+		}
+	}
+}
+
+static void cxl_save_hdm_decoders(struct pci_dev *pdev,
+				   struct cxl_pci_state *state)
+{
+	int hdm_bar;
+	unsigned long hdm_bar_offset, hdm_map_size;
+	void __iomem *hdm;
+	u16 cmd;
+	u32 cap;
+	struct pci_cmd_saved saved __free(restore_pci_cmd) = {
+		.pdev = pdev, .cmd = PCI_COMMAND_MEMORY,
+	};
+
+	pci_read_config_word(pdev, PCI_COMMAND, &cmd);
+	saved.cmd = cmd;
+	if (!(cmd & PCI_COMMAND_MEMORY))
+		pci_write_config_word(pdev, PCI_COMMAND,
+				      cmd | PCI_COMMAND_MEMORY);
+
+	hdm = cxl_hdm_map(pdev, &hdm_bar, &hdm_bar_offset, &hdm_map_size);
+	if (!hdm)
+		return;
+
+	cap = readl(hdm + CXL_HDM_DECODER_CAP_OFFSET);
+	cxl_save_hdm(pdev, hdm, state, cxl_hdm_decoder_count(cap));
+	state->hdm_bar = hdm_bar;
+	state->hdm_bar_offset = hdm_bar_offset;
+	state->hdm_map_size = hdm_map_size;
+	state->hdm_valid = true;
+	pci_iounmap(pdev, hdm);
+}
+
+static void cxl_restore_hdm_decoders(struct pci_dev *pdev,
+				      const struct cxl_pci_state *state)
+{
+	void __iomem *hdm;
+	u16 cmd;
+	struct pci_cmd_saved saved __free(restore_pci_cmd) = {
+		.pdev = pdev, .cmd = PCI_COMMAND_MEMORY,
+	};
+
+	if (!state->hdm_valid)
+		return;
+
+	pci_read_config_word(pdev, PCI_COMMAND, &cmd);
+	saved.cmd = cmd;
+	if (!(cmd & PCI_COMMAND_MEMORY))
+		pci_write_config_word(pdev, PCI_COMMAND,
+				      cmd | PCI_COMMAND_MEMORY);
+
+	hdm = pci_iomap_range(pdev, state->hdm_bar, state->hdm_bar_offset,
+			      state->hdm_map_size);
+	if (!hdm) {
+		pci_warn(pdev, "CXL: failed to map HDM for restore\n");
+		return;
+	}
+
+	cxl_restore_hdm(pdev, hdm, state);
+	pci_iounmap(pdev, hdm);
+}
+
 void pci_allocate_cxl_save_buffer(struct pci_dev *dev)
 {
 	if (!pcie_is_cxl(dev))
@@ -155,8 +444,10 @@ void pci_save_cxl_state(struct pci_dev *pdev)
 
 	state = (struct cxl_pci_state *)save_state->cap.data;
 	state->dvsec_valid = false;
+	state->hdm_valid = false;
 
 	cxl_save_dvsec(pdev, state);
+	cxl_save_hdm_decoders(pdev, state);
 }
 
 void pci_restore_cxl_state(struct pci_dev *pdev)
@@ -170,8 +461,9 @@ void pci_restore_cxl_state(struct pci_dev *pdev)
 		return;
 
 	state = (struct cxl_pci_state *)save_state->cap.data;
-	if (!state->dvsec_valid)
+	if (!state->dvsec_valid && !state->hdm_valid)
 		return;
 
 	cxl_restore_dvsec(pdev, state);
+	cxl_restore_hdm_decoders(pdev, state);
 }

From 8c8c7156d239adfd020db7845de93fb0601e17f8 Mon Sep 17 00:00:00 2001
From: Srirangan Madhavan <smadhavan@nvidia.com>
Date: Fri, 6 Mar 2026 09:23:16 +0000
Subject: [PATCH 134/143] NVIDIA: VR: SAUCE: PCI: Add CXL DVSEC reset and
 capability register definitions

Add CXL DVSEC register definitions needed for CXL device reset per
CXL r3.2 section 8.1.3.1:
- Capability bits: RST_CAPABLE, CACHE_CAPABLE, CACHE_WBI_CAPABLE,
  RST_TIMEOUT, RST_MEM_CLR_CAPABLE
- Control2 register: DISABLE_CACHING, INIT_CACHE_WBI, INIT_CXL_RST,
  RST_MEM_CLR_EN
- Status2 register: CACHE_INV, RST_DONE, RST_ERR
- Non-CXL Function Map DVSEC register offset

Signed-off-by: Srirangan Madhavan <smadhavan@nvidia.com>
(backported from https://lore.kernel.org/linux-cxl/20260306092322.148765-1-smadhavan@nvidia.com/)
[jan: Resolve conflicts where PCI_DVSEC_CXL_CACHE_CAPABLE is already added by "72bd823fb4f1 NVIDIA: VR: SAUCE: PCI: Allow ATS to be always on for CXL.cache capable devices"]
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 include/uapi/linux/pci_regs.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 68aec848510d5..0b16aa7864f42 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -1248,10 +1248,22 @@
 #define     PCI_DVSEC_CXL_CACHE_CAPABLE			_BITUL(0)
 #define   PCI_DVSEC_CXL_MEM_CAPABLE			_BITUL(2)
 #define   PCI_DVSEC_CXL_HDM_COUNT			__GENMASK(5, 4)
+#define   PCI_DVSEC_CXL_CACHE_WBI_CAPABLE		_BITUL(6)
+#define   PCI_DVSEC_CXL_RST_CAPABLE			_BITUL(7)
+#define   PCI_DVSEC_CXL_RST_TIMEOUT			__GENMASK(10, 8)
+#define   PCI_DVSEC_CXL_RST_MEM_CLR_CAPABLE		_BITUL(11)
 #define  PCI_DVSEC_CXL_CTRL				0xC
 #define   PCI_DVSEC_CXL_MEM_ENABLE			_BITUL(2)
 #define   PCI_DVSEC_CXL_CTRL_RWL			0x5FED
 #define  PCI_DVSEC_CXL_CTRL2				0x10
+#define   PCI_DVSEC_CXL_DISABLE_CACHING		_BITUL(0)
+#define   PCI_DVSEC_CXL_INIT_CACHE_WBI			_BITUL(1)
+#define   PCI_DVSEC_CXL_INIT_CXL_RST			_BITUL(2)
+#define   PCI_DVSEC_CXL_RST_MEM_CLR_EN			_BITUL(3)
+#define  PCI_DVSEC_CXL_STATUS2				0x12
+#define   PCI_DVSEC_CXL_CACHE_INV			_BITUL(0)
+#define   PCI_DVSEC_CXL_RST_DONE			_BITUL(1)
+#define   PCI_DVSEC_CXL_RST_ERR			_BITUL(2)
 #define  PCI_DVSEC_CXL_LOCK				0x14
 #define   PCI_DVSEC_CXL_LOCK_CONFIG			_BITUL(0)
 #define  PCI_DVSEC_CXL_RANGE_SIZE_HIGH(i)		(0x18 + (i * 0x10))
@@ -1269,6 +1281,7 @@
 
 /* CXL r4.0, 8.1.4: Non-CXL Function Map DVSEC */
 #define PCI_DVSEC_CXL_FUNCTION_MAP			2
+#define  PCI_DVSEC_CXL_FUNCTION_MAP_REG			0x0C
 
 /* CXL r4.0, 8.1.5: Extensions DVSEC for Ports */
 #define PCI_DVSEC_CXL_PORT				3

From c0f8ddc053543ab2943a78ce12dbe8f14a5c4648 Mon Sep 17 00:00:00 2001
From: Srirangan Madhavan <smadhavan@nvidia.com>
Date: Fri, 6 Mar 2026 09:23:17 +0000
Subject: [PATCH 135/143] NVIDIA: VR: SAUCE: PCI: Export
 pci_dev_save_and_disable() and pci_dev_restore()

Export pci_dev_save_and_disable() and pci_dev_restore() so that
subsystems performing non-standard reset sequences (e.g. CXL)
can reuse the PCI core standard pre/post reset lifecycle:
driver reset_prepare/reset_done callbacks, PCI config space
save/restore, and device disable/re-enable.

Signed-off-by: Srirangan Madhavan <smadhavan@nvidia.com>
(backported from https://lore.kernel.org/linux-cxl/20260306092322.148765-1-smadhavan@nvidia.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/pci/pci.c   | 21 +++++++++++++++++++--
 include/linux/pci.h |  3 +++
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 193bbb6347566..f12e6eca601c3 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -5184,7 +5184,15 @@ void pci_dev_unlock(struct pci_dev *dev)
 }
 EXPORT_SYMBOL_GPL(pci_dev_unlock);
 
-static void pci_dev_save_and_disable(struct pci_dev *dev)
+/**
+ * pci_dev_save_and_disable - Save device state and disable it
+ * @dev: PCI device to save and disable
+ *
+ * Save the PCI configuration state, invoke the driver's reset_prepare
+ * callback (if any), and disable the device by clearing the Command register.
+ * The device lock must be held by the caller.
+ */
+void pci_dev_save_and_disable(struct pci_dev *dev)
 {
 	const struct pci_error_handlers *err_handler =
 			dev->driver ? dev->driver->err_handler : NULL;
@@ -5216,8 +5224,16 @@ static void pci_dev_save_and_disable(struct pci_dev *dev)
 	 */
 	pci_write_config_word(dev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
 }
+EXPORT_SYMBOL_GPL(pci_dev_save_and_disable);
 
-static void pci_dev_restore(struct pci_dev *dev)
+/**
+ * pci_dev_restore - Restore device state after reset
+ * @dev: PCI device to restore
+ *
+ * Restore the saved PCI configuration state and invoke the driver's
+ * reset_done callback (if any). The device lock must be held by the caller.
+ */
+void pci_dev_restore(struct pci_dev *dev)
 {
 	const struct pci_error_handlers *err_handler =
 			dev->driver ? dev->driver->err_handler : NULL;
@@ -5234,6 +5250,7 @@ static void pci_dev_restore(struct pci_dev *dev)
 	else if (dev->driver)
 		pci_warn(dev, "reset done");
 }
+EXPORT_SYMBOL_GPL(pci_dev_restore);
 
 /* dev->reset_methods[] is a 0-terminated list of indices into this array */
 const struct pci_reset_fn_method pci_reset_fn_methods[] = {
diff --git a/include/linux/pci.h b/include/linux/pci.h
index a03cdd8c96122..60edd5520f751 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1962,6 +1962,9 @@ int pci_dev_trylock(struct pci_dev *dev);
 void pci_dev_unlock(struct pci_dev *dev);
 DEFINE_GUARD(pci_dev, struct pci_dev *, pci_dev_lock(_T), pci_dev_unlock(_T))
 
+void pci_dev_save_and_disable(struct pci_dev *dev);
+void pci_dev_restore(struct pci_dev *dev);
+
 /*
  * PCI domain support.  Sometimes called PCI segment (eg by ACPI),
  * a PCI domain is defined to be a set of PCI buses which share

From c405cfa3b79c9741874d0501c586926916b893be Mon Sep 17 00:00:00 2001
From: Srirangan Madhavan <smadhavan@nvidia.com>
Date: Fri, 6 Mar 2026 09:23:18 +0000
Subject: [PATCH 136/143] NVIDIA: VR: SAUCE: cxl: Add memory offlining and
 cache flush helpers

Add infrastructure for quiescing the CXL data path before reset:

- Memory offlining: check if CXL-backed memory is online and offline
  it via offline_and_remove_memory() before reset, per CXL
  spec requirement to quiesce all CXL.mem transactions before issuing
  CXL Reset.
- CPU cache flush: invalidate cache lines before reset
  as a safety measure after memory offline.

Signed-off-by: Srirangan Madhavan <smadhavan@nvidia.com>
(backported from https://lore.kernel.org/linux-cxl/20260306092322.148765-1-smadhavan@nvidia.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/pci.c | 110 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index ba2d393c540af..04651a156a79d 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -4,6 +4,8 @@
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/device.h>
 #include <linux/delay.h>
+#include <linux/memory_hotplug.h>
+#include <linux/memregion.h>
 #include <linux/pci.h>
 #include <linux/pci-doe.h>
 #include <cxl/pci.h>
@@ -932,3 +934,111 @@ int cxl_port_get_possible_dports(struct cxl_port *port)
 
 	return ctx.count;
 }
+
+/*
+ * CXL Reset support - core-provided reset logic for CXL devices.
+ *
+ * These functions implement the CXL reset sequence.
+ */
+
+/*
+ * device_for_each_child() callback (@data unused): if the region behind this
+ * endpoint decoder overlaps busy System RAM, offline and remove it before CXL
+ * Reset. Returns 0 if nothing was online or offlining succeeded, else an error.
+ */
+static int __maybe_unused cxl_offline_memory(struct device *dev, void *data)
+{
+	struct cxl_endpoint_decoder *cxled;
+	struct cxl_region *cxlr;
+	struct cxl_region_params *p;
+	int rc;
+
+	if (!is_endpoint_decoder(dev))
+		return 0;
+
+	cxled = to_cxl_endpoint_decoder(dev);
+	cxlr = cxled->cxld.region;
+	if (!cxlr)
+		return 0;
+
+	p = &cxlr->params;
+	if (!p->res)
+		return 0;
+
+	/* Pure query: a range with no busy System RAM has nothing online */
+	if (region_intersects(p->res->start, resource_size(p->res),
+			      IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
+			      IORES_DESC_NONE) == REGION_DISJOINT)
+		return 0;
+
+	dev_info(dev, "Offlining CXL memory [%pr] for reset\n", p->res);
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+	rc = offline_and_remove_memory(p->res->start, resource_size(p->res));
+	if (rc) {
+		dev_err(dev, "Failed to offline CXL memory [%pr]: %d\n",
+			p->res, rc);
+		return rc;
+	}
+#else
+	dev_err(dev, "Memory hotremove not supported, cannot offline CXL memory\n");
+	rc = -EOPNOTSUPP;
+	return rc;
+#endif
+
+	return 0;
+}
+
+/* Offline System RAM behind each endpoint decoder of @cxlmd; 0 or an error */
+static int __maybe_unused cxl_reset_prepare_memdev(struct cxl_memdev *cxlmd)
+{
+	struct cxl_port *endpoint;
+
+	if (!cxlmd || !cxlmd->cxlds)
+		return -ENODEV;
+
+	/* Like cxl_reset_flush_cpu_caches(): ->endpoint may be NULL or ERR_PTR */
+	endpoint = cxlmd->endpoint;
+	if (IS_ERR_OR_NULL(endpoint))
+		return 0;
+
+	/* Stops at the first child whose callback returns non-zero */
+	return device_for_each_child(&endpoint->dev, NULL, cxl_offline_memory);
+}
+
+static int __maybe_unused cxl_decoder_flush_cache(struct device *dev, void *data)
+{
+	struct cxl_endpoint_decoder *cxled;
+	struct cxl_region *cxlr;
+	struct resource *res;
+
+	if (!is_endpoint_decoder(dev))
+		return 0;
+
+	cxled = to_cxl_endpoint_decoder(dev);
+	cxlr = cxled->cxld.region;
+	if (!cxlr || !cxlr->params.res)
+		return 0;
+
+	res = cxlr->params.res;
+	cpu_cache_invalidate_memregion(res->start, resource_size(res));
+	return 0;
+}
+
+static int __maybe_unused cxl_reset_flush_cpu_caches(struct cxl_memdev *cxlmd)
+{
+	struct cxl_port *endpoint;
+
+	if (!cxlmd)
+		return 0;
+
+	endpoint = cxlmd->endpoint;
+	if (!endpoint || IS_ERR(endpoint))
+		return 0;
+
+	if (!cpu_cache_has_invalidate_memregion())
+		return 0;
+
+	device_for_each_child(&endpoint->dev, NULL, cxl_decoder_flush_cache);
+	return 0;
+}

From 9dcf9975a46115e4af081b0d6e205bf0ba245205 Mon Sep 17 00:00:00 2001
From: Srirangan Madhavan <smadhavan@nvidia.com>
Date: Fri, 6 Mar 2026 09:23:19 +0000
Subject: [PATCH 137/143] NVIDIA: VR: SAUCE: cxl: Add multi-function sibling
 coordination for CXL reset

Add sibling PCI function save/disable/restore coordination for CXL
reset. Before reset, all CXL.cachemem sibling functions are locked,
saved, and disabled; after reset they are restored. The Non-CXL Function
Map DVSEC and per-function DVSEC capability register are consulted to
skip non-CXL and CXL.io-only functions. A global mutex serializes
concurrent resets to prevent deadlocks between sibling functions.

Signed-off-by: Srirangan Madhavan <smadhavan@nvidia.com>
(backported from https://lore.kernel.org/linux-cxl/20260306092322.148765-1-smadhavan@nvidia.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/pci.c | 137 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 137 insertions(+)

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 04651a156a79d..22b4f0b0ac4fa 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -16,6 +16,9 @@
 #include "core.h"
 #include "trace.h"
 
+/* Initial sibling array capacity: covers max non-ARI functions per slot */
+#define CXL_RESET_SIBLINGS_INIT	8
+
 /**
  * DOC: cxl core pci
  *
@@ -1042,3 +1045,137 @@ static int __maybe_unused cxl_reset_flush_cpu_caches(struct cxl_memdev *cxlmd)
 	device_for_each_child(&endpoint->dev, NULL, cxl_decoder_flush_cache);
 	return 0;
 }
+
+/*
+ * Serialize all CXL reset operations globally.
+ */
+static DEFINE_MUTEX(cxl_reset_mutex);
+
+struct cxl_reset_context {
+	struct pci_dev *target;
+	struct pci_dev **pci_functions;
+	int pci_func_count;
+	int pci_func_cap;
+};
+
+/*
+ * Test @fn's bit in the Non-CXL Function Map DVSEC at @func_map_dvsec, read
+ * through @pdev's config space. Returns true if the bit is set, false if it
+ * is clear or the config read fails.
+ */
+static bool cxl_is_non_cxl_function(struct pci_dev *pdev,
+				     u16 func_map_dvsec, int fn)
+{
+	int reg, bit;
+	u32 map;
+
+	if (pci_ari_enabled(pdev->bus)) {	/* ARI: 32 function bits per register */
+		reg = fn / 32;
+		bit = fn % 32;
+	} else {	/* non-ARI: register selected by function, bit by device number */
+		reg = fn;
+		bit = PCI_SLOT(pdev->devfn);
+	}
+
+	if (pci_read_config_dword(pdev,
+				   func_map_dvsec + PCI_DVSEC_CXL_FUNCTION_MAP_REG + (reg * 4),
+				   &map))
+		return false;	/* unreadable map: treat the function as CXL */
+
+	return map & BIT(bit);
+}
+
+struct cxl_reset_walk_ctx {
+	struct cxl_reset_context *ctx;
+	u16 func_map_dvsec;
+	bool ari;
+};
+
+static int cxl_reset_collect_sibling(struct pci_dev *func, void *data)
+{
+	struct cxl_reset_walk_ctx *wctx = data;
+	struct cxl_reset_context *ctx = wctx->ctx;
+	struct pci_dev *pdev = ctx->target;
+	u16 dvsec, cap;
+	int fn;
+
+	if (func == pdev)	/* skip the reset target itself */
+		return 0;
+
+	if (!wctx->ari &&
+	    PCI_SLOT(func->devfn) != PCI_SLOT(pdev->devfn))
+		return 0;	/* without ARI only same-slot functions are siblings */
+
+	fn = wctx->ari ? func->devfn : PCI_FUNC(func->devfn);
+	if (wctx->func_map_dvsec &&
+	    cxl_is_non_cxl_function(pdev, wctx->func_map_dvsec, fn))
+		return 0;	/* listed as non-CXL in the target's function map */
+
+	/* Only coordinate with siblings that have CXL.cachemem */
+	dvsec = pci_find_dvsec_capability(func, PCI_VENDOR_ID_CXL,
+					  PCI_DVSEC_CXL_DEVICE);
+	if (!dvsec)
+		return 0;
+	if (pci_read_config_word(func, dvsec + PCI_DVSEC_CXL_CAP, &cap))
+		return 0;
+	if (!(cap & (PCI_DVSEC_CXL_CACHE_CAPABLE |
+		     PCI_DVSEC_CXL_MEM_CAPABLE)))
+		return 0;
+
+	/* Grow sibling array; double capacity for ARI devices when running out of space */
+	if (ctx->pci_func_count >= ctx->pci_func_cap) {
+		struct pci_dev **new;
+		int new_cap = ctx->pci_func_cap ? ctx->pci_func_cap * 2
+						: CXL_RESET_SIBLINGS_INIT;
+
+		new = krealloc(ctx->pci_functions,
+			       new_cap * sizeof(*new), GFP_KERNEL);
+		if (!new)
+			return 1;	/* NOTE(review): ends the walk; the caller ignores this and uses a partial list */
+		ctx->pci_functions = new;
+		ctx->pci_func_cap = new_cap;
+	}
+
+	pci_dev_get(func);	/* reference dropped in cxl_pci_functions_reset_done() */
+	ctx->pci_functions[ctx->pci_func_count++] = func;
+	return 0;
+}
+
+static void __maybe_unused cxl_pci_functions_reset_prepare(struct cxl_reset_context *ctx)
+{
+	struct pci_dev *pdev = ctx->target;
+	struct cxl_reset_walk_ctx wctx;
+	int i;
+
+	ctx->pci_func_count = 0;
+	ctx->pci_functions = NULL;
+	ctx->pci_func_cap = 0;
+
+	wctx.ctx = ctx;
+	wctx.ari = pci_ari_enabled(pdev->bus);
+	wctx.func_map_dvsec = pci_find_dvsec_capability(pdev,
+			PCI_VENDOR_ID_CXL, PCI_DVSEC_CXL_FUNCTION_MAP);
+
+	/* Collect CXL.cachemem siblings under pci_bus_sem */
+	pci_walk_bus(pdev->bus, cxl_reset_collect_sibling, &wctx);
+
+	/* Lock and save/disable siblings outside pci_bus_sem */
+	for (i = 0; i < ctx->pci_func_count; i++) {
+		pci_dev_lock(ctx->pci_functions[i]);
+		pci_dev_save_and_disable(ctx->pci_functions[i]);
+	}
+}
+
+static void __maybe_unused cxl_pci_functions_reset_done(struct cxl_reset_context *ctx)
+{
+	int i;
+
+	for (i = 0; i < ctx->pci_func_count; i++) {
+		pci_dev_restore(ctx->pci_functions[i]);
+		pci_dev_unlock(ctx->pci_functions[i]);
+		pci_dev_put(ctx->pci_functions[i]);
+	}
+	kfree(ctx->pci_functions);
+	ctx->pci_functions = NULL;
+	ctx->pci_func_count = 0;
+}

From f4413ec2454b2e3aaffcd35130da47f75726f39d Mon Sep 17 00:00:00 2001
From: Srirangan Madhavan <smadhavan@nvidia.com>
Date: Fri, 6 Mar 2026 09:23:20 +0000
Subject: [PATCH 138/143] NVIDIA: VR: SAUCE: cxl: Add CXL DVSEC reset sequence
 and flow orchestration

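cxl_dev_reset() implements the hardware reset sequence:
wait for pending transactions, disable caching (with cache
writeback+invalidate when the device supports it), optionally
enable memory clear, initiate reset via CTRL2, wait for
completion, and re-enable caching.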

cxl_do_reset() orchestrates the full reset flow:
  1. CXL pre-reset: mem offlining and cache flush (when memdev present)
  2. PCI save/disable: pci_dev_save_and_disable() automatically saves
     CXL DVSEC and HDM decoder state via PCI core hooks
  3. Sibling coordination: save/disable CXL.cachemem sibling functions
  4. Execute CXL DVSEC reset
  5. Sibling restore: always runs to re-enable sibling functions
  6. PCI restore: pci_dev_restore() automatically restores CXL state

The CXL-specific DVSEC and HDM save/restore is handled
by the PCI core's CXL save/restore infrastructure (drivers/pci/cxl.c).

Signed-off-by: Srirangan Madhavan <smadhavan@nvidia.com>
(backported from https://lore.kernel.org/linux-cxl/20260306092322.148765-1-smadhavan@nvidia.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/pci.c | 181 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 179 insertions(+), 2 deletions(-)

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 22b4f0b0ac4fa..0955cceced110 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -1141,7 +1141,7 @@ static int cxl_reset_collect_sibling(struct pci_dev *func, void *data)
 	return 0;
 }
 
-static void __maybe_unused cxl_pci_functions_reset_prepare(struct cxl_reset_context *ctx)
+static void cxl_pci_functions_reset_prepare(struct cxl_reset_context *ctx)
 {
 	struct pci_dev *pdev = ctx->target;
 	struct cxl_reset_walk_ctx wctx;
@@ -1166,7 +1166,7 @@ static void __maybe_unused cxl_pci_functions_reset_prepare(struct cxl_reset_cont
 	}
 }
 
-static void __maybe_unused cxl_pci_functions_reset_done(struct cxl_reset_context *ctx)
+static void cxl_pci_functions_reset_done(struct cxl_reset_context *ctx)
 {
 	int i;
 
@@ -1179,3 +1179,180 @@ static void __maybe_unused cxl_pci_functions_reset_done(struct cxl_reset_context
 	ctx->pci_functions = NULL;
 	ctx->pci_func_count = 0;
 }
+
+/*
+ * Run the CXL DVSEC reset on @pdev (@dvsec: offset of its CXL Device DVSEC).
+ * Return: 0 or a negative errno; PCIBIOS_* config access codes are converted.
+ */
+static int cxl_dev_reset(struct pci_dev *pdev, int dvsec)
+{
+	static const u32 reset_timeout_ms[] = { 10, 100, 1000, 10000, 100000 };
+	u16 cap, ctrl2, status2;
+	u32 timeout_ms;
+	int rc, idx;
+
+	if (!pci_wait_for_pending_transaction(pdev))
+		pci_err(pdev, "timed out waiting for pending transactions\n");
+
+	/* Config accessors return positive PCIBIOS_* codes; convert to errno */
+	rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CAP, &cap);
+	if (rc)
+		return pcibios_err_to_errno(rc);
+
+	rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, &ctrl2);
+	if (rc)
+		return pcibios_err_to_errno(rc);
+
+	/*
+	 * Disable caching and initiate cache writeback+invalidation if the
+	 * device supports it. Poll for completion.
+	 * Per CXL r3.2 section 9.6, software may use the cache size from
+	 * DVSEC CXL Capability2 to compute a suitable timeout; we use a
+	 * default of 10ms.
+	 */
+	if (cap & PCI_DVSEC_CXL_CACHE_WBI_CAPABLE) {
+		u32 wbi_poll_us = 100;
+		s32 wbi_remaining_us = 10000;
+
+		ctrl2 |= PCI_DVSEC_CXL_DISABLE_CACHING;
+		rc = pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2,
+					   ctrl2);
+		if (rc)
+			return pcibios_err_to_errno(rc);
+
+		ctrl2 |= PCI_DVSEC_CXL_INIT_CACHE_WBI;
+		rc = pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2,
+					   ctrl2);
+		if (rc)
+			return pcibios_err_to_errno(rc);
+
+		do {
+			usleep_range(wbi_poll_us, wbi_poll_us + 1);
+			wbi_remaining_us -= wbi_poll_us;
+			rc = pci_read_config_word(pdev,
+						  dvsec + PCI_DVSEC_CXL_STATUS2,
+						  &status2);
+			if (rc)
+				return pcibios_err_to_errno(rc);
+		} while (!(status2 & PCI_DVSEC_CXL_CACHE_INV) &&
+			 wbi_remaining_us > 0);
+
+		if (!(status2 & PCI_DVSEC_CXL_CACHE_INV)) {
+			pci_err(pdev, "CXL cache WB+I timed out\n");
+			return -ETIMEDOUT;
+		}
+	} else if (cap & PCI_DVSEC_CXL_CACHE_CAPABLE) {
+		ctrl2 |= PCI_DVSEC_CXL_DISABLE_CACHING;
+		rc = pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2,
+					   ctrl2);
+		if (rc)
+			return pcibios_err_to_errno(rc);
+	}
+
+	if (cap & PCI_DVSEC_CXL_RST_MEM_CLR_CAPABLE) {
+		rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2,
+					  &ctrl2);
+		if (rc)
+			return pcibios_err_to_errno(rc);
+
+		ctrl2 |= PCI_DVSEC_CXL_RST_MEM_CLR_EN;
+		rc = pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2,
+					   ctrl2);
+		if (rc)
+			return pcibios_err_to_errno(rc);
+	}
+
+	idx = FIELD_GET(PCI_DVSEC_CXL_RST_TIMEOUT, cap);
+	if (idx >= ARRAY_SIZE(reset_timeout_ms))
+		idx = ARRAY_SIZE(reset_timeout_ms) - 1;
+	timeout_ms = reset_timeout_ms[idx];
+
+	rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, &ctrl2);
+	if (rc)
+		return pcibios_err_to_errno(rc);
+
+	ctrl2 |= PCI_DVSEC_CXL_INIT_CXL_RST;
+	rc = pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, ctrl2);
+	if (rc)
+		return pcibios_err_to_errno(rc);
+
+	/* Sleep out the advertised reset timeout, then inspect Status2 */
+	msleep(timeout_ms);
+
+	rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_STATUS2,
+				  &status2);
+	if (rc)
+		return pcibios_err_to_errno(rc);
+
+	if (status2 & PCI_DVSEC_CXL_RST_ERR) {
+		pci_err(pdev, "CXL reset error\n");
+		return -EIO;
+	}
+
+	if (!(status2 & PCI_DVSEC_CXL_RST_DONE)) {
+		pci_err(pdev, "CXL reset timeout\n");
+		return -ETIMEDOUT;
+	}
+
+	rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, &ctrl2);
+	if (rc)
+		return pcibios_err_to_errno(rc);
+
+	ctrl2 &= ~PCI_DVSEC_CXL_DISABLE_CACHING;
+	rc = pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, ctrl2);
+	return pcibios_err_to_errno(rc);
+}
+
+static int match_memdev_by_parent(struct device *dev, const void *parent)
+{
+	return is_cxl_memdev(dev) && dev->parent == parent;
+}
+
+/*
+ * Orchestrate a CXL reset of @pdev and its CXL.cachemem siblings; 0 or error.
+ */
+static int cxl_do_reset(struct pci_dev *pdev)
+{
+	struct cxl_reset_context ctx = { .target = pdev };
+	struct cxl_memdev *cxlmd = NULL;
+	struct device *memdev = NULL;
+	int dvsec, rc;
+
+	dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
+					  PCI_DVSEC_CXL_DEVICE);
+	if (!dvsec)
+		return -ENODEV;
+
+	memdev = bus_find_device(&cxl_bus_type, NULL, &pdev->dev,
+				 match_memdev_by_parent);
+	if (memdev)
+		cxlmd = to_cxl_memdev(memdev);
+
+	mutex_lock(&cxl_reset_mutex);
+	pci_dev_lock(pdev);
+
+	if (cxlmd) {
+		/* Held only for the memdev walks below; released at the closing brace */
+		guard(device)(&cxlmd->dev);
+		rc = cxl_reset_prepare_memdev(cxlmd);
+		if (rc)
+			goto out_unlock;
+
+		cxl_reset_flush_cpu_caches(cxlmd);
+	}
+
+	pci_dev_save_and_disable(pdev);
+	cxl_pci_functions_reset_prepare(&ctx);
+	rc = cxl_dev_reset(pdev, dvsec);
+	cxl_pci_functions_reset_done(&ctx);
+	pci_dev_restore(pdev);
+
+out_unlock:
+	pci_dev_unlock(pdev);
+	mutex_unlock(&cxl_reset_mutex);
+
+	if (memdev)
+		put_device(memdev);
+
+	return rc;
+}

From 9b3c4a19da44c2e1aeccb63f19165ad31d01e00d Mon Sep 17 00:00:00 2001
From: Srirangan Madhavan <smadhavan@nvidia.com>
Date: Fri, 6 Mar 2026 09:23:21 +0000
Subject: [PATCH 139/143] NVIDIA: VR: SAUCE: cxl: Add cxl_reset sysfs interface
 for PCI devices

Add a "cxl_reset" sysfs attribute to PCI devices that support CXL
Reset (CXL r3.2 section 8.1.3.1). The attribute is visible only on
devices with both CXL.cache and CXL.mem capabilities and the CXL
Reset Capable bit set in the DVSEC.

Writing "1" to the attribute triggers the full CXL reset flow via
cxl_do_reset(). The interface is decoupled from memdev creation:
when a CXL memdev exists, memory offlining and cache flush are
performed; otherwise reset proceeds without the memory management.

The sysfs attribute is managed entirely by the CXL module using
sysfs_create_group() / sysfs_remove_group() rather than the PCI
core's static attribute groups. This avoids cross-module symbol
dependencies between the PCI core (always built-in) and CXL_BUS
(potentially modular).

At module init, existing PCI devices are scanned and a PCI bus
notifier handles hot-plug/unplug. kernfs_drain() makes sure that
any in-flight store() completes before sysfs_remove_group() returns,
preventing use-after-free during module unload.

Signed-off-by: Srirangan Madhavan <smadhavan@nvidia.com>
(backported from https://lore.kernel.org/linux-cxl/20260306092322.148765-1-smadhavan@nvidia.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 drivers/cxl/core/core.h |   2 +
 drivers/cxl/core/pci.c  | 113 ++++++++++++++++++++++++++++++++++++++++
 drivers/cxl/core/port.c |   3 ++
 3 files changed, 118 insertions(+)

diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index e3c85ceda2485..f3d6e4e6ad81b 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -130,6 +130,8 @@ extern struct cxl_rwsem cxl_rwsem;
 int cxl_memdev_init(void);
 void cxl_memdev_exit(void);
 void cxl_mbox_init(void);
+void cxl_reset_sysfs_init(void);
+void cxl_reset_sysfs_exit(void);
 
 enum cxl_poison_trace_type {
 	CXL_POISON_TRACE_LIST,
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 0955cceced110..497d99b8908d0 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -1356,3 +1356,116 @@ static int cxl_do_reset(struct pci_dev *pdev)
 
 	return rc;
 }
+
+/*
+ * CXL reset sysfs attribute management.
+ *
+ * The cxl_reset attribute is added to PCI devices that advertise CXL Reset
+ * capability. Managed entirely by the CXL module via subsys_interface on
+ * pci_bus_type, avoiding cross-module symbol dependencies between the PCI
+ * core (built-in) and CXL (potentially modular).
+ *
+ * subsys_interface handles existing devices at register time and hot-plug
+ * add/remove automatically. On unregister, remove_dev runs for all tracked
+ * devices under bus core serialization.
+ */
+
+/* True if @pdev's CXL Device DVSEC reports cache, mem and reset capable. */
+static bool pci_cxl_reset_capable(struct pci_dev *pdev)
+{
+	u16 cap;
+	int pos;
+
+	pos = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
+					PCI_DVSEC_CXL_DEVICE);
+	if (!pos)
+		return false;
+
+	/* A failed config read is treated as "not capable". */
+	if (pci_read_config_word(pdev, pos + PCI_DVSEC_CXL_CAP, &cap))
+		return false;
+
+	return (cap & PCI_DVSEC_CXL_CACHE_CAPABLE) &&
+	       (cap & PCI_DVSEC_CXL_MEM_CAPABLE) &&
+	       (cap & PCI_DVSEC_CXL_RST_CAPABLE);
+}
+
+/* Run cxl_do_reset() when sysfs_streq(buf, "1"); otherwise -EINVAL. */
+static ssize_t cxl_reset_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t count)
+{
+	int err;
+
+	if (!sysfs_streq(buf, "1"))
+		return -EINVAL;
+
+	err = cxl_do_reset(to_pci_dev(dev));
+	return err ? err : count;
+}
+static DEVICE_ATTR_WO(cxl_reset);
+
+/* Hide the attribute (mode 0) unless the device is CXL reset capable. */
+static umode_t cxl_reset_attr_is_visible(struct kobject *kobj,
+					  struct attribute *a, int n)
+{
+	struct device *dev = kobj_to_dev(kobj);
+
+	if (pci_cxl_reset_capable(to_pci_dev(dev)))
+		return a->mode;
+	return 0;
+}
+
+static struct attribute *cxl_reset_attrs[] = {
+	&dev_attr_cxl_reset.attr,
+	NULL,	/* terminator */
+};
+
+static const struct attribute_group cxl_reset_attr_group = {
+	.attrs = cxl_reset_attrs,
+	.is_visible = cxl_reset_attr_is_visible, /* hide if not capable */
+};
+
+/* subsys_interface add hook: create cxl_reset on reset-capable devices. */
+static int cxl_reset_add_dev(struct device *dev,
+			     struct subsys_interface *sif)
+{
+	if (pci_cxl_reset_capable(to_pci_dev(dev)))
+		return sysfs_create_group(&dev->kobj,
+					  &cxl_reset_attr_group);
+
+	return 0;
+}
+
+static void cxl_reset_remove_dev(struct device *dev,
+				 struct subsys_interface *sif)
+{
+	/*
+	 * Remove without re-reading config space: a failed read (device gone
+	 * or in error) would leave the file behind on interface unregister.
+	 * Removing a group that was never created is harmless.
+	 */
+	sysfs_remove_group(&dev->kobj, &cxl_reset_attr_group);
+}
+
+static struct subsys_interface cxl_reset_interface = {
+	.name		= "cxl_reset",
+	.subsys		= &pci_bus_type,	/* attach to the PCI bus */
+	.add_dev	= cxl_reset_add_dev,
+	.remove_dev	= cxl_reset_remove_dev,
+};
+
+/* Called from cxl_core_init(); a registration failure is only logged. */
+void cxl_reset_sysfs_init(void)
+{
+	int err = subsys_interface_register(&cxl_reset_interface);
+
+	if (err)
+		pr_warn("CXL: failed to register cxl_reset interface (%d)\n",
+			err);
+}
+
+void cxl_reset_sysfs_exit(void)
+{
+	subsys_interface_unregister(&cxl_reset_interface);
+}
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 385588b8b30b5..929caeec5c954 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -2505,6 +2505,8 @@ static __init int cxl_core_init(void)
 	if (rc)
 		goto err_ras;
 
+	cxl_reset_sysfs_init();
+
 	return 0;
 
 err_ras:
@@ -2520,6 +2522,7 @@ static __init int cxl_core_init(void)
 
 static void cxl_core_exit(void)
 {
+	cxl_reset_sysfs_exit();
 	cxl_ras_exit();
 	cxl_region_exit();
 	bus_unregister(&cxl_bus_type);

From c3dd0abf2681e4975afc3c2b6fcee76b8f7230a9 Mon Sep 17 00:00:00 2001
From: Srirangan Madhavan <smadhavan@nvidia.com>
Date: Fri, 6 Mar 2026 09:23:22 +0000
Subject: [PATCH 140/143] NVIDIA: VR: SAUCE: Documentation: ABI: Add CXL PCI
 cxl_reset sysfs attribute

Document the cxl_reset sysfs attribute added to PCI devices that
support CXL Reset.

Signed-off-by: Srirangan Madhavan <smadhavan@nvidia.com>
(backported from https://lore.kernel.org/linux-cxl/20260306092322.148765-1-smadhavan@nvidia.com/)
Signed-off-by: Jiandi An <jan@nvidia.com>
---
 Documentation/ABI/testing/sysfs-bus-pci | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index 69f952fffec72..c11da40900f17 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -174,6 +174,28 @@ Description:
 		similiar to writing 1 to their individual "reset" file, so use
 		with caution.
 
+What:		/sys/bus/pci/devices/.../cxl_reset
+Date:		February 2026
+Contact:	linux-cxl@vger.kernel.org
+Description:
+		This attribute is only visible when the device advertises
+		CXL.cache, CXL.mem and CXL Reset Capable in the CXL DVSEC
+		Capability register (CXL r3.2, section 8.1.3).
+
+		Writing 1 to this file triggers a CXL device reset which
+		affects CXL.cache and CXL.mem state on all CXL functions
+		(i.e. those not listed in the Non-CXL Function Map DVSEC,
+		section 8.1.4), not just CXL.io/PCIe state.  This is
+		separate from the standard PCI reset interface because CXL
+		Reset has different scope.
+
+		The reset will fail with -EBUSY if any CXL regions using this
+		device have drivers bound.  Active regions are torn down as
+		part of the reset sequence.
+
+		This attribute is registered by the CXL core when a CXL device
+		is discovered, independent of which driver binds the PCI device.
+
 What:		/sys/bus/pci/devices/.../vpd
 Date:		February 2008
 Contact:	Ben Hutchings <bwh@kernel.org>

From 693f075bb9ea4916daafbf233e3f1fb83071bfc5 Mon Sep 17 00:00:00 2001
From: Jiandi An <jan@nvidia.com>
Date: Wed, 11 Mar 2026 17:09:13 -0500
Subject: [PATCH 141/143] NVIDIA: VR: SAUCE: [Config] CXL config annotations
 for Type-2 device and RAS support

Add Ubuntu kernel config annotations for CXL-related configs introduced
or changed by the following cherry-picked patch series:
  - drivers/cxl changes between v6.17.9 and upstream 7.0 (which includes
    a portion of Terry Bowman's v14 CXL RAS series merged via
    for-7.0/cxl-aer-prep)
  - Alejandro Lucero's v23 CXL Type-2 device support series
  - Smita Koralahalli's v6 patch 3/9 (cxl/region: Skip decoder reset on
    detach for autodiscovered regions)

CONFIG_CXL_BUS:           Enable CXL bus support built-in; required for
                          CXL Type-2 device and RAS support
CONFIG_CXL_PCI:           Enable CXL PCI management built-in; auto-selects
                          CXL_MEM; required for CXL Type-2 device support
CONFIG_CXL_MEM:           Auto-selected by CXL_PCI; required for CXL
                          memory expansion and Type-2 device support
CONFIG_CXL_PORT:          Required for CXL port enumeration; defaults to
                          CXL_BUS value
CONFIG_FWCTL:             Selected by CXL_BUS when CXL_FEATURES is enabled;
                          required for CXL feature mailbox access
CONFIG_CXL_RAS:           New def_bool replacing PCIEAER_CXL (Terry Bowman
                          v14); auto-enabled with ACPI_APEI_GHES+PCIEAER+
                          CXL_BUS for CXL RAS error handling
CONFIG_SFC_CXL:           Solarflare SFC9100-family CXL Type-2 device
                          support; not needed for NVIDIA platforms (n)
CONFIG_ACPI_APEI_EINJ:    Required prerequisite for CONFIG_ACPI_APEI_EINJ_CXL
CONFIG_ACPI_APEI_EINJ_CXL: CXL protocol error injection support via APEI EINJ

CONFIG_PCIEAER_CXL: Remove it from debian.master policy. This config
  was removed from Kconfig by upstream commit d18f1b7beadf ("PCI/AER:
  Replace PCIEAER_CXL symbol with CXL_RAS"), which is included in this
  port.

CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION: Override debian.master
  amd64-only policy to include arm64. Commit 4d873c5dc3ed added
  'select ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION' to arch/arm64/Kconfig,
  making this y on arm64 as well.

CONFIG_GENERIC_CPU_CACHE_MAINTENANCE: New bool config defined by
  c460697d3472 in lib/Kconfig. Selected by arm64 via 4d873c5dc3ed;
  not selected by x86. Set arm64: y, amd64: -.

CONFIG_CACHEMAINT_FOR_HOTPLUG: New optional menuconfig defined by
  2ec3b54a6ff0 in drivers/cache/Kconfig. Depends on
  GENERIC_CPU_CACHE_MAINTENANCE so becomes visible on arm64. Defaults
  to n; HiSilicon HHA driver not needed for NVIDIA platforms.
  Set arm64: n, amd64: -.

Signed-off-by: Jiandi An <jan@nvidia.com>
---
 debian.master/config/annotations      |  1 -
 debian.nvidia-6.17/config/annotations | 36 +++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/debian.master/config/annotations b/debian.master/config/annotations
index b05e77739d76b..c27e5965589ab 100644
--- a/debian.master/config/annotations
+++ b/debian.master/config/annotations
@@ -10048,7 +10048,6 @@ CONFIG_PCENGINES_APU2                           policy<{'amd64': 'm'}>
 CONFIG_PCI                                      policy<{'amd64': 'y', 'arm64': 'y', 'armhf': 'y', 'ppc64el': 'y', 'riscv64': 'y', 's390x': 'y'}>
 CONFIG_PCI200SYN                                policy<{'amd64': 'm', 'arm64': 'm', 'armhf': 'm', 'ppc64el': 'm', 'riscv64': 'm'}>
 CONFIG_PCIEAER                                  policy<{'amd64': 'y', 'arm64': 'y', 'armhf': 'y', 'ppc64el': '-', 'riscv64': 'y', 's390x': 'y'}>
-CONFIG_PCIEAER_CXL                              policy<{'amd64': 'y', 'arm64': 'y', 'armhf': 'y', 'riscv64': 'y'}>
 CONFIG_PCIEAER_INJECT                           policy<{'amd64': 'n', 'arm64': 'n', 'armhf': 'n', 'ppc64el': '-', 'riscv64': 'n', 's390x': 'n'}>
 CONFIG_PCIEASPM                                 policy<{'amd64': 'y', 'arm64': 'y', 'armhf': 'y', 'ppc64el': 'y', 'riscv64': 'y', 's390x': 'y'}>
 CONFIG_PCIEASPM_PERFORMANCE                     policy<{'amd64': 'n', 'arm64': 'n', 'armhf': 'n', 'ppc64el': 'n', 'riscv64': 'n', 's390x': 'n'}>
diff --git a/debian.nvidia-6.17/config/annotations b/debian.nvidia-6.17/config/annotations
index 4fb025e692b3a..800ca185cf530 100644
--- a/debian.nvidia-6.17/config/annotations
+++ b/debian.nvidia-6.17/config/annotations
@@ -204,6 +204,42 @@ CONFIG_UBUNTU_ODM_DRIVERS                       note<'Disable all Ubuntu ODM dri
 CONFIG_ULTRASOC_SMB                             policy<{'arm64': 'n'}>
 CONFIG_ULTRASOC_SMB                             note<'Required for Grace enablement'>
 
+CONFIG_CXL_BUS                                  policy<{'amd64': 'y', 'arm64': 'y'}>
+CONFIG_CXL_BUS                                  note<'Enable CXL bus support built-in; required for CXL Type-2 device and RAS support'>
+
+CONFIG_CXL_PCI                                  policy<{'amd64': 'y', 'arm64': 'y'}>
+CONFIG_CXL_PCI                                  note<'Enable CXL PCI management built-in; auto-selects CXL_MEM; required for CXL Type-2 device support'>
+
+CONFIG_CXL_MEM                                  policy<{'amd64': 'y', 'arm64': 'y'}>
+CONFIG_CXL_MEM                                  note<'Auto-selected by CXL_PCI; required for CXL memory expansion and Type-2 device support'>
+
+CONFIG_CXL_PORT                                 policy<{'amd64': 'y', 'arm64': 'y'}>
+CONFIG_CXL_PORT                                 note<'Required for CXL port enumeration; defaults to CXL_BUS value'>
+
+CONFIG_FWCTL                                    policy<{'amd64': 'y', 'arm64': 'y'}>
+CONFIG_FWCTL                                    note<'Selected by CXL_BUS when CXL_FEATURES is enabled; required for CXL feature mailbox access'>
+
+CONFIG_CXL_RAS                                  policy<{'amd64': 'y', 'arm64': 'y'}>
+CONFIG_CXL_RAS                                  note<'New def_bool replacing PCIEAER_CXL; auto-enabled with ACPI_APEI_GHES+PCIEAER+CXL_BUS; CXL RAS error handling support'>
+
+CONFIG_SFC_CXL                                  policy<{'amd64': 'n', 'arm64': 'n'}>
+CONFIG_SFC_CXL                                  note<'Solarflare SFC9100-family CXL Type-2 device support; not needed for NVIDIA platforms'>
+
+CONFIG_ACPI_APEI_EINJ                           policy<{'amd64': 'y', 'arm64': 'y'}>
+CONFIG_ACPI_APEI_EINJ                           note<'Required for CONFIG_ACPI_APEI_EINJ_CXL'>
+
+CONFIG_ACPI_APEI_EINJ_CXL                       policy<{'amd64': 'y', 'arm64': 'y'}>
+CONFIG_ACPI_APEI_EINJ_CXL                       note<'CXL protocol error injection support via APEI EINJ'>
+
+CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION  policy<{'amd64': 'y', 'arm64': 'y'}>
+CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION  note<'Override debian.master amd64-only; arm64 selects this via arch/arm64/Kconfig since 4d873c5dc3ed'>
+
+CONFIG_GENERIC_CPU_CACHE_MAINTENANCE            policy<{'amd64': '-', 'arm64': 'y'}>
+CONFIG_GENERIC_CPU_CACHE_MAINTENANCE            note<'Selected by arm64 via arch/arm64/Kconfig since 4d873c5dc3ed; not selected by x86'>
+
+CONFIG_CACHEMAINT_FOR_HOTPLUG                   policy<{'amd64': '-', 'arm64': 'n'}>
+CONFIG_CACHEMAINT_FOR_HOTPLUG                   note<'Optional HiSilicon HHA cache maintenance driver; depends on GENERIC_CPU_CACHE_MAINTENANCE; not needed for NVIDIA platforms'>
+
 
 # ---- Annotations without notes ----
 

From ce505ce1ac1ac7a261ae47ef6e49435f590543fe Mon Sep 17 00:00:00 2001
From: Jiandi An <jan@nvidia.com>
Date: Wed, 11 Mar 2026 18:41:16 -0500
Subject: [PATCH 142/143] NVIDIA: VR: SAUCE: [Config] Enable CXL DAX and KMEM
 built-in for CXL memory access

Override debian.master policy (m->y) for DEV_DAX, DEV_DAX_CXL, and
DEV_DAX_KMEM to ensure CXL memory regions are accessible as both raw
DAX devices and hotplugged System-RAM nodes.

debian.master sets these to 'm' (modules). For NVIDIA platforms with
CXL Type-2 devices, built-in (y) is required to ensure CXL memory
regions provisioned early in boot are immediately accessible without
relying on module loading order.

CONFIG_DEV_DAX:     Override m->y; prerequisite for DEV_DAX_CXL and
                    DEV_DAX_KMEM to be built-in; depends on
                    TRANSPARENT_HUGEPAGE (already y in debian.master)

CONFIG_DEV_DAX_CXL: Override m->y; creates /dev/daxX.Y devices for CXL
                    RAM regions not in the default system memory map
                    (Soft Reserved or dynamically provisioned regions);
                    depends on CXL_BUS+CXL_REGION+DEV_DAX (all y)

CONFIG_DEV_DAX_KMEM: Override m->y; onlines CXL DAX devices as System-RAM
                    NUMA nodes via memory hotplug, making CXL memory
                    available for normal kernel and userspace allocation

Signed-off-by: Jiandi An <jan@nvidia.com>
---
 debian.nvidia-6.17/config/annotations | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/debian.nvidia-6.17/config/annotations b/debian.nvidia-6.17/config/annotations
index 800ca185cf530..c5aeb93d14ca9 100644
--- a/debian.nvidia-6.17/config/annotations
+++ b/debian.nvidia-6.17/config/annotations
@@ -225,6 +225,15 @@ CONFIG_CXL_RAS                                  note<'New def_bool replacing PCI
 CONFIG_SFC_CXL                                  policy<{'amd64': 'n', 'arm64': 'n'}>
 CONFIG_SFC_CXL                                  note<'Solarflare SFC9100-family CXL Type-2 device support; not needed for NVIDIA platforms'>
 
+CONFIG_DEV_DAX                                  policy<{'amd64': 'y', 'arm64': 'y'}>
+CONFIG_DEV_DAX                                  note<'Override debian.master m->y; required built-in for DEV_DAX_CXL=y'>
+
+CONFIG_DEV_DAX_CXL                              policy<{'amd64': 'y', 'arm64': 'y'}>
+CONFIG_DEV_DAX_CXL                              note<'Override debian.master m->y; CXL RAM region DAX access; depends on CXL_BUS+CXL_REGION+DEV_DAX'>
+
+CONFIG_DEV_DAX_KMEM                             policy<{'amd64': 'y', 'arm64': 'y'}>
+CONFIG_DEV_DAX_KMEM                             note<'Override debian.master m->y; map CXL DAX devices as System-RAM'>
+
 CONFIG_ACPI_APEI_EINJ                           policy<{'amd64': 'y', 'arm64': 'y'}>
 CONFIG_ACPI_APEI_EINJ                           note<'Required for CONFIG_ACPI_APEI_EINJ_CXL'>
 

From acf188b93967c184dfc83dde5161fb0518ec6f54 Mon Sep 17 00:00:00 2001
From: Jiandi An <jan@nvidia.com>
Date: Sun, 22 Mar 2026 20:28:29 -0500
Subject: [PATCH 143/143] NVIDIA: VR: SAUCE: [Config] Add PCI_CXL annotation
 for CXL state save/restore

Add Ubuntu kernel config annotation for CONFIG_PCI_CXL introduced by
the CXL DVSEC and HDM state save/restore series (Srirangan Madhavan).

CONFIG_PCI_CXL:  Hidden bool in drivers/pci/Kconfig; auto-enabled when
                 CXL_BUS=y. Gates compilation of drivers/pci/cxl.o which
                 saves and restores CXL DVSEC control/range registers and
                 HDM decoder state across PCI resets and link transitions.

Signed-off-by: Jiandi An <jan@nvidia.com>
---
 debian.nvidia-6.17/config/annotations | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/debian.nvidia-6.17/config/annotations b/debian.nvidia-6.17/config/annotations
index c5aeb93d14ca9..9340e319beca8 100644
--- a/debian.nvidia-6.17/config/annotations
+++ b/debian.nvidia-6.17/config/annotations
@@ -249,6 +249,8 @@ CONFIG_GENERIC_CPU_CACHE_MAINTENANCE            note<'Selected by arm64 via arch
 CONFIG_CACHEMAINT_FOR_HOTPLUG                   policy<{'amd64': '-', 'arm64': 'n'}>
 CONFIG_CACHEMAINT_FOR_HOTPLUG                   note<'Optional HiSilicon HHA cache maintenance driver; depends on GENERIC_CPU_CACHE_MAINTENANCE; not needed for NVIDIA platforms'>
 
+CONFIG_PCI_CXL                                  policy<{'amd64': 'y', 'arm64': 'y'}>
+CONFIG_PCI_CXL                                  note<'Hidden bool; auto-enabled by CXL_BUS; PCI core CXL DVSEC and HDM state save/restore support'>
 
 # ---- Annotations without notes ----