diff --git a/.mailmap b/.mailmap index 8db24be50158d..d64a8ed369b29 100644 --- a/.mailmap +++ b/.mailmap @@ -182,6 +182,9 @@ Christian Brauner Christian Brauner Christian Brauner Christian Marangi +Christophe Leroy +Christophe Leroy +Christophe Leroy Christophe Ricard Christopher Obbard Christoph Hellwig diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl index e95e21f131e96..2989d4da96c1b 100644 --- a/Documentation/ABI/testing/debugfs-cxl +++ b/Documentation/ABI/testing/debugfs-cxl @@ -19,6 +19,20 @@ Description: is returned to the user. The inject_poison attribute is only visible for devices supporting the capability. + TEST-ONLY INTERFACE: This interface is intended for testing + and validation purposes only. It is not a data repair mechanism + and should never be used on production systems or live data. + + DATA LOSS RISK: For CXL persistent memory (PMEM) devices, + poison injection can result in permanent data loss. Injected + poison may render data permanently inaccessible even after + clearing, as the clear operation writes zeros and does not + recover original data. + + SYSTEM STABILITY RISK: For volatile memory, poison injection + can cause kernel crashes, system instability, or unpredictable + behavior if the poisoned addresses are accessed by running code + or critical kernel structures. What: /sys/kernel/debug/cxl/memX/clear_poison Date: April, 2023 @@ -35,6 +49,79 @@ Description: The clear_poison attribute is only visible for devices supporting the capability. + TEST-ONLY INTERFACE: This interface is intended for testing + and validation purposes only. It is not a data repair mechanism + and should never be used on production systems or live data. + + CLEAR IS NOT DATA RECOVERY: This operation writes zeros to the + specified address range and removes the address from the poison + list. It does NOT recover or restore original data that may have + been present before poison injection. Any original data at the + cleared address is permanently lost and replaced with zeros. + + CLEAR IS NOT A REPAIR MECHANISM: This interface is for testing + purposes only and should not be used as a data repair tool. + Clearing poison is fundamentally different from data recovery + or error correction. + +What: /sys/kernel/debug/cxl/regionX/inject_poison +Date: August, 2025 +Contact: linux-cxl@vger.kernel.org +Description: + (WO) When a Host Physical Address (HPA) is written to this + attribute, the region driver translates it to a Device + Physical Address (DPA) and identifies the corresponding + memdev. It then sends an inject poison command to that memdev + at the translated DPA. Refer to the memdev ABI entry at: + /sys/kernel/debug/cxl/memX/inject_poison for the detailed + behavior. This attribute is only visible if all memdevs + participating in the region support both inject and clear + poison commands. + + TEST-ONLY INTERFACE: This interface is intended for testing + and validation purposes only. It is not a data repair mechanism + and should never be used on production systems or live data. + + DATA LOSS RISK: For CXL persistent memory (PMEM) devices, + poison injection can result in permanent data loss. Injected + poison may render data permanently inaccessible even after + clearing, as the clear operation writes zeros and does not + recover original data. 
+ + SYSTEM STABILITY RISK: For volatile memory, poison injection + can cause kernel crashes, system instability, or unpredictable + behavior if the poisoned addresses are accessed by running code + or critical kernel structures. + +What: /sys/kernel/debug/cxl/regionX/clear_poison +Date: August, 2025 +Contact: linux-cxl@vger.kernel.org +Description: + (WO) When a Host Physical Address (HPA) is written to this + attribute, the region driver translates it to a Device + Physical Address (DPA) and identifies the corresponding + memdev. It then sends a clear poison command to that memdev + at the translated DPA. Refer to the memdev ABI entry at: + /sys/kernel/debug/cxl/memX/clear_poison for the detailed + behavior. This attribute is only visible if all memdevs + participating in the region support both inject and clear + poison commands. + + TEST-ONLY INTERFACE: This interface is intended for testing + and validation purposes only. It is not a data repair mechanism + and should never be used on production systems or live data. + + CLEAR IS NOT DATA RECOVERY: This operation writes zeros to the + specified address range and removes the address from the poison + list. It does NOT recover or restore original data that may have + been present before poison injection. Any original data at the + cleared address is permanently lost and replaced with zeros. + + CLEAR IS NOT A REPAIR MECHANISM: This interface is for testing + purposes only and should not be used as a data repair tool. + Clearing poison is fundamentally different from data recovery + or error correction. + What: /sys/kernel/debug/cxl/einj_types Date: January, 2024 KernelVersion: v6.9 diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl index 6b4e8c7a963da..c80a1b5a03dba 100644 --- a/Documentation/ABI/testing/sysfs-bus-cxl +++ b/Documentation/ABI/testing/sysfs-bus-cxl @@ -496,8 +496,17 @@ Description: changed, only freed by writing 0. The kernel makes no guarantees that data is maintained over an address space freeing event, and there is no guarantee that a free followed by an allocate - results in the same address being allocated. + results in the same address being allocated. If extended linear + cache is present, the size indicates extended linear cache size + plus the CXL region size. +What: /sys/bus/cxl/devices/regionZ/extended_linear_cache_size +Date: October, 2025 +KernelVersion: v6.19 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) The size of extended linear cache, if there is an extended + linear cache. Otherwise the attribute will not be visible. What: /sys/bus/cxl/devices/regionZ/mode Date: January, 2023 diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci index 69f952fffec72..c11da40900f17 100644 --- a/Documentation/ABI/testing/sysfs-bus-pci +++ b/Documentation/ABI/testing/sysfs-bus-pci @@ -174,6 +174,28 @@ Description: similiar to writing 1 to their individual "reset" file, so use with caution. +What: /sys/bus/pci/devices/.../cxl_reset +Date: February 2026 +Contact: linux-cxl@vger.kernel.org +Description: + This attribute is only visible when the device advertises + CXL Reset Capable in the CXL DVSEC Capability register + (CXL r3.2, section 8.1.3). + + Writing 1 to this file triggers a CXL device reset which + affects CXL.cache and CXL.mem state on all CXL functions + (i.e. those not listed in the Non-CXL Function Map DVSEC, + section 8.1.4), not just CXL.io/PCIe state. 
This is + separate from the standard PCI reset interface because CXL + Reset has different scope. + + The reset will fail with -EBUSY if any CXL regions using this + device have drivers bound. Active regions are torn down as + part of the reset sequence. + + This attribute is registered by the CXL core when a CXL device + is discovered, independent of which driver binds the PCI device. + What: /sys/bus/pci/devices/.../vpd Date: February 2008 Contact: Ben Hutchings diff --git a/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-gx-ao-secure.yaml b/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-gx-ao-secure.yaml index b4f6695a60152..fa7c403c874a6 100644 --- a/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-gx-ao-secure.yaml +++ b/Documentation/devicetree/bindings/arm/amlogic/amlogic,meson-gx-ao-secure.yaml @@ -34,6 +34,9 @@ properties: - amlogic,a4-ao-secure - amlogic,c3-ao-secure - amlogic,s4-ao-secure + - amlogic,s6-ao-secure + - amlogic,s7-ao-secure + - amlogic,s7d-ao-secure - amlogic,t7-ao-secure - const: amlogic,meson-gx-ao-secure - const: syscon diff --git a/Documentation/devicetree/bindings/cache/sifive,ccache0.yaml b/Documentation/devicetree/bindings/cache/sifive,ccache0.yaml index 579bacb66f348..c0e5ebb1fa4c7 100644 --- a/Documentation/devicetree/bindings/cache/sifive,ccache0.yaml +++ b/Documentation/devicetree/bindings/cache/sifive,ccache0.yaml @@ -48,6 +48,11 @@ properties: - const: microchip,mpfs-ccache - const: sifive,fu540-c000-ccache - const: cache + - items: + - const: microchip,pic64gx-ccache + - const: microchip,mpfs-ccache + - const: sifive,fu540-c000-ccache + - const: cache cache-block-size: const: 64 diff --git a/Documentation/devicetree/bindings/soc/microchip/microchip,mpfs-mss-top-sysreg.yaml b/Documentation/devicetree/bindings/soc/microchip/microchip,mpfs-mss-top-sysreg.yaml new file mode 100644 index 0000000000000..1ab691db87950 --- /dev/null +++ b/Documentation/devicetree/bindings/soc/microchip/microchip,mpfs-mss-top-sysreg.yaml @@ -0,0 +1,47 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/soc/microchip/microchip,mpfs-mss-top-sysreg.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Microchip PolarFire SoC Microprocessor Subsystem (MSS) sysreg register region + +maintainers: + - Conor Dooley + +description: + An wide assortment of registers that control elements of the MSS on PolarFire + SoC, including pinmuxing, resets and clocks among others. + +properties: + compatible: + items: + - const: microchip,mpfs-mss-top-sysreg + - const: syscon + + reg: + maxItems: 1 + + '#reset-cells': + description: + The AHB/AXI peripherals on the PolarFire SoC have reset support, so + from CLK_ENVM to CLK_CFM. The reset consumer should specify the + desired peripheral via the clock ID in its "resets" phandle cell. + See include/dt-bindings/clock/microchip,mpfs-clock.h for the full list + of PolarFire clock/reset IDs. 
+ const: 1 + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + syscon@20002000 { + compatible = "microchip,mpfs-mss-top-sysreg", "syscon"; + reg = <0x20002000 0x1000>; + #reset-cells = <1>; + }; + diff --git a/Documentation/driver-api/cxl/allocation/page-allocator.rst b/Documentation/driver-api/cxl/allocation/page-allocator.rst index 7b8fe1b8d5bbb..3fa584a248bdd 100644 --- a/Documentation/driver-api/cxl/allocation/page-allocator.rst +++ b/Documentation/driver-api/cxl/allocation/page-allocator.rst @@ -41,37 +41,6 @@ To simplify this, the page allocator will prefer :code:`ZONE_MOVABLE` over will fallback to allocate from :code:`ZONE_NORMAL`. -Zone and Node Quirks -==================== -Let's consider a configuration where the local DRAM capacity is largely onlined -into :code:`ZONE_NORMAL`, with no :code:`ZONE_MOVABLE` capacity present. The -CXL capacity has the opposite configuration - all onlined in -:code:`ZONE_MOVABLE`. - -Under the default allocation policy, the page allocator will completely skip -:code:`ZONE_MOVABLE` as a valid allocation target. This is because, as of -Linux v6.15, the page allocator does (approximately) the following: :: - - for (each zone in local_node): - - for (each node in fallback_order): - - attempt_allocation(gfp_flags); - -Because the local node does not have :code:`ZONE_MOVABLE`, the CXL node is -functionally unreachable for direct allocation. As a result, the only way -for CXL capacity to be used is via `demotion` in the reclaim path. - -This configuration also means that if the DRAM ndoe has :code:`ZONE_MOVABLE` -capacity - when that capacity is depleted, the page allocator will actually -prefer CXL :code:`ZONE_MOVABLE` pages over DRAM :code:`ZONE_NORMAL` pages. - -We may wish to invert this priority in future Linux versions. - -If `demotion` and `swap` are disabled, Linux will begin to cause OOM crashes -when the DRAM nodes are depleted. See the reclaim section for more details. - - CGroups and CPUSets =================== Finally, assuming CXL memory is reachable via the page allocation (i.e. onlined diff --git a/Documentation/driver-api/cxl/conventions.rst b/Documentation/driver-api/cxl/conventions.rst index da347a81a237a..e37336d7b116e 100644 --- a/Documentation/driver-api/cxl/conventions.rst +++ b/Documentation/driver-api/cxl/conventions.rst @@ -45,3 +45,138 @@ Detailed Description of the Change ---------------------------------- + + +Resolve conflict between CFMWS, Platform Memory Holes, and Endpoint Decoders +============================================================================ + +Document +-------- + +CXL Revision 3.2, Version 1.0 + +License +------- + +SPDX-License Identifier: CC-BY-4.0 + +Creator/Contributors +-------------------- + +- Fabio M. De Francesco, Intel +- Dan J. Williams, Intel +- Mahesh Natu, Intel + +Summary of the Change +--------------------- + +According to the current Compute Express Link (CXL) Specifications (Revision +3.2, Version 1.0), the CXL Fixed Memory Window Structure (CFMWS) describes zero +or more Host Physical Address (HPA) windows associated with each CXL Host +Bridge. Each window represents a contiguous HPA range that may be interleaved +across one or more targets, including CXL Host Bridges. Each window has a set +of restrictions that govern its usage. It is the Operating System-directed +configuration and Power Management (OSPM) responsibility to utilize each window +for the specified use. 
+ +Table 9-22 of the current CXL Specifications states that the Window Size field +contains the total number of consecutive bytes of HPA this window describes. +This value must be a multiple of the Number of Interleave Ways (NIW) * 256 MB. + +Platform Firmware (BIOS) might reserve physical addresses below 4 GB where a +memory gap such as the Low Memory Hole for PCIe MMIO may exist. In such cases, +the CFMWS Range Size may not adhere to the NIW * 256 MB rule. + +The HPA represents the actual physical memory address space that the CXL devices +can decode and respond to, while the System Physical Address (SPA), a related +but distinct concept, represents the system-visible address space that users can +direct transaction to and so it excludes reserved regions. + +BIOS publishes CFMWS to communicate the active SPA ranges that, on platforms +with LMH's, map to a strict subset of the HPA. The SPA range trims out the hole, +resulting in lost capacity in the Endpoints with no SPA to map to that part of +the HPA range that intersects the hole. + +E.g, an x86 platform with two CFMWS and an LMH starting at 2 GB: + + +--------+------------+-------------------+------------------+-------------------+------+ + | Window | CFMWS Base | CFMWS Size | HDM Decoder Base | HDM Decoder Size | Ways | + +========+============+===================+==================+===================+======+ + |  0 | 0 GB | 2 GB | 0 GB | 3 GB | 12 | + +--------+------------+-------------------+------------------+-------------------+------+ + |  1 | 4 GB | NIW*256MB Aligned | 4 GB | NIW*256MB Aligned | 12 | + +--------+------------+-------------------+------------------+-------------------+------+ + +HDM decoder base and HDM decoder size represent all the 12 Endpoint Decoders of +a 12 ways region and all the intermediate Switch Decoders. They are configured +by the BIOS according to the NIW * 256MB rule, resulting in a HPA range size of +3GB. Instead, the CFMWS Base and CFMWS Size are used to configure the Root +Decoder HPA range that results smaller (2GB) than that of the Switch and +Endpoint Decoders in the hierarchy (3GB). + +This creates 2 issues which lead to a failure to construct a region: + +1) A mismatch in region size between root and any HDM decoder. The root decoders + will always be smaller due to the trim. + +2) The trim causes the root decoder to violate the (NIW * 256MB) rule. + +This change allows a region with a base address of 0GB to bypass these checks to +allow for region creation with the trimmed root decoder address range. + +This change does not allow for any other arbitrary region to violate these +checks - it is intended exclusively to enable x86 platforms which map CXL memory +under 4GB. + +Despite the HDM decoders covering the PCIE hole HPA region, it is expected that +the platform will never route address accesses to the CXL complex because the +root decoder only covers the trimmed region (which excludes this). This is +outside the ability of Linux to enforce. + +On the example platform, only the first 2GB will be potentially usable, but +Linux, aiming to adhere to the current specifications, fails to construct +Regions and attach Endpoint and intermediate Switch Decoders to them. + +There are several points of failure that due to the expectation that the Root +Decoder HPA size, that is equal to the CFMWS from which it is configured, has +to be greater or equal to the matching Switch and Endpoint HDM Decoders. 
+ +In order to succeed with construction and attachment, Linux must construct a +Region with Root Decoder HPA range size, and then attach to that all the +intermediate Switch Decoders and Endpoint Decoders that belong to the hierarchy +regardless of their range sizes. + +Benefits of the Change +---------------------- + +Without the change, the OSPM wouldn't match intermediate Switch and Endpoint +Decoders with Root Decoders configured with CFMWS HPA sizes that don't align +with the NIW * 256MB constraint, and so it leads to lost memdev capacity. + +This change allows the OSPM to construct Regions and attach intermediate Switch +and Endpoint Decoders to them, so that the addressable part of the memory +devices total capacity is made available to the users. + +References +---------- + +Compute Express Link Specification Revision 3.2, Version 1.0 + + +Detailed Description of the Change +---------------------------------- + +The description of the Window Size field in table 9-22 needs to account for +platforms with Low Memory Holes, where SPA ranges might be subsets of the +endpoints HPA. Therefore, it has to be changed to the following: + +"The total number of consecutive bytes of HPA this window represents. This value +shall be a multiple of NIW * 256 MB. + +On platforms that reserve physical addresses below 4 GB, such as the Low Memory +Hole for PCIe MMIO on x86, an instance of CFMWS whose Base HPA range is 0 might +have a size that doesn't align with the NIW * 256 MB constraint. + +Note that the matching intermediate Switch Decoders and the Endpoint Decoders +HPA range sizes must still align to the above-mentioned rule, but the memory +capacity that exceeds the CFMWS window size won't be accessible.". diff --git a/Documentation/driver-api/cxl/maturity-map.rst b/Documentation/driver-api/cxl/maturity-map.rst index 1330f3f52129a..282c1102dd819 100644 --- a/Documentation/driver-api/cxl/maturity-map.rst +++ b/Documentation/driver-api/cxl/maturity-map.rst @@ -173,7 +173,7 @@ Accelerator User Flow Support ----------------- -* [0] Inject & clear poison by HPA +* [2] Inject & clear poison by region offset Details ======= diff --git a/Documentation/driver-api/cxl/platform/bios-and-efi.rst b/Documentation/driver-api/cxl/platform/bios-and-efi.rst index 645322632cc9b..a9aa0ccd92af7 100644 --- a/Documentation/driver-api/cxl/platform/bios-and-efi.rst +++ b/Documentation/driver-api/cxl/platform/bios-and-efi.rst @@ -202,7 +202,7 @@ future and such a configuration should be avoided. Memory Holes ------------ -If your platform includes memory holes intersparsed between your CXL memory, it +If your platform includes memory holes interspersed between your CXL memory, it is recommended to utilize multiple decoders to cover these regions of memory, rather than try to program the decoders to accept the entire range and expect Linux to manage the overlap. 
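The window-size arithmetic described in the convention above can be sanity-checked with a small userspace sketch (illustrative only, not kernel code; the helper name and the 12-way / 2 GiB / 3 GiB figures are taken from the example platform in the table above)::

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define SZ_256M (256ULL << 20)

	/* The NIW * 256 MB rule from CXL r3.2 table 9-22. */
	static bool cfmws_size_is_aligned(uint64_t window_size, unsigned int niw)
	{
		return window_size % ((uint64_t)niw * SZ_256M) == 0;
	}

	int main(void)
	{
		unsigned int niw = 12;
		uint64_t cfmws_size = 2ULL << 30;             /* SPA window trimmed by the 2 GiB hole */
		uint64_t hdm_size = (uint64_t)niw * SZ_256M;  /* 3 GiB programmed in the HDM decoders */

		printf("CFMWS size honours NIW*256MB: %s\n",
		       cfmws_size_is_aligned(cfmws_size, niw) ? "yes" : "no");
		printf("endpoint capacity with no SPA to map to: %llu MiB\n",
		       (unsigned long long)((hdm_size - cfmws_size) >> 20));
		return 0;
	}

On the example platform this prints "no" and 1024 MiB, which is exactly the mismatch the change above allows the region driver to tolerate for windows based at HPA 0.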
diff --git a/MAINTAINERS b/MAINTAINERS index 9a83ca44c7d29..1c7561495148e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4498,7 +4498,7 @@ F: drivers/net/ethernet/netronome/nfp/bpf/ BPF JIT for POWERPC (32-BIT AND 64-BIT) M: Hari Bathini -M: Christophe Leroy +M: Christophe Leroy (CS GROUP) R: Naveen N Rao L: bpf@vger.kernel.org S: Supported @@ -9871,7 +9871,7 @@ F: drivers/spi/spi-fsl-qspi.c FREESCALE QUICC ENGINE LIBRARY M: Qiang Zhao -M: Christophe Leroy +M: Christophe Leroy (CS GROUP) L: linuxppc-dev@lists.ozlabs.org S: Maintained F: drivers/soc/fsl/qe/ @@ -9924,7 +9924,7 @@ S: Maintained F: drivers/tty/serial/ucc_uart.c FREESCALE SOC DRIVERS -M: Christophe Leroy +M: Christophe Leroy (CS GROUP) L: linuxppc-dev@lists.ozlabs.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained @@ -14117,7 +14117,7 @@ LINUX FOR POWERPC (32-BIT AND 64-BIT) M: Madhavan Srinivasan M: Michael Ellerman R: Nicholas Piggin -R: Christophe Leroy +R: Christophe Leroy (CS GROUP) L: linuxppc-dev@lists.ozlabs.org S: Supported W: https://github.com/linuxppc/wiki/wiki @@ -14173,7 +14173,7 @@ F: Documentation/devicetree/bindings/powerpc/fsl/ F: arch/powerpc/platforms/85xx/ LINUX FOR POWERPC EMBEDDED PPC8XX AND PPC83XX -M: Christophe Leroy +M: Christophe Leroy (CS GROUP) L: linuxppc-dev@lists.ozlabs.org S: Maintained F: arch/powerpc/platforms/8xx/ @@ -21722,7 +21722,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git F: Documentation/devicetree/bindings/iommu/riscv,iommu.yaml F: drivers/iommu/riscv/ -RISC-V MICROCHIP FPGA SUPPORT +RISC-V MICROCHIP SUPPORT M: Conor Dooley M: Daire McNamara L: linux-riscv@lists.infradead.org @@ -21748,6 +21748,8 @@ F: drivers/pci/controller/plda/pcie-microchip-host.c F: drivers/pwm/pwm-microchip-core.c F: drivers/reset/reset-mpfs.c F: drivers/rtc/rtc-mpfs.c +F: drivers/soc/microchip/mpfs-control-scb.c +F: drivers/soc/microchip/mpfs-mss-top-sysreg.c F: drivers/soc/microchip/mpfs-sys-controller.c F: drivers/spi/spi-microchip-core-qspi.c F: drivers/spi/spi-microchip-core.c @@ -24048,10 +24050,13 @@ F: drivers/staging/ STANDALONE CACHE CONTROLLER DRIVERS M: Conor Dooley +M: Jonathan Cameron S: Maintained T: git https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/ F: Documentation/devicetree/bindings/cache/ F: drivers/cache +F: include/linux/cache_coherency.h +F: lib/cache_maint.c STARFIRE/DURALAN NETWORK DRIVER M: Ion Badulescu diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1684ec2454369..70f023a98af99 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -21,6 +21,7 @@ config ARM64 select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_CACHE_LINE_SIZE select ARCH_HAS_CC_PLATFORM + select ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE @@ -146,6 +147,7 @@ config ARM64 select GENERIC_ARCH_TOPOLOGY select GENERIC_CLOCKEVENTS_BROADCAST select GENERIC_CPU_AUTOPROBE + select GENERIC_CPU_CACHE_MAINTENANCE select GENERIC_CPU_DEVICES select GENERIC_CPU_VULNERABILITIES select GENERIC_EARLY_IOREMAP diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 8834c76f91c9e..292c7202faed9 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -368,7 +368,7 @@ bool cpu_cache_has_invalidate_memregion(void) } EXPORT_SYMBOL_NS_GPL(cpu_cache_has_invalidate_memregion, "DEVMEM"); -int cpu_cache_invalidate_memregion(int res_desc) +int cpu_cache_invalidate_memregion(phys_addr_t 
start, size_t len) { if (WARN_ON_ONCE(!cpu_cache_has_invalidate_memregion())) return -ENXIO; diff --git a/debian.master/config/annotations b/debian.master/config/annotations index b05e77739d76b..c27e5965589ab 100644 --- a/debian.master/config/annotations +++ b/debian.master/config/annotations @@ -10048,7 +10048,6 @@ CONFIG_PCENGINES_APU2 policy<{'amd64': 'm'}> CONFIG_PCI policy<{'amd64': 'y', 'arm64': 'y', 'armhf': 'y', 'ppc64el': 'y', 'riscv64': 'y', 's390x': 'y'}> CONFIG_PCI200SYN policy<{'amd64': 'm', 'arm64': 'm', 'armhf': 'm', 'ppc64el': 'm', 'riscv64': 'm'}> CONFIG_PCIEAER policy<{'amd64': 'y', 'arm64': 'y', 'armhf': 'y', 'ppc64el': '-', 'riscv64': 'y', 's390x': 'y'}> -CONFIG_PCIEAER_CXL policy<{'amd64': 'y', 'arm64': 'y', 'armhf': 'y', 'riscv64': 'y'}> CONFIG_PCIEAER_INJECT policy<{'amd64': 'n', 'arm64': 'n', 'armhf': 'n', 'ppc64el': '-', 'riscv64': 'n', 's390x': 'n'}> CONFIG_PCIEASPM policy<{'amd64': 'y', 'arm64': 'y', 'armhf': 'y', 'ppc64el': 'y', 'riscv64': 'y', 's390x': 'y'}> CONFIG_PCIEASPM_PERFORMANCE policy<{'amd64': 'n', 'arm64': 'n', 'armhf': 'n', 'ppc64el': 'n', 'riscv64': 'n', 's390x': 'n'}> diff --git a/debian.nvidia-6.17/config/annotations b/debian.nvidia-6.17/config/annotations index 4fb025e692b3a..9340e319beca8 100644 --- a/debian.nvidia-6.17/config/annotations +++ b/debian.nvidia-6.17/config/annotations @@ -204,6 +204,53 @@ CONFIG_UBUNTU_ODM_DRIVERS note<'Disable all Ubuntu ODM dri CONFIG_ULTRASOC_SMB policy<{'arm64': 'n'}> CONFIG_ULTRASOC_SMB note<'Required for Grace enablement'> +CONFIG_CXL_BUS policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_CXL_BUS note<'Enable CXL bus support built-in; required for CXL Type-2 device and RAS support'> + +CONFIG_CXL_PCI policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_CXL_PCI note<'Enable CXL PCI management built-in; auto-selects CXL_MEM; required for CXL Type-2 device support'> + +CONFIG_CXL_MEM policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_CXL_MEM note<'Auto-selected by CXL_PCI; required for CXL memory expansion and Type-2 device support'> + +CONFIG_CXL_PORT policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_CXL_PORT note<'Required for CXL port enumeration; defaults to CXL_BUS value'> + +CONFIG_FWCTL policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_FWCTL note<'Selected by CXL_BUS when CXL_FEATURES is enabled; required for CXL feature mailbox access'> + +CONFIG_CXL_RAS policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_CXL_RAS note<'New def_bool replacing PCIEAER_CXL; auto-enabled with ACPI_APEI_GHES+PCIEAER+CXL_BUS; CXL RAS error handling support'> + +CONFIG_SFC_CXL policy<{'amd64': 'n', 'arm64': 'n'}> +CONFIG_SFC_CXL note<'Solarflare SFC9100-family CXL Type-2 device support; not needed for NVIDIA platforms'> + +CONFIG_DEV_DAX policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_DEV_DAX note<'Override debian.master m->y; required built-in for DEV_DAX_CXL=y'> + +CONFIG_DEV_DAX_CXL policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_DEV_DAX_CXL note<'Override debian.master m->y; CXL RAM region DAX access; depends on CXL_BUS+CXL_REGION+DEV_DAX'> + +CONFIG_DEV_DAX_KMEM policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_DEV_DAX_KMEM note<'Override debian.master m->y; map CXL DAX devices as System-RAM'> + +CONFIG_ACPI_APEI_EINJ policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_ACPI_APEI_EINJ note<'Required for CONFIG_ACPI_APEI_EINJ_CXL'> + +CONFIG_ACPI_APEI_EINJ_CXL policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_ACPI_APEI_EINJ_CXL note<'CXL protocol error injection support via APEI EINJ'> + +CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION policy<{'amd64': 'y', 'arm64': 'y'}> 
+CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION note<'Override debian.master amd64-only; arm64 selects this via arch/arm64/Kconfig since 4d873c5dc3ed'> + +CONFIG_GENERIC_CPU_CACHE_MAINTENANCE policy<{'amd64': '-', 'arm64': 'y'}> +CONFIG_GENERIC_CPU_CACHE_MAINTENANCE note<'Selected by arm64 via arch/arm64/Kconfig since 4d873c5dc3ed; not selected by x86'> + +CONFIG_CACHEMAINT_FOR_HOTPLUG policy<{'amd64': '-', 'arm64': 'n'}> +CONFIG_CACHEMAINT_FOR_HOTPLUG note<'Optional HiSilicon HHA cache maintenance driver; depends on GENERIC_CPU_CACHE_MAINTENANCE; not needed for NVIDIA platforms'> + +CONFIG_PCI_CXL policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_PCI_CXL note<'Hidden bool; auto-enabled by CXL_BUS; PCI core CXL DVSEC and HDM state save/restore support'> # ---- Annotations without notes ---- diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 9085375830605..77a81627aaefd 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -74,7 +74,6 @@ struct memory_target { struct node_cache_attrs cache_attrs; u8 gen_port_device_handle[ACPI_SRAT_DEVICE_HANDLE_SIZE]; bool registered; - bool ext_updated; /* externally updated */ }; struct memory_initiator { @@ -368,35 +367,6 @@ static void hmat_update_target_access(struct memory_target *target, } } -int hmat_update_target_coordinates(int nid, struct access_coordinate *coord, - enum access_coordinate_class access) -{ - struct memory_target *target; - int pxm; - - if (nid == NUMA_NO_NODE) - return -EINVAL; - - pxm = node_to_pxm(nid); - guard(mutex)(&target_lock); - target = find_mem_target(pxm); - if (!target) - return -ENODEV; - - hmat_update_target_access(target, ACPI_HMAT_READ_LATENCY, - coord->read_latency, access); - hmat_update_target_access(target, ACPI_HMAT_WRITE_LATENCY, - coord->write_latency, access); - hmat_update_target_access(target, ACPI_HMAT_READ_BANDWIDTH, - coord->read_bandwidth, access); - hmat_update_target_access(target, ACPI_HMAT_WRITE_BANDWIDTH, - coord->write_bandwidth, access); - target->ext_updated = true; - - return 0; -} -EXPORT_SYMBOL_GPL(hmat_update_target_coordinates); - static __init void hmat_add_locality(struct acpi_hmat_locality *hmat_loc) { struct memory_locality *loc; @@ -773,10 +743,6 @@ static void hmat_update_target_attrs(struct memory_target *target, u32 best = 0; int i; - /* Don't update if an external agent has changed the data. */ - if (target->ext_updated) - return; - /* Don't update for generic port if there's no device handle */ if ((access == NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL || access == NODE_ACCESS_CLASS_GENPORT_SINK_CPU) && @@ -944,12 +910,13 @@ static void hmat_register_target(struct memory_target *target) * Register generic port perf numbers. The nid may not be * initialized and is still NUMA_NO_NODE. 
*/ - mutex_lock(&target_lock); - if (*(u16 *)target->gen_port_device_handle) { - hmat_update_generic_target(target); - target->registered = true; + scoped_guard(mutex, &target_lock) { + if (*(u16 *)target->gen_port_device_handle) { + hmat_update_generic_target(target); + target->registered = true; + return; + } } - mutex_unlock(&target_lock); hmat_hotplug_target(target); } diff --git a/drivers/base/node.c b/drivers/base/node.c index 67b01d5797377..3e2329ccb618d 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -248,6 +248,44 @@ void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, } EXPORT_SYMBOL_GPL(node_set_perf_attrs); +/** + * node_update_perf_attrs - Update the performance values for given access class + * @nid: Node identifier to be updated + * @coord: Heterogeneous memory performance coordinates + * @access: The access class for the given attributes + */ +void node_update_perf_attrs(unsigned int nid, struct access_coordinate *coord, + enum access_coordinate_class access) +{ + struct node_access_nodes *access_node; + struct node *node; + int i; + + if (WARN_ON_ONCE(!node_online(nid))) + return; + + node = node_devices[nid]; + list_for_each_entry(access_node, &node->access_list, list_node) { + if (access_node->access != access) + continue; + + access_node->coord = *coord; + for (i = 0; access_attrs[i]; i++) { + sysfs_notify(&access_node->dev.kobj, + NULL, access_attrs[i]->name); + } + break; + } + + /* When setting CPU access coordinates, update mempolicy */ + if (access != ACCESS_COORDINATE_CPU) + return; + + if (mempolicy_set_node_perf(nid, coord)) + pr_info("failed to set mempolicy attrs for node %d\n", nid); +} +EXPORT_SYMBOL_GPL(node_update_perf_attrs); + /** * struct node_cache_info - Internal tracking for memory node caches * @dev: Device represeting the cache level diff --git a/drivers/cache/Kconfig b/drivers/cache/Kconfig index db51386c663a6..1518449d47b51 100644 --- a/drivers/cache/Kconfig +++ b/drivers/cache/Kconfig @@ -1,9 +1,17 @@ # SPDX-License-Identifier: GPL-2.0 -menu "Cache Drivers" + +menuconfig CACHEMAINT_FOR_DMA + bool "Cache management for noncoherent DMA" + depends on RISCV + default y + help + These drivers implement support for noncoherent DMA master devices + on platforms that lack the standard CPU interfaces for this. + +if CACHEMAINT_FOR_DMA config AX45MP_L2_CACHE bool "Andes Technology AX45MP L2 Cache controller" - depends on RISCV select RISCV_NONSTANDARD_CACHE_OPS help Support for the L2 cache controller on Andes Technology AX45MP platforms. @@ -16,7 +24,6 @@ config SIFIVE_CCACHE config STARFIVE_STARLINK_CACHE bool "StarFive StarLink Cache controller" - depends on RISCV depends on ARCH_STARFIVE depends on 64BIT select RISCV_DMA_NONCOHERENT @@ -24,4 +31,26 @@ config STARFIVE_STARLINK_CACHE help Support for the StarLink cache controller IP from StarFive. -endmenu +endif #CACHEMAINT_FOR_DMA + +menuconfig CACHEMAINT_FOR_HOTPLUG + bool "Cache management for memory hot plug like operations" + depends on GENERIC_CPU_CACHE_MAINTENANCE + help + These drivers implement cache management for flows where it is necessary + to flush data from all host caches. + +if CACHEMAINT_FOR_HOTPLUG + +config HISI_SOC_HHA + tristate "HiSilicon Hydra Home Agent (HHA) device driver" + depends on (ARM64 && ACPI) || COMPILE_TEST + help + The Hydra Home Agent (HHA) is responsible for cache coherency + on the SoC. This drivers enables the cache maintenance functions of + the HHA. + + This driver can be built as a module. 
If so, the module will be + called hisi_soc_hha. + +endif #CACHEMAINT_FOR_HOTPLUG diff --git a/drivers/cache/Makefile b/drivers/cache/Makefile index 55c5e851034da..b3362b15d6c15 100644 --- a/drivers/cache/Makefile +++ b/drivers/cache/Makefile @@ -3,3 +3,5 @@ obj-$(CONFIG_AX45MP_L2_CACHE) += ax45mp_cache.o obj-$(CONFIG_SIFIVE_CCACHE) += sifive_ccache.o obj-$(CONFIG_STARFIVE_STARLINK_CACHE) += starfive_starlink_cache.o + +obj-$(CONFIG_HISI_SOC_HHA) += hisi_soc_hha.o diff --git a/drivers/cache/hisi_soc_hha.c b/drivers/cache/hisi_soc_hha.c new file mode 100644 index 0000000000000..25ff0f5ae79b3 --- /dev/null +++ b/drivers/cache/hisi_soc_hha.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Driver for HiSilicon Hydra Home Agent (HHA). + * + * Copyright (c) 2025 HiSilicon Technologies Co., Ltd. + * Author: Yicong Yang + * Yushan Wang + * + * A system typically contains multiple HHAs. Each is responsible for a subset + * of the physical addresses in the system, but interleave can make the mapping + * from a particular cache line to a responsible HHA complex. As such no + * filtering is done in the driver, with the hardware being responsible for + * responding with success for even if it was not responsible for any addresses + * in the range on which the operation was requested. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define HISI_HHA_CTRL 0x5004 +#define HISI_HHA_CTRL_EN BIT(0) +#define HISI_HHA_CTRL_RANGE BIT(1) +#define HISI_HHA_CTRL_TYPE GENMASK(3, 2) +#define HISI_HHA_START_L 0x5008 +#define HISI_HHA_START_H 0x500c +#define HISI_HHA_LEN_L 0x5010 +#define HISI_HHA_LEN_H 0x5014 + +/* The maintain operation performs in a 128 Byte granularity */ +#define HISI_HHA_MAINT_ALIGN 128 + +#define HISI_HHA_POLL_GAP_US 10 +#define HISI_HHA_POLL_TIMEOUT_US 50000 + +struct hisi_soc_hha { + /* Must be first element */ + struct cache_coherency_ops_inst cci; + /* Locks HHA instance to forbid overlapping access. */ + struct mutex lock; + void __iomem *base; +}; + +static bool hisi_hha_cache_maintain_wait_finished(struct hisi_soc_hha *soc_hha) +{ + u32 val; + + return !readl_poll_timeout_atomic(soc_hha->base + HISI_HHA_CTRL, val, + !(val & HISI_HHA_CTRL_EN), + HISI_HHA_POLL_GAP_US, + HISI_HHA_POLL_TIMEOUT_US); +} + +static int hisi_soc_hha_wbinv(struct cache_coherency_ops_inst *cci, + struct cc_inval_params *invp) +{ + struct hisi_soc_hha *soc_hha = + container_of(cci, struct hisi_soc_hha, cci); + phys_addr_t top, addr = invp->addr; + size_t size = invp->size; + u32 reg; + + if (!size) + return -EINVAL; + + addr = ALIGN_DOWN(addr, HISI_HHA_MAINT_ALIGN); + top = ALIGN(addr + size, HISI_HHA_MAINT_ALIGN); + size = top - addr; + + guard(mutex)(&soc_hha->lock); + + if (!hisi_hha_cache_maintain_wait_finished(soc_hha)) + return -EBUSY; + + /* + * Hardware will search for addresses ranging [addr, addr + size - 1], + * last byte included, and perform maintenance in 128 byte granules + * on those cachelines which contain the addresses. If a given instance + * is either not responsible for a cacheline or that cacheline is not + * currently present then the search will fail, no operation will be + * necessary and the device will report success. 
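+	 *
+	 * Illustrative example (hypothetical values): a request for
+	 * addr = 0x1040, size = 0x20 is widened by the 128 byte alignment
+	 * above to addr = 0x1000, size = 0x80, and 0x7f is what lands in the
+	 * LEN registers after the "size -= 1" adjustment below.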
+ */ + size -= 1; + + writel(lower_32_bits(addr), soc_hha->base + HISI_HHA_START_L); + writel(upper_32_bits(addr), soc_hha->base + HISI_HHA_START_H); + writel(lower_32_bits(size), soc_hha->base + HISI_HHA_LEN_L); + writel(upper_32_bits(size), soc_hha->base + HISI_HHA_LEN_H); + + reg = FIELD_PREP(HISI_HHA_CTRL_TYPE, 1); /* Clean Invalid */ + reg |= HISI_HHA_CTRL_RANGE | HISI_HHA_CTRL_EN; + writel(reg, soc_hha->base + HISI_HHA_CTRL); + + return 0; +} + +static int hisi_soc_hha_done(struct cache_coherency_ops_inst *cci) +{ + struct hisi_soc_hha *soc_hha = + container_of(cci, struct hisi_soc_hha, cci); + + guard(mutex)(&soc_hha->lock); + if (!hisi_hha_cache_maintain_wait_finished(soc_hha)) + return -ETIMEDOUT; + + return 0; +} + +static const struct cache_coherency_ops hha_ops = { + .wbinv = hisi_soc_hha_wbinv, + .done = hisi_soc_hha_done, +}; + +static int hisi_soc_hha_probe(struct platform_device *pdev) +{ + struct hisi_soc_hha *soc_hha; + struct resource *mem; + int ret; + + soc_hha = cache_coherency_ops_instance_alloc(&hha_ops, + struct hisi_soc_hha, cci); + if (!soc_hha) + return -ENOMEM; + + platform_set_drvdata(pdev, soc_hha); + + mutex_init(&soc_hha->lock); + + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!mem) { + ret = -ENOMEM; + goto err_free_cci; + } + + soc_hha->base = ioremap(mem->start, resource_size(mem)); + if (!soc_hha->base) { + ret = dev_err_probe(&pdev->dev, -ENOMEM, + "failed to remap io memory"); + goto err_free_cci; + } + + ret = cache_coherency_ops_instance_register(&soc_hha->cci); + if (ret) + goto err_iounmap; + + return 0; + +err_iounmap: + iounmap(soc_hha->base); +err_free_cci: + cache_coherency_ops_instance_put(&soc_hha->cci); + return ret; +} + +static void hisi_soc_hha_remove(struct platform_device *pdev) +{ + struct hisi_soc_hha *soc_hha = platform_get_drvdata(pdev); + + cache_coherency_ops_instance_unregister(&soc_hha->cci); + iounmap(soc_hha->base); + cache_coherency_ops_instance_put(&soc_hha->cci); +} + +static const struct acpi_device_id hisi_soc_hha_ids[] = { + { "HISI0511", }, + { } +}; +MODULE_DEVICE_TABLE(acpi, hisi_soc_hha_ids); + +static struct platform_driver hisi_soc_hha_driver = { + .driver = { + .name = "hisi_soc_hha", + .acpi_match_table = hisi_soc_hha_ids, + }, + .probe = hisi_soc_hha_probe, + .remove = hisi_soc_hha_remove, +}; + +module_platform_driver(hisi_soc_hha_driver); + +MODULE_IMPORT_NS("CACHE_COHERENCY"); +MODULE_DESCRIPTION("HiSilicon Hydra Home Agent driver supporting cache maintenance"); +MODULE_AUTHOR("Yicong Yang "); +MODULE_AUTHOR("Yushan Wang "); +MODULE_LICENSE("GPL"); diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 48b7314afdb88..5b5aa941ad2fb 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -22,6 +22,7 @@ if CXL_BUS config CXL_PCI tristate "PCI manageability" default CXL_BUS + select CXL_MEM help The CXL specification defines a "CXL memory device" sub-class in the PCI "memory controller" base class of devices. 
Device's identified by @@ -89,7 +90,6 @@ config CXL_PMEM config CXL_MEM tristate "CXL: Memory Expansion" - depends on CXL_PCI default CXL_BUS help The CXL.mem protocol allows a device to act as a provider of "System @@ -233,4 +233,8 @@ config CXL_MCE def_bool y depends on X86_MCE && MEMORY_FAILURE +config CXL_RAS + def_bool y + depends on ACPI_APEI_GHES && PCIEAER && CXL_BUS + endif diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 87f0ed3f3f51f..e65dfae42bded 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -11,38 +11,52 @@ #include "cxlpci.h" #include "cxl.h" -struct cxl_cxims_data { - int nr_maps; - u64 xormaps[] __counted_by(nr_maps); -}; - static const guid_t acpi_cxl_qtg_id_guid = GUID_INIT(0xF365F9A6, 0xA7DE, 0x4071, 0xA6, 0x6A, 0xB4, 0x0C, 0x0B, 0x4F, 0x8E, 0x52); +#define HBIW_TO_NR_MAPS_SIZE (CXL_DECODER_MAX_INTERLEAVE + 1) +static const int hbiw_to_nr_maps[HBIW_TO_NR_MAPS_SIZE] = { + [1] = 0, [2] = 1, [3] = 0, [4] = 2, [6] = 1, [8] = 3, [12] = 2, [16] = 4 +}; + +static const int valid_hbiw[] = { 1, 2, 3, 4, 6, 8, 12, 16 }; -static u64 cxl_xor_hpa_to_spa(struct cxl_root_decoder *cxlrd, u64 hpa) +u64 cxl_do_xormap_calc(struct cxl_cxims_data *cximsd, u64 addr, int hbiw) { - struct cxl_cxims_data *cximsd = cxlrd->platform_data; - int hbiw = cxlrd->cxlsd.nr_targets; + int nr_maps_to_apply = -1; u64 val; int pos; - /* No xormaps for host bridge interleave ways of 1 or 3 */ - if (hbiw == 1 || hbiw == 3) - return hpa; + /* + * Strictly validate hbiw since this function is used for testing and + * that nullifies any expectation of trusted parameters from the CXL + * Region Driver. + */ + for (int i = 0; i < ARRAY_SIZE(valid_hbiw); i++) { + if (valid_hbiw[i] == hbiw) { + nr_maps_to_apply = hbiw_to_nr_maps[hbiw]; + break; + } + } + if (nr_maps_to_apply == -1 || nr_maps_to_apply > cximsd->nr_maps) + return ULLONG_MAX; /* - * For root decoders using xormaps (hbiw: 2,4,6,8,12,16) restore - * the position bit to its value before the xormap was applied at - * HPA->DPA translation. + * In regions using XOR interleave arithmetic the CXL HPA may not + * be the same as the SPA. This helper performs the SPA->CXL HPA + * or the CXL HPA->SPA translation. Since XOR is self-inverting, + * so is this function. + * + * For root decoders using xormaps (hbiw: 2,4,6,8,12,16) applying the + * xormaps will toggle a position bit. * * pos is the lowest set bit in an XORMAP - * val is the XORALLBITS(HPA & XORMAP) + * val is the XORALLBITS(addr & XORMAP) * * XORALLBITS: The CXL spec (3.1 Table 9-22) defines XORALLBITS * as an operation that outputs a single bit by XORing all the - * bits in the input (hpa & xormap). Implement XORALLBITS using + * bits in the input (addr & xormap). Implement XORALLBITS using * hweight64(). If the hamming weight is even the XOR of those * bits results in val==0, if odd the XOR result is val==1. 
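+	 *
+	 * Worked example (hypothetical map value): for xormap = 0x104, pos is
+	 * bit 2. An addr with bits 2 and 8 both set gives
+	 * hweight64(addr & 0x104) == 2, so val == 0 and bit 2 is cleared in
+	 * addr; an addr with only bit 8 set gives val == 1 and bit 2 is set.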
*/ @@ -51,11 +65,19 @@ static u64 cxl_xor_hpa_to_spa(struct cxl_root_decoder *cxlrd, u64 hpa) if (!cximsd->xormaps[i]) continue; pos = __ffs(cximsd->xormaps[i]); - val = (hweight64(hpa & cximsd->xormaps[i]) & 1); - hpa = (hpa & ~(1ULL << pos)) | (val << pos); + val = (hweight64(addr & cximsd->xormaps[i]) & 1); + addr = (addr & ~(1ULL << pos)) | (val << pos); } - return hpa; + return addr; +} +EXPORT_SYMBOL_FOR_MODULES(cxl_do_xormap_calc, "cxl_translate"); + +static u64 cxl_apply_xor_maps(struct cxl_root_decoder *cxlrd, u64 addr) +{ + struct cxl_cxims_data *cximsd = cxlrd->platform_data; + + return cxl_do_xormap_calc(cximsd, addr, cxlrd->cxlsd.nr_targets); } struct cxl_cxims_context { @@ -113,9 +135,9 @@ static unsigned long cfmws_to_decoder_flags(int restrictions) { unsigned long flags = CXL_DECODER_F_ENABLE; - if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_TYPE2) + if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_DEVMEM) flags |= CXL_DECODER_F_TYPE2; - if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_TYPE3) + if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM) flags |= CXL_DECODER_F_TYPE3; if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_VOLATILE) flags |= CXL_DECODER_F_RAM; @@ -335,7 +357,7 @@ static int add_or_reset_cxl_resource(struct resource *parent, struct resource *r return rc; } -static int cxl_acpi_set_cache_size(struct cxl_root_decoder *cxlrd) +static void cxl_setup_extended_linear_cache(struct cxl_root_decoder *cxlrd) { struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; struct range *hpa = &cxld->hpa_range; @@ -345,12 +367,14 @@ static int cxl_acpi_set_cache_size(struct cxl_root_decoder *cxlrd) struct resource res; int nid, rc; + /* Explicitly initialize cache size to 0 at the beginning */ + cxlrd->cache_size = 0; res = DEFINE_RES_MEM(start, size); nid = phys_to_target_node(start); rc = hmat_get_extended_linear_cache_size(&res, nid, &cache_size); if (rc) - return rc; + return; /* * The cache range is expected to be within the CFMWS. @@ -362,34 +386,10 @@ static int cxl_acpi_set_cache_size(struct cxl_root_decoder *cxlrd) dev_warn(&cxld->dev, "Extended Linear Cache size %pa != CXL size %pa. No Support!", &cache_size, &size); - return -ENXIO; - } - - cxlrd->cache_size = cache_size; - - return 0; -} - -static void cxl_setup_extended_linear_cache(struct cxl_root_decoder *cxlrd) -{ - int rc; - - rc = cxl_acpi_set_cache_size(cxlrd); - if (!rc) return; - - if (rc != -EOPNOTSUPP) { - /* - * Failing to support extended linear cache region resize does not - * prevent the region from functioning. Only causes cxl list showing - * incorrect region size. 
- */ - dev_warn(cxlrd->cxlsd.cxld.dev.parent, - "Extended linear cache calculation failed rc:%d\n", rc); } - /* Ignoring return code */ - cxlrd->cache_size = 0; + cxlrd->cache_size = cache_size; } DEFINE_FREE(put_cxlrd, struct cxl_root_decoder *, @@ -398,7 +398,6 @@ DEFINE_FREE(del_cxl_resource, struct resource *, if (_T) del_cxl_resource(_T)) static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, struct cxl_cfmws_context *ctx) { - int target_map[CXL_DECODER_MAX_INTERLEAVE]; struct cxl_port *root_port = ctx->root_port; struct cxl_cxims_context cxims_ctx; struct device *dev = ctx->dev; @@ -416,8 +415,6 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, rc = eig_to_granularity(cfmws->granularity, &ig); if (rc) return rc; - for (i = 0; i < ways; i++) - target_map[i] = cfmws->interleave_targets[i]; struct resource *res __free(del_cxl_resource) = alloc_cxl_resource( cfmws->base_hpa, cfmws->window_size, ctx->id++); @@ -443,6 +440,8 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, .end = cfmws->base_hpa + cfmws->window_size - 1, }; cxld->interleave_ways = ways; + for (i = 0; i < ways; i++) + cxld->target_map[i] = cfmws->interleave_targets[i]; /* * Minimize the x1 granularity to advertise support for any * valid region granularity @@ -451,8 +450,6 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, ig = CXL_DECODER_MIN_GRANULARITY; cxld->interleave_granularity = ig; - cxl_setup_extended_linear_cache(cxlrd); - if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) { if (ways != 1 && ways != 3) { cxims_ctx = (struct cxl_cxims_context) { @@ -468,14 +465,15 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, return -EINVAL; } } + cxlrd->ops.hpa_to_spa = cxl_apply_xor_maps; + cxlrd->ops.spa_to_hpa = cxl_apply_xor_maps; } - cxlrd->qos_class = cfmws->qtg_id; + cxl_setup_extended_linear_cache(cxlrd); - if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) - cxlrd->hpa_to_spa = cxl_xor_hpa_to_spa; + cxlrd->qos_class = cfmws->qtg_id; - rc = cxl_decoder_add(cxld, target_map); + rc = cxl_decoder_add(cxld); if (rc) return rc; diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 5ad8fef210b5c..b37f38d502d8c 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -14,9 +14,10 @@ cxl_core-y += pci.o cxl_core-y += hdm.o cxl_core-y += pmu.o cxl_core-y += cdat.o -cxl_core-y += ras.o cxl_core-$(CONFIG_TRACING) += trace.o cxl_core-$(CONFIG_CXL_REGION) += region.o cxl_core-$(CONFIG_CXL_MCE) += mce.o cxl_core-$(CONFIG_CXL_FEATURES) += features.o cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o +cxl_core-$(CONFIG_CXL_RAS) += ras.o +cxl_core-$(CONFIG_CXL_RAS) += ras_rch.o diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index c0af645425f4a..7120b5f2e31fe 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -338,7 +338,7 @@ static int match_cxlrd_hb(struct device *dev, void *data) guard(rwsem_read)(&cxl_rwsem.region); for (int i = 0; i < cxlsd->nr_targets; i++) { - if (host_bridge == cxlsd->target[i]->dport_dev) + if (cxlsd->target[i] && host_bridge == cxlsd->target[i]->dport_dev) return 1; } @@ -440,8 +440,8 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg, } *tbl = (struct acpi_cdat_sslbis_table *)header; int size = sizeof(header->cdat) + sizeof(tbl->sslbis_header); struct acpi_cdat_sslbis *sslbis; - struct cxl_port *port = arg; - struct device *dev = &port->dev; + struct cxl_dport *dport = arg; + struct device *dev = &dport->port->dev; int 
remain, entries, i; u16 len; @@ -467,8 +467,6 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg, u16 y = le16_to_cpu((__force __le16)tbl->entries[i].porty_id); __le64 le_base; __le16 le_val; - struct cxl_dport *dport; - unsigned long index; u16 dsp_id; u64 val; @@ -499,28 +497,27 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg, val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base), sslbis->data_type); - xa_for_each(&port->dports, index, dport) { - if (dsp_id == ACPI_CDAT_SSLBIS_ANY_PORT || - dsp_id == dport->port_id) { - cxl_access_coordinate_set(dport->coord, - sslbis->data_type, - val); - } + if (dsp_id == ACPI_CDAT_SSLBIS_ANY_PORT || + dsp_id == dport->port_id) { + cxl_access_coordinate_set(dport->coord, + sslbis->data_type, val); + return 0; } } return 0; } -void cxl_switch_parse_cdat(struct cxl_port *port) +void cxl_switch_parse_cdat(struct cxl_dport *dport) { + struct cxl_port *port = dport->port; int rc; if (!port->cdat.table) return; rc = cdat_table_parse(ACPI_CDAT_TYPE_SSLBIS, cdat_sslbis_handler, - port, port->cdat.table, port->cdat.length); + dport, port->cdat.table, port->cdat.length); rc = cdat_table_parse_output(rc); if (rc) dev_dbg(&port->dev, "Failed to parse SSLBIS: %d\n", rc); @@ -829,7 +826,7 @@ static struct xarray *cxl_switch_gather_bandwidth(struct cxl_region *cxlr, cxl_coordinates_combine(coords, coords, ctx->coord); /* - * Take the min of the calculated bandwdith and the upstream + * Take the min of the calculated bandwidth and the upstream * switch SSLBIS bandwidth if there's a parent switch */ if (!is_root) @@ -952,7 +949,7 @@ static struct xarray *cxl_hb_gather_bandwidth(struct xarray *xa) /** * cxl_region_update_bandwidth - Update the bandwidth access coordinates of a region * @cxlr: The region being operated on - * @input_xa: xarray holds cxl_perf_ctx wht calculated bandwidth per ACPI0017 instance + * @input_xa: xarray holds cxl_perf_ctx with calculated bandwidth per ACPI0017 instance */ static void cxl_region_update_bandwidth(struct cxl_region *cxlr, struct xarray *input_xa) @@ -1075,14 +1072,3 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr, cxlr->coord[i].write_bandwidth += perf->coord[i].write_bandwidth; } } - -int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr, - enum access_coordinate_class access) -{ - return hmat_update_target_coordinates(nid, &cxlr->coord[access], access); -} - -bool cxl_need_node_perf_attrs_update(int nid) -{ - return !acpi_node_backed_by_real_pxm(nid); -} diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 2669f251d6775..f3d6e4e6ad81b 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -89,6 +89,8 @@ void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr, struct dentry *cxl_debugfs_create_dir(const char *dir); int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled, enum cxl_partition_mode mode); +struct cxl_memdev_state; +int cxl_mem_get_partition_info(struct cxl_memdev_state *mds); int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size); int cxl_dpa_free(struct cxl_endpoint_decoder *cxled); resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled); @@ -128,6 +130,8 @@ extern struct cxl_rwsem cxl_rwsem; int cxl_memdev_init(void); void cxl_memdev_exit(void); void cxl_mbox_init(void); +void cxl_reset_sysfs_init(void); +void cxl_reset_sysfs_exit(void); enum cxl_poison_trace_type { CXL_POISON_TRACE_LIST, @@ -135,18 +139,46 @@ enum cxl_poison_trace_type { 
CXL_POISON_TRACE_CLEAR, }; +enum poison_cmd_enabled_bits; +bool cxl_memdev_has_poison_cmd(struct cxl_memdev *cxlmd, + enum poison_cmd_enabled_bits cmd); + long cxl_pci_get_latency(struct pci_dev *pdev); int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c); -int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr, - enum access_coordinate_class access); -bool cxl_need_node_perf_attrs_update(int nid); int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, struct access_coordinate *c); +#ifdef CONFIG_CXL_RAS int cxl_ras_init(void); void cxl_ras_exit(void); +bool cxl_handle_ras(struct device *dev, void __iomem *ras_base); +void cxl_handle_cor_ras(struct device *dev, void __iomem *ras_base); +void cxl_dport_map_rch_aer(struct cxl_dport *dport); +void cxl_disable_rch_root_ints(struct cxl_dport *dport); +void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds); +#else +static inline int cxl_ras_init(void) +{ + return 0; +} +static inline void cxl_ras_exit(void) { } +static inline bool cxl_handle_ras(struct device *dev, void __iomem *ras_base) +{ + return false; +} +static inline void cxl_handle_cor_ras(struct device *dev, void __iomem *ras_base) { } +static inline void cxl_dport_map_rch_aer(struct cxl_dport *dport) { } +static inline void cxl_disable_rch_root_ints(struct cxl_dport *dport) { } +static inline void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { } +#endif /* CONFIG_CXL_RAS */ + int cxl_gpf_port_setup(struct cxl_dport *dport); +struct cxl_hdm; +int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, + struct cxl_endpoint_dvsec_info *info); +int cxl_port_get_possible_dports(struct cxl_port *port); + #ifdef CONFIG_CXL_FEATURES struct cxl_feat_entry * cxl_feature_info(struct cxl_features_state *cxlfs, const uuid_t *uuid); @@ -159,5 +191,6 @@ int cxl_set_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid, size_t feat_data_size, u32 feat_flag, u16 offset, u16 *return_code); #endif - +resource_size_t cxl_rcd_component_reg_phys(struct device *dev, + struct cxl_dport *dport); #endif /* __CXL_CORE_H__ */ diff --git a/drivers/cxl/core/edac.c b/drivers/cxl/core/edac.c index 79994ca9bc9f3..81160260e26b7 100644 --- a/drivers/cxl/core/edac.c +++ b/drivers/cxl/core/edac.c @@ -1988,6 +1988,40 @@ static int cxl_memdev_soft_ppr_init(struct cxl_memdev *cxlmd, return 0; } +static void err_rec_free(void *_cxlmd) +{ + struct cxl_memdev *cxlmd = _cxlmd; + struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array; + struct cxl_event_gen_media *rec_gen_media; + struct cxl_event_dram *rec_dram; + unsigned long index; + + cxlmd->err_rec_array = NULL; + xa_for_each(&array_rec->rec_dram, index, rec_dram) + kfree(rec_dram); + xa_destroy(&array_rec->rec_dram); + + xa_for_each(&array_rec->rec_gen_media, index, rec_gen_media) + kfree(rec_gen_media); + xa_destroy(&array_rec->rec_gen_media); + kfree(array_rec); +} + +static int devm_cxl_memdev_setup_err_rec(struct cxl_memdev *cxlmd) +{ + struct cxl_mem_err_rec *array_rec = + kzalloc(sizeof(*array_rec), GFP_KERNEL); + + if (!array_rec) + return -ENOMEM; + + xa_init(&array_rec->rec_gen_media); + xa_init(&array_rec->rec_dram); + cxlmd->err_rec_array = array_rec; + + return devm_add_action_or_reset(&cxlmd->dev, err_rec_free, cxlmd); +} + int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd) { struct edac_dev_feature ras_features[CXL_NR_EDAC_DEV_FEATURES]; @@ -2038,15 +2072,9 @@ int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd) } if (repair_inst) { - struct 
cxl_mem_err_rec *array_rec = - devm_kzalloc(&cxlmd->dev, sizeof(*array_rec), - GFP_KERNEL); - if (!array_rec) - return -ENOMEM; - - xa_init(&array_rec->rec_gen_media); - xa_init(&array_rec->rec_dram); - cxlmd->err_rec_array = array_rec; + rc = devm_cxl_memdev_setup_err_rec(cxlmd); + if (rc) + return rc; } } @@ -2088,22 +2116,4 @@ int devm_cxl_region_edac_register(struct cxl_region *cxlr) } EXPORT_SYMBOL_NS_GPL(devm_cxl_region_edac_register, "CXL"); -void devm_cxl_memdev_edac_release(struct cxl_memdev *cxlmd) -{ - struct cxl_mem_err_rec *array_rec = cxlmd->err_rec_array; - struct cxl_event_gen_media *rec_gen_media; - struct cxl_event_dram *rec_dram; - unsigned long index; - - if (!IS_ENABLED(CONFIG_CXL_EDAC_MEM_REPAIR) || !array_rec) - return; - - xa_for_each(&array_rec->rec_dram, index, rec_dram) - kfree(rec_dram); - xa_destroy(&array_rec->rec_dram); - xa_for_each(&array_rec->rec_gen_media, index, rec_gen_media) - kfree(rec_gen_media); - xa_destroy(&array_rec->rec_gen_media); -} -EXPORT_SYMBOL_NS_GPL(devm_cxl_memdev_edac_release, "CXL"); diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index e930191057c04..70da3daac3178 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "cxlmem.h" #include "core.h" @@ -21,12 +22,11 @@ struct cxl_rwsem cxl_rwsem = { .dpa = __RWSEM_INITIALIZER(cxl_rwsem.dpa), }; -static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, - int *target_map) +static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld) { int rc; - rc = cxl_decoder_add_locked(cxld, target_map); + rc = cxl_decoder_add_locked(cxld); if (rc) { put_device(&cxld->dev); dev_err(&port->dev, "Failed to add decoder\n"); @@ -50,12 +50,9 @@ static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, * are claimed and passed to the single dport. Disable the range until the first * CXL region is enumerated / activated. */ -int devm_cxl_add_passthrough_decoder(struct cxl_port *port) +static int devm_cxl_add_passthrough_decoder(struct cxl_port *port) { struct cxl_switch_decoder *cxlsd; - struct cxl_dport *dport = NULL; - int single_port_map[1]; - unsigned long index; struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev); /* @@ -71,13 +68,8 @@ int devm_cxl_add_passthrough_decoder(struct cxl_port *port) device_lock_assert(&port->dev); - xa_for_each(&port->dports, index, dport) - break; - single_port_map[0] = dport->port_id; - - return add_hdm_decoder(port, &cxlsd->cxld, single_port_map); + return add_hdm_decoder(port, &cxlsd->cxld); } -EXPORT_SYMBOL_NS_GPL(devm_cxl_add_passthrough_decoder, "CXL"); static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm) { @@ -147,8 +139,8 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) * @port: cxl_port to map * @info: cached DVSEC range register info */ -struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, - struct cxl_endpoint_dvsec_info *info) +static struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, + struct cxl_endpoint_dvsec_info *info) { struct cxl_register_map *reg_map = &port->reg_map; struct device *dev = &port->dev; @@ -197,13 +189,12 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, */ if (should_emulate_decoders(info)) { dev_dbg(dev, "Fallback map %d range register%s\n", info->ranges, - info->ranges > 1 ? 
"s" : ""); + str_plural(info->ranges)); cxlhdm->decoder_count = info->ranges; } return cxlhdm; } -EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_hdm, "CXL"); static void __cxl_dpa_debug(struct seq_file *file, struct resource *r, int depth) { @@ -556,6 +547,12 @@ bool cxl_resource_contains_addr(const struct resource *res, const resource_size_ return resource_contains(res, &_addr); } +/** + * cxl_dpa_free - release DPA (Device Physical Address) + * @cxled: endpoint decoder linked to the DPA + * + * Returns 0 or error. + */ int cxl_dpa_free(struct cxl_endpoint_decoder *cxled) { struct cxl_port *port = cxled_to_port(cxled); @@ -582,6 +579,7 @@ int cxl_dpa_free(struct cxl_endpoint_decoder *cxled) devm_cxl_dpa_release(cxled); return 0; } +EXPORT_SYMBOL_NS_GPL(cxl_dpa_free, "CXL"); int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled, enum cxl_partition_mode mode) @@ -613,6 +611,82 @@ int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled, return 0; } +static int find_free_decoder(struct device *dev, const void *data) +{ + struct cxl_endpoint_decoder *cxled; + struct cxl_port *port; + + if (!is_endpoint_decoder(dev)) + return 0; + + cxled = to_cxl_endpoint_decoder(dev); + port = cxled_to_port(cxled); + + return cxled->cxld.id == (port->hdm_end + 1); +} + +static struct cxl_endpoint_decoder * +cxl_find_free_decoder(struct cxl_memdev *cxlmd) +{ + struct cxl_port *endpoint = cxlmd->endpoint; + struct device *dev; + + guard(rwsem_read)(&cxl_rwsem.dpa); + dev = device_find_child(&endpoint->dev, NULL, + find_free_decoder); + if (!dev) + return NULL; + + return to_cxl_endpoint_decoder(dev); +} + +/** + * cxl_request_dpa - search and reserve DPA given input constraints + * @cxlmd: memdev with an endpoint port with available decoders + * @mode: CXL partition mode (ram vs pmem) + * @alloc: dpa size required + * + * Returns a pointer to a 'struct cxl_endpoint_decoder' on success or + * an errno encoded pointer on failure. + * + * Given that a region needs to allocate from limited HPA capacity it + * may be the case that a device has more mappable DPA capacity than + * available HPA. The expectation is that @alloc is a driver known + * value based on the device capacity but which could not be fully + * available due to HPA constraints. + * + * Returns a pinned cxl_decoder with at least @alloc bytes of capacity + * reserved, or an error pointer. The caller is also expected to own the + * lifetime of the memdev registration associated with the endpoint to + * pin the decoder registered as well. 
+ */ +struct cxl_endpoint_decoder *cxl_request_dpa(struct cxl_memdev *cxlmd, + enum cxl_partition_mode mode, + resource_size_t alloc) +{ + int rc; + + if (!IS_ALIGNED(alloc, SZ_256M)) + return ERR_PTR(-EINVAL); + + struct cxl_endpoint_decoder *cxled __free(put_cxled) = + cxl_find_free_decoder(cxlmd); + + if (!cxled) + return ERR_PTR(-ENODEV); + + rc = cxl_dpa_set_part(cxled, mode); + if (rc) + return ERR_PTR(rc); + + rc = cxl_dpa_alloc(cxled, alloc); + if (rc) + return ERR_PTR(rc); + + return no_free_ptr(cxled); +} +EXPORT_SYMBOL_NS_GPL(cxl_request_dpa, "CXL"); + static int __cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); @@ -696,6 +770,45 @@ int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size) return devm_add_action_or_reset(&port->dev, cxl_dpa_release, cxled); } +static int find_committed_endpoint_decoder(struct device *dev, const void *data) +{ + struct cxl_endpoint_decoder *cxled; + struct cxl_port *port; + + if (!is_endpoint_decoder(dev)) + return 0; + + cxled = to_cxl_endpoint_decoder(dev); + port = cxled_to_port(cxled); + + return cxled->cxld.id == port->hdm_end; +} + +struct cxl_endpoint_decoder *cxl_get_committed_decoder(struct cxl_memdev *cxlmd, + struct cxl_region **cxlr) +{ + struct cxl_port *endpoint = cxlmd->endpoint; + struct cxl_endpoint_decoder *cxled; + struct device *cxled_dev; + + if (!endpoint) + return NULL; + + guard(rwsem_read)(&cxl_rwsem.dpa); + cxled_dev = device_find_child(&endpoint->dev, NULL, + find_committed_endpoint_decoder); + + if (!cxled_dev) + return NULL; + + cxled = to_cxl_endpoint_decoder(cxled_dev); + *cxlr = cxled->cxld.region; + + put_device(cxled_dev); + return cxled; +} +EXPORT_SYMBOL_NS_GPL(cxl_get_committed_decoder, "CXL"); + static void cxld_set_interleave(struct cxl_decoder *cxld, u32 *ctrl) { u16 eig; @@ -854,14 +967,13 @@ static int cxl_decoder_commit(struct cxl_decoder *cxld) scoped_guard(rwsem_read, &cxl_rwsem.dpa) setup_hw_decoder(cxld, hdm); - port->commit_end++; rc = cxld_await_commit(hdm, cxld->id); if (rc) { dev_dbg(&port->dev, "%s: error %d committing decoder\n", dev_name(&cxld->dev), rc); - cxld->reset(cxld); return rc; } + port->commit_end++; cxld->flags |= CXL_DECODER_F_ENABLE; return 0; @@ -915,6 +1027,9 @@ static void cxl_decoder_reset(struct cxl_decoder *cxld) if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0) return; + if (test_bit(CXL_DECODER_F_LOCK, &cxld->flags)) + return; + if (port->commit_end == id) cxl_port_commit_reap(cxld); else @@ -973,7 +1088,7 @@ static int cxl_setup_hdm_decoder_from_dvsec( rc = devm_cxl_dpa_reserve(cxled, *dpa_base, len, 0); if (rc) { dev_err(&port->dev, - "decoder%d.%d: Failed to reserve DPA range %#llx - %#llx\n (%d)", + "decoder%d.%d: Failed to reserve DPA range %#llx - %#llx: %d\n", port->id, cxld->id, *dpa_base, *dpa_base + len - 1, rc); return rc; } @@ -984,7 +1099,7 @@ static int cxl_setup_hdm_decoder_from_dvsec( } static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, - int *target_map, void __iomem *hdm, int which, + void __iomem *hdm, int which, u64 *dpa_base, struct cxl_endpoint_dvsec_info *info) { struct cxl_endpoint_decoder *cxled = NULL; @@ -1104,7 +1219,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, hi = readl(hdm + CXL_HDM_DECODER0_TL_HIGH(which)); target_list.value = (hi << 32) + lo; for (i = 0; i < cxld->interleave_ways; i++) - target_map[i] = target_list.target_id[i]; + cxld->target_map[i] = target_list.target_id[i]; return 0; } @@ -1125,7 +1240,7 @@ 
static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, rc = devm_cxl_dpa_reserve(cxled, *dpa_base + skip, dpa_size, skip); if (rc) { dev_err(&port->dev, - "decoder%d.%d: Failed to reserve DPA range %#llx - %#llx\n (%d)", + "decoder%d.%d: Failed to reserve DPA range %#llx - %#llx: %d\n", port->id, cxld->id, *dpa_base, *dpa_base + dpa_size + skip - 1, rc); return rc; @@ -1169,8 +1284,8 @@ static void cxl_settle_decoders(struct cxl_hdm *cxlhdm) * @cxlhdm: Structure to populate with HDM capabilities * @info: cached DVSEC range register info */ -int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, - struct cxl_endpoint_dvsec_info *info) +static int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, + struct cxl_endpoint_dvsec_info *info) { void __iomem *hdm = cxlhdm->regs.hdm_decoder; struct cxl_port *port = cxlhdm->port; @@ -1180,7 +1295,6 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, cxl_settle_decoders(cxlhdm); for (i = 0; i < cxlhdm->decoder_count; i++) { - int target_map[CXL_DECODER_MAX_INTERLEAVE] = { 0 }; int rc, target_count = cxlhdm->target_count; struct cxl_decoder *cxld; @@ -1208,8 +1322,7 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, cxld = &cxlsd->cxld; } - rc = init_hdm_decoder(port, cxld, target_map, hdm, i, - &dpa_base, info); + rc = init_hdm_decoder(port, cxld, hdm, i, &dpa_base, info); if (rc) { if (rc == -ENOSPC) continue; @@ -1219,7 +1332,7 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, put_device(&cxld->dev); return rc; } - rc = add_hdm_decoder(port, cxld, target_map); + rc = add_hdm_decoder(port, cxld); if (rc) { dev_warn(&port->dev, "Failed to add decoder%d.%d\n", port->id, i); @@ -1229,4 +1342,71 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, return 0; } -EXPORT_SYMBOL_NS_GPL(devm_cxl_enumerate_decoders, "CXL"); + +/** + * __devm_cxl_switch_port_decoders_setup - allocate and setup switch decoders + * @port: CXL port context + * + * Return 0 or -errno on error + */ +int __devm_cxl_switch_port_decoders_setup(struct cxl_port *port) +{ + struct cxl_hdm *cxlhdm; + + if (is_cxl_root(port) || is_cxl_endpoint(port)) + return -EOPNOTSUPP; + + cxlhdm = devm_cxl_setup_hdm(port, NULL); + if (!IS_ERR(cxlhdm)) + return devm_cxl_enumerate_decoders(cxlhdm, NULL); + + if (PTR_ERR(cxlhdm) != -ENODEV) { + dev_err(&port->dev, "Failed to map HDM decoder capability\n"); + return PTR_ERR(cxlhdm); + } + + if (cxl_port_get_possible_dports(port) == 1) { + dev_dbg(&port->dev, "Fallback to passthrough decoder\n"); + return devm_cxl_add_passthrough_decoder(port); + } + + dev_err(&port->dev, "HDM decoder capability not found\n"); + return -ENXIO; +} +EXPORT_SYMBOL_NS_GPL(__devm_cxl_switch_port_decoders_setup, "CXL"); + +/** + * devm_cxl_endpoint_decoders_setup - allocate and setup endpoint decoders + * @port: CXL port context + * + * Return 0 or -errno on error + */ +int devm_cxl_endpoint_decoders_setup(struct cxl_port *port) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); + struct cxl_endpoint_dvsec_info info = { .port = port }; + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_hdm *cxlhdm; + int rc; + + if (!is_cxl_endpoint(port)) + return -EOPNOTSUPP; + + rc = cxl_dvsec_rr_decode(cxlds, &info); + if (rc < 0) + return rc; + + cxlhdm = devm_cxl_setup_hdm(port, &info); + if (IS_ERR(cxlhdm)) { + if (PTR_ERR(cxlhdm) == -ENODEV) + dev_err(&port->dev, "HDM decoder registers not found\n"); + return PTR_ERR(cxlhdm); + } + + rc = cxl_hdm_decode_init(cxlds, cxlhdm, &info); + if (rc) + return rc; + + return 
devm_cxl_enumerate_decoders(cxlhdm, &info); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_endpoint_decoders_setup, "CXL"); diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index fa6dd0c94656f..d57a0c2d39fb6 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1144,7 +1144,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, "CXL"); * * See CXL @8.2.9.5.2.1 Get Partition Info */ -static int cxl_mem_get_partition_info(struct cxl_memdev_state *mds) +int cxl_mem_get_partition_info(struct cxl_memdev_state *mds) { struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mbox_get_partition_info pi; @@ -1300,55 +1300,6 @@ int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd) return -EBUSY; } -static void add_part(struct cxl_dpa_info *info, u64 start, u64 size, enum cxl_partition_mode mode) -{ - int i = info->nr_partitions; - - if (size == 0) - return; - - info->part[i].range = (struct range) { - .start = start, - .end = start + size - 1, - }; - info->part[i].mode = mode; - info->nr_partitions++; -} - -int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info) -{ - struct cxl_dev_state *cxlds = &mds->cxlds; - struct device *dev = cxlds->dev; - int rc; - - if (!cxlds->media_ready) { - info->size = 0; - return 0; - } - - info->size = mds->total_bytes; - - if (mds->partition_align_bytes == 0) { - add_part(info, 0, mds->volatile_only_bytes, CXL_PARTMODE_RAM); - add_part(info, mds->volatile_only_bytes, - mds->persistent_only_bytes, CXL_PARTMODE_PMEM); - return 0; - } - - rc = cxl_mem_get_partition_info(mds); - if (rc) { - dev_err(dev, "Failed to query partition information\n"); - return rc; - } - - add_part(info, 0, mds->active_volatile_bytes, CXL_PARTMODE_RAM); - add_part(info, mds->active_volatile_bytes, mds->active_persistent_bytes, - CXL_PARTMODE_PMEM); - - return 0; -} -EXPORT_SYMBOL_NS_GPL(cxl_mem_dpa_fetch, "CXL"); - int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count) { struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; @@ -1514,23 +1465,21 @@ int cxl_mailbox_init(struct cxl_mailbox *cxl_mbox, struct device *host) } EXPORT_SYMBOL_NS_GPL(cxl_mailbox_init, "CXL"); -struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) +struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev, u64 serial, + u16 dvsec) { struct cxl_memdev_state *mds; int rc; - mds = devm_kzalloc(dev, sizeof(*mds), GFP_KERNEL); + mds = devm_cxl_dev_state_create(dev, CXL_DEVTYPE_CLASSMEM, serial, + dvsec, struct cxl_memdev_state, cxlds, + true); if (!mds) { dev_err(dev, "No memory available\n"); return ERR_PTR(-ENOMEM); } mutex_init(&mds->event.log_lock); - mds->cxlds.dev = dev; - mds->cxlds.reg_map.host = dev; - mds->cxlds.cxl_mbox.host = dev; - mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE; - mds->cxlds.type = CXL_DEVTYPE_CLASSMEM; rc = devm_cxl_register_mce_notifier(dev, &mds->mce_notifier); if (rc == -EOPNOTSUPP) diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index c569e00a511f4..1b43763b8e20e 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include "trace.h" #include "core.h" @@ -27,7 +28,6 @@ static void cxl_memdev_release(struct device *dev) struct cxl_memdev *cxlmd = to_cxl_memdev(dev); ida_free(&cxl_memdev_ida, cxlmd->id); - devm_cxl_memdev_edac_release(cxlmd); kfree(cxlmd); } @@ -200,6 +200,14 @@ static ssize_t security_erase_store(struct device *dev, static struct device_attribute dev_attr_security_erase = __ATTR(erase, 0200, 
NULL, security_erase_store); +bool cxl_memdev_has_poison_cmd(struct cxl_memdev *cxlmd, + enum poison_cmd_enabled_bits cmd) +{ + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + + return test_bit(cmd, mds->poison.enabled_cmds); +} + static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd) { struct cxl_dev_state *cxlds = cxlmd->cxlds; @@ -276,7 +284,7 @@ static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa) return 0; } -int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) +int cxl_inject_poison_locked(struct cxl_memdev *cxlmd, u64 dpa) { struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox; struct cxl_mbox_inject_poison inject; @@ -288,13 +296,8 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; - ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); - if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) - return rc; - - ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); - if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) - return rc; + lockdep_assert_held(&cxl_rwsem.dpa); + lockdep_assert_held(&cxl_rwsem.region); rc = cxl_validate_poison_dpa(cxlmd, dpa); if (rc) @@ -324,9 +327,24 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) return 0; } + +int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) +{ + int rc; + + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) + return rc; + + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) + return rc; + + return cxl_inject_poison_locked(cxlmd, dpa); +} EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, "CXL"); -int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) +int cxl_clear_poison_locked(struct cxl_memdev *cxlmd, u64 dpa) { struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox; struct cxl_mbox_clear_poison clear; @@ -338,13 +356,8 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; - ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); - if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) - return rc; - - ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); - if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) - return rc; + lockdep_assert_held(&cxl_rwsem.dpa); + lockdep_assert_held(&cxl_rwsem.region); rc = cxl_validate_poison_dpa(cxlmd, dpa); if (rc) @@ -383,6 +396,21 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) return 0; } + +int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) +{ + int rc; + + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) + return rc; + + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) + return rc; + + return cxl_clear_poison_locked(cxlmd, dpa); +} EXPORT_SYMBOL_NS_GPL(cxl_clear_poison, "CXL"); static struct attribute *cxl_memdev_attributes[] = { @@ -549,12 +577,85 @@ static const struct device_type cxl_memdev_type = { .groups = cxl_memdev_attribute_groups, }; +static const struct device_type cxl_accel_memdev_type = { + .name = "cxl_accel_memdev", + .release = cxl_memdev_release, + .devnode = cxl_memdev_devnode, +}; + bool is_cxl_memdev(const struct device *dev) { - return dev->type == &cxl_memdev_type; + return (dev->type == &cxl_memdev_type || + dev->type == &cxl_accel_memdev_type); } EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, "CXL"); +static void add_part(struct cxl_dpa_info *info, u64 start, u64 
size, enum cxl_partition_mode mode) +{ + int i = info->nr_partitions; + + if (size == 0) + return; + + info->part[i].range = (struct range) { + .start = start, + .end = start + size - 1, + }; + info->part[i].mode = mode; + info->nr_partitions++; +} + +int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info) +{ + struct cxl_dev_state *cxlds = &mds->cxlds; + struct device *dev = cxlds->dev; + int rc; + + if (!cxlds->media_ready) { + info->size = 0; + return 0; + } + + info->size = mds->total_bytes; + + if (mds->partition_align_bytes == 0) { + add_part(info, 0, mds->volatile_only_bytes, CXL_PARTMODE_RAM); + add_part(info, mds->volatile_only_bytes, + mds->persistent_only_bytes, CXL_PARTMODE_PMEM); + return 0; + } + + rc = cxl_mem_get_partition_info(mds); + if (rc) { + dev_err(dev, "Failed to query partition information\n"); + return rc; + } + + add_part(info, 0, mds->active_volatile_bytes, CXL_PARTMODE_RAM); + add_part(info, mds->active_volatile_bytes, mds->active_persistent_bytes, + CXL_PARTMODE_PMEM); + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_mem_dpa_fetch, "CXL"); + +/** + * cxl_set_capacity: initialize dpa by a driver without a mailbox. + * + * @cxlds: pointer to cxl_dev_state + * @capacity: device volatile memory size + */ +int cxl_set_capacity(struct cxl_dev_state *cxlds, u64 capacity) +{ + struct cxl_dpa_info range_info = { + .size = capacity, + }; + + add_part(&range_info, 0, capacity, CXL_PARTMODE_RAM); + return cxl_dpa_setup(cxlds, &range_info); +} +EXPORT_SYMBOL_NS_GPL(cxl_set_capacity, "CXL"); + /** * set_exclusive_cxl_commands() - atomically disable user cxl commands * @mds: The device state to operate on @@ -614,14 +715,56 @@ static void detach_memdev(struct work_struct *work) struct cxl_memdev *cxlmd; cxlmd = container_of(work, typeof(*cxlmd), detach_work); - device_release_driver(&cxlmd->dev); + + /* + * When the creator of @cxlmd sets ->attach it indicates CXL operation + * is required. In that case, @cxlmd detach escalates to parent device + * detach. 
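+ * Note that cxlmd->dev.parent is the cxl_dev_state host device (see
+ * cxl_memdev_alloc()), so the escalation unbinds the driver that
+ * registered the memdev rather than just the memdev itself.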
+ */ + if (cxlmd->attach) + device_release_driver(cxlmd->dev.parent); + else + device_release_driver(&cxlmd->dev); put_device(&cxlmd->dev); } static struct lock_class_key cxl_memdev_key; +static void cxl_dev_state_init(struct cxl_dev_state *cxlds, struct device *dev, + enum cxl_devtype type, u64 serial, u16 dvsec, + bool has_mbox) +{ + *cxlds = (struct cxl_dev_state) { + .dev = dev, + .type = type, + .serial = serial, + .cxl_dvsec = dvsec, + .reg_map.host = dev, + .reg_map.resource = CXL_RESOURCE_NONE, + }; + + if (has_mbox) + cxlds->cxl_mbox.host = dev; +} + +struct cxl_dev_state *_devm_cxl_dev_state_create(struct device *dev, + enum cxl_devtype type, + u64 serial, u16 dvsec, + size_t size, bool has_mbox) +{ + struct cxl_dev_state *cxlds = devm_kzalloc(dev, size, GFP_KERNEL); + + if (!cxlds) + return NULL; + + cxl_dev_state_init(cxlds, dev, type, serial, dvsec, has_mbox); + return cxlds; +} +EXPORT_SYMBOL_NS_GPL(_devm_cxl_dev_state_create, "CXL"); + static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, - const struct file_operations *fops) + const struct file_operations *fops, + const struct cxl_memdev_attach *attach) { struct cxl_memdev *cxlmd; struct device *dev; @@ -637,6 +780,8 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, goto err; cxlmd->id = rc; cxlmd->depth = -1; + cxlmd->attach = attach; + cxlmd->endpoint = ERR_PTR(-ENXIO); dev = &cxlmd->dev; device_initialize(dev); @@ -644,7 +789,10 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, dev->parent = cxlds->dev; dev->bus = &cxl_bus_type; dev->devt = MKDEV(cxl_mem_major, cxlmd->id); - dev->type = &cxl_memdev_type; + if (cxlds->type == CXL_DEVTYPE_DEVMEM) + dev->type = &cxl_accel_memdev_type; + else + dev->type = &cxl_memdev_type; device_set_pm_not_required(dev); INIT_WORK(&cxlmd->detach_work, detach_memdev); @@ -1023,50 +1171,84 @@ static const struct file_operations cxl_memdev_fops = { .llseek = noop_llseek, }; -struct cxl_memdev *devm_cxl_add_memdev(struct device *host, - struct cxl_dev_state *cxlds) +/* + * Activate ioctl operations, no cxl_memdev_rwsem manipulation needed as this is + * ordered with cdev_add() publishing the device. + */ +static int cxlmd_add(struct cxl_memdev *cxlmd, struct cxl_dev_state *cxlds) +{ + int rc; + + cxlmd->cxlds = cxlds; + cxlds->cxlmd = cxlmd; + + rc = cdev_device_add(&cxlmd->cdev, &cxlmd->dev); + if (rc) { + /* + * The cdev was briefly live, shutdown any ioctl operations that + * saw that state. + */ + cxl_memdev_shutdown(&cxlmd->dev); + return rc; + } + + return 0; +} + +DEFINE_FREE(put_cxlmd, struct cxl_memdev *, + if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev)) + +static struct cxl_memdev *cxl_memdev_autoremove(struct cxl_memdev *cxlmd) +{ + int rc; + + /* + * If @attach is provided fail if the driver is not attached upon + * return. Note that failure here could be the result of a race to + * teardown the CXL port topology. I.e. cxl_mem_probe() could have + * succeeded and then cxl_mem unbound before the lock is acquired. + */ + guard(device)(&cxlmd->dev); + if (cxlmd->attach && !cxlmd->dev.driver) { + cxl_memdev_unregister(cxlmd); + return ERR_PTR(-ENXIO); + } + + rc = devm_add_action_or_reset(cxlmd->cxlds->dev, cxl_memdev_unregister, + cxlmd); + if (rc) + return ERR_PTR(rc); + + return cxlmd; +} + +/* + * Core helper for devm_cxl_add_memdev() that wants to both create a device and + * assert to the caller that upon return cxl_mem::probe() has been invoked. 
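+ * The assertion is implemented by cxl_memdev_autoremove(): if @attach is
+ * set and no driver is bound to the memdev by the time the device lock is
+ * acquired, the memdev is unregistered and ERR_PTR(-ENXIO) is returned.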
+ */ +struct cxl_memdev *__devm_cxl_add_memdev(struct cxl_dev_state *cxlds, + const struct cxl_memdev_attach *attach) { - struct cxl_memdev *cxlmd; struct device *dev; - struct cdev *cdev; int rc; - cxlmd = cxl_memdev_alloc(cxlds, &cxl_memdev_fops); + struct cxl_memdev *cxlmd __free(put_cxlmd) = + cxl_memdev_alloc(cxlds, &cxl_memdev_fops, attach); if (IS_ERR(cxlmd)) return cxlmd; dev = &cxlmd->dev; rc = dev_set_name(dev, "mem%d", cxlmd->id); if (rc) - goto err; - - /* - * Activate ioctl operations, no cxl_memdev_rwsem manipulation - * needed as this is ordered with cdev_add() publishing the device. - */ - cxlmd->cxlds = cxlds; - cxlds->cxlmd = cxlmd; - - cdev = &cxlmd->cdev; - rc = cdev_device_add(cdev, dev); - if (rc) - goto err; + return ERR_PTR(rc); - rc = devm_add_action_or_reset(host, cxl_memdev_unregister, cxlmd); + rc = cxlmd_add(cxlmd, cxlds); if (rc) return ERR_PTR(rc); - return cxlmd; -err: - /* - * The cdev was briefly live, shutdown any ioctl operations that - * saw that state. - */ - cxl_memdev_shutdown(dev); - put_device(dev); - return ERR_PTR(rc); + return cxl_memdev_autoremove(no_free_ptr(cxlmd)); } -EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, "CXL"); +EXPORT_SYMBOL_FOR_MODULES(__devm_cxl_add_memdev, "cxl_mem"); static void sanitize_teardown_notifier(void *data) { diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index b50551601c2e4..497d99b8908d0 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -4,8 +4,11 @@ #include #include #include +#include +#include #include #include +#include #include #include #include @@ -13,6 +16,9 @@ #include "core.h" #include "trace.h" +/* Initial sibling array capacity: covers max non-ARI functions per slot */ +#define CXL_RESET_SIBLINGS_INIT 8 + /** * DOC: cxl core pci * @@ -24,84 +30,52 @@ static unsigned short media_ready_timeout = 60; module_param(media_ready_timeout, ushort, 0644); MODULE_PARM_DESC(media_ready_timeout, "seconds to wait for media ready"); -struct cxl_walk_context { - struct pci_bus *bus; - struct cxl_port *port; +static int pci_get_port_num(struct pci_dev *pdev) +{ + u32 lnkcap; int type; - int error; - int count; -}; -static int match_add_dports(struct pci_dev *pdev, void *data) -{ - struct cxl_walk_context *ctx = data; - struct cxl_port *port = ctx->port; - int type = pci_pcie_type(pdev); - struct cxl_register_map map; - struct cxl_dport *dport; - u32 lnkcap, port_num; - int rc; + type = pci_pcie_type(pdev); + if (type != PCI_EXP_TYPE_DOWNSTREAM && type != PCI_EXP_TYPE_ROOT_PORT) + return -EINVAL; - if (pdev->bus != ctx->bus) - return 0; - if (!pci_is_pcie(pdev)) - return 0; - if (type != ctx->type) - return 0; if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP, &lnkcap)) - return 0; - - rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map); - if (rc) - dev_dbg(&port->dev, "failed to find component registers\n"); - - port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap); - dport = devm_cxl_add_dport(port, &pdev->dev, port_num, map.resource); - if (IS_ERR(dport)) { - ctx->error = PTR_ERR(dport); - return PTR_ERR(dport); - } - ctx->count++; + return -ENXIO; - return 0; + return FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap); } /** - * devm_cxl_port_enumerate_dports - enumerate downstream ports of the upstream port - * @port: cxl_port whose ->uport_dev is the upstream of dports to be enumerated + * __devm_cxl_add_dport_by_dev - allocate a dport by dport device + * @port: cxl_port that hosts the dport + * @dport_dev: 'struct device' of the dport * - * Returns a positive number of dports 
enumerated or a negative error - * code. + * Returns the allocated dport on success or ERR_PTR() of -errno on error */ -int devm_cxl_port_enumerate_dports(struct cxl_port *port) +struct cxl_dport *__devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev) { - struct pci_bus *bus = cxl_port_to_pci_bus(port); - struct cxl_walk_context ctx; - int type; + struct cxl_register_map map; + struct pci_dev *pdev; + int port_num, rc; - if (!bus) - return -ENXIO; + if (!dev_is_pci(dport_dev)) + return ERR_PTR(-EINVAL); - if (pci_is_root_bus(bus)) - type = PCI_EXP_TYPE_ROOT_PORT; - else - type = PCI_EXP_TYPE_DOWNSTREAM; + pdev = to_pci_dev(dport_dev); + port_num = pci_get_port_num(pdev); + if (port_num < 0) + return ERR_PTR(port_num); - ctx = (struct cxl_walk_context) { - .port = port, - .bus = bus, - .type = type, - }; - pci_walk_bus(bus, match_add_dports, &ctx); + rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map); + if (rc) + return ERR_PTR(rc); - if (ctx.count == 0) - return -ENODEV; - if (ctx.error) - return ctx.error; - return ctx.count; + device_lock_assert(&port->dev); + return devm_cxl_add_dport(port, dport_dev, port_num, map.resource); } -EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, "CXL"); +EXPORT_SYMBOL_NS_GPL(__devm_cxl_add_dport_by_dev, "CXL"); static int cxl_dvsec_mem_range_valid(struct cxl_dev_state *cxlds, int id) { @@ -118,12 +92,12 @@ static int cxl_dvsec_mem_range_valid(struct cxl_dev_state *cxlds, int id) i = 1; do { rc = pci_read_config_dword(pdev, - d + CXL_DVSEC_RANGE_SIZE_LOW(id), + d + PCI_DVSEC_CXL_RANGE_SIZE_LOW(id), &temp); if (rc) return rc; - valid = FIELD_GET(CXL_DVSEC_MEM_INFO_VALID, temp); + valid = FIELD_GET(PCI_DVSEC_CXL_MEM_INFO_VALID, temp); if (valid) break; msleep(1000); @@ -153,11 +127,11 @@ static int cxl_dvsec_mem_range_active(struct cxl_dev_state *cxlds, int id) /* Check MEM ACTIVE bit, up to 60s timeout by default */ for (i = media_ready_timeout; i; i--) { rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(id), &temp); + pdev, d + PCI_DVSEC_CXL_RANGE_SIZE_LOW(id), &temp); if (rc) return rc; - active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp); + active = FIELD_GET(PCI_DVSEC_CXL_MEM_ACTIVE, temp); if (active) break; msleep(1000); @@ -186,11 +160,11 @@ int cxl_await_media_ready(struct cxl_dev_state *cxlds) u16 cap; rc = pci_read_config_word(pdev, - d + CXL_DVSEC_CAP_OFFSET, &cap); + d + PCI_DVSEC_CXL_CAP, &cap); if (rc) return rc; - hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap); + hdm_count = FIELD_GET(PCI_DVSEC_CXL_HDM_COUNT, cap); for (i = 0; i < hdm_count; i++) { rc = cxl_dvsec_mem_range_valid(cxlds, i); if (rc) @@ -218,16 +192,16 @@ static int cxl_set_mem_enable(struct cxl_dev_state *cxlds, u16 val) u16 ctrl; int rc; - rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); + rc = pci_read_config_word(pdev, d + PCI_DVSEC_CXL_CTRL, &ctrl); if (rc < 0) return rc; - if ((ctrl & CXL_DVSEC_MEM_ENABLE) == val) + if ((ctrl & PCI_DVSEC_CXL_MEM_ENABLE) == val) return 1; - ctrl &= ~CXL_DVSEC_MEM_ENABLE; + ctrl &= ~PCI_DVSEC_CXL_MEM_ENABLE; ctrl |= val; - rc = pci_write_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, ctrl); + rc = pci_write_config_word(pdev, d + PCI_DVSEC_CXL_CTRL, ctrl); if (rc < 0) return rc; @@ -243,7 +217,7 @@ static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds) { int rc; - rc = cxl_set_mem_enable(cxlds, CXL_DVSEC_MEM_ENABLE); + rc = cxl_set_mem_enable(cxlds, PCI_DVSEC_CXL_MEM_ENABLE); if (rc < 0) return rc; if (rc > 0) @@ -305,11 +279,11 @@ int 
cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, return -ENXIO; } - rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap); + rc = pci_read_config_word(pdev, d + PCI_DVSEC_CXL_CAP, &cap); if (rc) return rc; - if (!(cap & CXL_DVSEC_MEM_CAPABLE)) { + if (!(cap & PCI_DVSEC_CXL_MEM_CAPABLE)) { dev_dbg(dev, "Not MEM Capable\n"); return -ENXIO; } @@ -320,7 +294,7 @@ int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, * driver is for a spec defined class code which must be CXL.mem * capable, there is no point in continuing to enable CXL.mem. */ - hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap); + hdm_count = FIELD_GET(PCI_DVSEC_CXL_HDM_COUNT, cap); if (!hdm_count || hdm_count > 2) return -EINVAL; @@ -329,11 +303,11 @@ int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, * disabled, and they will remain moot after the HDM Decoder * capability is enabled. */ - rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); + rc = pci_read_config_word(pdev, d + PCI_DVSEC_CXL_CTRL, &ctrl); if (rc) return rc; - info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); + info->mem_enabled = FIELD_GET(PCI_DVSEC_CXL_MEM_ENABLE, ctrl); if (!info->mem_enabled) return 0; @@ -346,35 +320,35 @@ int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, return rc; rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp); + pdev, d + PCI_DVSEC_CXL_RANGE_SIZE_HIGH(i), &temp); if (rc) return rc; size = (u64)temp << 32; rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp); + pdev, d + PCI_DVSEC_CXL_RANGE_SIZE_LOW(i), &temp); if (rc) return rc; - size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK; + size |= temp & PCI_DVSEC_CXL_MEM_SIZE_LOW; if (!size) { continue; } rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp); + pdev, d + PCI_DVSEC_CXL_RANGE_BASE_HIGH(i), &temp); if (rc) return rc; base = (u64)temp << 32; rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp); + pdev, d + PCI_DVSEC_CXL_RANGE_BASE_LOW(i), &temp); if (rc) return rc; - base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK; + base |= temp & PCI_DVSEC_CXL_MEM_BASE_LOW; info->dvsec_range[ranges++] = (struct range) { .start = base, @@ -664,324 +638,6 @@ void read_cdat_data(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL"); -static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, - void __iomem *ras_base) -{ - void __iomem *addr; - u32 status; - - if (!ras_base) - return; - - addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET; - status = readl(addr); - if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { - writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); - trace_cxl_aer_correctable_error(cxlds->cxlmd, status); - } -} - -static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds) -{ - return __cxl_handle_cor_ras(cxlds, cxlds->regs.ras); -} - -/* CXL spec rev3.0 8.2.4.16.1 */ -static void header_log_copy(void __iomem *ras_base, u32 *log) -{ - void __iomem *addr; - u32 *log_addr; - int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32); - - addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET; - log_addr = log; - - for (i = 0; i < log_u32_size; i++) { - *log_addr = readl(addr); - log_addr++; - addr += sizeof(u32); - } -} - -/* - * Log the state of the RAS status registers and prepare them to log the - * next error status. Return 1 if reset needed. 
- */ -static bool __cxl_handle_ras(struct cxl_dev_state *cxlds, - void __iomem *ras_base) -{ - u32 hl[CXL_HEADERLOG_SIZE_U32]; - void __iomem *addr; - u32 status; - u32 fe; - - if (!ras_base) - return false; - - addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; - status = readl(addr); - if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) - return false; - - /* If multiple errors, log header points to first error from ctrl reg */ - if (hweight32(status) > 1) { - void __iomem *rcc_addr = - ras_base + CXL_RAS_CAP_CONTROL_OFFSET; - - fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, - readl(rcc_addr))); - } else { - fe = status; - } - - header_log_copy(ras_base, hl); - trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl); - writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); - - return true; -} - -static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds) -{ - return __cxl_handle_ras(cxlds, cxlds->regs.ras); -} - -#ifdef CONFIG_PCIEAER_CXL - -static void cxl_dport_map_rch_aer(struct cxl_dport *dport) -{ - resource_size_t aer_phys; - struct device *host; - u16 aer_cap; - - aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base); - if (aer_cap) { - host = dport->reg_map.host; - aer_phys = aer_cap + dport->rcrb.base; - dport->regs.dport_aer = devm_cxl_iomap_block(host, aer_phys, - sizeof(struct aer_capability_regs)); - } -} - -static void cxl_dport_map_ras(struct cxl_dport *dport) -{ - struct cxl_register_map *map = &dport->reg_map; - struct device *dev = dport->dport_dev; - - if (!map->component_map.ras.valid) - dev_dbg(dev, "RAS registers not found\n"); - else if (cxl_map_component_regs(map, &dport->regs.component, - BIT(CXL_CM_CAP_CAP_ID_RAS))) - dev_dbg(dev, "Failed to map RAS capability.\n"); -} - -static void cxl_disable_rch_root_ints(struct cxl_dport *dport) -{ - void __iomem *aer_base = dport->regs.dport_aer; - u32 aer_cmd_mask, aer_cmd; - - if (!aer_base) - return; - - /* - * Disable RCH root port command interrupts. - * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors - * - * This sequence may not be necessary. CXL spec states disabling - * the root cmd register's interrupts is required. But, PCI spec - * shows these are disabled by default on reset. - */ - aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN | - PCI_ERR_ROOT_CMD_NONFATAL_EN | - PCI_ERR_ROOT_CMD_FATAL_EN); - aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND); - aer_cmd &= ~aer_cmd_mask; - writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND); -} - -/** - * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport - * @dport: the cxl_dport that needs to be initialized - * @host: host device for devm operations - */ -void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) -{ - dport->reg_map.host = host; - cxl_dport_map_ras(dport); - - if (dport->rch) { - struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev); - - if (!host_bridge->native_aer) - return; - - cxl_dport_map_rch_aer(dport); - cxl_disable_rch_root_ints(dport); - } -} -EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL"); - -static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds, - struct cxl_dport *dport) -{ - return __cxl_handle_cor_ras(cxlds, dport->regs.ras); -} - -static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds, - struct cxl_dport *dport) -{ - return __cxl_handle_ras(cxlds, dport->regs.ras); -} - -/* - * Copy the AER capability registers using 32 bit read accesses. - * This is necessary because RCRB AER capability is MMIO mapped. 
Clear the - * status after copying. - * - * @aer_base: base address of AER capability block in RCRB - * @aer_regs: destination for copying AER capability - */ -static bool cxl_rch_get_aer_info(void __iomem *aer_base, - struct aer_capability_regs *aer_regs) -{ - int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32); - u32 *aer_regs_buf = (u32 *)aer_regs; - int n; - - if (!aer_base) - return false; - - /* Use readl() to guarantee 32-bit accesses */ - for (n = 0; n < read_cnt; n++) - aer_regs_buf[n] = readl(aer_base + n * sizeof(u32)); - - writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS); - writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS); - - return true; -} - -/* Get AER severity. Return false if there is no error. */ -static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs, - int *severity) -{ - if (aer_regs->uncor_status & ~aer_regs->uncor_mask) { - if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV) - *severity = AER_FATAL; - else - *severity = AER_NONFATAL; - return true; - } - - if (aer_regs->cor_status & ~aer_regs->cor_mask) { - *severity = AER_CORRECTABLE; - return true; - } - - return false; -} - -static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) -{ - struct pci_dev *pdev = to_pci_dev(cxlds->dev); - struct aer_capability_regs aer_regs; - struct cxl_dport *dport; - int severity; - - struct cxl_port *port __free(put_cxl_port) = - cxl_pci_find_port(pdev, &dport); - if (!port) - return; - - if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs)) - return; - - if (!cxl_rch_get_aer_severity(&aer_regs, &severity)) - return; - - pci_print_aer(pdev, severity, &aer_regs); - - if (severity == AER_CORRECTABLE) - cxl_handle_rdport_cor_ras(cxlds, dport); - else - cxl_handle_rdport_ras(cxlds, dport); -} - -#else -static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { } -#endif - -void cxl_cor_error_detected(struct pci_dev *pdev) -{ - struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); - struct device *dev = &cxlds->cxlmd->dev; - - scoped_guard(device, dev) { - if (!dev->driver) { - dev_warn(&pdev->dev, - "%s: memdev disabled, abort error handling\n", - dev_name(dev)); - return; - } - - if (cxlds->rcd) - cxl_handle_rdport_errors(cxlds); - - cxl_handle_endpoint_cor_ras(cxlds); - } -} -EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL"); - -pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) -{ - struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); - struct cxl_memdev *cxlmd = cxlds->cxlmd; - struct device *dev = &cxlmd->dev; - bool ue; - - scoped_guard(device, dev) { - if (!dev->driver) { - dev_warn(&pdev->dev, - "%s: memdev disabled, abort error handling\n", - dev_name(dev)); - return PCI_ERS_RESULT_DISCONNECT; - } - - if (cxlds->rcd) - cxl_handle_rdport_errors(cxlds); - /* - * A frozen channel indicates an impending reset which is fatal to - * CXL.mem operation, and will likely crash the system. On the off - * chance the situation is recoverable dump the status of the RAS - * capability registers and bounce the active state of the memdev. 
- */ - ue = cxl_handle_endpoint_ras(cxlds); - } - - - switch (state) { - case pci_channel_io_normal: - if (ue) { - device_release_driver(dev); - return PCI_ERS_RESULT_NEED_RESET; - } - return PCI_ERS_RESULT_CAN_RECOVER; - case pci_channel_io_frozen: - dev_warn(&pdev->dev, - "%s: frozen state error detected, disable CXL.mem\n", - dev_name(dev)); - device_release_driver(dev); - return PCI_ERS_RESULT_NEED_RESET; - case pci_channel_io_perm_failure: - dev_warn(&pdev->dev, - "failure state error detected, request disconnect\n"); - return PCI_ERS_RESULT_DISCONNECT; - } - return PCI_ERS_RESULT_NEED_RESET; -} -EXPORT_SYMBOL_NS_GPL(cxl_error_detected, "CXL"); - static int cxl_flit_size(struct pci_dev *pdev) { if (cxl_pci_flit_256(pdev)) @@ -1046,6 +702,68 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_reset_detected, "CXL"); +static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev, + struct cxl_register_map *map, + struct cxl_dport *dport) +{ + resource_size_t component_reg_phys; + + *map = (struct cxl_register_map) { + .host = &pdev->dev, + .resource = CXL_RESOURCE_NONE, + }; + + struct cxl_port *port __free(put_cxl_port) = + cxl_pci_find_port(pdev, &dport); + if (!port) + return -EPROBE_DEFER; + + component_reg_phys = cxl_rcd_component_reg_phys(&pdev->dev, dport); + if (component_reg_phys == CXL_RESOURCE_NONE) + return -ENXIO; + + map->resource = component_reg_phys; + map->reg_type = CXL_REGLOC_RBI_COMPONENT; + map->max_size = CXL_COMPONENT_REG_BLOCK_SIZE; + + return 0; +} + +int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map) +{ + int rc; + + rc = cxl_find_regblock(pdev, type, map); + + /* + * If the Register Locator DVSEC does not exist, check if it + * is an RCH and try to extract the Component Registers from + * an RCRB. + */ + if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) { + struct cxl_dport *dport; + struct cxl_port *port __free(put_cxl_port) = + cxl_pci_find_port(pdev, &dport); + if (!port) + return -EPROBE_DEFER; + + rc = cxl_rcrb_get_comp_regs(pdev, map, dport); + if (rc) + return rc; + + rc = cxl_dport_map_rcd_linkcap(pdev, dport); + if (rc) + return rc; + + } else if (rc) { + return rc; + } + + return cxl_setup_regs(map); +} +EXPORT_SYMBOL_NS_GPL(cxl_pci_setup_regs, "CXL"); + int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c) { int speed, bw; @@ -1100,7 +818,7 @@ u16 cxl_gpf_get_dvsec(struct device *dev) is_port = false; dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, - is_port ? CXL_DVSEC_PORT_GPF : CXL_DVSEC_DEVICE_GPF); + is_port ? PCI_DVSEC_CXL_PORT_GPF : PCI_DVSEC_CXL_DEVICE_GPF); if (!dvsec) dev_warn(dev, "%s GPF DVSEC not present\n", is_port ? 
"Port" : "Device"); @@ -1116,14 +834,14 @@ static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase) switch (phase) { case 1: - offset = CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET; - base = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK; - scale = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK; + offset = PCI_DVSEC_CXL_PORT_GPF_PHASE_1_CONTROL; + base = PCI_DVSEC_CXL_PORT_GPF_PHASE_1_TMO_BASE; + scale = PCI_DVSEC_CXL_PORT_GPF_PHASE_1_TMO_SCALE; break; case 2: - offset = CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET; - base = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK; - scale = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK; + offset = PCI_DVSEC_CXL_PORT_GPF_PHASE_2_CONTROL; + base = PCI_DVSEC_CXL_PORT_GPF_PHASE_2_TMO_BASE; + scale = PCI_DVSEC_CXL_PORT_GPF_PHASE_2_TMO_SCALE; break; default: return -EINVAL; @@ -1169,3 +887,585 @@ int cxl_gpf_port_setup(struct cxl_dport *dport) return 0; } + +struct cxl_walk_context { + struct pci_bus *bus; + struct cxl_port *port; + int type; + int error; + int count; +}; + +static int count_dports(struct pci_dev *pdev, void *data) +{ + struct cxl_walk_context *ctx = data; + int type = pci_pcie_type(pdev); + + if (pdev->bus != ctx->bus) + return 0; + if (!pci_is_pcie(pdev)) + return 0; + if (type != ctx->type) + return 0; + + ctx->count++; + return 0; +} + +int cxl_port_get_possible_dports(struct cxl_port *port) +{ + struct pci_bus *bus = cxl_port_to_pci_bus(port); + struct cxl_walk_context ctx; + int type; + + if (!bus) { + dev_err(&port->dev, "No PCI bus found for port %s\n", + dev_name(&port->dev)); + return -ENXIO; + } + + if (pci_is_root_bus(bus)) + type = PCI_EXP_TYPE_ROOT_PORT; + else + type = PCI_EXP_TYPE_DOWNSTREAM; + + ctx = (struct cxl_walk_context) { + .bus = bus, + .type = type, + }; + pci_walk_bus(bus, count_dports, &ctx); + + return ctx.count; +} + +/* + * CXL Reset support - core-provided reset logic for CXL devices. + * + * These functions implement the CXL reset sequence. + */ + +/* + * If CXL memory backed by this decoder is online as System RAM, offline + * and remove it per CXL spec requirements before issuing CXL Reset. + * Returns 0 if memory was not online or was successfully offlined. 
+ */ +static int __maybe_unused cxl_offline_memory(struct device *dev, void *data) +{ + struct cxl_endpoint_decoder *cxled; + struct cxl_region *cxlr; + struct cxl_region_params *p; + int rc; + + if (!is_endpoint_decoder(dev)) + return 0; + + cxled = to_cxl_endpoint_decoder(dev); + cxlr = cxled->cxld.region; + if (!cxlr) + return 0; + + p = &cxlr->params; + if (!p->res) + return 0; + + if (walk_iomem_res_desc(IORES_DESC_NONE, + IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY, + p->res->start, p->res->end, NULL, NULL) <= 0) + return 0; + + dev_info(dev, "Offlining CXL memory [%pr] for reset\n", p->res); + +#ifdef CONFIG_MEMORY_HOTREMOVE + rc = offline_and_remove_memory(p->res->start, resource_size(p->res)); + if (rc) { + dev_err(dev, + "Failed to offline CXL memory [%pr]: %d\n", + p->res, rc); + return rc; + } +#else + dev_err(dev, "Memory hotremove not supported, cannot offline CXL memory\n"); + rc = -EOPNOTSUPP; + return rc; +#endif + + return 0; +} + +static int __maybe_unused cxl_reset_prepare_memdev(struct cxl_memdev *cxlmd) +{ + struct cxl_port *endpoint; + struct device *dev; + + if (!cxlmd || !cxlmd->cxlds) + return -ENODEV; + + dev = cxlmd->cxlds->dev; + endpoint = cxlmd->endpoint; + if (!endpoint) + return 0; + + return device_for_each_child(&endpoint->dev, NULL, + cxl_offline_memory); +} + +static int __maybe_unused cxl_decoder_flush_cache(struct device *dev, void *data) +{ + struct cxl_endpoint_decoder *cxled; + struct cxl_region *cxlr; + struct resource *res; + + if (!is_endpoint_decoder(dev)) + return 0; + + cxled = to_cxl_endpoint_decoder(dev); + cxlr = cxled->cxld.region; + if (!cxlr || !cxlr->params.res) + return 0; + + res = cxlr->params.res; + cpu_cache_invalidate_memregion(res->start, resource_size(res)); + return 0; +} + +static int __maybe_unused cxl_reset_flush_cpu_caches(struct cxl_memdev *cxlmd) +{ + struct cxl_port *endpoint; + + if (!cxlmd) + return 0; + + endpoint = cxlmd->endpoint; + if (!endpoint || IS_ERR(endpoint)) + return 0; + + if (!cpu_cache_has_invalidate_memregion()) + return 0; + + device_for_each_child(&endpoint->dev, NULL, cxl_decoder_flush_cache); + return 0; +} + +/* + * Serialize all CXL reset operations globally. + */ +static DEFINE_MUTEX(cxl_reset_mutex); + +struct cxl_reset_context { + struct pci_dev *target; + struct pci_dev **pci_functions; + int pci_func_count; + int pci_func_cap; +}; + +/* + * Check if a sibling function is non-CXL using the Non-CXL Function Map + * DVSEC. Returns true if fn is listed as non-CXL, false otherwise (including + * on any read failure). + */ +static bool cxl_is_non_cxl_function(struct pci_dev *pdev, + u16 func_map_dvsec, int fn) +{ + int reg, bit; + u32 map; + + if (pci_ari_enabled(pdev->bus)) { + reg = fn / 32; + bit = fn % 32; + } else { + reg = fn; + bit = PCI_SLOT(pdev->devfn); + } + + if (pci_read_config_dword(pdev, + func_map_dvsec + PCI_DVSEC_CXL_FUNCTION_MAP_REG + (reg * 4), + &map)) + return false; + + return map & BIT(bit); +} + +struct cxl_reset_walk_ctx { + struct cxl_reset_context *ctx; + u16 func_map_dvsec; + bool ari; +}; + +static int cxl_reset_collect_sibling(struct pci_dev *func, void *data) +{ + struct cxl_reset_walk_ctx *wctx = data; + struct cxl_reset_context *ctx = wctx->ctx; + struct pci_dev *pdev = ctx->target; + u16 dvsec, cap; + int fn; + + if (func == pdev) + return 0; + + if (!wctx->ari && + PCI_SLOT(func->devfn) != PCI_SLOT(pdev->devfn)) + return 0; + + fn = wctx->ari ? 
func->devfn : PCI_FUNC(func->devfn); + if (wctx->func_map_dvsec && + cxl_is_non_cxl_function(pdev, wctx->func_map_dvsec, fn)) + return 0; + + /* Only coordinate with siblings that have CXL.cachemem */ + dvsec = pci_find_dvsec_capability(func, PCI_VENDOR_ID_CXL, + PCI_DVSEC_CXL_DEVICE); + if (!dvsec) + return 0; + if (pci_read_config_word(func, dvsec + PCI_DVSEC_CXL_CAP, &cap)) + return 0; + if (!(cap & (PCI_DVSEC_CXL_CACHE_CAPABLE | + PCI_DVSEC_CXL_MEM_CAPABLE))) + return 0; + + /* Grow sibling array; double capacity for ARI devices when running out of space */ + if (ctx->pci_func_count >= ctx->pci_func_cap) { + struct pci_dev **new; + int new_cap = ctx->pci_func_cap ? ctx->pci_func_cap * 2 + : CXL_RESET_SIBLINGS_INIT; + + new = krealloc(ctx->pci_functions, + new_cap * sizeof(*new), GFP_KERNEL); + if (!new) + return 1; + ctx->pci_functions = new; + ctx->pci_func_cap = new_cap; + } + + pci_dev_get(func); + ctx->pci_functions[ctx->pci_func_count++] = func; + return 0; +} + +static void cxl_pci_functions_reset_prepare(struct cxl_reset_context *ctx) +{ + struct pci_dev *pdev = ctx->target; + struct cxl_reset_walk_ctx wctx; + int i; + + ctx->pci_func_count = 0; + ctx->pci_functions = NULL; + ctx->pci_func_cap = 0; + + wctx.ctx = ctx; + wctx.ari = pci_ari_enabled(pdev->bus); + wctx.func_map_dvsec = pci_find_dvsec_capability(pdev, + PCI_VENDOR_ID_CXL, PCI_DVSEC_CXL_FUNCTION_MAP); + + /* Collect CXL.cachemem siblings under pci_bus_sem */ + pci_walk_bus(pdev->bus, cxl_reset_collect_sibling, &wctx); + + /* Lock and save/disable siblings outside pci_bus_sem */ + for (i = 0; i < ctx->pci_func_count; i++) { + pci_dev_lock(ctx->pci_functions[i]); + pci_dev_save_and_disable(ctx->pci_functions[i]); + } +} + +static void cxl_pci_functions_reset_done(struct cxl_reset_context *ctx) +{ + int i; + + for (i = 0; i < ctx->pci_func_count; i++) { + pci_dev_restore(ctx->pci_functions[i]); + pci_dev_unlock(ctx->pci_functions[i]); + pci_dev_put(ctx->pci_functions[i]); + } + kfree(ctx->pci_functions); + ctx->pci_functions = NULL; + ctx->pci_func_count = 0; +} + +/* + * CXL device reset execution + */ +static int cxl_dev_reset(struct pci_dev *pdev, int dvsec) +{ + static const u32 reset_timeout_ms[] = { 10, 100, 1000, 10000, 100000 }; + u16 cap, ctrl2, status2; + u32 timeout_ms; + int rc, idx; + + if (!pci_wait_for_pending_transaction(pdev)) + pci_err(pdev, "timed out waiting for pending transactions\n"); + + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CAP, &cap); + if (rc) + return rc; + + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, &ctrl2); + if (rc) + return rc; + + /* + * Disable caching and initiate cache writeback+invalidation if the + * device supports it. Poll for completion. + * Per CXL r3.2 section 9.6, software may use the cache size from + * DVSEC CXL Capability2 to compute a suitable timeout; we use a + * default of 10ms. 
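+ * The 10ms budget is consumed in 100us usleep_range() steps while the
+ * Cache Invalid bit in DVSEC Status2 is polled.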
+ */ + if (cap & PCI_DVSEC_CXL_CACHE_WBI_CAPABLE) { + u32 wbi_poll_us = 100; + s32 wbi_remaining_us = 10000; + + ctrl2 |= PCI_DVSEC_CXL_DISABLE_CACHING; + rc = pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, + ctrl2); + if (rc) + return rc; + + ctrl2 |= PCI_DVSEC_CXL_INIT_CACHE_WBI; + rc = pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, + ctrl2); + if (rc) + return rc; + + do { + usleep_range(wbi_poll_us, wbi_poll_us + 1); + wbi_remaining_us -= wbi_poll_us; + rc = pci_read_config_word(pdev, + dvsec + PCI_DVSEC_CXL_STATUS2, + &status2); + if (rc) + return rc; + } while (!(status2 & PCI_DVSEC_CXL_CACHE_INV) && + wbi_remaining_us > 0); + + if (!(status2 & PCI_DVSEC_CXL_CACHE_INV)) { + pci_err(pdev, "CXL cache WB+I timed out\n"); + return -ETIMEDOUT; + } + } else if (cap & PCI_DVSEC_CXL_CACHE_CAPABLE) { + ctrl2 |= PCI_DVSEC_CXL_DISABLE_CACHING; + rc = pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, + ctrl2); + if (rc) + return rc; + } + + if (cap & PCI_DVSEC_CXL_RST_MEM_CLR_CAPABLE) { + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, + &ctrl2); + if (rc) + return rc; + + ctrl2 |= PCI_DVSEC_CXL_RST_MEM_CLR_EN; + rc = pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, + ctrl2); + if (rc) + return rc; + } + + idx = FIELD_GET(PCI_DVSEC_CXL_RST_TIMEOUT, cap); + if (idx >= ARRAY_SIZE(reset_timeout_ms)) + idx = ARRAY_SIZE(reset_timeout_ms) - 1; + timeout_ms = reset_timeout_ms[idx]; + + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, &ctrl2); + if (rc) + return rc; + + ctrl2 |= PCI_DVSEC_CXL_INIT_CXL_RST; + rc = pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, ctrl2); + if (rc) + return rc; + + msleep(timeout_ms); + + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_STATUS2, + &status2); + if (rc) + return rc; + + if (status2 & PCI_DVSEC_CXL_RST_ERR) { + pci_err(pdev, "CXL reset error\n"); + return -EIO; + } + + if (!(status2 & PCI_DVSEC_CXL_RST_DONE)) { + pci_err(pdev, "CXL reset timeout\n"); + return -ETIMEDOUT; + } + + rc = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, &ctrl2); + if (rc) + return rc; + + ctrl2 &= ~PCI_DVSEC_CXL_DISABLE_CACHING; + rc = pci_write_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, ctrl2); + if (rc) + return rc; + + return 0; +} + +static int match_memdev_by_parent(struct device *dev, const void *parent) +{ + return is_cxl_memdev(dev) && dev->parent == parent; +} + +static int cxl_do_reset(struct pci_dev *pdev) +{ + struct cxl_reset_context ctx = { .target = pdev }; + struct cxl_memdev *cxlmd = NULL; + struct device *memdev = NULL; + int dvsec, rc; + + dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, + PCI_DVSEC_CXL_DEVICE); + if (!dvsec) + return -ENODEV; + + memdev = bus_find_device(&cxl_bus_type, NULL, &pdev->dev, + match_memdev_by_parent); + if (memdev) { + cxlmd = to_cxl_memdev(memdev); + guard(device)(&cxlmd->dev); + } + + mutex_lock(&cxl_reset_mutex); + pci_dev_lock(pdev); + + if (cxlmd) { + rc = cxl_reset_prepare_memdev(cxlmd); + if (rc) + goto out_unlock; + + cxl_reset_flush_cpu_caches(cxlmd); + } + + pci_dev_save_and_disable(pdev); + cxl_pci_functions_reset_prepare(&ctx); + + rc = cxl_dev_reset(pdev, dvsec); + + cxl_pci_functions_reset_done(&ctx); + + pci_dev_restore(pdev); + +out_unlock: + pci_dev_unlock(pdev); + mutex_unlock(&cxl_reset_mutex); + + if (memdev) + put_device(memdev); + + return rc; +} + +/* + * CXL reset sysfs attribute management. + * + * The cxl_reset attribute is added to PCI devices that advertise CXL Reset + * capability. 
Managed entirely by the CXL module via subsys_interface on + * pci_bus_type, avoiding cross-module symbol dependencies between the PCI + * core (built-in) and CXL (potentially modular). + * + * subsys_interface handles existing devices at register time and hot-plug + * add/remove automatically. On unregister, remove_dev runs for all tracked + * devices under bus core serialization. + */ + +static bool pci_cxl_reset_capable(struct pci_dev *pdev) +{ + int dvsec; + u16 cap; + + dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, + PCI_DVSEC_CXL_DEVICE); + if (!dvsec) + return false; + + if (pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CAP, &cap)) + return false; + + if (!(cap & PCI_DVSEC_CXL_CACHE_CAPABLE) || + !(cap & PCI_DVSEC_CXL_MEM_CAPABLE)) + return false; + + return !!(cap & PCI_DVSEC_CXL_RST_CAPABLE); +} + +static ssize_t cxl_reset_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int rc; + + if (!sysfs_streq(buf, "1")) + return -EINVAL; + + rc = cxl_do_reset(pdev); + return rc ? rc : count; +} +static DEVICE_ATTR_WO(cxl_reset); + +static umode_t cxl_reset_attr_is_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj)); + + if (!pci_cxl_reset_capable(pdev)) + return 0; + + return a->mode; +} + +static struct attribute *cxl_reset_attrs[] = { + &dev_attr_cxl_reset.attr, + NULL, +}; + +static const struct attribute_group cxl_reset_attr_group = { + .attrs = cxl_reset_attrs, + .is_visible = cxl_reset_attr_is_visible, +}; + +static int cxl_reset_add_dev(struct device *dev, + struct subsys_interface *sif) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + if (!pci_cxl_reset_capable(pdev)) + return 0; + + return sysfs_create_group(&dev->kobj, &cxl_reset_attr_group); +} + +static void cxl_reset_remove_dev(struct device *dev, + struct subsys_interface *sif) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + if (!pci_cxl_reset_capable(pdev)) + return; + + sysfs_remove_group(&dev->kobj, &cxl_reset_attr_group); +} + +static struct subsys_interface cxl_reset_interface = { + .name = "cxl_reset", + .subsys = &pci_bus_type, + .add_dev = cxl_reset_add_dev, + .remove_dev = cxl_reset_remove_dev, +}; + +void cxl_reset_sysfs_init(void) +{ + int rc; + + rc = subsys_interface_register(&cxl_reset_interface); + if (rc) + pr_warn("CXL: failed to register cxl_reset interface (%d)\n", + rc); +} + +void cxl_reset_sysfs_exit(void) +{ + subsys_interface_unregister(&cxl_reset_interface); +} diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index 8853415c106a9..e7b1e6fa0ea09 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -237,12 +237,13 @@ static void cxlmd_release_nvdimm(void *_cxlmd) /** * devm_cxl_add_nvdimm() - add a bridge between a cxl_memdev and an nvdimm - * @parent_port: parent port for the (to be added) @cxlmd endpoint port - * @cxlmd: cxl_memdev instance that will perform LIBNVDIMM operations + * @host: host device for devm operations + * @port: any port in the CXL topology to find the nvdimm-bridge device + * @cxlmd: parent of the to be created cxl_nvdimm device * * Return: 0 on success negative error code on failure. 
*/ -int devm_cxl_add_nvdimm(struct cxl_port *parent_port, +int devm_cxl_add_nvdimm(struct device *host, struct cxl_port *port, struct cxl_memdev *cxlmd) { struct cxl_nvdimm_bridge *cxl_nvb; @@ -250,7 +251,7 @@ int devm_cxl_add_nvdimm(struct cxl_port *parent_port, struct device *dev; int rc; - cxl_nvb = cxl_find_nvdimm_bridge(parent_port); + cxl_nvb = cxl_find_nvdimm_bridge(port); if (!cxl_nvb) return -ENODEV; @@ -270,10 +271,10 @@ int devm_cxl_add_nvdimm(struct cxl_port *parent_port, if (rc) goto err; - dev_dbg(&cxlmd->dev, "register %s\n", dev_name(dev)); + dev_dbg(host, "register %s\n", dev_name(dev)); /* @cxlmd carries a reference on @cxl_nvb until cxlmd_release_nvdimm */ - return devm_add_action_or_reset(&cxlmd->dev, cxlmd_release_nvdimm, cxlmd); + return devm_add_action_or_reset(host, cxlmd_release_nvdimm, cxlmd); err: put_device(dev); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 29197376b18e3..929caeec5c954 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -33,6 +34,15 @@ static DEFINE_IDA(cxl_port_ida); static DEFINE_XARRAY(cxl_root_buses); +/* + * The terminal device in PCI is NULL and @platform_bus + * for platform devices (for cxl_test) + */ +static bool is_cxl_host_bridge(struct device *dev) +{ + return (!dev || dev == &platform_bus); +} + int cxl_num_decoders_committed(struct cxl_port *port) { lockdep_assert_held(&cxl_rwsem.region); @@ -740,6 +750,7 @@ static struct cxl_port *cxl_port_alloc(struct device *uport_dev, xa_init(&port->dports); xa_init(&port->endpoints); xa_init(&port->regions); + port->component_reg_phys = CXL_RESOURCE_NONE; device_initialize(dev); lockdep_set_class_and_subclass(&dev->mutex, &cxl_port_key, port->depth); @@ -812,16 +823,18 @@ DEFINE_DEBUGFS_ATTRIBUTE(cxl_einj_inject_fops, NULL, cxl_einj_inject, static void cxl_debugfs_create_dport_dir(struct cxl_dport *dport) { + struct cxl_port *parent = parent_port_of(dport->port); struct dentry *dir; if (!einj_cxl_is_initialized()) return; /* - * dport_dev needs to be a PCIe port for CXL 2.0+ ports because - * EINJ expects a dport SBDF to be specified for 2.0 error injection. + * Protocol error injection is only available for CXL 2.0+ root ports + * and CXL 1.1 downstream ports */ - if (!dport->rch && !dev_is_pci(dport->dport_dev)) + if (!dport->rch && + !(dev_is_pci(dport->dport_dev) && parent && is_cxl_root(parent))) return; dir = cxl_debugfs_create_dir(dev_name(dport->dport_dev)); @@ -858,9 +871,7 @@ static int cxl_port_add(struct cxl_port *port, if (rc) return rc; - rc = cxl_port_setup_regs(port, component_reg_phys); - if (rc) - return rc; + port->component_reg_phys = component_reg_phys; } else { rc = dev_set_name(dev, "root%d", port->id); if (rc) @@ -1173,6 +1184,20 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, if (rc) return ERR_PTR(rc); + /* + * Setup port register if this is the first dport showed up. Having + * a dport also means that there is at least 1 active link. 
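+ * cxl_port_add() only records @component_reg_phys; mapping is deferred to
+ * this point so that component register setup is done against a port that
+ * is known to have a live link.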
+ */ + if (port->nr_dports == 1 && + port->component_reg_phys != CXL_RESOURCE_NONE) { + rc = cxl_port_setup_regs(port, port->component_reg_phys); + if (rc) { + xa_erase(&port->dports, (unsigned long)dport->dport_dev); + return ERR_PTR(rc); + } + port->component_reg_phys = CXL_RESOURCE_NONE; + } + get_device(dport_dev); rc = devm_add_action_or_reset(host, cxl_dport_remove, dport); if (rc) @@ -1348,21 +1373,6 @@ static struct cxl_port *find_cxl_port(struct device *dport_dev, return port; } -static struct cxl_port *find_cxl_port_at(struct cxl_port *parent_port, - struct device *dport_dev, - struct cxl_dport **dport) -{ - struct cxl_find_port_ctx ctx = { - .dport_dev = dport_dev, - .parent_port = parent_port, - .dport = dport, - }; - struct cxl_port *port; - - port = __find_cxl_port(&ctx); - return port; -} - /* * All users of grandparent() are using it to walk PCIe-like switch port * hierarchy. A PCIe switch is comprised of a bridge device representing the @@ -1423,7 +1433,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, "CXL"); * through ->remove(). This "bottom-up" removal selectively removes individual * child ports manually. This depends on devm_cxl_add_port() to not change is * devm action registration order, and for dports to have already been - * destroyed by reap_dports(). + * destroyed by del_dports(). */ static void delete_switch_port(struct cxl_port *port) { @@ -1432,18 +1442,24 @@ static void delete_switch_port(struct cxl_port *port) devm_release_action(port->dev.parent, unregister_port, port); } -static void reap_dports(struct cxl_port *port) +static void del_dport(struct cxl_dport *dport) +{ + struct cxl_port *port = dport->port; + + devm_release_action(&port->dev, cxl_dport_unlink, dport); + devm_release_action(&port->dev, cxl_dport_remove, dport); + devm_kfree(&port->dev, dport); +} + +static void del_dports(struct cxl_port *port) { struct cxl_dport *dport; unsigned long index; device_lock_assert(&port->dev); - xa_for_each(&port->dports, index, dport) { - devm_release_action(&port->dev, cxl_dport_unlink, dport); - devm_release_action(&port->dev, cxl_dport_remove, dport); - devm_kfree(&port->dev, dport); - } + xa_for_each(&port->dports, index, dport) + del_dport(dport); } struct detach_ctx { @@ -1501,7 +1517,7 @@ static void cxl_detach_ep(void *data) */ died = true; port->dead = true; - reap_dports(port); + del_dports(port); } device_unlock(&port->dev); @@ -1532,16 +1548,157 @@ static resource_size_t find_component_registers(struct device *dev) return map.resource; } +static int match_port_by_uport(struct device *dev, const void *data) +{ + const struct device *uport_dev = data; + struct cxl_port *port; + + if (!is_cxl_port(dev)) + return 0; + + port = to_cxl_port(dev); + return uport_dev == port->uport_dev; +} + +/* + * Function takes a device reference on the port device. Caller should do a + * put_device() when done. 
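+ * Callers below pair this with scope-based cleanup, e.g.:
+ *
+ *	struct cxl_port *port __free(put_cxl_port) =
+ *		find_cxl_port_by_uport(uport_dev);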
+ */ +static struct cxl_port *find_cxl_port_by_uport(struct device *uport_dev) +{ + struct device *dev; + + dev = bus_find_device(&cxl_bus_type, NULL, uport_dev, match_port_by_uport); + if (dev) + return to_cxl_port(dev); + return NULL; +} + +static int update_decoder_targets(struct device *dev, void *data) +{ + struct cxl_dport *dport = data; + struct cxl_switch_decoder *cxlsd; + struct cxl_decoder *cxld; + int i; + + if (!is_switch_decoder(dev)) + return 0; + + cxlsd = to_cxl_switch_decoder(dev); + cxld = &cxlsd->cxld; + guard(rwsem_write)(&cxl_rwsem.region); + + for (i = 0; i < cxld->interleave_ways; i++) { + if (cxld->target_map[i] == dport->port_id) { + cxlsd->target[i] = dport; + dev_dbg(dev, "dport%d found in target list, index %d\n", + dport->port_id, i); + return 1; + } + } + + return 0; +} + +DEFINE_FREE(del_cxl_dport, struct cxl_dport *, if (!IS_ERR_OR_NULL(_T)) del_dport(_T)) +static struct cxl_dport *cxl_port_add_dport(struct cxl_port *port, + struct device *dport_dev) +{ + struct cxl_dport *dport; + int rc; + + device_lock_assert(&port->dev); + if (!port->dev.driver) + return ERR_PTR(-ENXIO); + + dport = cxl_find_dport_by_dev(port, dport_dev); + if (dport) { + dev_dbg(&port->dev, "dport%d:%s already exists\n", + dport->port_id, dev_name(dport_dev)); + return ERR_PTR(-EBUSY); + } + + struct cxl_dport *new_dport __free(del_cxl_dport) = + devm_cxl_add_dport_by_dev(port, dport_dev); + if (IS_ERR(new_dport)) + return new_dport; + + cxl_switch_parse_cdat(new_dport); + + if (ida_is_empty(&port->decoder_ida)) { + rc = devm_cxl_switch_port_decoders_setup(port); + if (rc) + return ERR_PTR(rc); + dev_dbg(&port->dev, "first dport%d:%s added with decoders\n", + new_dport->port_id, dev_name(dport_dev)); + return no_free_ptr(new_dport); + } + + /* New dport added, update the decoder targets */ + device_for_each_child(&port->dev, new_dport, update_decoder_targets); + + dev_dbg(&port->dev, "dport%d:%s added\n", new_dport->port_id, + dev_name(dport_dev)); + + return no_free_ptr(new_dport); +} + +static struct cxl_dport *devm_cxl_create_port(struct device *ep_dev, + struct cxl_port *parent_port, + struct cxl_dport *parent_dport, + struct device *uport_dev, + struct device *dport_dev) +{ + resource_size_t component_reg_phys; + + device_lock_assert(&parent_port->dev); + if (!parent_port->dev.driver) { + dev_warn(ep_dev, + "port %s:%s:%s disabled, failed to enumerate CXL.mem\n", + dev_name(&parent_port->dev), dev_name(uport_dev), + dev_name(dport_dev)); + } + + struct cxl_port *port __free(put_cxl_port) = + find_cxl_port_by_uport(uport_dev); + if (!port) { + component_reg_phys = find_component_registers(uport_dev); + port = devm_cxl_add_port(&parent_port->dev, uport_dev, + component_reg_phys, parent_dport); + if (IS_ERR(port)) + return ERR_CAST(port); + + /* + * retry to make sure a port is found. a port device + * reference is taken. + */ + port = find_cxl_port_by_uport(uport_dev); + if (!port) + return ERR_PTR(-ENODEV); + + dev_dbg(ep_dev, "created port %s:%s\n", + dev_name(&port->dev), dev_name(port->uport_dev)); + } else { + /* + * Port was created before right before this function is + * called. Signal the caller to deal with it. 
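+ * -EAGAIN is not a hard failure: add_port_attach_ep() converts it to a
+ * successful return ("Port already exists, restart iteration") so that the
+ * endpoint enumeration walks the topology again and picks up the port.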
+ */ + return ERR_PTR(-EAGAIN); + } + + guard(device)(&port->dev); + return cxl_port_add_dport(port, dport_dev); +} + static int add_port_attach_ep(struct cxl_memdev *cxlmd, struct device *uport_dev, struct device *dport_dev) { struct device *dparent = grandparent(dport_dev); struct cxl_dport *dport, *parent_dport; - resource_size_t component_reg_phys; int rc; - if (!dparent) { + if (is_cxl_host_bridge(dparent)) { /* * The iteration reached the topology root without finding the * CXL-root 'cxl_port' on a previous iteration, fail for now to @@ -1553,42 +1710,31 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd, } struct cxl_port *parent_port __free(put_cxl_port) = - find_cxl_port(dparent, &parent_dport); + find_cxl_port_by_uport(dparent->parent); if (!parent_port) { /* iterate to create this parent_port */ return -EAGAIN; } - /* - * Definition with __free() here to keep the sequence of - * dereferencing the device of the port before the parent_port releasing. - */ - struct cxl_port *port __free(put_cxl_port) = NULL; scoped_guard(device, &parent_port->dev) { - if (!parent_port->dev.driver) { - dev_warn(&cxlmd->dev, - "port %s:%s disabled, failed to enumerate CXL.mem\n", - dev_name(&parent_port->dev), dev_name(uport_dev)); - return -ENXIO; + parent_dport = cxl_find_dport_by_dev(parent_port, dparent); + if (!parent_dport) { + parent_dport = cxl_port_add_dport(parent_port, dparent); + if (IS_ERR(parent_dport)) + return PTR_ERR(parent_dport); } - port = find_cxl_port_at(parent_port, dport_dev, &dport); - if (!port) { - component_reg_phys = find_component_registers(uport_dev); - port = devm_cxl_add_port(&parent_port->dev, uport_dev, - component_reg_phys, parent_dport); - if (IS_ERR(port)) - return PTR_ERR(port); - - /* retry find to pick up the new dport information */ - port = find_cxl_port_at(parent_port, dport_dev, &dport); - if (!port) - return -ENXIO; + dport = devm_cxl_create_port(&cxlmd->dev, parent_port, + parent_dport, uport_dev, + dport_dev); + if (IS_ERR(dport)) { + /* Port already exists, restart iteration */ + if (PTR_ERR(dport) == -EAGAIN) + return 0; + return PTR_ERR(dport); } } - dev_dbg(&cxlmd->dev, "add to new port %s:%s\n", - dev_name(&port->dev), dev_name(port->uport_dev)); rc = cxl_add_ep(dport, &cxlmd->dev); if (rc == -EBUSY) { /* @@ -1601,6 +1747,25 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd, return rc; } +static struct cxl_dport *find_or_add_dport(struct cxl_port *port, + struct device *dport_dev) +{ + struct cxl_dport *dport; + + device_lock_assert(&port->dev); + dport = cxl_find_dport_by_dev(port, dport_dev); + if (!dport) { + dport = cxl_port_add_dport(port, dport_dev); + if (IS_ERR(dport)) + return dport; + + /* New dport added, restart iteration */ + return ERR_PTR(-EAGAIN); + } + + return dport; +} + int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd) { struct device *dev = &cxlmd->dev; @@ -1629,11 +1794,7 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd) struct device *uport_dev; struct cxl_dport *dport; - /* - * The terminal "grandparent" in PCI is NULL and @platform_bus - * for platform devices - */ - if (!dport_dev || dport_dev == &platform_bus) + if (is_cxl_host_bridge(dport_dev)) return 0; uport_dev = dport_dev->parent; @@ -1647,12 +1808,26 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd) dev_name(iter), dev_name(dport_dev), dev_name(uport_dev)); struct cxl_port *port __free(put_cxl_port) = - find_cxl_port(dport_dev, &dport); + find_cxl_port_by_uport(uport_dev); if (port) { dev_dbg(&cxlmd->dev, "found already 
registered port %s:%s\n", dev_name(&port->dev), dev_name(port->uport_dev)); + + /* + * RP port enumerated by cxl_acpi without dport will + * have the dport added here. + */ + scoped_guard(device, &port->dev) { + dport = find_or_add_dport(port, dport_dev); + if (IS_ERR(dport)) { + if (PTR_ERR(dport) == -EAGAIN) + goto retry; + return PTR_ERR(dport); + } + } + rc = cxl_add_ep(dport, &cxlmd->dev); /* @@ -1704,24 +1879,24 @@ struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd, EXPORT_SYMBOL_NS_GPL(cxl_mem_find_port, "CXL"); static int decoder_populate_targets(struct cxl_switch_decoder *cxlsd, - struct cxl_port *port, int *target_map) + struct cxl_port *port) { + struct cxl_decoder *cxld = &cxlsd->cxld; int i; - if (!target_map) - return 0; - device_lock_assert(&port->dev); if (xa_empty(&port->dports)) - return -EINVAL; + return 0; guard(rwsem_write)(&cxl_rwsem.region); for (i = 0; i < cxlsd->cxld.interleave_ways; i++) { - struct cxl_dport *dport = find_dport(port, target_map[i]); + struct cxl_dport *dport = find_dport(port, cxld->target_map[i]); - if (!dport) - return -ENXIO; + if (!dport) { + /* dport may be activated later */ + continue; + } cxlsd->target[i] = dport; } @@ -1910,9 +2085,6 @@ EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_alloc, "CXL"); /** * cxl_decoder_add_locked - Add a decoder with targets * @cxld: The cxl decoder allocated by cxl__decoder_alloc() - * @target_map: A list of downstream ports that this decoder can direct memory - * traffic to. These numbers should correspond with the port number - * in the PCIe Link Capabilities structure. * * Certain types of decoders may not have any targets. The main example of this * is an endpoint device. A more awkward example is a hostbridge whose root @@ -1926,7 +2098,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_alloc, "CXL"); * Return: Negative error code if the decoder wasn't properly configured; else * returns 0. */ -int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map) +int cxl_decoder_add_locked(struct cxl_decoder *cxld) { struct cxl_port *port; struct device *dev; @@ -1947,7 +2119,7 @@ int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map) if (!is_endpoint_decoder(dev)) { struct cxl_switch_decoder *cxlsd = to_cxl_switch_decoder(dev); - rc = decoder_populate_targets(cxlsd, port, target_map); + rc = decoder_populate_targets(cxlsd, port); if (rc && (cxld->flags & CXL_DECODER_F_ENABLE)) { dev_err(&port->dev, "Failed to populate active decoder targets\n"); @@ -1966,9 +2138,6 @@ EXPORT_SYMBOL_NS_GPL(cxl_decoder_add_locked, "CXL"); /** * cxl_decoder_add - Add a decoder with targets * @cxld: The cxl decoder allocated by cxl__decoder_alloc() - * @target_map: A list of downstream ports that this decoder can direct memory - * traffic to. These numbers should correspond with the port number - * in the PCIe Link Capabilities structure. * * This is the unlocked variant of cxl_decoder_add_locked(). * See cxl_decoder_add_locked(). @@ -1976,7 +2145,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_decoder_add_locked, "CXL"); * Context: Process context. Takes and releases the device lock of the port that * owns the @cxld. 
*/ -int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map) +int cxl_decoder_add(struct cxl_decoder *cxld) { struct cxl_port *port; @@ -1989,7 +2158,7 @@ int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map) port = to_cxl_port(cxld->dev.parent); guard(device)(&port->dev); - return cxl_decoder_add_locked(cxld, target_map); + return cxl_decoder_add_locked(cxld); } EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, "CXL"); @@ -2336,6 +2505,8 @@ static __init int cxl_core_init(void) if (rc) goto err_ras; + cxl_reset_sysfs_init(); + return 0; err_ras: @@ -2351,6 +2522,7 @@ static __init int cxl_core_init(void) static void cxl_core_exit(void) { + cxl_reset_sysfs_exit(); cxl_ras_exit(); cxl_region_exit(); bus_unregister(&cxl_bus_type); diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c index 2731ba3a07993..72908f3ced775 100644 --- a/drivers/cxl/core/ras.c +++ b/drivers/cxl/core/ras.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "trace.h" static void cxl_cper_trace_corr_port_prot_err(struct pci_dev *pdev, @@ -124,3 +125,178 @@ void cxl_ras_exit(void) cxl_cper_unregister_prot_err_work(&cxl_cper_prot_err_work); cancel_work_sync(&cxl_cper_prot_err_work); } + +static void cxl_dport_map_ras(struct cxl_dport *dport) +{ + struct cxl_register_map *map = &dport->reg_map; + struct device *dev = dport->dport_dev; + + if (!map->component_map.ras.valid) + dev_dbg(dev, "RAS registers not found\n"); + else if (cxl_map_component_regs(map, &dport->regs.component, + BIT(CXL_CM_CAP_CAP_ID_RAS))) + dev_dbg(dev, "Failed to map RAS capability.\n"); +} + +/** + * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport + * @dport: the cxl_dport that needs to be initialized + * @host: host device for devm operations + */ +void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) +{ + dport->reg_map.host = host; + cxl_dport_map_ras(dport); + + if (dport->rch) { + struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev); + + if (!host_bridge->native_aer) + return; + + cxl_dport_map_rch_aer(dport); + cxl_disable_rch_root_ints(dport); + } +} +EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL"); + +void cxl_handle_cor_ras(struct device *dev, void __iomem *ras_base) +{ + void __iomem *addr; + u32 status; + + if (!ras_base) + return; + + addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET; + status = readl(addr); + if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { + writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); + trace_cxl_aer_correctable_error(to_cxl_memdev(dev), status); + } +} + +/* CXL spec rev3.0 8.2.4.16.1 */ +static void header_log_copy(void __iomem *ras_base, u32 *log) +{ + void __iomem *addr; + u32 *log_addr; + int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32); + + addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET; + log_addr = log; + + for (i = 0; i < log_u32_size; i++) { + *log_addr = readl(addr); + log_addr++; + addr += sizeof(u32); + } +} + +/* + * Log the state of the RAS status registers and prepare them to log the + * next error status. Return 1 if reset needed. 
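+ * A true return means an uncorrectable error was logged; for example
+ * cxl_error_detected() below releases the memdev driver and returns
+ * PCI_ERS_RESULT_NEED_RESET in that case.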
+ */ +bool cxl_handle_ras(struct device *dev, void __iomem *ras_base) +{ + u32 hl[CXL_HEADERLOG_SIZE_U32]; + void __iomem *addr; + u32 status; + u32 fe; + + if (!ras_base) + return false; + + addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; + status = readl(addr); + if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) + return false; + + /* If multiple errors, log header points to first error from ctrl reg */ + if (hweight32(status) > 1) { + void __iomem *rcc_addr = + ras_base + CXL_RAS_CAP_CONTROL_OFFSET; + + fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, + readl(rcc_addr))); + } else { + fe = status; + } + + header_log_copy(ras_base, hl); + trace_cxl_aer_uncorrectable_error(to_cxl_memdev(dev), status, fe, hl); + writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); + + return true; +} + +void cxl_cor_error_detected(struct pci_dev *pdev) +{ + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + struct device *dev = &cxlds->cxlmd->dev; + + scoped_guard(device, dev) { + if (!dev->driver) { + dev_warn(&pdev->dev, + "%s: memdev disabled, abort error handling\n", + dev_name(dev)); + return; + } + + if (cxlds->rcd) + cxl_handle_rdport_errors(cxlds); + + cxl_handle_cor_ras(&cxlds->cxlmd->dev, cxlds->regs.ras); + } +} +EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL"); + +pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + struct cxl_memdev *cxlmd = cxlds->cxlmd; + struct device *dev = &cxlmd->dev; + bool ue; + + scoped_guard(device, dev) { + if (!dev->driver) { + dev_warn(&pdev->dev, + "%s: memdev disabled, abort error handling\n", + dev_name(dev)); + return PCI_ERS_RESULT_DISCONNECT; + } + + if (cxlds->rcd) + cxl_handle_rdport_errors(cxlds); + /* + * A frozen channel indicates an impending reset which is fatal to + * CXL.mem operation, and will likely crash the system. On the off + * chance the situation is recoverable dump the status of the RAS + * capability registers and bounce the active state of the memdev. + */ + ue = cxl_handle_ras(&cxlds->cxlmd->dev, cxlds->regs.ras); + } + + + switch (state) { + case pci_channel_io_normal: + if (ue) { + device_release_driver(dev); + return PCI_ERS_RESULT_NEED_RESET; + } + return PCI_ERS_RESULT_CAN_RECOVER; + case pci_channel_io_frozen: + dev_warn(&pdev->dev, + "%s: frozen state error detected, disable CXL.mem\n", + dev_name(dev)); + device_release_driver(dev); + return PCI_ERS_RESULT_NEED_RESET; + case pci_channel_io_perm_failure: + dev_warn(&pdev->dev, + "failure state error detected, request disconnect\n"); + return PCI_ERS_RESULT_DISCONNECT; + } + return PCI_ERS_RESULT_NEED_RESET; +} +EXPORT_SYMBOL_NS_GPL(cxl_error_detected, "CXL"); diff --git a/drivers/cxl/core/ras_rch.c b/drivers/cxl/core/ras_rch.c new file mode 100644 index 0000000000000..0a8b3b9b63884 --- /dev/null +++ b/drivers/cxl/core/ras_rch.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2025 AMD Corporation. All rights reserved. 
*/ + +#include +#include +#include "cxl.h" +#include "core.h" +#include "cxlmem.h" + +void cxl_dport_map_rch_aer(struct cxl_dport *dport) +{ + resource_size_t aer_phys; + struct device *host; + u16 aer_cap; + + aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base); + if (aer_cap) { + host = dport->reg_map.host; + aer_phys = aer_cap + dport->rcrb.base; + dport->regs.dport_aer = + devm_cxl_iomap_block(host, aer_phys, + sizeof(struct aer_capability_regs)); + } +} + +void cxl_disable_rch_root_ints(struct cxl_dport *dport) +{ + void __iomem *aer_base = dport->regs.dport_aer; + u32 aer_cmd_mask, aer_cmd; + + if (!aer_base) + return; + + /* + * Disable RCH root port command interrupts. + * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors + * + * This sequence may not be necessary. CXL spec states disabling + * the root cmd register's interrupts is required. But, PCI spec + * shows these are disabled by default on reset. + */ + aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN | + PCI_ERR_ROOT_CMD_NONFATAL_EN | + PCI_ERR_ROOT_CMD_FATAL_EN); + aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND); + aer_cmd &= ~aer_cmd_mask; + writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND); +} + +/* + * Copy the AER capability registers using 32 bit read accesses. + * This is necessary because RCRB AER capability is MMIO mapped. Clear the + * status after copying. + * + * @aer_base: base address of AER capability block in RCRB + * @aer_regs: destination for copying AER capability + */ +static bool cxl_rch_get_aer_info(void __iomem *aer_base, + struct aer_capability_regs *aer_regs) +{ + int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32); + u32 *aer_regs_buf = (u32 *)aer_regs; + int n; + + if (!aer_base) + return false; + + /* Use readl() to guarantee 32-bit accesses */ + for (n = 0; n < read_cnt; n++) + aer_regs_buf[n] = readl(aer_base + n * sizeof(u32)); + + writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS); + writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS); + + return true; +} + +/* Get AER severity. Return false if there is no error. */ +static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs, + int *severity) +{ + if (aer_regs->uncor_status & ~aer_regs->uncor_mask) { + if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV) + *severity = AER_FATAL; + else + *severity = AER_NONFATAL; + return true; + } + + if (aer_regs->cor_status & ~aer_regs->cor_mask) { + *severity = AER_CORRECTABLE; + return true; + } + + return false; +} + +void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + struct aer_capability_regs aer_regs; + struct cxl_dport *dport; + int severity; + + struct cxl_port *port __free(put_cxl_port) = + cxl_pci_find_port(pdev, &dport); + if (!port) + return; + + if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs)) + return; + + if (!cxl_rch_get_aer_severity(&aer_regs, &severity)) + return; + + pci_print_aer(pdev, severity, &aer_regs); + if (severity == AER_CORRECTABLE) + cxl_handle_cor_ras(&cxlds->cxlmd->dev, dport->regs.ras); + else + cxl_handle_ras(&cxlds->cxlmd->dev, dport->regs.ras); +} diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index adebbb1db5078..b66d663deb8fd 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2,6 +2,7 @@ /* Copyright(c) 2022 Intel Corporation. All rights reserved. 
*/ #include #include +#include #include #include #include @@ -10,6 +11,7 @@ #include #include #include +#include #include #include #include "core.h" @@ -30,6 +32,12 @@ * 3. Decoder targets */ +/* + * nodemask that sets per node when the access_coordinates for the node has + * been updated by the CXL memory hotplug notifier. + */ +static nodemask_t nodemask_region_seen = NODE_MASK_NONE; + static struct cxl_region *to_cxl_region(struct device *dev); #define __ACCESS_ATTR_RO(_level, _name) { \ @@ -228,7 +236,10 @@ static int cxl_region_invalidate_memregion(struct cxl_region *cxlr) return -ENXIO; } - cpu_cache_invalidate_memregion(IORES_DESC_CXL); + if (!cxlr->params.res) + return -ENXIO; + cpu_cache_invalidate_memregion(cxlr->params.res->start, + resource_size(cxlr->params.res)); return 0; } @@ -237,6 +248,9 @@ static void cxl_region_decode_reset(struct cxl_region *cxlr, int count) struct cxl_region_params *p = &cxlr->params; int i; + if (test_bit(CXL_REGION_F_LOCK, &cxlr->flags)) + return; + /* * Before region teardown attempt to flush, evict any data cached for * this region, or scream loudly about missing arch / platform support @@ -411,6 +425,9 @@ static ssize_t commit_store(struct device *dev, struct device_attribute *attr, return len; } + if (test_bit(CXL_REGION_F_LOCK, &cxlr->flags)) + return -EPERM; + rc = queue_reset(cxlr); if (rc) return rc; @@ -453,21 +470,6 @@ static ssize_t commit_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RW(commit); -static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a, - int n) -{ - struct device *dev = kobj_to_dev(kobj); - struct cxl_region *cxlr = to_cxl_region(dev); - - /* - * Support tooling that expects to find a 'uuid' attribute for all - * regions regardless of mode. 
- */ - if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_PARTMODE_PMEM) - return 0444; - return a->mode; -} - static ssize_t interleave_ways_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -483,22 +485,14 @@ static ssize_t interleave_ways_show(struct device *dev, static const struct attribute_group *get_cxl_region_target_group(void); -static ssize_t interleave_ways_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t len) +static int set_interleave_ways(struct cxl_region *cxlr, int val) { - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent); + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; - struct cxl_region *cxlr = to_cxl_region(dev); struct cxl_region_params *p = &cxlr->params; - unsigned int val, save; - int rc; + int save, rc; u8 iw; - rc = kstrtouint(buf, 0, &val); - if (rc) - return rc; - rc = ways_to_eiw(val, &iw); if (rc) return rc; @@ -513,9 +507,7 @@ static ssize_t interleave_ways_store(struct device *dev, return -EINVAL; } - ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); - if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) - return rc; + lockdep_assert_held_write(&cxl_rwsem.region); if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) return -EBUSY; @@ -523,10 +515,31 @@ static ssize_t interleave_ways_store(struct device *dev, save = p->interleave_ways; p->interleave_ways = val; rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group()); - if (rc) { + if (rc) p->interleave_ways = save; + + return rc; +} + +static ssize_t interleave_ways_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + unsigned int val; + int rc; + + rc = kstrtouint(buf, 0, &val); + if (rc) + return rc; + + ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) + return rc; + + rc = set_interleave_ways(cxlr, val); + if (rc) return rc; - } return len; } @@ -546,21 +559,14 @@ static ssize_t interleave_granularity_show(struct device *dev, return sysfs_emit(buf, "%d\n", p->interleave_granularity); } -static ssize_t interleave_granularity_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t len) +static int set_interleave_granularity(struct cxl_region *cxlr, int val) { - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent); + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; - struct cxl_region *cxlr = to_cxl_region(dev); struct cxl_region_params *p = &cxlr->params; - int rc, val; + int rc; u16 ig; - rc = kstrtoint(buf, 0, &val); - if (rc) - return rc; - rc = granularity_to_eig(val, &ig); if (rc) return rc; @@ -576,14 +582,32 @@ static ssize_t interleave_granularity_store(struct device *dev, if (cxld->interleave_ways > 1 && val != cxld->interleave_granularity) return -EINVAL; - ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); - if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) - return rc; - + lockdep_assert_held_write(&cxl_rwsem.region); if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) return -EBUSY; p->interleave_granularity = val; + return 0; +} + +static ssize_t interleave_granularity_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + int rc, val; + + rc = kstrtoint(buf, 0, &val); + if (rc) + return rc; + + 
ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) + return rc; + + rc = set_interleave_granularity(cxlr, val); + if (rc) + return rc; return len; } @@ -703,6 +727,170 @@ static int free_hpa(struct cxl_region *cxlr) return 0; } +struct cxlrd_max_context { + struct device * const *host_bridges; + int interleave_ways; + unsigned long flags; + resource_size_t max_hpa; + struct cxl_root_decoder *cxlrd; +}; + +static int find_max_hpa(struct device *dev, void *data) +{ + struct cxlrd_max_context *ctx = data; + struct cxl_switch_decoder *cxlsd; + struct cxl_root_decoder *cxlrd; + struct resource *res, *prev; + struct cxl_decoder *cxld; + resource_size_t free = 0; + resource_size_t max; + int found = 0; + + if (!is_root_decoder(dev)) + return 0; + + cxlrd = to_cxl_root_decoder(dev); + cxlsd = &cxlrd->cxlsd; + cxld = &cxlsd->cxld; + + if ((cxld->flags & ctx->flags) != ctx->flags) { + dev_dbg(dev, "flags not matching: %08lx vs %08lx\n", + cxld->flags, ctx->flags); + return 0; + } + + for (int i = 0; i < ctx->interleave_ways; i++) { + for (int j = 0; j < ctx->interleave_ways; j++) { + if (ctx->host_bridges[i] == cxlsd->target[j]->dport_dev) { + found++; + break; + } + } + } + + if (found != ctx->interleave_ways) { + dev_dbg(dev, + "Not enough host bridges. Found %d for %d interleave ways requested\n", + found, ctx->interleave_ways); + return 0; + } + + /* + * Walk the root decoder resource range relying on cxl_rwsem.region to + * preclude sibling arrival/departure and find the largest free space + * gap. + */ + lockdep_assert_held_read(&cxl_rwsem.region); + res = cxlrd->res->child; + + /* With no resource child the whole parent resource is available */ + if (!res) + max = resource_size(cxlrd->res); + else + max = 0; + + for (prev = NULL; res; prev = res, res = res->sibling) { + if (!prev && res->start == cxlrd->res->start && + res->end == cxlrd->res->end) { + max = resource_size(cxlrd->res); + break; + } + /* + * Sanity check for preventing arithmetic problems below as a + * resource with size 0 could imply using the end field below + * when set to unsigned zero - 1 or all f in hex. + */ + if (prev && !resource_size(prev)) + continue; + + if (!prev && res->start > cxlrd->res->start) { + free = res->start - cxlrd->res->start; + max = max(free, max); + } + if (prev && res->start > prev->end + 1) { + free = res->start - prev->end + 1; + max = max(free, max); + } + } + + if (prev && prev->end + 1 < cxlrd->res->end + 1) { + free = cxlrd->res->end + 1 - prev->end + 1; + max = max(free, max); + } + + dev_dbg(cxlrd_dev(cxlrd), "found %pa bytes of free space\n", &max); + if (max > ctx->max_hpa) { + if (ctx->cxlrd) + put_device(cxlrd_dev(ctx->cxlrd)); + get_device(cxlrd_dev(cxlrd)); + ctx->cxlrd = cxlrd; + ctx->max_hpa = max; + } + return 0; +} + +/** + * cxl_get_hpa_freespace - find a root decoder with free capacity per constraints + * @cxlmd: the mem device requiring the HPA + * @interleave_ways: number of entries in @host_bridges + * @flags: CXL_DECODER_F flags for selecting RAM vs PMEM, and Type2 device + * @max_avail_contig: output parameter of max contiguous bytes available in the + * returned decoder + * + * Returns a pointer to a struct cxl_root_decoder + * + * The return tuple of a 'struct cxl_root_decoder' and 'bytes available given + * in (@max_avail_contig))' is a point in time snapshot. If by the time the + * caller goes to use this decoder and its capacity is reduced then caller needs + * to loop and retry. 
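+ * A minimal caller sketch (hypothetical, error handling elided):
+ *
+ *	resource_size_t avail;
+ *	struct cxl_root_decoder *cxlrd;
+ *
+ *	cxlrd = cxl_get_hpa_freespace(cxlmd, 1, CXL_DECODER_F_RAM, &avail);
+ *	if (!IS_ERR(cxlrd)) {
+ *		... carve a region no larger than @avail, retry if it shrank ...
+ *		cxl_put_root_decoder(cxlrd);
+ *	}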
+ * + * The returned root decoder has an elevated reference count that needs to be + * put with cxl_put_root_decoder(cxlrd). + */ +struct cxl_root_decoder *cxl_get_hpa_freespace(struct cxl_memdev *cxlmd, + int interleave_ways, + unsigned long flags, + resource_size_t *max_avail_contig) +{ + struct cxlrd_max_context ctx = { + .flags = flags, + .interleave_ways = interleave_ways, + }; + struct cxl_port *root_port; + struct cxl_port *endpoint; + + endpoint = cxlmd->endpoint; + if (!endpoint) { + dev_dbg(&cxlmd->dev, "endpoint not linked to memdev\n"); + return ERR_PTR(-ENXIO); + } + + ctx.host_bridges = &endpoint->host_bridge; + + struct cxl_root *root __free(put_cxl_root) = find_cxl_root(endpoint); + if (!root) { + dev_dbg(&endpoint->dev, "endpoint is not related to a root port\n"); + return ERR_PTR(-ENXIO); + } + + root_port = &root->port; + scoped_guard(rwsem_read, &cxl_rwsem.region) + device_for_each_child(&root_port->dev, &ctx, find_max_hpa); + + if (!ctx.cxlrd) + return ERR_PTR(-ENOMEM); + + *max_avail_contig = ctx.max_hpa; + return ctx.cxlrd; +} +EXPORT_SYMBOL_NS_GPL(cxl_get_hpa_freespace, "CXL"); + +void cxl_put_root_decoder(struct cxl_root_decoder *cxlrd) +{ + put_device(cxlrd_dev(cxlrd)); +} +EXPORT_SYMBOL_NS_GPL(cxl_put_root_decoder, "CXL"); + static ssize_t size_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -746,6 +934,21 @@ static ssize_t size_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RW(size); +static ssize_t extended_linear_cache_size_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + struct cxl_region_params *p = &cxlr->params; + ssize_t rc; + + ACQUIRE(rwsem_read_intr, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &rwsem))) + return rc; + return sysfs_emit(buf, "%#llx\n", p->cache_size); +} +static DEVICE_ATTR_RO(extended_linear_cache_size); + static struct attribute *cxl_region_attrs[] = { &dev_attr_uuid.attr, &dev_attr_commit.attr, @@ -754,9 +957,34 @@ static struct attribute *cxl_region_attrs[] = { &dev_attr_resource.attr, &dev_attr_size.attr, &dev_attr_mode.attr, + &dev_attr_extended_linear_cache_size.attr, NULL, }; +static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a, + int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct cxl_region *cxlr = to_cxl_region(dev); + + /* + * Support tooling that expects to find a 'uuid' attribute for all + * regions regardless of mode. + */ + if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_PARTMODE_PMEM) + return 0444; + + /* + * Don't display extended linear cache attribute if there is no + * extended linear cache. + */ + if (a == &dev_attr_extended_linear_cache_size.attr && + cxlr->params.cache_size == 0) + return 0; + + return a->mode; +} + static const struct attribute_group cxl_region_group = { .attrs = cxl_region_attrs, .is_visible = cxl_region_visible, @@ -830,16 +1058,16 @@ static int match_free_decoder(struct device *dev, const void *data) return 1; } -static bool region_res_match_cxl_range(const struct cxl_region_params *p, - const struct range *range) +static bool spa_maps_hpa(const struct cxl_region_params *p, + const struct range *range) { if (!p->res) return false; /* - * If an extended linear cache region then the CXL range is assumed - * to be fronted by the DRAM range in current known implementation. - * This assumption will be made until a variant implementation exists. 
+ * The extended linear cache region is constructed by a 1:1 ratio + * where the SPA maps equal amounts of DRAM and CXL HPA capacity with + * CXL decoders at the high end of the SPA range. */ return p->res->start + p->cache_size == range->start && p->res->end == range->end; @@ -857,7 +1085,7 @@ static int match_auto_decoder(struct device *dev, const void *data) cxld = to_cxl_decoder(dev); r = &cxld->hpa_range; - if (region_res_match_cxl_range(p, r)) + if (spa_maps_hpa(p, r)) return 1; return 0; @@ -1051,6 +1279,16 @@ static int cxl_rr_assign_decoder(struct cxl_port *port, struct cxl_region *cxlr, return 0; } +static void cxl_region_set_lock(struct cxl_region *cxlr, + struct cxl_decoder *cxld) +{ + if (!test_bit(CXL_DECODER_F_LOCK, &cxld->flags)) + return; + + set_bit(CXL_REGION_F_LOCK, &cxlr->flags); + clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags); +} + /** * cxl_port_attach_region() - track a region's interest in a port by endpoint * @port: port to add a new region reference 'struct cxl_region_ref' @@ -1162,6 +1400,8 @@ static int cxl_port_attach_region(struct cxl_port *port, } } + cxl_region_set_lock(cxlr, cxld); + rc = cxl_rr_ep_add(cxl_rr, cxled); if (rc) { dev_dbg(&cxlr->dev, @@ -1315,57 +1555,119 @@ static int check_interleave_cap(struct cxl_decoder *cxld, int iw, int ig) return 0; } +static inline u64 get_selector(u64 ways, u64 gran) +{ + if (!is_power_of_2(ways)) + ways /= 3; + + if (!is_power_of_2(ways) || !is_power_of_2(gran)) + return 0; + + return (ways - 1) * gran; +} + static int cxl_port_setup_targets(struct cxl_port *port, struct cxl_region *cxlr, struct cxl_endpoint_decoder *cxled) { struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); - int parent_iw, parent_ig, ig, iw, rc, inc = 0, pos = cxled->pos; struct cxl_port *parent_port = to_cxl_port(port->dev.parent); struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr); struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); struct cxl_ep *ep = cxl_ep_load(port, cxlmd); struct cxl_region_params *p = &cxlr->params; struct cxl_decoder *cxld = cxl_rr->decoder; - struct cxl_switch_decoder *cxlsd; + struct cxl_switch_decoder *cxlsd = to_cxl_switch_decoder(&cxld->dev); struct cxl_port *iter = port; - u16 eig, peig; - u8 eiw, peiw; + int ig, iw = cxl_rr->nr_targets, rc, pos = cxled->pos; + int distance, parent_distance; + u64 selector, cxlr_sel; + u16 eig; + u8 eiw; /* * While root level decoders support x3, x6, x12, switch level * decoders only support powers of 2 up to x16. */ - if (!is_power_of_2(cxl_rr->nr_targets)) { + if (!is_power_of_2(iw)) { dev_dbg(&cxlr->dev, "%s:%s: invalid target count %d\n", - dev_name(port->uport_dev), dev_name(&port->dev), - cxl_rr->nr_targets); + dev_name(port->uport_dev), dev_name(&port->dev), iw); return -EINVAL; } - cxlsd = to_cxl_switch_decoder(&cxld->dev); - if (cxl_rr->nr_targets_set) { - int i, distance = 1; - struct cxl_region_ref *cxl_rr_iter; + if (iw > 8 || iw > cxlsd->nr_targets) { + dev_dbg(&cxlr->dev, + "%s:%s:%s: ways: %d overflows targets: %d\n", + dev_name(port->uport_dev), dev_name(&port->dev), + dev_name(&cxld->dev), iw, cxlsd->nr_targets); + return -ENXIO; + } - /* - * The "distance" between peer downstream ports represents which - * endpoint positions in the region interleave a given port can - * host. - * - * For example, at the root of a hierarchy the distance is - * always 1 as every index targets a different host-bridge. At - * each subsequent switch level those ports map every Nth region - * position where N is the width of the switch == distance. 
- */ - do { - cxl_rr_iter = cxl_rr_load(iter, cxlr); - distance *= cxl_rr_iter->nr_targets; - iter = to_cxl_port(iter->dev.parent); - } while (!is_cxl_root(iter)); - distance *= cxlrd->cxlsd.cxld.interleave_ways; + /* + * Calculate the effective granularity and ways to determine + * HPA bits used as target selectors of the interleave set. + * Use this to check if the root decoder and all subsequent + * HDM decoders only use bits from that range as selectors. + * + * The "distance" between peer downstream ports represents which + * endpoint positions in the region interleave a given port can + * host. + * + * For example, at the root of a hierarchy the distance is + * always 1 as every index targets a different host-bridge. At + * each subsequent switch level those ports map every Nth region + * position where N is the width of the switch == distance. + */ + + /* Start with the root decoders selector and distance. */ + selector = get_selector(cxlrd->cxlsd.cxld.interleave_ways, + cxlrd->cxlsd.cxld.interleave_granularity); + distance = cxlrd->cxlsd.cxld.interleave_ways; + if (!is_power_of_2(distance)) + distance /= 3; + + for (iter = parent_port; !is_cxl_root(iter); + iter = to_cxl_port(iter->dev.parent)) { + struct cxl_region_ref *cxl_rr_iter = cxl_rr_load(iter, cxlr); + struct cxl_decoder *cxld_iter = cxl_rr_iter->decoder; + u64 cxld_sel; + + if (cxld_iter->interleave_ways == 1) + continue; + + cxld_sel = get_selector(cxld_iter->interleave_ways, + cxld_iter->interleave_granularity); + + if (cxld_sel & selector) { + dev_dbg(&cxlr->dev, "%s:%s: overlapping selectors: %#llx:%#llx\n", + dev_name(iter->uport_dev), + dev_name(&iter->dev), cxld_sel, selector); + return -ENXIO; + } + + selector |= cxld_sel; + distance *= cxl_rr_iter->nr_targets; + } + + parent_distance = distance; + distance *= iw; - for (i = 0; i < cxl_rr->nr_targets_set; i++) + /* The combined selector bits must fit the region selector. */ + cxlr_sel = get_selector(p->interleave_ways, + p->interleave_granularity); + + if ((cxlr_sel & selector) != selector) { + dev_dbg(&cxlr->dev, "%s:%s: invalid selectors: %#llx:%#llx\n", + dev_name(iter->uport_dev), + dev_name(&iter->dev), cxlr_sel, selector); + return -ENXIO; + } + + /* Calculate remaining selector bits available for use. */ + selector = cxlr_sel & ~selector; + + if (cxl_rr->nr_targets_set) { + for (int i = 0; i < cxl_rr->nr_targets_set; i++) if (ep->dport == cxlsd->target[i]) { rc = check_last_peer(cxled, ep, cxl_rr, distance); @@ -1376,88 +1678,41 @@ static int cxl_port_setup_targets(struct cxl_port *port, goto add_target; } - if (is_cxl_root(parent_port)) { + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) + ig = cxld->interleave_granularity; + else /* + * Set the interleave granularity with each interleave + * level to a multiple of it's parent port interleave + * ways. Beginning with the granularity of the root + * decoder set to the region granularity (starting + * with the inner selector bits of the HPA), the + * granularity is increased with each level. Calculate + * this using the parent distance and region + * granularity. + * * Root decoder IG is always set to value in CFMWS which * may be different than this region's IG. We can use the * region's IG here since interleave_granularity_store() * does not allow interleaved host-bridges with * root IG != region IG. */ - parent_ig = p->interleave_granularity; - parent_iw = cxlrd->cxlsd.cxld.interleave_ways; - /* - * For purposes of address bit routing, use power-of-2 math for - * switch ports. 
- */ - if (!is_power_of_2(parent_iw)) - parent_iw /= 3; - } else { - struct cxl_region_ref *parent_rr; - struct cxl_decoder *parent_cxld; + ig = p->interleave_granularity * parent_distance; - parent_rr = cxl_rr_load(parent_port, cxlr); - parent_cxld = parent_rr->decoder; - parent_ig = parent_cxld->interleave_granularity; - parent_iw = parent_cxld->interleave_ways; - } - - rc = granularity_to_eig(parent_ig, &peig); - if (rc) { - dev_dbg(&cxlr->dev, "%s:%s: invalid parent granularity: %d\n", - dev_name(parent_port->uport_dev), - dev_name(&parent_port->dev), parent_ig); - return rc; - } - - rc = ways_to_eiw(parent_iw, &peiw); - if (rc) { - dev_dbg(&cxlr->dev, "%s:%s: invalid parent interleave: %d\n", - dev_name(parent_port->uport_dev), - dev_name(&parent_port->dev), parent_iw); - return rc; - } - - iw = cxl_rr->nr_targets; - rc = ways_to_eiw(iw, &eiw); - if (rc) { - dev_dbg(&cxlr->dev, "%s:%s: invalid port interleave: %d\n", - dev_name(port->uport_dev), dev_name(&port->dev), iw); - return rc; - } - - /* - * Interleave granularity is a multiple of @parent_port granularity. - * Multiplier is the parent port interleave ways. - */ - rc = granularity_to_eig(parent_ig * parent_iw, &eig); - if (rc) { - dev_dbg(&cxlr->dev, - "%s: invalid granularity calculation (%d * %d)\n", - dev_name(&parent_port->dev), parent_ig, parent_iw); - return rc; - } - - rc = eig_to_granularity(eig, &ig); - if (rc) { - dev_dbg(&cxlr->dev, "%s:%s: invalid interleave: %d\n", - dev_name(port->uport_dev), dev_name(&port->dev), - 256 << eig); - return rc; - } - - if (iw > 8 || iw > cxlsd->nr_targets) { - dev_dbg(&cxlr->dev, - "%s:%s:%s: ways: %d overflows targets: %d\n", - dev_name(port->uport_dev), dev_name(&port->dev), - dev_name(&cxld->dev), iw, cxlsd->nr_targets); - return -ENXIO; + rc = ways_to_eiw(iw, &eiw); + if (!rc) + rc = granularity_to_eig(ig, &eig); + + if (rc || (iw > 1 && ~selector & get_selector(iw, ig))) { + dev_dbg(&cxlr->dev, "%s:%s: invalid port interleave: %d:%d:%#llx\n", + dev_name(port->uport_dev), dev_name(&port->dev), + iw, ig, selector); + return -ENXIO; } if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { if (cxld->interleave_ways != iw || - (iw > 1 && cxld->interleave_granularity != ig) || - !region_res_match_cxl_range(p, &cxld->hpa_range) || + !spa_maps_hpa(p, &cxld->hpa_range) || ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) { dev_err(&cxlr->dev, "%s:%s %s expected iw: %d ig: %d %pr\n", @@ -1468,9 +1723,7 @@ static int cxl_port_setup_targets(struct cxl_port *port, dev_name(port->uport_dev), dev_name(&port->dev), __func__, cxld->interleave_ways, cxld->interleave_granularity, - (cxld->flags & CXL_DECODER_F_ENABLE) ? 
- "enabled" : - "disabled", + str_enabled_disabled(cxld->flags & CXL_DECODER_F_ENABLE), cxld->hpa_range.start, cxld->hpa_range.end); return -ENXIO; } @@ -1510,11 +1763,12 @@ static int cxl_port_setup_targets(struct cxl_port *port, cxl_rr->nr_targets_set); return -ENXIO; } - } else + } else { cxlsd->target[cxl_rr->nr_targets_set] = ep->dport; - inc = 1; + cxlsd->cxld.target_map[cxl_rr->nr_targets_set] = ep->dport->port_id; + } + cxl_rr->nr_targets_set++; out_target_set: - cxl_rr->nr_targets_set += inc; dev_dbg(&cxlr->dev, "%s:%s target[%d] = %s for %s:%s @ %d\n", dev_name(port->uport_dev), dev_name(&port->dev), cxl_rr->nr_targets_set - 1, dev_name(ep->dport->dport_dev), @@ -2125,7 +2379,9 @@ __cxl_decoder_detach(struct cxl_region *cxlr, cxled->part = -1; if (p->state > CXL_CONFIG_ACTIVE) { - cxl_region_decode_reset(cxlr, p->interleave_ways); + if (!test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) + cxl_region_decode_reset(cxlr, p->interleave_ways); + p->state = CXL_CONFIG_ACTIVE; } @@ -2385,9 +2641,8 @@ static struct cxl_region *to_cxl_region(struct device *dev) return container_of(dev, struct cxl_region, dev); } -static void unregister_region(void *_cxlr) +void cxl_unregister_region(struct cxl_region *cxlr) { - struct cxl_region *cxlr = _cxlr; struct cxl_region_params *p = &cxlr->params; int i; @@ -2404,6 +2659,14 @@ static void unregister_region(void *_cxlr) cxl_region_iomem_release(cxlr); put_device(&cxlr->dev); } +EXPORT_SYMBOL_NS_GPL(cxl_unregister_region, "CXL"); + +static void __unregister_region(void *_cxlr) +{ + struct cxl_region *cxlr = _cxlr; + + return cxl_unregister_region(cxlr); +} static struct lock_class_key cxl_region_key; @@ -2431,6 +2694,7 @@ static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int i dev->bus = &cxl_bus_type; dev->type = &cxl_region_type; cxlr->id = id; + cxl_region_set_lock(cxlr, &cxlrd->cxlsd.cxld); return cxlr; } @@ -2442,14 +2706,8 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid) for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) { if (cxlr->coord[i].read_bandwidth) { - rc = 0; - if (cxl_need_node_perf_attrs_update(nid)) - node_set_perf_attrs(nid, &cxlr->coord[i], i); - else - rc = cxl_update_hmat_access_coordinates(nid, cxlr, i); - - if (rc == 0) - cset++; + node_update_perf_attrs(nid, &cxlr->coord[i], i); + cset++; } } @@ -2487,6 +2745,10 @@ static int cxl_region_perf_attrs_callback(struct notifier_block *nb, if (nid != region_nid) return NOTIFY_DONE; + /* No action needed if node bit already set */ + if (node_test_and_set(nid, nodemask_region_seen)) + return NOTIFY_DONE; + if (!cxl_region_update_coordinates(cxlr, nid)) return NOTIFY_DONE; @@ -2556,7 +2818,7 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd, if (rc) goto err; - rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr); + rc = devm_add_action_or_reset(port->uport_dev, __unregister_region, cxlr); if (rc) return ERR_PTR(rc); @@ -2569,6 +2831,29 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd, return ERR_PTR(rc); } +/** + * cxl_get_region_range - obtain range linked to a CXL region + * + * @region: a pointer to struct cxl_region + * @range: a pointer to a struct range to be set + * + * Returns 0 or error. 
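+ * A hypothetical caller sketch:
+ *
+ *	struct range r;
+ *
+ *	if (!cxl_get_region_range(cxlr, &r))
+ *		dev_dbg(dev, "region spans %#llx-%#llx\n", r.start, r.end);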
+ */ +int cxl_get_region_range(struct cxl_region *region, struct range *range) +{ + if (WARN_ON_ONCE(!region)) + return -ENODEV; + + if (!region->params.res) + return -ENOSPC; + + range->start = region->params.res->start; + range->end = region->params.res->end; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_get_region_range, "CXL"); + static ssize_t __create_region_show(struct cxl_root_decoder *cxlrd, char *buf) { return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id)); @@ -2587,7 +2872,8 @@ static ssize_t create_ram_region_show(struct device *dev, } static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, - enum cxl_partition_mode mode, int id) + enum cxl_partition_mode mode, int id, + enum cxl_decoder_type target_type) { int rc; @@ -2609,7 +2895,7 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, return ERR_PTR(-EBUSY); } - return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM); + return devm_cxl_add_region(cxlrd, id, mode, target_type); } static ssize_t create_region_store(struct device *dev, const char *buf, @@ -2623,7 +2909,7 @@ static ssize_t create_region_store(struct device *dev, const char *buf, if (rc != 1) return -EINVAL; - cxlr = __create_region(cxlrd, mode, id); + cxlr = __create_region(cxlrd, mode, id, CXL_DECODER_HOSTONLYMEM); if (IS_ERR(cxlr)) return PTR_ERR(cxlr); @@ -2675,6 +2961,14 @@ cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name) return to_cxl_region(region_dev); } +static void drop_region(struct cxl_region *cxlr) +{ + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_port *port = cxlrd_to_port(cxlrd); + + devm_release_action(port->uport_dev, __unregister_region, cxlr); +} + static ssize_t delete_region_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) @@ -2687,7 +2981,7 @@ static ssize_t delete_region_store(struct device *dev, if (IS_ERR(cxlr)) return PTR_ERR(cxlr); - devm_release_action(port->uport_dev, unregister_region, cxlr); + devm_release_action(port->uport_dev, __unregister_region, cxlr); put_device(&cxlr->dev); return len; @@ -2918,28 +3212,119 @@ static bool cxl_is_hpa_in_chunk(u64 hpa, struct cxl_region *cxlr, int pos) return false; } -u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, - u64 dpa) +#define CXL_POS_ZERO 0 +/** + * cxl_validate_translation_params + * @eiw: encoded interleave ways + * @eig: encoded interleave granularity + * @pos: position in interleave + * + * Callers pass CXL_POS_ZERO when no position parameter needs validating. 
+ * + * Returns: 0 on success, -EINVAL on first invalid parameter + */ +int cxl_validate_translation_params(u8 eiw, u16 eig, int pos) { - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); - u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa; - struct cxl_region_params *p = &cxlr->params; - struct cxl_endpoint_decoder *cxled = NULL; - u16 eig = 0; - u8 eiw = 0; - int pos; + int ways, gran; - for (int i = 0; i < p->nr_targets; i++) { - cxled = p->targets[i]; - if (cxlmd == cxled_to_memdev(cxled)) - break; + if (eiw_to_ways(eiw, &ways)) { + pr_debug("%s: invalid eiw=%u\n", __func__, eiw); + return -EINVAL; } - if (!cxled || cxlmd != cxled_to_memdev(cxled)) + if (eig_to_granularity(eig, &gran)) { + pr_debug("%s: invalid eig=%u\n", __func__, eig); + return -EINVAL; + } + if (pos < 0 || pos >= ways) { + pr_debug("%s: invalid pos=%d for ways=%u\n", __func__, pos, + ways); + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL_FOR_MODULES(cxl_validate_translation_params, "cxl_translate"); + +u64 cxl_calculate_dpa_offset(u64 hpa_offset, u8 eiw, u16 eig) +{ + u64 dpa_offset, bits_lower, bits_upper, temp; + int ret; + + ret = cxl_validate_translation_params(eiw, eig, CXL_POS_ZERO); + if (ret) return ULLONG_MAX; - pos = cxled->pos; - ways_to_eiw(p->interleave_ways, &eiw); - granularity_to_eig(p->interleave_granularity, &eig); + /* + * DPA offset: CXL Spec 3.2 Section 8.2.4.20.13 + * Lower bits [IG+7:0] pass through unchanged + * (eiw < 8) + * Per spec: DPAOffset[51:IG+8] = (HPAOffset[51:IG+IW+8] >> IW) + * Clear the position bits to isolate upper section, then + * reverse the left shift by eiw that occurred during DPA->HPA + * (eiw >= 8) + * Per spec: DPAOffset[51:IG+8] = HPAOffset[51:IG+IW] / 3 + * Extract upper bits from the correct bit range and divide by 3 + * to recover the original DPA upper bits + */ + bits_lower = hpa_offset & GENMASK_ULL(eig + 7, 0); + if (eiw < 8) { + temp = hpa_offset &= ~GENMASK_ULL(eig + eiw + 8 - 1, 0); + dpa_offset = temp >> eiw; + } else { + bits_upper = div64_u64(hpa_offset >> (eig + eiw), 3); + dpa_offset = bits_upper << (eig + 8); + } + dpa_offset |= bits_lower; + + return dpa_offset; +} +EXPORT_SYMBOL_FOR_MODULES(cxl_calculate_dpa_offset, "cxl_translate"); + +int cxl_calculate_position(u64 hpa_offset, u8 eiw, u16 eig) +{ + unsigned int ways = 0; + u64 shifted, rem; + int pos, ret; + + ret = cxl_validate_translation_params(eiw, eig, CXL_POS_ZERO); + if (ret) + return ret; + + if (!eiw) + /* position is 0 if no interleaving */ + return 0; + + /* + * Interleave position: CXL Spec 3.2 Section 8.2.4.20.13 + * eiw < 8 + * Position is in the IW bits at HPA_OFFSET[IG+8+IW-1:IG+8]. + * Per spec "remove IW bits starting with bit position IG+8" + * eiw >= 8 + * Position is not explicitly stored in HPA_OFFSET bits. It is + * derived from the modulo operation of the upper bits using + * the total number of interleave ways. 
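+ * For example, with eiw=1 (2 ways) and eig=0 (256 byte granularity),
+ * hpa_offset 0x300 lies in chunk index 3, so pos = (0x300 >> 8) & 0x1 = 1;
+ * the matching dpa_offset from cxl_calculate_dpa_offset() is 0x100.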
+ */ + if (eiw < 8) { + pos = (hpa_offset >> (eig + 8)) & GENMASK(eiw - 1, 0); + } else { + shifted = hpa_offset >> (eig + 8); + eiw_to_ways(eiw, &ways); + div64_u64_rem(shifted, ways, &rem); + pos = rem; + } + + return pos; +} +EXPORT_SYMBOL_FOR_MODULES(cxl_calculate_position, "cxl_translate"); + +u64 cxl_calculate_hpa_offset(u64 dpa_offset, int pos, u8 eiw, u16 eig) +{ + u64 mask_upper, hpa_offset, bits_upper; + int ret; + + ret = cxl_validate_translation_params(eiw, eig, pos); + if (ret) + return ULLONG_MAX; /* * The device position in the region interleave set was removed @@ -2951,9 +3336,6 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, * 8.2.4.19.13 Implementation Note: Device Decode Logic */ - /* Remove the dpa base */ - dpa_offset = dpa - cxl_dpa_resource_start(cxled); - mask_upper = GENMASK_ULL(51, eig + 8); if (eiw < 8) { @@ -2968,26 +3350,312 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, /* The lower bits remain unchanged */ hpa_offset |= dpa_offset & GENMASK_ULL(eig + 7, 0); + return hpa_offset; +} +EXPORT_SYMBOL_FOR_MODULES(cxl_calculate_hpa_offset, "cxl_translate"); + +static int decode_pos(int region_ways, int hb_ways, int pos, int *pos_port, + int *pos_hb) +{ + int devices_per_hb; + + /* + * Decode for 3-6-12 way interleaves as defined in the CXL + * Spec 4.0 9.13.1.1 Legal Interleaving Configurations. + * Region creation should prevent invalid combinations but + * sanity check here to avoid a silent bad decode. + */ + switch (hb_ways) { + case 3: + if (region_ways != 3 && region_ways != 6 && region_ways != 12) + return -EINVAL; + break; + case 6: + if (region_ways != 6 && region_ways != 12) + return -EINVAL; + break; + case 12: + if (region_ways != 12) + return -EINVAL; + break; + default: + return -EINVAL; + } + /* + * Each host bridge contributes an equal number of endpoints + * that are laid out contiguously per host bridge. Modulo + * selects the port within a host bridge and division selects + * the host bridge position. + */ + devices_per_hb = region_ways / hb_ways; + *pos_port = pos % devices_per_hb; + *pos_hb = pos / devices_per_hb; + + return 0; +} + +/* + * restore_parent() reconstruct the address in parent + * + * This math, specifically the bitmask creation 'mask = gran - 1' relies + * on the CXL Spec requirement that interleave granularity is always a + * power of two. + * + * [mask] isolate the offset with the granularity + * [addr & ~mask] remove the offset leaving the aligned portion + * [* ways] distribute across all interleave ways + * [+ (pos * gran)] add the positional offset + * [+ (addr & mask)] restore the masked offset + */ +static u64 restore_parent(u64 addr, u64 pos, u64 gran, u64 ways) +{ + u64 mask = gran - 1; + + return ((addr & ~mask) * ways) + (pos * gran) + (addr & mask); +} + +/* + * unaligned_dpa_to_hpa() translates a DPA to HPA when the region resource + * start address is not aligned at Host Bridge Interleave Ways * 256MB. + * + * Unaligned start addresses only occur with MOD3 interleaves. All power- + * of-two interleaves are guaranteed aligned. 
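+ * For illustration: a 6-way region built over a 3-way host-bridge interleave
+ * gives decode_pos() devices_per_hb = 2, so endpoint pos 5 resolves to
+ * pos_port 1 under host bridge pos_hb 2, and the HPA is then rebuilt level
+ * by level with restore_parent().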
+ */ +static u64 unaligned_dpa_to_hpa(struct cxl_decoder *cxld, + struct cxl_region_params *p, int pos, u64 dpa) +{ + int ways_port = p->interleave_ways / cxld->interleave_ways; + int gran_port = p->interleave_granularity; + int gran_hb = cxld->interleave_granularity; + int ways_hb = cxld->interleave_ways; + int pos_port, pos_hb, gran_shift; + u64 hpa_port = 0; + + /* Decode an endpoint 'pos' into port and host-bridge components */ + if (decode_pos(p->interleave_ways, ways_hb, pos, &pos_port, &pos_hb)) { + dev_dbg(&cxld->dev, "not supported for region ways:%d\n", + p->interleave_ways); + return ULLONG_MAX; + } + + /* Restore the port parent address if needed */ + if (gran_hb != gran_port) + hpa_port = restore_parent(dpa, pos_port, gran_port, ways_port); + else + hpa_port = dpa; + + /* + * Complete the HPA reconstruction by restoring the address as if + * each HB position is a candidate. Test against expected pos_hb + * to confirm match. + */ + gran_shift = ilog2(gran_hb); + for (int position = 0; position < ways_hb; position++) { + u64 shifted, hpa; + + hpa = restore_parent(hpa_port, position, gran_hb, ways_hb); + hpa += p->res->start; + + shifted = hpa >> gran_shift; + if (do_div(shifted, ways_hb) == pos_hb) + return hpa; + } + + dev_dbg(&cxld->dev, "fail dpa:%#llx region:%pr pos:%d\n", dpa, p->res, + pos); + dev_dbg(&cxld->dev, " port-w/g/p:%d/%d/%d hb-w/g/p:%d/%d/%d\n", + ways_port, gran_port, pos_port, ways_hb, gran_hb, pos_hb); + + return ULLONG_MAX; +} + +static bool region_is_unaligned_mod3(struct cxl_region *cxlr) +{ + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; + struct cxl_region_params *p = &cxlr->params; + int hbiw = cxld->interleave_ways; + u64 rem; + + if (is_power_of_2(hbiw)) + return false; + + div64_u64_rem(p->res->start, (u64)hbiw * SZ_256M, &rem); + + return (rem != 0); +} + +u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, + u64 dpa) +{ + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; + struct cxl_region_params *p = &cxlr->params; + struct cxl_endpoint_decoder *cxled = NULL; + u64 dpa_offset, hpa_offset, hpa; + bool unaligned = false; + u16 eig = 0; + u8 eiw = 0; + int pos; + + for (int i = 0; i < p->nr_targets; i++) { + if (cxlmd == cxled_to_memdev(p->targets[i])) { + cxled = p->targets[i]; + break; + } + } + if (!cxled) + return ULLONG_MAX; + + dpa_offset = dpa - cxl_dpa_resource_start(cxled); + + /* Unaligned calc for MOD3 interleaves not hbiw * 256MB aligned */ + unaligned = region_is_unaligned_mod3(cxlr); + if (unaligned) { + hpa = unaligned_dpa_to_hpa(cxld, p, cxled->pos, dpa_offset); + if (hpa == ULLONG_MAX) + return ULLONG_MAX; + + goto skip_aligned; + } + /* + * Aligned calc for all power-of-2 interleaves and for MOD3 + * interleaves that are aligned at hbiw * 256MB + */ + pos = cxled->pos; + ways_to_eiw(p->interleave_ways, &eiw); + granularity_to_eig(p->interleave_granularity, &eig); + + hpa_offset = cxl_calculate_hpa_offset(dpa_offset, pos, eiw, eig); + /* Apply the hpa_offset to the region base address */ - hpa = hpa_offset + p->res->start + p->cache_size; + hpa = hpa_offset + p->res->start; + +skip_aligned: + hpa += p->cache_size; /* Root decoder translation overrides typical modulo decode */ - if (cxlrd->hpa_to_spa) - hpa = cxlrd->hpa_to_spa(cxlrd, hpa); + if (cxlrd->ops.hpa_to_spa) + hpa = cxlrd->ops.hpa_to_spa(cxlrd, hpa); if (!cxl_resource_contains_addr(p->res, hpa)) { 
dev_dbg(&cxlr->dev, "Addr trans fail: hpa 0x%llx not in region\n", hpa); return ULLONG_MAX; } - - /* Simple chunk check, by pos & gran, only applies to modulo decodes */ - if (!cxlrd->hpa_to_spa && (!cxl_is_hpa_in_chunk(hpa, cxlr, pos))) + /* Chunk check applies to aligned modulo decodes only */ + if (!unaligned && !cxlrd->ops.hpa_to_spa && + !cxl_is_hpa_in_chunk(hpa, cxlr, pos)) return ULLONG_MAX; return hpa; } +struct dpa_result { + struct cxl_memdev *cxlmd; + u64 dpa; +}; + +static int unaligned_region_offset_to_dpa_result(struct cxl_region *cxlr, + u64 offset, + struct dpa_result *result) +{ + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; + struct cxl_region_params *p = &cxlr->params; + u64 interleave_width, interleave_index; + u64 gran, gran_offset, dpa_offset; + u64 hpa = p->res->start + offset; + u64 tmp = offset; + + /* + * Unaligned addresses are not algebraically invertible. Calculate + * a dpa_offset independent of the target device and then enumerate + * and test that dpa_offset against each candidate endpoint decoder. + */ + gran = cxld->interleave_granularity; + interleave_width = gran * cxld->interleave_ways; + interleave_index = div64_u64(offset, interleave_width); + gran_offset = do_div(tmp, gran); + + dpa_offset = interleave_index * gran + gran_offset; + + for (int i = 0; i < p->nr_targets; i++) { + struct cxl_endpoint_decoder *cxled = p->targets[i]; + int pos = cxled->pos; + u64 test_hpa; + + test_hpa = unaligned_dpa_to_hpa(cxld, p, pos, dpa_offset); + if (test_hpa == hpa) { + result->cxlmd = cxled_to_memdev(cxled); + result->dpa = + cxl_dpa_resource_start(cxled) + dpa_offset; + return 0; + } + } + dev_err(&cxlr->dev, + "failed to resolve HPA %#llx in unaligned MOD3 region\n", hpa); + + return -ENXIO; +} + +static int region_offset_to_dpa_result(struct cxl_region *cxlr, u64 offset, + struct dpa_result *result) +{ + struct cxl_region_params *p = &cxlr->params; + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_endpoint_decoder *cxled; + u64 hpa, hpa_offset, dpa_offset; + u16 eig = 0; + u8 eiw = 0; + int pos; + + lockdep_assert_held(&cxl_rwsem.region); + lockdep_assert_held(&cxl_rwsem.dpa); + + /* Input validation ensures valid ways and gran */ + granularity_to_eig(p->interleave_granularity, &eig); + ways_to_eiw(p->interleave_ways, &eiw); + + /* + * If the root decoder has SPA to CXL HPA callback, use it. Otherwise + * CXL HPA is assumed to equal SPA. 
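+ * (Root decoders using non-modulo interleave math, e.g. XOR-based
+ * decode, are the expected providers of these callbacks.)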
+ */ + if (cxlrd->ops.spa_to_hpa) { + hpa = cxlrd->ops.spa_to_hpa(cxlrd, p->res->start + offset); + hpa_offset = hpa - p->res->start; + } else { + hpa_offset = offset; + } + + if (region_is_unaligned_mod3(cxlr)) + return unaligned_region_offset_to_dpa_result(cxlr, offset, + result); + + pos = cxl_calculate_position(hpa_offset, eiw, eig); + if (pos < 0 || pos >= p->nr_targets) { + dev_dbg(&cxlr->dev, "Invalid position %d for %d targets\n", + pos, p->nr_targets); + return -ENXIO; + } + + dpa_offset = cxl_calculate_dpa_offset(hpa_offset, eiw, eig); + + /* Look-up and return the result: a memdev and a DPA */ + for (int i = 0; i < p->nr_targets; i++) { + cxled = p->targets[i]; + if (cxled->pos != pos) + continue; + result->cxlmd = cxled_to_memdev(cxled); + result->dpa = cxl_dpa_resource_start(cxled) + dpa_offset; + + return 0; + } + dev_err(&cxlr->dev, "No device found for position %d\n", pos); + + return -ENXIO; +} + static struct lock_class_key cxl_pmem_region_key; static int cxl_pmem_region_alloc(struct cxl_region *cxlr) @@ -3287,7 +3955,7 @@ static int match_region_by_range(struct device *dev, const void *data) p = &cxlr->params; guard(rwsem_read)(&cxl_rwsem.region); - return region_res_match_cxl_range(p, r); + return spa_maps_hpa(p, r); } static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr, @@ -3368,6 +4036,10 @@ static int __construct_region(struct cxl_region *cxlr, "Extended linear cache calculation failed rc:%d\n", rc); } + rc = sysfs_update_group(&cxlr->dev.kobj, &cxl_region_group); + if (rc) + return rc; + rc = insert_resource(cxlrd->res, res); if (rc) { /* @@ -3400,14 +4072,12 @@ static int __construct_region(struct cxl_region *cxlr, return 0; } -/* Establish an empty region covering the given HPA range */ -static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, - struct cxl_endpoint_decoder *cxled) +static struct cxl_region *construct_region_begin(struct cxl_root_decoder *cxlrd, + struct cxl_endpoint_decoder *cxled) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); - struct cxl_port *port = cxlrd_to_port(cxlrd); struct cxl_dev_state *cxlds = cxlmd->cxlds; - int rc, part = READ_ONCE(cxled->part); + int part = READ_ONCE(cxled->part); struct cxl_region *cxlr; if (part < 0 || part >= cxlds->nr_partitions) { @@ -3420,26 +4090,138 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, do { cxlr = __create_region(cxlrd, cxlds->part[part].mode, - atomic_read(&cxlrd->region_id)); + atomic_read(&cxlrd->region_id), + cxled->cxld.target_type); } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY); - if (IS_ERR(cxlr)) { + if (IS_ERR(cxlr)) dev_err(cxlmd->dev.parent, "%s:%s: %s failed assign region: %ld\n", dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__, PTR_ERR(cxlr)); + + return cxlr; +} + +/* Establish an empty region covering the given HPA range */ +static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, + struct cxl_endpoint_decoder *cxled) +{ + struct cxl_port *port = cxlrd_to_port(cxlrd); + struct cxl_region *cxlr; + int rc; + + cxlr = construct_region_begin(cxlrd, cxled); + if (IS_ERR(cxlr)) return cxlr; - } rc = __construct_region(cxlr, cxlrd, cxled); if (rc) { - devm_release_action(port->uport_dev, unregister_region, cxlr); + devm_release_action(port->uport_dev, __unregister_region, cxlr); return ERR_PTR(rc); } return cxlr; } +DEFINE_FREE(cxl_region_drop, struct cxl_region *, if (_T) drop_region(_T)) + +static struct cxl_region * +__construct_new_region(struct cxl_root_decoder *cxlrd, + struct 
cxl_endpoint_decoder **cxled, int ways) +{ + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled[0]); + struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; + struct cxl_region_params *p; + resource_size_t size = 0; + int rc, i; + + struct cxl_region *cxlr __free(cxl_region_drop) = + construct_region_begin(cxlrd, cxled[0]); + if (IS_ERR(cxlr)) + return cxlr; + + guard(rwsem_write)(&cxl_rwsem.region); + + /* + * Sanity check. This should not happen with an accel driver handling + * the region creation. + */ + p = &cxlr->params; + if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) { + dev_err(cxlmd->dev.parent, + "%s:%s: %s unexpected region state\n", + dev_name(&cxlmd->dev), dev_name(&cxled[0]->cxld.dev), + __func__); + return ERR_PTR(-EBUSY); + } + + rc = set_interleave_ways(cxlr, ways); + if (rc) + return ERR_PTR(rc); + + rc = set_interleave_granularity(cxlr, cxld->interleave_granularity); + if (rc) + return ERR_PTR(rc); + + scoped_guard(rwsem_read, &cxl_rwsem.dpa) { + for (i = 0; i < ways; i++) { + if (!cxled[i]->dpa_res) + return ERR_PTR(-EINVAL); + size += resource_size(cxled[i]->dpa_res); + } + + rc = alloc_hpa(cxlr, size); + if (rc) + return ERR_PTR(rc); + + for (i = 0; i < ways; i++) { + rc = cxl_region_attach(cxlr, cxled[i], 0); + if (rc) + return ERR_PTR(rc); + } + } + + rc = cxl_region_decode_commit(cxlr); + if (rc) + return ERR_PTR(rc); + + p->state = CXL_CONFIG_COMMIT; + + return no_free_ptr(cxlr); +} + +/** + * cxl_create_region - Establish a region given an endpoint decoder + * @cxlrd: root decoder to allocate HPA + * @cxled: endpoint decoders with reserved DPA capacity + * @ways: interleave ways required + * + * Returns a fully formed region in the commit state and attached to the + * cxl_region driver. + */ +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd, + struct cxl_endpoint_decoder **cxled, + int ways) +{ + struct cxl_region *cxlr; + + mutex_lock(&cxlrd->range_lock); + cxlr = __construct_new_region(cxlrd, cxled, ways); + mutex_unlock(&cxlrd->range_lock); + if (IS_ERR(cxlr)) + return cxlr; + + if (device_attach(&cxlr->dev) <= 0) { + dev_err(&cxlr->dev, "failed to create region\n"); + drop_region(cxlr); + return ERR_PTR(-ENODEV); + } + + return cxlr; +} +EXPORT_SYMBOL_NS_GPL(cxl_create_region, "CXL"); + static struct cxl_region * cxl_find_region_by_range(struct cxl_root_decoder *cxlrd, struct range *hpa) { @@ -3547,6 +4329,107 @@ static void shutdown_notifiers(void *_cxlr) unregister_mt_adistance_algorithm(&cxlr->adist_notifier); } +static void remove_debugfs(void *dentry) +{ + debugfs_remove_recursive(dentry); +} + +static int validate_region_offset(struct cxl_region *cxlr, u64 offset) +{ + struct cxl_region_params *p = &cxlr->params; + resource_size_t region_size; + u64 hpa; + + if (offset < p->cache_size) { + dev_err(&cxlr->dev, + "Offset %#llx is within extended linear cache %pa\n", + offset, &p->cache_size); + return -EINVAL; + } + + region_size = resource_size(p->res); + if (offset >= region_size) { + dev_err(&cxlr->dev, "Offset %#llx exceeds region size %pa\n", + offset, ®ion_size); + return -EINVAL; + } + + hpa = p->res->start + offset; + if (hpa < p->res->start || hpa > p->res->end) { + dev_err(&cxlr->dev, "HPA %#llx not in region %pr\n", hpa, + p->res); + return -EINVAL; + } + + return 0; +} + +static int cxl_region_debugfs_poison_inject(void *data, u64 offset) +{ + struct dpa_result result = { .dpa = ULLONG_MAX, .cxlmd = NULL }; + struct cxl_region *cxlr = data; + int rc; + + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((rc = 
ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) + return rc; + + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) + return rc; + + if (validate_region_offset(cxlr, offset)) + return -EINVAL; + + offset -= cxlr->params.cache_size; + rc = region_offset_to_dpa_result(cxlr, offset, &result); + if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) { + dev_dbg(&cxlr->dev, + "Failed to resolve DPA for region offset %#llx rc %d\n", + offset, rc); + + return rc ? rc : -EINVAL; + } + + return cxl_inject_poison_locked(result.cxlmd, result.dpa); +} + +DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_inject_fops, NULL, + cxl_region_debugfs_poison_inject, "%llx\n"); + +static int cxl_region_debugfs_poison_clear(void *data, u64 offset) +{ + struct dpa_result result = { .dpa = ULLONG_MAX, .cxlmd = NULL }; + struct cxl_region *cxlr = data; + int rc; + + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) + return rc; + + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) + return rc; + + if (validate_region_offset(cxlr, offset)) + return -EINVAL; + + offset -= cxlr->params.cache_size; + rc = region_offset_to_dpa_result(cxlr, offset, &result); + if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) { + dev_dbg(&cxlr->dev, + "Failed to resolve DPA for region offset %#llx rc %d\n", + offset, rc); + + return rc ? rc : -EINVAL; + } + + return cxl_clear_poison_locked(result.cxlmd, result.dpa); +} + +DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_clear_fops, NULL, + cxl_region_debugfs_poison_clear, "%llx\n"); + static int cxl_region_can_probe(struct cxl_region *cxlr) { struct cxl_region_params *p = &cxlr->params; @@ -3576,12 +4459,20 @@ static int cxl_region_probe(struct device *dev) { struct cxl_region *cxlr = to_cxl_region(dev); struct cxl_region_params *p = &cxlr->params; + bool poison_supported = true; int rc; rc = cxl_region_can_probe(cxlr); if (rc) return rc; + /* + * HDM-D[B] (device-memory) regions have accelerator specific usage. + * Skip device-dax registration. + */ + if (cxlr->type == CXL_DECODER_DEVMEM) + return 0; + /* * From this point on any path that changes the region's state away from * CXL_CONFIG_COMMIT is also responsible for releasing the driver. 
@@ -3599,6 +4490,31 @@ static int cxl_region_probe(struct device *dev) if (rc) return rc; + /* Create poison attributes if all memdevs support the capabilities */ + for (int i = 0; i < p->nr_targets; i++) { + struct cxl_endpoint_decoder *cxled = p->targets[i]; + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + + if (!cxl_memdev_has_poison_cmd(cxlmd, CXL_POISON_ENABLED_INJECT) || + !cxl_memdev_has_poison_cmd(cxlmd, CXL_POISON_ENABLED_CLEAR)) { + poison_supported = false; + break; + } + } + + if (poison_supported) { + struct dentry *dentry; + + dentry = cxl_debugfs_create_dir(dev_name(dev)); + debugfs_create_file("inject_poison", 0200, dentry, cxlr, + &cxl_poison_inject_fops); + debugfs_create_file("clear_poison", 0200, dentry, cxlr, + &cxl_poison_clear_fops); + rc = devm_add_action_or_reset(dev, remove_debugfs, dentry); + if (rc) + return rc; + } + switch (cxlr->mode) { case CXL_PARTMODE_PMEM: rc = devm_cxl_region_edac_register(cxlr); diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index 5ca7b0eed568b..20c2d9fbcfe7d 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -271,10 +272,10 @@ EXPORT_SYMBOL_NS_GPL(cxl_map_device_regs, "CXL"); static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi, struct cxl_register_map *map) { - u8 reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo); - int bar = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo); + u8 reg_type = FIELD_GET(PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_ID, reg_lo); + int bar = FIELD_GET(PCI_DVSEC_CXL_REG_LOCATOR_BIR, reg_lo); u64 offset = ((u64)reg_hi << 32) | - (reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK); + (reg_lo & PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_OFF_LOW); if (offset > pci_resource_len(pdev, bar)) { dev_warn(&pdev->dev, @@ -311,15 +312,15 @@ static int __cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_ty }; regloc = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, - CXL_DVSEC_REG_LOCATOR); + PCI_DVSEC_CXL_REG_LOCATOR); if (!regloc) return -ENXIO; pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, ®loc_size); - regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size); + regloc_size = PCI_DVSEC_HEADER1_LEN(regloc_size); - regloc += CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET; - regblocks = (regloc_size - CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET) / 8; + regloc += PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1; + regblocks = (regloc_size - PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1) / 8; for (i = 0; i < regblocks; i++, regloc += 8) { u32 reg_lo, reg_hi; @@ -641,4 +642,3 @@ resource_size_t cxl_rcd_component_reg_phys(struct device *dev, return CXL_RESOURCE_NONE; return __rcrb_to_component(dev, &dport->rcrb, CXL_RCRB_UPSTREAM); } -EXPORT_SYMBOL_NS_GPL(cxl_rcd_component_reg_phys, "CXL"); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 847e37be42c47..f84910ba7fa2b 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -12,6 +12,7 @@ #include #include #include +#include extern const struct nvdimm_security_ops *cxl_security_ops; @@ -23,63 +24,6 @@ extern const struct nvdimm_security_ops *cxl_security_ops; * (port-driver, region-driver, nvdimm object-drivers... etc). 
*/ -/* CXL 2.0 8.2.4 CXL Component Register Layout and Definition */ -#define CXL_COMPONENT_REG_BLOCK_SIZE SZ_64K - -/* CXL 2.0 8.2.5 CXL.cache and CXL.mem Registers*/ -#define CXL_CM_OFFSET 0x1000 -#define CXL_CM_CAP_HDR_OFFSET 0x0 -#define CXL_CM_CAP_HDR_ID_MASK GENMASK(15, 0) -#define CM_CAP_HDR_CAP_ID 1 -#define CXL_CM_CAP_HDR_VERSION_MASK GENMASK(19, 16) -#define CM_CAP_HDR_CAP_VERSION 1 -#define CXL_CM_CAP_HDR_CACHE_MEM_VERSION_MASK GENMASK(23, 20) -#define CM_CAP_HDR_CACHE_MEM_VERSION 1 -#define CXL_CM_CAP_HDR_ARRAY_SIZE_MASK GENMASK(31, 24) -#define CXL_CM_CAP_PTR_MASK GENMASK(31, 20) - -#define CXL_CM_CAP_CAP_ID_RAS 0x2 -#define CXL_CM_CAP_CAP_ID_HDM 0x5 -#define CXL_CM_CAP_CAP_HDM_VERSION 1 - -/* HDM decoders CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure */ -#define CXL_HDM_DECODER_CAP_OFFSET 0x0 -#define CXL_HDM_DECODER_COUNT_MASK GENMASK(3, 0) -#define CXL_HDM_DECODER_TARGET_COUNT_MASK GENMASK(7, 4) -#define CXL_HDM_DECODER_INTERLEAVE_11_8 BIT(8) -#define CXL_HDM_DECODER_INTERLEAVE_14_12 BIT(9) -#define CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY BIT(11) -#define CXL_HDM_DECODER_INTERLEAVE_16_WAY BIT(12) -#define CXL_HDM_DECODER_CTRL_OFFSET 0x4 -#define CXL_HDM_DECODER_ENABLE BIT(1) -#define CXL_HDM_DECODER0_BASE_LOW_OFFSET(i) (0x20 * (i) + 0x10) -#define CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i) (0x20 * (i) + 0x14) -#define CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i) (0x20 * (i) + 0x18) -#define CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i) (0x20 * (i) + 0x1c) -#define CXL_HDM_DECODER0_CTRL_OFFSET(i) (0x20 * (i) + 0x20) -#define CXL_HDM_DECODER0_CTRL_IG_MASK GENMASK(3, 0) -#define CXL_HDM_DECODER0_CTRL_IW_MASK GENMASK(7, 4) -#define CXL_HDM_DECODER0_CTRL_LOCK BIT(8) -#define CXL_HDM_DECODER0_CTRL_COMMIT BIT(9) -#define CXL_HDM_DECODER0_CTRL_COMMITTED BIT(10) -#define CXL_HDM_DECODER0_CTRL_COMMIT_ERROR BIT(11) -#define CXL_HDM_DECODER0_CTRL_HOSTONLY BIT(12) -#define CXL_HDM_DECODER0_TL_LOW(i) (0x20 * (i) + 0x24) -#define CXL_HDM_DECODER0_TL_HIGH(i) (0x20 * (i) + 0x28) -#define CXL_HDM_DECODER0_SKIP_LOW(i) CXL_HDM_DECODER0_TL_LOW(i) -#define CXL_HDM_DECODER0_SKIP_HIGH(i) CXL_HDM_DECODER0_TL_HIGH(i) - -/* HDM decoder control register constants CXL 3.0 8.2.5.19.7 */ -#define CXL_DECODER_MIN_GRANULARITY 256 -#define CXL_DECODER_MAX_ENCODED_IG 6 - -static inline int cxl_hdm_decoder_count(u32 cap_hdr) -{ - int val = FIELD_GET(CXL_HDM_DECODER_COUNT_MASK, cap_hdr); - - return val ? val * 2 : 1; -} - /* Encode defined in CXL 2.0 8.2.5.12.7 HDM Decoder Control Register */ static inline int eig_to_granularity(u16 eig, unsigned int *granularity) { @@ -201,139 +145,24 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw) #define CXLDEV_MBOX_BG_CMD_COMMAND_VENDOR_MASK GENMASK_ULL(63, 48) #define CXLDEV_MBOX_PAYLOAD_OFFSET 0x20 -/* - * Using struct_group() allows for per register-block-type helper routines, - * without requiring block-type agnostic code to include the prefix. 
- */ -struct cxl_regs { - /* - * Common set of CXL Component register block base pointers - * @hdm_decoder: CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure - * @ras: CXL 2.0 8.2.5.9 CXL RAS Capability Structure - */ - struct_group_tagged(cxl_component_regs, component, - void __iomem *hdm_decoder; - void __iomem *ras; - ); - /* - * Common set of CXL Device register block base pointers - * @status: CXL 2.0 8.2.8.3 Device Status Registers - * @mbox: CXL 2.0 8.2.8.4 Mailbox Registers - * @memdev: CXL 2.0 8.2.8.5 Memory Device Registers - */ - struct_group_tagged(cxl_device_regs, device_regs, - void __iomem *status, *mbox, *memdev; - ); - - struct_group_tagged(cxl_pmu_regs, pmu_regs, - void __iomem *pmu; - ); - - /* - * RCH downstream port specific RAS register - * @aer: CXL 3.0 8.2.1.1 RCH Downstream Port RCRB - */ - struct_group_tagged(cxl_rch_regs, rch_regs, - void __iomem *dport_aer; - ); - - /* - * RCD upstream port specific PCIe cap register - * @pcie_cap: CXL 3.0 8.2.1.2 RCD Upstream Port RCRB - */ - struct_group_tagged(cxl_rcd_regs, rcd_regs, - void __iomem *rcd_pcie_cap; - ); -}; - -struct cxl_reg_map { - bool valid; - int id; - unsigned long offset; - unsigned long size; -}; - -struct cxl_component_reg_map { - struct cxl_reg_map hdm_decoder; - struct cxl_reg_map ras; -}; - -struct cxl_device_reg_map { - struct cxl_reg_map status; - struct cxl_reg_map mbox; - struct cxl_reg_map memdev; -}; - -struct cxl_pmu_reg_map { - struct cxl_reg_map pmu; -}; - -/** - * struct cxl_register_map - DVSEC harvested register block mapping parameters - * @host: device for devm operations and logging - * @base: virtual base of the register-block-BAR + @block_offset - * @resource: physical resource base of the register block - * @max_size: maximum mapping size to perform register search - * @reg_type: see enum cxl_regloc_type - * @component_map: cxl_reg_map for component registers - * @device_map: cxl_reg_maps for device registers - * @pmu_map: cxl_reg_maps for CXL Performance Monitoring Units - */ -struct cxl_register_map { - struct device *host; - void __iomem *base; - resource_size_t resource; - resource_size_t max_size; - u8 reg_type; - union { - struct cxl_component_reg_map component_map; - struct cxl_device_reg_map device_map; - struct cxl_pmu_reg_map pmu_map; - }; -}; - void cxl_probe_component_regs(struct device *dev, void __iomem *base, struct cxl_component_reg_map *map); void cxl_probe_device_regs(struct device *dev, void __iomem *base, struct cxl_device_reg_map *map); -int cxl_map_component_regs(const struct cxl_register_map *map, - struct cxl_component_regs *regs, - unsigned long map_mask); int cxl_map_device_regs(const struct cxl_register_map *map, struct cxl_device_regs *regs); int cxl_map_pmu_regs(struct cxl_register_map *map, struct cxl_pmu_regs *regs); #define CXL_INSTANCES_COUNT -1 -enum cxl_regloc_type; int cxl_count_regblock(struct pci_dev *pdev, enum cxl_regloc_type type); int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type, struct cxl_register_map *map, unsigned int index); -int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, - struct cxl_register_map *map); -int cxl_setup_regs(struct cxl_register_map *map); struct cxl_dport; -resource_size_t cxl_rcd_component_reg_phys(struct device *dev, - struct cxl_dport *dport); int cxl_dport_map_rcd_linkcap(struct pci_dev *pdev, struct cxl_dport *dport); #define CXL_RESOURCE_NONE ((resource_size_t) -1) #define CXL_TARGET_STRLEN 20 -/* - * cxl_decoder flags that define the type of memory / 
devices this - * decoder supports as well as configuration lock status See "CXL 2.0 - * 8.2.5.12.7 CXL HDM Decoder 0 Control Register" for details. - * Additionally indicate whether decoder settings were autodetected, - * user customized. - */ -#define CXL_DECODER_F_RAM BIT(0) -#define CXL_DECODER_F_PMEM BIT(1) -#define CXL_DECODER_F_TYPE2 BIT(2) -#define CXL_DECODER_F_TYPE3 BIT(3) -#define CXL_DECODER_F_LOCK BIT(4) -#define CXL_DECODER_F_ENABLE BIT(5) -#define CXL_DECODER_F_MASK GENMASK(5, 0) - enum cxl_decoder_type { CXL_DECODER_DEVMEM = 2, CXL_DECODER_HOSTONLYMEM = 3, @@ -357,6 +186,9 @@ enum cxl_decoder_type { * @target_type: accelerator vs expander (type2 vs type3) selector * @region: currently assigned region for this decoder * @flags: memory type capabilities and locking + * @target_map: cached copy of hardware port-id list, available at init + * before all @dport objects have been instantiated. While + * dport id is 8bit, CFMWS interleave targets are 32bits. * @commit: device/decoder-type specific callback to commit settings to hw * @reset: device/decoder-type specific callback to reset hw settings */ @@ -369,6 +201,7 @@ struct cxl_decoder { enum cxl_decoder_type target_type; struct cxl_region *region; unsigned long flags; + u32 target_map[CXL_DECODER_MAX_INTERLEAVE]; int (*commit)(struct cxl_decoder *cxld); void (*reset)(struct cxl_decoder *cxld); }; @@ -419,27 +252,35 @@ struct cxl_switch_decoder { }; struct cxl_root_decoder; -typedef u64 (*cxl_hpa_to_spa_fn)(struct cxl_root_decoder *cxlrd, u64 hpa); +/** + * struct cxl_rd_ops - CXL root decoder callback operations + * @hpa_to_spa: Convert host physical address to system physical address + * @spa_to_hpa: Convert system physical address to host physical address + */ +struct cxl_rd_ops { + u64 (*hpa_to_spa)(struct cxl_root_decoder *cxlrd, u64 hpa); + u64 (*spa_to_hpa)(struct cxl_root_decoder *cxlrd, u64 spa); +}; /** * struct cxl_root_decoder - Static platform CXL address decoder * @res: host / parent resource for region allocations * @cache_size: extended linear cache size if exists, otherwise zero. * @region_id: region id for next region provisioning event - * @hpa_to_spa: translate CXL host-physical-address to Platform system-physical-address * @platform_data: platform specific configuration data * @range_lock: sync region autodiscovery by address range * @qos_class: QoS performance class cookie + * @ops: CXL root decoder operations * @cxlsd: base cxl switch decoder */ struct cxl_root_decoder { struct resource *res; resource_size_t cache_size; atomic_t region_id; - cxl_hpa_to_spa_fn hpa_to_spa; void *platform_data; struct mutex range_lock; int qos_class; + struct cxl_rd_ops ops; struct cxl_switch_decoder cxlsd; }; @@ -485,11 +326,6 @@ struct cxl_region_params { resource_size_t cache_size; }; -enum cxl_partition_mode { - CXL_PARTMODE_RAM, - CXL_PARTMODE_PMEM, -}; - /* * Indicate whether this region has been assembled by autodetection or * userspace assembly. Prevent endpoint decoders outside of automatic @@ -505,6 +341,14 @@ enum cxl_partition_mode { */ #define CXL_REGION_F_NEEDS_RESET 1 +/* + * Indicate whether this region is locked due to 1 or more decoders that have + * been locked. The approach of all or nothing is taken with regard to the + * locked attribute. CXL_REGION_F_NEEDS_RESET should not be set if this flag is + * set. 
+ */ +#define CXL_REGION_F_LOCK 2 + /** * struct cxl_region - CXL region * @dev: This region's device @@ -595,6 +439,7 @@ struct cxl_dax_region { * @cdat: Cached CDAT data * @cdat_available: Should a CDAT attribute be available in sysfs * @pci_latency: Upstream latency in picoseconds + * @component_reg_phys: Physical address of component register */ struct cxl_port { struct device dev; @@ -618,6 +463,7 @@ struct cxl_port { } cdat; bool cdat_available; long pci_latency; + resource_size_t component_reg_phys; }; /** @@ -724,6 +570,25 @@ static inline bool is_cxl_root(struct cxl_port *port) return port->uport_dev == port->dev.parent; } +/* Address translation functions exported to cxl_translate test module only */ +int cxl_validate_translation_params(u8 eiw, u16 eig, int pos); +u64 cxl_calculate_hpa_offset(u64 dpa_offset, int pos, u8 eiw, u16 eig); +u64 cxl_calculate_dpa_offset(u64 hpa_offset, u8 eiw, u16 eig); +int cxl_calculate_position(u64 hpa_offset, u8 eiw, u16 eig); +struct cxl_cxims_data { + int nr_maps; + u64 xormaps[] __counted_by(nr_maps); +}; + +#if IS_ENABLED(CONFIG_CXL_ACPI) +u64 cxl_do_xormap_calc(struct cxl_cxims_data *cximsd, u64 addr, int hbiw); +#else +static inline u64 cxl_do_xormap_calc(struct cxl_cxims_data *cximsd, u64 addr, int hbiw) +{ + return ULLONG_MAX; +} +#endif + int cxl_num_decoders_committed(struct cxl_port *port); bool is_cxl_port(const struct device *dev); struct cxl_port *to_cxl_port(const struct device *dev); @@ -739,10 +604,13 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct cxl_dport *parent_dport); struct cxl_root *devm_cxl_add_root(struct device *host, const struct cxl_root_ops *ops); +int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd, + struct cxl_dport *parent_dport); struct cxl_root *find_cxl_root(struct cxl_port *port); DEFINE_FREE(put_cxl_root, struct cxl_root *, if (_T) put_device(&_T->port.dev)) DEFINE_FREE(put_cxl_port, struct cxl_port *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev)) +DEFINE_FREE(put_cxled, struct cxl_endpoint_decoder *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->cxld.dev)) DEFINE_FREE(put_cxl_root_decoder, struct cxl_root_decoder *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->cxlsd.cxld.dev)) DEFINE_FREE(put_cxl_region, struct cxl_region *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev)) @@ -762,28 +630,23 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port, struct device *dport_dev, int port_id, resource_size_t rcrb); -#ifdef CONFIG_PCIEAER_CXL -void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport); -void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host); -#else -static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport, - struct device *host) { } -#endif - struct cxl_decoder *to_cxl_decoder(struct device *dev); struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev); struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev); struct cxl_endpoint_decoder *to_cxl_endpoint_decoder(struct device *dev); bool is_root_decoder(struct device *dev); + +#define cxlrd_dev(cxlrd) (&(cxlrd)->cxlsd.cxld.dev) + bool is_switch_decoder(struct device *dev); bool is_endpoint_decoder(struct device *dev); struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, unsigned int nr_targets); struct cxl_switch_decoder *cxl_switch_decoder_alloc(struct cxl_port *port, unsigned int nr_targets); -int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map); +int cxl_decoder_add(struct cxl_decoder *cxld); struct 
cxl_endpoint_decoder *cxl_endpoint_decoder_alloc(struct cxl_port *port); -int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map); +int cxl_decoder_add_locked(struct cxl_decoder *cxld); int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld); static inline int cxl_root_decoder_autoremove(struct device *host, struct cxl_root_decoder *cxlrd) @@ -806,12 +669,10 @@ struct cxl_endpoint_dvsec_info { struct range dvsec_range[2]; }; -struct cxl_hdm; -struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, - struct cxl_endpoint_dvsec_info *info); -int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, - struct cxl_endpoint_dvsec_info *info); -int devm_cxl_add_passthrough_decoder(struct cxl_port *port); +int devm_cxl_switch_port_decoders_setup(struct cxl_port *port); +int __devm_cxl_switch_port_decoders_setup(struct cxl_port *port); +int devm_cxl_endpoint_decoders_setup(struct cxl_port *port); + struct cxl_dev_state; int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, struct cxl_endpoint_dvsec_info *info); @@ -856,7 +717,8 @@ struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host, struct cxl_port *port); struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev); bool is_cxl_nvdimm(struct device *dev); -int devm_cxl_add_nvdimm(struct cxl_port *parent_port, struct cxl_memdev *cxlmd); +int devm_cxl_add_nvdimm(struct device *host, struct cxl_port *port, + struct cxl_memdev *cxlmd); struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_port *port); #ifdef CONFIG_CXL_REGION @@ -890,7 +752,7 @@ static inline u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, #endif void cxl_endpoint_parse_cdat(struct cxl_port *port); -void cxl_switch_parse_cdat(struct cxl_port *port); +void cxl_switch_parse_cdat(struct cxl_dport *dport); int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, struct access_coordinate *coord); @@ -905,6 +767,10 @@ void cxl_coordinates_combine(struct access_coordinate *out, struct access_coordinate *c2); bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port); +struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev); +struct cxl_dport *__devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev); /* * Unit test builds overrides this to __weak, find the 'strong' version @@ -915,4 +781,21 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port); #endif u16 cxl_gpf_get_dvsec(struct device *dev); + +/* + * Declaration for functions that are mocked by cxl_test that are called by + * cxl_core. The respective functions are defined as __foo() and called by + * cxl_core as foo(). The macros below ensures that those functions would + * exist as foo(). See tools/testing/cxl/cxl_core_exports.c and + * tools/testing/cxl/exports.h for setting up the mock functions. The dance + * is done to avoid a circular dependency where cxl_core calls a function that + * ends up being a mock function and goes to * cxl_test where it calls a + * cxl_core function. 
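+ *
+ * For example, in non-CXL_TEST_ENABLE builds a cxl_core call to
+ * devm_cxl_add_dport_by_dev() expands to __devm_cxl_add_dport_by_dev().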
+ */ +#ifndef CXL_TEST_ENABLE +#define DECLARE_TESTABLE(x) __##x +#define devm_cxl_add_dport_by_dev DECLARE_TESTABLE(devm_cxl_add_dport_by_dev) +#define devm_cxl_switch_port_decoders_setup DECLARE_TESTABLE(devm_cxl_switch_port_decoders_setup) +#endif + #endif /* __CXL_H__ */ diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 751478dfc4106..c98db6f18aa29 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -43,6 +43,7 @@ * @cxl_nvb: coordinate removal of @cxl_nvd if present * @cxl_nvd: optional bridge to an nvdimm if the device supports pmem * @endpoint: connection to the CXL port topology for this memory device + * @attach: creator of this memdev depends on CXL link attach to operate * @id: id number of this memdev instance. * @depth: endpoint port depth * @scrub_cycle: current scrub cycle set for this device @@ -59,11 +60,12 @@ struct cxl_memdev { struct cxl_nvdimm_bridge *cxl_nvb; struct cxl_nvdimm *cxl_nvd; struct cxl_port *endpoint; + const struct cxl_memdev_attach *attach; int id; int depth; u8 scrub_cycle; int scrub_region_id; - void *err_rec_array; + struct cxl_mem_err_rec *err_rec_array; }; static inline struct cxl_memdev *to_cxl_memdev(struct device *dev) @@ -95,8 +97,8 @@ static inline bool is_cxl_endpoint(struct cxl_port *port) return is_cxl_memdev(port->uport_dev); } -struct cxl_memdev *devm_cxl_add_memdev(struct device *host, - struct cxl_dev_state *cxlds); +struct cxl_memdev *__devm_cxl_add_memdev(struct cxl_dev_state *cxlds, + const struct cxl_memdev_attach *attach); int devm_cxl_sanitize_setup_notifier(struct device *host, struct cxl_memdev *cxlmd); struct cxl_memdev_state; @@ -105,8 +107,6 @@ int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, resource_size_t base, resource_size_t len, resource_size_t skipped); -#define CXL_NR_PARTITIONS_MAX 2 - struct cxl_dpa_info { u64 size; struct cxl_dpa_part_info { @@ -365,87 +365,6 @@ struct cxl_security_state { struct kernfs_node *sanitize_node; }; -/* - * enum cxl_devtype - delineate type-2 from a generic type-3 device - * @CXL_DEVTYPE_DEVMEM - Vendor specific CXL Type-2 device implementing HDM-D or - * HDM-DB, no requirement that this device implements a - * mailbox, or other memory-device-standard manageability - * flows. - * @CXL_DEVTYPE_CLASSMEM - Common class definition of a CXL Type-3 device with - * HDM-H and class-mandatory memory device registers - */ -enum cxl_devtype { - CXL_DEVTYPE_DEVMEM, - CXL_DEVTYPE_CLASSMEM, -}; - -/** - * struct cxl_dpa_perf - DPA performance property entry - * @dpa_range: range for DPA address - * @coord: QoS performance data (i.e. latency, bandwidth) - * @cdat_coord: raw QoS performance data from CDAT - * @qos_class: QoS Class cookies - */ -struct cxl_dpa_perf { - struct range dpa_range; - struct access_coordinate coord[ACCESS_COORDINATE_MAX]; - struct access_coordinate cdat_coord[ACCESS_COORDINATE_MAX]; - int qos_class; -}; - -/** - * struct cxl_dpa_partition - DPA partition descriptor - * @res: shortcut to the partition in the DPA resource tree (cxlds->dpa_res) - * @perf: performance attributes of the partition from CDAT - * @mode: operation mode for the DPA capacity, e.g. ram, pmem, dynamic... - */ -struct cxl_dpa_partition { - struct resource res; - struct cxl_dpa_perf perf; - enum cxl_partition_mode mode; -}; - -/** - * struct cxl_dev_state - The driver device state - * - * cxl_dev_state represents the CXL driver/device state. It provides an - * interface to mailbox commands as well as some cached data about the device. 
- * Currently only memory devices are represented. - * - * @dev: The device associated with this CXL state - * @cxlmd: The device representing the CXL.mem capabilities of @dev - * @reg_map: component and ras register mapping parameters - * @regs: Parsed register blocks - * @cxl_dvsec: Offset to the PCIe device DVSEC - * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH) - * @media_ready: Indicate whether the device media is usable - * @dpa_res: Overall DPA resource tree for the device - * @part: DPA partition array - * @nr_partitions: Number of DPA partitions - * @serial: PCIe Device Serial Number - * @type: Generic Memory Class device or Vendor Specific Memory device - * @cxl_mbox: CXL mailbox context - * @cxlfs: CXL features context - */ -struct cxl_dev_state { - struct device *dev; - struct cxl_memdev *cxlmd; - struct cxl_register_map reg_map; - struct cxl_regs regs; - int cxl_dvsec; - bool rcd; - bool media_ready; - struct resource dpa_res; - struct cxl_dpa_partition part[CXL_NR_PARTITIONS_MAX]; - unsigned int nr_partitions; - u64 serial; - enum cxl_devtype type; - struct cxl_mailbox cxl_mbox; -#ifdef CONFIG_CXL_FEATURES - struct cxl_features_state *cxlfs; -#endif -}; - static inline resource_size_t cxl_pmem_size(struct cxl_dev_state *cxlds) { /* @@ -850,7 +769,8 @@ int cxl_dev_state_identify(struct cxl_memdev_state *mds); int cxl_await_media_ready(struct cxl_dev_state *cxlds); int cxl_enumerate_cmds(struct cxl_memdev_state *mds); int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info); -struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev); +struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev, u64 serial, + u16 dvsec); void set_exclusive_cxl_commands(struct cxl_memdev_state *mds, unsigned long *cmds); void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds, @@ -869,13 +789,14 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, int cxl_trigger_poison_list(struct cxl_memdev *cxlmd); int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa); int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa); +int cxl_inject_poison_locked(struct cxl_memdev *cxlmd, u64 dpa); +int cxl_clear_poison_locked(struct cxl_memdev *cxlmd, u64 dpa); #ifdef CONFIG_CXL_EDAC_MEM_FEATURES int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd); int devm_cxl_region_edac_register(struct cxl_region *cxlr); int cxl_store_rec_gen_media(struct cxl_memdev *cxlmd, union cxl_event *evt); int cxl_store_rec_dram(struct cxl_memdev *cxlmd, union cxl_event *evt); -void devm_cxl_memdev_edac_release(struct cxl_memdev *cxlmd); #else static inline int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd) { return 0; } @@ -887,8 +808,6 @@ static inline int cxl_store_rec_gen_media(struct cxl_memdev *cxlmd, static inline int cxl_store_rec_dram(struct cxl_memdev *cxlmd, union cxl_event *evt) { return 0; } -static inline void devm_cxl_memdev_edac_release(struct cxl_memdev *cxlmd) -{ return; } #endif #ifdef CONFIG_CXL_SUSPEND diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 67ad5b007498e..93df1b1fa3268 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -7,89 +7,12 @@ #define CXL_MEMORY_PROGIF 0x10 -/* - * See section 8.1 Configuration Space Registers in the CXL 2.0 - * Specification. Names are taken straight from the specification with "CXL" and - * "DVSEC" redundancies removed. When obvious, abbreviations may be used. 
- */ -#define PCI_DVSEC_HEADER1_LENGTH_MASK GENMASK(31, 20) - -/* CXL 2.0 8.1.3: PCIe DVSEC for CXL Device */ -#define CXL_DVSEC_PCIE_DEVICE 0 -#define CXL_DVSEC_CAP_OFFSET 0xA -#define CXL_DVSEC_CACHE_CAPABLE BIT(0) -#define CXL_DVSEC_MEM_CAPABLE BIT(2) -#define CXL_DVSEC_HDM_COUNT_MASK GENMASK(5, 4) -#define CXL_DVSEC_CACHE_WBI_CAPABLE BIT(6) -#define CXL_DVSEC_CXL_RST_CAPABLE BIT(7) -#define CXL_DVSEC_CXL_RST_TIMEOUT_MASK GENMASK(10, 8) -#define CXL_DVSEC_CXL_RST_MEM_CLR_CAPABLE BIT(11) -#define CXL_DVSEC_CTRL_OFFSET 0xC -#define CXL_DVSEC_MEM_ENABLE BIT(2) -#define CXL_DVSEC_CTRL2_OFFSET 0x10 -#define CXL_DVSEC_DISABLE_CACHING BIT(0) -#define CXL_DVSEC_INIT_CACHE_WBI BIT(1) -#define CXL_DVSEC_INIT_CXL_RESET BIT(2) -#define CXL_DVSEC_CXL_RST_MEM_CLR_ENABLE BIT(3) -#define CXL_DVSEC_STATUS2_OFFSET 0x12 -#define CXL_DVSEC_CACHE_INVALID BIT(0) -#define CXL_DVSEC_CXL_RST_COMPLETE BIT(1) -#define CXL_DVSEC_CXL_RESET_ERR BIT(2) -#define CXL_DVSEC_RANGE_SIZE_HIGH(i) (0x18 + ((i) * 0x10)) -#define CXL_DVSEC_RANGE_SIZE_LOW(i) (0x1C + ((i) * 0x10)) -#define CXL_DVSEC_MEM_INFO_VALID BIT(0) -#define CXL_DVSEC_MEM_ACTIVE BIT(1) -#define CXL_DVSEC_MEM_SIZE_LOW_MASK GENMASK(31, 28) -#define CXL_DVSEC_RANGE_BASE_HIGH(i) (0x20 + ((i) * 0x10)) -#define CXL_DVSEC_RANGE_BASE_LOW(i) (0x24 + ((i) * 0x10)) -#define CXL_DVSEC_MEM_BASE_LOW_MASK GENMASK(31, 28) - -#define CXL_DVSEC_RANGE_MAX 2 - -/* CXL 2.0 8.1.4: Non-CXL Function Map DVSEC */ -#define CXL_DVSEC_FUNCTION_MAP 2 - -/* CXL 2.0 8.1.5: CXL 2.0 Extensions DVSEC for Ports */ -#define CXL_DVSEC_PORT_EXTENSIONS 3 - -/* CXL 2.0 8.1.6: GPF DVSEC for CXL Port */ -#define CXL_DVSEC_PORT_GPF 4 -#define CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET 0x0C -#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK GENMASK(3, 0) -#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK GENMASK(11, 8) -#define CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET 0xE -#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK GENMASK(3, 0) -#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK GENMASK(11, 8) - -/* CXL 2.0 8.1.7: GPF DVSEC for CXL Device */ -#define CXL_DVSEC_DEVICE_GPF 5 - -/* CXL 2.0 8.1.8: PCIe DVSEC for Flex Bus Port */ -#define CXL_DVSEC_PCIE_FLEXBUS_PORT 7 - -/* CXL 2.0 8.1.9: Register Locator DVSEC */ -#define CXL_DVSEC_REG_LOCATOR 8 -#define CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET 0xC -#define CXL_DVSEC_REG_LOCATOR_BIR_MASK GENMASK(2, 0) -#define CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK GENMASK(15, 8) -#define CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK GENMASK(31, 16) - /* * NOTE: Currently all the functions which are enabled for CXL require their * vectors to be in the first 16. Use this as the default max. */ #define CXL_PCI_DEFAULT_MAX_VECTORS 16 -/* Register Block Identifier (RBI) */ -enum cxl_regloc_type { - CXL_REGLOC_RBI_EMPTY = 0, - CXL_REGLOC_RBI_COMPONENT, - CXL_REGLOC_RBI_VIRT, - CXL_REGLOC_RBI_MEMDEV, - CXL_REGLOC_RBI_PMU, - CXL_REGLOC_RBI_TYPES -}; - /* * Table Access DOE, CDAT Read Entry Response * @@ -141,12 +64,36 @@ static inline bool cxl_pci_flit_256(struct pci_dev *pdev) return lnksta2 & PCI_EXP_LNKSTA2_FLIT; } -int devm_cxl_port_enumerate_dports(struct cxl_port *port); +/* + * Assume that the caller has already validated that @pdev has CXL + * capabilities, any RCiEP with CXL capabilities is treated as a + * Restricted CXL Device (RCD) and finds upstream port and endpoint + * registers in a Root Complex Register Block (RCRB). 
+ */ +static inline bool is_cxl_restricted(struct pci_dev *pdev) +{ + return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END; +} + struct cxl_dev_state; -int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, - struct cxl_endpoint_dvsec_info *info); void read_cdat_data(struct cxl_port *port); + +#ifdef CONFIG_CXL_RAS void cxl_cor_error_detected(struct pci_dev *pdev); pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, pci_channel_state_t state); +void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host); +#else +static inline void cxl_cor_error_detected(struct pci_dev *pdev) { } + +static inline pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + return PCI_ERS_RESULT_NONE; +} + +static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport, + struct device *host) { } +#endif + #endif /* __CXL_PCI_H__ */ diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 6e6777b7bafb5..39687baedd1a9 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -45,44 +45,6 @@ static int cxl_mem_dpa_show(struct seq_file *file, void *data) return 0; } -static int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd, - struct cxl_dport *parent_dport) -{ - struct cxl_port *parent_port = parent_dport->port; - struct cxl_port *endpoint, *iter, *down; - int rc; - - /* - * Now that the path to the root is established record all the - * intervening ports in the chain. - */ - for (iter = parent_port, down = NULL; !is_cxl_root(iter); - down = iter, iter = to_cxl_port(iter->dev.parent)) { - struct cxl_ep *ep; - - ep = cxl_ep_load(iter, cxlmd); - ep->next = down; - } - - /* Note: endpoint port component registers are derived from @cxlds */ - endpoint = devm_cxl_add_port(host, &cxlmd->dev, CXL_RESOURCE_NONE, - parent_dport); - if (IS_ERR(endpoint)) - return PTR_ERR(endpoint); - - rc = cxl_endpoint_autoremove(cxlmd, endpoint); - if (rc) - return rc; - - if (!endpoint->dev.driver) { - dev_err(&cxlmd->dev, "%s failed probe\n", - dev_name(&endpoint->dev)); - return -ENXIO; - } - - return 0; -} - static int cxl_debugfs_poison_inject(void *data, u64 dpa) { struct cxl_memdev *cxlmd = data; @@ -103,6 +65,26 @@ static int cxl_debugfs_poison_clear(void *data, u64 dpa) DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_clear_fops, NULL, cxl_debugfs_poison_clear, "%llx\n"); +static void cxl_memdev_poison_enable(struct cxl_memdev_state *mds, + struct cxl_memdev *cxlmd, + struct dentry *dentry) +{ + /* + * Avoid poison debugfs for DEVMEM aka accelerators as they rely on + * cxl_memdev_state. 
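+ * (A NULL @mds below indicates such a device, so no poison debugfs
+ * entries are created for it.)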
+ */ + if (!mds) + return; + + if (test_bit(CXL_POISON_ENABLED_INJECT, mds->poison.enabled_cmds)) + debugfs_create_file("inject_poison", 0200, dentry, cxlmd, + &cxl_poison_inject_fops); + + if (test_bit(CXL_POISON_ENABLED_CLEAR, mds->poison.enabled_cmds)) + debugfs_create_file("clear_poison", 0200, dentry, cxlmd, + &cxl_poison_clear_fops); +} + static int cxl_mem_probe(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); @@ -130,12 +112,7 @@ static int cxl_mem_probe(struct device *dev) dentry = cxl_debugfs_create_dir(dev_name(dev)); debugfs_create_devm_seqfile(dev, "dpamem", dentry, cxl_mem_dpa_show); - if (test_bit(CXL_POISON_ENABLED_INJECT, mds->poison.enabled_cmds)) - debugfs_create_file("inject_poison", 0200, dentry, cxlmd, - &cxl_poison_inject_fops); - if (test_bit(CXL_POISON_ENABLED_CLEAR, mds->poison.enabled_cmds)) - debugfs_create_file("clear_poison", 0200, dentry, cxlmd, - &cxl_poison_clear_fops); + cxl_memdev_poison_enable(mds, cxlmd, dentry); rc = devm_add_action_or_reset(dev, remove_debugfs, dentry); if (rc) @@ -153,7 +130,7 @@ static int cxl_mem_probe(struct device *dev) } if (cxl_pmem_size(cxlds) && IS_ENABLED(CONFIG_CXL_PMEM)) { - rc = devm_cxl_add_nvdimm(parent_port, cxlmd); + rc = devm_cxl_add_nvdimm(dev, parent_port, cxlmd); if (rc) { if (rc == -ENODEV) dev_info(dev, "PMEM disabled by platform\n"); @@ -180,6 +157,12 @@ static int cxl_mem_probe(struct device *dev) return rc; } + if (cxlmd->attach) { + rc = cxlmd->attach->probe(cxlmd); + if (rc) + return rc; + } + rc = devm_cxl_memdev_edac_register(cxlmd); if (rc) dev_dbg(dev, "CXL memdev EDAC registration failed rc=%d\n", rc); @@ -201,6 +184,29 @@ static int cxl_mem_probe(struct device *dev) return devm_add_action_or_reset(dev, enable_suspend, NULL); } +/** + * devm_cxl_add_memdev - Add a CXL memory device + * @cxlds: CXL device state to associate with the memdev + * @attach: Caller depends on CXL topology attachment + * + * Upon return the device will have had a chance to attach to the + * cxl_mem driver, but may fail to attach if the CXL topology is not ready + * (hardware CXL link down, or software platform CXL root not attached). + * + * When @attach is NULL it indicates the caller wants the memdev to remain + * registered even if it does not immediately attach to the CXL hierarchy. When + * @attach is provided a cxl_mem_probe() failure leads to failure of this routine. + * + * The parent of the resulting device and the devm context for allocations is + * @cxlds->dev. 
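+ *
+ * Returns the new memdev on success, or an ERR_PTR() on failure.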
+ */ +struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds, + const struct cxl_memdev_attach *attach) +{ + return __devm_cxl_add_memdev(cxlds, attach); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, "CXL"); + static ssize_t trigger_poison_list_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) @@ -217,16 +223,24 @@ static ssize_t trigger_poison_list_store(struct device *dev, } static DEVICE_ATTR_WO(trigger_poison_list); -static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n) +static bool cxl_poison_attr_visible(struct kobject *kobj, struct attribute *a) { struct device *dev = kobj_to_dev(kobj); struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); - if (a == &dev_attr_trigger_poison_list.attr) - if (!test_bit(CXL_POISON_ENABLED_LIST, - mds->poison.enabled_cmds)) - return 0; + if (!mds || + !test_bit(CXL_POISON_ENABLED_LIST, mds->poison.enabled_cmds)) + return false; + + return true; +} + +static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n) +{ + if (a == &dev_attr_trigger_poison_list.attr && + !cxl_poison_attr_visible(kobj, a)) + return 0; return a->mode; } @@ -248,6 +262,7 @@ static struct cxl_driver cxl_mem_driver = { .probe = cxl_mem_probe, .id = CXL_DEVICE_MEMORY_EXPANDER, .drv = { + .probe_type = PROBE_FORCE_SYNCHRONOUS, .dev_groups = cxl_mem_groups, }, }; @@ -258,8 +273,3 @@ MODULE_DESCRIPTION("CXL: Memory Expansion"); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS("CXL"); MODULE_ALIAS_CXL(CXL_DEVICE_MEMORY_EXPANDER); -/* - * create_endpoint() wants to validate port driver attach immediately after - * endpoint registration. - */ -MODULE_SOFTDEP("pre: cxl_port"); diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index bd100ac31672d..7b4699fb88709 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "cxlmem.h" #include "cxlpci.h" @@ -136,7 +137,7 @@ static irqreturn_t cxl_pci_mbox_irq(int irq, void *id) if (opcode == CXL_MBOX_OP_SANITIZE) { mutex_lock(&cxl_mbox->mbox_mutex); if (mds->security.sanitize_node) - mod_delayed_work(system_wq, &mds->security.poll_dwork, 0); + mod_delayed_work(system_percpu_wq, &mds->security.poll_dwork, 0); mutex_unlock(&cxl_mbox->mbox_mutex); } else { /* short-circuit the wait in __cxl_pci_mbox_send_cmd() */ @@ -465,76 +466,6 @@ static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail) return 0; } -/* - * Assume that any RCIEP that emits the CXL memory expander class code - * is an RCD - */ -static bool is_cxl_restricted(struct pci_dev *pdev) -{ - return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END; -} - -static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev, - struct cxl_register_map *map, - struct cxl_dport *dport) -{ - resource_size_t component_reg_phys; - - *map = (struct cxl_register_map) { - .host = &pdev->dev, - .resource = CXL_RESOURCE_NONE, - }; - - struct cxl_port *port __free(put_cxl_port) = - cxl_pci_find_port(pdev, &dport); - if (!port) - return -EPROBE_DEFER; - - component_reg_phys = cxl_rcd_component_reg_phys(&pdev->dev, dport); - if (component_reg_phys == CXL_RESOURCE_NONE) - return -ENXIO; - - map->resource = component_reg_phys; - map->reg_type = CXL_REGLOC_RBI_COMPONENT; - map->max_size = CXL_COMPONENT_REG_BLOCK_SIZE; - - return 0; -} - -static int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, - struct cxl_register_map *map) -{ - int rc; - - rc = cxl_find_regblock(pdev, 
type, map); - - /* - * If the Register Locator DVSEC does not exist, check if it - * is an RCH and try to extract the Component Registers from - * an RCRB. - */ - if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) { - struct cxl_dport *dport; - struct cxl_port *port __free(put_cxl_port) = - cxl_pci_find_port(pdev, &dport); - if (!port) - return -EPROBE_DEFER; - - rc = cxl_rcrb_get_comp_regs(pdev, map, dport); - if (rc) - return rc; - - rc = cxl_dport_map_rcd_linkcap(pdev, dport); - if (rc) - return rc; - - } else if (rc) { - return rc; - } - - return cxl_setup_regs(map); -} - static int cxl_pci_ras_unmask(struct pci_dev *pdev) { struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); @@ -911,32 +842,25 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) int rc, pmu_count; unsigned int i; bool irq_avail; - - /* - * Double check the anonymous union trickery in struct cxl_regs - * FIXME switch to struct_group() - */ - BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) != - offsetof(struct cxl_regs, device_regs.memdev)); + u16 dvsec; rc = pcim_enable_device(pdev); if (rc) return rc; pci_set_master(pdev); - mds = cxl_memdev_state_create(&pdev->dev); + dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, + PCI_DVSEC_CXL_DEVICE); + if (!dvsec) + pci_warn(pdev, "Device DVSEC not present, skip CXL.mem init\n"); + + mds = cxl_memdev_state_create(&pdev->dev, pci_get_dsn(pdev), dvsec); if (IS_ERR(mds)) return PTR_ERR(mds); cxlds = &mds->cxlds; pci_set_drvdata(pdev, cxlds); cxlds->rcd = is_cxl_restricted(pdev); - cxlds->serial = pci_get_dsn(pdev); - cxlds->cxl_dvsec = pci_find_dvsec_capability( - pdev, PCI_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE); - if (!cxlds->cxl_dvsec) - dev_warn(&pdev->dev, - "Device DVSEC not present, skip CXL.mem init\n"); rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map); if (rc) @@ -1006,7 +930,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) dev_dbg(&pdev->dev, "No CXL Features discovered\n"); - cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds); + cxlmd = devm_cxl_add_memdev(cxlds, NULL); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index cf32dc50b7a61..7937e7e53797c 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -59,55 +59,20 @@ static int discover_region(struct device *dev, void *unused) static int cxl_switch_port_probe(struct cxl_port *port) { - struct cxl_hdm *cxlhdm; - int rc; + /* Reset nr_dports for rebind of driver */ + port->nr_dports = 0; /* Cache the data early to ensure is_visible() works */ read_cdat_data(port); - rc = devm_cxl_port_enumerate_dports(port); - if (rc < 0) - return rc; - - cxl_switch_parse_cdat(port); - - cxlhdm = devm_cxl_setup_hdm(port, NULL); - if (!IS_ERR(cxlhdm)) - return devm_cxl_enumerate_decoders(cxlhdm, NULL); - - if (PTR_ERR(cxlhdm) != -ENODEV) { - dev_err(&port->dev, "Failed to map HDM decoder capability\n"); - return PTR_ERR(cxlhdm); - } - - if (rc == 1) { - dev_dbg(&port->dev, "Fallback to passthrough decoder\n"); - return devm_cxl_add_passthrough_decoder(port); - } - - dev_err(&port->dev, "HDM decoder capability not found\n"); - return -ENXIO; + return 0; } static int cxl_endpoint_port_probe(struct cxl_port *port) { - struct cxl_endpoint_dvsec_info info = { .port = port }; struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); - struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_hdm *cxlhdm; int rc; - rc = cxl_dvsec_rr_decode(cxlds, &info); - if (rc < 0) - return rc; - - 
cxlhdm = devm_cxl_setup_hdm(port, &info); - if (IS_ERR(cxlhdm)) { - if (PTR_ERR(cxlhdm) == -ENODEV) - dev_err(&port->dev, "HDM decoder registers not found\n"); - return PTR_ERR(cxlhdm); - } - /* Cache the data early to ensure is_visible() works */ read_cdat_data(port); cxl_endpoint_parse_cdat(port); @@ -117,11 +82,7 @@ static int cxl_endpoint_port_probe(struct cxl_port *port) if (rc) return rc; - rc = cxl_hdm_decode_init(cxlds, cxlhdm, &info); - if (rc) - return rc; - - rc = devm_cxl_enumerate_decoders(cxlhdm, &info); + rc = devm_cxl_endpoint_decoders_setup(port); if (rc) return rc; @@ -195,10 +156,50 @@ static struct cxl_driver cxl_port_driver = { .probe = cxl_port_probe, .id = CXL_DEVICE_PORT, .drv = { + .probe_type = PROBE_FORCE_SYNCHRONOUS, .dev_groups = cxl_port_attribute_groups, }, }; +int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd, + struct cxl_dport *parent_dport) +{ + struct cxl_port *parent_port = parent_dport->port; + struct cxl_port *endpoint, *iter, *down; + int rc; + + /* + * Now that the path to the root is established record all the + * intervening ports in the chain. + */ + for (iter = parent_port, down = NULL; !is_cxl_root(iter); + down = iter, iter = to_cxl_port(iter->dev.parent)) { + struct cxl_ep *ep; + + ep = cxl_ep_load(iter, cxlmd); + ep->next = down; + } + + /* Note: endpoint port component registers are derived from @cxlds */ + endpoint = devm_cxl_add_port(host, &cxlmd->dev, CXL_RESOURCE_NONE, + parent_dport); + if (IS_ERR(endpoint)) + return PTR_ERR(endpoint); + + rc = cxl_endpoint_autoremove(cxlmd, endpoint); + if (rc) + return rc; + + if (!endpoint->dev.driver) { + dev_err(&cxlmd->dev, "%s failed probe\n", + dev_name(&endpoint->dev)); + return -ENXIO; + } + + return 0; +} +EXPORT_SYMBOL_FOR_MODULES(devm_cxl_add_endpoint, "cxl_mem"); + static int __init cxl_port_init(void) { return cxl_driver_register(&cxl_port_driver); diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig index c4c43434f3143..e959d9b4f4cef 100644 --- a/drivers/net/ethernet/sfc/Kconfig +++ b/drivers/net/ethernet/sfc/Kconfig @@ -66,6 +66,16 @@ config SFC_MCDI_LOGGING Driver-Interface) commands and responses, allowing debugging of driver/firmware interaction. The tracing is actually enabled by a sysfs file 'mcdi_logging' under the PCI device. +config SFC_CXL + bool "Solarflare SFC9100-family CXL support" + depends on SFC && CXL_BUS >= SFC + depends on CXL_REGION + default SFC + help + This enables SFC CXL support if the kernel is configuring CXL for + using CTPIO with CXL.mem. The SFC device with CXL support and + with a CXL-aware firmware can be used for minimizing latencies + when sending through CTPIO. 
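Editor's note on the SFC_CXL entry above: the "CXL_BUS >= SFC" dependency simply keeps the CXL core at least as built-in as sfc (so a built-in sfc cannot depend on a modular cxl core), and the help text describes a runtime fallback where CXL-backed CTPIO is only used if a CXL region was set up at probe time. A minimal, illustrative sketch of that decision follows; it is not part of the patch, the struct and function names (example_probe_data, example_choose_pio) are hypothetical, and only the cxl_pio_initialised / cxl_pio_in_use flag names are taken from this series.

/* Illustrative sketch only, not part of the patch. */
struct example_probe_data {
	bool cxl_pio_initialised;	/* a CXL region for CTPIO is ready */
	bool cxl_pio_in_use;		/* PIO buffers actually mapped via CXL */
};

static void example_choose_pio(struct example_probe_data *pd, bool cxl_region_ok)
{
	pd->cxl_pio_initialised = cxl_region_ok;

	if (pd->cxl_pio_initialised) {
		/* map PIO buffers through the CXL region (CTPIO over CXL.mem) */
		pd->cxl_pio_in_use = true;
	} else {
		/* keep the legacy write-combining BAR mapping */
		pd->cxl_pio_in_use = false;
	}
}
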
source "drivers/net/ethernet/sfc/falcon/Kconfig" source "drivers/net/ethernet/sfc/siena/Kconfig" diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile index d99039ec468d6..bb0f1891cde65 100644 --- a/drivers/net/ethernet/sfc/Makefile +++ b/drivers/net/ethernet/sfc/Makefile @@ -13,6 +13,7 @@ sfc-$(CONFIG_SFC_SRIOV) += sriov.o ef10_sriov.o ef100_sriov.o ef100_rep.o \ mae.o tc.o tc_bindings.o tc_counters.o \ tc_encap_actions.o tc_conntrack.o +sfc-$(CONFIG_SFC_CXL) += efx_cxl.o obj-$(CONFIG_SFC) += sfc.o obj-$(CONFIG_SFC_FALCON) += falcon/ diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index fcec81f862ec5..2bb6d3136c7c3 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -24,6 +24,7 @@ #include #include #include +#include "efx_cxl.h" /* Hardware control for EF10 architecture including 'Huntington'. */ @@ -106,7 +107,7 @@ static int efx_ef10_get_vf_index(struct efx_nic *efx) static int efx_ef10_init_datapath_caps(struct efx_nic *efx) { - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CAPABILITIES_V4_OUT_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CAPABILITIES_V7_OUT_LEN); struct efx_ef10_nic_data *nic_data = efx->nic_data; size_t outlen; int rc; @@ -177,6 +178,12 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx) efx->num_mac_stats); } + if (outlen < MC_CMD_GET_CAPABILITIES_V7_OUT_LEN) + nic_data->datapath_caps3 = 0; + else + nic_data->datapath_caps3 = MCDI_DWORD(outbuf, + GET_CAPABILITIES_V7_OUT_FLAGS3); + return 0; } @@ -919,6 +926,9 @@ static void efx_ef10_forget_old_piobufs(struct efx_nic *efx) static void efx_ef10_remove(struct efx_nic *efx) { struct efx_ef10_nic_data *nic_data = efx->nic_data; +#ifdef CONFIG_SFC_CXL + struct efx_probe_data *probe_data; +#endif int rc; #ifdef CONFIG_SFC_SRIOV @@ -949,7 +959,12 @@ static void efx_ef10_remove(struct efx_nic *efx) efx_mcdi_rx_free_indir_table(efx); +#ifdef CONFIG_SFC_CXL + probe_data = container_of(efx, struct efx_probe_data, efx); + if (nic_data->wc_membase && !probe_data->cxl_pio_in_use) +#else if (nic_data->wc_membase) +#endif iounmap(nic_data->wc_membase); rc = efx_mcdi_free_vis(efx); @@ -1140,6 +1155,9 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx) unsigned int channel_vis, pio_write_vi_base, max_vis; struct efx_ef10_nic_data *nic_data = efx->nic_data; unsigned int uc_mem_map_size, wc_mem_map_size; +#ifdef CONFIG_SFC_CXL + struct efx_probe_data *probe_data; +#endif void __iomem *membase; int rc; @@ -1263,8 +1281,25 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx) iounmap(efx->membase); efx->membase = membase; - /* Set up the WC mapping if needed */ - if (wc_mem_map_size) { + if (!wc_mem_map_size) + goto skip_pio; + + /* Set up the WC mapping */ + +#ifdef CONFIG_SFC_CXL + probe_data = container_of(efx, struct efx_probe_data, efx); + if ((nic_data->datapath_caps3 & + (1 << MC_CMD_GET_CAPABILITIES_V7_OUT_CXL_CONFIG_ENABLE_LBN)) && + probe_data->cxl_pio_initialised) { + /* Using PIO through CXL mapping? 
*/ + nic_data->pio_write_base = probe_data->cxl->ctpio_cxl + + (pio_write_vi_base * efx->vi_stride + + ER_DZ_TX_PIOBUF - uc_mem_map_size); + probe_data->cxl_pio_in_use = true; + } else +#endif + { + /* Using legacy PIO BAR mapping */ nic_data->wc_membase = ioremap_wc(efx->membase_phys + uc_mem_map_size, wc_mem_map_size); @@ -1279,12 +1314,13 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx) nic_data->wc_membase + (pio_write_vi_base * efx->vi_stride + ER_DZ_TX_PIOBUF - uc_mem_map_size); - - rc = efx_ef10_link_piobufs(efx); - if (rc) - efx_ef10_free_piobufs(efx); } + rc = efx_ef10_link_piobufs(efx); + if (rc) + efx_ef10_free_piobufs(efx); + +skip_pio: netif_dbg(efx, probe, efx->net_dev, "memory BAR at %pa (virtual %p+%x UC, %p+%x WC)\n", &efx->membase_phys, efx->membase, uc_mem_map_size, diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 112e55b98ed3b..537668278375b 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -34,6 +34,7 @@ #include "selftest.h" #include "sriov.h" #include "efx_devlink.h" +#include "efx_cxl.h" #include "mcdi_port_common.h" #include "mcdi_pcol.h" @@ -981,12 +982,15 @@ static void efx_pci_remove(struct pci_dev *pci_dev) efx_pci_remove_main(efx); efx_fini_io(efx); + + probe_data = container_of(efx, struct efx_probe_data, efx); + efx_cxl_exit(probe_data); + pci_dbg(efx->pci_dev, "shutdown successful\n"); efx_fini_devlink_and_unlock(efx); efx_fini_struct(efx); free_netdev(efx->net_dev); - probe_data = container_of(efx, struct efx_probe_data, efx); kfree(probe_data); }; @@ -1190,6 +1194,15 @@ static int efx_pci_probe(struct pci_dev *pci_dev, if (rc) goto fail2; + /* A successful cxl initialization implies a CXL region created to be + * used for PIO buffers. If there is no CXL support, or initialization + * fails, efx_cxl_pio_initialised will be false and legacy PIO buffers + * defined at specific PCI BAR regions will be used. + */ + rc = efx_cxl_init(probe_data); + if (rc) + pci_err(pci_dev, "CXL initialization failed with error %d\n", rc); + rc = efx_pci_probe_post_io(efx); if (rc) { /* On failure, retry once immediately. diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c new file mode 100644 index 0000000000000..c13e1f2bf7eaf --- /dev/null +++ b/drivers/net/ethernet/sfc/efx_cxl.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * + * Driver for AMD network controllers and boards + * Copyright (C) 2025, Advanced Micro Devices, Inc. + */ + +#include + +#include +#include +#include "net_driver.h" +#include "efx_cxl.h" +#include "efx.h" + +#define EFX_CTPIO_BUFFER_SIZE SZ_256M + +int efx_cxl_init(struct efx_probe_data *probe_data) +{ + struct efx_nic *efx = &probe_data->efx; + struct pci_dev *pci_dev = efx->pci_dev; + resource_size_t max_size; + struct efx_cxl *cxl; + struct range range; + u16 dvsec; + int rc; + + probe_data->cxl_pio_initialised = false; + + /* Is the device configured with and using CXL? */ + if (!pcie_is_cxl(pci_dev)) + return 0; + + dvsec = pci_find_dvsec_capability(pci_dev, PCI_VENDOR_ID_CXL, + PCI_DVSEC_CXL_DEVICE); + if (!dvsec) { + pci_err(pci_dev, "CXL_DVSEC_PCIE_DEVICE capability not found\n"); + return 0; + } + + pci_dbg(pci_dev, "CXL_DVSEC_PCIE_DEVICE capability found\n"); + + /* Create a cxl_dev_state embedded in the cxl struct using cxl core api + * specifying no mbox available. 
+ */ + cxl = devm_cxl_dev_state_create(&pci_dev->dev, CXL_DEVTYPE_DEVMEM, + pci_dev->dev.id, dvsec, struct efx_cxl, + cxlds, false); + + if (!cxl) + return -ENOMEM; + + rc = cxl_pci_setup_regs(pci_dev, CXL_REGLOC_RBI_COMPONENT, + &cxl->cxlds.reg_map); + if (rc) { + pci_err(pci_dev, "No component registers\n"); + return rc; + } + + if (!cxl->cxlds.reg_map.component_map.hdm_decoder.valid) { + pci_err(pci_dev, "Expected HDM component register not found\n"); + return -ENODEV; + } + + if (!cxl->cxlds.reg_map.component_map.ras.valid) { + pci_err(pci_dev, "Expected RAS component register not found\n"); + return -ENODEV; + } + + rc = cxl_map_component_regs(&cxl->cxlds.reg_map, + &cxl->cxlds.regs.component, + BIT(CXL_CM_CAP_CAP_ID_RAS)); + if (rc) { + pci_err(pci_dev, "Failed to map RAS capability.\n"); + return rc; + } + + /* + * Set media ready explicitly as there are neither mailbox for checking + * this state nor the CXL register involved, both not mandatory for + * type2. + */ + cxl->cxlds.media_ready = true; + + if (cxl_set_capacity(&cxl->cxlds, EFX_CTPIO_BUFFER_SIZE)) { + pci_err(pci_dev, "dpa capacity setup failed\n"); + return -ENODEV; + } + + cxl->cxlmd = devm_cxl_add_memdev(&cxl->cxlds, NULL); + if (IS_ERR(cxl->cxlmd)) { + pci_err(pci_dev, "CXL accel memdev creation failed"); + return PTR_ERR(cxl->cxlmd); + } + + cxl->cxled = cxl_get_committed_decoder(cxl->cxlmd, &cxl->efx_region); + if (cxl->cxled) { + if (!cxl->efx_region) { + pci_err(pci_dev, "CXL found committed decoder without a region"); + return -ENODEV; + } + rc = cxl_get_region_range(cxl->efx_region, &range); + if (rc) { + pci_err(pci_dev, + "CXL getting regions params from a committed decoder failed"); + return rc; + } + + cxl->ctpio_cxl = ioremap(range.start, range.end - range.start + 1); + if (!cxl->ctpio_cxl) { + pci_err(pci_dev, "CXL ioremap region (%pra) failed", &range); + return -ENOMEM; + } + + cxl->hdm_was_committed = true; + } else { + cxl->cxlrd = cxl_get_hpa_freespace(cxl->cxlmd, 1, CXL_DECODER_F_RAM | + CXL_DECODER_F_TYPE2, &max_size); + if (IS_ERR(cxl->cxlrd)) { + dev_err(&pci_dev->dev, "cxl_get_hpa_freespace failed\n"); + return PTR_ERR(cxl->cxlrd); + } + + if (max_size < EFX_CTPIO_BUFFER_SIZE) { + dev_err(&pci_dev->dev, "%s: not enough free HPA space %pap < %u\n", + __func__, &max_size, EFX_CTPIO_BUFFER_SIZE); + cxl_put_root_decoder(cxl->cxlrd); + return -ENOSPC; + } + + cxl->cxled = cxl_request_dpa(cxl->cxlmd, CXL_PARTMODE_RAM, + EFX_CTPIO_BUFFER_SIZE); + if (IS_ERR(cxl->cxled)) { + pci_err(pci_dev, "CXL accel request DPA failed"); + cxl_put_root_decoder(cxl->cxlrd); + return PTR_ERR(cxl->cxled); + } + + cxl->efx_region = cxl_create_region(cxl->cxlrd, &cxl->cxled, 1); + if (IS_ERR(cxl->efx_region)) { + pci_err(pci_dev, "CXL accel create region failed"); + rc = PTR_ERR(cxl->efx_region); + goto err_region; + } + + rc = cxl_get_region_range(cxl->efx_region, &range); + if (rc) { + pci_err(pci_dev, "CXL getting regions params failed"); + goto err_map; + } + + cxl->ctpio_cxl = ioremap(range.start, range.end - range.start + 1); + if (!cxl->ctpio_cxl) { + pci_err(pci_dev, "CXL ioremap region (%pra) failed", &range); + rc = -ENOMEM; + goto err_map; + } + } + + probe_data->cxl = cxl; + probe_data->cxl_pio_initialised = true; + return 0; + +err_map: + cxl_unregister_region(cxl->efx_region); +err_region: + cxl_put_root_decoder(cxl->cxlrd); + cxl_dpa_free(cxl->cxled); + return rc; +} + +void efx_cxl_exit(struct efx_probe_data *probe_data) +{ + if (!probe_data->cxl) + return; + + iounmap(probe_data->cxl->ctpio_cxl); + + if 
(!probe_data->cxl->hdm_was_committed) { + cxl_dpa_free(probe_data->cxl->cxled); + cxl_put_root_decoder(probe_data->cxl->cxlrd); + } + cxl_unregister_region(probe_data->cxl->efx_region); +} + +MODULE_IMPORT_NS("CXL"); diff --git a/drivers/net/ethernet/sfc/efx_cxl.h b/drivers/net/ethernet/sfc/efx_cxl.h new file mode 100644 index 0000000000000..9a92e386695bb --- /dev/null +++ b/drivers/net/ethernet/sfc/efx_cxl.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for AMD network controllers and boards + * Copyright (C) 2025, Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_CXL_H +#define EFX_CXL_H + +#ifdef CONFIG_SFC_CXL + +#include + +struct cxl_root_decoder; +struct cxl_port; +struct cxl_endpoint_decoder; +struct cxl_region; +struct efx_probe_data; + +struct efx_cxl { + struct cxl_dev_state cxlds; + struct cxl_memdev *cxlmd; + struct cxl_root_decoder *cxlrd; + struct cxl_port *endpoint; + struct cxl_endpoint_decoder *cxled; + bool hdm_was_committed; + struct cxl_region *efx_region; + void __iomem *ctpio_cxl; +}; + +int efx_cxl_init(struct efx_probe_data *probe_data); +void efx_cxl_exit(struct efx_probe_data *probe_data); +#else +static inline int efx_cxl_init(struct efx_probe_data *probe_data) { return 0; } +static inline void efx_cxl_exit(struct efx_probe_data *probe_data) {} +#endif +#endif diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index b98c259f672db..bea4eecdf842d 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -1197,14 +1197,26 @@ struct efx_nic { atomic_t n_rx_noskb_drops; }; +#ifdef CONFIG_SFC_CXL +struct efx_cxl; +#endif + /** * struct efx_probe_data - State after hardware probe * @pci_dev: The PCI device * @efx: Efx NIC details + * @cxl: details of related cxl objects + * @cxl_pio_initialised: cxl initialization outcome. 
+ * @cxl_pio_in_use: PIO using CXL mapping */ struct efx_probe_data { struct pci_dev *pci_dev; struct efx_nic efx; +#ifdef CONFIG_SFC_CXL + struct efx_cxl *cxl; + bool cxl_pio_initialised; + bool cxl_pio_in_use; +#endif }; static inline struct efx_nic *efx_netdev_priv(struct net_device *dev) diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index 9fa5c4c713abd..c87cc9214690b 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -152,6 +152,8 @@ enum { * %MC_CMD_GET_CAPABILITIES response) * @datapath_caps2: Further Capabilities of datapath firmware (FLAGS2 field of * %MC_CMD_GET_CAPABILITIES response) + * @datapath_caps3: Further Capabilities of datapath firmware (FLAGS3 field of + * %MC_CMD_GET_CAPABILITIES response) * @rx_dpcpu_fw_id: Firmware ID of the RxDPCPU * @tx_dpcpu_fw_id: Firmware ID of the TxDPCPU * @must_probe_vswitching: Flag: vswitching has yet to be setup after MC reboot @@ -186,6 +188,7 @@ struct efx_ef10_nic_data { bool must_check_datapath_caps; u32 datapath_caps; u32 datapath_caps2; + u32 datapath_caps3; unsigned int rx_dpcpu_fw_id; unsigned int tx_dpcpu_fw_id; bool must_probe_vswitching; diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index 88dc062af5f84..42e982db5b049 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -110,7 +110,7 @@ static void nd_region_remove(struct device *dev) * here is ok. */ if (cpu_cache_has_invalidate_memregion()) - cpu_cache_invalidate_memregion(IORES_DESC_PERSISTENT_MEMORY); + cpu_cache_invalidate_all(); } static int child_notify(struct device *dev, void *data) diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index de1ee5ebc8516..e27fc380f6c0b 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -90,7 +90,7 @@ static int nd_region_invalidate_memregion(struct nd_region *nd_region) } } - cpu_cache_invalidate_memregion(IORES_DESC_PERSISTENT_MEMORY); + cpu_cache_invalidate_all(); out: for (i = 0; i < nd_region->ndr_mappings; i++) { struct nd_mapping *nd_mapping = &nd_region->mapping[i]; diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 9a249c65aedcd..d094f9532b74f 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -119,6 +119,10 @@ config XEN_PCIDEV_FRONTEND The PCI device frontend driver allows the kernel to import arbitrary PCI devices from a PCI backend to support PCI driver domains. 
+config PCI_CXL + bool + default y if CXL_BUS + config PCI_ATS bool diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 67647f1880fb8..8d39e070c6ec0 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -37,6 +37,7 @@ obj-$(CONFIG_PCI_DOE) += doe.o obj-$(CONFIG_PCI_DYNAMIC_OF_NODES) += of_property.o obj-$(CONFIG_PCI_NPEM) += npem.o obj-$(CONFIG_PCIE_TPH) += tph.o +obj-$(CONFIG_PCI_CXL) += cxl.o # Endpoint library must be initialized before its users obj-$(CONFIG_PCI_ENDPOINT) += endpoint/ diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c index 6db45ae2cc8e3..ae3152be018a7 100644 --- a/drivers/pci/ats.c +++ b/drivers/pci/ats.c @@ -218,12 +218,12 @@ static bool pci_cxl_ats_always_on(struct pci_dev *pdev) u16 cap; offset = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, - CXL_DVSEC_PCIE_DEVICE); + PCI_DVSEC_CXL_DEVICE); if (!offset) return false; - pci_read_config_word(pdev, offset + CXL_DVSEC_CAP_OFFSET, &cap); - if (cap & CXL_DVSEC_CACHE_CAPABLE) + pci_read_config_word(pdev, offset + PCI_DVSEC_CXL_CAP, &cap); + if (cap & PCI_DVSEC_CXL_CACHE_CAPABLE) return true; return false; diff --git a/drivers/pci/cxl.c b/drivers/pci/cxl.c new file mode 100644 index 0000000000000..900d0316f38d0 --- /dev/null +++ b/drivers/pci/cxl.c @@ -0,0 +1,469 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * CXL PCI state save/restore support. + * + * Saves and restores CXL DVSEC and HDM decoder registers across PCI resets + * and link disable/enable transitions. Hooked into pci_save_state() / + * pci_restore_state() via the PCI capability save chain. + */ +#include +#include +#include +#include +#include +#include +#include "pci.h" + +#define CXL_HDM_MAX_DECODERS 32 + +struct cxl_hdm_decoder_snapshot { + u32 base_lo; + u32 base_hi; + u32 size_lo; + u32 size_hi; + u32 ctrl; + u32 tl_lo; + u32 tl_hi; +}; + +struct cxl_pci_state { + /* DVSEC saved state */ + u16 dvsec; + u16 dvsec_ctrl; + u16 dvsec_ctrl2; + u32 range_base_hi[CXL_DVSEC_RANGE_MAX]; + u32 range_base_lo[CXL_DVSEC_RANGE_MAX]; + u16 dvsec_lock; + bool dvsec_valid; + + /* HDM decoder saved state */ + int hdm_bar; + unsigned long hdm_bar_offset; + unsigned long hdm_map_size; + u32 hdm_global_ctrl; + int hdm_count; + struct cxl_hdm_decoder_snapshot decoders[CXL_HDM_MAX_DECODERS]; + bool hdm_valid; +}; + +static void cxl_save_dvsec(struct pci_dev *pdev, struct cxl_pci_state *state) +{ + int rc_ctrl, rc_ctrl2; + u16 dvsec; + int i; + + dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, + PCI_DVSEC_CXL_DEVICE); + if (!dvsec) + return; + + state->dvsec = dvsec; + rc_ctrl = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL, + &state->dvsec_ctrl); + rc_ctrl2 = pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_CTRL2, + &state->dvsec_ctrl2); + if (rc_ctrl || rc_ctrl2) { + pci_warn(pdev, + "CXL: DVSEC read failed (ctrl rc=%d, ctrl2 rc=%d)\n", + rc_ctrl, rc_ctrl2); + return; + } + + for (i = 0; i < CXL_DVSEC_RANGE_MAX; i++) { + pci_read_config_dword(pdev, + dvsec + PCI_DVSEC_CXL_RANGE_BASE_HIGH(i), + &state->range_base_hi[i]); + pci_read_config_dword(pdev, + dvsec + PCI_DVSEC_CXL_RANGE_BASE_LOW(i), + &state->range_base_lo[i]); + } + + pci_read_config_word(pdev, dvsec + PCI_DVSEC_CXL_LOCK, + &state->dvsec_lock); + + state->dvsec_valid = true; +} + +static u32 cxl_merge_rwl(u32 saved, u32 current_hw, u32 rwl_mask) +{ + return (current_hw & rwl_mask) | (saved & ~rwl_mask); +} + +static void cxl_restore_dvsec(struct pci_dev *pdev, + const struct cxl_pci_state *state) +{ + u16 lock_reg = 0; + int i; + + if (!state->dvsec_valid) + return; + + 
pci_read_config_word(pdev, state->dvsec + PCI_DVSEC_CXL_LOCK, + &lock_reg); + + if (lock_reg & PCI_DVSEC_CXL_LOCK_CONFIG) { + u16 hw_ctrl; + u32 hw_range_hi, hw_range_lo; + + pci_read_config_word(pdev, + state->dvsec + PCI_DVSEC_CXL_CTRL, + &hw_ctrl); + pci_write_config_word(pdev, + state->dvsec + PCI_DVSEC_CXL_CTRL, + cxl_merge_rwl(state->dvsec_ctrl, hw_ctrl, + PCI_DVSEC_CXL_CTRL_RWL)); + + pci_write_config_word(pdev, + state->dvsec + PCI_DVSEC_CXL_CTRL2, + state->dvsec_ctrl2); + + for (i = 0; i < CXL_DVSEC_RANGE_MAX; i++) { + pci_read_config_dword(pdev, + state->dvsec + PCI_DVSEC_CXL_RANGE_BASE_HIGH(i), + &hw_range_hi); + pci_write_config_dword(pdev, + state->dvsec + PCI_DVSEC_CXL_RANGE_BASE_HIGH(i), + cxl_merge_rwl(state->range_base_hi[i], + hw_range_hi, + PCI_DVSEC_CXL_RANGE_BASE_HI_RWL)); + + pci_read_config_dword(pdev, + state->dvsec + PCI_DVSEC_CXL_RANGE_BASE_LOW(i), + &hw_range_lo); + pci_write_config_dword(pdev, + state->dvsec + PCI_DVSEC_CXL_RANGE_BASE_LOW(i), + cxl_merge_rwl(state->range_base_lo[i], + hw_range_lo, + PCI_DVSEC_CXL_RANGE_BASE_LO_RWL)); + } + } else { + pci_write_config_word(pdev, + state->dvsec + PCI_DVSEC_CXL_CTRL, + state->dvsec_ctrl); + pci_write_config_word(pdev, + state->dvsec + PCI_DVSEC_CXL_CTRL2, + state->dvsec_ctrl2); + for (i = 0; i < CXL_DVSEC_RANGE_MAX; i++) { + pci_write_config_dword(pdev, + state->dvsec + PCI_DVSEC_CXL_RANGE_BASE_HIGH(i), + state->range_base_hi[i]); + pci_write_config_dword(pdev, + state->dvsec + PCI_DVSEC_CXL_RANGE_BASE_LOW(i), + state->range_base_lo[i]); + } + + pci_write_config_word(pdev, + state->dvsec + PCI_DVSEC_CXL_LOCK, + state->dvsec_lock); + } +} + +struct pci_cmd_saved { + struct pci_dev *pdev; + u16 cmd; +}; + +DEFINE_FREE(restore_pci_cmd, struct pci_cmd_saved, + if (!(_T.cmd & PCI_COMMAND_MEMORY)) + pci_write_config_word(_T.pdev, PCI_COMMAND, _T.cmd)) + +/** + * cxl_find_component_regblock - Find the Component Register Block via + * the Register Locator DVSEC + * @pdev: PCI device to scan + * @bir: output BAR index + * @offset: output offset within the BAR + * + * Parses the Register Locator DVSEC (ID 8) directly via PCI config space + * reads. No dependency on CXL module symbols. + * + * Return: 0 on success, -ENODEV if not found. + */ +static int cxl_find_component_regblock(struct pci_dev *pdev, + int *bir, u64 *offset) +{ + u32 regloc_size, regblocks; + u16 regloc; + int i; + + regloc = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, + PCI_DVSEC_CXL_REG_LOCATOR); + if (!regloc) + return -ENODEV; + + pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, ®loc_size); + regloc_size = PCI_DVSEC_HEADER1_LEN(regloc_size); + regblocks = (regloc_size - PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1) / 8; + + for (i = 0; i < regblocks; i++) { + u32 reg_lo, reg_hi; + unsigned int off; + + off = regloc + PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1 + i * 8; + pci_read_config_dword(pdev, off, ®_lo); + pci_read_config_dword(pdev, off + 4, ®_hi); + + if (FIELD_GET(PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_ID, reg_lo) != + CXL_REGLOC_RBI_COMPONENT) + continue; + + *bir = FIELD_GET(PCI_DVSEC_CXL_REG_LOCATOR_BIR, reg_lo); + *offset = ((u64)reg_hi << 32) | + (reg_lo & PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_OFF_LOW); + return 0; + } + + return -ENODEV; +} + +/* + * Discover and map HDM decoder registers. + * Caller must pci_iounmap() the returned pointer. 
+ */ +static void __iomem *cxl_hdm_map(struct pci_dev *pdev, int *bar_out, + unsigned long *offset_out, + unsigned long *size_out) +{ + int bir; + u64 reg_offset; + void __iomem *comp_base, *cm_base; + u32 cap_hdr; + int cap, cap_count; + unsigned long hdm_offset = 0, hdm_size = 0; + void __iomem *hdm; + + if (cxl_find_component_regblock(pdev, &bir, ®_offset)) + return NULL; + + comp_base = pci_iomap_range(pdev, bir, reg_offset, + CXL_CM_OFFSET + SZ_4K); + if (!comp_base) + return NULL; + + cm_base = comp_base + CXL_CM_OFFSET; + cap_hdr = readl(cm_base); + + if (FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, cap_hdr) != CM_CAP_HDR_CAP_ID) { + pci_iounmap(pdev, comp_base); + return NULL; + } + + cap_count = FIELD_GET(CXL_CM_CAP_HDR_ARRAY_SIZE_MASK, cap_hdr); + + for (cap = 1; cap <= cap_count; cap++) { + u32 hdr = readl(cm_base + cap * 4); + u16 cap_id = FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, hdr); + u32 cap_off = FIELD_GET(CXL_CM_CAP_PTR_MASK, hdr); + + if (cap_id != CXL_CM_CAP_CAP_ID_HDM) + continue; + + hdr = readl(cm_base + cap_off); + hdm_offset = CXL_CM_OFFSET + cap_off; + hdm_size = 0x20 * cxl_hdm_decoder_count(hdr) + 0x10; + break; + } + + pci_iounmap(pdev, comp_base); + + if (!hdm_size) + return NULL; + + hdm = pci_iomap_range(pdev, bir, reg_offset + hdm_offset, hdm_size); + if (!hdm) + return NULL; + + *bar_out = bir; + *offset_out = reg_offset + hdm_offset; + *size_out = hdm_size; + return hdm; +} + +static void cxl_save_hdm(struct pci_dev *pdev, void __iomem *hdm, + struct cxl_pci_state *state, int count) +{ + int i; + + state->hdm_count = min_t(int, count, CXL_HDM_MAX_DECODERS); + state->hdm_global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET); + + for (i = 0; i < state->hdm_count; i++) { + struct cxl_hdm_decoder_snapshot *d = &state->decoders[i]; + + d->base_lo = readl(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(i)); + d->base_hi = readl(hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i)); + d->size_lo = readl(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i)); + d->size_hi = readl(hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i)); + d->ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i)); + d->tl_lo = readl(hdm + CXL_HDM_DECODER0_TL_LOW(i)); + d->tl_hi = readl(hdm + CXL_HDM_DECODER0_TL_HIGH(i)); + } +} + +static void cxl_restore_hdm(struct pci_dev *pdev, void __iomem *hdm, + const struct cxl_pci_state *state) +{ + int i; + + writel(state->hdm_global_ctrl, hdm + CXL_HDM_DECODER_CTRL_OFFSET); + + for (i = 0; i < state->hdm_count; i++) { + const struct cxl_hdm_decoder_snapshot *d = &state->decoders[i]; + unsigned long timeout; + u32 ctrl; + + if (!(d->ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED)) + continue; + + ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i)); + if ((ctrl & CXL_HDM_DECODER0_CTRL_LOCK) && + (ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED)) + continue; + + if (ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED) { + ctrl &= ~CXL_HDM_DECODER0_CTRL_COMMIT; + writel(ctrl, hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i)); + } + + writel(d->base_lo, hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(i)); + writel(d->base_hi, hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i)); + writel(d->size_lo, hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i)); + writel(d->size_hi, hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i)); + writel(d->tl_lo, hdm + CXL_HDM_DECODER0_TL_LOW(i)); + writel(d->tl_hi, hdm + CXL_HDM_DECODER0_TL_HIGH(i)); + + wmb(); + + ctrl = d->ctrl & ~(CXL_HDM_DECODER0_CTRL_COMMITTED | + CXL_HDM_DECODER0_CTRL_COMMIT_ERROR); + ctrl |= CXL_HDM_DECODER0_CTRL_COMMIT; + writel(ctrl, hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i)); + + timeout = jiffies + msecs_to_jiffies(10); + for 
(;;) { + ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i)); + if (ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED) + break; + if (ctrl & CXL_HDM_DECODER0_CTRL_COMMIT_ERROR) { + pci_warn(pdev, + "HDM decoder %d commit error on restore\n", + i); + break; + } + if (time_after(jiffies, timeout)) { + pci_warn(pdev, + "HDM decoder %d commit timeout on restore\n", + i); + break; + } + cpu_relax(); + } + } +} + +static void cxl_save_hdm_decoders(struct pci_dev *pdev, + struct cxl_pci_state *state) +{ + int hdm_bar; + unsigned long hdm_bar_offset, hdm_map_size; + void __iomem *hdm; + u16 cmd; + u32 cap; + struct pci_cmd_saved saved __free(restore_pci_cmd) = { + .pdev = pdev, .cmd = PCI_COMMAND_MEMORY, + }; + + pci_read_config_word(pdev, PCI_COMMAND, &cmd); + saved.cmd = cmd; + if (!(cmd & PCI_COMMAND_MEMORY)) + pci_write_config_word(pdev, PCI_COMMAND, + cmd | PCI_COMMAND_MEMORY); + + hdm = cxl_hdm_map(pdev, &hdm_bar, &hdm_bar_offset, &hdm_map_size); + if (!hdm) + return; + + cap = readl(hdm + CXL_HDM_DECODER_CAP_OFFSET); + cxl_save_hdm(pdev, hdm, state, cxl_hdm_decoder_count(cap)); + state->hdm_bar = hdm_bar; + state->hdm_bar_offset = hdm_bar_offset; + state->hdm_map_size = hdm_map_size; + state->hdm_valid = true; + pci_iounmap(pdev, hdm); +} + +static void cxl_restore_hdm_decoders(struct pci_dev *pdev, + const struct cxl_pci_state *state) +{ + void __iomem *hdm; + u16 cmd; + struct pci_cmd_saved saved __free(restore_pci_cmd) = { + .pdev = pdev, .cmd = PCI_COMMAND_MEMORY, + }; + + if (!state->hdm_valid) + return; + + pci_read_config_word(pdev, PCI_COMMAND, &cmd); + saved.cmd = cmd; + if (!(cmd & PCI_COMMAND_MEMORY)) + pci_write_config_word(pdev, PCI_COMMAND, + cmd | PCI_COMMAND_MEMORY); + + hdm = pci_iomap_range(pdev, state->hdm_bar, state->hdm_bar_offset, + state->hdm_map_size); + if (!hdm) { + pci_warn(pdev, "CXL: failed to map HDM for restore\n"); + return; + } + + cxl_restore_hdm(pdev, hdm, state); + pci_iounmap(pdev, hdm); +} + +void pci_allocate_cxl_save_buffer(struct pci_dev *dev) +{ + if (!pcie_is_cxl(dev)) + return; + + if (pci_add_virtual_ext_cap_save_buffer(dev, + PCI_EXT_CAP_ID_CXL_DVSEC_VIRTUAL, + sizeof(struct cxl_pci_state))) + pci_err(dev, "unable to allocate CXL save buffer\n"); +} + +void pci_save_cxl_state(struct pci_dev *pdev) +{ + struct pci_cap_saved_state *save_state; + struct cxl_pci_state *state; + + save_state = pci_find_saved_ext_cap(pdev, + PCI_EXT_CAP_ID_CXL_DVSEC_VIRTUAL); + if (!save_state) + return; + + state = (struct cxl_pci_state *)save_state->cap.data; + state->dvsec_valid = false; + state->hdm_valid = false; + + cxl_save_dvsec(pdev, state); + cxl_save_hdm_decoders(pdev, state); +} + +void pci_restore_cxl_state(struct pci_dev *pdev) +{ + struct pci_cap_saved_state *save_state; + struct cxl_pci_state *state; + + save_state = pci_find_saved_ext_cap(pdev, + PCI_EXT_CAP_ID_CXL_DVSEC_VIRTUAL); + if (!save_state) + return; + + state = (struct cxl_pci_state *)save_state->cap.data; + if (!state->dvsec_valid && !state->hdm_valid) + return; + + cxl_restore_dvsec(pdev, state); + cxl_restore_hdm_decoders(pdev, state); +} diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 9a6943688e6db..f12e6eca601c3 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -30,7 +30,6 @@ #include #include #include -#include "../cxl/cxlpci.h" #include "pci.h" DEFINE_MUTEX(pci_slot_mutex); @@ -1818,6 +1817,7 @@ int pci_save_state(struct pci_dev *dev) pci_save_aer_state(dev); pci_save_ptm_state(dev); pci_save_tph_state(dev); + pci_save_cxl_state(dev); return pci_save_vc_state(dev); } 
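Editor's note: with pci_save_cxl_state()/pci_restore_cxl_state() hooked into the generic chain above, any caller that already brackets a config-destroying event with the standard helpers now carries the CXL DVSEC control fields and committed HDM decoder programming across it. A minimal usage sketch follows; it is illustrative only and not part of the patch, and the function name example_save_restore_cxl is hypothetical. The step in the middle stands in for whatever reset or link disable/enable transition wipes config space.

#include <linux/pci.h>

/* Illustrative sketch, not part of the patch. */
static int example_save_restore_cxl(struct pci_dev *pdev)
{
	int rc;

	rc = pci_save_state(pdev);	/* now also runs pci_save_cxl_state() */
	if (rc)
		return rc;

	/* ... reset or link down/up wipes config space here ... */

	pci_restore_state(pdev);	/* replays DVSEC fields and re-commits HDM decoders */
	return 0;
}
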
EXPORT_SYMBOL(pci_save_state); @@ -1929,6 +1929,7 @@ void pci_restore_state(struct pci_dev *dev) pci_restore_aer_state(dev); pci_restore_config_space(dev); + pci_restore_cxl_state(dev); pci_restore_pcix_state(dev); pci_restore_msi_state(dev); @@ -3529,6 +3530,26 @@ int pci_add_ext_cap_save_buffer(struct pci_dev *dev, u16 cap, unsigned int size) return _pci_add_cap_save_buffer(dev, cap, true, size); } +int pci_add_virtual_ext_cap_save_buffer(struct pci_dev *dev, u16 cap, + unsigned int size) +{ + struct pci_cap_saved_state *save_state; + + if (cap <= PCI_EXT_CAP_ID_MAX) + return -EINVAL; + + save_state = kzalloc(sizeof(*save_state) + size, GFP_KERNEL); + if (!save_state) + return -ENOMEM; + + save_state->cap.cap_nr = cap; + save_state->cap.cap_extended = true; + save_state->cap.size = size; + pci_add_saved_cap(dev, save_state); + + return 0; +} + /** * pci_allocate_cap_save_buffers - allocate buffers for saving capabilities * @dev: the PCI device @@ -3552,6 +3573,7 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev) pci_err(dev, "unable to allocate suspend buffer for LTR\n"); pci_allocate_vc_save_buffers(dev); + pci_allocate_cxl_save_buffer(dev); } void pci_free_cap_save_buffers(struct pci_dev *dev) @@ -5134,151 +5156,6 @@ static int cxl_reset_bus_function(struct pci_dev *dev, bool probe) return rc; } -static int cxl_reset_prepare(struct pci_dev *dev, u16 dvsec) -{ - u32 timeout_us = 100, timeout_tot_us = 10000; - u16 reg, cap; - int rc; - - if (!pci_wait_for_pending_transaction(dev)) - pci_err(dev, "timed out waiting for pending transaction; performing cxl reset anyway\n"); - - /* Check if the device is cache capable. */ - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CAP_OFFSET, &cap); - if (rc) - return rc; - - if (!(cap & CXL_DVSEC_CACHE_CAPABLE)) - return 0; - - /* Disable cache. WB and invalidate cache if capability is advertised */ - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, ®); - if (rc) - return rc; - reg |= CXL_DVSEC_DISABLE_CACHING; - /* - * DEVCTL2 bits are written only once. So check WB+I capability while - * keeping disable caching set. - */ - if (cap & CXL_DVSEC_CACHE_WBI_CAPABLE) - reg |= CXL_DVSEC_INIT_CACHE_WBI; - pci_write_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, reg); - - /* - * From Section 9.6: "Software may leverage the cache size reported in - * the DVSEC CXL Capability2 register to compute a suitable timeout - * value". - * Given there is no conversion factor for cache size -> timeout, - * setting timer for default 10ms. - */ - do { - if (timeout_tot_us == 0) - return -ETIMEDOUT; - usleep_range(timeout_us, timeout_us + 1); - timeout_tot_us -= timeout_us; - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, - ®); - if (rc) - return rc; - } while (!(reg & CXL_DVSEC_CACHE_INVALID)); - - return 0; -} - -static int cxl_reset_init(struct pci_dev *dev, u16 dvsec) -{ - /* - * Timeout values ref CXL Spec v3.2 Ch 8 Control and Status Registers, - * under section 8.1.3.1 DVSEC CXL Capability. - */ - u32 reset_timeouts_ms[] = { 10, 100, 1000, 10000, 100000 }; - u16 reg; - u32 timeout_ms; - int rc, ind; - - /* Check if CXL Reset MEM CLR is supported. */ - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CAP_OFFSET, ®); - if (rc) - return rc; - - if (reg & CXL_DVSEC_CXL_RST_MEM_CLR_CAPABLE) { - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, - ®); - if (rc) - return rc; - - reg |= CXL_DVSEC_CXL_RST_MEM_CLR_ENABLE; - pci_write_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, reg); - } - - /* Read timeout value. 
*/ - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CAP_OFFSET, ®); - if (rc) - return rc; - ind = FIELD_GET(CXL_DVSEC_CXL_RST_TIMEOUT_MASK, reg); - timeout_ms = reset_timeouts_ms[ind]; - - /* Write reset config. */ - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, ®); - if (rc) - return rc; - - reg |= CXL_DVSEC_INIT_CXL_RESET; - pci_write_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, reg); - - /* Wait till timeout and then check reset status is complete. */ - msleep(timeout_ms); - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_STATUS2_OFFSET, ®); - if (rc) - return rc; - if (reg & CXL_DVSEC_CXL_RESET_ERR || - ~reg & CXL_DVSEC_CXL_RST_COMPLETE) - return -ETIMEDOUT; - - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, ®); - if (rc) - return rc; - reg &= (~CXL_DVSEC_DISABLE_CACHING); - pci_write_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, reg); - - return 0; -} - -/** - * cxl_reset - initiate a cxl reset - * @dev: device to reset - * @probe: if true, return 0 if device can be reset this way - * - * Initiate a cxl reset on @dev. - */ -static int cxl_reset(struct pci_dev *dev, bool probe) -{ - u16 dvsec, reg; - int rc; - - dvsec = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_CXL, - CXL_DVSEC_PCIE_DEVICE); - if (!dvsec) - return -ENOTTY; - - /* Check if CXL Reset is supported. */ - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CAP_OFFSET, ®); - if (rc) - return -ENOTTY; - - if ((reg & CXL_DVSEC_CXL_RST_CAPABLE) == 0) - return -ENOTTY; - - if (probe) - return 0; - - rc = cxl_reset_prepare(dev, dvsec); - if (rc) - return rc; - - return cxl_reset_init(dev, dvsec); -} - void pci_dev_lock(struct pci_dev *dev) { /* block PM suspend, driver probe, etc. */ @@ -5307,7 +5184,15 @@ void pci_dev_unlock(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_dev_unlock); -static void pci_dev_save_and_disable(struct pci_dev *dev) +/** + * pci_dev_save_and_disable - Save device state and disable it + * @dev: PCI device to save and disable + * + * Save the PCI configuration state, invoke the driver's reset_prepare + * callback (if any), and disable the device by clearing the Command register. + * The device lock must be held by the caller. + */ +void pci_dev_save_and_disable(struct pci_dev *dev) { const struct pci_error_handlers *err_handler = dev->driver ? dev->driver->err_handler : NULL; @@ -5339,8 +5224,16 @@ static void pci_dev_save_and_disable(struct pci_dev *dev) */ pci_write_config_word(dev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE); } +EXPORT_SYMBOL_GPL(pci_dev_save_and_disable); -static void pci_dev_restore(struct pci_dev *dev) +/** + * pci_dev_restore - Restore device state after reset + * @dev: PCI device to restore + * + * Restore the saved PCI configuration state and invoke the driver's + * reset_done callback (if any). The device lock must be held by the caller. + */ +void pci_dev_restore(struct pci_dev *dev) { const struct pci_error_handlers *err_handler = dev->driver ? 
dev->driver->err_handler : NULL; @@ -5357,6 +5250,7 @@ static void pci_dev_restore(struct pci_dev *dev) else if (dev->driver) pci_warn(dev, "reset done"); } +EXPORT_SYMBOL_GPL(pci_dev_restore); /* dev->reset_methods[] is a 0-terminated list of indices into this array */ const struct pci_reset_fn_method pci_reset_fn_methods[] = { @@ -5365,7 +5259,6 @@ const struct pci_reset_fn_method pci_reset_fn_methods[] = { { pci_dev_acpi_reset, .name = "acpi" }, { pcie_reset_flr, .name = "flr" }, { pci_af_flr, .name = "af_flr" }, - { cxl_reset, .name = "cxl_reset" }, { pci_pm_reset, .name = "pm" }, { pci_reset_bus_function, .name = "bus" }, { cxl_reset_bus_function, .name = "cxl_bus" }, diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index ee7b515125826..6167e0e204ade 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -119,15 +119,33 @@ struct pci_cap_saved_state { struct pci_cap_saved_data cap; }; +/* + * Virtual extended cap ID for CXL DVSEC state in the cap save chain. + */ +#define PCI_EXT_CAP_ID_CXL_DVSEC_VIRTUAL 0xFFFF +static_assert(PCI_EXT_CAP_ID_MAX < PCI_EXT_CAP_ID_CXL_DVSEC_VIRTUAL); + void pci_allocate_cap_save_buffers(struct pci_dev *dev); void pci_free_cap_save_buffers(struct pci_dev *dev); int pci_add_cap_save_buffer(struct pci_dev *dev, char cap, unsigned int size); int pci_add_ext_cap_save_buffer(struct pci_dev *dev, u16 cap, unsigned int size); +int pci_add_virtual_ext_cap_save_buffer(struct pci_dev *dev, u16 cap, + unsigned int size); struct pci_cap_saved_state *pci_find_saved_cap(struct pci_dev *dev, char cap); struct pci_cap_saved_state *pci_find_saved_ext_cap(struct pci_dev *dev, u16 cap); +#ifdef CONFIG_PCI_CXL +void pci_allocate_cxl_save_buffer(struct pci_dev *dev); +void pci_save_cxl_state(struct pci_dev *dev); +void pci_restore_cxl_state(struct pci_dev *dev); +#else +static inline void pci_allocate_cxl_save_buffer(struct pci_dev *dev) { } +static inline void pci_save_cxl_state(struct pci_dev *dev) { } +static inline void pci_restore_cxl_state(struct pci_dev *dev) { } +#endif + #define PCI_PM_D2_DELAY 200 /* usec; see PCIe r4.0, sec 5.9.1 */ #define PCI_PM_D3HOT_WAIT 10 /* msec */ #define PCI_PM_D3COLD_WAIT 100 /* msec */ @@ -607,31 +625,56 @@ static inline bool pci_dev_binding_disallowed(struct pci_dev *dev) #define AER_MAX_MULTI_ERR_DEVICES 5 /* Not likely to have more */ +/** + * struct aer_err_info - AER Error Information + * @dev: Devices reporting error + * @ratelimit_print: Flag to log or not log the devices' error. 0=NotLog/1=Log + * @__pad1: Padding for alignment + * @error_dev_num: Number of devices reporting an error + * @level: printk level to use in logging + * @id: Value from register PCI_ERR_ROOT_ERR_SRC + * @severity: AER severity, 0-UNCOR Non-fatal, 1-UNCOR fatal, 2-COR + * @root_ratelimit_print: Flag to log or not log the root's error. 
0=NotLog/1=Log + * @multi_error_valid: If multiple errors are reported + * @first_error: First reported error + * @__pad2: Padding for alignment + * @is_cxl: Bus type error: 0-PCI Bus error, 1-CXL Bus error + * @tlp_header_valid: Indicates if TLP field contains error information + * @status: COR/UNCOR error status + * @mask: COR/UNCOR mask + * @tlp: Transaction packet information + */ struct aer_err_info { struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES]; int ratelimit_print[AER_MAX_MULTI_ERR_DEVICES]; int error_dev_num; - const char *level; /* printk level */ + const char *level; unsigned int id:16; - unsigned int severity:2; /* 0:NONFATAL | 1:FATAL | 2:COR */ - unsigned int root_ratelimit_print:1; /* 0=skip, 1=print */ + unsigned int severity:2; + unsigned int root_ratelimit_print:1; unsigned int __pad1:4; unsigned int multi_error_valid:1; unsigned int first_error:5; - unsigned int __pad2:2; + unsigned int __pad2:1; + unsigned int is_cxl:1; unsigned int tlp_header_valid:1; - unsigned int status; /* COR/UNCOR Error Status */ - unsigned int mask; /* COR/UNCOR Error Mask */ - struct pcie_tlp_log tlp; /* TLP Header */ + unsigned int status; + unsigned int mask; + struct pcie_tlp_log tlp; }; int aer_get_device_error_info(struct aer_err_info *info, int i); void aer_print_error(struct aer_err_info *info, int i); +static inline const char *aer_err_bus(struct aer_err_info *info) +{ + return info->is_cxl ? "CXL" : "PCIe"; +} + int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2, unsigned int tlp_len, bool flit, struct pcie_tlp_log *log); diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig index 17919b99fa66a..207c2deae35ff 100644 --- a/drivers/pci/pcie/Kconfig +++ b/drivers/pci/pcie/Kconfig @@ -49,15 +49,6 @@ config PCIEAER_INJECT gotten from: https://github.com/intel/aer-inject.git -config PCIEAER_CXL - bool "PCI Express CXL RAS support" - default y - depends on PCIEAER && CXL_PCI - help - Enables CXL error handling. - - If unsure, say Y. 
- # # PCI Express ECRC # diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile index 173829aa02e60..b0b43a18c304b 100644 --- a/drivers/pci/pcie/Makefile +++ b/drivers/pci/pcie/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o bwctrl.o obj-y += aspm.o obj-$(CONFIG_PCIEAER) += aer.o err.o tlp.o +obj-$(CONFIG_CXL_RAS) += aer_cxl_rch.o obj-$(CONFIG_PCIEAER_INJECT) += aer_inject.o obj-$(CONFIG_PCIE_PME) += pme.o obj-$(CONFIG_PCIE_DPC) += dpc.o diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 3dba9c0c6ae11..5331a1c908375 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -844,6 +844,7 @@ void aer_print_error(struct aer_err_info *info, int i) struct pci_dev *dev; int layer, agent, id; const char *level = info->level; + const char *bus_type = aer_err_bus(info); if (WARN_ON_ONCE(i >= AER_MAX_MULTI_ERR_DEVICES)) return; @@ -853,22 +854,22 @@ void aer_print_error(struct aer_err_info *info, int i) pci_dev_aer_stats_incr(dev, info); trace_aer_event(pci_name(dev), (info->status & ~info->mask), - info->severity, info->tlp_header_valid, &info->tlp); + info->severity, info->tlp_header_valid, &info->tlp, bus_type); if (!info->ratelimit_print[i]) return; if (!info->status) { - pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n", - aer_error_severity_string[info->severity]); + pci_err(dev, "%s Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n", + bus_type, aer_error_severity_string[info->severity]); goto out; } layer = AER_GET_LAYER_ERROR(info->severity, info->status); agent = AER_GET_AGENT(info->severity, info->status); - aer_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n", - aer_error_severity_string[info->severity], + aer_printk(level, dev, "%s Bus Error: severity=%s, type=%s, (%s)\n", + bus_type, aer_error_severity_string[info->severity], aer_error_layer[layer], aer_agent_string[agent]); aer_printk(level, dev, " device [%04x:%04x] error status/mask=%08x/%08x\n", @@ -902,6 +903,7 @@ EXPORT_SYMBOL_GPL(cper_severity_to_aer); void pci_print_aer(struct pci_dev *dev, int aer_severity, struct aer_capability_regs *aer) { + const char *bus_type; int layer, agent, tlp_header_valid = 0; u32 status, mask; struct aer_err_info info = { @@ -922,10 +924,13 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity, info.status = status; info.mask = mask; + info.is_cxl = pcie_is_cxl(dev); + + bus_type = aer_err_bus(&info); pci_dev_aer_stats_incr(dev, &info); - trace_aer_event(pci_name(dev), (status & ~mask), - aer_severity, tlp_header_valid, &aer->header_log); + trace_aer_event(pci_name(dev), (status & ~mask), aer_severity, + tlp_header_valid, &aer->header_log, bus_type); if (!aer_ratelimit(dev, info.severity)) return; @@ -1094,8 +1099,6 @@ static bool find_source_device(struct pci_dev *parent, return true; } -#ifdef CONFIG_PCIEAER_CXL - /** * pci_aer_unmask_internal_errors - unmask internal errors * @dev: pointer to the pci_dev data structure @@ -1106,7 +1109,7 @@ static bool find_source_device(struct pci_dev *parent, * Note: AER must be enabled and supported by the device which must be * checked in advance, e.g. with pcie_aer_is_native(). 
*/ -static void pci_aer_unmask_internal_errors(struct pci_dev *dev) +void pci_aer_unmask_internal_errors(struct pci_dev *dev) { int aer = dev->aer_cap; u32 mask; @@ -1120,117 +1123,20 @@ static void pci_aer_unmask_internal_errors(struct pci_dev *dev) pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, mask); } -static bool is_cxl_mem_dev(struct pci_dev *dev) -{ - /* - * The capability, status, and control fields in Device 0, - * Function 0 DVSEC control the CXL functionality of the - * entire device (CXL 3.0, 8.1.3). - */ - if (dev->devfn != PCI_DEVFN(0, 0)) - return false; - - /* - * CXL Memory Devices must have the 502h class code set (CXL - * 3.0, 8.1.12.1). - */ - if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL) - return false; - - return true; -} - -static bool cxl_error_is_native(struct pci_dev *dev) -{ - struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); - - return (pcie_ports_native || host->native_aer); -} +/* + * Internal errors are too device-specific to enable generally, however for CXL + * their behavior is standardized for conveying CXL protocol errors. + */ +EXPORT_SYMBOL_FOR_MODULES(pci_aer_unmask_internal_errors, "cxl_core"); -static bool is_internal_error(struct aer_err_info *info) +#ifdef CONFIG_CXL_RAS +bool is_aer_internal_error(struct aer_err_info *info) { if (info->severity == AER_CORRECTABLE) return info->status & PCI_ERR_COR_INTERNAL; return info->status & PCI_ERR_UNC_INTN; } - -static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data) -{ - struct aer_err_info *info = (struct aer_err_info *)data; - const struct pci_error_handlers *err_handler; - - if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev)) - return 0; - - /* Protect dev->driver */ - device_lock(&dev->dev); - - err_handler = dev->driver ? dev->driver->err_handler : NULL; - if (!err_handler) - goto out; - - if (info->severity == AER_CORRECTABLE) { - if (err_handler->cor_error_detected) - err_handler->cor_error_detected(dev); - } else if (err_handler->error_detected) { - if (info->severity == AER_NONFATAL) - err_handler->error_detected(dev, pci_channel_io_normal); - else if (info->severity == AER_FATAL) - err_handler->error_detected(dev, pci_channel_io_frozen); - } -out: - device_unlock(&dev->dev); - return 0; -} - -static void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) -{ - /* - * Internal errors of an RCEC indicate an AER error in an - * RCH's downstream port. Check and handle them in the CXL.mem - * device driver. 
- */ - if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC && - is_internal_error(info)) - pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info); -} - -static int handles_cxl_error_iter(struct pci_dev *dev, void *data) -{ - bool *handles_cxl = data; - - if (!*handles_cxl) - *handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev); - - /* Non-zero terminates iteration */ - return *handles_cxl; -} - -static bool handles_cxl_errors(struct pci_dev *rcec) -{ - bool handles_cxl = false; - - if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC && - pcie_aer_is_native(rcec)) - pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl); - - return handles_cxl; -} - -static void cxl_rch_enable_rcec(struct pci_dev *rcec) -{ - if (!handles_cxl_errors(rcec)) - return; - - pci_aer_unmask_internal_errors(rcec); - pci_info(rcec, "CXL: Internal errors unmasked"); -} - -#else -static inline void cxl_rch_enable_rcec(struct pci_dev *dev) { } -static inline void cxl_rch_handle_error(struct pci_dev *dev, - struct aer_err_info *info) { } #endif /** @@ -1379,6 +1285,7 @@ int aer_get_device_error_info(struct aer_err_info *info, int i) /* Must reset in this function */ info->status = 0; info->tlp_header_valid = 0; + info->is_cxl = pcie_is_cxl(dev); /* The device might not support AER */ if (!aer) diff --git a/drivers/pci/pcie/aer_cxl_rch.c b/drivers/pci/pcie/aer_cxl_rch.c new file mode 100644 index 0000000000000..e471eefec9c40 --- /dev/null +++ b/drivers/pci/pcie/aer_cxl_rch.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 AMD Corporation. All rights reserved. */ + +#include +#include +#include +#include "../pci.h" +#include "portdrv.h" + +static bool is_cxl_mem_dev(struct pci_dev *dev) +{ + /* + * The capability, status, and control fields in Device 0, + * Function 0 DVSEC control the CXL functionality of the + * entire device (CXL 3.0, 8.1.3). + */ + if (dev->devfn != PCI_DEVFN(0, 0)) + return false; + + /* + * CXL Memory Devices must have the 502h class code set (CXL + * 3.0, 8.1.12.1). + */ + if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL) + return false; + + return true; +} + +static bool cxl_error_is_native(struct pci_dev *dev) +{ + struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); + + return (pcie_ports_native || host->native_aer); +} + +static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data) +{ + struct aer_err_info *info = (struct aer_err_info *)data; + const struct pci_error_handlers *err_handler; + + if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev)) + return 0; + + guard(device)(&dev->dev); + + err_handler = dev->driver ? dev->driver->err_handler : NULL; + if (!err_handler) + return 0; + + if (info->severity == AER_CORRECTABLE) { + if (err_handler->cor_error_detected) + err_handler->cor_error_detected(dev); + } else if (err_handler->error_detected) { + if (info->severity == AER_NONFATAL) + err_handler->error_detected(dev, pci_channel_io_normal); + else if (info->severity == AER_FATAL) + err_handler->error_detected(dev, pci_channel_io_frozen); + } + return 0; +} + +void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) +{ + /* + * Internal errors of an RCEC indicate an AER error in an + * RCH's downstream port. Check and handle them in the CXL.mem + * device driver. 
+ */ + if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC && + is_aer_internal_error(info)) + pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info); +} + +static int handles_cxl_error_iter(struct pci_dev *dev, void *data) +{ + bool *handles_cxl = data; + + if (!*handles_cxl) + *handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev); + + /* Non-zero terminates iteration */ + return *handles_cxl; +} + +static bool handles_cxl_errors(struct pci_dev *rcec) +{ + bool handles_cxl = false; + + if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC && + pcie_aer_is_native(rcec)) + pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl); + + return handles_cxl; +} + +void cxl_rch_enable_rcec(struct pci_dev *rcec) +{ + if (!handles_cxl_errors(rcec)) + return; + + pci_aer_unmask_internal_errors(rcec); + pci_info(rcec, "CXL: Internal errors unmasked"); +} diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index bd29d1cc7b8bd..cc58bf2f2c844 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -123,4 +123,16 @@ static inline void pcie_pme_interrupt_enable(struct pci_dev *dev, bool en) {} #endif /* !CONFIG_PCIE_PME */ struct device *pcie_port_find_device(struct pci_dev *dev, u32 service); + +struct aer_err_info; + +#ifdef CONFIG_CXL_RAS +bool is_aer_internal_error(struct aer_err_info *info); +void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info); +void cxl_rch_enable_rcec(struct pci_dev *rcec); +#else +static inline bool is_aer_internal_error(struct aer_err_info *info) { return false; } +static inline void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) { } +static inline void cxl_rch_enable_rcec(struct pci_dev *rcec) { } +#endif /* CONFIG_CXL_RAS */ #endif /* _PORTDRV_H_ */ diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index eb084877bb043..06cb9081d4ac4 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1692,6 +1692,35 @@ static void set_pcie_thunderbolt(struct pci_dev *dev) dev->is_thunderbolt = 1; } +static void set_pcie_cxl(struct pci_dev *dev) +{ + struct pci_dev *bridge; + u16 dvsec, cap; + + if (!pci_is_pcie(dev)) + return; + + /* + * Update parent's CXL state because alternate protocol training + * may have changed + */ + bridge = pci_upstream_bridge(dev); + if (bridge) + set_pcie_cxl(bridge); + + dvsec = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_CXL, + PCI_DVSEC_CXL_FLEXBUS_PORT); + if (!dvsec) + return; + + pci_read_config_word(dev, dvsec + PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS, + &cap); + + dev->is_cxl = FIELD_GET(PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS_CACHE, cap) || + FIELD_GET(PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS_MEM, cap); + +} + static void set_pcie_untrusted(struct pci_dev *dev) { struct pci_dev *parent = pci_upstream_bridge(dev); @@ -2022,6 +2051,8 @@ int pci_setup_device(struct pci_dev *dev) /* Need to have dev->cfg_size ready */ set_pcie_thunderbolt(dev); + set_pcie_cxl(dev); + set_pcie_untrusted(dev); if (pci_is_pcie(dev)) diff --git a/drivers/soc/amlogic/meson-canvas.c b/drivers/soc/amlogic/meson-canvas.c index b6e06c4d2117f..79681afea8c61 100644 --- a/drivers/soc/amlogic/meson-canvas.c +++ b/drivers/soc/amlogic/meson-canvas.c @@ -60,12 +60,9 @@ struct meson_canvas *meson_canvas_get(struct device *dev) return ERR_PTR(-ENODEV); canvas_pdev = of_find_device_by_node(canvas_node); - if (!canvas_pdev) { - of_node_put(canvas_node); - return ERR_PTR(-EPROBE_DEFER); - } - of_node_put(canvas_node); + if (!canvas_pdev) + return ERR_PTR(-EPROBE_DEFER); /* * If priv is NULL, it's probably because the canvas 
hasn't @@ -73,10 +70,9 @@ struct meson_canvas *meson_canvas_get(struct device *dev) * current state, this driver probe cannot return -EPROBE_DEFER */ canvas = dev_get_drvdata(&canvas_pdev->dev); - if (!canvas) { - put_device(&canvas_pdev->dev); + put_device(&canvas_pdev->dev); + if (!canvas) return ERR_PTR(-EINVAL); - } return canvas; } diff --git a/drivers/soc/amlogic/meson-gx-socinfo.c b/drivers/soc/amlogic/meson-gx-socinfo.c index 7549f1644e5ea..2a54ca43cd13e 100644 --- a/drivers/soc/amlogic/meson-gx-socinfo.c +++ b/drivers/soc/amlogic/meson-gx-socinfo.c @@ -46,6 +46,9 @@ static const struct meson_gx_soc_id { { "A5", 0x3c }, { "C3", 0x3d }, { "A4", 0x40 }, + { "S7", 0x46 }, + { "S7D", 0x47 }, + { "S6", 0x48 }, }; static const struct meson_gx_package_id { @@ -86,6 +89,9 @@ static const struct meson_gx_package_id { { "A311D2", 0x36, 0x1, 0xf }, { "A113X2", 0x3c, 0x1, 0xf }, { "A113L2", 0x40, 0x1, 0xf }, + { "S805X3", 0x46, 0x3, 0xf }, + { "S905X5M", 0x47, 0x1, 0xf }, + { "S905X5", 0x48, 0x1, 0xf }, }; static inline unsigned int socinfo_to_major(u32 socinfo) diff --git a/drivers/soc/apple/mailbox.c b/drivers/soc/apple/mailbox.c index 49a0955e82d6c..1685da1da23d0 100644 --- a/drivers/soc/apple/mailbox.c +++ b/drivers/soc/apple/mailbox.c @@ -299,11 +299,18 @@ struct apple_mbox *apple_mbox_get(struct device *dev, int index) return ERR_PTR(-EPROBE_DEFER); mbox = platform_get_drvdata(pdev); - if (!mbox) - return ERR_PTR(-EPROBE_DEFER); + if (!mbox) { + mbox = ERR_PTR(-EPROBE_DEFER); + goto out_put_pdev; + } + + if (!device_link_add(dev, &pdev->dev, DL_FLAG_AUTOREMOVE_CONSUMER)) { + mbox = ERR_PTR(-ENODEV); + goto out_put_pdev; + } - if (!device_link_add(dev, &pdev->dev, DL_FLAG_AUTOREMOVE_CONSUMER)) - return ERR_PTR(-ENODEV); +out_put_pdev: + put_device(&pdev->dev); return mbox; } diff --git a/drivers/soc/apple/sart.c b/drivers/soc/apple/sart.c index afa1117368997..6952afc41308a 100644 --- a/drivers/soc/apple/sart.c +++ b/drivers/soc/apple/sart.c @@ -164,17 +164,11 @@ static int apple_sart_probe(struct platform_device *pdev) return 0; } -static void apple_sart_put_device(void *dev) -{ - put_device(dev); -} - struct apple_sart *devm_apple_sart_get(struct device *dev) { struct device_node *sart_node; struct platform_device *sart_pdev; struct apple_sart *sart; - int ret; sart_node = of_parse_phandle(dev->of_node, "apple,sart", 0); if (!sart_node) @@ -192,14 +186,11 @@ struct apple_sart *devm_apple_sart_get(struct device *dev) return ERR_PTR(-EPROBE_DEFER); } - ret = devm_add_action_or_reset(dev, apple_sart_put_device, - &sart_pdev->dev); - if (ret) - return ERR_PTR(ret); - device_link_add(dev, &sart_pdev->dev, DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER); + put_device(&sart_pdev->dev); + return sart; } EXPORT_SYMBOL_GPL(devm_apple_sart_get); diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index 9be240999f877..6b392b3ad4b15 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -1073,7 +1073,7 @@ EXPORT_SYMBOL(qman_portal_set_iperiod); int qman_wq_alloc(void) { - qm_portal_wq = alloc_workqueue("qman_portal_wq", 0, 1); + qm_portal_wq = alloc_workqueue("qman_portal_wq", WQ_PERCPU, 1); if (!qm_portal_wq) return -ENOMEM; return 0; diff --git a/drivers/soc/fsl/qbman/qman_test_stash.c b/drivers/soc/fsl/qbman/qman_test_stash.c index f4d3c2146f4f0..18131a5d5979a 100644 --- a/drivers/soc/fsl/qbman/qman_test_stash.c +++ b/drivers/soc/fsl/qbman/qman_test_stash.c @@ -219,7 +219,7 @@ static int allocate_frame_data(void) pcfg = 
qman_get_qm_portal_config(qman_dma_portal); - __frame_ptr = kmalloc(4 * HP_NUM_WORDS, GFP_KERNEL); + __frame_ptr = kmalloc_array(4, HP_NUM_WORDS, GFP_KERNEL); if (!__frame_ptr) return -ENOMEM; diff --git a/drivers/soc/microchip/Kconfig b/drivers/soc/microchip/Kconfig index 19f4b576f822b..bcf5546025610 100644 --- a/drivers/soc/microchip/Kconfig +++ b/drivers/soc/microchip/Kconfig @@ -9,3 +9,15 @@ config POLARFIRE_SOC_SYS_CTRL module will be called mpfs_system_controller. If unsure, say N. + +config POLARFIRE_SOC_SYSCONS + bool "PolarFire SoC (MPFS) syscon drivers" + default y + depends on ARCH_MICROCHIP + select MFD_CORE + help + These drivers add support for the syscons on PolarFire SoC (MPFS). + Without these drivers core parts of the kernel such as clocks + and resets will not function correctly. + + If unsure, and on a PolarFire SoC, say y. diff --git a/drivers/soc/microchip/Makefile b/drivers/soc/microchip/Makefile index 14489919fe4b3..1a3a1594b089b 100644 --- a/drivers/soc/microchip/Makefile +++ b/drivers/soc/microchip/Makefile @@ -1 +1,2 @@ obj-$(CONFIG_POLARFIRE_SOC_SYS_CTRL) += mpfs-sys-controller.o +obj-$(CONFIG_POLARFIRE_SOC_SYSCONS) += mpfs-control-scb.o mpfs-mss-top-sysreg.o diff --git a/drivers/soc/microchip/mpfs-control-scb.c b/drivers/soc/microchip/mpfs-control-scb.c new file mode 100644 index 0000000000000..f0b84b1f49cbc --- /dev/null +++ b/drivers/soc/microchip/mpfs-control-scb.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include + +static const struct mfd_cell mpfs_control_scb_devs[] = { + MFD_CELL_NAME("mpfs-tvs"), +}; + +static int mpfs_control_scb_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + + return mfd_add_devices(dev, PLATFORM_DEVID_NONE, mpfs_control_scb_devs, + ARRAY_SIZE(mpfs_control_scb_devs), NULL, 0, NULL); +} + +static const struct of_device_id mpfs_control_scb_of_match[] = { + { .compatible = "microchip,mpfs-control-scb", }, + {}, +}; +MODULE_DEVICE_TABLE(of, mpfs_control_scb_of_match); + +static struct platform_driver mpfs_control_scb_driver = { + .driver = { + .name = "mpfs-control-scb", + .of_match_table = mpfs_control_scb_of_match, + }, + .probe = mpfs_control_scb_probe, +}; +module_platform_driver(mpfs_control_scb_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Conor Dooley "); +MODULE_DESCRIPTION("PolarFire SoC control scb driver"); diff --git a/drivers/soc/microchip/mpfs-mss-top-sysreg.c b/drivers/soc/microchip/mpfs-mss-top-sysreg.c new file mode 100644 index 0000000000000..b2244e44ff0fa --- /dev/null +++ b/drivers/soc/microchip/mpfs-mss-top-sysreg.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include + +static const struct mfd_cell mpfs_mss_top_sysreg_devs[] = { + MFD_CELL_NAME("mpfs-reset"), +}; + +static int mpfs_mss_top_sysreg_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + int ret; + + ret = mfd_add_devices(dev, PLATFORM_DEVID_NONE, mpfs_mss_top_sysreg_devs, + ARRAY_SIZE(mpfs_mss_top_sysreg_devs) , NULL, 0, NULL); + if (ret) + return ret; + + return devm_of_platform_populate(dev); +} + +static const struct of_device_id mpfs_mss_top_sysreg_of_match[] = { + { .compatible = "microchip,mpfs-mss-top-sysreg", }, + {}, +}; +MODULE_DEVICE_TABLE(of, mpfs_mss_top_sysreg_of_match); + +static struct platform_driver mpfs_mss_top_sysreg_driver = { + .driver = { + .name = "mpfs-mss-top-sysreg", + .of_match_table = mpfs_mss_top_sysreg_of_match, + }, + .probe = 
mpfs_mss_top_sysreg_probe, +}; +module_platform_driver(mpfs_mss_top_sysreg_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Conor Dooley "); +MODULE_DESCRIPTION("PolarFire SoC mss top sysreg driver"); diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 99fd1588ff382..eb787dfbd2fa0 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -560,8 +560,8 @@ struct acpi_cedt_cfmws_target_element { /* Values for Restrictions field above */ -#define ACPI_CEDT_CFMWS_RESTRICT_TYPE2 (1) -#define ACPI_CEDT_CFMWS_RESTRICT_TYPE3 (1<<1) +#define ACPI_CEDT_CFMWS_RESTRICT_DEVMEM (1) +#define ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM (1<<1) #define ACPI_CEDT_CFMWS_RESTRICT_VOLATILE (1<<2) #define ACPI_CEDT_CFMWS_RESTRICT_PMEM (1<<3) #define ACPI_CEDT_CFMWS_RESTRICT_FIXED (1<<4) diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h new file mode 100644 index 0000000000000..7d0b09ff57681 --- /dev/null +++ b/include/cxl/cxl.h @@ -0,0 +1,341 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2020 Intel Corporation. */ +/* Copyright(c) 2025 Advanced Micro Devices, Inc. */ + +#ifndef __CXL_CXL_H__ +#define __CXL_CXL_H__ + +#include +#include +#include +#include +#include + +/** + * enum cxl_devtype - delineate type-2 from a generic type-3 device + * @CXL_DEVTYPE_DEVMEM: Vendor specific CXL Type-2 device implementing HDM-D or + * HDM-DB, no requirement that this device implements a + * mailbox, or other memory-device-standard manageability + * flows. + * @CXL_DEVTYPE_CLASSMEM: Common class definition of a CXL Type-3 device with + * HDM-H and class-mandatory memory device registers + */ +enum cxl_devtype { + CXL_DEVTYPE_DEVMEM, + CXL_DEVTYPE_CLASSMEM, +}; + +struct device; + +/* + * Using struct_group() allows for per register-block-type helper routines, + * without requiring block-type agnostic code to include the prefix. 
+ */ +struct cxl_regs { + /* + * Common set of CXL Component register block base pointers + * @hdm_decoder: CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure + * @ras: CXL 2.0 8.2.5.9 CXL RAS Capability Structure + */ + struct_group_tagged(cxl_component_regs, component, + void __iomem *hdm_decoder; + void __iomem *ras; + ); + /* + * Common set of CXL Device register block base pointers + * @status: CXL 2.0 8.2.8.3 Device Status Registers + * @mbox: CXL 2.0 8.2.8.4 Mailbox Registers + * @memdev: CXL 2.0 8.2.8.5 Memory Device Registers + */ + struct_group_tagged(cxl_device_regs, device_regs, + void __iomem *status, *mbox, *memdev; + ); + + struct_group_tagged(cxl_pmu_regs, pmu_regs, + void __iomem *pmu; + ); + + /* + * RCH downstream port specific RAS register + * @aer: CXL 3.0 8.2.1.1 RCH Downstream Port RCRB + */ + struct_group_tagged(cxl_rch_regs, rch_regs, + void __iomem *dport_aer; + ); + + /* + * RCD upstream port specific PCIe cap register + * @pcie_cap: CXL 3.0 8.2.1.2 RCD Upstream Port RCRB + */ + struct_group_tagged(cxl_rcd_regs, rcd_regs, + void __iomem *rcd_pcie_cap; + ); +}; + +#define CXL_CM_CAP_CAP_ID_RAS 0x2 +#define CXL_CM_CAP_CAP_ID_HDM 0x5 +#define CXL_CM_CAP_CAP_HDM_VERSION 1 + +/* CXL 2.0 8.2.4 CXL Component Register Layout and Definition */ +#define CXL_COMPONENT_REG_BLOCK_SIZE SZ_64K + +/* CXL 2.0 8.2.5 CXL.cache and CXL.mem Registers */ +#define CXL_CM_OFFSET 0x1000 +#define CXL_CM_CAP_HDR_OFFSET 0x0 +#define CXL_CM_CAP_HDR_ID_MASK GENMASK(15, 0) +#define CM_CAP_HDR_CAP_ID 1 +#define CXL_CM_CAP_HDR_VERSION_MASK GENMASK(19, 16) +#define CM_CAP_HDR_CAP_VERSION 1 +#define CXL_CM_CAP_HDR_CACHE_MEM_VERSION_MASK GENMASK(23, 20) +#define CM_CAP_HDR_CACHE_MEM_VERSION 1 +#define CXL_CM_CAP_HDR_ARRAY_SIZE_MASK GENMASK(31, 24) +#define CXL_CM_CAP_PTR_MASK GENMASK(31, 20) + +/* HDM decoders CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure */ +#define CXL_HDM_DECODER_CAP_OFFSET 0x0 +#define CXL_HDM_DECODER_COUNT_MASK GENMASK(3, 0) +#define CXL_HDM_DECODER_TARGET_COUNT_MASK GENMASK(7, 4) +#define CXL_HDM_DECODER_INTERLEAVE_11_8 BIT(8) +#define CXL_HDM_DECODER_INTERLEAVE_14_12 BIT(9) +#define CXL_HDM_DECODER_INTERLEAVE_3_6_12_WAY BIT(11) +#define CXL_HDM_DECODER_INTERLEAVE_16_WAY BIT(12) +#define CXL_HDM_DECODER_CTRL_OFFSET 0x4 +#define CXL_HDM_DECODER_ENABLE BIT(1) +#define CXL_HDM_DECODER0_BASE_LOW_OFFSET(i) (0x20 * (i) + 0x10) +#define CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i) (0x20 * (i) + 0x14) +#define CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i) (0x20 * (i) + 0x18) +#define CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i) (0x20 * (i) + 0x1c) +#define CXL_HDM_DECODER0_CTRL_OFFSET(i) (0x20 * (i) + 0x20) +#define CXL_HDM_DECODER0_CTRL_IG_MASK GENMASK(3, 0) +#define CXL_HDM_DECODER0_CTRL_IW_MASK GENMASK(7, 4) +#define CXL_HDM_DECODER0_CTRL_LOCK BIT(8) +#define CXL_HDM_DECODER0_CTRL_COMMIT BIT(9) +#define CXL_HDM_DECODER0_CTRL_COMMITTED BIT(10) +#define CXL_HDM_DECODER0_CTRL_COMMIT_ERROR BIT(11) +#define CXL_HDM_DECODER0_CTRL_HOSTONLY BIT(12) +#define CXL_HDM_DECODER0_TL_LOW(i) (0x20 * (i) + 0x24) +#define CXL_HDM_DECODER0_TL_HIGH(i) (0x20 * (i) + 0x28) +#define CXL_HDM_DECODER0_SKIP_LOW(i) CXL_HDM_DECODER0_TL_LOW(i) +#define CXL_HDM_DECODER0_SKIP_HIGH(i) CXL_HDM_DECODER0_TL_HIGH(i) + +/* HDM decoder control register constants CXL 3.0 8.2.5.19.7 */ +#define CXL_DECODER_MIN_GRANULARITY 256 +#define CXL_DECODER_MAX_ENCODED_IG 6 + +static inline int cxl_hdm_decoder_count(u32 cap_hdr) +{ + int val = FIELD_GET(CXL_HDM_DECODER_COUNT_MASK, cap_hdr); + + return val ? 
val * 2 : 1; +} + +struct cxl_reg_map { + bool valid; + int id; + unsigned long offset; + unsigned long size; +}; + +struct cxl_component_reg_map { + struct cxl_reg_map hdm_decoder; + struct cxl_reg_map ras; +}; + +struct cxl_device_reg_map { + struct cxl_reg_map status; + struct cxl_reg_map mbox; + struct cxl_reg_map memdev; +}; + +struct cxl_pmu_reg_map { + struct cxl_reg_map pmu; +}; + +/** + * struct cxl_register_map - DVSEC harvested register block mapping parameters + * @host: device for devm operations and logging + * @base: virtual base of the register-block-BAR + @block_offset + * @resource: physical resource base of the register block + * @max_size: maximum mapping size to perform register search + * @reg_type: see enum cxl_regloc_type + * @component_map: cxl_reg_map for component registers + * @device_map: cxl_reg_maps for device registers + * @pmu_map: cxl_reg_maps for CXL Performance Monitoring Units + */ +struct cxl_register_map { + struct device *host; + void __iomem *base; + resource_size_t resource; + resource_size_t max_size; + u8 reg_type; + union { + struct cxl_component_reg_map component_map; + struct cxl_device_reg_map device_map; + struct cxl_pmu_reg_map pmu_map; + }; +}; + +/** + * struct cxl_dpa_perf - DPA performance property entry + * @dpa_range: range for DPA address + * @coord: QoS performance data (i.e. latency, bandwidth) + * @cdat_coord: raw QoS performance data from CDAT + * @qos_class: QoS Class cookies + */ +struct cxl_dpa_perf { + struct range dpa_range; + struct access_coordinate coord[ACCESS_COORDINATE_MAX]; + struct access_coordinate cdat_coord[ACCESS_COORDINATE_MAX]; + int qos_class; +}; + +enum cxl_partition_mode { + CXL_PARTMODE_RAM, + CXL_PARTMODE_PMEM, +}; + +/** + * struct cxl_dpa_partition - DPA partition descriptor + * @res: shortcut to the partition in the DPA resource tree (cxlds->dpa_res) + * @perf: performance attributes of the partition from CDAT + * @mode: operation mode for the DPA capacity, e.g. ram, pmem, dynamic... + */ +struct cxl_dpa_partition { + struct resource res; + struct cxl_dpa_perf perf; + enum cxl_partition_mode mode; +}; + +#define CXL_NR_PARTITIONS_MAX 2 + +/* + * cxl_decoder flags that define the type of memory / devices this + * decoder supports as well as configuration lock status See "CXL 2.0 + * 8.2.5.12.7 CXL HDM Decoder 0 Control Register" for details. + * Additionally indicate whether decoder settings were autodetected, + * user customized. + */ +#define CXL_DECODER_F_RAM BIT(0) +#define CXL_DECODER_F_PMEM BIT(1) +#define CXL_DECODER_F_TYPE2 BIT(2) +#define CXL_DECODER_F_TYPE3 BIT(3) +#define CXL_DECODER_F_LOCK BIT(4) +#define CXL_DECODER_F_ENABLE BIT(5) +#define CXL_DECODER_F_MASK GENMASK(5, 0) + +struct cxl_memdev_attach { + int (*probe)(struct cxl_memdev *cxlmd); +}; + +/** + * struct cxl_dev_state - The driver device state + * + * cxl_dev_state represents the CXL driver/device state. It provides an + * interface to mailbox commands as well as some cached data about the device. + * Currently only memory devices are represented. 
+ * + * @dev: The device associated with this CXL state + * @cxlmd: The device representing the CXL.mem capabilities of @dev + * @reg_map: component and ras register mapping parameters + * @regs: Parsed register blocks + * @cxl_dvsec: Offset to the PCIe device DVSEC + * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH) + * @media_ready: Indicate whether the device media is usable + * @dpa_res: Overall DPA resource tree for the device + * @part: DPA partition array + * @nr_partitions: Number of DPA partitions + * @serial: PCIe Device Serial Number + * @type: Generic Memory Class device or Vendor Specific Memory device + * @cxl_mbox: CXL mailbox context + * @cxlfs: CXL features context + */ +struct cxl_dev_state { + /* public for Type2 drivers */ + struct device *dev; + struct cxl_memdev *cxlmd; + + /* private for Type2 drivers */ + struct cxl_register_map reg_map; + struct cxl_regs regs; + int cxl_dvsec; + bool rcd; + bool media_ready; + struct resource dpa_res; + struct cxl_dpa_partition part[CXL_NR_PARTITIONS_MAX]; + unsigned int nr_partitions; + u64 serial; + enum cxl_devtype type; + struct cxl_mailbox cxl_mbox; +#ifdef CONFIG_CXL_FEATURES + struct cxl_features_state *cxlfs; +#endif +}; + +struct cxl_dev_state *_devm_cxl_dev_state_create(struct device *dev, + enum cxl_devtype type, + u64 serial, u16 dvsec, + size_t size, bool has_mbox); + +/** + * cxl_dev_state_create - safely create and cast a cxl dev state embedded in a + * driver specific struct. + * + * @parent: device behind the request + * @type: CXL device type + * @serial: device identification + * @dvsec: dvsec capability offset + * @drv_struct: driver struct embedding a cxl_dev_state struct + * @member: drv_struct member as cxl_dev_state + * @mbox: true if mailbox supported + * + * Returns a pointer to the drv_struct allocated and embedding a cxl_dev_state + * struct initialized. + * + * Introduced for Type2 driver support. + */ +#define devm_cxl_dev_state_create(parent, type, serial, dvsec, drv_struct, member, mbox) \ + ({ \ + static_assert(__same_type(struct cxl_dev_state, \ + ((drv_struct *)NULL)->member)); \ + static_assert(offsetof(drv_struct, member) == 0); \ + (drv_struct *)_devm_cxl_dev_state_create(parent, type, serial, dvsec, \ + sizeof(drv_struct), mbox); \ + }) + +/** + * cxl_map_component_regs - map cxl component registers + * + * @map: cxl register map to update with the mappings + * @regs: cxl component registers to work with + * @map_mask: cxl component regs to map + * + * Returns integer: success (0) or error (-ENOMEM) + * + * Made public for Type2 driver support. 
+ */ +int cxl_map_component_regs(const struct cxl_register_map *map, + struct cxl_component_regs *regs, + unsigned long map_mask); +int cxl_set_capacity(struct cxl_dev_state *cxlds, u64 capacity); +struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds, + const struct cxl_memdev_attach *attach); +struct cxl_region; +struct cxl_endpoint_decoder *cxl_get_committed_decoder(struct cxl_memdev *cxlmd, + struct cxl_region **cxlr); +struct range; +int cxl_get_region_range(struct cxl_region *region, struct range *range); +void cxl_unregister_region(struct cxl_region *cxlr); +struct cxl_port; +struct cxl_root_decoder *cxl_get_hpa_freespace(struct cxl_memdev *cxlmd, + int interleave_ways, + unsigned long flags, + resource_size_t *max); +void cxl_put_root_decoder(struct cxl_root_decoder *cxlrd); +struct cxl_endpoint_decoder *cxl_request_dpa(struct cxl_memdev *cxlmd, + enum cxl_partition_mode mode, + resource_size_t alloc); +int cxl_dpa_free(struct cxl_endpoint_decoder *cxled); +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd, + struct cxl_endpoint_decoder **cxled, + int ways); +#endif /* __CXL_CXL_H__ */ diff --git a/include/cxl/pci.h b/include/cxl/pci.h new file mode 100644 index 0000000000000..edbf980c283f1 --- /dev/null +++ b/include/cxl/pci.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2020 Intel Corporation. All rights reserved. */ + +#ifndef __CXL_CXL_PCI_H__ +#define __CXL_CXL_PCI_H__ + +/* Register Block Identifier (RBI) */ +enum cxl_regloc_type { + CXL_REGLOC_RBI_EMPTY = 0, + CXL_REGLOC_RBI_COMPONENT, + CXL_REGLOC_RBI_VIRT, + CXL_REGLOC_RBI_MEMDEV, + CXL_REGLOC_RBI_PMU, + CXL_REGLOC_RBI_TYPES +}; + +struct pci_dev; +struct cxl_register_map; + +int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map); +int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map); +int cxl_setup_regs(struct cxl_register_map *map); +#endif diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 42cbeaba2a510..0c6087ea979b2 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1637,18 +1637,6 @@ static inline void acpi_use_parent_companion(struct device *dev) ACPI_COMPANION_SET(dev, ACPI_COMPANION(dev->parent)); } -#ifdef CONFIG_ACPI_HMAT -int hmat_update_target_coordinates(int nid, struct access_coordinate *coord, - enum access_coordinate_class access); -#else -static inline int hmat_update_target_coordinates(int nid, - struct access_coordinate *coord, - enum access_coordinate_class access) -{ - return -EOPNOTSUPP; -} -#endif - #ifdef CONFIG_ACPI_NUMA bool acpi_node_backed_by_real_pxm(int nid); #else diff --git a/include/linux/aer.h b/include/linux/aer.h index 02940be66324e..df0f5c382286f 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -56,12 +56,14 @@ struct aer_capability_regs { #if defined(CONFIG_PCIEAER) int pci_aer_clear_nonfatal_status(struct pci_dev *dev); int pcie_aer_is_native(struct pci_dev *dev); +void pci_aer_unmask_internal_errors(struct pci_dev *dev); #else static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev) { return -EINVAL; } static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; } +static inline void pci_aer_unmask_internal_errors(struct pci_dev *dev) { } #endif void pci_print_aer(struct pci_dev *dev, int aer_severity, diff --git a/include/linux/cache_coherency.h b/include/linux/cache_coherency.h new file mode 100644 index 0000000000000..cc81c5733e316 --- /dev/null +++ 
b/include/linux/cache_coherency.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Cache coherency maintenance operation device drivers + * + * Copyright Huawei 2025 + */ +#ifndef _LINUX_CACHE_COHERENCY_H_ +#define _LINUX_CACHE_COHERENCY_H_ + +#include +#include +#include + +struct cc_inval_params { + phys_addr_t addr; + size_t size; +}; + +struct cache_coherency_ops_inst; + +struct cache_coherency_ops { + int (*wbinv)(struct cache_coherency_ops_inst *cci, + struct cc_inval_params *invp); + int (*done)(struct cache_coherency_ops_inst *cci); +}; + +struct cache_coherency_ops_inst { + struct kref kref; + struct list_head node; + const struct cache_coherency_ops *ops; +}; + +int cache_coherency_ops_instance_register(struct cache_coherency_ops_inst *cci); +void cache_coherency_ops_instance_unregister(struct cache_coherency_ops_inst *cci); + +struct cache_coherency_ops_inst * +_cache_coherency_ops_instance_alloc(const struct cache_coherency_ops *ops, + size_t size); +/** + * cache_coherency_ops_instance_alloc - Allocate cache coherency ops instance + * @ops: Cache maintenance operations + * @drv_struct: structure that contains the struct cache_coherency_ops_inst + * @member: Name of the struct cache_coherency_ops_inst member in @drv_struct. + * + * This allocates a driver specific structure and initializes the + * cache_coherency_ops_inst embedded in the drv_struct. Upon success the + * pointer must be freed via cache_coherency_ops_instance_put(). + * + * Returns a &drv_struct * on success, %NULL on error. + */ +#define cache_coherency_ops_instance_alloc(ops, drv_struct, member) \ + ({ \ + static_assert(__same_type(struct cache_coherency_ops_inst, \ + ((drv_struct *)NULL)->member)); \ + static_assert(offsetof(drv_struct, member) == 0); \ + (drv_struct *)_cache_coherency_ops_instance_alloc(ops, \ + sizeof(drv_struct)); \ + }) +void cache_coherency_ops_instance_put(struct cache_coherency_ops_inst *cci); + +#endif diff --git a/include/linux/memory.h b/include/linux/memory.h index 2a770e7c6ab1e..55f0a47c85ebf 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -115,13 +115,13 @@ struct notifier_block; struct mem_section; /* - * Priorities for the hotplug memory callback routines (stored in decreasing - * order in the callback chain) + * Priorities for the hotplug memory callback routines. Invoked from + * high to low. Higher priorities correspond to higher numbers. */ #define DEFAULT_CALLBACK_PRI 0 #define SLAB_CALLBACK_PRI 1 -#define HMAT_CALLBACK_PRI 2 #define CXL_CALLBACK_PRI 5 +#define HMAT_CALLBACK_PRI 6 #define MM_COMPUTE_BATCH_PRI 10 #define CPUSET_CALLBACK_PRI 10 #define MEMTIER_HOTPLUG_PRI 100 diff --git a/include/linux/memregion.h b/include/linux/memregion.h index c013214677897..a55f62cc52660 100644 --- a/include/linux/memregion.h +++ b/include/linux/memregion.h @@ -26,8 +26,10 @@ static inline void memregion_free(int id) /** * cpu_cache_invalidate_memregion - drop any CPU cached data for - * memregions described by @res_desc - * @res_desc: one of the IORES_DESC_* types + * memregion + * @start: start physical address of the target memory region. + * @len: length of the target memory region. -1 for all the regions of + * the target type. * * Perform cache maintenance after a memory event / operation that * changes the contents of physical memory in a cache-incoherent manner. @@ -46,7 +48,7 @@ static inline void memregion_free(int id) * the cache maintenance. 
*/ #ifdef CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION -int cpu_cache_invalidate_memregion(int res_desc); +int cpu_cache_invalidate_memregion(phys_addr_t start, size_t len); bool cpu_cache_has_invalidate_memregion(void); #else static inline bool cpu_cache_has_invalidate_memregion(void) @@ -54,10 +56,16 @@ static inline bool cpu_cache_has_invalidate_memregion(void) return false; } -static inline int cpu_cache_invalidate_memregion(int res_desc) +static inline int cpu_cache_invalidate_memregion(phys_addr_t start, size_t len) { WARN_ON_ONCE("CPU cache invalidation required"); return -ENXIO; } #endif + +static inline int cpu_cache_invalidate_all(void) +{ + return cpu_cache_invalidate_memregion(0, -1); +} + #endif /* _MEMREGION_H_ */ diff --git a/include/linux/node.h b/include/linux/node.h index 2c7529335b21a..866e3323f1fdc 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -85,6 +85,8 @@ struct node_cache_attrs { void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs); void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, enum access_coordinate_class access); +void node_update_perf_attrs(unsigned int nid, struct access_coordinate *coord, + enum access_coordinate_class access); #else static inline void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs) @@ -96,6 +98,12 @@ static inline void node_set_perf_attrs(unsigned int nid, enum access_coordinate_class access) { } + +static inline void node_update_perf_attrs(unsigned int nid, + struct access_coordinate *coord, + enum access_coordinate_class access) +{ +} #endif struct node { diff --git a/include/linux/pci.h b/include/linux/pci.h index a5837cd74faad..60edd5520f751 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -51,7 +51,7 @@ PCI_STATUS_PARITY) /* Number of reset methods used in pci_reset_fn_methods array in pci.c */ -#define PCI_NUM_RESET_METHODS 9 +#define PCI_NUM_RESET_METHODS 8 #define PCI_RESET_PROBE true #define PCI_RESET_DO_RESET false @@ -466,6 +466,7 @@ struct pci_dev { unsigned int is_pciehp:1; unsigned int shpc_managed:1; /* SHPC owned by shpchp */ unsigned int is_thunderbolt:1; /* Thunderbolt controller */ + unsigned int is_cxl:1; /* Compute Express Link (CXL) */ /* * Devices marked being untrusted are the ones that can potentially * execute DMA attacks and similar. They are typically connected @@ -773,6 +774,11 @@ static inline bool pci_is_display(struct pci_dev *pdev) return (pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY; } +static inline bool pcie_is_cxl(struct pci_dev *pci_dev) +{ + return pci_dev->is_cxl; +} + #define for_each_pci_bridge(dev, bus) \ list_for_each_entry(dev, &bus->devices, bus_list) \ if (!pci_is_bridge(dev)) {} else @@ -1956,6 +1962,9 @@ int pci_dev_trylock(struct pci_dev *dev); void pci_dev_unlock(struct pci_dev *dev); DEFINE_GUARD(pci_dev, struct pci_dev *, pci_dev_lock(_T), pci_dev_unlock(_T)) +void pci_dev_save_and_disable(struct pci_dev *dev); +void pci_dev_restore(struct pci_dev *dev); + /* * PCI domain support. 
Sometimes called PCI segment (eg by ACPI), * a PCI domain is defined to be a set of PCI buses which share diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index fecfeb7c8be7f..3523cc8597612 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -301,9 +301,11 @@ TRACE_EVENT(aer_event, const u32 status, const u8 severity, const u8 tlp_header_valid, - struct pcie_tlp_log *tlp), + struct pcie_tlp_log *tlp, + const char *bus_type), - TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp), + + TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp, bus_type), TP_STRUCT__entry( __string( dev_name, dev_name ) @@ -311,10 +313,12 @@ TRACE_EVENT(aer_event, __field( u8, severity ) __field( u8, tlp_header_valid) __array( u32, tlp_header, PCIE_STD_MAX_TLP_HEADERLOG) + __string( bus_type, bus_type ) ), TP_fast_assign( __assign_str(dev_name); + __assign_str(bus_type); __entry->status = status; __entry->severity = severity; __entry->tlp_header_valid = tlp_header_valid; @@ -326,8 +330,8 @@ TRACE_EVENT(aer_event, } ), - TP_printk("%s PCIe Bus Error: severity=%s, %s, TLP Header=%s\n", - __get_str(dev_name), + TP_printk("%s %s Bus Error: severity=%s, %s, TLP Header=%s\n", + __get_str(dev_name), __get_str(bus_type), __entry->severity == AER_CORRECTABLE ? "Corrected" : __entry->severity == AER_FATAL ? "Fatal" : "Uncorrected, non-fatal", diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index bfa9ada355c9b..0b16aa7864f42 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -1234,14 +1234,83 @@ /* Deprecated old name, replaced with PCI_DOE_DATA_OBJECT_DISC_RSP_3_TYPE */ #define PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL PCI_DOE_DATA_OBJECT_DISC_RSP_3_TYPE -/* Compute Express Link (CXL r3.1, sec 8.1.5) */ -#define PCI_DVSEC_CXL_PORT 3 -#define PCI_DVSEC_CXL_PORT_CTL 0x0c -#define PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR 0x00000001 +/* + * Compute Express Link (CXL r4.0, sec 8.1) + * + * Note that CXL DVSEC IDs 3 and 7 are to be ignored when the CXL link state + * is "disconnected" (CXL r4.0, sec 9.12.3). Re-enumerate these + * registers on downstream link-up events.
+ */ -/* CXL 2.0 8.1.3: PCIe DVSEC for CXL Device */ -#define CXL_DVSEC_PCIE_DEVICE 0 -#define CXL_DVSEC_CAP_OFFSET 0xA -#define CXL_DVSEC_CACHE_CAPABLE BIT(0) +/* CXL r4.0, 8.1.3: PCIe DVSEC for CXL Device */ +#define PCI_DVSEC_CXL_DEVICE 0 +#define PCI_DVSEC_CXL_CAP 0xA +#define PCI_DVSEC_CXL_CACHE_CAPABLE _BITUL(0) +#define PCI_DVSEC_CXL_MEM_CAPABLE _BITUL(2) +#define PCI_DVSEC_CXL_HDM_COUNT __GENMASK(5, 4) +#define PCI_DVSEC_CXL_CACHE_WBI_CAPABLE _BITUL(6) +#define PCI_DVSEC_CXL_RST_CAPABLE _BITUL(7) +#define PCI_DVSEC_CXL_RST_TIMEOUT __GENMASK(10, 8) +#define PCI_DVSEC_CXL_RST_MEM_CLR_CAPABLE _BITUL(11) +#define PCI_DVSEC_CXL_CTRL 0xC +#define PCI_DVSEC_CXL_MEM_ENABLE _BITUL(2) +#define PCI_DVSEC_CXL_CTRL_RWL 0x5FED +#define PCI_DVSEC_CXL_CTRL2 0x10 +#define PCI_DVSEC_CXL_DISABLE_CACHING _BITUL(0) +#define PCI_DVSEC_CXL_INIT_CACHE_WBI _BITUL(1) +#define PCI_DVSEC_CXL_INIT_CXL_RST _BITUL(2) +#define PCI_DVSEC_CXL_RST_MEM_CLR_EN _BITUL(3) +#define PCI_DVSEC_CXL_STATUS2 0x12 +#define PCI_DVSEC_CXL_CACHE_INV _BITUL(0) +#define PCI_DVSEC_CXL_RST_DONE _BITUL(1) +#define PCI_DVSEC_CXL_RST_ERR _BITUL(2) +#define PCI_DVSEC_CXL_LOCK 0x14 +#define PCI_DVSEC_CXL_LOCK_CONFIG _BITUL(0) +#define PCI_DVSEC_CXL_RANGE_SIZE_HIGH(i) (0x18 + (i * 0x10)) +#define PCI_DVSEC_CXL_RANGE_SIZE_LOW(i) (0x1C + (i * 0x10)) +#define PCI_DVSEC_CXL_MEM_INFO_VALID _BITUL(0) +#define PCI_DVSEC_CXL_MEM_ACTIVE _BITUL(1) +#define PCI_DVSEC_CXL_MEM_SIZE_LOW __GENMASK(31, 28) +#define PCI_DVSEC_CXL_RANGE_BASE_HIGH(i) (0x20 + (i * 0x10)) +#define PCI_DVSEC_CXL_RANGE_BASE_HI_RWL 0xFFFFFFFF +#define PCI_DVSEC_CXL_RANGE_BASE_LOW(i) (0x24 + (i * 0x10)) +#define PCI_DVSEC_CXL_MEM_BASE_LOW __GENMASK(31, 28) +#define PCI_DVSEC_CXL_RANGE_BASE_LO_RWL 0xF0000000 + +#define CXL_DVSEC_RANGE_MAX 2 + +/* CXL r4.0, 8.1.4: Non-CXL Function Map DVSEC */ +#define PCI_DVSEC_CXL_FUNCTION_MAP 2 +#define PCI_DVSEC_CXL_FUNCTION_MAP_REG 0x0C + +/* CXL r4.0, 8.1.5: Extensions DVSEC for Ports */ +#define PCI_DVSEC_CXL_PORT 3 +#define PCI_DVSEC_CXL_PORT_CTL 0x0c +#define PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR 0x00000001 + +/* CXL r4.0, 8.1.6: GPF DVSEC for CXL Port */ +#define PCI_DVSEC_CXL_PORT_GPF 4 +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_1_CONTROL 0x0C +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_1_TMO_BASE __GENMASK(3, 0) +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_1_TMO_SCALE __GENMASK(11, 8) +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_2_CONTROL 0xE +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_2_TMO_BASE __GENMASK(3, 0) +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_2_TMO_SCALE __GENMASK(11, 8) + +/* CXL r4.0, 8.1.7: GPF DVSEC for CXL Device */ +#define PCI_DVSEC_CXL_DEVICE_GPF 5 + +/* CXL r4.0, 8.1.8: Flex Bus DVSEC */ +#define PCI_DVSEC_CXL_FLEXBUS_PORT 7 +#define PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS 0xE +#define PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS_CACHE _BITUL(0) +#define PCI_DVSEC_CXL_FLEXBUS_PORT_STATUS_MEM _BITUL(2) + +/* CXL r4.0, 8.1.9: Register Locator DVSEC */ +#define PCI_DVSEC_CXL_REG_LOCATOR 8 +#define PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1 0xC +#define PCI_DVSEC_CXL_REG_LOCATOR_BIR __GENMASK(2, 0) +#define PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_ID __GENMASK(15, 8) +#define PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_OFF_LOW __GENMASK(31, 16) #endif /* LINUX_PCI_REGS_H */ diff --git a/lib/Kconfig b/lib/Kconfig index c483951b624ff..cd8e5844f9bb6 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -543,6 +543,9 @@ config MEMREGION config ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION bool +config GENERIC_CPU_CACHE_MAINTENANCE + bool + config ARCH_HAS_MEMREMAP_COMPAT_ALIGN bool diff --git a/lib/Makefile b/lib/Makefile 
index 392ff808c9b90..eed20c50f358b 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -130,6 +130,8 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o +obj-$(CONFIG_GENERIC_CPU_CACHE_MAINTENANCE) += cache_maint.o + lib-y += logic_pio.o lib-$(CONFIG_INDIRECT_IOMEM) += logic_iomem.o diff --git a/lib/cache_maint.c b/lib/cache_maint.c new file mode 100644 index 0000000000000..9256a9ffc34c7 --- /dev/null +++ b/lib/cache_maint.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Generic support for Memory System Cache Maintenance operations. + * + * Coherency maintenance drivers register with this simple framework that will + * iterate over each registered instance to first kick off invalidation and + * then to wait until it is complete. + * + * If no implementations are registered yet cpu_cache_has_invalidate_memregion() + * will return false. If this runs concurrently with unregistration then a + * race exists but this is no worse than the case where the operations instance + * responsible for a given memory region has not yet registered. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static LIST_HEAD(cache_ops_instance_list); +static DECLARE_RWSEM(cache_ops_instance_list_lock); + +static void __cache_coherency_ops_instance_free(struct kref *kref) +{ + struct cache_coherency_ops_inst *cci = + container_of(kref, struct cache_coherency_ops_inst, kref); + kfree(cci); +} + +void cache_coherency_ops_instance_put(struct cache_coherency_ops_inst *cci) +{ + kref_put(&cci->kref, __cache_coherency_ops_instance_free); +} +EXPORT_SYMBOL_GPL(cache_coherency_ops_instance_put); + +static int cache_inval_one(struct cache_coherency_ops_inst *cci, void *data) +{ + if (!cci->ops) + return -EINVAL; + + return cci->ops->wbinv(cci, data); +} + +static int cache_inval_done_one(struct cache_coherency_ops_inst *cci) +{ + if (!cci->ops) + return -EINVAL; + + if (!cci->ops->done) + return 0; + + return cci->ops->done(cci); +} + +static int cache_invalidate_memregion(phys_addr_t addr, size_t size) +{ + int ret; + struct cache_coherency_ops_inst *cci; + struct cc_inval_params params = { + .addr = addr, + .size = size, + }; + + guard(rwsem_read)(&cache_ops_instance_list_lock); + list_for_each_entry(cci, &cache_ops_instance_list, node) { + ret = cache_inval_one(cci, ¶ms); + if (ret) + return ret; + } + list_for_each_entry(cci, &cache_ops_instance_list, node) { + ret = cache_inval_done_one(cci); + if (ret) + return ret; + } + + return 0; +} + +struct cache_coherency_ops_inst * +_cache_coherency_ops_instance_alloc(const struct cache_coherency_ops *ops, + size_t size) +{ + struct cache_coherency_ops_inst *cci; + + if (!ops || !ops->wbinv) + return NULL; + + cci = kzalloc(size, GFP_KERNEL); + if (!cci) + return NULL; + + cci->ops = ops; + INIT_LIST_HEAD(&cci->node); + kref_init(&cci->kref); + + return cci; +} +EXPORT_SYMBOL_NS_GPL(_cache_coherency_ops_instance_alloc, "CACHE_COHERENCY"); + +int cache_coherency_ops_instance_register(struct cache_coherency_ops_inst *cci) +{ + guard(rwsem_write)(&cache_ops_instance_list_lock); + list_add(&cci->node, &cache_ops_instance_list); + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cache_coherency_ops_instance_register, "CACHE_COHERENCY"); + +void cache_coherency_ops_instance_unregister(struct cache_coherency_ops_inst *cci) +{ + guard(rwsem_write)(&cache_ops_instance_list_lock); + list_del(&cci->node); +} 
+EXPORT_SYMBOL_NS_GPL(cache_coherency_ops_instance_unregister, "CACHE_COHERENCY"); + +int cpu_cache_invalidate_memregion(phys_addr_t start, size_t len) +{ + return cache_invalidate_memregion(start, len); +} +EXPORT_SYMBOL_NS_GPL(cpu_cache_invalidate_memregion, "DEVMEM"); + +/* + * Used for optimization / debug purposes only as removal can race + * + * Machines that do not support invalidation, e.g. VMs, will not have any + * operations instance to register and so this will always return false. + */ +bool cpu_cache_has_invalidate_memregion(void) +{ + guard(rwsem_read)(&cache_ops_instance_list_lock); + return !list_empty(&cache_ops_instance_list); +} +EXPORT_SYMBOL_NS_GPL(cpu_cache_has_invalidate_memregion, "DEVMEM"); diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index d07f14cb7aa45..6eceefefb0e04 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -4,23 +4,19 @@ ldflags-y += --wrap=is_acpi_device_node ldflags-y += --wrap=acpi_evaluate_integer ldflags-y += --wrap=acpi_pci_find_root ldflags-y += --wrap=nvdimm_bus_register -ldflags-y += --wrap=devm_cxl_port_enumerate_dports -ldflags-y += --wrap=devm_cxl_setup_hdm -ldflags-y += --wrap=devm_cxl_add_passthrough_decoder -ldflags-y += --wrap=devm_cxl_enumerate_decoders ldflags-y += --wrap=cxl_await_media_ready -ldflags-y += --wrap=cxl_hdm_decode_init -ldflags-y += --wrap=cxl_dvsec_rr_decode ldflags-y += --wrap=devm_cxl_add_rch_dport -ldflags-y += --wrap=cxl_rcd_component_reg_phys ldflags-y += --wrap=cxl_endpoint_parse_cdat ldflags-y += --wrap=cxl_dport_init_ras_reporting +ldflags-y += --wrap=devm_cxl_endpoint_decoders_setup +ldflags-y += --wrap=hmat_get_extended_linear_cache_size DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl CXL_CORE_SRC := $(DRIVERS)/cxl/core ccflags-y := -I$(srctree)/drivers/cxl/ ccflags-y += -D__mock=__weak +ccflags-y += -DCXL_TEST_ENABLE=1 ccflags-y += -DTRACE_INCLUDE_PATH=$(CXL_CORE_SRC) -I$(srctree)/drivers/cxl/core/ obj-m += cxl_acpi.o @@ -61,12 +57,13 @@ cxl_core-y += $(CXL_CORE_SRC)/pci.o cxl_core-y += $(CXL_CORE_SRC)/hdm.o cxl_core-y += $(CXL_CORE_SRC)/pmu.o cxl_core-y += $(CXL_CORE_SRC)/cdat.o -cxl_core-y += $(CXL_CORE_SRC)/ras.o cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o cxl_core-$(CONFIG_CXL_MCE) += $(CXL_CORE_SRC)/mce.o cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += $(CXL_CORE_SRC)/edac.o +cxl_core-$(CONFIG_CXL_RAS) += $(CXL_CORE_SRC)/ras.o +cxl_core-$(CONFIG_CXL_RAS) += $(CXL_CORE_SRC)/ras_rch.o cxl_core-y += config_check.o cxl_core-y += cxl_core_test.o cxl_core-y += cxl_core_exports.o diff --git a/tools/testing/cxl/cxl_core_exports.c b/tools/testing/cxl/cxl_core_exports.c index f088792a8925f..6754de35598d5 100644 --- a/tools/testing/cxl/cxl_core_exports.c +++ b/tools/testing/cxl/cxl_core_exports.c @@ -2,6 +2,28 @@ /* Copyright(c) 2022 Intel Corporation. All rights reserved. 
*/ #include "cxl.h" +#include "exports.h" /* Exporting of cxl_core symbols that are only used by cxl_test */ EXPORT_SYMBOL_NS_GPL(cxl_num_decoders_committed, "CXL"); + +cxl_add_dport_by_dev_fn _devm_cxl_add_dport_by_dev = + __devm_cxl_add_dport_by_dev; +EXPORT_SYMBOL_NS_GPL(_devm_cxl_add_dport_by_dev, "CXL"); + +struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev) +{ + return _devm_cxl_add_dport_by_dev(port, dport_dev); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_add_dport_by_dev, "CXL"); + +cxl_switch_decoders_setup_fn _devm_cxl_switch_port_decoders_setup = + __devm_cxl_switch_port_decoders_setup; +EXPORT_SYMBOL_NS_GPL(_devm_cxl_switch_port_decoders_setup, "CXL"); + +int devm_cxl_switch_port_decoders_setup(struct cxl_port *port) +{ + return _devm_cxl_switch_port_decoders_setup(port); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_switch_port_decoders_setup, "CXL"); diff --git a/tools/testing/cxl/exports.h b/tools/testing/cxl/exports.h new file mode 100644 index 0000000000000..7ebee7c0bd67e --- /dev/null +++ b/tools/testing/cxl/exports.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2025 Intel Corporation */ +#ifndef __MOCK_CXL_EXPORTS_H_ +#define __MOCK_CXL_EXPORTS_H_ + +typedef struct cxl_dport *(*cxl_add_dport_by_dev_fn)(struct cxl_port *port, + struct device *dport_dev); +extern cxl_add_dport_by_dev_fn _devm_cxl_add_dport_by_dev; + +typedef int(*cxl_switch_decoders_setup_fn)(struct cxl_port *port); +extern cxl_switch_decoders_setup_fn _devm_cxl_switch_port_decoders_setup; + +#endif diff --git a/tools/testing/cxl/test/Kbuild b/tools/testing/cxl/test/Kbuild index 6b19278978561..af50972c8b6d3 100644 --- a/tools/testing/cxl/test/Kbuild +++ b/tools/testing/cxl/test/Kbuild @@ -4,6 +4,7 @@ ccflags-y := -I$(srctree)/drivers/cxl/ -I$(srctree)/drivers/cxl/core obj-m += cxl_test.o obj-m += cxl_mock.o obj-m += cxl_mock_mem.o +obj-m += cxl_translate.o cxl_test-y := cxl.o cxl_mock-y := mock.o diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index f4dceecf7e335..e68bf64460996 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -15,6 +15,7 @@ #include "mock.h" static int interleave_arithmetic; +static bool extended_linear_cache; #define FAKE_QTG_ID 42 @@ -26,6 +27,9 @@ static int interleave_arithmetic; #define NR_CXL_PORT_DECODERS 8 #define NR_BRIDGES (NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST + NR_CXL_RCH) +#define MOCK_AUTO_REGION_SIZE_DEFAULT SZ_512M +static int mock_auto_region_size = MOCK_AUTO_REGION_SIZE_DEFAULT; + static struct platform_device *cxl_acpi; static struct platform_device *cxl_host_bridge[NR_CXL_HOST_BRIDGES]; #define NR_MULTI_ROOT (NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS) @@ -210,7 +214,7 @@ static struct { }, .interleave_ways = 0, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_VOLATILE, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 4UL, @@ -225,7 +229,7 @@ static struct { }, .interleave_ways = 1, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_VOLATILE, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 8UL, @@ -240,7 +244,7 @@ static struct { }, .interleave_ways = 0, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 4UL, @@ 
-255,7 +259,7 @@ static struct { }, .interleave_ways = 1, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 8UL, @@ -270,7 +274,7 @@ static struct { }, .interleave_ways = 0, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 4UL, @@ -285,7 +289,7 @@ static struct { }, .interleave_ways = 0, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_VOLATILE, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M, @@ -302,7 +306,7 @@ static struct { .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR, .interleave_ways = 0, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 8UL, @@ -318,7 +322,7 @@ static struct { .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR, .interleave_ways = 1, .granularity = 0, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 8UL, @@ -334,7 +338,7 @@ static struct { .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR, .interleave_ways = 8, .granularity = 1, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = FAKE_QTG_ID, .window_size = SZ_512M * 6UL, @@ -426,6 +430,22 @@ static struct cxl_mock_res *alloc_mock_res(resource_size_t size, int align) return res; } +/* Only update CFMWS0 as this is used by the auto region. 
*/ +static void cfmws_elc_update(struct acpi_cedt_cfmws *window, int index) +{ + if (!extended_linear_cache) + return; + + if (index != 0) + return; + + /* + * The window size should be 2x of the CXL region size where half is + * DRAM and half is CXL + */ + window->window_size = mock_auto_region_size * 2; +} + static int populate_cedt(void) { struct cxl_mock_res *res; @@ -450,6 +470,7 @@ static int populate_cedt(void) for (i = cfmws_start; i <= cfmws_end; i++) { struct acpi_cedt_cfmws *window = mock_cfmws[i]; + cfmws_elc_update(window, i); res = alloc_mock_res(window->window_size, SZ_256M); if (!res) return -ENOMEM; @@ -591,6 +612,25 @@ mock_acpi_evaluate_integer(acpi_handle handle, acpi_string pathname, return AE_OK; } +static int +mock_hmat_get_extended_linear_cache_size(struct resource *backing_res, + int nid, resource_size_t *cache_size) +{ + struct acpi_cedt_cfmws *window = mock_cfmws[0]; + struct resource cfmws0_res = + DEFINE_RES_MEM(window->base_hpa, window->window_size); + + if (!extended_linear_cache || + !resource_contains(&cfmws0_res, backing_res)) { + return hmat_get_extended_linear_cache_size(backing_res, + nid, cache_size); + } + + *cache_size = mock_auto_region_size; + + return 0; +} + static struct pci_bus mock_pci_bus[NR_BRIDGES]; static struct acpi_pci_root mock_pci_root[ARRAY_SIZE(mock_pci_bus)] = { [0] = { @@ -643,15 +683,8 @@ static struct cxl_hdm *mock_cxl_setup_hdm(struct cxl_port *port, return cxlhdm; } -static int mock_cxl_add_passthrough_decoder(struct cxl_port *port) -{ - dev_err(&port->dev, "unexpected passthrough decoder for cxl_test\n"); - return -EOPNOTSUPP; -} - - struct target_map_ctx { - int *target_map; + u32 *target_map; int index; int target_count; }; @@ -808,7 +841,6 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) struct cxl_endpoint_decoder *cxled; struct cxl_switch_decoder *cxlsd; struct cxl_port *port, *iter; - const int size = SZ_512M; struct cxl_memdev *cxlmd; struct cxl_dport *dport; struct device *dev; @@ -864,9 +896,12 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) return; } + if (extended_linear_cache) + base += mock_auto_region_size; + cxld->hpa_range = (struct range) { .start = base, - .end = base + size - 1, + .end = base + mock_auto_region_size - 1, }; cxld->interleave_ways = 2; @@ -875,7 +910,8 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) cxld->flags = CXL_DECODER_F_ENABLE; cxled->state = CXL_DECODER_STATE_AUTO; port->commit_end = cxld->id; - devm_cxl_dpa_reserve(cxled, 0, size / cxld->interleave_ways, 0); + devm_cxl_dpa_reserve(cxled, 0, + mock_auto_region_size / cxld->interleave_ways, 0); cxld->commit = mock_decoder_commit; cxld->reset = mock_decoder_reset; @@ -894,15 +930,21 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) */ if (WARN_ON(!dev)) continue; + cxlsd = to_cxl_switch_decoder(dev); if (i == 0) { /* put cxl_mem.4 second in the decode order */ - if (pdev->id == 4) + if (pdev->id == 4) { cxlsd->target[1] = dport; - else + cxld->target_map[1] = dport->port_id; + } else { cxlsd->target[0] = dport; - } else + cxld->target_map[0] = dport->port_id; + } + } else { cxlsd->target[0] = dport; + cxld->target_map[0] = dport->port_id; + } cxld = &cxlsd->cxld; cxld->target_type = CXL_DECODER_HOSTONLYMEM; cxld->flags = CXL_DECODER_F_ENABLE; @@ -918,7 +960,7 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) cxld->interleave_granularity = 4096; cxld->hpa_range = (struct range) { .start = base, - .end = base + size - 1, + .end = base + mock_auto_region_size - 1, }; 
put_device(dev); @@ -955,9 +997,7 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, target_count = NR_CXL_SWITCH_PORTS; for (i = 0; i < NR_CXL_PORT_DECODERS; i++) { - int target_map[CXL_DECODER_MAX_INTERLEAVE] = { 0 }; struct target_map_ctx ctx = { - .target_map = target_map, .target_count = target_count, }; struct cxl_decoder *cxld; @@ -986,6 +1026,8 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, cxld = &cxled->cxld; } + ctx.target_map = cxld->target_map; + mock_init_hdm_decoder(cxld); if (target_count) { @@ -997,7 +1039,7 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, } } - rc = cxl_decoder_add_locked(cxld, target_map); + rc = cxl_decoder_add_locked(cxld); if (rc) { put_device(&cxld->dev); dev_err(&port->dev, "Failed to add decoder\n"); @@ -1013,10 +1055,42 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, return 0; } -static int mock_cxl_port_enumerate_dports(struct cxl_port *port) +static int __mock_cxl_decoders_setup(struct cxl_port *port) +{ + struct cxl_hdm *cxlhdm; + + cxlhdm = mock_cxl_setup_hdm(port, NULL); + if (IS_ERR(cxlhdm)) { + if (PTR_ERR(cxlhdm) != -ENODEV) + dev_err(&port->dev, "Failed to map HDM decoder capability\n"); + return PTR_ERR(cxlhdm); + } + + return mock_cxl_enumerate_decoders(cxlhdm, NULL); +} + +static int mock_cxl_switch_port_decoders_setup(struct cxl_port *port) +{ + if (is_cxl_root(port) || is_cxl_endpoint(port)) + return -EOPNOTSUPP; + + return __mock_cxl_decoders_setup(port); +} + +static int mock_cxl_endpoint_decoders_setup(struct cxl_port *port) +{ + if (!is_cxl_endpoint(port)) + return -EOPNOTSUPP; + + return __mock_cxl_decoders_setup(port); +} + +static int get_port_array(struct cxl_port *port, + struct platform_device ***port_array, + int *port_array_size) { struct platform_device **array; - int i, array_size; + int array_size; if (port->depth == 1) { if (is_multi_bridge(port->uport_dev)) { @@ -1050,9 +1124,24 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port) return -ENXIO; } + *port_array = array; + *port_array_size = array_size; + + return 0; +} + +static struct cxl_dport *mock_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev) +{ + struct platform_device **array; + int rc, i, array_size; + + rc = get_port_array(port, &array, &array_size); + if (rc) + return ERR_PTR(rc); + for (i = 0; i < array_size; i++) { struct platform_device *pdev = array[i]; - struct cxl_dport *dport; if (pdev->dev.parent != port->uport_dev) { dev_dbg(&port->dev, "%s: mismatch parent %s\n", @@ -1061,14 +1150,14 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port) continue; } - dport = devm_cxl_add_dport(port, &pdev->dev, pdev->id, - CXL_RESOURCE_NONE); + if (&pdev->dev != dport_dev) + continue; - if (IS_ERR(dport)) - return PTR_ERR(dport); + return devm_cxl_add_dport(port, &pdev->dev, pdev->id, + CXL_RESOURCE_NONE); } - return 0; + return ERR_PTR(-ENODEV); } /* @@ -1127,11 +1216,12 @@ static struct cxl_mock_ops cxl_mock_ops = { .acpi_table_parse_cedt = mock_acpi_table_parse_cedt, .acpi_evaluate_integer = mock_acpi_evaluate_integer, .acpi_pci_find_root = mock_acpi_pci_find_root, - .devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports, - .devm_cxl_setup_hdm = mock_cxl_setup_hdm, - .devm_cxl_add_passthrough_decoder = mock_cxl_add_passthrough_decoder, - .devm_cxl_enumerate_decoders = mock_cxl_enumerate_decoders, + .devm_cxl_switch_port_decoders_setup = mock_cxl_switch_port_decoders_setup, + .devm_cxl_endpoint_decoders_setup = 
mock_cxl_endpoint_decoders_setup, .cxl_endpoint_parse_cdat = mock_cxl_endpoint_parse_cdat, + .devm_cxl_add_dport_by_dev = mock_cxl_add_dport_by_dev, + .hmat_get_extended_linear_cache_size = + mock_hmat_get_extended_linear_cache_size, .list = LIST_HEAD_INIT(cxl_mock_ops.list), }; @@ -1621,6 +1711,8 @@ static __exit void cxl_test_exit(void) module_param(interleave_arithmetic, int, 0444); MODULE_PARM_DESC(interleave_arithmetic, "Modulo:0, XOR:1"); +module_param(extended_linear_cache, bool, 0444); +MODULE_PARM_DESC(extended_linear_cache, "Enable extended linear cache support"); module_init(cxl_test_init); module_exit(cxl_test_exit); MODULE_LICENSE("GPL v2"); diff --git a/tools/testing/cxl/test/cxl_translate.c b/tools/testing/cxl/test/cxl_translate.c new file mode 100644 index 0000000000000..2200ae21795c7 --- /dev/null +++ b/tools/testing/cxl/test/cxl_translate.c @@ -0,0 +1,445 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright(c) 2025 Intel Corporation. All rights reserved. + +/* Preface all log entries with "cxl_translate" */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +/* Maximum number of test vectors and entry length */ +#define MAX_TABLE_ENTRIES 128 +#define MAX_ENTRY_LEN 128 + +/* Expected number of parameters in each test vector */ +#define EXPECTED_PARAMS 7 + +/* Module parameters for test vectors */ +static char *table[MAX_TABLE_ENTRIES]; +static int table_num; + +/* Interleave Arithmetic */ +#define MODULO_MATH 0 +#define XOR_MATH 1 + +/* + * XOR mapping configuration + * The test data sets all use the same set of xormaps. When additional + * data sets arrive for validation, this static setup will need to + * be changed to accept xormaps as additional parameters. 
+ */ +struct cxl_cxims_data *cximsd; +static u64 xormaps[] = { + 0x2020900, + 0x4041200, + 0x1010400, + 0x800, +}; + +static int nr_maps = ARRAY_SIZE(xormaps); + +#define HBIW_TO_NR_MAPS_SIZE (CXL_DECODER_MAX_INTERLEAVE + 1) +static const int hbiw_to_nr_maps[HBIW_TO_NR_MAPS_SIZE] = { + [1] = 0, [2] = 1, [3] = 0, [4] = 2, [6] = 1, [8] = 3, [12] = 2, [16] = 4 +}; + +/** + * to_hpa - calculate an HPA offset from a DPA offset and position + * + * dpa_offset: device physical address offset + * pos: device's position in interleave + * r_eiw: region encoded interleave ways + * r_eig: region encoded interleave granularity + * hb_ways: host bridge interleave ways + * math: interleave arithmetic (MODULO_MATH or XOR_MATH) + * + * Returns: host physical address offset + */ +static u64 to_hpa(u64 dpa_offset, int pos, u8 r_eiw, u16 r_eig, u8 hb_ways, + u8 math) +{ + u64 hpa_offset; + + /* Calculate base HPA offset from DPA and position */ + hpa_offset = cxl_calculate_hpa_offset(dpa_offset, pos, r_eiw, r_eig); + + if (math == XOR_MATH) { + cximsd->nr_maps = hbiw_to_nr_maps[hb_ways]; + if (cximsd->nr_maps) + return cxl_do_xormap_calc(cximsd, hpa_offset, hb_ways); + } + return hpa_offset; +} + +/** + * to_dpa - translate an HPA offset to DPA offset + * + * hpa_offset: host physical address offset + * r_eiw: region encoded interleave ways + * r_eig: region encoded interleave granularity + * hb_ways: host bridge interleave ways + * math: interleave arithmetic (MODULO_MATH or XOR_MATH) + * + * Returns: device physical address offset + */ +static u64 to_dpa(u64 hpa_offset, u8 r_eiw, u16 r_eig, u8 hb_ways, u8 math) +{ + u64 offset = hpa_offset; + + if (math == XOR_MATH) { + cximsd->nr_maps = hbiw_to_nr_maps[hb_ways]; + if (cximsd->nr_maps) + offset = + cxl_do_xormap_calc(cximsd, hpa_offset, hb_ways); + } + return cxl_calculate_dpa_offset(offset, r_eiw, r_eig); +} + +/** + * to_pos - extract an interleave position from an HPA offset + * + * hpa_offset: host physical address offset + * r_eiw: region encoded interleave ways + * r_eig: region encoded interleave granularity + * hb_ways: host bridge interleave ways + * math: interleave arithmetic (MODULO_MATH or XOR_MATH) + * + * Returns: device's position in region interleave + */ +static u64 to_pos(u64 hpa_offset, u8 r_eiw, u16 r_eig, u8 hb_ways, u8 math) +{ + u64 offset = hpa_offset; + + /* Reverse XOR mapping if specified */ + if (math == XOR_MATH) + offset = cxl_do_xormap_calc(cximsd, hpa_offset, hb_ways); + + return cxl_calculate_position(offset, r_eiw, r_eig); +} + +/** + * run_translation_test - execute forward and reverse translations + * + * @dpa: device physical address + * @pos: expected position in region interleave + * @r_eiw: region encoded interleave ways + * @r_eig: region encoded interleave granularity + * @hb_ways: host bridge interleave ways + * @math: interleave arithmetic (MODULO_MATH or XOR_MATH) + * @expect_hpa: expected host physical address + * + * Returns: 0 on success, -1 on failure + */ +static int run_translation_test(u64 dpa, int pos, u8 r_eiw, u16 r_eig, + u8 hb_ways, int math, u64 expect_hpa) +{ + u64 translated_spa, reverse_dpa; + int reverse_pos; + + /* Test Device to Host translation: DPA + POS -> SPA */ + translated_spa = to_hpa(dpa, pos, r_eiw, r_eig, hb_ways, math); + if (translated_spa != expect_hpa) { + pr_err("Device to host failed: expected HPA %llu, got %llu\n", + expect_hpa, translated_spa); + return -1; + } + + /* Test Host to Device DPA translation: SPA -> DPA */ + reverse_dpa = to_dpa(translated_spa, r_eiw, r_eig,
hb_ways, math); + if (reverse_dpa != dpa) { + pr_err("Host to Device DPA failed: expected %llu, got %llu\n", + dpa, reverse_dpa); + return -1; + } + + /* Test Host to Device Position translation: SPA -> POS */ + reverse_pos = to_pos(translated_spa, r_eiw, r_eig, hb_ways, math); + if (reverse_pos != pos) { + pr_err("Position lookup failed: expected %d, got %d\n", pos, + reverse_pos); + return -1; + } + + return 0; +} + +/** + * parse_test_vector - parse a single test vector string + * + * entry: test vector string to parse + * dpa: device physical address + * pos: expected position in region interleave + * r_eiw: region encoded interleave ways + * r_eig: region encoded interleave granularity + * hb_ways: host bridge interleave ways + * math: interleave arithmetic (MODULO_MATH or XOR_MATH) + * expect_hpa: expected host physical address + * + * Returns: 0 on success, negative error code on failure + */ +static int parse_test_vector(const char *entry, u64 *dpa, int *pos, u8 *r_eiw, + u16 *r_eig, u8 *hb_ways, int *math, + u64 *expect_hpa) +{ + unsigned int tmp_r_eiw, tmp_r_eig, tmp_hb_ways; + int parsed; + + parsed = sscanf(entry, "%llu %d %u %u %u %d %llu", dpa, pos, &tmp_r_eiw, + &tmp_r_eig, &tmp_hb_ways, math, expect_hpa); + + if (parsed != EXPECTED_PARAMS) { + pr_err("Parse error: expected %d parameters, got %d in '%s'\n", + EXPECTED_PARAMS, parsed, entry); + return -EINVAL; + } + if (tmp_r_eiw > U8_MAX || tmp_r_eig > U16_MAX || tmp_hb_ways > U8_MAX) { + pr_err("Parameter overflow in entry: '%s'\n", entry); + return -ERANGE; + } + if (*math != MODULO_MATH && *math != XOR_MATH) { + pr_err("Invalid math type %d in entry: '%s'\n", *math, entry); + return -EINVAL; + } + *r_eiw = tmp_r_eiw; + *r_eig = tmp_r_eig; + *hb_ways = tmp_hb_ways; + + return 0; +} + +/* + * setup_xor_mapping - Initialize XOR mapping data structure + * + * The test data sets all use the same HBIG so we can use one set + * of xormaps, and set the number to apply based on HBIW before + * calling cxl_do_xormap_calc(). + * + * When additional data sets arrive for validation with different + * HBIGs, this static setup will need to be updated.
+ * + * Returns: 0 on success, negative error code on failure + */ +static int setup_xor_mapping(void) +{ + if (nr_maps <= 0) + return -EINVAL; + + cximsd = kzalloc(struct_size(cximsd, xormaps, nr_maps), GFP_KERNEL); + if (!cximsd) + return -ENOMEM; + + memcpy(cximsd->xormaps, xormaps, nr_maps * sizeof(*cximsd->xormaps)); + cximsd->nr_maps = nr_maps; + + return 0; +} + +static int test_random_params(void) +{ + u8 valid_eiws[] = { 0, 1, 2, 3, 4, 8, 9, 10 }; + u16 valid_eigs[] = { 0, 1, 2, 3, 4, 5, 6 }; + int i, ways, pos, reverse_pos; + u64 dpa, hpa, reverse_dpa; + int iterations = 10000; + int failures = 0; + + for (i = 0; i < iterations; i++) { + /* Generate valid random parameters for eiw, eig, pos, dpa */ + u8 eiw = valid_eiws[get_random_u32() % ARRAY_SIZE(valid_eiws)]; + u16 eig = valid_eigs[get_random_u32() % ARRAY_SIZE(valid_eigs)]; + + eiw_to_ways(eiw, &ways); + pos = get_random_u32() % ways; + dpa = get_random_u64() >> 12; + + hpa = cxl_calculate_hpa_offset(dpa, pos, eiw, eig); + reverse_dpa = cxl_calculate_dpa_offset(hpa, eiw, eig); + reverse_pos = cxl_calculate_position(hpa, eiw, eig); + + if (reverse_dpa != dpa || reverse_pos != pos) { + pr_err("test random iter %d FAIL hpa=%llu, dpa=%llu reverse_dpa=%llu, pos=%d reverse_pos=%d eiw=%u eig=%u\n", + i, hpa, dpa, reverse_dpa, pos, reverse_pos, eiw, + eig); + + if (failures++ > 10) { + pr_err("test random too many failures, stop\n"); + break; + } + } + } + pr_info("..... test random: PASS %d FAIL %d\n", i - failures, failures); + + if (failures) + return -EINVAL; + + return 0; +} + +struct param_test { + u8 eiw; + u16 eig; + int pos; + bool expect; /* true: expect pass, false: expect fail */ + const char *desc; +}; + +static struct param_test param_tests[] = { + { 0x0, 0, 0, true, "1-way, min eig=0, pos=0" }, + { 0x0, 3, 0, true, "1-way, mid eig=3, pos=0" }, + { 0x0, 6, 0, true, "1-way, max eig=6, pos=0" }, + { 0x1, 0, 0, true, "2-way, eig=0, pos=0" }, + { 0x1, 3, 1, true, "2-way, eig=3, max pos=1" }, + { 0x1, 6, 1, true, "2-way, eig=6, max pos=1" }, + { 0x2, 0, 0, true, "4-way, eig=0, pos=0" }, + { 0x2, 3, 3, true, "4-way, eig=3, max pos=3" }, + { 0x2, 6, 3, true, "4-way, eig=6, max pos=3" }, + { 0x3, 0, 0, true, "8-way, eig=0, pos=0" }, + { 0x3, 3, 7, true, "8-way, eig=3, max pos=7" }, + { 0x3, 6, 7, true, "8-way, eig=6, max pos=7" }, + { 0x4, 0, 0, true, "16-way, eig=0, pos=0" }, + { 0x4, 3, 15, true, "16-way, eig=3, max pos=15" }, + { 0x4, 6, 15, true, "16-way, eig=6, max pos=15" }, + { 0x8, 0, 0, true, "3-way, eig=0, pos=0" }, + { 0x8, 3, 2, true, "3-way, eig=3, max pos=2" }, + { 0x8, 6, 2, true, "3-way, eig=6, max pos=2" }, + { 0x9, 0, 0, true, "6-way, eig=0, pos=0" }, + { 0x9, 3, 5, true, "6-way, eig=3, max pos=5" }, + { 0x9, 6, 5, true, "6-way, eig=6, max pos=5" }, + { 0xA, 0, 0, true, "12-way, eig=0, pos=0" }, + { 0xA, 3, 11, true, "12-way, eig=3, max pos=11" }, + { 0xA, 6, 11, true, "12-way, eig=6, max pos=11" }, + { 0x5, 0, 0, false, "invalid eiw=5" }, + { 0x7, 0, 0, false, "invalid eiw=7" }, + { 0xB, 0, 0, false, "invalid eiw=0xB" }, + { 0xFF, 0, 0, false, "invalid eiw=0xFF" }, + { 0x1, 7, 0, false, "invalid eig=7 (out of range)" }, + { 0x2, 0x10, 0, false, "invalid eig=0x10" }, + { 0x3, 0xFFFF, 0, false, "invalid eig=0xFFFF" }, + { 0x1, 0, -1, false, "pos < 0" }, + { 0x1, 0, 2, false, "2-way, pos=2 (>= ways)" }, + { 0x2, 0, 4, false, "4-way, pos=4 (>= ways)" }, + { 0x3, 0, 8, false, "8-way, pos=8 (>= ways)" }, + { 0x4, 0, 16, false, "16-way, pos=16 (>= ways)" }, + { 0x8, 0, 3, false, "3-way, pos=3 (>= ways)" }, + { 0x9, 
0, 6, false, "6-way, pos=6 (>= ways)" }, + { 0xA, 0, 12, false, "12-way, pos=12 (>= ways)" }, +}; + +static int test_cxl_validate_translation_params(void) +{ + int i, rc, failures = 0; + bool valid; + + for (i = 0; i < ARRAY_SIZE(param_tests); i++) { + struct param_test *t = ¶m_tests[i]; + + rc = cxl_validate_translation_params(t->eiw, t->eig, t->pos); + valid = (rc == 0); + + if (valid != t->expect) { + pr_err("test params failed: %s\n", t->desc); + failures++; + } + } + pr_info("..... test params: PASS %d FAIL %d\n", i - failures, failures); + + if (failures) + return -EINVAL; + + return 0; +} + +/* + * cxl_translate_init + * + * Run the internal validation tests when no params are passed. + * Otherwise, parse the parameters (test vectors), and kick off + * the translation test. + * + * Returns: 0 on success, negative error code on failure + */ +static int __init cxl_translate_init(void) +{ + int rc, i; + + /* If no tables are passed, validate module params only */ + if (table_num == 0) { + pr_info("Internal validation test start...\n"); + rc = test_cxl_validate_translation_params(); + if (rc) + return rc; + + rc = test_random_params(); + if (rc) + return rc; + + pr_info("Internal validation test completed successfully\n"); + + return 0; + } + + pr_info("CXL translate test module loaded with %d test vectors\n", + table_num); + + rc = setup_xor_mapping(); + if (rc) + return rc; + + /* Process each test vector */ + for (i = 0; i < table_num; i++) { + u64 dpa, expect_spa; + int pos, math; + u8 r_eiw, hb_ways; + u16 r_eig; + + pr_debug("Processing test vector %d: '%s'\n", i, table[i]); + + /* Parse the test vector */ + rc = parse_test_vector(table[i], &dpa, &pos, &r_eiw, &r_eig, + &hb_ways, &math, &expect_spa); + if (rc) { + pr_err("CXL Translate Test %d: FAIL\n" + " Failed to parse test vector '%s'\n", + i, table[i]); + continue; + } + /* Run the translation test */ + rc = run_translation_test(dpa, pos, r_eiw, r_eig, hb_ways, math, + expect_spa); + if (rc) { + pr_err("CXL Translate Test %d: FAIL\n" + " dpa=%llu pos=%d r_eiw=%u r_eig=%u hb_ways=%u math=%s expect_spa=%llu\n", + i, dpa, pos, r_eiw, r_eig, hb_ways, + (math == XOR_MATH) ? "XOR" : "MODULO", + expect_spa); + } else { + pr_info("CXL Translate Test %d: PASS\n", i); + } + } + + kfree(cximsd); + pr_info("CXL translate test completed\n"); + + return 0; +} + +static void __exit cxl_translate_exit(void) +{ + pr_info("CXL translate test module unloaded\n"); +} + +module_param_array(table, charp, &table_num, 0444); +MODULE_PARM_DESC(table, "Test vectors as space-separated decimal strings"); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("cxl_test: cxl address translation test module"); +MODULE_IMPORT_NS("CXL"); + +module_init(cxl_translate_init); +module_exit(cxl_translate_exit); diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index d533481672b78..79f42f4474d47 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -250,22 +250,21 @@ static void mes_add_event(struct mock_event_store *mes, * Vary the number of events returned to simulate events occuring while the * logs are being read. 
*/ -static int ret_limit = 0; +static atomic_t event_counter = ATOMIC_INIT(0); static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd) { struct cxl_get_event_payload *pl; struct mock_event_log *log; - u16 nr_overflow; + int ret_limit; u8 log_type; int i; if (cmd->size_in != sizeof(log_type)) return -EINVAL; - ret_limit = (ret_limit + 1) % CXL_TEST_EVENT_RET_MAX; - if (!ret_limit) - ret_limit = 1; + /* Vary return limit from 1 to CXL_TEST_EVENT_RET_MAX */ + ret_limit = (atomic_inc_return(&event_counter) % CXL_TEST_EVENT_RET_MAX) + 1; if (cmd->size_out < struct_size(pl, records, ret_limit)) return -EINVAL; @@ -299,7 +298,7 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd) u64 ns; pl->flags |= CXL_GET_EVENT_FLAG_OVERFLOW; - pl->overflow_err_count = cpu_to_le16(nr_overflow); + pl->overflow_err_count = cpu_to_le16(log->nr_overflow); ns = ktime_get_real_ns(); ns -= 5000000000; /* 5s ago */ pl->first_overflow_timestamp = cpu_to_le64(ns); @@ -1717,7 +1716,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; - mds = cxl_memdev_state_create(dev); + mds = cxl_memdev_state_create(dev, pdev->id + 1, 0); if (IS_ERR(mds)) return PTR_ERR(mds); @@ -1733,7 +1732,6 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) mds->event.buf = (struct cxl_get_event_payload *) mdata->event_buf; INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mockmem_sanitize_work); - cxlds->serial = pdev->id + 1; if (is_rcd(pdev)) cxlds->rcd = true; @@ -1768,7 +1766,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) cxl_mock_add_event_logs(&mdata->mes); - cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds); + cxlmd = devm_cxl_add_memdev(cxlds, NULL); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 1989ae020df3d..44bce80ef3ff5 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -10,12 +10,21 @@ #include #include #include "mock.h" +#include "../exports.h" static LIST_HEAD(mock); +static struct cxl_dport * +redirect_devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev); +static int redirect_devm_cxl_switch_port_decoders_setup(struct cxl_port *port); + void register_cxl_mock_ops(struct cxl_mock_ops *ops) { list_add_rcu(&ops->list, &mock); + _devm_cxl_add_dport_by_dev = redirect_devm_cxl_add_dport_by_dev; + _devm_cxl_switch_port_decoders_setup = + redirect_devm_cxl_switch_port_decoders_setup; } EXPORT_SYMBOL_GPL(register_cxl_mock_ops); @@ -23,6 +32,9 @@ DEFINE_STATIC_SRCU(cxl_mock_srcu); void unregister_cxl_mock_ops(struct cxl_mock_ops *ops) { + _devm_cxl_switch_port_decoders_setup = + __devm_cxl_switch_port_decoders_setup; + _devm_cxl_add_dport_by_dev = __devm_cxl_add_dport_by_dev; list_del_rcu(&ops->list); synchronize_srcu(&cxl_mock_srcu); } @@ -99,6 +111,26 @@ acpi_status __wrap_acpi_evaluate_integer(acpi_handle handle, } EXPORT_SYMBOL(__wrap_acpi_evaluate_integer); +int __wrap_hmat_get_extended_linear_cache_size(struct resource *backing_res, + int nid, + resource_size_t *cache_size) +{ + int index, rc; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops) + rc = ops->hmat_get_extended_linear_cache_size(backing_res, nid, + cache_size); + else + rc = hmat_get_extended_linear_cache_size(backing_res, nid, + cache_size); + + put_cxl_mock_ops(index); + + return rc; +} +EXPORT_SYMBOL_GPL(__wrap_hmat_get_extended_linear_cache_size); + struct acpi_pci_root *__wrap_acpi_pci_find_root(acpi_handle handle) { int index; 
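[Editor's note] The new __wrap_hmat_get_extended_linear_cache_size() wrapper above follows the usual cxl_test pattern: while a mock ops provider is registered, the query is answered by the provider's hmat_get_extended_linear_cache_size callback; otherwise it falls through to the real HMAT helper. A minimal sketch of such a callback is shown below. It is illustrative only (the function name mock_hmat_get_extended_linear_cache_size is hypothetical, not the actual cxl_test implementation) and simply reports a cache the size of the backing resource.

	/*
	 * Hypothetical cxl_mock_ops.hmat_get_extended_linear_cache_size
	 * callback: pretend the extended linear cache mirrors the entire
	 * backing resource. Sketch only, not the cxl_test implementation.
	 */
	static int
	mock_hmat_get_extended_linear_cache_size(struct resource *backing_res,
						 int nid,
						 resource_size_t *cache_size)
	{
		*cache_size = resource_size(backing_res);
		return 0;
	}
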
@@ -131,70 +163,34 @@ __wrap_nvdimm_bus_register(struct device *dev, } EXPORT_SYMBOL_GPL(__wrap_nvdimm_bus_register); -struct cxl_hdm *__wrap_devm_cxl_setup_hdm(struct cxl_port *port, - struct cxl_endpoint_dvsec_info *info) - -{ - int index; - struct cxl_hdm *cxlhdm; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - - if (ops && ops->is_mock_port(port->uport_dev)) - cxlhdm = ops->devm_cxl_setup_hdm(port, info); - else - cxlhdm = devm_cxl_setup_hdm(port, info); - put_cxl_mock_ops(index); - - return cxlhdm; -} -EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_setup_hdm, "CXL"); - -int __wrap_devm_cxl_add_passthrough_decoder(struct cxl_port *port) +int redirect_devm_cxl_switch_port_decoders_setup(struct cxl_port *port) { int rc, index; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); if (ops && ops->is_mock_port(port->uport_dev)) - rc = ops->devm_cxl_add_passthrough_decoder(port); + rc = ops->devm_cxl_switch_port_decoders_setup(port); else - rc = devm_cxl_add_passthrough_decoder(port); + rc = __devm_cxl_switch_port_decoders_setup(port); put_cxl_mock_ops(index); return rc; } -EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_passthrough_decoder, "CXL"); -int __wrap_devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, - struct cxl_endpoint_dvsec_info *info) +int __wrap_devm_cxl_endpoint_decoders_setup(struct cxl_port *port) { int rc, index; - struct cxl_port *port = cxlhdm->port; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); if (ops && ops->is_mock_port(port->uport_dev)) - rc = ops->devm_cxl_enumerate_decoders(cxlhdm, info); + rc = ops->devm_cxl_endpoint_decoders_setup(port); else - rc = devm_cxl_enumerate_decoders(cxlhdm, info); + rc = devm_cxl_endpoint_decoders_setup(port); put_cxl_mock_ops(index); return rc; } -EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_enumerate_decoders, "CXL"); - -int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port) -{ - int rc, index; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - - if (ops && ops->is_mock_port(port->uport_dev)) - rc = ops->devm_cxl_port_enumerate_dports(port); - else - rc = devm_cxl_port_enumerate_dports(port); - put_cxl_mock_ops(index); - - return rc; -} -EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_port_enumerate_dports, "CXL"); +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_endpoint_decoders_setup, "CXL"); int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds) { @@ -211,39 +207,6 @@ int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds) } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, "CXL"); -int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds, - struct cxl_hdm *cxlhdm, - struct cxl_endpoint_dvsec_info *info) -{ - int rc = 0, index; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - - if (ops && ops->is_mock_dev(cxlds->dev)) - rc = 0; - else - rc = cxl_hdm_decode_init(cxlds, cxlhdm, info); - put_cxl_mock_ops(index); - - return rc; -} -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, "CXL"); - -int __wrap_cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, - struct cxl_endpoint_dvsec_info *info) -{ - int rc = 0, index; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - - if (ops && ops->is_mock_dev(cxlds->dev)) - rc = 0; - else - rc = cxl_dvsec_rr_decode(cxlds, info); - put_cxl_mock_ops(index); - - return rc; -} -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dvsec_rr_decode, "CXL"); - struct cxl_dport *__wrap_devm_cxl_add_rch_dport(struct cxl_port *port, struct device *dport_dev, int port_id, @@ -268,23 +231,6 @@ struct cxl_dport *__wrap_devm_cxl_add_rch_dport(struct cxl_port *port, } EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_rch_dport, 
"CXL"); -resource_size_t __wrap_cxl_rcd_component_reg_phys(struct device *dev, - struct cxl_dport *dport) -{ - int index; - resource_size_t component_reg_phys; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - - if (ops && ops->is_mock_port(dev)) - component_reg_phys = CXL_RESOURCE_NONE; - else - component_reg_phys = cxl_rcd_component_reg_phys(dev, dport); - put_cxl_mock_ops(index); - - return component_reg_phys; -} -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_rcd_component_reg_phys, "CXL"); - void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port) { int index; @@ -311,6 +257,22 @@ void __wrap_cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dport_init_ras_reporting, "CXL"); +struct cxl_dport *redirect_devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev) +{ + int index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + struct cxl_dport *dport; + + if (ops && ops->is_mock_port(port->uport_dev)) + dport = ops->devm_cxl_add_dport_by_dev(port, dport_dev); + else + dport = __devm_cxl_add_dport_by_dev(port, dport_dev); + put_cxl_mock_ops(index); + + return dport; +} + MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("cxl_test: emulation module"); MODULE_IMPORT_NS("ACPI"); diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h index d1b0271d28220..2684b89c8aa2d 100644 --- a/tools/testing/cxl/test/mock.h +++ b/tools/testing/cxl/test/mock.h @@ -19,13 +19,14 @@ struct cxl_mock_ops { bool (*is_mock_bus)(struct pci_bus *bus); bool (*is_mock_port)(struct device *dev); bool (*is_mock_dev)(struct device *dev); - int (*devm_cxl_port_enumerate_dports)(struct cxl_port *port); - struct cxl_hdm *(*devm_cxl_setup_hdm)( - struct cxl_port *port, struct cxl_endpoint_dvsec_info *info); - int (*devm_cxl_add_passthrough_decoder)(struct cxl_port *port); - int (*devm_cxl_enumerate_decoders)( - struct cxl_hdm *hdm, struct cxl_endpoint_dvsec_info *info); + int (*devm_cxl_switch_port_decoders_setup)(struct cxl_port *port); + int (*devm_cxl_endpoint_decoders_setup)(struct cxl_port *port); void (*cxl_endpoint_parse_cdat)(struct cxl_port *port); + struct cxl_dport *(*devm_cxl_add_dport_by_dev)(struct cxl_port *port, + struct device *dport_dev); + int (*hmat_get_extended_linear_cache_size)(struct resource *backing_res, + int nid, + resource_size_t *cache_size); }; void register_cxl_mock_ops(struct cxl_mock_ops *ops);