diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 44a70e1ab59a2..6cd934ae385d6 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -6174,8 +6174,14 @@ Turn on/off individual RDT features. List is: cmt, mbmtotal, mbmlocal, l3cat, l3cdp, l2cat, l2cdp, mba, smba, bmec, abmc. + mba, smba, bmec, abmc, sdciae, energy[:guid], + perf[:guid]. E.g. to turn on cmt and turn off mba use: rdt=cmt,!mba + To turn off all energy telemetry monitoring and ensure that + perf telemetry monitoring associated with guid 0x12345 + is enabled use: + rdt=!energy,perf:0x12345 reboot= [KNL] Format (x86 or x86_64): diff --git a/Documentation/filesystems/resctrl.rst b/Documentation/filesystems/resctrl.rst index b9f6aa44fc4d7..c38a14331cd79 100644 --- a/Documentation/filesystems/resctrl.rst +++ b/Documentation/filesystems/resctrl.rst @@ -167,13 +167,12 @@ with respect to allocation: bandwidth percentages are directly applied to the threads running on the core -If RDT monitoring is available there will be an "L3_MON" directory +If L3 monitoring is available there will be an "L3_MON" directory with the following files: "num_rmids": - The number of RMIDs available. This is the - upper bound for how many "CTRL_MON" + "MON" - groups can be created. + The number of RMIDs supported by hardware for + L3 monitoring events. "mon_features": Lists the monitoring events if @@ -399,6 +398,24 @@ with the following files: bytes) at which a previously used LLC_occupancy counter can be considered for re-use. +If telemetry monitoring is available there will be a "PERF_PKG_MON" directory +with the following files: + +"num_rmids": + The number of RMIDs for telemetry monitoring events. + + On Intel resctrl will not enable telemetry events if the number of + RMIDs that can be tracked concurrently is lower than the total number + of RMIDs supported. 
Telemetry events can be force-enabled with the + "rdt=" kernel parameter, but this may reduce the number of + monitoring groups that can be created. + +"mon_features": + Lists the telemetry monitoring events that are enabled on this system. + +The upper bound for how many "CTRL_MON" + "MON" groups can be created +is the smaller of the L3_MON and PERF_PKG_MON "num_rmids" values. + Finally, in the top level of the "info" directory there is a file named "last_cmd_status". This is reset with every "command" issued via the file system (making new directories or writing to any of the @@ -504,15 +521,40 @@ When control is enabled all CTRL_MON groups will also contain: When monitoring is enabled all MON groups will also contain: "mon_data": - This contains a set of files organized by L3 domain and by - RDT event. E.g. on a system with two L3 domains there will - be subdirectories "mon_L3_00" and "mon_L3_01". Each of these - directories have one file per event (e.g. "llc_occupancy", - "mbm_total_bytes", and "mbm_local_bytes"). In a MON group these - files provide a read out of the current value of the event for - all tasks in the group. In CTRL_MON groups these files provide - the sum for all tasks in the CTRL_MON group and all tasks in + This contains directories for each monitor domain. + + If L3 monitoring is enabled, there will be a "mon_L3_XX" directory for + each instance of an L3 cache. Each directory contains files for the enabled + L3 events (e.g. "llc_occupancy", "mbm_total_bytes", and "mbm_local_bytes"). + + If telemetry monitoring is enabled, there will be a "mon_PERF_PKG_YY" + directory for each physical processor package. Each directory contains + files for the enabled telemetry events (e.g. "core_energy", "activity", + "uops_retired", etc.). + + The info/`*`/mon_features files provide the full list of enabled + event/file names. 
+ + "core_energy" reports a floating point number for the energy (in Joules) + consumed by cores (registers, arithmetic units, TLB and L1/L2 caches) + during execution of instructions summed across all logical CPUs on a + package for the current monitoring group. + + "activity" also reports a floating point value (in Farads). This provides + an estimate of work done independent of the frequency that the CPUs used + for execution. + + Note that "core_energy" and "activity" only measure energy/activity in the + "core" of the CPU (arithmetic units, TLB, L1 and L2 caches, etc.). They + do not include L3 cache, memory, I/O devices etc. + + All other events report decimal integer values. + + In a MON group these files provide a read out of the current value of + the event for all tasks in the group. In CTRL_MON groups these files + provide the sum for all tasks in the CTRL_MON group and all tasks in MON groups. Please see example section for more details on usage. + On systems with Sub-NUMA Cluster (SNC) enabled there are extra directories for each node (located within the "mon_L3_XX" directory for the L3 cache they occupy). These are named "mon_sub_L3_YY" diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5902dde9f4477..61d86219d4719 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -525,6 +525,19 @@ config X86_CPU_RESCTRL Say N if unsure. +config X86_CPU_RESCTRL_INTEL_AET + bool "Intel Application Energy Telemetry" + depends on X86_64 && X86_CPU_RESCTRL && CPU_SUP_INTEL && INTEL_PMT_TELEMETRY=y && INTEL_TPMI=y + help + Enable per-RMID telemetry events in resctrl. + + Intel feature that collects per-RMID execution data + about energy consumption, a measure of frequency-independent + activity and other performance metrics. Data is aggregated + per package. + + Say N if unsure. 
+ config X86_FRED bool "Flexible Return and Event Delivery" depends on X86_64 diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index 279aba8e97bf5..40a74a0617345 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -198,15 +198,6 @@ static inline bool resctrl_arch_mon_can_overflow(void) void resctrl_cpu_detect(struct cpuinfo_x86 *c); -static inline bool resctrl_arch_get_mb_uses_numa_nid(void) -{ - return false; -} - -static inline int resctrl_arch_set_mb_uses_numa_nid(bool enabled) -{ - return -EOPNOTSUPP; -} #else static inline void resctrl_arch_sched_in(struct task_struct *tsk) {} diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile index d8a04b195da21..273ddfa308366 100644 --- a/arch/x86/kernel/cpu/resctrl/Makefile +++ b/arch/x86/kernel/cpu/resctrl/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_X86_CPU_RESCTRL) += core.o rdtgroup.o monitor.o obj-$(CONFIG_X86_CPU_RESCTRL) += ctrlmondata.o +obj-$(CONFIG_X86_CPU_RESCTRL_INTEL_AET) += intel_aet.o obj-$(CONFIG_RESCTRL_FS_PSEUDO_LOCK) += pseudo_lock.o # To allow define_trace.h's recursive include: diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 42fcc9d7ff7a2..ed1d4c6e50f37 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -98,14 +98,33 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = { .ctrl_domains = ctrl_domain_init(RDT_RESOURCE_SMBA), }, }, + [RDT_RESOURCE_PERF_PKG] = + { + .r_resctrl = { + .name = "PERF_PKG", + .mon_scope = RESCTRL_PACKAGE, + .mon_domains = mon_domain_init(RDT_RESOURCE_PERF_PKG), + }, + }, }; +/** + * resctrl_arch_system_num_rmid_idx - Compute number of supported RMIDs + * (minimum across all mon_capable resource) + * + * Return: Number of supported RMIDs at time of call. Note that mount time + * enumeration of resources may reduce the number. 
+ */ u32 resctrl_arch_system_num_rmid_idx(void) { - struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + u32 num_rmids = U32_MAX; + struct rdt_resource *r; + + for_each_mon_capable_rdt_resource(r) + num_rmids = min(num_rmids, r->mon.num_rmid); /* RMID are independent numbers for x86. num_rmid_idx == num_rmid */ - return r->mon.num_rmid; + return num_rmids == U32_MAX ? 0 : num_rmids; } struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l) @@ -363,7 +382,7 @@ static void ctrl_domain_free(struct rdt_hw_ctrl_domain *hw_dom) kfree(hw_dom); } -static void mon_domain_free(struct rdt_hw_mon_domain *hw_dom) +static void l3_mon_domain_free(struct rdt_hw_l3_mon_domain *hw_dom) { int idx; @@ -396,11 +415,13 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_ctrl_domain * } /** - * arch_domain_mbm_alloc() - Allocate arch private storage for the MBM counters + * l3_mon_domain_mbm_alloc() - Allocate arch private storage for the MBM counters * @num_rmid: The size of the MBM counter array * @hw_dom: The domain that owns the allocated arrays + * + * Return: 0 for success, or -ENOMEM. 
*/ -static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom) +static int l3_mon_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_l3_mon_domain *hw_dom) { size_t tsize = sizeof(*hw_dom->arch_mbm_states[0]); enum resctrl_event_id eventid; @@ -433,6 +454,8 @@ static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope) return get_cpu_cacheinfo_id(cpu, scope); case RESCTRL_L3_NODE: return cpu_to_node(cpu); + case RESCTRL_PACKAGE: + return topology_physical_package_id(cpu); default: break; } @@ -459,7 +482,7 @@ static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r) hdr = resctrl_find_domain(&r->ctrl_domains, id, &add_pos); if (hdr) { - if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN)) + if (!domain_header_is_valid(hdr, RESCTRL_CTRL_DOMAIN, r->rid)) return; d = container_of(hdr, struct rdt_ctrl_domain, hdr); @@ -476,6 +499,7 @@ static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r) d = &hw_dom->d_resctrl; d->hdr.id = id; d->hdr.type = RESCTRL_CTRL_DOMAIN; + d->hdr.rid = r->rid; cpumask_set_cpu(cpu, &d->hdr.cpu_mask); rdt_domain_reconfigure_cdp(r); @@ -495,37 +519,13 @@ static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r) } } -static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) +static void l3_mon_domain_setup(int cpu, int id, struct rdt_resource *r, struct list_head *add_pos) { - int id = get_domain_id_from_scope(cpu, r->mon_scope); - struct list_head *add_pos = NULL; - struct rdt_hw_mon_domain *hw_dom; - struct rdt_domain_hdr *hdr; - struct rdt_mon_domain *d; + struct rdt_hw_l3_mon_domain *hw_dom; + struct rdt_l3_mon_domain *d; struct cacheinfo *ci; int err; - lockdep_assert_held(&domain_list_lock); - - if (id < 0) { - pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n", - cpu, r->mon_scope, r->name); - return; - } - - hdr = resctrl_find_domain(&r->mon_domains, id, &add_pos); - if (hdr) { - if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) - return; - d = 
container_of(hdr, struct rdt_mon_domain, hdr); - - cpumask_set_cpu(cpu, &d->hdr.cpu_mask); - /* Update the mbm_assign_mode state for the CPU if supported */ - if (r->mon.mbm_cntr_assignable) - resctrl_arch_mbm_cntr_assign_set_one(r); - return; - } - hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu)); if (!hw_dom) return; @@ -533,33 +533,66 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) d = &hw_dom->d_resctrl; d->hdr.id = id; d->hdr.type = RESCTRL_MON_DOMAIN; + d->hdr.rid = RDT_RESOURCE_L3; ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); if (!ci) { pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name); - mon_domain_free(hw_dom); + l3_mon_domain_free(hw_dom); return; } d->ci_id = ci->id; cpumask_set_cpu(cpu, &d->hdr.cpu_mask); - /* Update the mbm_assign_mode state for the CPU if supported */ - if (r->mon.mbm_cntr_assignable) - resctrl_arch_mbm_cntr_assign_set_one(r); - arch_mon_domain_online(r, d); - if (arch_domain_mbm_alloc(r->mon.num_rmid, hw_dom)) { - mon_domain_free(hw_dom); + if (l3_mon_domain_mbm_alloc(r->mon.num_rmid, hw_dom)) { + l3_mon_domain_free(hw_dom); return; } list_add_tail_rcu(&d->hdr.list, add_pos); - err = resctrl_online_mon_domain(r, d); + err = resctrl_online_mon_domain(r, &d->hdr); if (err) { list_del_rcu(&d->hdr.list); synchronize_rcu(); - mon_domain_free(hw_dom); + l3_mon_domain_free(hw_dom); + } +} + +static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) +{ + int id = get_domain_id_from_scope(cpu, r->mon_scope); + struct list_head *add_pos = NULL; + struct rdt_domain_hdr *hdr; + + lockdep_assert_held(&domain_list_lock); + + if (id < 0) { + pr_warn_once("Can't find monitor domain id for CPU:%d scope:%d for resource %s\n", + cpu, r->mon_scope, r->name); + return; + } + + hdr = resctrl_find_domain(&r->mon_domains, id, &add_pos); + if (hdr) + cpumask_set_cpu(cpu, &hdr->cpu_mask); + + switch (r->rid) { + case RDT_RESOURCE_L3: + /* Update the mbm_assign_mode state for the CPU 
if supported */ + if (r->mon.mbm_cntr_assignable) + resctrl_arch_mbm_cntr_assign_set_one(r); + if (!hdr) + l3_mon_domain_setup(cpu, id, r, add_pos); + break; + case RDT_RESOURCE_PERF_PKG: + if (!hdr) + intel_aet_mon_domain_setup(cpu, id, r, add_pos); + break; + default: + pr_warn_once("Unknown resource rid=%d\n", r->rid); + break; } } @@ -593,36 +626,33 @@ static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r) return; } - if (WARN_ON_ONCE(hdr->type != RESCTRL_CTRL_DOMAIN)) + cpumask_clear_cpu(cpu, &hdr->cpu_mask); + if (!cpumask_empty(&hdr->cpu_mask)) + return; + + if (!domain_header_is_valid(hdr, RESCTRL_CTRL_DOMAIN, r->rid)) return; d = container_of(hdr, struct rdt_ctrl_domain, hdr); hw_dom = resctrl_to_arch_ctrl_dom(d); - cpumask_clear_cpu(cpu, &d->hdr.cpu_mask); - if (cpumask_empty(&d->hdr.cpu_mask)) { - resctrl_offline_ctrl_domain(r, d); - list_del_rcu(&d->hdr.list); - synchronize_rcu(); - - /* - * rdt_ctrl_domain "d" is going to be freed below, so clear - * its pointer from pseudo_lock_region struct. - */ - if (d->plr) - d->plr->d = NULL; - ctrl_domain_free(hw_dom); + resctrl_offline_ctrl_domain(r, d); + list_del_rcu(&hdr->list); + synchronize_rcu(); - return; - } + /* + * rdt_ctrl_domain "d" is going to be freed below, so clear + * its pointer from pseudo_lock_region struct. 
+ */ + if (d->plr) + d->plr->d = NULL; + ctrl_domain_free(hw_dom); } static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r) { int id = get_domain_id_from_scope(cpu, r->mon_scope); - struct rdt_hw_mon_domain *hw_dom; struct rdt_domain_hdr *hdr; - struct rdt_mon_domain *d; lockdep_assert_held(&domain_list_lock); @@ -639,20 +669,42 @@ static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r) return; } - if (WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) + cpumask_clear_cpu(cpu, &hdr->cpu_mask); + if (!cpumask_empty(&hdr->cpu_mask)) return; - d = container_of(hdr, struct rdt_mon_domain, hdr); - hw_dom = resctrl_to_arch_mon_dom(d); + switch (r->rid) { + case RDT_RESOURCE_L3: { + struct rdt_hw_l3_mon_domain *hw_dom; + struct rdt_l3_mon_domain *d; - cpumask_clear_cpu(cpu, &d->hdr.cpu_mask); - if (cpumask_empty(&d->hdr.cpu_mask)) { - resctrl_offline_mon_domain(r, d); - list_del_rcu(&d->hdr.list); + if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) + return; + + d = container_of(hdr, struct rdt_l3_mon_domain, hdr); + hw_dom = resctrl_to_arch_mon_dom(d); + resctrl_offline_mon_domain(r, hdr); + list_del_rcu(&hdr->list); synchronize_rcu(); - mon_domain_free(hw_dom); + l3_mon_domain_free(hw_dom); + break; + } + case RDT_RESOURCE_PERF_PKG: { + struct rdt_perf_pkg_mon_domain *pkgd; - return; + if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_PERF_PKG)) + return; + + pkgd = container_of(hdr, struct rdt_perf_pkg_mon_domain, hdr); + resctrl_offline_mon_domain(r, hdr); + list_del_rcu(&hdr->list); + synchronize_rcu(); + kfree(pkgd); + break; + } + default: + pr_warn_once("Unknown resource rid=%d\n", r->rid); + break; } } @@ -707,6 +759,28 @@ static int resctrl_arch_offline_cpu(unsigned int cpu) return 0; } +void resctrl_arch_pre_mount(void) +{ + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_PERF_PKG].r_resctrl; + int cpu; + + if (!intel_aet_get_events()) + return; + + /* + * Late discovery of telemetry events means 
the domains for the + * resource were not built. Do that now. + */ + cpus_read_lock(); + mutex_lock(&domain_list_lock); + r->mon_capable = true; + rdt_mon_capable = true; + for_each_online_cpu(cpu) + domain_add_cpu_mon(cpu, r); + mutex_unlock(&domain_list_lock); + cpus_read_unlock(); +} + enum { RDT_FLAG_CMT, RDT_FLAG_MBM_TOTAL, @@ -760,6 +834,8 @@ static int __init set_rdt_options(char *str) force_off = *tok == '!'; if (force_off) tok++; + if (intel_handle_aet_option(force_off, tok)) + continue; for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) { if (strcmp(tok, o->name) == 0) { if (force_off) @@ -879,15 +955,15 @@ static __init bool get_rdt_mon_resources(void) bool ret = false; if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC)) { - resctrl_enable_mon_event(QOS_L3_OCCUP_EVENT_ID); + resctrl_enable_mon_event(QOS_L3_OCCUP_EVENT_ID, false, 0, NULL); ret = true; } if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) { - resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID); + resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID, false, 0, NULL); ret = true; } if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) { - resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID); + resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID, false, 0, NULL); ret = true; } if (rdt_cpu_has(X86_FEATURE_ABMC)) @@ -896,7 +972,7 @@ static __init bool get_rdt_mon_resources(void) if (!ret) return false; - return !rdt_get_mon_l3_config(r); + return !rdt_get_l3_mon_config(r); } static __init void __check_quirks_intel(void) @@ -1062,6 +1138,8 @@ late_initcall(resctrl_arch_late_init); static void __exit resctrl_arch_exit(void) { + intel_aet_exit(); + cpuhp_remove_state(rdt_online); resctrl_exit(); diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c new file mode 100644 index 0000000000000..89b8b619d5d53 --- /dev/null +++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c @@ -0,0 +1,409 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Resource Director Technology(RDT) + * - Intel 
Application Energy Telemetry + * + * Copyright (C) 2025 Intel Corporation + * + * Author: + * Tony Luck + */ + +#define pr_fmt(fmt) "resctrl: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "internal.h" + +/** + * struct pmt_event - Telemetry event. + * @id: Resctrl event id. + * @idx: Counter index within each per-RMID block of counters. + * @bin_bits: Zero for integer valued events, else number bits in fraction + * part of fixed-point. + */ +struct pmt_event { + enum resctrl_event_id id; + unsigned int idx; + unsigned int bin_bits; +}; + +#define EVT(_id, _idx, _bits) { .id = _id, .idx = _idx, .bin_bits = _bits } + +/** + * struct event_group - Events with the same feature type ("energy" or "perf") and GUID. + * @pfname: PMT feature name ("energy" or "perf") of this event group. + * Used by boot rdt= option. + * @pfg: Points to the aggregated telemetry space information + * returned by the intel_pmt_get_regions_by_feature() + * call to the INTEL_PMT_TELEMETRY driver that contains + * data for all telemetry regions of type @pfname. + * Valid if the system supports the event group, + * NULL otherwise. + * @force_off: True when "rdt" command line or architecture code disables + * this event group due to insufficient RMIDs. + * @force_on: True when "rdt" command line overrides disable of this + * event group. + * @guid: Unique number per XML description file. + * @num_rmid: Number of RMIDs supported by this group. May be + * adjusted downwards if enumeration from + * intel_pmt_get_regions_by_feature() indicates fewer + * RMIDs can be tracked simultaneously. + * @mmio_size: Number of bytes of MMIO registers for this group. + * @num_events: Number of events in this group. + * @evts: Array of event descriptors. 
+ */ +struct event_group { + /* Data fields for additional structures to manage this group. */ + const char *pfname; + struct pmt_feature_group *pfg; + bool force_off, force_on; + + /* Remaining fields initialized from XML file. */ + u32 guid; + u32 num_rmid; + size_t mmio_size; + unsigned int num_events; + struct pmt_event evts[] __counted_by(num_events); +}; + +#define XML_MMIO_SIZE(num_rmids, num_events, num_extra_status) \ + (((num_rmids) * (num_events) + (num_extra_status)) * sizeof(u64)) + +/* + * Link: https://github.com/intel/Intel-PMT/blob/main/xml/CWF/OOBMSM/RMID-ENERGY/cwf_aggregator.xml + */ +static struct event_group energy_0x26696143 = { + .pfname = "energy", + .guid = 0x26696143, + .num_rmid = 576, + .mmio_size = XML_MMIO_SIZE(576, 2, 3), + .num_events = 2, + .evts = { + EVT(PMT_EVENT_ENERGY, 0, 18), + EVT(PMT_EVENT_ACTIVITY, 1, 18), + } +}; + +/* + * Link: https://github.com/intel/Intel-PMT/blob/main/xml/CWF/OOBMSM/RMID-PERF/cwf_aggregator.xml + */ +static struct event_group perf_0x26557651 = { + .pfname = "perf", + .guid = 0x26557651, + .num_rmid = 576, + .mmio_size = XML_MMIO_SIZE(576, 7, 3), + .num_events = 7, + .evts = { + EVT(PMT_EVENT_STALLS_LLC_HIT, 0, 0), + EVT(PMT_EVENT_C1_RES, 1, 0), + EVT(PMT_EVENT_UNHALTED_CORE_CYCLES, 2, 0), + EVT(PMT_EVENT_STALLS_LLC_MISS, 3, 0), + EVT(PMT_EVENT_AUTO_C6_RES, 4, 0), + EVT(PMT_EVENT_UNHALTED_REF_CYCLES, 5, 0), + EVT(PMT_EVENT_UOPS_RETIRED, 6, 0), + } +}; + +static struct event_group *known_event_groups[] = { + &energy_0x26696143, + &perf_0x26557651, +}; + +#define for_each_event_group(_peg) \ + for (_peg = known_event_groups; \ + _peg < &known_event_groups[ARRAY_SIZE(known_event_groups)]; \ + _peg++) + +bool intel_handle_aet_option(bool force_off, char *tok) +{ + struct event_group **peg; + bool ret = false; + u32 guid = 0; + char *name; + + if (!tok) + return false; + + name = strsep(&tok, ":"); + if (tok && kstrtou32(tok, 16, &guid)) + return false; + + for_each_event_group(peg) { + if (strcmp(name, 
(*peg)->pfname)) + continue; + if (guid && (*peg)->guid != guid) + continue; + if (force_off) + (*peg)->force_off = true; + else + (*peg)->force_on = true; + ret = true; + } + + return ret; +} + +static bool skip_telem_region(struct telemetry_region *tr, struct event_group *e) +{ + if (tr->guid != e->guid) + return true; + if (tr->plat_info.package_id >= topology_max_packages()) { + pr_warn("Bad package %u in guid 0x%x\n", tr->plat_info.package_id, + tr->guid); + return true; + } + if (tr->size != e->mmio_size) { + pr_warn("MMIO space wrong size (%zu bytes) for guid 0x%x. Expected %zu bytes.\n", + tr->size, e->guid, e->mmio_size); + return true; + } + + return false; +} + +static bool group_has_usable_regions(struct event_group *e, struct pmt_feature_group *p) +{ + bool usable_regions = false; + + for (int i = 0; i < p->count; i++) { + if (skip_telem_region(&p->regions[i], e)) { + /* + * Clear the address field of regions that did not pass the checks in + * skip_telem_region() so they will not be used by intel_aet_read_event(). + * This is safe to do because intel_pmt_get_regions_by_feature() allocates + * a new pmt_feature_group structure to return to each caller and only makes + * use of the pmt_feature_group::kref field when intel_pmt_put_feature_group() + * returns the structure. 
+ */ + p->regions[i].addr = NULL; + + continue; + } + usable_regions = true; + } + + return usable_regions; +} + +static bool all_regions_have_sufficient_rmid(struct event_group *e, struct pmt_feature_group *p) +{ + struct telemetry_region *tr; + + for (int i = 0; i < p->count; i++) { + if (!p->regions[i].addr) + continue; + tr = &p->regions[i]; + if (tr->num_rmids < e->num_rmid) { + e->force_off = true; + return false; + } + } + + return true; +} + +static bool enable_events(struct event_group *e, struct pmt_feature_group *p) +{ + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_PERF_PKG].r_resctrl; + int skipped_events = 0; + + if (e->force_off) + return false; + + if (!group_has_usable_regions(e, p)) + return false; + + /* + * Only enable event group with insufficient RMIDs if the user requested + * it from the kernel command line. + */ + if (!all_regions_have_sufficient_rmid(e, p) && !e->force_on) { + pr_info("%s %s:0x%x monitoring not enabled due to insufficient RMIDs\n", + r->name, e->pfname, e->guid); + return false; + } + + for (int i = 0; i < p->count; i++) { + if (!p->regions[i].addr) + continue; + /* + * e->num_rmid only adjusted lower if user (via rdt= kernel + * parameter) forces an event group with insufficient RMID + * to be enabled. 
+ */ + e->num_rmid = min(e->num_rmid, p->regions[i].num_rmids); + } + + for (int j = 0; j < e->num_events; j++) { + if (!resctrl_enable_mon_event(e->evts[j].id, true, + e->evts[j].bin_bits, &e->evts[j])) + skipped_events++; + } + if (e->num_events == skipped_events) { + pr_info("No events enabled in %s %s:0x%x\n", r->name, e->pfname, e->guid); + return false; + } + + if (r->mon.num_rmid) + r->mon.num_rmid = min(r->mon.num_rmid, e->num_rmid); + else + r->mon.num_rmid = e->num_rmid; + + if (skipped_events) + pr_info("%s %s:0x%x monitoring detected (skipped %d events)\n", r->name, + e->pfname, e->guid, skipped_events); + else + pr_info("%s %s:0x%x monitoring detected\n", r->name, e->pfname, e->guid); + + return true; +} + +static enum pmt_feature_id lookup_pfid(const char *pfname) +{ + if (!strcmp(pfname, "energy")) + return FEATURE_PER_RMID_ENERGY_TELEM; + else if (!strcmp(pfname, "perf")) + return FEATURE_PER_RMID_PERF_TELEM; + + pr_warn("Unknown PMT feature name '%s'\n", pfname); + + return FEATURE_INVALID; +} + +/* + * Request a copy of struct pmt_feature_group for each event group. If there is + * one, the returned structure has an array of telemetry_region structures, + * each element of the array describes one telemetry aggregator. The + * telemetry aggregators may have different GUIDs so obtain duplicate struct + * pmt_feature_group for event groups with same feature type but different + * GUID. Post-processing ensures an event group can only use the telemetry + * aggregators that match its GUID. An event group keeps a pointer to its + * struct pmt_feature_group to indicate that its events are successfully + * enabled. 
+ */ +bool intel_aet_get_events(void) +{ + struct pmt_feature_group *p; + enum pmt_feature_id pfid; + struct event_group **peg; + bool ret = false; + + for_each_event_group(peg) { + pfid = lookup_pfid((*peg)->pfname); + p = intel_pmt_get_regions_by_feature(pfid); + if (IS_ERR_OR_NULL(p)) + continue; + if (enable_events(*peg, p)) { + (*peg)->pfg = p; + ret = true; + } else { + intel_pmt_put_feature_group(p); + } + } + + return ret; +} + +void __exit intel_aet_exit(void) +{ + struct event_group **peg; + + for_each_event_group(peg) { + if ((*peg)->pfg) { + intel_pmt_put_feature_group((*peg)->pfg); + (*peg)->pfg = NULL; + } + } +} + +#define DATA_VALID BIT_ULL(63) +#define DATA_BITS GENMASK_ULL(62, 0) + +/* + * Read counter for an event on a domain (summing all aggregators on the + * domain). If an aggregator hasn't received any data for a specific RMID, + * the MMIO read indicates that data is not valid. Return success if at + * least one aggregator has valid data. + */ +int intel_aet_read_event(int domid, u32 rmid, void *arch_priv, u64 *val) +{ + struct pmt_event *pevt = arch_priv; + struct event_group *e; + bool valid = false; + u64 total = 0; + u64 evtcount; + void *pevt0; + u32 idx; + + pevt0 = pevt - pevt->idx; + e = container_of(pevt0, struct event_group, evts); + idx = rmid * e->num_events; + idx += pevt->idx; + + if (idx * sizeof(u64) + sizeof(u64) > e->mmio_size) { + pr_warn_once("MMIO index %u out of range\n", idx); + return -EIO; + } + + for (int i = 0; i < e->pfg->count; i++) { + if (!e->pfg->regions[i].addr) + continue; + if (e->pfg->regions[i].plat_info.package_id != domid) + continue; + evtcount = readq(e->pfg->regions[i].addr + idx * sizeof(u64)); + if (!(evtcount & DATA_VALID)) + continue; + total += evtcount & DATA_BITS; + valid = true; + } + + if (valid) + *val = total; + + return valid ? 
0 : -EINVAL; +} + +void intel_aet_mon_domain_setup(int cpu, int id, struct rdt_resource *r, + struct list_head *add_pos) +{ + struct rdt_perf_pkg_mon_domain *d; + int err; + + d = kzalloc_node(sizeof(*d), GFP_KERNEL, cpu_to_node(cpu)); + if (!d) + return; + + d->hdr.id = id; + d->hdr.type = RESCTRL_MON_DOMAIN; + d->hdr.rid = RDT_RESOURCE_PERF_PKG; + cpumask_set_cpu(cpu, &d->hdr.cpu_mask); + list_add_tail_rcu(&d->hdr.list, add_pos); + + err = resctrl_online_mon_domain(r, &d->hdr); + if (err) { + list_del_rcu(&d->hdr.list); + synchronize_rcu(); + kfree(d); + } +} diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 9f4c2f0aaf5c8..eb923cd978e08 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -60,17 +60,17 @@ struct rdt_hw_ctrl_domain { }; /** - * struct rdt_hw_mon_domain - Arch private attributes of a set of CPUs that share - * a resource for a monitor function - * @d_resctrl: Properties exposed to the resctrl file system + * struct rdt_hw_l3_mon_domain - Arch private attributes of a set of CPUs sharing + * RDT_RESOURCE_L3 monitoring + * @d_resctrl: Properties exposed to the resctrl file system * @arch_mbm_states: Per-event pointer to the MBM event's saved state. * An MBM event's state is an array of struct arch_mbm_state * indexed by RMID on x86. * * Members of this structure are accessed via helpers that provide abstraction. 
*/ -struct rdt_hw_mon_domain { - struct rdt_mon_domain d_resctrl; +struct rdt_hw_l3_mon_domain { + struct rdt_l3_mon_domain d_resctrl; struct arch_mbm_state *arch_mbm_states[QOS_NUM_L3_MBM_EVENTS]; }; @@ -79,11 +79,19 @@ static inline struct rdt_hw_ctrl_domain *resctrl_to_arch_ctrl_dom(struct rdt_ctr return container_of(r, struct rdt_hw_ctrl_domain, d_resctrl); } -static inline struct rdt_hw_mon_domain *resctrl_to_arch_mon_dom(struct rdt_mon_domain *r) +static inline struct rdt_hw_l3_mon_domain *resctrl_to_arch_mon_dom(struct rdt_l3_mon_domain *r) { - return container_of(r, struct rdt_hw_mon_domain, d_resctrl); + return container_of(r, struct rdt_hw_l3_mon_domain, d_resctrl); } +/** + * struct rdt_perf_pkg_mon_domain - CPUs sharing an package scoped resctrl monitor resource + * @hdr: common header for different domain types + */ +struct rdt_perf_pkg_mon_domain { + struct rdt_domain_hdr hdr; +}; + /** * struct msr_param - set a range of MSRs from a domain * @res: The resource to use @@ -135,7 +143,7 @@ static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r extern struct rdt_hw_resource rdt_resources_all[]; -void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d); +void arch_mon_domain_online(struct rdt_resource *r, struct rdt_l3_mon_domain *d); /* CPUID.(EAX=10H, ECX=ResID=1).EAX */ union cpuid_0x10_1_eax { @@ -208,7 +216,7 @@ union l3_qos_abmc_cfg { void rdt_ctrl_update(void *arg); -int rdt_get_mon_l3_config(struct rdt_resource *r); +int rdt_get_l3_mon_config(struct rdt_resource *r); bool rdt_cpu_has(int flag); @@ -217,4 +225,24 @@ void __init intel_rdt_mbm_apply_quirk(void); void rdt_domain_reconfigure_cdp(struct rdt_resource *r); void resctrl_arch_mbm_cntr_assign_set_one(struct rdt_resource *r); +#ifdef CONFIG_X86_CPU_RESCTRL_INTEL_AET +bool intel_aet_get_events(void); +void __exit intel_aet_exit(void); +int intel_aet_read_event(int domid, u32 rmid, void *arch_priv, u64 *val); +void intel_aet_mon_domain_setup(int 
cpu, int id, struct rdt_resource *r, + struct list_head *add_pos); +bool intel_handle_aet_option(bool force_off, char *tok); +#else +static inline bool intel_aet_get_events(void) { return false; } +static inline void __exit intel_aet_exit(void) { } +static inline int intel_aet_read_event(int domid, u32 rmid, void *arch_priv, u64 *val) +{ + return -EINVAL; +} + +static inline void intel_aet_mon_domain_setup(int cpu, int id, struct rdt_resource *r, + struct list_head *add_pos) { } +static inline bool intel_handle_aet_option(bool force_off, char *tok) { return false; } +#endif + #endif /* _ASM_X86_RESCTRL_INTERNAL_H */ diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index fe1a2aa53c16a..c10e1ae5ee8a7 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -109,7 +109,7 @@ static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val) * * In RMID sharing mode there are fewer "logical RMID" values available * to accumulate data ("physical RMIDs" are divided evenly between SNC - * nodes that share an L3 cache). Linux creates an rdt_mon_domain for + * nodes that share an L3 cache). Linux creates an rdt_l3_mon_domain for * each SNC node. * * The value loaded into IA32_PQR_ASSOC is the "logical RMID". @@ -157,7 +157,7 @@ static int __rmid_read_phys(u32 prmid, enum resctrl_event_id eventid, u64 *val) return 0; } -static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_dom, +static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_l3_mon_domain *hw_dom, u32 rmid, enum resctrl_event_id eventid) { @@ -171,11 +171,11 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_do return state ? 
&state[rmid] : NULL; } -void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, +void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_l3_mon_domain *d, u32 unused, u32 rmid, enum resctrl_event_id eventid) { - struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); + struct rdt_hw_l3_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); int cpu = cpumask_any(&d->hdr.cpu_mask); struct arch_mbm_state *am; u32 prmid; @@ -194,9 +194,9 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, * Assumes that hardware counters are also reset and thus that there is * no need to record initial non-zero counts. */ -void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d) +void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d) { - struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); + struct rdt_hw_l3_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); enum resctrl_event_id eventid; int idx; @@ -217,10 +217,10 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) return chunks >> shift; } -static u64 get_corrected_val(struct rdt_resource *r, struct rdt_mon_domain *d, +static u64 get_corrected_val(struct rdt_resource *r, struct rdt_l3_mon_domain *d, u32 rmid, enum resctrl_event_id eventid, u64 msr_val) { - struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); + struct rdt_hw_l3_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); struct arch_mbm_state *am; u64 chunks; @@ -238,19 +238,29 @@ static u64 get_corrected_val(struct rdt_resource *r, struct rdt_mon_domain *d, return chunks * hw_res->mon_scale; } -int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, +int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr, u32 unused, u32 rmid, enum resctrl_event_id eventid, - u64 *val, void *ignored) + void *arch_priv, u64 *val, void 
*ignored) { - struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); - int cpu = cpumask_any(&d->hdr.cpu_mask); + struct rdt_hw_l3_mon_domain *hw_dom; + struct rdt_l3_mon_domain *d; struct arch_mbm_state *am; u64 msr_val; u32 prmid; + int cpu; int ret; resctrl_arch_rmid_read_context_check(); + if (r->rid == RDT_RESOURCE_PERF_PKG) + return intel_aet_read_event(hdr->id, rmid, arch_priv, val); + + if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) + return -EINVAL; + + d = container_of(hdr, struct rdt_l3_mon_domain, hdr); + hw_dom = resctrl_to_arch_mon_dom(d); + cpu = cpumask_any(&hdr->cpu_mask); prmid = logical_rmid_to_physical_rmid(cpu, rmid); ret = __rmid_read_phys(prmid, eventid, &msr_val); @@ -302,11 +312,11 @@ static int __cntr_id_read(u32 cntr_id, u64 *val) return 0; } -void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, +void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, u32 unused, u32 rmid, int cntr_id, enum resctrl_event_id eventid) { - struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); + struct rdt_hw_l3_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); struct arch_mbm_state *am; am = get_arch_mbm_state(hw_dom, rmid, eventid); @@ -318,7 +328,7 @@ void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, } } -int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_mon_domain *d, +int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_l3_mon_domain *d, u32 unused, u32 rmid, int cntr_id, enum resctrl_event_id eventid, u64 *val) { @@ -348,7 +358,7 @@ int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_mon_domain *d, * must adjust RMID counter numbers based on SNC node. See * logical_rmid_to_physical_rmid() for code that does this. 
*/ -void arch_mon_domain_online(struct rdt_resource *r, struct rdt_mon_domain *d) +void arch_mon_domain_online(struct rdt_resource *r, struct rdt_l3_mon_domain *d) { if (snc_nodes_per_l3_cache > 1) msr_clear_bit(MSR_RMID_SNC_CONFIG, 0); @@ -417,7 +427,7 @@ static __init int snc_get_config(void) return ret; } -int __init rdt_get_mon_l3_config(struct rdt_resource *r) +int __init rdt_get_l3_mon_config(struct rdt_resource *r) { unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset; struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); @@ -509,7 +519,7 @@ static void resctrl_abmc_set_one_amd(void *arg) */ static void _resctrl_abmc_enable(struct rdt_resource *r, bool enable) { - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; lockdep_assert_cpus_held(); @@ -548,11 +558,11 @@ static void resctrl_abmc_config_one_amd(void *info) /* * Send an IPI to the domain to assign the counter to RMID, event pair. */ -void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, +void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, enum resctrl_event_id evtid, u32 rmid, u32 closid, u32 cntr_id, bool assign) { - struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); + struct rdt_hw_l3_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); union l3_qos_abmc_cfg abmc_cfg = { 0 }; struct arch_mbm_state *am; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 394c69e5ed838..39e8ac236b90a 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3733,94 +3733,67 @@ static int arm_smmu_def_domain_type(struct device *dev) return 0; } -static int arm_smmu_group_set_mpam(struct iommu_group *group, u16 partid, +static int arm_smmu_group_set_mpam(struct device *dev, u16 partid, u8 pmg) { int i; u32 sid; - unsigned long flags; struct arm_smmu_ste *step; - struct iommu_domain *domain; struct arm_smmu_device *smmu; 
struct arm_smmu_master *master; struct arm_smmu_cmdq_batch cmds; - struct arm_smmu_domain *smmu_domain; struct arm_smmu_cmdq_ent cmd = { .opcode = CMDQ_OP_CFGI_STE, .cfgi = { .leaf = true, }, }; - struct arm_smmu_master_domain *master_domain; - domain = iommu_get_domain_for_group(group); - smmu_domain = to_smmu_domain(domain); - if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_MPAM)) + master = dev_iommu_priv_get(dev); + if (!(master->smmu->features & ARM_SMMU_FEAT_MPAM)) return -EIO; - smmu = smmu_domain->smmu; + smmu = master->smmu; arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd); - spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_for_each_entry(master_domain, &smmu_domain->devices, - devices_elm) { - master = master_domain->master; - - for (i = 0; i < master->num_streams; i++) { - sid = master->streams[i].id; - step = arm_smmu_get_step_for_sid(smmu, sid); - - /* These need locking if the VMSPtr is ever used */ - step->data[4] = FIELD_PREP(STRTAB_STE_4_PARTID, partid); - step->data[5] = FIELD_PREP(STRTAB_STE_5_PMG, pmg); + for (i = 0; i < master->num_streams; i++) { + sid = master->streams[i].id; + step = arm_smmu_get_step_for_sid(smmu, sid); - cmd.cfgi.sid = sid; - arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); - } + /* These need locking if the VMSPtr is ever used */ + step->data[4] = FIELD_PREP(STRTAB_STE_4_PARTID, partid); + step->data[5] = FIELD_PREP(STRTAB_STE_5_PMG, pmg); - master->partid = partid; - master->pmg = pmg; + cmd.cfgi.sid = sid; + arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); } - spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); + + master->partid = partid; + master->pmg = pmg; arm_smmu_cmdq_batch_submit(smmu, &cmds); return 0; } -static int arm_smmu_group_get_mpam(struct iommu_group *group, u16 *partid, +static int arm_smmu_group_get_mpam(struct device *dev, u16 *partid, u8 *pmg) { - int err = -EINVAL; - unsigned long flags; - struct iommu_domain *domain; struct arm_smmu_master *master; - struct arm_smmu_domain *smmu_domain; - 
struct arm_smmu_master_domain *master_domain; - domain = iommu_get_domain_for_group(group); - smmu_domain = to_smmu_domain(domain); - if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_MPAM)) + master = dev_iommu_priv_get(dev); + if (!(master->smmu->features & ARM_SMMU_FEAT_MPAM)) return -EIO; if (!partid && !pmg) return 0; - spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_for_each_entry(master_domain, &smmu_domain->devices, - devices_elm) { - master = master_domain->master; - if (master) { - if (partid) - *partid = master->partid; - if (pmg) - *pmg = master->pmg; - err = 0; - } - } - spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); + if (partid) + *partid = master->partid; + if (pmg) + *pmg = master->pmg; - return err; + return 0; } static const struct iommu_ops arm_smmu_ops = { diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index db770b73e3a8f..4c243f6000c3a 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3914,25 +3914,19 @@ int iommu_group_set_qos_params(struct iommu_group *group, { const struct iommu_ops *ops; struct group_device *device; - int ret; + int ret = -ENODEV; mutex_lock(&group->mutex); - device = list_first_entry_or_null(&group->devices, typeof(*device), - list); - if (!device) { - ret = -ENODEV; - goto out_unlock; - } - - ops = dev_iommu_ops(device->dev); - if (!ops->set_group_qos_params) { - ret = -EOPNOTSUPP; - goto out_unlock; + for_each_group_device(group, device) { + ops = dev_iommu_ops(device->dev); + if (!ops->set_group_qos_params) { + ret = -EOPNOTSUPP; + break; + } + ret = ops->set_group_qos_params(device->dev, partition, perf_mon_grp); + if (ret < 0) + break; } - - ret = ops->set_group_qos_params(group, partition, perf_mon_grp); - -out_unlock: mutex_unlock(&group->mutex); return ret; @@ -3952,25 +3946,19 @@ int iommu_group_get_qos_params(struct iommu_group *group, { const struct iommu_ops *ops; struct group_device *device; - int ret; + int ret = -ENODEV; mutex_lock(&group->mutex); - 
device = list_first_entry_or_null(&group->devices, typeof(*device), - list); - if (!device) { - ret = -ENODEV; - goto out_unlock; - } - - ops = dev_iommu_ops(device->dev); - if (!ops->get_group_qos_params) { - ret = -EOPNOTSUPP; - goto out_unlock; + for_each_group_device(group, device) { + ops = dev_iommu_ops(device->dev); + if (!ops->get_group_qos_params) { + ret = -EOPNOTSUPP; + break; + } + ret = ops->get_group_qos_params(device->dev, partition, perf_mon_grp); + if (!ret) + break; } - - ret = ops->get_group_qos_params(group, partition, perf_mon_grp); - -out_unlock: mutex_unlock(&group->mutex); return ret; diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c index f0740b5d59b5b..7ee8925aec9ef 100644 --- a/drivers/resctrl/mpam_devices.c +++ b/drivers/resctrl/mpam_devices.c @@ -723,6 +723,11 @@ static int mpam_ris_get_affinity(struct mpam_msc *msc, cpumask_t *affinity, case MPAM_CLASS_MEMORY: get_cpumask_from_node_id(comp->comp_id, affinity); /* affinity may be empty for CPU-less memory nodes */ + if (cpumask_empty(affinity)) { + dev_warn_once(&msc->pdev->dev, "CPU-less numa node"); + cpumask_copy(affinity, cpu_possible_mask); + } else if (class->level > 3) + cpumask_copy(affinity, cpu_possible_mask); break; case MPAM_CLASS_UNKNOWN: return 0; @@ -1547,6 +1552,9 @@ int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, if (!mpam_has_feature(type, cprops)) return -EOPNOTSUPP; + if (type == mpam_feat_msmon_mbwu) + type = mpam_msmon_choose_counter(class); + arg = (struct mon_read) { .ctx = ctx, .type = type, @@ -1554,9 +1562,6 @@ int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, }; *val = 0; - if (type == mpam_feat_msmon_mbwu) - type = mpam_msmon_choose_counter(class); - err = _msmon_read(comp, &arg); if (err == -EBUSY && class->nrdy_usec) wait_jiffies = usecs_to_jiffies(class->nrdy_usec); @@ -1579,41 +1584,6 @@ int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, return err; } -void 
mpam_msmon_reset_all_mbwu(struct mpam_component *comp) -{ - int idx, i; - struct mpam_msc *msc; - struct mpam_vmsc *vmsc; - struct mpam_msc_ris *ris; - - if (!mpam_is_enabled()) - return; - - idx = srcu_read_lock(&mpam_srcu); - list_for_each_entry_rcu(vmsc, &comp->vmsc, comp_list) { - if (!mpam_has_feature(mpam_feat_msmon_mbwu, &vmsc->props)) - continue; - - msc = vmsc->msc; - mpam_mon_sel_outer_lock(msc); - list_for_each_entry_rcu(ris, &msc->ris, vmsc_list) { - if (!mpam_has_feature(mpam_feat_msmon_mbwu, &ris->props)) - continue; - - if (WARN_ON_ONCE(!mpam_mon_sel_inner_lock(msc))) - continue; - - for (i = 0; i < ris->props.num_mbwu_mon; i++) { - ris->mbwu_state[i].correction = 0; - ris->mbwu_state[i].reset_on_next_read = true; - } - mpam_mon_sel_inner_unlock(msc); - } - mpam_mon_sel_outer_unlock(msc); - } - srcu_read_unlock(&mpam_srcu, idx); -} - void mpam_msmon_reset_mbwu(struct mpam_component *comp, struct mon_cfg *ctx) { struct mpam_msc *msc; @@ -1647,34 +1617,6 @@ void mpam_msmon_reset_mbwu(struct mpam_component *comp, struct mon_cfg *ctx) } } -static void mpam_reset_msc_bitmap(struct mpam_msc *msc, u16 reg, u16 wd) -{ - u32 num_words, msb; - u32 bm = ~0; - int i; - - lockdep_assert_held(&msc->part_sel_lock); - - if (wd == 0) - return; - - /* - * Write all ~0 to all but the last 32bit-word, which may - * have fewer bits... - */ - num_words = DIV_ROUND_UP(wd, 32); - for (i = 0; i < num_words - 1; i++, reg += sizeof(bm)) - __mpam_write_reg(msc, reg, bm); - - /* - * ....and then the last (maybe) partial 32bit word. When wd is a - * multiple of 32, msb should be 31 to write a full 32bit word. 
- */ - msb = (wd - 1) % 32; - bm = GENMASK(msb, 0); - __mpam_write_reg(msc, reg, bm); -} - static void mpam_apply_t241_erratum(struct mpam_msc_ris *ris, u16 partid) { int sidx, i, lcount = 1000; @@ -1713,12 +1655,42 @@ static void mpam_quirk_post_config_change(struct mpam_msc_ris *ris, u16 partid, mpam_apply_t241_erratum(ris, partid); } +static u16 mpam_wa_t241_force_mbw_min_to_one(struct mpam_props *props) +{ + u16 max_hw_value, min_hw_granule, res0_bits; + + res0_bits = 16 - props->bwa_wd; + max_hw_value = ((1 << props->bwa_wd) - 1) << res0_bits; + min_hw_granule = ~max_hw_value; + + return min_hw_granule + 1; +} + +static u16 mpam_wa_t241_calc_min_from_max(struct mpam_props *props, + struct mpam_config *cfg) +{ + u16 val = 0; + u16 max; + u16 delta = ((5 * MPAMCFG_MBW_MAX_MAX) / 100) - 1; + + if (mpam_has_feature(mpam_feat_mbw_max, cfg)) { + max = cfg->mbw_max; + } else { + /* Resetting. Hence, use the ris specific default. */ + max = GENMASK(15, 16 - props->bwa_wd); + } + + if (max > delta) + val = max - delta; + + return val; +} + /* Called via IPI. 
Call while holding an SRCU reference */ static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid, struct mpam_config *cfg) { u32 pri_val = 0; - u16 cmax = MPAMCFG_CMAX_CMAX; struct mpam_msc *msc = ris->vmsc->msc; struct mpam_props *rprops = &ris->props; u16 dspri = GENMASK(rprops->dspri_wd, 0); @@ -1740,26 +1712,25 @@ static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid, } if (mpam_has_feature(mpam_feat_cpor_part, rprops) && - mpam_has_feature(mpam_feat_cpor_part, cfg)) { - if (cfg->reset_cpbm) - mpam_reset_msc_bitmap(msc, MPAMCFG_CPBM, - rprops->cpbm_wd); - else - mpam_write_partsel_reg(msc, CPBM, cfg->cpbm); - } + mpam_has_feature(mpam_feat_cpor_part, cfg)) + mpam_write_partsel_reg(msc, CPBM, cfg->cpbm); if (mpam_has_feature(mpam_feat_mbw_part, rprops) && - mpam_has_feature(mpam_feat_mbw_part, cfg)) { - if (cfg->reset_mbw_pbm) - mpam_reset_msc_bitmap(msc, MPAMCFG_MBW_PBM, - rprops->mbw_pbm_bits); - else - mpam_write_partsel_reg(msc, MBW_PBM, cfg->mbw_pbm); - } + mpam_has_feature(mpam_feat_mbw_part, cfg)) + mpam_write_partsel_reg(msc, MBW_PBM, cfg->mbw_pbm); + + if (mpam_has_feature(mpam_feat_mbw_min, rprops)) { + u16 val = 0; - if (mpam_has_feature(mpam_feat_mbw_min, rprops) && - mpam_has_feature(mpam_feat_mbw_min, cfg)) - mpam_write_partsel_reg(msc, MBW_MIN, cfg->mbw_min); + if (mpam_has_quirk(T241_FORCE_MBW_MIN_TO_ONE, msc)) { + u16 min = mpam_wa_t241_force_mbw_min_to_one(rprops); + + val = mpam_wa_t241_calc_min_from_max(rprops, cfg); + val = max(val, min); + } + + mpam_write_partsel_reg(msc, MBW_MIN, val); + } if (mpam_has_feature(mpam_feat_mbw_max, rprops) && mpam_has_feature(mpam_feat_mbw_max, cfg)) @@ -1769,25 +1740,18 @@ static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid, mpam_has_feature(mpam_feat_mbw_prop, cfg)) mpam_write_partsel_reg(msc, MBW_PROP, 0); - if (mpam_has_feature(mpam_feat_cmax_cmax, rprops)) { - if (mpam_has_feature(mpam_feat_cmax_cmax, cfg)) { - u32 cmax_val = cfg->cmax; + if 
(mpam_has_feature(mpam_feat_cmax_cmax, rprops) && + mpam_has_feature(mpam_feat_cmax_cmax, cfg)) { + u32 cmax = cfg->cmax; - if (cfg->cmax_softlim) - cmax_val |= MPAMCFG_CMAX_SOFTLIM; - mpam_write_partsel_reg(msc, CMAX, cmax_val); - } else { - mpam_write_partsel_reg(msc, CMAX, cmax); - } + if (cfg->cmax_softlim) + cmax |= MPAMCFG_CMAX_SOFTLIM; + mpam_write_partsel_reg(msc, CMAX, cmax); } - if (mpam_has_feature(mpam_feat_cmax_cmin, rprops)) { - if (mpam_has_feature(mpam_feat_cmax_cmin, cfg)) { - mpam_write_partsel_reg(msc, CMIN, cfg->cmin); - } else { - mpam_write_partsel_reg(msc, CMIN, 0); - } - } + if (mpam_has_feature(mpam_feat_cmax_cmin, rprops) && + mpam_has_feature(mpam_feat_cmax_cmin, cfg)) + mpam_write_partsel_reg(msc, CMIN, cfg->cmin); if (mpam_has_feature(mpam_feat_cmax_cassoc, rprops)) mpam_write_partsel_reg(msc, CASSOC, MPAMCFG_CASSOC_CASSOC); @@ -1910,33 +1874,32 @@ static int mpam_save_mbwu_state(void *arg) return 0; } -static void mpam_init_reset_cfg(struct mpam_config *reset_cfg) -{ - *reset_cfg = (struct mpam_config) { - .cpbm = ~0, - .mbw_pbm = ~0, - .mbw_max = MPAMCFG_MBW_MAX_MAX, - - .reset_cpbm = true, - .reset_mbw_pbm = true, - }; - bitmap_fill(reset_cfg->features, MPAM_FEATURE_LAST); -} - -/* - * This is not part of mpam_init_reset_cfg() as high level callers have the - * class, and low level callers a ris. - */ -static void mpam_wa_t241_force_mbw_min_to_one(struct mpam_config *cfg, - struct mpam_props *props) +static void mpam_init_reset_cfg(struct mpam_config *reset_cfg, + const struct mpam_props *props) { - u16 max_hw_value, min_hw_granule, res0_bits; + memset(reset_cfg, 0, sizeof(*reset_cfg)); - res0_bits = 16 - props->bwa_wd; - max_hw_value = ((1 << props->bwa_wd) - 1) << res0_bits; - min_hw_granule = ~max_hw_value; - - cfg->mbw_min = min_hw_granule + 1; + /* Set features and explicit default values for controls supported by this RIS. 
*/ + if (mpam_has_feature(mpam_feat_cpor_part, props)) { + mpam_set_feature(mpam_feat_cpor_part, reset_cfg); + reset_cfg->cpbm = GENMASK(props->cpbm_wd - 1, 0); + } + if (mpam_has_feature(mpam_feat_mbw_part, props)) { + mpam_set_feature(mpam_feat_mbw_part, reset_cfg); + reset_cfg->mbw_pbm = GENMASK(props->mbw_pbm_bits - 1, 0); + } + if (mpam_has_feature(mpam_feat_mbw_max, props)) { + mpam_set_feature(mpam_feat_mbw_max, reset_cfg); + reset_cfg->mbw_max = MPAMCFG_MBW_MAX_MAX; + } + if (mpam_has_feature(mpam_feat_cmax_cmax, props)) { + mpam_set_feature(mpam_feat_cmax_cmax, reset_cfg); + reset_cfg->cmax = MPAMCFG_CMAX_CMAX; + } + if (mpam_has_feature(mpam_feat_cmax_cmin, props)) { + mpam_set_feature(mpam_feat_cmax_cmin, reset_cfg); + reset_cfg->cmin = 0; + } } /* @@ -1948,14 +1911,11 @@ static int mpam_reset_ris(void *arg) struct mpam_config reset_cfg; struct mpam_msc_ris *ris = arg; struct reprogram_ris reprogram_arg; - struct mpam_msc *msc = ris->vmsc->msc; if (ris->in_reset_state) return 0; - mpam_init_reset_cfg(&reset_cfg); - if (mpam_has_quirk(T241_FORCE_MBW_MIN_TO_ONE, msc)) - mpam_wa_t241_force_mbw_min_to_one(&reset_cfg, &ris->props); + mpam_init_reset_cfg(&reset_cfg, &ris->props); reprogram_arg.ris = ris; reprogram_arg.cfg = &reset_cfg; @@ -2759,6 +2719,9 @@ static void mpam_enable_merge_class_features(struct mpam_component *comp) list_for_each_entry(vmsc, &comp->vmsc, comp_list) __class_props_mismatch(class, vmsc); + + if (mpam_has_quirk(T241_FORCE_MBW_MIN_TO_ONE, class)) + mpam_clear_feature(mpam_feat_mbw_min, &class->props); } /* @@ -2855,6 +2818,12 @@ static irqreturn_t __mpam_irq_handler(int irq, struct mpam_msc *msc) msc->id, mpam_errcode_names[errcode], partid, pmg, ris); + /* No action is required for the MPAM programming errors */ + if ((errcode != MPAM_ERRCODE_REQ_PARTID_RANGE) && + (errcode != MPAM_ERRCODE_REQ_PMG_RANGE)) { + return IRQ_HANDLED; + } + /* Disable this interrupt. 
*/ mpam_disable_msc_ecr(msc); @@ -2988,7 +2957,7 @@ static void __destroy_component_cfg(struct mpam_component *comp) static void mpam_reset_component_cfg(struct mpam_component *comp) { int i; - struct mpam_class *class = comp->class; + struct mpam_props *cprops = &comp->class->props; mpam_assert_partid_sizes_fixed(); @@ -2996,10 +2965,22 @@ static void mpam_reset_component_cfg(struct mpam_component *comp) return; for (i = 0; i < mpam_partid_max + 1; i++) { - mpam_init_reset_cfg(&comp->cfg[i]); - if (mpam_has_quirk(T241_FORCE_MBW_MIN_TO_ONE, class)) - mpam_wa_t241_force_mbw_min_to_one(&comp->cfg[i], - &class->props); + if (cprops->cpbm_wd) { + comp->cfg[i].cpbm = GENMASK(cprops->cpbm_wd - 1, 0); + mpam_set_feature(mpam_feat_cpor_part, &comp->cfg[i]); + } + if (cprops->mbw_pbm_bits) { + comp->cfg[i].mbw_pbm = GENMASK(cprops->mbw_pbm_bits - 1, 0); + mpam_set_feature(mpam_feat_mbw_part, &comp->cfg[i]); + } + if (cprops->bwa_wd) { + comp->cfg[i].mbw_max = MPAMCFG_MBW_MAX_MAX; + mpam_set_feature(mpam_feat_mbw_max, &comp->cfg[i]); + } + if (cprops->cmax_wd) { + comp->cfg[i].cmax = MPAMCFG_CMAX_CMAX; + mpam_set_feature(mpam_feat_cmax_cmax, &comp->cfg[i]); + } } } @@ -3433,18 +3414,6 @@ static void mpam_extend_config(struct mpam_class *class, struct mpam_config *cfg u16 min, min_hw_granule, delta; u16 max_hw_value, res0_bits; - /* - * Calculate the values the 'min' control can hold. - * e.g. on a platform with bwa_wd = 8, min_hw_granule is 0x00ff because - * those bits are RES0. Configurations of this value are effectively - * zero. But configurations need to saturate at min_hw_granule on - * systems with mismatched bwa_wd, where the 'less than 0' values are - * implemented on some MSC, but not others. - */ - res0_bits = 16 - cprops->bwa_wd; - max_hw_value = ((1 << cprops->bwa_wd) - 1) << res0_bits; - min_hw_granule = ~max_hw_value; - /* * MAX and MIN should be set together. If only one is provided, * generate a configuration for the other. 
If only one control @@ -3454,6 +3423,19 @@ static void mpam_extend_config(struct mpam_class *class, struct mpam_config *cfg */ if (mpam_has_feature(mpam_feat_mbw_max, cfg) && !mpam_has_feature(mpam_feat_mbw_min, cfg)) { + /* + * Calculate the values the 'min' control can hold. + * e.g. on a platform with bwa_wd = 8, min_hw_granule is 0x00ff + * because those bits are RES0. Configurations of this value + * are effectively zero. But configurations need to saturate + * at min_hw_granule on systems with mismatched bwa_wd, where + * the 'less than 0' values are implemented on some MSC, but + * not others. + */ + res0_bits = 16 - cprops->bwa_wd; + max_hw_value = ((1 << cprops->bwa_wd) - 1) << res0_bits; + min_hw_granule = ~max_hw_value; + delta = ((5 * MPAMCFG_MBW_MAX_MAX) / 100) - 1; if (cfg->mbw_max > delta) min = cfg->mbw_max - delta; @@ -3463,12 +3445,6 @@ static void mpam_extend_config(struct mpam_class *class, struct mpam_config *cfg cfg->mbw_min = max(min, min_hw_granule); mpam_set_feature(mpam_feat_mbw_min, cfg); } - - if (mpam_has_quirk(T241_FORCE_MBW_MIN_TO_ONE, class) && - cfg->mbw_min <= min_hw_granule) { - cfg->mbw_min = min_hw_granule + 1; - mpam_set_feature(mpam_feat_mbw_min, cfg); - } } int mpam_apply_config(struct mpam_component *comp, u16 partid, diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h index c2cb5129e3e21..0206fa67be21e 100644 --- a/drivers/resctrl/mpam_internal.h +++ b/drivers/resctrl/mpam_internal.h @@ -45,12 +45,6 @@ DECLARE_STATIC_KEY_FALSE(mpam_enabled); */ #define USE_PRE_ALLOCATED (U16_MAX + 1) -/* - * Only these event configuration bits are supported. MPAM can't know if - * data is being written back, these will show up as a write. 
- */ -#define MPAM_RESTRL_EVT_CONFIG_VALID (READS_TO_LOCAL_MEM | NON_TEMP_WRITE_TO_LOCAL_MEM) - static inline bool mpam_is_enabled(void) { return static_branch_likely(&mpam_enabled); @@ -369,9 +363,6 @@ struct mpam_config { bool cmax_softlim; - bool reset_cpbm; - bool reset_mbw_pbm; - struct mpam_garbage garbage; }; @@ -455,9 +446,7 @@ struct mpam_resctrl_dom { struct mpam_component *mon_comp[QOS_NUM_EVENTS]; struct rdt_ctrl_domain resctrl_ctrl_dom; - struct rdt_mon_domain resctrl_mon_dom; - - u32 mbm_local_evt_cfg; + struct rdt_l3_mon_domain resctrl_mon_dom; }; struct mpam_resctrl_res { @@ -541,7 +530,6 @@ int mpam_apply_config(struct mpam_component *comp, u16 partid, int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx, enum mpam_device_features, u64 *val); void mpam_msmon_reset_mbwu(struct mpam_component *comp, struct mon_cfg *ctx); -void mpam_msmon_reset_all_mbwu(struct mpam_component *comp); int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level, cpumask_t *affinity); @@ -554,13 +542,13 @@ void mpam_pcc_rx_callback(struct mbox_client *cl, void *msg); int mpam_resctrl_setup(void); void mpam_resctrl_exit(void); int mpam_resctrl_online_cpu(unsigned int cpu); -int mpam_resctrl_offline_cpu(unsigned int cpu); +void mpam_resctrl_offline_cpu(unsigned int cpu); void mpam_resctrl_teardown_class(struct mpam_class *class); #else static inline int mpam_resctrl_setup(void) { return 0; } static inline void mpam_resctrl_exit(void) { } static inline int mpam_resctrl_online_cpu(unsigned int cpu) { return 0; } -static inline int mpam_resctrl_offline_cpu(unsigned int cpu) { return 0; } +static inline void mpam_resctrl_offline_cpu(unsigned int cpu) { } static inline void mpam_resctrl_teardown_class(struct mpam_class *class) { } #endif /* CONFIG_RESCTRL_FS */ diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c index 8e87afa90656a..0accede8cc09c 100644 --- a/drivers/resctrl/mpam_resctrl.c +++ 
b/drivers/resctrl/mpam_resctrl.c @@ -12,9 +12,6 @@ #include #include #include -#include -#include -#include #include #include #include @@ -35,6 +32,11 @@ DECLARE_WAIT_QUEUE_HEAD(resctrl_mon_ctx_waiters); */ static struct mpam_resctrl_res mpam_resctrl_controls[RDT_NUM_RESOURCES]; +#define for_each_mpam_resctrl_control(res, rid) \ + for (rid = 0, res = &mpam_resctrl_controls[rid]; \ + rid < RDT_NUM_RESOURCES; \ + rid++, res = &mpam_resctrl_controls[rid]) + /* The lock for modifying resctrl's domain lists from cpuhp callbacks. */ static DEFINE_MUTEX(domain_list_lock); @@ -46,10 +48,13 @@ static DEFINE_MUTEX(domain_list_lock); * make use of them, we pretend they are on L3. * Class pointer may be NULL. */ -static struct mpam_resctrl_mon mpam_resctrl_counters[QOS_NUM_EVENTS]; +#define MPAM_MAX_EVENT QOS_L3_MBM_LOCAL_EVENT_ID +static struct mpam_resctrl_mon mpam_resctrl_counters[MPAM_MAX_EVENT + 1]; -static bool exposed_alloc_capable; -static bool exposed_mon_capable; +#define for_each_mpam_resctrl_mon(mon, eventid) \ + for (eventid = QOS_FIRST_EVENT, mon = &mpam_resctrl_counters[eventid]; \ + eventid <= MPAM_MAX_EVENT; \ + eventid++, mon = &mpam_resctrl_counters[eventid]) /* * MPAM emulates CDP by setting different PARTID in the I/D fields of MPAM0_EL1. @@ -57,14 +62,6 @@ static bool exposed_mon_capable; */ static bool cdp_enabled; -/* - * To support CPU-less NUMA nodes, user-space needs to opt in to the MB - * domain IDs being the NUMA nid instead of the corresponding CPU's L3 - * cache-id. - */ -static bool mb_uses_numa_nid; -static bool mb_numa_nid_possible; -static bool mb_l3_cache_id_possible; /* * If resctrl_init() succeeded, resctrl_exit() can be used to remove support * for the filesystem in the event of an error. 
@@ -103,12 +100,49 @@ static bool mpam_resctrl_abmc_enabled(void) bool resctrl_arch_alloc_capable(void) { - return exposed_alloc_capable; + struct mpam_resctrl_res *res; + enum resctrl_res_level rid; + + for_each_mpam_resctrl_control(res, rid) { + if (res->resctrl_res.alloc_capable) + return true; + } + + return false; } bool resctrl_arch_mon_capable(void) { - return exposed_mon_capable; + struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3]; + struct rdt_resource *l3 = &res->resctrl_res; + + /* All monitors are presented as being on the L3 cache */ + return l3->mon_capable; +} + +/* + * Provide empty implementations for compilation. The features are not + * needed on MPAM platforms. + */ +bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt) +{ + return false; +} + +void resctrl_arch_mon_event_config_read(void *info) +{ +} + +void resctrl_arch_mon_event_config_write(void *info) +{ +} + +void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d) +{ +} + +void resctrl_arch_pre_mount(void) +{ } bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level rid) @@ -145,18 +179,48 @@ static void resctrl_reset_task_closids(void) read_unlock(&tasklist_lock); } -static void mpam_resctrl_monitor_sync_abmc_vals(struct rdt_resource *l3) +static struct mpam_resctrl_mon *mpam_resctrl_mon_from_res(struct mpam_resctrl_res *res) +{ + struct mpam_resctrl_mon *mon; + enum resctrl_event_id eventid; + + if (!res->class) + return NULL; + + for_each_mpam_resctrl_mon(mon, eventid) { + if (mon->class == res->class) + return mon; + } + return NULL; +} + +static struct mpam_resctrl_res *mpam_resctrl_res_from_mon(struct mpam_resctrl_mon *mon) { - l3->mon.num_mbm_cntrs = l3_num_allocated_mbwu; + struct mpam_resctrl_res *res; + enum resctrl_res_level rid; + + if (!mon->class) + return NULL; + + for_each_mpam_resctrl_control(res, rid) { + if (res->class == mon->class) + return res; + } + return NULL; +} + +static void 
mpam_resctrl_monitor_sync_abmc_vals(struct rdt_resource *r) +{ + r->mon.num_mbm_cntrs = l3_num_allocated_mbwu; if (cdp_enabled) - l3->mon.num_mbm_cntrs /= 2; + r->mon.num_mbm_cntrs /= 2; - if (l3->mon.num_mbm_cntrs) { - l3->mon.mbm_cntr_assignable = mpam_resctrl_abmc_enabled(); - l3->mon.mbm_assign_on_mkdir = mpam_resctrl_abmc_enabled(); + if (r->mon.num_mbm_cntrs) { + r->mon.mbm_cntr_assignable = mpam_resctrl_abmc_enabled(); + r->mon.mbm_assign_on_mkdir = mpam_resctrl_abmc_enabled(); } else { - l3->mon.mbm_cntr_assignable = false; - l3->mon.mbm_assign_on_mkdir = false; + r->mon.mbm_cntr_assignable = false; + r->mon.mbm_assign_on_mkdir = false; } } @@ -466,7 +530,7 @@ bool resctrl_arch_mon_can_overflow(void) static int __read_mon(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp, - enum mpam_device_features mon_type, enum mon_filter_options mon_opts, + enum mpam_device_features mon_type, int mon_idx, enum resctrl_conf_type cdp_type, u32 closid, u32 rmid, u64 *val) { @@ -495,7 +559,6 @@ __read_mon(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp, cfg.match_pmg = true; cfg.partid = closid; cfg.pmg = rmid; - cfg.opts = mon_opts; if (irqs_disabled()) { /* Check if we can access this domain without an IPI */ @@ -506,49 +569,36 @@ __read_mon(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp, } static int read_mon_cdp_safe(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp, - enum mpam_device_features mon_type, enum mon_filter_options mon_opts, + enum mpam_device_features mon_type, int mon_idx, u32 closid, u32 rmid, u64 *val) { if (cdp_enabled) { u64 cdp_val = 0; int err; - err = __read_mon(mon, mon_comp, mon_type, mon_opts, mon_idx, + err = __read_mon(mon, mon_comp, mon_type, mon_idx, CDP_CODE, closid, rmid, &cdp_val); if (err) return err; - err = __read_mon(mon, mon_comp, mon_type, mon_opts, mon_idx, + err = __read_mon(mon, mon_comp, mon_type, mon_idx, CDP_DATA, closid, rmid, &cdp_val); if (!err) *val += cdp_val; return 
err; } - return __read_mon(mon, mon_comp, mon_type, mon_idx, mon_opts, + return __read_mon(mon, mon_comp, mon_type, mon_idx, CDP_NONE, closid, rmid, val); } -static enum mon_filter_options resctrl_evt_config_to_mpam(u32 local_evt_cfg) -{ - switch (local_evt_cfg) { - case READS_TO_LOCAL_MEM: - return COUNT_READ; - case NON_TEMP_WRITE_TO_LOCAL_MEM: - return COUNT_WRITE; - default: - return COUNT_BOTH; - } -} - /* MBWU when not in ABMC mode, and CSU counters. */ -int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, +int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr, u32 closid, u32 rmid, enum resctrl_event_id eventid, - u64 *val, void *arch_mon_ctx) + void *arch_priv, u64 *val, void *arch_mon_ctx) { struct mpam_resctrl_dom *l3_dom; struct mpam_component *mon_comp; - enum mon_filter_options mon_opts; u32 mon_idx = *(u32 *)arch_mon_ctx; enum mpam_device_features mon_type; struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[eventid]; @@ -561,9 +611,8 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, if (eventid >= QOS_NUM_EVENTS || !mon->class) return -EINVAL; - l3_dom = container_of(d, struct mpam_resctrl_dom, resctrl_mon_dom); + l3_dom = container_of(hdr, struct mpam_resctrl_dom, resctrl_mon_dom.hdr); mon_comp = l3_dom->mon_comp[eventid]; - mon_opts = resctrl_evt_config_to_mpam(l3_dom->mbm_local_evt_cfg); switch (eventid) { case QOS_L3_OCCUP_EVENT_ID: @@ -577,17 +626,16 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, return -EINVAL; } - return read_mon_cdp_safe(mon, mon_comp, mon_type, mon_opts, mon_idx, + return read_mon_cdp_safe(mon, mon_comp, mon_type, mon_idx, closid, rmid, val); } /* MBWU counters when in ABMC mode */ -int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_mon_domain *d, +int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_l3_mon_domain *d, u32 closid, u32 rmid, int mon_idx, enum resctrl_event_id eventid, u64 *val) { 
struct mpam_resctrl_mon *mon = &mpam_resctrl_counters[eventid]; - enum mon_filter_options mon_opts; struct mpam_resctrl_dom *l3_dom; struct mpam_component *mon_comp; @@ -599,10 +647,9 @@ int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_mon_domain *d, l3_dom = container_of(d, struct mpam_resctrl_dom, resctrl_mon_dom); mon_comp = l3_dom->mon_comp[eventid]; - mon_opts = resctrl_evt_config_to_mpam(l3_dom->mbm_local_evt_cfg); return read_mon_cdp_safe(mon, mon_comp, mpam_feat_msmon_mbwu, mon_idx, - mon_opts, closid, rmid, val); + closid, rmid, val); } static void __reset_mon(struct mpam_resctrl_mon *mon, struct mpam_component *mon_comp, @@ -640,7 +687,7 @@ static void reset_mon_cdp_safe(struct mpam_resctrl_mon *mon, struct mpam_compone } /* Called via IPI. Call with read_cpus_lock() held. */ -void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, +void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_l3_mon_domain *d, u32 closid, u32 rmid, enum resctrl_event_id eventid) { struct mpam_resctrl_dom *l3_dom; @@ -661,7 +708,7 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, } /* Reset an assigned counter */ -void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, +void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, u32 closid, u32 rmid, int cntr_id, enum resctrl_event_id eventid) { @@ -843,14 +890,7 @@ static u32 percent_to_mbw_pbm(u8 pc, struct mpam_props *cprops) */ static u32 fract16_to_percent(u16 fract, u8 wd) { - u32 val = fract; - - val >>= 16 - wd; - val += 1; - val *= MAX_MBA_BW; - val = DIV_ROUND_CLOSEST(val, 1 << wd); - - return val; + return DIV_ROUND_CLOSEST((fract + 1) * 100, 65536); } /* @@ -865,14 +905,7 @@ static u32 fract16_to_percent(u16 fract, u8 wd) */ static u16 percent_to_fract16(u8 pc, u8 wd) { - u32 val = pc; - - val <<= wd; - val = DIV_ROUND_CLOSEST(val, MAX_MBA_BW); - val = max(val, 1) - 1; - val <<= 16 - wd; - - 
return val; + return pc ? (((pc * 65536) / 100) - 1) : 0; } static u32 mbw_max_to_percent(u16 mbw_max, struct mpam_props *cprops) @@ -942,7 +975,8 @@ static bool topology_matches_l3(struct mpam_class *victim) { int cpu, err; struct mpam_component *victim_iter; - cpumask_var_t __free(free_cpumask_var) tmp_cpumask; + bool matched_once = false; + cpumask_var_t __free(free_cpumask_var) tmp_cpumask = NULL; if (!alloc_cpumask_var(&tmp_cpumask, GFP_KERNEL)) return false; @@ -956,7 +990,10 @@ static bool topology_matches_l3(struct mpam_class *victim) return false; } - cpu = cpumask_any(&victim_iter->affinity); + cpu = cpumask_any_and(&victim_iter->affinity, cpu_online_mask); + if (matched_once && (cpu >= nr_cpu_ids)) + continue; + if (WARN_ON_ONCE(cpu >= nr_cpu_ids)) return false; @@ -978,18 +1015,63 @@ static bool topology_matches_l3(struct mpam_class *victim) return false; } + matched_once = true; } return true; } -static bool topology_matches_numa(struct mpam_class *victim) +/* + * Test if the traffic for a class matches that at egress from the L3. For + * MSC at memory controllers this is only possible if there is a single L3 + * as otherwise the counters at the memory can include bandwidth from the + * non-local L3. + */ +static bool traffic_matches_l3(struct mpam_class *class) { - /* - * For now, check this is a memory class, in which case component - * id are already NUMA nid. 
- */ - return (victim->type == MPAM_CLASS_MEMORY); + int err, cpu; + + lockdep_assert_cpus_held(); + + if (class->type == MPAM_CLASS_CACHE && class->level == 3) + return true; + + if (class->type == MPAM_CLASS_CACHE && class->level != 3) { + pr_debug("class %u is a different cache from L3\n", class->level); + return false; + } + + cpumask_var_t __free(free_cpumask_var) tmp_cpumask = NULL; + if (!alloc_cpumask_var(&tmp_cpumask, GFP_KERNEL)) { + pr_debug("cpumask allocation failed\n"); + return false; + } + + if (class->type != MPAM_CLASS_MEMORY) { + pr_debug("class %u is neither of type cache or memory\n", + class->level); + return false; + } + + cpu = cpumask_any_and(&class->affinity, cpu_online_mask); + err = find_l3_equivalent_bitmask(cpu, tmp_cpumask); + if (err) { + pr_debug("Failed to find L3 downstream to cpu %d\n", cpu); + return false; + } + + if (!cpumask_equal(tmp_cpumask, cpu_possible_mask)) { + pr_debug("There is more than one L3\n"); + return false; + } + + /* Be strict; the traffic might stop in the intermediate cache. */ + if (get_cpu_cacheinfo_id(cpu, 4) != -1) { + pr_debug("L3 isn't the last level of cache\n"); + return false; + } + + return true; } /* Test whether we can export MPAM_CLASS_CACHE:{2,3}? 
*/ @@ -1036,7 +1118,6 @@ static void mpam_resctrl_pick_caches(void) else res = &mpam_resctrl_controls[RDT_RESOURCE_L3]; res->class = class; - exposed_alloc_capable = true; } if (has_cmax) { pr_debug("pick_caches: Class has CMAX\n"); @@ -1045,7 +1126,6 @@ static void mpam_resctrl_pick_caches(void) else res = &mpam_resctrl_controls[RDT_RESOURCE_L3_MAX]; res->class = class; - exposed_alloc_capable = true; } } } @@ -1061,8 +1141,6 @@ static void mpam_resctrl_pick_mba(void) list_for_each_entry_srcu(class, &mpam_classes, classes_list, srcu_read_lock_held(&mpam_srcu)) { struct mpam_props *cprops = &class->props; - bool l3_cache_id_possible = false; - bool numa_nid_possible = false; if (class->level < 3) { pr_debug("class %u is before L3\n", class->level); @@ -1079,18 +1157,16 @@ static void mpam_resctrl_pick_mba(void) continue; } - if (topology_matches_numa(class)) { - pr_debug("class %u topology matches NUMA domains\n", class->level); - numa_nid_possible = true; - } - - if (topology_matches_l3(class)) { - pr_debug("class %u topology matches L3\n", class->level); - l3_cache_id_possible = true; + if ((class->level == 3) && (!topology_matches_l3(class))) { + pr_debug("class %u topology doesn't match L3\n", class->level); + continue; } - if (!l3_cache_id_possible && !numa_nid_possible) { - pr_debug("class %u has no matching topology for MB\n", class->level); + /* Check memory at egress from L3 for MSC with L3 */ + if (!cpumask_equal(&class->affinity, cpu_possible_mask) && + !traffic_matches_l3(class)) { + pr_debug("class %u traffic doesn't match L3 egress\n", + class->level); continue; } @@ -1099,24 +1175,14 @@ static void mpam_resctrl_pick_mba(void) * mbm_local is implicitly part of the L3, pick a resource to be MBA * that as close as possible to the L3. */ - if (!candidate_class || class->level < candidate_class->level) { - /* - * Refuse to pick a closer class if it would prevent cache-id - * being used as domain-id by default. 
- */ - if (!candidate_class || l3_cache_id_possible) { - candidate_class = class; - mb_l3_cache_id_possible = l3_cache_id_possible; - mb_numa_nid_possible = numa_nid_possible; - } - } + if (!candidate_class || class->level < candidate_class->level) + candidate_class = class; } if (candidate_class) { pr_debug("selected class %u to back MBA\n", candidate_class->level); res = &mpam_resctrl_controls[RDT_RESOURCE_MBA]; res->class = candidate_class; - exposed_alloc_capable = true; } } @@ -1188,7 +1254,6 @@ static void counter_update_class(enum resctrl_event_id evt_id, pr_debug("Updating event %u to use class %u\n", evt_id, class->level); mon->class = class; - exposed_mon_capable = true; if (evt_id == QOS_L3_OCCUP_EVENT_ID) return; @@ -1272,7 +1337,10 @@ static void mpam_resctrl_pick_counters(void) } has_mbwu = class_has_usable_mbwu(class); - if (has_mbwu && topology_matches_l3(class)) { + if (has_mbwu && + ((class->type == MPAM_CLASS_MEMORY) || + (topology_matches_l3(class) && + traffic_matches_l3(class)))) { pr_debug("class %u has usable MBWU, and matches L3 topology", class->level); /* @@ -1303,82 +1371,6 @@ static void mpam_resctrl_pick_counters(void) mpam_resctrl_counters[QOS_L3_MBM_TOTAL_EVENT_ID].class); } -bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt) -{ - struct mpam_class *class; - struct mpam_props *cprops; - - class = mpam_resctrl_counters[evt].class; - if (!class) - return false; - - cprops = &class->props; - - return mpam_has_feature(mpam_feat_msmon_mbwu_rwbw, cprops); -} - -void resctrl_arch_mon_event_config_read(void *info) -{ - struct mpam_resctrl_dom *dom; - struct resctrl_mon_config_info *mon_info = info; - - if (!mpam_is_enabled()) { - mon_info->mon_config = 0; - return; - } - - dom = container_of(mon_info->d, struct mpam_resctrl_dom, resctrl_mon_dom); - mon_info->mon_config = dom->mbm_local_evt_cfg & MAX_EVT_CONFIG_BITS; -} - -void resctrl_arch_mon_event_config_write(void *info) -{ - struct mpam_resctrl_dom *dom; - struct 
resctrl_mon_config_info *mon_info = info; - - WARN_ON_ONCE(mon_info->mon_config & ~MPAM_RESTRL_EVT_CONFIG_VALID); - - dom = container_of(mon_info->d, struct mpam_resctrl_dom, resctrl_mon_dom); - - if (!mpam_is_enabled()) { - dom->mbm_local_evt_cfg = 0; - return; - } - - dom->mbm_local_evt_cfg = mon_info->mon_config & MPAM_RESTRL_EVT_CONFIG_VALID; -} - -void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d) -{ - int i; - struct mpam_resctrl_dom *dom; - struct mpam_resctrl_mon *mon; - struct mpam_component *mon_comp; - - dom = container_of(d, struct mpam_resctrl_dom, resctrl_mon_dom); - if (!mpam_is_enabled()) { - dom->mbm_local_evt_cfg = 0; - return; - } - dom->mbm_local_evt_cfg = MPAM_RESTRL_EVT_CONFIG_VALID; - - /* - * Monitors may be backed by different classes of MSC, all - * possible components need to be reset... - */ - for (i = 0; i < QOS_NUM_EVENTS; i++) { - mon = &mpam_resctrl_counters[i]; - if (!mon->class) - continue; // dummy resource - - mon_comp = dom->mon_comp[i]; - if (!mon_comp) - continue; - - mpam_msmon_reset_all_mbwu(mon_comp); - } -} - static void __config_cntr(struct mpam_resctrl_mon *mon, u32 cntr_id, enum resctrl_conf_type cdp_type, u32 closid, u32 rmid, bool assign) @@ -1395,7 +1387,7 @@ static void __config_cntr(struct mpam_resctrl_mon *mon, u32 cntr_id, mon->mbwu_idx_to_mon[mbwu_idx] = -1; } -void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, +void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, enum resctrl_event_id evtid, u32 rmid, u32 closid, u32 cntr_id, bool assign) { @@ -1418,10 +1410,16 @@ void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, bool resctrl_arch_mbm_cntr_assign_enabled(struct rdt_resource *r) { - if (r != &mpam_resctrl_controls[RDT_RESOURCE_L3].resctrl_res) + struct mpam_resctrl_res *res; + struct mpam_resctrl_mon *mon; + + res = container_of(r, struct mpam_resctrl_res, resctrl_res); + + mon = 
mpam_resctrl_mon_from_res(res); + if (!mon) return false; - return mpam_resctrl_abmc_enabled(); + return mon->assigned_counters ? true : false; } int resctrl_arch_mbm_cntr_assign_set(struct rdt_resource *r, bool enable) @@ -1486,13 +1484,9 @@ static int mpam_resctrl_control_init(struct mpam_resctrl_res *res, break; case RDT_RESOURCE_MBA: - /* Domain ID is the L3 cache-id by default */ - if (mb_l3_cache_id_possible) - r->alloc_capable = true; - + r->alloc_capable = true; r->schema_fmt = RESCTRL_SCHEMA_PERCENT; r->ctrl_scope = RESCTRL_L3_CACHE; - r->mba.delay_linear = true; r->mba.throttle_mode = THREAD_THROTTLE_UNDEFINED; r->membw.min_bw = get_mba_min(cprops); @@ -1511,15 +1505,12 @@ static int mpam_resctrl_control_init(struct mpam_resctrl_res *res, static int mpam_resctrl_pick_domain_id(int cpu, struct mpam_component *comp) { - bool is_mb; struct mpam_class *class = comp->class; - is_mb = (mpam_resctrl_controls[RDT_RESOURCE_MBA].class == class); - - if (is_mb && mb_uses_numa_nid && topology_matches_numa(class)) + if (class->type == MPAM_CLASS_CACHE) return comp->comp_id; - if (class->type == MPAM_CLASS_CACHE) + if ((class->type == MPAM_CLASS_MEMORY) && (class->level > 3)) return comp->comp_id; if (topology_matches_l3(class)) { @@ -1545,10 +1536,10 @@ static int mpam_resctrl_pick_domain_id(int cpu, struct mpam_component *comp) */ static int mpam_resctrl_monitor_init_abmc(struct mpam_resctrl_mon *mon) { - struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3]; + struct mpam_resctrl_res *res = mpam_resctrl_res_from_mon(mon); size_t array_size = resctrl_arch_system_num_rmid_idx() * sizeof(int); int *rmid_array __free(kfree) = kmalloc(array_size, GFP_KERNEL); - struct rdt_resource *l3 = &res->resctrl_res; + struct rdt_resource *r = &res->resctrl_res; struct mpam_class *class = mon->class; u16 num_mbwu_mon; @@ -1569,70 +1560,33 @@ static int mpam_resctrl_monitor_init_abmc(struct mpam_resctrl_mon *mon) return PTR_ERR(mon->assigned_counters); 
mon->mbwu_idx_to_mon = no_free_ptr(rmid_array); - mpam_resctrl_monitor_sync_abmc_vals(l3); + mpam_resctrl_monitor_sync_abmc_vals(r); return 0; } -bool resctrl_arch_get_mb_uses_numa_nid(void) -{ - return mb_uses_numa_nid; -} - -int resctrl_arch_set_mb_uses_numa_nid(bool enabled) +static int mpam_resctrl_monitor_init(struct mpam_resctrl_mon *mon, + enum resctrl_event_id type) { - struct rdt_resource *r; struct mpam_resctrl_res *res; - struct mpam_resctrl_dom *dom; - struct rdt_ctrl_domain *ctrl_d; - - lockdep_assert_cpus_held(); - lockdep_assert_mems_held(); - - if (!mb_numa_nid_possible) - return -EOPNOTSUPP; - - if (mb_uses_numa_nid == enabled) - return 0; - - /* Domain IDs as NUMA nid is only defined for MBA */ - res = &mpam_resctrl_controls[RDT_RESOURCE_MBA]; - if (!res->class) - return -EOPNOTSUPP; - r = &res->resctrl_res; - - /* repaint the domain IDs */ - mb_uses_numa_nid = enabled; - list_for_each_entry(ctrl_d, &r->ctrl_domains, hdr.list) { - int cpu = cpumask_any(&ctrl_d->hdr.cpu_mask); - - dom = container_of(ctrl_d, struct mpam_resctrl_dom, resctrl_ctrl_dom); - ctrl_d->hdr.id = mpam_resctrl_pick_domain_id(cpu, dom->ctrl_comp); - } - - /* monitor domains are unaffected and should continue to use the L3 */ + struct rdt_resource *r; - if (!enabled && mb_l3_cache_id_possible) - r->alloc_capable = true; - else if (enabled && mb_numa_nid_possible) - r->alloc_capable = true; + if ((mon->class->type == MPAM_CLASS_MEMORY) && (mon->class->level > 3)) + res = &mpam_resctrl_controls[RDT_RESOURCE_MBA]; else - r->alloc_capable = false; - - return 0; -} + res = &mpam_resctrl_controls[RDT_RESOURCE_L3]; -static void mpam_resctrl_monitor_init(struct mpam_resctrl_mon *mon, - enum resctrl_event_id type) -{ - struct mpam_resctrl_res *res = &mpam_resctrl_controls[RDT_RESOURCE_L3]; - struct rdt_resource *l3 = &res->resctrl_res; + r = &res->resctrl_res; lockdep_assert_cpus_held(); - /* There also needs to be an L3 cache present */ + /* + * There also needs to be an L3 cache 
present. + * The check just requires any online CPU and it can't go offline as we + * hold the cpu lock. + */ if (get_cpu_cacheinfo_id(smp_processor_id(), 3) == -1) - return; + return 0; /* * If there are no MPAM resources on L3, force it into existence. @@ -1644,42 +1598,43 @@ static void mpam_resctrl_monitor_init(struct mpam_resctrl_mon *mon, res->class = mpam_resctrl_counters[type].class; } - /* Called multiple times!, once per event type */ - if (exposed_mon_capable) { - l3->mon_capable = true; - - /* Setting name is necessary on monitor only platforms */ - l3->name = "L3"; - l3->mon_scope = RESCTRL_L3_CACHE; + /* + * Called multiple times!, once per event type that has a + * monitoring class. + * Setting name is necessary on monitor only platforms. + */ + if ((mon->class->type == MPAM_CLASS_MEMORY) && (mon->class->level > 3)) { + r->name = "MB"; + } else { + r->name = "L3"; + } + r->mon_scope = RESCTRL_L3_CACHE; - resctrl_enable_mon_event(type); + /* + * num-rmid is the upper bound for the number of monitoring + * groups that can exist simultaneously, including the + * default monitoring group for each control group. Hence, + * advertise the whole rmid_idx space even though each + * control group has its own pmg/rmid space. Unfortunately, + * this does mean userspace needs to know the architecture + * to correctly interpret this value. + */ + r->mon.num_rmid = resctrl_arch_system_num_rmid_idx(); - /* - * Unfortunately, num_rmid doesn't mean anything for - * mpam, and its exposed to user-space! - * - * num-rmid is supposed to mean the minimum number of - * monitoring groups that can exist simultaneously, including - * the default monitoring group for each control group. - * - * For mpam, each control group has its own pmg/rmid space, so - * it is not appropriate to advertise the whole rmid_idx space - * here. 
But the pmgs corresponding to the parent control - * group can be allocated freely: - */ - l3->mon.num_rmid = mpam_pmg_max + 1;; + if (resctrl_enable_mon_event(type, false, 0, NULL)) + r->mon_capable = true; - switch (type) { - case QOS_L3_MBM_LOCAL_EVENT_ID: - case QOS_L3_MBM_TOTAL_EVENT_ID: - mpam_resctrl_monitor_init_abmc(mon); - l3->mon.mbm_cfg_mask = MPAM_RESTRL_EVT_CONFIG_VALID; + switch (type) { + case QOS_L3_MBM_LOCAL_EVENT_ID: + case QOS_L3_MBM_TOTAL_EVENT_ID: + mpam_resctrl_monitor_init_abmc(mon); - return; - default: - return; - } + return 0; + default: + return 0; } + + return 0; } u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, @@ -1881,46 +1836,40 @@ void resctrl_arch_reset_all_ctrls(struct rdt_resource *r) mpam_reset_class_locked(res->class); } -/** - * mpam_resctrl_domain_hdr_init() - Bring a subset of a domain online. - * @onlined_cpus: The set of CPUs that are online from the domain's - * perspective. - * @comp: The mpam component being brought online. - * @hdr: The header representing the domain. - * - * Adds @onlined_cpus to @hdr's cpu_mask, and sets the @hdr id. - * For NUMA nodes, @onlined_cpus will be cpu_possible_mask. - */ -static void mpam_resctrl_domain_hdr_init(const struct cpumask *onlined_cpus, - struct mpam_component *comp, +static void mpam_resctrl_domain_hdr_init(int cpu, struct mpam_component *comp, + enum resctrl_res_level rid, struct rdt_domain_hdr *hdr) { - int cpu = cpumask_any(onlined_cpus); - lockdep_assert_cpus_held(); INIT_LIST_HEAD(&hdr->list); hdr->id = mpam_resctrl_pick_domain_id(cpu, comp); - cpumask_and(&hdr->cpu_mask, &hdr->cpu_mask, onlined_cpus); + hdr->rid = rid; + cpumask_set_cpu(cpu, &hdr->cpu_mask); +} + +static void mpam_resctrl_online_domain_hdr(unsigned int cpu, + struct rdt_domain_hdr *hdr) +{ + lockdep_assert_cpus_held(); + + cpumask_set_cpu(cpu, &hdr->cpu_mask); } /** - * mpam_resctrl_offline_domain_hdr() - Take a subset of a domain offline. 
- * @offlined_cpus: The set of CPUs that are offline from the domain's - * perspective. + * mpam_resctrl_offline_domain_hdr() - Update the domain header to remove a CPU. + * @cpu: The CPU to remove from the domain. * @hdr: The domain's header. * - * Removes @offlined_cpus from @hdr's cpu_mask. If the list is empty, + * Removes @cpu from the header mask. If this was the last CPU in the domain, * the domain header is removed from its parent list and true is returned, * indicating the parent structure can be freed. * If there are other CPUs in the domain, returns false. - * - * For NUMA nodes, @offlined_cpus will be cpu_possible_mask. */ -static bool mpam_resctrl_offline_domain_hdr(const struct cpumask *offlined_cpus, +static bool mpam_resctrl_offline_domain_hdr(unsigned int cpu, struct rdt_domain_hdr *hdr) { - cpumask_andnot(&hdr->cpu_mask, &hdr->cpu_mask, offlined_cpus); + cpumask_clear_cpu(cpu, &hdr->cpu_mask); if (cpumask_empty(&hdr->cpu_mask)) { list_del(&hdr->list); return true; @@ -1929,18 +1878,14 @@ static bool mpam_resctrl_offline_domain_hdr(const struct cpumask *offlined_cpus, return false; } -static struct mpam_component *find_component(struct mpam_class *victim, - const struct cpumask *onlined_cpus) +static struct mpam_component *find_component(struct mpam_class *victim, int cpu) { struct mpam_component *victim_comp; guard(srcu)(&mpam_srcu); list_for_each_entry_srcu(victim_comp, &victim->components, class_list, srcu_read_lock_held(&mpam_srcu)) { - struct cpumask tmp; - - cpumask_andnot(&tmp, onlined_cpus, &victim_comp->affinity); - if (cpumask_empty(&tmp)) + if (cpumask_test_cpu(cpu, &victim_comp->affinity)) return victim_comp; } @@ -1963,41 +1908,40 @@ static void mpam_resctrl_domain_insert(struct list_head *list, } static struct mpam_resctrl_dom * -mpam_resctrl_alloc_domain(const struct cpumask *onlined_cpus, int nid, - struct mpam_component *ctrl_comp, - struct mpam_resctrl_res *res) +mpam_resctrl_alloc_domain(unsigned int cpu, struct mpam_resctrl_res 
*res, + struct mpam_component *comp) { int err; struct mpam_resctrl_dom *dom; - struct rdt_mon_domain *mon_d; + struct rdt_l3_mon_domain *mon_d; struct rdt_ctrl_domain *ctrl_d; struct rdt_resource *r = &res->resctrl_res; lockdep_assert_held(&domain_list_lock); - dom = kzalloc_node(sizeof(*dom), GFP_KERNEL, nid); + dom = kzalloc_node(sizeof(*dom), GFP_KERNEL, cpu_to_node(cpu)); if (!dom) return ERR_PTR(-ENOMEM); - if (exposed_alloc_capable) { - dom->ctrl_comp = ctrl_comp; + if (resctrl_arch_alloc_capable()) { + dom->ctrl_comp = comp; ctrl_d = &dom->resctrl_ctrl_dom; - mpam_resctrl_domain_hdr_init(onlined_cpus, ctrl_comp, &ctrl_d->hdr); + mpam_resctrl_domain_hdr_init(cpu, comp, r->rid, &ctrl_d->hdr); ctrl_d->hdr.type = RESCTRL_CTRL_DOMAIN; - mpam_resctrl_domain_insert(&r->ctrl_domains, &ctrl_d->hdr); err = resctrl_online_ctrl_domain(r, ctrl_d); - if (err) { - dom = ERR_PTR(err); - goto offline_ctrl_domain; - } + if (err) + goto free_domain; + + mpam_resctrl_domain_insert(&r->ctrl_domains, &ctrl_d->hdr); } else { pr_debug("Skipped control domain online - no controls\n"); } - if (exposed_mon_capable) { + if (resctrl_arch_mon_capable()) { int i; - struct mpam_component *mon_comp, *any_mon_comp; + struct mpam_component *any_mon_comp; + struct mpam_resctrl_mon *mon; /* * Even if the monitor domain is backed by a different component, @@ -2006,43 +1950,47 @@ mpam_resctrl_alloc_domain(const struct cpumask *onlined_cpus, int nid, * Search each event's class list for a component with overlapping * CPUs and set up the dom->mon_comp array. */ - for (i = 0; i < QOS_NUM_EVENTS; i++) { - struct mpam_resctrl_mon *mon; - mon = &mpam_resctrl_counters[i]; + for_each_mpam_resctrl_mon(mon, i) { + struct mpam_component *mon_comp; + if (!mon->class) continue; // dummy resource - mon_comp = find_component(mon->class, onlined_cpus); + mon_comp = comp ? 
comp: find_component(mon->class, cpu); dom->mon_comp[i] = mon_comp; if (mon_comp) any_mon_comp = mon_comp; } - WARN_ON_ONCE(!any_mon_comp); - - dom->mbm_local_evt_cfg = MPAM_RESTRL_EVT_CONFIG_VALID; + if (!any_mon_comp) { + WARN_ON_ONCE(0); + err = -EFAULT; + goto offline_ctrl_domain; + } mon_d = &dom->resctrl_mon_dom; - mpam_resctrl_domain_hdr_init(onlined_cpus, any_mon_comp, - &mon_d->hdr); + mpam_resctrl_domain_hdr_init(cpu, any_mon_comp, r->rid, &mon_d->hdr); mon_d->hdr.type = RESCTRL_MON_DOMAIN; + err = resctrl_online_mon_domain(r, &mon_d->hdr); + if (err) + goto offline_ctrl_domain; + mpam_resctrl_domain_insert(&r->mon_domains, &mon_d->hdr); - err = resctrl_online_mon_domain(r, mon_d); - if (err) { - dom = ERR_PTR(err); - goto offline_mon_hdr; - } } else { pr_debug("Skipped monitor domain online - no monitors\n"); } - goto out; -offline_mon_hdr: - mpam_resctrl_offline_domain_hdr(onlined_cpus, &ctrl_d->hdr); + return dom; offline_ctrl_domain: - resctrl_offline_ctrl_domain(r, ctrl_d); -out: + if (resctrl_arch_alloc_capable()) { + mpam_resctrl_offline_domain_hdr(cpu, &ctrl_d->hdr); + resctrl_offline_ctrl_domain(r, ctrl_d); + } +free_domain: + kfree(dom); + dom = ERR_PTR(err); + return dom; } @@ -2053,10 +2001,10 @@ mpam_resctrl_alloc_domain(const struct cpumask *onlined_cpus, int nid, * This relies on mpam_resctrl_pick_domain_id() using the L3 cache-id * for anything that is not a cache. 
*/ -static struct mpam_resctrl_dom *mpam_resctrl_get_mon_domain_from_cpu(int cpu) +static struct mpam_resctrl_dom * +mpam_resctrl_get_mon_domain_from_cpu(int cpu, struct mpam_component *comp) { u32 cache_id; - struct rdt_mon_domain *mon_d; struct mpam_resctrl_dom *dom; struct mpam_resctrl_res *l3 = &mpam_resctrl_controls[RDT_RESOURCE_L3]; @@ -2067,10 +2015,10 @@ static struct mpam_resctrl_dom *mpam_resctrl_get_mon_domain_from_cpu(int cpu) if (cache_id == ~0) return NULL; - list_for_each_entry(mon_d, &l3->resctrl_res.mon_domains, hdr.list) { - dom = container_of(mon_d, struct mpam_resctrl_dom, resctrl_mon_dom); - - if (mon_d->hdr.id == cache_id) + list_for_each_entry(dom, &l3->resctrl_res.mon_domains, resctrl_mon_dom.hdr.list) { + if (comp && (dom->ctrl_comp != comp)) + continue; + if (dom->resctrl_mon_dom.hdr.id == cache_id) return dom; } @@ -2091,72 +2039,17 @@ static struct mpam_resctrl_dom *mpam_resctrl_get_mon_domain_from_cpu(int cpu) * For the monitors, we need to search the list of events... */ static struct mpam_resctrl_dom * -mpam_resctrl_alloc_domain_cpu(int cpu, struct mpam_resctrl_res *res) -{ - struct mpam_component *comp_iter, *ctrl_comp; - struct mpam_class *class = res->class; - int idx; - - ctrl_comp = NULL; - idx = srcu_read_lock(&mpam_srcu); - list_for_each_entry_srcu(comp_iter, &class->components, class_list, - srcu_read_lock_held(&mpam_srcu)) { - if (cpumask_test_cpu(cpu, &comp_iter->affinity)) { - ctrl_comp = comp_iter; - break; - } - } - srcu_read_unlock(&mpam_srcu, idx); - - /* cpu with unknown exported component? 
*/ - if (WARN_ON_ONCE(!ctrl_comp)) - return ERR_PTR(-EINVAL); - - return mpam_resctrl_alloc_domain(cpumask_of(cpu), cpu_to_node(cpu), - ctrl_comp, res); -} - -static struct mpam_resctrl_dom * -mpam_resctrl_alloc_domain_nid(int nid, struct mpam_resctrl_res *res) -{ - struct mpam_component *comp_iter, *ctrl_comp; - struct mpam_class *class = res->class; - int idx; - - /* Only the memory class uses comp_id as nid */ - if (class->type != MPAM_CLASS_MEMORY) - return ERR_PTR(-EINVAL); - - ctrl_comp = NULL; - idx = srcu_read_lock(&mpam_srcu); - list_for_each_entry_srcu(comp_iter, &class->components, class_list, - srcu_read_lock_held(&mpam_srcu)) { - if (comp_iter->comp_id == nid) { - ctrl_comp = comp_iter; - break; - } - } - srcu_read_unlock(&mpam_srcu, idx); - - /* cpu with unknown exported component? */ - if (WARN_ON_ONCE(!ctrl_comp)) - return ERR_PTR(-EINVAL); - - return mpam_resctrl_alloc_domain(cpu_possible_mask, nid, ctrl_comp, res); -} - -static struct mpam_resctrl_dom * -mpam_resctrl_get_domain_from_cpu(int cpu, struct mpam_resctrl_res *res) +mpam_resctrl_get_domain_from_cpu(int cpu, struct mpam_resctrl_res *res, + struct mpam_component *comp) { struct mpam_resctrl_dom *dom; - struct rdt_ctrl_domain *ctrl_d; struct rdt_resource *r = &res->resctrl_res; lockdep_assert_cpus_held(); - list_for_each_entry(ctrl_d, &r->ctrl_domains, hdr.list) { - dom = container_of(ctrl_d, struct mpam_resctrl_dom, resctrl_ctrl_dom); - + list_for_each_entry_rcu(dom, &r->ctrl_domains, resctrl_ctrl_dom.hdr.list) { + if (comp && (dom->ctrl_comp != comp)) + continue; if (cpumask_test_cpu(cpu, &dom->ctrl_comp->affinity)) return dom; } @@ -2165,189 +2058,103 @@ mpam_resctrl_get_domain_from_cpu(int cpu, struct mpam_resctrl_res *res) return NULL; /* Search the mon domain list too - needed on monitor only platforms. 
*/ - return mpam_resctrl_get_mon_domain_from_cpu(cpu); -} - -static struct mpam_resctrl_dom * -mpam_get_domain_from_nid(int nid, struct mpam_resctrl_res *res) -{ - struct rdt_ctrl_domain *d; - struct mpam_resctrl_dom *dom; - - list_for_each_entry(d, &res->resctrl_res.ctrl_domains, hdr.list) { - dom = container_of(d, struct mpam_resctrl_dom, resctrl_ctrl_dom); - - /* Only the memory class uses comp_id as nid */ - if (dom->ctrl_comp->class->type != MPAM_CLASS_MEMORY) - continue; - - if (dom->ctrl_comp->comp_id == nid) - return dom; - } - - return NULL; + return mpam_resctrl_get_mon_domain_from_cpu(cpu, comp); } int mpam_resctrl_online_cpu(unsigned int cpu) { - int i, err = 0; - struct mpam_resctrl_dom *dom; - struct mpam_resctrl_res *res; - - mutex_lock(&domain_list_lock); - for (i = 0; i < RDT_NUM_RESOURCES; i++) { - res = &mpam_resctrl_controls[i]; - if (!res->class) - continue; // dummy_resource; - - dom = mpam_resctrl_get_domain_from_cpu(cpu, res); - if (!dom) - dom = mpam_resctrl_alloc_domain_cpu(cpu, res); - if (IS_ERR(dom)) { - err = PTR_ERR(dom); - break; - } - - cpumask_set_cpu(cpu, &dom->resctrl_ctrl_dom.hdr.cpu_mask); - cpumask_set_cpu(cpu, &dom->resctrl_mon_dom.hdr.cpu_mask); - } - mutex_unlock(&domain_list_lock); - - if (!err) - resctrl_online_cpu(cpu); - - return err; -} - -int mpam_resctrl_offline_cpu(unsigned int cpu) -{ - int i; - struct mpam_resctrl_res *res; - struct mpam_resctrl_dom *dom; - struct rdt_mon_domain *mon_d; + struct rdt_l3_mon_domain *mon_d; struct rdt_ctrl_domain *ctrl_d; - bool ctrl_dom_empty, mon_dom_empty; + struct mpam_resctrl_res *res; + enum resctrl_res_level rid; + struct mpam_component *comp; - resctrl_offline_cpu(cpu); + guard(mutex)(&domain_list_lock); + for_each_mpam_resctrl_control(res, rid) { + struct mpam_resctrl_dom *dom; - mutex_lock(&domain_list_lock); - for (i = 0; i < RDT_NUM_RESOURCES; i++) { - res = &mpam_resctrl_controls[i]; if (!res->class) - continue; // dummy resource - - dom = 
mpam_resctrl_get_domain_from_cpu(cpu, res); - if (WARN_ON_ONCE(!dom)) - continue; + continue; // dummy_resource; - ctrl_dom_empty = true; - if (exposed_alloc_capable) { - mpam_reset_component_locked(dom->ctrl_comp); + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(comp, &res->class->components, class_list, + srcu_read_lock_held(&mpam_srcu)) { + if (!cpumask_test_cpu(cpu, &comp->affinity)) + continue; - ctrl_d = &dom->resctrl_ctrl_dom; - ctrl_dom_empty = mpam_resctrl_offline_domain_hdr(cpumask_of(cpu), - &ctrl_d->hdr); - if (ctrl_dom_empty) - resctrl_offline_ctrl_domain(&res->resctrl_res, ctrl_d); - } + dom = mpam_resctrl_get_domain_from_cpu(cpu, res, comp); + if (!dom) { + dom = mpam_resctrl_alloc_domain(cpu, res, comp); + } else { + if (resctrl_arch_alloc_capable()) { + ctrl_d = &dom->resctrl_ctrl_dom; + mpam_resctrl_online_domain_hdr(cpu, &ctrl_d->hdr); + } + if (resctrl_arch_mon_capable()) { + mon_d = &dom->resctrl_mon_dom; + mpam_resctrl_online_domain_hdr(cpu, &mon_d->hdr); + } + } - mon_dom_empty = true; - if (exposed_mon_capable) { - mon_d = &dom->resctrl_mon_dom; - mon_dom_empty = mpam_resctrl_offline_domain_hdr(cpumask_of(cpu), - &mon_d->hdr); - if (mon_dom_empty) - resctrl_offline_mon_domain(&res->resctrl_res, mon_d); + if (IS_ERR(dom)) { + return PTR_ERR(dom); + } } - - if (ctrl_dom_empty && mon_dom_empty) - kfree(dom); } - mutex_unlock(&domain_list_lock); - - return 0; -} - -static int mpam_resctrl_online_node(unsigned int nid) -{ - struct mpam_resctrl_dom *dom; - struct mpam_resctrl_res *res; - /* Domain IDs as NUMA nid is only defined for MBA */ - res = &mpam_resctrl_controls[RDT_RESOURCE_MBA]; - if (!res->class) - return 0; // dummy_resource; - - dom = mpam_get_domain_from_nid(nid, res); - if (!dom) - dom = mpam_resctrl_alloc_domain_nid(nid, res); - if (IS_ERR(dom)) - return PTR_ERR(dom); + resctrl_online_cpu(cpu); return 0; } -static int mpam_resctrl_offline_node(unsigned int nid) +void mpam_resctrl_offline_cpu(unsigned int cpu) { + struct 
mpam_component *comp; struct mpam_resctrl_res *res; - struct mpam_resctrl_dom *dom; - struct rdt_mon_domain *mon_d; - struct rdt_ctrl_domain *ctrl_d; - - /* Domain IDs as NUMA nid is only defined for MBA */ - res = &mpam_resctrl_controls[RDT_RESOURCE_MBA]; - if (!res->class) - return 0; // dummy_resource; - - dom = mpam_get_domain_from_nid(nid, res); - if (WARN_ON_ONCE(!dom)) - return 0; + enum resctrl_res_level rid; - ctrl_d = &dom->resctrl_ctrl_dom; - resctrl_offline_ctrl_domain(&res->resctrl_res, ctrl_d); - if (!mpam_resctrl_offline_domain_hdr(cpu_possible_mask, &ctrl_d->hdr)) - return 0; + resctrl_offline_cpu(cpu); - // TODO: skip monitor domains if there are no monitors for this resource - mon_d = &dom->resctrl_mon_dom; - resctrl_offline_mon_domain(&res->resctrl_res, mon_d); - if (!mpam_resctrl_offline_domain_hdr(cpu_possible_mask, &mon_d->hdr)) - return 0; + guard(mutex)(&domain_list_lock); + for_each_mpam_resctrl_control(res, rid) { + struct mpam_resctrl_dom *dom; + struct rdt_l3_mon_domain *mon_d; + struct rdt_ctrl_domain *ctrl_d; + bool ctrl_dom_empty, mon_dom_empty; - kfree(dom); + if (!res->class) + continue; // dummy resource - return 0; -} + guard(srcu)(&mpam_srcu); + list_for_each_entry_srcu(comp, &res->class->components, class_list, + srcu_read_lock_held(&mpam_srcu)) { + if (!cpumask_test_cpu(cpu, &comp->affinity)) + continue; -static int mpam_resctrl_node_notifier(struct notifier_block *self, - unsigned long action, void *arg) -{ - struct node_notify *nn = arg; + dom = mpam_resctrl_get_domain_from_cpu(cpu, res, comp); + if (WARN_ON_ONCE(!dom)) + continue; - if (nn->nid < 0 || !mb_uses_numa_nid) - return NOTIFY_OK; + ctrl_dom_empty = true; + if (resctrl_arch_alloc_capable()) { + ctrl_d = &dom->resctrl_ctrl_dom; + ctrl_dom_empty = mpam_resctrl_offline_domain_hdr(cpu, &ctrl_d->hdr); + if (ctrl_dom_empty) + resctrl_offline_ctrl_domain(&res->resctrl_res, ctrl_d); + } - /* - * Ignore nid that have CPUs. 
Resctrl needs to see the cpu offline - * call for each CPU to update the CPUs in control groups. Moving - * the overflow handler isn't an issue as only L3 can be mon_capable, - * and NUMA nid used as domain-id are only an option for MBA. - */ - if (!cpumask_empty(cpumask_of_node(nn->nid))) - return NOTIFY_OK; + mon_dom_empty = true; + if (resctrl_arch_mon_capable()) { + mon_d = &dom->resctrl_mon_dom; + mon_dom_empty = mpam_resctrl_offline_domain_hdr(cpu, &mon_d->hdr); + if (mon_dom_empty) + resctrl_offline_mon_domain(&res->resctrl_res, &mon_d->hdr); + } - switch (action) { - case NODE_ADDED_FIRST_MEMORY: - mpam_resctrl_online_node(nn->nid); - break; - case NODE_REMOVED_LAST_MEMORY: - mpam_resctrl_offline_node(nn->nid); - break; - default: - /* don't care */ + if (ctrl_dom_empty && mon_dom_empty) + kfree(dom); + } } - - return NOTIFY_OK; } int mpam_resctrl_setup(void) @@ -2361,10 +2168,9 @@ int mpam_resctrl_setup(void) wait_event(wait_cacheinfo_ready, cacheinfo_ready); cpus_read_lock(); - for (i = 0; i < RDT_NUM_RESOURCES; i++) { - res = &mpam_resctrl_controls[i]; - INIT_LIST_HEAD(&res->resctrl_res.ctrl_domains); - INIT_LIST_HEAD(&res->resctrl_res.mon_domains); + for_each_mpam_resctrl_control(res, i) { + INIT_LIST_HEAD_RCU(&res->resctrl_res.ctrl_domains); + INIT_LIST_HEAD_RCU(&res->resctrl_res.mon_domains); res->resctrl_res.rid = i; } @@ -2373,60 +2179,49 @@ int mpam_resctrl_setup(void) mpam_resctrl_pick_mba(); /* Initialise the resctrl structures from the classes */ - for (i = 0; i < RDT_NUM_RESOURCES; i++) { - res = &mpam_resctrl_controls[i]; + for_each_mpam_resctrl_control(res, i) { if (!res->class) continue; // dummy resource err = mpam_resctrl_control_init(res, i); if (err) { pr_debug("Failed to initialise rid %u\n", i); - break; + goto internal_error; } } /* Find some classes to use for monitors */ mpam_resctrl_pick_counters(); - for (j = 0; j < QOS_NUM_EVENTS; j++) { - mon = &mpam_resctrl_counters[j]; + for_each_mpam_resctrl_mon(mon, j) { if (!mon->class) 
continue; // dummy resource - mpam_resctrl_monitor_init(mon, j); - } - - if (mb_numa_nid_possible) { - hotplug_node_notifier(mpam_resctrl_node_notifier, - RESCTRL_CALLBACK_PRI); + err = mpam_resctrl_monitor_init(mon, j); + if (err) { + pr_debug("Failed to initialise event %u\n", j); + goto internal_error; + } } cpus_read_unlock(); - if (err || (!exposed_alloc_capable && !exposed_mon_capable)) { - if (err) - pr_debug("Internal error %d - resctrl not supported\n", err); - else - pr_debug("No alloc(%u) or monitor(%u) found - resctrl not supported\n", - exposed_alloc_capable, exposed_mon_capable); - err = -EOPNOTSUPP; + if (!resctrl_arch_alloc_capable() && !resctrl_arch_mon_capable()) { + pr_debug("No alloc(%u) or monitor(%u) found - resctrl not supported\n", + resctrl_arch_alloc_capable(), resctrl_arch_mon_capable()); + return -EOPNOTSUPP; } - if (!err) { - if (!is_power_of_2(mpam_pmg_max + 1)) { - /* - * If not all the partid*pmg values are valid indexes, - * resctrl may allocate pmg that don't exist. This - * should cause an error interrupt. - */ - pr_warn("Number of PMG is not a power of 2! 
resctrl may misbehave"); - } + err = resctrl_init(); + if (err) + return err; + WRITE_ONCE(resctrl_enabled, true); - err = resctrl_init(); - if (!err) - WRITE_ONCE(resctrl_enabled, true); - } + return 0; +internal_error: + cpus_read_unlock(); + pr_debug("Internal error %d - resctrl not supported\n", err); return err; } @@ -2468,16 +2263,14 @@ void mpam_resctrl_teardown_class(struct mpam_class *class) might_sleep(); - for (i = 0; i < RDT_NUM_RESOURCES; i++) { - res = &mpam_resctrl_controls[i]; + for_each_mpam_resctrl_control(res, i) { if (res->class == class) { mpam_resctrl_exit(); res->class = NULL; break; } } - for (i = 0; i < QOS_NUM_EVENTS; i++) { - mon = &mpam_resctrl_counters[i]; + for_each_mpam_resctrl_mon(mon, i) { if (mon->class == class) { mpam_resctrl_exit(); mon->class = NULL; diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index c3688cbe0ff5c..e04b8a5f76c3d 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -545,8 +546,8 @@ struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id, } void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, - struct rdt_mon_domain *d, struct rdtgroup *rdtgrp, - cpumask_t *cpumask, int evtid, int first) + struct rdt_domain_hdr *hdr, struct rdtgroup *rdtgrp, + cpumask_t *cpumask, struct mon_evt *evt, int first) { int cpu; @@ -557,21 +558,26 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, * Setup the parameters to pass to mon_event_count() to read the data. 
*/ rr->rgrp = rdtgrp; - rr->evtid = evtid; + rr->evt = evt; rr->r = r; - rr->d = d; + rr->hdr = hdr; rr->first = first; if (resctrl_arch_mbm_cntr_assign_enabled(r) && - resctrl_is_mbm_event(evtid)) { + resctrl_is_mbm_event(evt->evtid)) { rr->is_mbm_cntr = true; } else { - rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evtid); + rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, evt->evtid); if (IS_ERR(rr->arch_mon_ctx)) { rr->err = -EINVAL; return; } } + if (evt->any_cpu) { + mon_event_count(rr); + goto out_ctx_free; + } + cpu = cpumask_any_housekeeping(cpumask, RESCTRL_PICK_ANY_CPU); /* @@ -585,22 +591,93 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, else smp_call_on_cpu(cpu, smp_mon_event_count, rr, false); +out_ctx_free: if (rr->arch_mon_ctx) - resctrl_arch_mon_ctx_free(r, evtid, rr->arch_mon_ctx); + resctrl_arch_mon_ctx_free(r, evt->evtid, rr->arch_mon_ctx); +} + +/* + * Decimal place precision to use for each number of fixed-point + * binary bits computed from ceil(binary_bits * log10(2)) except + * binary_bits == 0 which will print "value.0" + */ +static const unsigned int decplaces[MAX_BINARY_BITS + 1] = { + [0] = 1, + [1] = 1, + [2] = 1, + [3] = 1, + [4] = 2, + [5] = 2, + [6] = 2, + [7] = 3, + [8] = 3, + [9] = 3, + [10] = 4, + [11] = 4, + [12] = 4, + [13] = 4, + [14] = 5, + [15] = 5, + [16] = 5, + [17] = 6, + [18] = 6, + [19] = 6, + [20] = 7, + [21] = 7, + [22] = 7, + [23] = 7, + [24] = 8, + [25] = 8, + [26] = 8, + [27] = 9 +}; + +static void print_event_value(struct seq_file *m, unsigned int binary_bits, u64 val) +{ + unsigned long long frac = 0; + + if (binary_bits) { + /* Mask off the integer part of the fixed-point value. */ + frac = val & GENMASK_ULL(binary_bits - 1, 0); + + /* + * Multiply by 10^{desired decimal places}. The integer part of + * the fixed point value is now almost what is needed. 
+ */ + frac *= int_pow(10ull, decplaces[binary_bits]); + + /* + * Round to nearest by adding a value that would be a "1" in the + * binary_bits + 1 place. Integer part of fixed point value is + * now the needed value. + */ + frac += 1ull << (binary_bits - 1); + + /* + * Extract the integer part of the value. This is the decimal + * representation of the original fixed-point fractional value. + */ + frac >>= binary_bits; + } + + /* + * "frac" is now in the range [0 .. 10^decplaces). I.e. string + * representation will fit into chosen number of decimal places. + */ + seq_printf(m, "%llu.%0*llu\n", val >> binary_bits, decplaces[binary_bits], frac); } int rdtgroup_mondata_show(struct seq_file *m, void *arg) { struct kernfs_open_file *of = m->private; enum resctrl_res_level resid; - enum resctrl_event_id evtid; struct rdt_domain_hdr *hdr; struct rmid_read rr = {0}; - struct rdt_mon_domain *d; struct rdtgroup *rdtgrp; int domid, cpu, ret = 0; struct rdt_resource *r; struct cacheinfo *ci; + struct mon_evt *evt; struct mon_data *md; rdtgrp = rdtgroup_kn_lock_live(of->kn); @@ -617,10 +694,17 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) resid = md->rid; domid = md->domid; - evtid = md->evtid; + evt = md->evt; r = resctrl_arch_get_resource(resid); if (md->sum) { + struct rdt_l3_mon_domain *d; + + if (WARN_ON_ONCE(resid != RDT_RESOURCE_L3)) { + ret = -EINVAL; + goto out; + } + /* * This file requires summing across all domains that share * the L3 cache id that was provided in the "domid" field of the @@ -635,7 +719,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) continue; rr.ci = ci; mon_event_read(&rr, r, NULL, rdtgrp, - &ci->shared_cpu_map, evtid, false); + &ci->shared_cpu_map, evt, false); goto checkresult; } } @@ -647,12 +731,11 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) * the resource to find the domain with "domid". 
*/ hdr = resctrl_find_domain(&r->mon_domains, domid, NULL); - if (!hdr || WARN_ON_ONCE(hdr->type != RESCTRL_MON_DOMAIN)) { + if (!hdr) { ret = -ENOENT; goto out; } - d = container_of(hdr, struct rdt_mon_domain, hdr); - mon_event_read(&rr, r, d, rdtgrp, &d->hdr.cpu_mask, evtid, false); + mon_event_read(&rr, r, hdr, rdtgrp, &hdr->cpu_mask, evt, false); } checkresult: @@ -667,6 +750,8 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) seq_puts(m, "Unavailable\n"); else if (rr.err == -ENOENT) seq_puts(m, "Unassigned\n"); + else if (evt->is_floating_point) + print_event_value(m, evt->binary_bits, rr.val); else seq_printf(m, "%llu\n", rr.val); diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index f5f74342af317..08cdc8546a8fd 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -42,7 +42,6 @@ struct rdt_fs_context { bool enable_cdpl3; bool enable_mba_mbps; bool enable_debug; - bool mb_uses_numa_nid; bool enable_abi_playground; }; @@ -63,7 +62,14 @@ static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc) * READS_TO_REMOTE_MEM) being tracked by @evtid. * Only valid if @evtid is an MBM event. * @configurable: true if the event is configurable + * @any_cpu: true if the event can be read from any CPU + * @is_floating_point: event values are displayed in floating point format + * @binary_bits: number of fixed-point binary bits from architecture, + * only valid if @is_floating_point is true * @enabled: true if the event is enabled + * @arch_priv: Architecture private data for this event. + * The @arch_priv provided by the architecture via + * resctrl_enable_mon_event(). 
*/ struct mon_evt { enum resctrl_event_id evtid; @@ -71,7 +77,11 @@ struct mon_evt { char *name; u32 evt_cfg; bool configurable; + bool any_cpu; + bool is_floating_point; + unsigned int binary_bits; bool enabled; + void *arch_priv; }; extern struct mon_evt mon_event_all[QOS_NUM_EVENTS]; @@ -79,13 +89,16 @@ extern struct mon_evt mon_event_all[QOS_NUM_EVENTS]; #define for_each_mon_event(mevt) for (mevt = &mon_event_all[QOS_FIRST_EVENT]; \ mevt < &mon_event_all[QOS_NUM_EVENTS]; mevt++) +/* Limit for mon_evt::binary_bits */ +#define MAX_BINARY_BITS 27 + /** * struct mon_data - Monitoring details for each event file. * @list: Member of the global @mon_data_kn_priv_list list. * @rid: Resource id associated with the event file. - * @evtid: Event id associated with the event file. - * @sum: Set when event must be summed across multiple - * domains. + * @evt: Event structure associated with the event file. + * @sum: Set for RDT_RESOURCE_L3 when event must be summed + * across multiple domains. * @domid: When @sum is zero this is the domain to which * the event file belongs. When @sum is one this * is the id of the L3 cache that all domains to be @@ -97,7 +110,7 @@ extern struct mon_evt mon_event_all[QOS_NUM_EVENTS]; struct mon_data { struct list_head list; enum resctrl_res_level rid; - enum resctrl_event_id evtid; + struct mon_evt *evt; int domid; bool sum; }; @@ -108,25 +121,27 @@ struct mon_data { * resource group then its event count is summed with the count from all * its child resource groups. * @r: Resource describing the properties of the event being read. - * @d: Domain that the counter should be read from. If NULL then sum all - * domains in @r sharing L3 @ci.id - * @evtid: Which monitor event to read. + * @hdr: Header of domain that the counter should be read from. If NULL then + * sum all domains in @r sharing L3 @ci.id + * @evt: Which monitor event to read. * @first: Initialize MBM counter when true. - * @ci: Cacheinfo for L3. Only set when @d is NULL. 
Used when summing domains. + * @ci: Cacheinfo for L3. Only set when @hdr is NULL. Used when summing + * domains. * @is_mbm_cntr: true if "mbm_event" counter assignment mode is enabled and it * is an MBM event. * @err: Error encountered when reading counter. - * @val: Returned value of event counter. If @rgrp is a parent resource group, - * @val includes the sum of event counts from its child resource groups. - * If @d is NULL, @val includes the sum of all domains in @r sharing @ci.id, - * (summed across child resource groups if @rgrp is a parent resource group). + * @val: Returned value of event counter. If @rgrp is a parent resource + * group, @val includes the sum of event counts from its child + * resource groups. If @hdr is NULL, @val includes the sum of all + * domains in @r sharing @ci.id, (summed across child resource groups + * if @rgrp is a parent resource group). * @arch_mon_ctx: Hardware monitor allocated for this read request (MPAM only). */ struct rmid_read { struct rdtgroup *rgrp; struct rdt_resource *r; - struct rdt_mon_domain *d; - enum resctrl_event_id evtid; + struct rdt_domain_hdr *hdr; + struct mon_evt *evt; bool first; struct cacheinfo *ci; bool is_mbm_cntr; @@ -250,6 +265,8 @@ struct rdtgroup { #define RFTYPE_SCHEMA_PERCENT BIT(12) #define RFTYPE_SCHEMA_MBPS BIT(13) +#define RFTYPE_RES_PERF_PKG BIT(12) + #define RFTYPE_CTRL_INFO (RFTYPE_INFO | RFTYPE_CTRL) #define RFTYPE_MON_INFO (RFTYPE_INFO | RFTYPE_MON) @@ -360,23 +377,27 @@ int closids_supported(void); void closid_free(int closid); +int setup_rmid_lru_list(void); + +void free_rmid_lru_list(void); + int alloc_rmid(u32 closid); void free_rmid(u32 closid, u32 rmid); -void resctrl_mon_resource_exit(void); +int resctrl_mon_init(void); + +void resctrl_mon_exit(void); void mon_event_count(void *info); int rdtgroup_mondata_show(struct seq_file *m, void *arg); void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, - struct rdt_mon_domain *d, struct rdtgroup *rdtgrp, - cpumask_t *cpumask, 
int evtid, int first); - -int resctrl_mon_resource_init(void); + struct rdt_domain_hdr *hdr, struct rdtgroup *rdtgrp, + cpumask_t *cpumask, struct mon_evt *evt, int first); -void mbm_setup_overflow_handler(struct rdt_mon_domain *dom, +void mbm_setup_overflow_handler(struct rdt_l3_mon_domain *dom, unsigned long delay_ms, int exclude_cpu); @@ -384,14 +405,14 @@ void mbm_handle_overflow(struct work_struct *work); bool is_mba_sc(struct rdt_resource *r); -void cqm_setup_limbo_handler(struct rdt_mon_domain *dom, unsigned long delay_ms, +void cqm_setup_limbo_handler(struct rdt_l3_mon_domain *dom, unsigned long delay_ms, int exclude_cpu); void cqm_handle_limbo(struct work_struct *work); -bool has_busy_rmid(struct rdt_mon_domain *d); +bool has_busy_rmid(struct rdt_l3_mon_domain *d); -void __check_limbo(struct rdt_mon_domain *d, bool force_free); +void __check_limbo(struct rdt_l3_mon_domain *d, bool force_free); void resctrl_file_fflags_init(const char *config, unsigned long fflags); @@ -436,6 +457,11 @@ int mbm_L3_assignments_show(struct kernfs_open_file *of, struct seq_file *s, voi ssize_t mbm_L3_assignments_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off); +int mbm_MB_assignments_show(struct kernfs_open_file *of, struct seq_file *s, void *v); + +ssize_t mbm_MB_assignments_write(struct kernfs_open_file *of, char *buf, size_t nbytes, + loff_t off); + #ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp); diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index e62432467817f..47a6651aecfb9 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -18,7 +18,6 @@ #define pr_fmt(fmt) "resctrl: " fmt #include -#include #include #include #include @@ -150,16 +149,18 @@ static void limbo_release_entry(struct rmid_entry *entry) * decrement the count. 
If the busy count gets to zero on an RMID, we * free the RMID */ -void __check_limbo(struct rdt_mon_domain *d, bool force_free) +void __check_limbo(struct rdt_l3_mon_domain *d, bool force_free) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); u32 idx_limit = resctrl_arch_system_num_rmid_idx(); struct rmid_entry *entry; u32 idx, cur_idx = 1; void *arch_mon_ctx; + void *arch_priv; bool rmid_dirty; u64 val = 0; + arch_priv = mon_event_all[QOS_L3_OCCUP_EVENT_ID].arch_priv; arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, QOS_L3_OCCUP_EVENT_ID); if (IS_ERR(arch_mon_ctx)) { pr_warn_ratelimited("Failed to allocate monitor context: %ld", @@ -181,8 +182,8 @@ void __check_limbo(struct rdt_mon_domain *d, bool force_free) entry = __rmid_entry(idx); if (!entry) break; - if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid, - QOS_L3_OCCUP_EVENT_ID, &val, + if (resctrl_arch_rmid_read(r, &d->hdr, entry->closid, entry->rmid, + QOS_L3_OCCUP_EVENT_ID, arch_priv, &val, arch_mon_ctx)) { rmid_dirty = true; } else { @@ -210,7 +211,7 @@ void __check_limbo(struct rdt_mon_domain *d, bool force_free) resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx); } -bool has_busy_rmid(struct rdt_mon_domain *d) +bool has_busy_rmid(struct rdt_l3_mon_domain *d) { u32 idx_limit = resctrl_arch_system_num_rmid_idx(); @@ -311,7 +312,7 @@ int alloc_rmid(u32 closid) static void add_rmid_to_limbo(struct rmid_entry *entry) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; u32 idx; lockdep_assert_held(&rdtgroup_mutex); @@ -370,7 +371,7 @@ void free_rmid(u32 closid, u32 rmid) list_add_tail(&entry->list, &rmid_free_lru); } -static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid, +static struct mbm_state *get_mbm_state(struct rdt_l3_mon_domain *d, u32 closid, u32 rmid, enum resctrl_event_id evtid) { u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid); @@ -390,7 +391,7 @@ static 
struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid, * Return: * Valid counter ID on success, or -ENOENT on failure. */ -static int mbm_cntr_get(struct rdt_resource *r, struct rdt_mon_domain *d, +static int mbm_cntr_get(struct rdt_resource *r, struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) { int cntr_id; @@ -417,7 +418,7 @@ static int mbm_cntr_get(struct rdt_resource *r, struct rdt_mon_domain *d, * Return: * Valid counter ID on success, or -ENOSPC on failure. */ -static int mbm_cntr_alloc(struct rdt_resource *r, struct rdt_mon_domain *d, +static int mbm_cntr_alloc(struct rdt_resource *r, struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) { int cntr_id; @@ -436,24 +437,29 @@ static int mbm_cntr_alloc(struct rdt_resource *r, struct rdt_mon_domain *d, /* * mbm_cntr_free() - Clear the counter ID configuration details in the domain @d. */ -static void mbm_cntr_free(struct rdt_mon_domain *d, int cntr_id) +static void mbm_cntr_free(struct rdt_l3_mon_domain *d, int cntr_id) { memset(&d->cntr_cfg[cntr_id], 0, sizeof(*d->cntr_cfg)); } -static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) +static int __l3_mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) { int cpu = smp_processor_id(); u32 closid = rdtgrp->closid; u32 rmid = rdtgrp->mon.rmid; - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; int cntr_id = -ENOENT; struct mbm_state *m; - int err, ret; u64 tval = 0; + if (!domain_header_is_valid(rr->hdr, RESCTRL_MON_DOMAIN, rr->r->rid)) { + rr->err = -EIO; + return -EINVAL; + } + d = container_of(rr->hdr, struct rdt_l3_mon_domain, hdr); + if (rr->is_mbm_cntr) { - cntr_id = mbm_cntr_get(rr->r, rr->d, rdtgrp, rr->evtid); + cntr_id = mbm_cntr_get(rr->r, d, rdtgrp, rr->evt->evtid); if (cntr_id < 0) { rr->err = -ENOENT; return -EINVAL; @@ -462,31 +468,51 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) if (rr->first) { if 
(rr->is_mbm_cntr) - resctrl_arch_reset_cntr(rr->r, rr->d, closid, rmid, cntr_id, rr->evtid); + resctrl_arch_reset_cntr(rr->r, d, closid, rmid, cntr_id, rr->evt->evtid); else - resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid); - m = get_mbm_state(rr->d, closid, rmid, rr->evtid); + resctrl_arch_reset_rmid(rr->r, d, closid, rmid, rr->evt->evtid); + m = get_mbm_state(d, closid, rmid, rr->evt->evtid); if (m) memset(m, 0, sizeof(struct mbm_state)); return 0; } - if (rr->d) { - /* Reading a single domain, must be on a CPU in that domain. */ - if (!cpumask_test_cpu(cpu, &rr->d->hdr.cpu_mask)) - return -EINVAL; - if (rr->is_mbm_cntr) - rr->err = resctrl_arch_cntr_read(rr->r, rr->d, closid, rmid, cntr_id, - rr->evtid, &tval); - else - rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, - rr->evtid, &tval, rr->arch_mon_ctx); - if (rr->err) - return rr->err; + /* Reading a single domain, must be on a CPU in that domain. */ + if (!cpumask_test_cpu(cpu, &d->hdr.cpu_mask)) + return -EINVAL; + if (rr->is_mbm_cntr) + rr->err = resctrl_arch_cntr_read(rr->r, d, closid, rmid, cntr_id, + rr->evt->evtid, &tval); + else + rr->err = resctrl_arch_rmid_read(rr->r, rr->hdr, closid, rmid, + rr->evt->evtid, rr->evt->arch_priv, + &tval, rr->arch_mon_ctx); + if (rr->err) + return rr->err; - rr->val += tval; + rr->val += tval; - return 0; + return 0; +} + +static int __l3_mon_event_count_sum(struct rdtgroup *rdtgrp, struct rmid_read *rr) +{ + int cpu = smp_processor_id(); + u32 closid = rdtgrp->closid; + u32 rmid = rdtgrp->mon.rmid; + struct rdt_l3_mon_domain *d; + u64 tval = 0; + int err, ret; + + /* + * Summing across domains is only done for systems that implement + * Sub-NUMA Cluster. There is no overlap with systems that support + * assignable counters. 
+ */ + if (rr->is_mbm_cntr) { + pr_warn_once("Summing domains using assignable counters is not supported\n"); + rr->err = -EINVAL; + return -EINVAL; } /* Summing domains that share a cache, must be on a CPU for that cache. */ @@ -504,12 +530,9 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) list_for_each_entry(d, &rr->r->mon_domains, hdr.list) { if (d->ci_id != rr->ci->id) continue; - if (rr->is_mbm_cntr) - err = resctrl_arch_cntr_read(rr->r, d, closid, rmid, cntr_id, - rr->evtid, &tval); - else - err = resctrl_arch_rmid_read(rr->r, d, closid, rmid, - rr->evtid, &tval, rr->arch_mon_ctx); + err = resctrl_arch_rmid_read(rr->r, &d->hdr, closid, rmid, + rr->evt->evtid, rr->evt->arch_priv, + &tval, rr->arch_mon_ctx); if (!err) { rr->val += tval; ret = 0; @@ -522,6 +545,36 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) return ret; } +static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) +{ + switch (rr->r->rid) { + case RDT_RESOURCE_L3: + case RDT_RESOURCE_MBA: + WARN_ON_ONCE(rr->evt->any_cpu); + if (rr->hdr) + return __l3_mon_event_count(rdtgrp, rr); + else + return __l3_mon_event_count_sum(rdtgrp, rr); + case RDT_RESOURCE_PERF_PKG: { + u64 tval = 0; + + rr->err = resctrl_arch_rmid_read(rr->r, rr->hdr, rdtgrp->closid, + rdtgrp->mon.rmid, rr->evt->evtid, + rr->evt->arch_priv, + &tval, rr->arch_mon_ctx); + if (rr->err) + return rr->err; + + rr->val += tval; + + return 0; + } + default: + rr->err = -EINVAL; + return -EINVAL; + } +} + /* * mbm_bw_count() - Update bw count from values previously read by * __mon_event_count(). 
@@ -539,9 +592,13 @@ static void mbm_bw_count(struct rdtgroup *rdtgrp, struct rmid_read *rr) u64 cur_bw, bytes, cur_bytes; u32 closid = rdtgrp->closid; u32 rmid = rdtgrp->mon.rmid; + struct rdt_l3_mon_domain *d; struct mbm_state *m; - m = get_mbm_state(rr->d, closid, rmid, rr->evtid); + if (!domain_header_is_valid(rr->hdr, RESCTRL_MON_DOMAIN, rr->r->rid)) + return; + d = container_of(rr->hdr, struct rdt_l3_mon_domain, hdr); + m = get_mbm_state(d, closid, rmid, rr->evt->evtid); if (WARN_ON_ONCE(!m)) return; @@ -640,7 +697,7 @@ static struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu, * throttle MSRs already have low percentage values. To avoid * unnecessarily restricting such rdtgroups, we also increase the bandwidth. */ -static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm) +static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_l3_mon_domain *dom_mbm) { u32 closid, rmid, cur_msr_val, new_msr_val; struct mbm_state *pmbm_data, *cmbm_data; @@ -708,18 +765,18 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_mon_domain *dom_mbm) resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val); } -static void mbm_update_one_event(struct rdt_resource *r, struct rdt_mon_domain *d, +static void mbm_update_one_event(struct rdt_resource *r, struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) { struct rmid_read rr = {0}; rr.r = r; - rr.d = d; - rr.evtid = evtid; + rr.hdr = &d->hdr; + rr.evt = &mon_event_all[evtid]; if (resctrl_arch_mbm_cntr_assign_enabled(r)) { rr.is_mbm_cntr = true; } else { - rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid); + rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, evtid); if (IS_ERR(rr.arch_mon_ctx)) { pr_warn_ratelimited("Failed to allocate monitor context: %ld", PTR_ERR(rr.arch_mon_ctx)); @@ -737,10 +794,10 @@ static void mbm_update_one_event(struct rdt_resource *r, struct rdt_mon_domain * mbm_bw_count(rdtgrp, &rr); if (rr.arch_mon_ctx) - 
resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx); + resctrl_arch_mon_ctx_free(rr.r, evtid, rr.arch_mon_ctx); } -static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d, +static void mbm_update(struct rdt_resource *r, struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp) { /* @@ -761,13 +818,12 @@ static void mbm_update(struct rdt_resource *r, struct rdt_mon_domain *d, void cqm_handle_limbo(struct work_struct *work) { unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL); - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; cpus_read_lock(); - get_online_mems(); mutex_lock(&rdtgroup_mutex); - d = container_of(work, struct rdt_mon_domain, cqm_limbo.work); + d = container_of(work, struct rdt_l3_mon_domain, cqm_limbo.work); __check_limbo(d, false); @@ -779,7 +835,6 @@ void cqm_handle_limbo(struct work_struct *work) } mutex_unlock(&rdtgroup_mutex); - put_online_mems(); cpus_read_unlock(); } @@ -791,7 +846,7 @@ void cqm_handle_limbo(struct work_struct *work) * @exclude_cpu: Which CPU the handler should not run on, * RESCTRL_PICK_ANY_CPU to pick any CPU. 
*/ -void cqm_setup_limbo_handler(struct rdt_mon_domain *dom, unsigned long delay_ms, +void cqm_setup_limbo_handler(struct rdt_l3_mon_domain *dom, unsigned long delay_ms, int exclude_cpu) { unsigned long delay = msecs_to_jiffies(delay_ms); @@ -808,12 +863,11 @@ void mbm_handle_overflow(struct work_struct *work) { unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL); struct rdtgroup *prgrp, *crgrp; - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; struct list_head *head; struct rdt_resource *r; cpus_read_lock(); - get_online_mems(); mutex_lock(&rdtgroup_mutex); /* @@ -824,7 +878,7 @@ void mbm_handle_overflow(struct work_struct *work) goto out_unlock; r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - d = container_of(work, struct rdt_mon_domain, mbm_over.work); + d = container_of(work, struct rdt_l3_mon_domain, mbm_over.work); list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { mbm_update(r, d, prgrp); @@ -847,7 +901,6 @@ void mbm_handle_overflow(struct work_struct *work) out_unlock: mutex_unlock(&rdtgroup_mutex); - put_online_mems(); cpus_read_unlock(); } @@ -859,7 +912,7 @@ void mbm_handle_overflow(struct work_struct *work) * @exclude_cpu: Which CPU the handler should not run on, * RESCTRL_PICK_ANY_CPU to pick any CPU. 
*/ -void mbm_setup_overflow_handler(struct rdt_mon_domain *dom, unsigned long delay_ms, +void mbm_setup_overflow_handler(struct rdt_l3_mon_domain *dom, unsigned long delay_ms, int exclude_cpu) { unsigned long delay = msecs_to_jiffies(delay_ms); @@ -880,42 +933,29 @@ void mbm_setup_overflow_handler(struct rdt_mon_domain *dom, unsigned long delay_ schedule_delayed_work_on(cpu, &dom->mbm_over, delay); } -static int dom_data_init(struct rdt_resource *r) +int setup_rmid_lru_list(void) { - u32 idx_limit = resctrl_arch_system_num_rmid_idx(); - u32 num_closid = resctrl_arch_get_num_closid(r); struct rmid_entry *entry = NULL; - int err = 0, i; + u32 idx_limit; u32 idx; + int i; - mutex_lock(&rdtgroup_mutex); - if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) { - u32 *tmp; - - /* - * If the architecture hasn't provided a sanitised value here, - * this may result in larger arrays than necessary. Resctrl will - * use a smaller system wide value based on the resources in - * use. - */ - tmp = kcalloc(num_closid, sizeof(*tmp), GFP_KERNEL); - if (!tmp) { - err = -ENOMEM; - goto out_unlock; - } + if (!resctrl_arch_mon_capable()) + return 0; - closid_num_dirty_rmid = tmp; - } + /* + * Called on every mount, but the number of RMIDs cannot change + * after the first mount, so keep using the same set of rmid_ptrs[] + * until resctrl_exit(). Note that the limbo handler continues to + * access rmid_ptrs[] after resctrl is unmounted. 
+ */ + if (rmid_ptrs) + return 0; + idx_limit = resctrl_arch_system_num_rmid_idx(); rmid_ptrs = kcalloc(idx_limit, sizeof(struct rmid_entry), GFP_KERNEL); - if (!rmid_ptrs) { - if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) { - kfree(closid_num_dirty_rmid); - closid_num_dirty_rmid = NULL; - } - err = -ENOMEM; - goto out_unlock; - } + if (!rmid_ptrs) + return -ENOMEM; for (i = 0; i < idx_limit; i++) { entry = &rmid_ptrs[i]; @@ -928,7 +968,7 @@ static int dom_data_init(struct rdt_resource *r) /* * RESCTRL_RESERVED_CLOSID and RESCTRL_RESERVED_RMID are special and * are always allocated. These are used for the rdtgroup_default - * control group, which will be setup later in resctrl_init(). + * control group, which was setup earlier in rdtgroup_setup_default(). */ idx = resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID, RESCTRL_RESERVED_RMID); @@ -936,64 +976,69 @@ static int dom_data_init(struct rdt_resource *r) WARN_ON_ONCE(!entry); list_del(&entry->list); -out_unlock: - mutex_unlock(&rdtgroup_mutex); - - return err; + return 0; } -static void dom_data_exit(struct rdt_resource *r) +void free_rmid_lru_list(void) { + if (!resctrl_arch_mon_capable()) + return; + mutex_lock(&rdtgroup_mutex); - - if (!r->mon_capable) - goto out_unlock; - - if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) { - kfree(closid_num_dirty_rmid); - closid_num_dirty_rmid = NULL; - } - kfree(rmid_ptrs); rmid_ptrs = NULL; - -out_unlock: mutex_unlock(&rdtgroup_mutex); } +#define MON_EVENT(_eventid, _name, _res, _fp) \ + [_eventid] = { \ + .name = _name, \ + .evtid = _eventid, \ + .rid = _res, \ + .is_floating_point = _fp, \ +} + /* * All available events. Architecture code marks the ones that * are supported by a system using resctrl_enable_mon_event() * to set .enabled. 
*/ struct mon_evt mon_event_all[QOS_NUM_EVENTS] = { - [QOS_L3_OCCUP_EVENT_ID] = { - .name = "llc_occupancy", - .evtid = QOS_L3_OCCUP_EVENT_ID, - .rid = RDT_RESOURCE_L3, - }, - [QOS_L3_MBM_TOTAL_EVENT_ID] = { - .name = "mbm_total_bytes", - .evtid = QOS_L3_MBM_TOTAL_EVENT_ID, - .rid = RDT_RESOURCE_L3, - }, - [QOS_L3_MBM_LOCAL_EVENT_ID] = { - .name = "mbm_local_bytes", - .evtid = QOS_L3_MBM_LOCAL_EVENT_ID, - .rid = RDT_RESOURCE_L3, - }, + MON_EVENT(QOS_L3_OCCUP_EVENT_ID, "llc_occupancy", RDT_RESOURCE_L3, false), + MON_EVENT(QOS_L3_MBM_TOTAL_EVENT_ID, "mbm_total_bytes", RDT_RESOURCE_MBA, false), + MON_EVENT(QOS_L3_MBM_LOCAL_EVENT_ID, "mbm_local_bytes", RDT_RESOURCE_L3, false), + MON_EVENT(PMT_EVENT_ENERGY, "core_energy", RDT_RESOURCE_PERF_PKG, true), + MON_EVENT(PMT_EVENT_ACTIVITY, "activity", RDT_RESOURCE_PERF_PKG, true), + MON_EVENT(PMT_EVENT_STALLS_LLC_HIT, "stalls_llc_hit", RDT_RESOURCE_PERF_PKG, false), + MON_EVENT(PMT_EVENT_C1_RES, "c1_res", RDT_RESOURCE_PERF_PKG, false), + MON_EVENT(PMT_EVENT_UNHALTED_CORE_CYCLES, "unhalted_core_cycles", RDT_RESOURCE_PERF_PKG, false), + MON_EVENT(PMT_EVENT_STALLS_LLC_MISS, "stalls_llc_miss", RDT_RESOURCE_PERF_PKG, false), + MON_EVENT(PMT_EVENT_AUTO_C6_RES, "c6_res", RDT_RESOURCE_PERF_PKG, false), + MON_EVENT(PMT_EVENT_UNHALTED_REF_CYCLES, "unhalted_ref_cycles", RDT_RESOURCE_PERF_PKG, false), + MON_EVENT(PMT_EVENT_UOPS_RETIRED, "uops_retired", RDT_RESOURCE_PERF_PKG, false), }; -void resctrl_enable_mon_event(enum resctrl_event_id eventid) +bool resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu, + unsigned int binary_bits, void *arch_priv) { - if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS)) - return; + if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS || + binary_bits > MAX_BINARY_BITS)) + return false; if (mon_event_all[eventid].enabled) { pr_warn("Duplicate enable for event %d\n", eventid); - return; + return false; + } + if (binary_bits && 
!mon_event_all[eventid].is_floating_point) { + pr_warn("Event %d may not be floating point\n", eventid); + return false; } + mon_event_all[eventid].any_cpu = any_cpu; + mon_event_all[eventid].binary_bits = binary_bits; + mon_event_all[eventid].arch_priv = arch_priv; mon_event_all[eventid].enabled = true; + + return true; } bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid) @@ -1117,7 +1162,7 @@ ssize_t resctrl_mbm_assign_on_mkdir_write(struct kernfs_open_file *of, char *buf * mbm_cntr_free_all() - Clear all the counter ID configuration details in the * domain @d. Called when mbm_assign_mode is changed. */ -static void mbm_cntr_free_all(struct rdt_resource *r, struct rdt_mon_domain *d) +static void mbm_cntr_free_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d) { memset(d->cntr_cfg, 0, sizeof(*d->cntr_cfg) * r->mon.num_mbm_cntrs); } @@ -1126,7 +1171,7 @@ static void mbm_cntr_free_all(struct rdt_resource *r, struct rdt_mon_domain *d) * resctrl_reset_rmid_all() - Reset all non-architecture states for all the * supported RMIDs. */ -static void resctrl_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d) +static void resctrl_reset_rmid_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d) { u32 idx_limit = resctrl_arch_system_num_rmid_idx(); enum resctrl_event_id evt; @@ -1147,7 +1192,7 @@ static void resctrl_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain * Assign the counter if @assign is true else unassign the counter. Reset the * associated non-architectural state. */ -static void rdtgroup_assign_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, +static void rdtgroup_assign_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, enum resctrl_event_id evtid, u32 rmid, u32 closid, u32 cntr_id, bool assign) { @@ -1167,7 +1212,7 @@ static void rdtgroup_assign_cntr(struct rdt_resource *r, struct rdt_mon_domain * * Return: * 0 on success, < 0 on failure. 
*/ -static int rdtgroup_alloc_assign_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, +static int rdtgroup_alloc_assign_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp, struct mon_evt *mevt) { int cntr_id; @@ -1202,7 +1247,7 @@ static int rdtgroup_alloc_assign_cntr(struct rdt_resource *r, struct rdt_mon_dom * Return: * 0 on success, < 0 on failure. */ -static int rdtgroup_assign_cntr_event(struct rdt_mon_domain *d, struct rdtgroup *rdtgrp, +static int rdtgroup_assign_cntr_event(struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp, struct mon_evt *mevt) { struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid); @@ -1252,7 +1297,7 @@ void rdtgroup_assign_cntrs(struct rdtgroup *rdtgrp) * rdtgroup_free_unassign_cntr() - Unassign and reset the counter ID configuration * for the event pointed to by @mevt within the domain @d and resctrl group @rdtgrp. */ -static void rdtgroup_free_unassign_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, +static void rdtgroup_free_unassign_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp, struct mon_evt *mevt) { int cntr_id; @@ -1273,7 +1318,7 @@ static void rdtgroup_free_unassign_cntr(struct rdt_resource *r, struct rdt_mon_d * the event structure @mevt from the domain @d and the group @rdtgrp. Unassign * the counters from all the domains if @d is NULL else unassign from @d. 
*/ -static void rdtgroup_unassign_cntr_event(struct rdt_mon_domain *d, struct rdtgroup *rdtgrp, +static void rdtgroup_unassign_cntr_event(struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp, struct mon_evt *mevt) { struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid); @@ -1348,7 +1393,7 @@ static int resctrl_parse_mem_transactions(char *tok, u32 *val) static void rdtgroup_update_cntr_event(struct rdt_resource *r, struct rdtgroup *rdtgrp, enum resctrl_event_id evtid) { - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; int cntr_id; list_for_each_entry(d, &r->mon_domains, hdr.list) { @@ -1454,7 +1499,7 @@ ssize_t resctrl_mbm_assign_mode_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct rdt_resource *r = rdt_kn_parent_priv(of->kn); - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; int ret = 0; bool enable; @@ -1527,7 +1572,7 @@ int resctrl_num_mbm_cntrs_show(struct kernfs_open_file *of, struct seq_file *s, void *v) { struct rdt_resource *r = rdt_kn_parent_priv(of->kn); - struct rdt_mon_domain *dom; + struct rdt_l3_mon_domain *dom; bool sep = false; cpus_read_lock(); @@ -1551,7 +1596,7 @@ int resctrl_available_mbm_cntrs_show(struct kernfs_open_file *of, struct seq_file *s, void *v) { struct rdt_resource *r = rdt_kn_parent_priv(of->kn); - struct rdt_mon_domain *dom; + struct rdt_l3_mon_domain *dom; bool sep = false; u32 cntrs, i; int ret = 0; @@ -1589,10 +1634,10 @@ int resctrl_available_mbm_cntrs_show(struct kernfs_open_file *of, return ret; } -int mbm_L3_assignments_show(struct kernfs_open_file *of, struct seq_file *s, void *v) +static int mbm_assignments_show(struct kernfs_open_file *of, struct seq_file *s, + void *v, struct rdt_resource *r) { - struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; struct rdtgroup *rdtgrp; struct mon_evt *mevt; int ret = 0; @@ -1637,6 +1682,18 @@ int mbm_L3_assignments_show(struct kernfs_open_file *of, 
struct seq_file *s, voi return ret; } +int mbm_L3_assignments_show(struct kernfs_open_file *of, struct seq_file *s, void *v) +{ + return mbm_assignments_show(of, s, v, + resctrl_arch_get_resource(RDT_RESOURCE_L3)); +} + +int mbm_MB_assignments_show(struct kernfs_open_file *of, struct seq_file *s, void *v) +{ + return mbm_assignments_show(of, s, v, + resctrl_arch_get_resource(RDT_RESOURCE_MBA)); +} + /* * mbm_get_mon_event_by_name() - Return the mon_evt entry for the matching * event name. @@ -1655,7 +1712,7 @@ static struct mon_evt *mbm_get_mon_event_by_name(struct rdt_resource *r, char *n return NULL; } -static int rdtgroup_modify_assign_state(char *assign, struct rdt_mon_domain *d, +static int rdtgroup_modify_assign_state(char *assign, struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp, struct mon_evt *mevt) { int ret = 0; @@ -1681,7 +1738,7 @@ static int rdtgroup_modify_assign_state(char *assign, struct rdt_mon_domain *d, static int resctrl_parse_mbm_assignment(struct rdt_resource *r, struct rdtgroup *rdtgrp, char *event, char *tok) { - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; unsigned long dom_id = 0; char *dom_str, *id_str; struct mon_evt *mevt; @@ -1731,10 +1788,10 @@ static int resctrl_parse_mbm_assignment(struct rdt_resource *r, struct rdtgroup return -EINVAL; } -ssize_t mbm_L3_assignments_write(struct kernfs_open_file *of, char *buf, - size_t nbytes, loff_t off) +static ssize_t mbm_assignments_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off, + struct rdt_resource *r) { - struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); struct rdtgroup *rdtgrp; char *token, *event; int ret = 0; @@ -1776,39 +1833,87 @@ ssize_t mbm_L3_assignments_write(struct kernfs_open_file *of, char *buf, return ret ?: nbytes; } +ssize_t mbm_L3_assignments_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + return mbm_assignments_write(of, buf, nbytes, off, + 
resctrl_arch_get_resource(RDT_RESOURCE_L3)); +} + +ssize_t mbm_MB_assignments_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + return mbm_assignments_write(of, buf, nbytes, off, + resctrl_arch_get_resource(RDT_RESOURCE_MBA)); +} + +static int closid_num_dirty_rmid_alloc(struct rdt_resource *r) +{ + if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) { + u32 num_closid = resctrl_arch_get_num_closid(r); + u32 *tmp; + + /* For ARM memory ordering access to closid_num_dirty_rmid */ + mutex_lock(&rdtgroup_mutex); + + /* + * If the architecture hasn't provided a sanitised value here, + * this may result in larger arrays than necessary. Resctrl will + * use a smaller system wide value based on the resources in + * use. + */ + tmp = kcalloc(num_closid, sizeof(*tmp), GFP_KERNEL); + if (!tmp) { + mutex_unlock(&rdtgroup_mutex); + return -ENOMEM; + } + + closid_num_dirty_rmid = tmp; + + mutex_unlock(&rdtgroup_mutex); + } + + return 0; +} + +static void closid_num_dirty_rmid_free(void) +{ + if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) { + mutex_lock(&rdtgroup_mutex); + kfree(closid_num_dirty_rmid); + closid_num_dirty_rmid = NULL; + mutex_unlock(&rdtgroup_mutex); + } +} + /** * resctrl_mon_resource_init() - Initialise global monitoring structures. * * Allocate and initialise global monitor resources that do not belong to a - * specific domain. i.e. the rmid_ptrs[] used for the limbo and free lists. + * specific domain. i.e. the closid_num_dirty_rmid[] used to find the CLOSID + * with the cleanest set of RMIDs. * Called once during boot after the struct rdt_resource's have been configured * but before the filesystem is mounted. * Resctrl's cpuhp callbacks may be called before this point to bring a domain * online. * - * Returns 0 for success, or -ENOMEM. + * Return: 0 for success, or -ENOMEM. 
 */ -int resctrl_mon_resource_init(void) +static void resctrl_mon_resource_init(struct rdt_resource *r) { - struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - int ret; + unsigned long fflags; - if (!r->mon_capable) - return 0; - - ret = dom_data_init(r); - if (ret) - return ret; + fflags = (r->rid == RDT_RESOURCE_MBA) ? RFTYPE_RES_MB : RFTYPE_RES_CACHE; if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) { mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].configurable = true; resctrl_file_fflags_init("mbm_total_bytes_config", - RFTYPE_MON_INFO | RFTYPE_RES_CACHE); + RFTYPE_MON_INFO | fflags); } if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) { mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].configurable = true; resctrl_file_fflags_init("mbm_local_bytes_config", - RFTYPE_MON_INFO | RFTYPE_RES_CACHE); + RFTYPE_MON_INFO | fflags); } if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) @@ -1826,21 +1931,48 @@ int resctrl_mon_resource_init(void) NON_TEMP_WRITE_TO_LOCAL_MEM); r->mon.mbm_assign_on_mkdir = true; resctrl_file_fflags_init("num_mbm_cntrs", - RFTYPE_MON_INFO | RFTYPE_RES_CACHE); + RFTYPE_MON_INFO | fflags); resctrl_file_fflags_init("available_mbm_cntrs", - RFTYPE_MON_INFO | RFTYPE_RES_CACHE); + RFTYPE_MON_INFO | fflags); resctrl_file_fflags_init("event_filter", RFTYPE_ASSIGN_CONFIG); resctrl_file_fflags_init("mbm_assign_on_mkdir", RFTYPE_MON_INFO | - RFTYPE_RES_CACHE); - resctrl_file_fflags_init("mbm_L3_assignments", RFTYPE_MON_BASE); + fflags); + if (r->rid == RDT_RESOURCE_MBA) + resctrl_file_fflags_init("mbm_MB_assignments", RFTYPE_MON_BASE); + else + resctrl_file_fflags_init("mbm_L3_assignments", RFTYPE_MON_BASE); + resctrl_file_fflags_init("mbm_assign_mode", RFTYPE_MON_INFO | + fflags); } +} + +int resctrl_mon_init(void) +{ + struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); + int ret; + + if (!r->mon_capable) + return 0; + + ret = closid_num_dirty_rmid_alloc(r); + if (ret) + return ret; + + 
resctrl_mon_resource_init(r); + + r = resctrl_arch_get_resource(RDT_RESOURCE_MBA); + if (r) + resctrl_mon_resource_init(r); return 0; } -void resctrl_mon_resource_exit(void) +void resctrl_mon_exit(void) { struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3); - dom_data_exit(r); + if (!r->mon_capable) + return; + + closid_num_dirty_rmid_free(); } diff --git a/fs/resctrl/pseudo_lock.c b/fs/resctrl/pseudo_lock.c index 4086e61df3e1c..87bbc2605de12 100644 --- a/fs/resctrl/pseudo_lock.c +++ b/fs/resctrl/pseudo_lock.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -695,7 +694,6 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel) int ret = -1; cpus_read_lock(); - get_online_mems(); mutex_lock(&rdtgroup_mutex); if (rdtgrp->flags & RDT_DELETED) { @@ -743,7 +741,6 @@ static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel) out: mutex_unlock(&rdtgroup_mutex); - put_online_mems(); cpus_read_unlock(); return ret; } diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 3c9981f545017..c2aed590ad897 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include @@ -1156,7 +1156,6 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, u32 ctrl_val; cpus_read_lock(); - get_online_mems(); mutex_lock(&rdtgroup_mutex); hw_shareable = r->cache.shareable_bits; list_for_each_entry(dom, &r->ctrl_domains, hdr.list) { @@ -1217,7 +1216,6 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, } seq_putc(seq, '\n'); mutex_unlock(&rdtgroup_mutex); - put_online_mems(); cpus_read_unlock(); return 0; } @@ -1236,7 +1234,7 @@ static int rdt_num_rmids_show(struct kernfs_open_file *of, { struct rdt_resource *r = rdt_kn_parent_priv(of->kn); - seq_printf(seq, "%d\n", r->mon.num_rmid); + seq_printf(seq, "%u\n", r->mon.num_rmid); return 0; } @@ -1718,11 +1716,10 @@ static void mondata_config_read(struct 
resctrl_mon_config_info *mon_info) static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid) { struct resctrl_mon_config_info mon_info; - struct rdt_mon_domain *dom; + struct rdt_l3_mon_domain *dom; bool sep = false; cpus_read_lock(); - get_online_mems(); mutex_lock(&rdtgroup_mutex); list_for_each_entry(dom, &r->mon_domains, hdr.list) { @@ -1741,7 +1738,6 @@ static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid seq_puts(s, "\n"); mutex_unlock(&rdtgroup_mutex); - put_online_mems(); cpus_read_unlock(); return 0; @@ -1792,7 +1788,7 @@ static int resctrl_schema_format_show(struct kernfs_open_file *of, } static void mbm_config_write_domain(struct rdt_resource *r, - struct rdt_mon_domain *d, u32 evtid, u32 val) + struct rdt_l3_mon_domain *d, u32 evtid, u32 val) { struct resctrl_mon_config_info mon_info = {0}; @@ -1833,8 +1829,8 @@ static void mbm_config_write_domain(struct rdt_resource *r, static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid) { char *dom_str = NULL, *id_str; + struct rdt_l3_mon_domain *d; unsigned long dom_id, val; - struct rdt_mon_domain *d; /* Walking r->domains, ensure it can't race with cpuhp */ lockdep_assert_cpus_held(); @@ -1886,7 +1882,6 @@ static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of, return -EINVAL; cpus_read_lock(); - get_online_mems(); mutex_lock(&rdtgroup_mutex); rdt_last_cmd_clear(); @@ -1896,7 +1891,6 @@ static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of, ret = mon_config_write(r, buf, QOS_L3_MBM_TOTAL_EVENT_ID); mutex_unlock(&rdtgroup_mutex); - put_online_mems(); cpus_read_unlock(); return ret ?: nbytes; @@ -1914,7 +1908,6 @@ static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of, return -EINVAL; cpus_read_lock(); - get_online_mems(); mutex_lock(&rdtgroup_mutex); rdt_last_cmd_clear(); @@ -1924,7 +1917,6 @@ static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of, ret = mon_config_write(r, 
buf, QOS_L3_MBM_LOCAL_EVENT_ID); mutex_unlock(&rdtgroup_mutex); - put_online_mems(); cpus_read_unlock(); return ret ?: nbytes; @@ -2141,6 +2133,13 @@ static struct rftype res_common_files[] = { .seq_show = mbm_L3_assignments_show, .write = mbm_L3_assignments_write, }, + { + .name = "mbm_MB_assignments", + .mode = 0644, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = mbm_MB_assignments_show, + .write = mbm_MB_assignments_write, + }, { .name = "mbm_assign_mode", .mode = 0644, @@ -2496,6 +2495,8 @@ static unsigned long fflags_from_resource(struct rdt_resource *r) case RDT_RESOURCE_MBA: case RDT_RESOURCE_SMBA: return RFTYPE_RES_MB; + case RDT_RESOURCE_PERF_PKG: + return RFTYPE_RES_PERF_PKG; } return 0; @@ -2736,7 +2737,6 @@ struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn) rdtgroup_kn_get(rdtgrp, kn); cpus_read_lock(); - get_online_mems(); mutex_lock(&rdtgroup_mutex); /* Was this group deleted while we waited? */ @@ -2754,7 +2754,6 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn) return; mutex_unlock(&rdtgroup_mutex); - put_online_mems(); cpus_read_unlock(); rdtgroup_kn_put(rdtgrp, kn); @@ -2768,7 +2767,6 @@ static void rdt_disable_ctx(void) { resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false); resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false); - resctrl_arch_set_mb_uses_numa_nid(false); set_mba_sc(false); resctrl_debug = false; @@ -2799,17 +2797,8 @@ static int rdt_enable_ctx(struct rdt_fs_context *ctx) if (ctx->enable_debug) resctrl_debug = true; - if (ctx->mb_uses_numa_nid) { - ret = resctrl_arch_set_mb_uses_numa_nid(true); - if (ret) - goto out_debug; - } - return 0; -out_debug: - resctrl_debug = false; - set_mba_sc(false); out_cdpl3: resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false); out_cdpl2: @@ -2975,15 +2964,16 @@ static int rdt_get_tree(struct fs_context *fc) { struct rdt_fs_context *ctx = rdt_fc2context(fc); unsigned long flags = RFTYPE_CTRL_BASE; - struct rdt_mon_domain *dom; + struct rdt_l3_mon_domain *dom; struct rdt_resource *r; 
int ret; + DO_ONCE_SLEEPABLE(resctrl_arch_pre_mount); + if (ctx->enable_abi_playground) enable_abi_playground(); cpus_read_lock(); - get_online_mems(); mutex_lock(&rdtgroup_mutex); /* * resctrl file system can only be mounted once. @@ -2993,6 +2983,10 @@ static int rdt_get_tree(struct fs_context *fc) goto out; } + ret = setup_rmid_lru_list(); + if (ret) + goto out; + ret = rdtgroup_setup_root(ctx); if (ret) goto out; @@ -3088,7 +3082,6 @@ static int rdt_get_tree(struct fs_context *fc) out: rdt_last_cmd_clear(); mutex_unlock(&rdtgroup_mutex); - put_online_mems(); cpus_read_unlock(); return ret; } @@ -3098,17 +3091,15 @@ enum rdt_param { Opt_cdpl2, Opt_mba_mbps, Opt_debug, - Opt_mb_uses_numa_nid, Opt_not_abi_playground, nr__rdt_params }; static const struct fs_parameter_spec rdt_fs_parameters[] = { - fsparam_flag("cdp", Opt_cdp), - fsparam_flag("cdpl2", Opt_cdpl2), - fsparam_flag("mba_MBps", Opt_mba_mbps), - fsparam_flag("debug", Opt_debug), - fsparam_flag("mb_uses_numa_nid", Opt_mb_uses_numa_nid), + fsparam_flag("cdp", Opt_cdp), + fsparam_flag("cdpl2", Opt_cdpl2), + fsparam_flag("mba_MBps", Opt_mba_mbps), + fsparam_flag("debug", Opt_debug), /* * Some of MPAM's out of tree code exposes things through resctrl @@ -3146,9 +3137,6 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param) case Opt_debug: ctx->enable_debug = true; return 0; - case Opt_mb_uses_numa_nid: - ctx->mb_uses_numa_nid = true; - return 0; case Opt_not_abi_playground: ctx->enable_abi_playground = true; return 0; @@ -3309,7 +3297,8 @@ static void rmdir_all_sub(void) * @rid: The resource id for the event file being created. * @domid: The domain id for the event file being created. * @mevt: The type of event file being created. - * @do_sum: Whether SNC summing monitors are being created. + * @do_sum: Whether SNC summing monitors are being created. Only set + * when @rid == RDT_RESOURCE_L3. 
*/ static struct mon_data *mon_get_kn_priv(enum resctrl_res_level rid, int domid, struct mon_evt *mevt, @@ -3321,7 +3310,7 @@ static struct mon_data *mon_get_kn_priv(enum resctrl_res_level rid, int domid, list_for_each_entry(priv, &mon_data_kn_priv_list, list) { if (priv->rid == rid && priv->domid == domid && - priv->sum == do_sum && priv->evtid == mevt->evtid) + priv->sum == do_sum && priv->evt == mevt) return priv; } @@ -3332,7 +3321,7 @@ static struct mon_data *mon_get_kn_priv(enum resctrl_res_level rid, int domid, priv->rid = rid; priv->domid = domid; priv->sum = do_sum; - priv->evtid = mevt->evtid; + priv->evt = mevt; list_add_tail(&priv->list, &mon_data_kn_priv_list); return priv; @@ -3378,7 +3367,6 @@ static void rdt_kill_sb(struct super_block *sb) struct rdt_resource *r; cpus_read_lock(); - get_online_mems(); mutex_lock(&rdtgroup_mutex); rdt_disable_ctx(); @@ -3395,7 +3383,6 @@ static void rdt_kill_sb(struct super_block *sb) resctrl_mounted = false; kernfs_kill_sb(sb); mutex_unlock(&rdtgroup_mutex); - put_online_mems(); cpus_read_unlock(); if (static_branch_unlikely(&resctrl_abi_playground)) @@ -3446,23 +3433,24 @@ static void mon_rmdir_one_subdir(struct kernfs_node *pkn, char *name, char *subn } /* - * Remove all subdirectories of mon_data of ctrl_mon groups - * and monitor groups for the given domain. - * Remove files and directories containing "sum" of domain data - * when last domain being summed is removed. + * Remove files and directories for one SNC node. If it is the last node + * sharing an L3 cache, then remove the upper level directory containing + * the "sum" files too. 
*/ -static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, - struct rdt_mon_domain *d) +static void rmdir_mondata_subdir_allrdtgrp_snc(struct rdt_resource *r, + struct rdt_domain_hdr *hdr) { struct rdtgroup *prgrp, *crgrp; + struct rdt_l3_mon_domain *d; char subname[32]; - bool snc_mode; char name[32]; - snc_mode = r->mon_scope == RESCTRL_L3_NODE; - sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id); - if (snc_mode) - sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id); + if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) + return; + + d = container_of(hdr, struct rdt_l3_mon_domain, hdr); + sprintf(name, "mon_%s_%02d", r->name, d->ci_id); + sprintf(subname, "mon_sub_%s_%02d", r->name, hdr->id); list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { mon_rmdir_one_subdir(prgrp->mon.mon_data_kn, name, subname); @@ -3472,47 +3460,89 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, } } -static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d, - struct rdt_resource *r, struct rdtgroup *prgrp, - bool do_sum) +/* + * Remove all subdirectories of mon_data of ctrl_mon groups + * and monitor groups for the given domain. + */ +static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, + struct rdt_domain_hdr *hdr) +{ + struct rdtgroup *prgrp, *crgrp; + char name[32]; + + if (r->rid == RDT_RESOURCE_L3 && r->mon_scope == RESCTRL_L3_NODE) { + rmdir_mondata_subdir_allrdtgrp_snc(r, hdr); + return; + } + + sprintf(name, "mon_%s_%02d", r->name, hdr->id); + list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { + kernfs_remove_by_name(prgrp->mon.mon_data_kn, name); + + list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list) + kernfs_remove_by_name(crgrp->mon.mon_data_kn, name); + } +} + +/* + * Create a directory for a domain and populate it with monitor files. Create + * summing monitors when @hdr is NULL. No need to initialize summing monitors. 
+ */ +static struct kernfs_node *_mkdir_mondata_subdir(struct kernfs_node *parent_kn, char *name, + struct rdt_domain_hdr *hdr, + struct rdt_resource *r, + struct rdtgroup *prgrp, int domid) { struct rmid_read rr = {0}; + struct kernfs_node *kn; struct mon_data *priv; struct mon_evt *mevt; - int ret, domid; + int ret; + + kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp); + if (IS_ERR(kn)) + return kn; + + ret = rdtgroup_kn_set_ugid(kn); + if (ret) + goto out_destroy; for_each_mon_event(mevt) { if (mevt->rid != r->rid || !mevt->enabled) continue; - domid = do_sum ? d->ci_id : d->hdr.id; - priv = mon_get_kn_priv(r->rid, domid, mevt, do_sum); - if (WARN_ON_ONCE(!priv)) - return -EINVAL; + priv = mon_get_kn_priv(r->rid, domid, mevt, !hdr); + if (WARN_ON_ONCE(!priv)) { + ret = -EINVAL; + goto out_destroy; + } ret = mon_addfile(kn, mevt->name, priv); if (ret) - return ret; + goto out_destroy; - if (!do_sum && resctrl_is_mbm_event(mevt->evtid)) - mon_event_read(&rr, r, d, prgrp, &d->hdr.cpu_mask, mevt->evtid, true); + if (hdr && resctrl_is_mbm_event(mevt->evtid)) + mon_event_read(&rr, r, hdr, prgrp, &hdr->cpu_mask, mevt, true); } - return 0; + return kn; +out_destroy: + kernfs_remove(kn); + return ERR_PTR(ret); } -static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, - struct rdt_mon_domain *d, - struct rdt_resource *r, struct rdtgroup *prgrp) +static int mkdir_mondata_subdir_snc(struct kernfs_node *parent_kn, + struct rdt_domain_hdr *hdr, + struct rdt_resource *r, struct rdtgroup *prgrp) { - struct kernfs_node *kn, *ckn; + struct kernfs_node *ckn, *kn; + struct rdt_l3_mon_domain *d; char name[32]; - bool snc_mode; - int ret = 0; - lockdep_assert_held(&rdtgroup_mutex); + if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) + return -EINVAL; - snc_mode = r->mon_scope == RESCTRL_L3_NODE; - sprintf(name, "mon_%s_%02d", r->name, snc_mode ? 
d->ci_id : d->hdr.id); + d = container_of(hdr, struct rdt_l3_mon_domain, hdr); + sprintf(name, "mon_%s_%02d", r->name, d->ci_id); kn = kernfs_find_and_get(parent_kn, name); if (kn) { /* @@ -3521,41 +3551,41 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, */ kernfs_put(kn); } else { - kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp); + kn = _mkdir_mondata_subdir(parent_kn, name, NULL, r, prgrp, d->ci_id); if (IS_ERR(kn)) return PTR_ERR(kn); + } - ret = rdtgroup_kn_set_ugid(kn); - if (ret) - goto out_destroy; - ret = mon_add_all_files(kn, d, r, prgrp, snc_mode); - if (ret) - goto out_destroy; + sprintf(name, "mon_sub_%s_%02d", r->name, hdr->id); + ckn = _mkdir_mondata_subdir(kn, name, hdr, r, prgrp, hdr->id); + if (IS_ERR(ckn)) { + kernfs_remove(kn); + return PTR_ERR(ckn); } - if (snc_mode) { - sprintf(name, "mon_sub_%s_%02d", r->name, d->hdr.id); - ckn = kernfs_create_dir(kn, name, parent_kn->mode, prgrp); - if (IS_ERR(ckn)) { - ret = -EINVAL; - goto out_destroy; - } + kernfs_activate(kn); + return 0; +} - ret = rdtgroup_kn_set_ugid(ckn); - if (ret) - goto out_destroy; +static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, + struct rdt_domain_hdr *hdr, + struct rdt_resource *r, struct rdtgroup *prgrp) +{ + struct kernfs_node *kn; + char name[32]; - ret = mon_add_all_files(ckn, d, r, prgrp, false); - if (ret) - goto out_destroy; - } + lockdep_assert_held(&rdtgroup_mutex); + + if (r->rid == RDT_RESOURCE_L3 && r->mon_scope == RESCTRL_L3_NODE) + return mkdir_mondata_subdir_snc(parent_kn, hdr, r, prgrp); + + sprintf(name, "mon_%s_%02d", r->name, hdr->id); + kn = _mkdir_mondata_subdir(parent_kn, name, hdr, r, prgrp, hdr->id); + if (IS_ERR(kn)) + return PTR_ERR(kn); kernfs_activate(kn); return 0; - -out_destroy: - kernfs_remove(kn); - return ret; } /* @@ -3563,7 +3593,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, * and "monitor" groups with given domain id. 
*/ static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, - struct rdt_mon_domain *d) + struct rdt_domain_hdr *hdr) { struct kernfs_node *parent_kn; struct rdtgroup *prgrp, *crgrp; @@ -3571,12 +3601,12 @@ static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { parent_kn = prgrp->mon.mon_data_kn; - mkdir_mondata_subdir(parent_kn, d, r, prgrp); + mkdir_mondata_subdir(parent_kn, hdr, r, prgrp); head = &prgrp->mon.crdtgrp_list; list_for_each_entry(crgrp, head, mon.crdtgrp_list) { parent_kn = crgrp->mon.mon_data_kn; - mkdir_mondata_subdir(parent_kn, d, r, crgrp); + mkdir_mondata_subdir(parent_kn, hdr, r, crgrp); } } } @@ -3585,14 +3615,14 @@ static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn, struct rdt_resource *r, struct rdtgroup *prgrp) { - struct rdt_mon_domain *dom; + struct rdt_domain_hdr *hdr; int ret; /* Walking r->domains, ensure it can't race with cpuhp */ lockdep_assert_cpus_held(); - list_for_each_entry(dom, &r->mon_domains, hdr.list) { - ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp); + list_for_each_entry(hdr, &r->mon_domains, list) { + ret = mkdir_mondata_subdir(parent_kn, hdr, r, prgrp); if (ret) return ret; } @@ -4406,9 +4436,6 @@ static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf) if (resctrl_debug) seq_puts(seq, ",debug"); - if (resctrl_arch_get_mb_uses_numa_nid()) - seq_puts(seq, ",mb_uses_numa_nid"); - if (static_branch_unlikely(&resctrl_abi_playground)) seq_puts(seq, ",this_is_not_abi"); @@ -4459,7 +4486,7 @@ static void rdtgroup_setup_default(void) mutex_unlock(&rdtgroup_mutex); } -static void domain_destroy_mon_state(struct rdt_mon_domain *d) +static void domain_destroy_l3_mon_state(struct rdt_l3_mon_domain *d) { int idx; @@ -4481,8 +4508,10 @@ void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain mutex_unlock(&rdtgroup_mutex); } -void resctrl_offline_mon_domain(struct rdt_resource *r, 
 struct rdt_mon_domain *d) +void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr) { + struct rdt_l3_mon_domain *d; + mutex_lock(&rdtgroup_mutex); /* @@ -4490,8 +4519,12 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d * per domain monitor data directories. */ if (resctrl_mounted && resctrl_arch_mon_capable()) - rmdir_mondata_subdir_allrdtgrp(r, d); + rmdir_mondata_subdir_allrdtgrp(r, hdr); + + if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, r->rid)) + goto out_unlock; + + d = container_of(hdr, struct rdt_l3_mon_domain, hdr); if (resctrl_is_mbm_enabled()) cancel_delayed_work(&d->mbm_over); if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID) && has_busy_rmid(d)) { @@ -4507,13 +4540,13 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d cancel_delayed_work(&d->cqm_limbo); } - domain_destroy_mon_state(d); - + domain_destroy_l3_mon_state(d); +out_unlock: mutex_unlock(&rdtgroup_mutex); } /** - * domain_setup_mon_state() - Initialise domain monitoring structures. + * domain_setup_l3_mon_state() - Initialise domain monitoring structures. * @r: The resource for the newly online domain. * @d: The newly online domain. * * Called when the first CPU of a domain comes online, regardless of whether * the filesystem is mounted. * During boot this may be called before global allocations have been made by - * resctrl_mon_resource_init(). + * resctrl_mon_init(). + * + * Called during CPU online that may run as soon as CPU online callbacks + * are set up during resctrl initialization. The number of supported RMIDs + * may be reduced if additional mon_capable resources are enumerated + * at mount time. This means the rdt_l3_mon_domain::mbm_states[] and + * rdt_l3_mon_domain::rmid_busy_llc allocations may be larger than needed. + * - * Returns 0 for success, or -ENOMEM. 
+ * Return: 0 for success, or -ENOMEM. */ -static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d) +static int domain_setup_l3_mon_state(struct rdt_resource *r, struct rdt_l3_mon_domain *d) { u32 idx_limit = resctrl_arch_system_num_rmid_idx(); size_t tsize = sizeof(*d->mbm_states[0]); @@ -4581,13 +4620,18 @@ int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d return err; } -int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d) +int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr) { - int err; + struct rdt_l3_mon_domain *d; + int err = -EINVAL; mutex_lock(&rdtgroup_mutex); - err = domain_setup_mon_state(r, d); + if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, r->rid)) + goto out_unlock; + + d = container_of(hdr, struct rdt_l3_mon_domain, hdr); + err = domain_setup_l3_mon_state(r, d); if (err) goto out_unlock; @@ -4600,6 +4644,7 @@ int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d) if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID)) INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo); + err = 0; /* * If the filesystem is not mounted then only the default resource group * exists. Creation of its directories is deferred until mount time @@ -4607,7 +4652,7 @@ int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d) * If resctrl is mounted, add per domain monitor data directories. 
 */ if (resctrl_mounted && resctrl_arch_mon_capable()) - mkdir_mondata_subdir_allrdtgrp(r, d); + mkdir_mondata_subdir_allrdtgrp(r, hdr); out_unlock: mutex_unlock(&rdtgroup_mutex); @@ -4633,10 +4678,10 @@ static void clear_childcpus(struct rdtgroup *r, unsigned int cpu) } } -static struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, - struct rdt_resource *r) +static struct rdt_l3_mon_domain *get_mon_domain_from_cpu(int cpu, + struct rdt_resource *r) { - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; lockdep_assert_cpus_held(); @@ -4652,7 +4697,7 @@ static struct rdt_mon_domain *get_mon_domain_from_cpu(int cpu, void resctrl_offline_cpu(unsigned int cpu) { struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3); - struct rdt_mon_domain *d; + struct rdt_l3_mon_domain *d; struct rdtgroup *rdtgrp; mutex_lock(&rdtgroup_mutex); @@ -4702,13 +4747,13 @@ int resctrl_init(void) thread_throttle_mode_init(); - ret = resctrl_mon_resource_init(); + ret = resctrl_mon_init(); if (ret) return ret; ret = sysfs_create_mount_point(fs_kobj, "resctrl"); if (ret) { - resctrl_mon_resource_exit(); + resctrl_mon_exit(); return ret; } @@ -4743,7 +4788,7 @@ int resctrl_init(void) cleanup_mountpoint: sysfs_remove_mount_point(fs_kobj, "resctrl"); - resctrl_mon_resource_exit(); + resctrl_mon_exit(); return ret; } @@ -4779,7 +4824,7 @@ static bool resctrl_online_domains_exist(void) * When called by the architecture code, all CPUs and resctrl domains must be * offline. This ensures the limbo and overflow handlers are not scheduled to * run, meaning the data structures they access can be freed by - * resctrl_mon_resource_exit(). + * resctrl_mon_exit() and free_rmid_lru_list(). * * After resctrl_exit() returns, the architecture code should return an * error from all resctrl_arch_ functions that can do this. 
@@ -4789,14 +4834,12 @@ static bool resctrl_online_domains_exist(void) void resctrl_exit(void) { cpus_read_lock(); - get_online_mems(); WARN_ON_ONCE(resctrl_online_domains_exist()); mutex_lock(&rdtgroup_mutex); resctrl_fs_teardown(); mutex_unlock(&rdtgroup_mutex); - put_online_mems(); cpus_read_unlock(); debugfs_remove_recursive(debugfs_resctrl); @@ -4808,5 +4851,6 @@ void resctrl_exit(void) * it can be used to umount resctrl. */ - resctrl_mon_resource_exit(); + resctrl_mon_exit(); + free_rmid_lru_list(); } diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h index aa7d6e1854741..06827f240cf9e 100644 --- a/include/linux/arm_mpam.h +++ b/include/linux/arm_mpam.h @@ -78,9 +78,6 @@ struct rdt_resource; void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, enum resctrl_event_id evtid); void resctrl_arch_mon_ctx_free(struct rdt_resource *r, enum resctrl_event_id evtid, void *ctx); -bool resctrl_arch_get_mb_uses_numa_nid(void); -int resctrl_arch_set_mb_uses_numa_nid(bool enabled); - /* * The CPU configuration for MPAM is cheap to write, and is only written if it * has changed. No need for fine grained enables. 
diff --git a/include/linux/iommu.h b/include/linux/iommu.h index b74228f9f1ce0..a22521af2d242 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -704,9 +704,9 @@ struct iommu_ops { const struct iommu_user_data *user_data); /* Per group IOMMU features */ - int (*get_group_qos_params)(struct iommu_group *group, u16 *partition, + int (*get_group_qos_params)(struct device *dev, u16 *partition, u8 *perf_mon_grp); - int (*set_group_qos_params)(struct iommu_group *group, u16 partition, + int (*set_group_qos_params)(struct device *dev, u16 partition, u8 perf_mon_grp); const struct iommu_domain_ops *default_domain_ops; diff --git a/include/linux/memory.h b/include/linux/memory.h index 2a770e7c6ab1e..40eb70ccb09d5 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -126,7 +126,6 @@ struct mem_section; #define CPUSET_CALLBACK_PRI 10 #define MEMTIER_HOTPLUG_PRI 100 #define KSM_CALLBACK_PRI 100 -#define RESCTRL_CALLBACK_PRI 100 #ifndef CONFIG_MEMORY_HOTPLUG static inline void memory_dev_init(void) diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index acc5ac1e92491..23f038a162319 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -334,10 +334,4 @@ extern int arch_create_linear_mapping(int nid, u64 start, u64 size, void arch_remove_linear_mapping(u64 start, u64 size); #endif /* CONFIG_MEMORY_HOTPLUG */ -#if defined(CONFIG_LOCKDEP) && defined(CONFIG_MEMORY_HOTPLUG) -void lockdep_assert_mems_held(void); -#else -static inline void lockdep_assert_mems_held(void) { } -#endif - #endif /* __LINUX_MEMORY_HOTPLUG_H */ diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 055f27045b4da..2901cbd34459c 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -54,6 +54,7 @@ enum resctrl_res_level { RDT_RESOURCE_L2, RDT_RESOURCE_MBA, RDT_RESOURCE_SMBA, + RDT_RESOURCE_PERF_PKG, RDT_RESOURCE_L3_MAX, RDT_RESOURCE_L2_MAX, @@ -134,15 +135,24 @@ enum resctrl_domain_type { * @list: 
all instances of this resource * @id: unique id for this instance * @type: type of this instance + * @rid: resource id for this instance * @cpu_mask: which CPUs share this resource */ struct rdt_domain_hdr { struct list_head list; u32 id; enum resctrl_domain_type type; + enum resctrl_res_level rid; struct cpumask cpu_mask; }; +static inline bool domain_header_is_valid(struct rdt_domain_hdr *hdr, + enum resctrl_domain_type type, + enum resctrl_res_level rid) +{ + return !WARN_ON_ONCE(hdr->type != type || hdr->rid != rid); +} + /** * struct rdt_ctrl_domain - group of CPUs sharing a resctrl control resource * @hdr: common header for different domain types @@ -172,7 +182,7 @@ struct mbm_cntr_cfg { }; /** - * struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource + * struct rdt_l3_mon_domain - group of CPUs sharing RDT_RESOURCE_L3 monitoring * @hdr: common header for different domain types * @ci_id: cache info id for this domain * @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold @@ -186,7 +196,7 @@ struct mbm_cntr_cfg { * @cntr_cfg: array of assignable counters' configuration (indexed * by counter ID) */ -struct rdt_mon_domain { +struct rdt_l3_mon_domain { struct rdt_domain_hdr hdr; unsigned int ci_id; unsigned long *rmid_busy_llc; @@ -267,6 +277,7 @@ enum resctrl_scope { RESCTRL_L2_CACHE = 2, RESCTRL_L3_CACHE = 3, RESCTRL_L3_NODE, + RESCTRL_PACKAGE, }; /** @@ -294,7 +305,7 @@ enum resctrl_schema_fmt { * events of monitor groups created via mkdir. 
*/ struct resctrl_mon { - int num_rmid; + u32 num_rmid; unsigned int mbm_cfg_mask; int num_mbm_cntrs; bool mbm_cntr_assignable; @@ -378,10 +389,10 @@ struct resctrl_cpu_defaults { }; struct resctrl_mon_config_info { - struct rdt_resource *r; - struct rdt_mon_domain *d; - u32 evtid; - u32 mon_config; + struct rdt_resource *r; + struct rdt_l3_mon_domain *d; + u32 evtid; + u32 mon_config; }; /** @@ -444,7 +455,8 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); u32 resctrl_arch_system_num_rmid_idx(void); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); -void resctrl_enable_mon_event(enum resctrl_event_id eventid); +bool resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu, + unsigned int binary_bits, void *arch_priv); bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid); @@ -541,22 +553,31 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 closid, enum resctrl_conf_type type); int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d); -int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d); +int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr); void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d); -void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d); +void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr); void resctrl_online_cpu(unsigned int cpu); void resctrl_offline_cpu(unsigned int cpu); +/* + * Architecture hook called at beginning of first file system mount attempt. + * No locks are held. + */ +void resctrl_arch_pre_mount(void); + /** * resctrl_arch_rmid_read() - Read the eventid counter corresponding to rmid * for this resource and domain. * @r: resource that the counter should be read from. 
- * @d: domain that the counter should be read from. + * @hdr: Header of domain that the counter should be read from. * @closid: closid that matches the rmid. Depending on the architecture, the * counter may match traffic of both @closid and @rmid, or @rmid * only. * @rmid: rmid of the counter to read. * @eventid: eventid to read, e.g. L3 occupancy. + * @arch_priv: Architecture private data for this event. + * The @arch_priv provided by the architecture via + * resctrl_enable_mon_event(). * @val: result of the counter read in bytes. * @arch_mon_ctx: An architecture specific value from * resctrl_arch_mon_ctx_alloc(), for MPAM this identifies @@ -572,9 +593,9 @@ void resctrl_offline_cpu(unsigned int cpu); * Return: * 0 on success, or -EIO, -EINVAL etc on error. */ -int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, +int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr, u32 closid, u32 rmid, enum resctrl_event_id eventid, - u64 *val, void *arch_mon_ctx); + void *arch_priv, u64 *val, void *arch_mon_ctx); /** * resctrl_arch_rmid_read_context_check() - warn about invalid contexts @@ -619,7 +640,7 @@ struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id, * * This can be called from any CPU. */ -void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, +void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_l3_mon_domain *d, u32 closid, u32 rmid, enum resctrl_event_id eventid); @@ -632,7 +653,7 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, * * This can be called from any CPU. */ -void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d); +void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d); /** * resctrl_arch_reset_all_ctrls() - Reset the control for each CLOSID to its @@ -658,7 +679,7 @@ void resctrl_arch_reset_all_ctrls(struct rdt_resource *r); * * This can be called from any CPU. 
*/ -void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, +void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, enum resctrl_event_id evtid, u32 rmid, u32 closid, u32 cntr_id, bool assign); @@ -681,7 +702,7 @@ void resctrl_arch_config_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, * Return: * 0 on success, or -EIO, -EINVAL etc on error. */ -int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_mon_domain *d, +int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_l3_mon_domain *d, u32 closid, u32 rmid, int cntr_id, enum resctrl_event_id eventid, u64 *val); @@ -696,7 +717,7 @@ int resctrl_arch_cntr_read(struct rdt_resource *r, struct rdt_mon_domain *d, * * This can be called from any CPU. */ -void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_mon_domain *d, +void resctrl_arch_reset_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d, u32 closid, u32 rmid, int cntr_id, enum resctrl_event_id eventid); diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h index acfe07860b346..a5f56faa18d22 100644 --- a/include/linux/resctrl_types.h +++ b/include/linux/resctrl_types.h @@ -50,6 +50,17 @@ enum resctrl_event_id { QOS_L3_MBM_TOTAL_EVENT_ID = 0x02, QOS_L3_MBM_LOCAL_EVENT_ID = 0x03, + /* Intel Telemetry Events */ + PMT_EVENT_ENERGY, + PMT_EVENT_ACTIVITY, + PMT_EVENT_STALLS_LLC_HIT, + PMT_EVENT_C1_RES, + PMT_EVENT_UNHALTED_CORE_CYCLES, + PMT_EVENT_STALLS_LLC_MISS, + PMT_EVENT_AUTO_C6_RES, + PMT_EVENT_UNHALTED_REF_CYCLES, + PMT_EVENT_UOPS_RETIRED, + /* Must be the last */ QOS_NUM_EVENTS, }; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 89ec5ed8c488b..74318c7877156 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -218,17 +218,6 @@ void put_online_mems(void) percpu_up_read(&mem_hotplug_lock); } -#ifdef CONFIG_LOCKDEP -void lockdep_assert_mems_held(void) -{ - /* See lockdep_assert_cpus_held() */ - if (system_state < SYSTEM_RUNNING) - 
return; - - percpu_rwsem_assert_held(&mem_hotplug_lock); -} -#endif - bool movable_node_enabled = false; static int mhp_default_online_type = -1;