intel · mlim19 · Jan 26, 2026 · Mar 2, 2026 · Mar 2, 2026 · Mar 9, 2026
diff --git a/README.md b/README.md
@@ -111,6 +111,45 @@ Make sure you are not passing `-s` to the `-ldflags` during your build - `-s` om
     * `smart` - Run both `fp` and `dwarf`, then choose the result with the highest average of stack frames count, per process.
     * `disabled` - Avoids running `perf` at all. See [perf-less mode](#perf-less-mode).
 
+### Hardware event profiling options
+
+gProfiler supports profiling with custom hardware events (PMU events) for generating flamegraphs based on specific hardware performance counters like cache misses, branch mispredictions, etc.
+
+* `--perf-event`: Specify a perf event to use for profiling instead of the default CPU time-based sampling.
+    * **Requires `--mode=cpu`** (the default mode). Not supported with `--mode=allocation` or `--mode=none`.
+    * When this option is used, all language-specific profilers (Java, Python, etc.) are disabled and only `perf` runs. This is because language profilers sample based on OS time, while hardware events sample based on event counts - these are fundamentally different sampling bases and cannot be meaningfully combined.
+    * Supports built-in perf events (e.g., `cache-misses`, `branch-misses`, `instructions`).
+    * Supports hardware cache events (e.g., `L1-dcache-load-misses`, `LLC-load-misses`).
+    * Supports custom PMU events via `--hw-events-file`.
+    * Uncore events (prefix `uncore_`) are not supported, as they cannot be attributed to specific processes.
+
+* `--perf-event-period`: Use period-based sampling instead of frequency-based sampling. The value is the number of event occurrences between consecutive samples.
+    * Requires `--perf-event` to be specified
+    * Mutually exclusive with `-f/--profiling-frequency`
+    * Example: `--perf-event-period 10000` samples every 10,000 events
+
+* `--hw-events-file`: Path to a JSON file containing custom PMU event definitions. Use this for events not available in `perf list`. See `gprofiler/resources/hw_events_template.json` for the format.
+
+**Examples:**
+
+```bash
+# Profile using cache-misses event with default frequency (11 Hz)
+sudo ./gprofiler --perf-event cache-misses -d 60 -o /tmp
+
+# Profile using cache-misses event with custom frequency (99 Hz)
+sudo ./gprofiler --perf-event cache-misses -f 99 -d 60 -o /tmp
+
+# Profile using cache-misses event with period-based sampling (every 10,000 cache misses)
+sudo ./gprofiler --perf-event cache-misses --perf-event-period 10000 -d 60 -o /tmp
+
+# Profile using a custom event defined in a JSON file
+sudo ./gprofiler --perf-event my-custom-event --hw-events-file /path/to/hw_events.json -d 60 -o /tmp
+```
+
+**Note:** On bare-metal systems, hardware events use PEBS (Precise Event-Based Sampling): the `:ppp` modifier is applied to `cycles`/`instructions`, and `:pp` to other hardware events. On VMs, the less precise `:p` modifier is used because PEBS support may be limited. The detected hypervisor is logged at startup.
+
+**Custom Events Template:** A template file for custom PMU events is available at `gprofiler/resources/hw_events_template.json`.
-**Custom Events Template:** A template file for custom PMU events is available at `gprofiler/resources/hw_events_template.json`.
+**Custom Events Template:** A template file for custom PMU events is available in the source tree at [`gprofiler/resources/hw_events_template.json`](gprofiler/resources/hw_events_template.json). If you installed gProfiler from a package and this file is not present locally, you can copy it from the project repository.
-**Custom Events Template:** A template file for custom PMU events is available at `gprofiler/resources/hw_events_template.json`.
+**Custom Events Template:** A template file for custom PMU events is available in the source tree at [`gprofiler/resources/hw_events_template.json`](gprofiler/resources/hw_events_template.json). If you installed gProfiler from a package and this file is not present locally, you can copy it from the project repository.
+
 ## Rootless mode
 gProfiler can be run in rootless mode, profiling without root or sudo access with limited functionality by using the `--rootless` argument.
 

diff --git a/gprofiler/main.py b/gprofiler/main.py
@@ -60,6 +60,7 @@
 from gprofiler.platform import is_aarch64, is_linux, is_windows
 from gprofiler.profiler_state import ProfilerState
 from gprofiler.profilers.factory import get_profilers
+from gprofiler.profilers.perf import SystemProfiler
 from gprofiler.profilers.profiler_base import NoopProfiler, ProcessProfilerBase, ProfilerInterface
 from gprofiler.profilers.registry import get_profilers_registry
 from gprofiler.state import State, init_state
@@ -335,6 +336,51 @@ def _snapshot(self) -> None:
             else {"hostname": get_hostname()}
         )
         metadata.update({"profiling_mode": self._profiler_state.profiling_mode})
+
+        # Add sampling event information if custom event is being used
+        if isinstance(self.system_profiler, SystemProfiler) and self.system_profiler._custom_event_name:
+            from gprofiler.platform import get_hypervisor_vendor
+            from gprofiler.utils.hw_events import get_event_type, get_perf_available_events, get_precise_modifier
+
+            event_name = self.system_profiler._custom_event_name
+            hypervisor_vendor = get_hypervisor_vendor()
+            perf_events = get_perf_available_events()
+            event_type = get_event_type(event_name, perf_events)
+
+            # Use "custom" as fallback if event_type is None or empty
+            effective_type = event_type if event_type else "custom"
+            modifier = get_precise_modifier(event_name, effective_type, hypervisor_vendor)
+
+            metadata.update(
+                {
+                    "sampling_event": event_name,
+                    "sampling_mode": "period" if self.system_profiler._perf_period else "frequency",
+                    "precise_modifier": modifier,
+                }
+            )
+
+            if self.system_profiler._perf_period:
+                metadata.update({"sampling_period": self.system_profiler._perf_period})
+            else:
+                metadata.update({"sampling_frequency": self.system_profiler._frequency})
+        elif isinstance(self.system_profiler, SystemProfiler):
+            # Default CPU time-based profiling
+            metadata.update(
+                {
+                    "sampling_event": "cpu-time",
+                    "sampling_mode": "frequency",
+                    "sampling_frequency": self.system_profiler._frequency,
+                }
+            )
+        else:
+            # NoopProfiler - use default values
+            metadata.update(
+                {
+                    "sampling_event": "cpu-time",
+                    "sampling_mode": "frequency",
+                    "sampling_frequency": 11,
+                }
+            )
         metrics = self._system_metrics_monitor.get_metrics()
         hwmetrics = self._hw_metrics_monitor.get_hw_metrics()
         if hwmetrics is None:
@@ -606,6 +652,32 @@ def parse_cmd_args() -> configargparse.Namespace:
 
     _add_profilers_arguments(parser)
 
+    # Custom perf event arguments
+    perf_event_options = parser.add_argument_group("Perf Event")
+    perf_event_options.add_argument(
+        "--perf-event",
+        type=str,
+        dest="perf_event",
+        help="Specify a perf event for flamegraph generation (e.g., cache-misses, page-faults, sched:sched_switch). "
+        "When specified, only perf profiler will be active and all language-specific profilers will be disabled. "
+        "Event can be from 'perf list' or a custom event defined in hw_events.json.",
+    )
+    perf_event_options.add_argument(
+        "--perf-event-period",
+        type=int,
+        dest="perf_event_period",
+        help="Use period-based sampling instead of frequency (-c instead of -F). "
+        "Specify the number of events between samples (e.g., 10000 for sampling every 10000 events). "
+        "Only valid with --perf-event.",
+    )
+    perf_event_options.add_argument(
+        "--hw-events-file",
+        type=str,
+        dest="hw_events_file",
+        help="Path to a JSON file containing custom PMU event definitions. "
+        "Only valid with --perf-event. If not specified, only built-in perf events are available.",
+    )
+
     spark_options = parser.add_argument_group("Spark")
 
     spark_options.add_argument(
@@ -892,6 +964,14 @@ def parse_cmd_args() -> configargparse.Namespace:
     args.perf_inject = args.nodejs_mode == "perf"
     args.perf_node_attach = args.nodejs_mode == "attach-maps"
 
+    # Validate --perf-event-period and -f/--frequency are mutually exclusive
+    # Must check before defaults are applied (args.frequency is None if not explicitly provided)
+    if args.perf_event_period and args.frequency is not None:
+        parser.error(
+            "--perf-event-period and -f/--frequency are mutually exclusive. "
+            "Use --perf-event-period for period-based sampling or -f for frequency-based sampling."
+        )
+
     if args.profiling_mode == CPU_PROFILING_MODE:
         if args.alloc_interval:
             parser.error("--alloc-interval is only allowed in allocation profiling (--mode=allocation)")
@@ -936,6 +1016,40 @@ def parse_cmd_args() -> configargparse.Namespace:
     if args.profile_spawned_processes and args.pids_to_profile is not None:
         parser.error("--pids is not allowed when profiling spawned processes")
 
+    # Validate --perf-event-period only works with --perf-event
+    if args.perf_event_period and not args.perf_event:
+        parser.error("--perf-event-period requires --perf-event to be specified")
+
+    # Validate --hw-events-file only works with --perf-event
+    if getattr(args, "hw_events_file", None) and not args.perf_event:
+        parser.error("--hw-events-file requires --perf-event to be specified")
+
+    # Validate --perf-event only works with cpu profiling mode
+    if args.perf_event and args.profiling_mode != CPU_PROFILING_MODE:
+        parser.error("--perf-event is only supported in cpu profiling mode (--mode=cpu)")
+
+    # Validate and resolve perf event arguments
+    if args.perf_event:
-    if args.perf_event:
+    if args.perf_event:
+        # Perf events are only supported in CPU profiling mode. In other modes (e.g. allocation, none),
+        # the perf profiler may be disabled or process profilers skipped, which can result in no profiling.
+        if args.profiling_mode != CPU_PROFILING_MODE:
+            parser.error("--perf-event is only supported with --mode cpu")
-    if args.perf_event:
+    if args.perf_event:
+        # Perf events are only supported in CPU profiling mode. In other modes (e.g. allocation, none),
+        # the perf profiler may be disabled or process profilers skipped, which can result in no profiling.
+        if args.profiling_mode != CPU_PROFILING_MODE:
+            parser.error("--perf-event is only supported with --mode cpu")
+        from gprofiler.platform import get_hypervisor_vendor
+        from gprofiler.utils.hw_events import validate_and_get_event_args, validate_event_with_fallback
+
+        try:
+            # Detect hypervisor
+            hypervisor_vendor = get_hypervisor_vendor()
+
+            # Validate and resolve event
+            hw_events_file = getattr(args, "hw_events_file", None)
+            event_args = validate_and_get_event_args(args.perf_event, hypervisor_vendor, hw_events_file)
+
+            # Test accessibility with fallback
+            validated_args = validate_event_with_fallback(args.perf_event, event_args, hypervisor_vendor)
+
+            # Store resolved event args in args
+            args.perf_event_args = validated_args
+
+        except (ValueError, RuntimeError) as e:
+            parser.error(f"Perf event validation failed: {e}")
+
     return args
 
 
@@ -1020,6 +1134,7 @@ def log_system_info() -> None:
     logger.info(f"Total RAM: {system_info.memory_capacity_mb / 1024:.2f} GB")
     logger.info(f"Linux distribution: {system_info.os_name} | {system_info.os_release} | {system_info.os_codename}")
     logger.info(f"libc version: {system_info.libc_type}-{system_info.libc_version}")
+    logger.info(f"Hypervisor: {system_info.hypervisor}")
     logger.info(f"Hostname: {system_info.hostname}")
 
 

diff --git a/gprofiler/merge.py b/gprofiler/merge.py
@@ -78,6 +78,20 @@ def _make_profile_metadata(
         "hwmetrics": hwmetrics.metrics_data if hwmetrics is not None else None,
         "htmlblob": hwmetrics.metrics_html if hwmetrics is not None else None,
     }
+
+    # Add sampling event information if present in metadata
+    if "sampling_event" in metadata:
+        profile_metadata["sampling_event"] = metadata["sampling_event"]
+        profile_metadata["sampling_mode"] = metadata.get("sampling_mode", "frequency")
+
+        if metadata.get("sampling_mode") == "period":
+            profile_metadata["sampling_period"] = metadata.get("sampling_period")
+        else:
+            profile_metadata["sampling_frequency"] = metadata.get("sampling_frequency")
+
+        if "precise_modifier" in metadata:
+            profile_metadata["precise_modifier"] = metadata["precise_modifier"]
+
     return "# " + json.dumps(profile_metadata)
 
 

diff --git a/gprofiler/metadata/system_metadata.py b/gprofiler/metadata/system_metadata.py
@@ -193,8 +193,10 @@ class SystemInfo:
     kernel_release: str
     kernel_version: str
     system_name: str
+    hypervisor: str
     processors: int
     cpu_model_name: str
+    cpu_arch_codename: str
     cpu_flags: str
     memory_capacity_mb: int
     hostname: str
@@ -254,15 +256,24 @@ def get_static_system_info() -> SystemInfo:
     run_mode = get_run_mode()
     deployment_type = get_deployment_type(run_mode)
     cpu_model_name, cpu_flags = get_cpu_info()
+
+    # Import here to avoid circular dependency
+    from gprofiler.platform import get_cpu_model, get_hypervisor_vendor
+
+    hypervisor = get_hypervisor_vendor()
+    cpu_arch_codename = get_cpu_model()
+
     return SystemInfo(
         python_version=sys.version,
         run_mode=run_mode,
         deployment_type=deployment_type,
         kernel_release=uname.release,
         kernel_version=uname.version,
         system_name=uname.system,
+        hypervisor=hypervisor,
         processors=cpu_count,
         cpu_model_name=cpu_model_name,
+        cpu_arch_codename=cpu_arch_codename,
         cpu_flags=cpu_flags,
         memory_capacity_mb=round(psutil.virtual_memory().total / 1024 / 1024),
         hostname=hostname,

diff --git a/gprofiler/platform.py b/gprofiler/platform.py
@@ -34,3 +34,108 @@ def is_linux() -> bool:
 @lru_cache(maxsize=None)
 def is_aarch64() -> bool:
     return platform.machine() == "aarch64"
+
+
+@lru_cache(maxsize=None)
+def get_cpu_model() -> str:
+    """
+    Map the host CPU to an Intel platform code for custom PMU event support.
+
+    Reads the first "cpu family" and "model" entries of /proc/cpuinfo and, for a
+    family 6 CPU, translates the model number into ICX, SPR, EMR or GNR.
+
+    Returns "UNKNOWN" on non-Linux hosts, for any other family/model, or when
+    /proc/cpuinfo cannot be read or parsed.
+    """
+    # Family 6 model number -> platform code
+    platform_by_model = {
+        106: "ICX",  # Ice Lake Server
+        143: "SPR",  # Sapphire Rapids
+        207: "EMR",  # Emerald Rapids
+        173: "GNR",  # Granite Rapids
+    }
+
+    if not is_linux():
+        return "UNKNOWN"
+
+    family = None
+    model_number = None
+
+    try:
+        with open("/proc/cpuinfo", "r") as cpuinfo:
+            for entry in cpuinfo:
+                if entry.startswith("cpu family"):
+                    family = int(entry.split(":")[1].strip())
+                elif entry.startswith("model") and not entry.startswith("model name"):
+                    # "model name" holds the marketing string; only the numeric "model" is wanted
+                    model_number = int(entry.split(":")[1].strip())
+
+                # Both values found - no need to scan the remaining CPUs
+                if family is not None and model_number is not None:
+                    break
+    except Exception:
+        return "UNKNOWN"
+
+    # All supported platforms are Intel Family 6
+    if family != 6 or model_number is None:
+        return "UNKNOWN"
+
+    return platform_by_model.get(model_number, "UNKNOWN")
+
+
+@lru_cache(maxsize=None)
+def get_hypervisor_vendor() -> str:
+    """
+    Detect the hypervisor vendor of the host.
+
+    Inspects the first "flags" / "Features" line of /proc/cpuinfo. If it lists the
+    "hypervisor" flag, the vendor string is obtained from _read_hypervisor_vendor().
+
+    Returns the hypervisor vendor string (e.g., "KVMKVMKVM", "VMwareVMware"), or "NONE" when
+    no hypervisor flag is found (bare metal), on non-Linux platforms, or when /proc/cpuinfo
+    cannot be read. The result is cached for the lifetime of the process.
+    """
+    if not is_linux():
+        # Hardware event profiling with custom PMU events uses Linux perf subsystem and is Linux-only.
+        # No plans to support non-Linux platforms as they use different performance monitoring mechanisms.
+        # TODO: Update to return "UNKNOWN" or implement detection when Windows support is enabled.
+        return "NONE"
+
+    try:
+        with open("/proc/cpuinfo", "r") as f:
+            for line in f:
+                if line.startswith(("flags", "Features")):
+                    # CPUID leaf 0x1, ECX bit 31 indicates hypervisor presence; it is checked here
+                    # through the "hypervisor" entry of the flags list.
+                    # Compare whole flag tokens so a flag that merely contains the word does not match.
+                    _, _, flags = line.partition(":")
+                    if "hypervisor" in flags.split():
+                        # Hypervisor detected, try to get vendor
+                        return _read_hypervisor_vendor()
+                    # Only the first flags line is inspected
+                    return "NONE"
+    except Exception:
+        # Best effort: a failure to read /proc/cpuinfo is reported as "no hypervisor"
+        return "NONE"
+
+    return "NONE"
+
+
+def _read_hypervisor_vendor() -> str:
+    """
+    Query CPUID leaf 0x40000000 and return the hypervisor vendor string.
+
+    The vendor string is 12 characters, assembled from EBX, ECX and EDX (4 bytes each, in that order).
+    Returns "VM-UNKNOWN" when the resulting string is empty or blank, or when anything fails
+    (including the cpuid module not being importable).
+    """
+    try:
+        import struct
+
+        import cpuid
+
+        # EAX is returned by the call but is not part of the vendor string
+        _eax, ebx, ecx, edx = cpuid.cpuid(0x40000000, 0)
+
+        # Little-endian packing keeps the register bytes in string order
+        raw_vendor = struct.pack("<III", ebx, ecx, edx)
+        vendor = raw_vendor.decode("ascii", errors="replace").rstrip("\x00")
+    except Exception:
+        return "VM-UNKNOWN"
+
+    return vendor if vendor and vendor.strip() else "VM-UNKNOWN"