diff --git a/README.md b/README.md index ad2c9e9dc..833d9ada8 100644 --- a/README.md +++ b/README.md @@ -111,6 +111,45 @@ Make sure you are not passing `-s` to the `-ldflags` during your build - `-s` om * `smart` - Run both `fp` and `dwarf`, then choose the result with the highest average of stack frames count, per process. * `disabled` - Avoids running `perf` at all. See [perf-less mode](#perf-less-mode). +### Hardware event profiling options + +gProfiler supports profiling with custom hardware events (PMU events) for generating flamegraphs based on specific hardware performance counters like cache misses, branch mispredictions, etc. + +* `--perf-event`: Specify a perf event to use for profiling instead of the default CPU time-based sampling. + * **Requires `--mode=cpu`** (the default mode). Not supported with `--mode=allocation` or `--mode=none`. + * When this option is used, all language-specific profilers (Java, Python, etc.) are disabled and only `perf` runs. This is because language profilers sample based on OS time, while hardware events sample based on event counts - these are fundamentally different sampling bases and cannot be meaningfully combined. + * Supports built-in perf events (e.g., `cache-misses`, `branch-misses`, `instructions`) + * Supports hardware cache events (e.g., `L1-dcache-load-misses`, `LLC-load-misses`) + * Supports custom PMU events via `--hw-events-file` + * Uncore events (prefix `uncore_`) are not supported as they cannot be attributed to specific processes + +* `--perf-event-period`: Use period-based sampling instead of frequency-based sampling. This option specifies the number of events between each sample. + * Requires `--perf-event` to be specified + * Mutually exclusive with `-f/--profiling-frequency` + * Example: `--perf-event-period 10000` samples every 10,000 events + +* `--hw-events-file`: Path to a JSON file containing custom PMU event definitions. Use this for events not available in `perf list`. 
See `gprofiler/resources/hw_events_template.json` for the format. + +**Examples:** + +```bash +# Profile using cache-misses event with default frequency (11 Hz) +sudo ./gprofiler --perf-event cache-misses -d 60 -o /tmp + +# Profile using cache-misses event with custom frequency (99 Hz) +sudo ./gprofiler --perf-event cache-misses -f 99 -d 60 -o /tmp + +# Profile using cache-misses event with period-based sampling (every 10,000 cache misses) +sudo ./gprofiler --perf-event cache-misses --perf-event-period 10000 -d 60 -o /tmp + +# Profile using a custom event defined in a JSON file +sudo ./gprofiler --perf-event my-custom-event --hw-events-file /path/to/hw_events.json -d 60 -o /tmp +``` + +**Note:** On bare-metal systems, hardware events are sampled with PEBS (Precise Event-Based Sampling) modifiers: `:ppp` for `cycles`/`instructions`, `:p` for `ocr.*` events, and `:pp` for all other hardware events. On VMs, the less precise `:p` modifier is used because PEBS support may be limited; if the event cannot be opened with `:p`, gProfiler retries it without a modifier. Software events and tracepoints never get a modifier. The detected hypervisor is logged at startup. + +**Custom Events Template:** A template file for custom PMU events is available at `gprofiler/resources/hw_events_template.json`. + ## Rootless mode gProfiler can be run in rootless mode, profiling without root or sudo access with limited functionality by using the `--rootless` argument. 
diff --git a/gprofiler/main.py b/gprofiler/main.py index 0f4ac645d..9887e2e47 100644 --- a/gprofiler/main.py +++ b/gprofiler/main.py @@ -61,6 +61,7 @@ from gprofiler.platform import is_aarch64, is_linux, is_windows from gprofiler.profiler_state import ProfilerState from gprofiler.profilers.factory import get_profilers +from gprofiler.profilers.perf import SystemProfiler from gprofiler.profilers.profiler_base import NoopProfiler, ProcessProfilerBase, ProfilerInterface from gprofiler.profilers.registry import get_profilers_registry from gprofiler.state import State, init_state @@ -394,6 +395,51 @@ def _snapshot(self) -> None: else {"hostname": get_hostname()} ) metadata.update({"profiling_mode": self._profiler_state.profiling_mode}) + + # Add sampling event information if custom event is being used + if isinstance(self.system_profiler, SystemProfiler) and self.system_profiler._custom_event_name: + from gprofiler.platform import get_hypervisor_vendor + from gprofiler.utils.hw_events import get_event_type, get_perf_available_events, get_precise_modifier + + event_name = self.system_profiler._custom_event_name + hypervisor_vendor = get_hypervisor_vendor() + perf_events = get_perf_available_events() + event_type = get_event_type(event_name, perf_events) + + # Use "custom" as fallback if event_type is None or empty + effective_type = event_type if event_type else "custom" + modifier = get_precise_modifier(event_name, effective_type, hypervisor_vendor) + + metadata.update( + { + "sampling_event": event_name, + "sampling_mode": "period" if self.system_profiler._perf_period else "frequency", + "precise_modifier": modifier, + } + ) + + if self.system_profiler._perf_period: + metadata.update({"sampling_period": self.system_profiler._perf_period}) + else: + metadata.update({"sampling_frequency": self.system_profiler._frequency}) + elif isinstance(self.system_profiler, SystemProfiler): + # Default CPU time-based profiling + metadata.update( + { + "sampling_event": "cpu-time", + 
"sampling_mode": "frequency", + "sampling_frequency": self.system_profiler._frequency, + } + ) + else: + # NoopProfiler - use default values + metadata.update( + { + "sampling_event": "cpu-time", + "sampling_mode": "frequency", + "sampling_frequency": 11, + } + ) metrics = self._system_metrics_monitor.get_metrics() hwmetrics = self._hw_metrics_monitor.get_hw_metrics() if hwmetrics is None: @@ -721,6 +767,32 @@ def parse_cmd_args() -> configargparse.Namespace: _add_profilers_arguments(parser) + # Custom perf event arguments + perf_event_options = parser.add_argument_group("Perf Event") + perf_event_options.add_argument( + "--perf-event", + type=str, + dest="perf_event", + help="Specify a perf event for flamegraph generation (e.g., cache-misses, page-faults, sched:sched_switch). " + "When specified, only perf profiler will be active and all language-specific profilers will be disabled. " + "Event can be from 'perf list' or a custom event defined in hw_events.json.", + ) + perf_event_options.add_argument( + "--perf-event-period", + type=int, + dest="perf_event_period", + help="Use period-based sampling instead of frequency (-c instead of -F). " + "Specify the number of events between samples (e.g., 10000 for sampling every 10000 events). " + "Only valid with --perf-event.", + ) + perf_event_options.add_argument( + "--hw-events-file", + type=str, + dest="hw_events_file", + help="Path to a JSON file containing custom PMU event definitions. " + "Only valid with --perf-event. 
If not specified, only built-in perf events are available.", + ) + spark_options = parser.add_argument_group("Spark") spark_options.add_argument( @@ -1023,6 +1095,14 @@ def parse_cmd_args() -> configargparse.Namespace: args.perf_inject = args.nodejs_mode == "perf" args.perf_node_attach = args.nodejs_mode == "attach-maps" + # Validate --perf-event-period and -f/--frequency are mutually exclusive + # Must check before defaults are applied (args.frequency is None if not explicitly provided) + if args.perf_event_period and args.frequency is not None: + parser.error( + "--perf-event-period and -f/--frequency are mutually exclusive. " + "Use --perf-event-period for period-based sampling or -f for frequency-based sampling." + ) + if args.profiling_mode == CPU_PROFILING_MODE: if args.alloc_interval: parser.error("--alloc-interval is only allowed in allocation profiling (--mode=allocation)") @@ -1075,6 +1155,40 @@ def parse_cmd_args() -> configargparse.Namespace: if not args.service_name: parser.error("--enable-heartbeat-server requires --service-name to be provided") + # Validate --perf-event-period only works with --perf-event + if args.perf_event_period and not args.perf_event: + parser.error("--perf-event-period requires --perf-event to be specified") + + # Validate --hw-events-file only works with --perf-event + if getattr(args, "hw_events_file", None) and not args.perf_event: + parser.error("--hw-events-file requires --perf-event to be specified") + + # Validate --perf-event only works with cpu profiling mode + if args.perf_event and args.profiling_mode != CPU_PROFILING_MODE: + parser.error("--perf-event is only supported in cpu profiling mode (--mode=cpu)") + + # Validate and resolve perf event arguments + if args.perf_event: + from gprofiler.platform import get_hypervisor_vendor + from gprofiler.utils.hw_events import validate_and_get_event_args, validate_event_with_fallback + + try: + # Detect hypervisor + hypervisor_vendor = get_hypervisor_vendor() + + # Validate 
and resolve event + hw_events_file = getattr(args, "hw_events_file", None) + event_args = validate_and_get_event_args(args.perf_event, hypervisor_vendor, hw_events_file) + + # Test accessibility with fallback + validated_args = validate_event_with_fallback(args.perf_event, event_args, hypervisor_vendor) + + # Store resolved event args in args + args.perf_event_args = validated_args + + except (ValueError, RuntimeError) as e: + parser.error(f"Perf event validation failed: {e}") + return args @@ -1159,6 +1273,7 @@ def log_system_info() -> None: logger.info(f"Total RAM: {system_info.memory_capacity_mb / 1024:.2f} GB") logger.info(f"Linux distribution: {system_info.os_name} | {system_info.os_release} | {system_info.os_codename}") logger.info(f"libc version: {system_info.libc_type}-{system_info.libc_version}") + logger.info(f"Hypervisor: {system_info.hypervisor}") logger.info(f"Hostname: {system_info.hostname}") diff --git a/gprofiler/merge.py b/gprofiler/merge.py index bbae2c19a..21073e0a1 100644 --- a/gprofiler/merge.py +++ b/gprofiler/merge.py @@ -80,6 +80,20 @@ def _make_profile_metadata( "htmlblob": hwmetrics.metrics_html if hwmetrics is not None else None, "flamegraph_html": flamegraph_html, } + + # Add sampling event information if present in metadata + if "sampling_event" in metadata: + profile_metadata["sampling_event"] = metadata["sampling_event"] + profile_metadata["sampling_mode"] = metadata.get("sampling_mode", "frequency") + + if metadata.get("sampling_mode") == "period": + profile_metadata["sampling_period"] = metadata.get("sampling_period") + else: + profile_metadata["sampling_frequency"] = metadata.get("sampling_frequency") + + if "precise_modifier" in metadata: + profile_metadata["precise_modifier"] = metadata["precise_modifier"] + return "# " + json.dumps(profile_metadata) diff --git a/gprofiler/metadata/system_metadata.py b/gprofiler/metadata/system_metadata.py index 73ed117c5..4f3a2399a 100644 --- a/gprofiler/metadata/system_metadata.py +++ 
b/gprofiler/metadata/system_metadata.py @@ -193,8 +193,10 @@ class SystemInfo: kernel_release: str kernel_version: str system_name: str + hypervisor: str processors: int cpu_model_name: str + cpu_arch_codename: str cpu_flags: str memory_capacity_mb: int hostname: str @@ -254,6 +256,13 @@ def get_static_system_info() -> SystemInfo: run_mode = get_run_mode() deployment_type = get_deployment_type(run_mode) cpu_model_name, cpu_flags = get_cpu_info() + + # Import here to avoid circular dependency + from gprofiler.platform import get_cpu_model, get_hypervisor_vendor + + hypervisor = get_hypervisor_vendor() + cpu_arch_codename = get_cpu_model() + return SystemInfo( python_version=sys.version, run_mode=run_mode, @@ -261,8 +270,10 @@ def get_static_system_info() -> SystemInfo: kernel_release=uname.release, kernel_version=uname.version, system_name=uname.system, + hypervisor=hypervisor, processors=cpu_count, cpu_model_name=cpu_model_name, + cpu_arch_codename=cpu_arch_codename, cpu_flags=cpu_flags, memory_capacity_mb=round(psutil.virtual_memory().total / 1024 / 1024), hostname=hostname, diff --git a/gprofiler/platform.py b/gprofiler/platform.py index d5b803fd5..cb70c306e 100644 --- a/gprofiler/platform.py +++ b/gprofiler/platform.py @@ -34,3 +34,108 @@ def is_linux() -> bool: @lru_cache(maxsize=None) def is_aarch64() -> bool: return platform.machine() == "aarch64" + + +@lru_cache(maxsize=None) +def get_cpu_model() -> str: + """ + Detect Intel CPU model for custom PMU event support. + Returns platform code: ICX, SPR, EMR, GNR, or UNKNOWN. 
+ """ + if not is_linux(): + return "UNKNOWN" + + try: + with open("/proc/cpuinfo", "r") as f: + cpu_family = None + model = None + + for line in f: + if line.startswith("cpu family"): + cpu_family = int(line.split(":")[1].strip()) + elif line.startswith("model") and not line.startswith("model name"): + model = int(line.split(":")[1].strip()) + + # Once we have both, we can determine the platform + if cpu_family is not None and model is not None: + break + + # All supported platforms are Intel Family 6 + if cpu_family != 6 or model is None: + return "UNKNOWN" + + # Map model numbers to platform codes + model_to_platform = { + 106: "ICX", # Ice Lake Server + 143: "SPR", # Sapphire Rapids + 207: "EMR", # Emerald Rapids + 173: "GNR", # Granite Rapids + } + + return model_to_platform.get(model, "UNKNOWN") + + except Exception: + return "UNKNOWN" + + +@lru_cache(maxsize=None) +def get_hypervisor_vendor() -> str: + """ + Detect hypervisor vendor using CPUID. + Returns hypervisor vendor string (e.g., "KVMKVMKVM", "VMwareVMware") or "NONE" for bare metal. + """ + if not is_linux(): + # Hardware event profiling with custom PMU events uses Linux perf subsystem and is Linux-only. + # No plans to support non-Linux platforms as they use different performance monitoring mechanisms. + # TODO: Update to return "UNKNOWN" or implement detection when Windows support is enabled. 
+ return "NONE" + + try: + # Try to use cpuid if available + # CPUID leaf 0x1, ECX bit 31 indicates hypervisor presence + # If present, CPUID leaf 0x40000000 returns vendor string in EBX, ECX, EDX + + # We need to read from /dev/cpu/*/cpuid or use inline assembly + # For simplicity, we'll check if the hypervisor bit is set via /proc/cpuinfo flags + # and then try to read the vendor string + + with open("/proc/cpuinfo", "r") as f: + for line in f: + if line.startswith("flags") or line.startswith("Features"): + flags = line.split(":")[1].strip() + if "hypervisor" in flags: + # Hypervisor detected, try to get vendor + return _read_hypervisor_vendor() + else: + return "NONE" + + return "NONE" + + except Exception: + return "NONE" + + +def _read_hypervisor_vendor() -> str: + """ + Read hypervisor vendor string from CPUID leaf 0x40000000. + The vendor string is 12 characters: EBX (4 bytes) + ECX (4 bytes) + EDX (4 bytes). + """ + try: + import struct + + import cpuid + + # Execute CPUID leaf 0x40000000 for hypervisor vendor + eax, ebx, ecx, edx = cpuid.cpuid(0x40000000, 0) + + # Vendor string is in EBX, ECX, EDX (12 characters total) + vendor_bytes = struct.pack(" Dict[str, str]: + """ + Run 'perf list' and parse available events with their types. + Returns dict mapping event names to types (hardware, software, tracepoint, cache). 
+ """ + try: + result = run_process([perf_path(), "list"], suppress_log=True) + raw_output = result.stdout + + # Decode bytes to string if necessary + if isinstance(raw_output, bytes): + output = raw_output.decode("utf-8", errors="replace") + else: + output = raw_output + + events: Dict[str, str] = {} + current_type = DEFAULT_TYPE + + for line in output.splitlines(): + line = line.strip() + + # Skip empty lines and comments + if not line or line.startswith("#"): + continue + + # Detect section headers (lines that are ONLY section markers) + if line.startswith("List of") or line.endswith(":"): + # Section headers like "cpu:", "List of pre-defined events" + continue + + # Parse event lines (format: "event_name [description]" or "event_name OR alias") + # Extract event name (everything before the bracket or first whitespace block) + match = re.match(r"^\s*([a-zA-Z0-9_\-:./]+(?:\s+OR\s+[a-zA-Z0-9_\-:./]+)?)\s*(?:\[(.+?)\])?", line) + if match: + event_part = match.group(1) + event_tag = match.group(2) + + # Extract the primary event name (before "OR") + event_name = event_part.split()[0] if event_part else None + + if event_name: + # Determine event type from tag if present + if event_tag: + # Treat Hardware event, Hardware cache event, and Kernel PMU event as hardware + if "Hardware" in event_tag or "Kernel PMU" in event_tag: + events[event_name] = "hardware" + elif "Software" in event_tag: + events[event_name] = "software" + elif "Tool event" in event_tag: + events[event_name] = "software" + elif "Tracepoint" in event_tag: + events[event_name] = "tracepoint" + else: + events[event_name] = current_type + else: + # No tag, use current section type + events[event_name] = current_type + + return events + + except (CalledProcessError, Exception): + # Cannot use logger here as it may be called before state initialization + return {} + + +def load_custom_events(hw_events_file: Optional[str] = None) -> Dict: + """ + Load custom PMU event definitions from a JSON file. 
+ Returns dict with event definitions per platform; raises ValueError if the file is missing, unreadable or malformed. + + Args: + hw_events_file: Path to the JSON file. If None, returns empty dict. + """ + if hw_events_file is None: + return {} + + try: + json_path = Path(hw_events_file) + if not json_path.exists(): + raise ValueError(f"Hardware events file not found: {hw_events_file}") + + with open(json_path, "r") as f: + events = json.load(f) + + if not isinstance(events, dict): + raise ValueError(f"Hardware events file must contain a JSON object, got {type(events).__name__}") + + # Filter out metadata fields (starting with _) + custom_events = {k: v for k, v in events.items() if not k.startswith("_")} + return custom_events + + except json.JSONDecodeError as e: + raise ValueError(f"Invalid JSON in hardware events file: {e}") from e + except OSError as e: + raise ValueError(f"Failed to load hardware events file: {e}") from e + + +def get_event_type(event_name: str, perf_events: Dict[str, str], hw_events_file: Optional[str] = None) -> Optional[str]: + """ + Get the type of an event (hardware, software, tracepoint, cache, custom). + Returns event type or None if not found. + """ + if event_name in perf_events: + return perf_events[event_name] + + # Check if it's a custom event (only if hw_events_file is provided) + if hw_events_file: + custom_events = load_custom_events(hw_events_file) + if event_name in custom_events: + return "custom" + + return None + + +def get_precise_modifier(event_name: str, event_type: str, hypervisor_vendor: str) -> str: + """ + Determine the precise event modifier based on event type and hypervisor status. 
+ + Bare metal (hypervisor="NONE"): + - cycles, instructions → :ppp + - ocr.* → :p + - other HW → :pp + - SW/tracepoint → no modifier + + VM (hypervisor set): + - all HW → :p + - SW/tracepoint → no modifier + """ + is_vm = hypervisor_vendor != "NONE" + + # Software events and tracepoints don't use PEBS modifiers + if event_type in ("software", "tracepoint"): + return "" + + # VM: all hardware events get :p + if is_vm: + return ":p" + + # Bare metal: different modifiers based on event + if event_type in ("hardware", "cache", "custom"): + # Special cases + if event_name in ("cycles", "instructions"): + return ":ppp" + elif event_name.startswith("ocr.") or event_name.startswith("OCR."): + return ":p" + else: + return ":pp" + + # Unknown type, no modifier + return "" + + +def validate_and_get_event_args( + event_name: str, hypervisor_vendor: str, hw_events_file: Optional[str] = None +) -> List[str]: + """ + Validate event and return perf arguments for it. + + Resolution order: + 1. Check perf list for built-in events + 2. Check custom events file (if specified) + 3. Raise error if not found + + Returns list like ["-e", "event_name:modifier"] + """ + # Check if it's an uncore event (not supported for flamegraphs) + if event_name.startswith("uncore_") or "/uncore_" in event_name: + raise ValueError( + f"Uncore event '{event_name}' is not supported for flamegraph generation. " + f"Uncore events measure system-wide hardware activity and cannot be attributed to specific " + f"processes/threads." 
+ ) + + # First check perf list + perf_events = get_perf_available_events() + event_type = get_event_type(event_name, perf_events, hw_events_file) + + if event_type and event_type != "custom": + # Found in perf list + modifier = get_precise_modifier(event_name, event_type, hypervisor_vendor) + event_with_modifier = f"{event_name}{modifier}" + return ["-e", event_with_modifier] + + # Not in perf list, check custom events + custom_events = load_custom_events(hw_events_file) + if event_name not in custom_events: + # Event not found anywhere + available_builtin = list(perf_events.keys())[:10] # Show first 10 + available_custom = list(custom_events.keys()) + + error_msg = f"Event '{event_name}' not found in perf built-in events" + if hw_events_file: + error_msg += f" or custom events file ({hw_events_file})" + error_msg += ".\n" + error_msg += f" Available built-in events (first 10): {available_builtin}\n" + if available_custom: + error_msg += f" Available custom events: {available_custom}\n" + else: + error_msg += " No custom events file provided. Use --hw-events-file to specify one.\n" + error_msg += f" Run '{perf_path()} list' to see all built-in events." 
+ + raise ValueError(error_msg) + + # Found in custom events, get platform-specific config + platform = get_cpu_model() + event_config = custom_events[event_name] + + if platform not in event_config: + supported_platforms = [k for k in event_config.keys() if not k.startswith("_")] + error_msg = ( + f"Custom event '{event_name}' not supported on platform '{platform}'.\n" + f" Supported platforms: {supported_platforms}" + ) + raise ValueError(error_msg) + + # Get raw event code for this platform + platform_config = event_config[platform] + raw_event = platform_config.get("raw") + + if not raw_event: + error_msg = f"Custom event '{event_name}' missing 'raw' field for platform '{platform}'" + raise ValueError(error_msg) + + # Apply modifier for custom events (treated as hardware events) + modifier = get_precise_modifier(event_name, "custom", hypervisor_vendor) + event_with_modifier = f"{raw_event}{modifier}" + + return ["-e", event_with_modifier] + + +def test_perf_event_accessible(event_args: List[str]) -> bool: + """ + Test if a perf event is accessible by running a quick perf record test. + Returns True if accessible, False otherwise. + """ + try: + run_process( + [perf_path(), "record", "-o", "/dev/null"] + event_args + ["--", "sleep", "0.1"], + suppress_log=True, + ) + return True + except Exception: + return False + + +def validate_event_with_fallback(event_name: str, event_args: List[str], hypervisor_vendor: str) -> List[str]: + """ + Validate event accessibility with fallback for VMs. + + For VMs: if event with :p modifier fails, retry without modifier. + For bare metal: no fallback, event must work as-is. + + Returns the event args perf accepted; raises RuntimeError if the event cannot be opened. 
+ """ + is_vm = hypervisor_vendor != "NONE" + + # Test the event + if test_perf_event_accessible(event_args): + return event_args + + # Failed - try fallback for VMs + if is_vm and event_args[1].endswith(":p"): + # Slice off the ":p" suffix; rstrip(":p") strips a char set and mangles e.g. "inst_retired.any_p:p" + event_without_modifier = event_args[1][: -len(":p")] + fallback_args = ["-e", event_without_modifier] + + if test_perf_event_accessible(fallback_args): + return fallback_args + + # No fallback worked + error_msg = f"Cannot access perf event '{event_name}'. Check permissions and PMU availability." + raise RuntimeError(error_msg) diff --git a/gprofiler/utils/perf.py b/gprofiler/utils/perf.py index c32e1cf76..c80c86027 100644 --- a/gprofiler/utils/perf.py +++ b/gprofiler/utils/perf.py @@ -183,6 +183,7 @@ def parse_perf_script_from_iterator( pid_to_collapsed_stacks_counters: ProcessToStackSampleCounters = defaultdict(Counter) current_sample_lines: List[str] = [] + sample_count = 0 for line in perf_iterator: # Empty line indicates end of sample block @@ -191,6 +192,7 @@ def parse_perf_script_from_iterator( # Process the accumulated sample sample = "\n".join(current_sample_lines) _process_single_sample(sample, pid_to_collapsed_stacks_counters, insert_dso_name) + sample_count += 1 current_sample_lines = [] else: # Accumulate lines for current sample @@ -200,6 +202,9 @@ def parse_perf_script_from_iterator( if current_sample_lines: sample = "\n".join(current_sample_lines) _process_single_sample(sample, pid_to_collapsed_stacks_counters, insert_dso_name) + sample_count += 1 + + logger.debug(f"Parsed perf script output: {sample_count} samples") return pid_to_collapsed_stacks_counters diff --git a/gprofiler/utils/perf_process.py b/gprofiler/utils/perf_process.py index 29243a95b..282b3dbe7 100644 --- a/gprofiler/utils/perf_process.py +++ b/gprofiler/utils/perf_process.py @@ -51,6 +51,9 @@ def __init__( use_cgroups: bool = False, max_cgroups: int = 50, max_docker_containers: int = 0, + 
custom_event_name: Optional[str] = None, + use_period: bool = 
False, + period_value: Optional[int] = None, ): self._start_time = 0.0 self._frequency = frequency @@ -132,12 +135,21 @@ def __init__( self._extra_args = extra_args self._switch_timeout_s = switch_timeout_s self._process: Optional[Popen] = None + self._custom_event_name = custom_event_name + self._use_period = use_period + self._period_value = period_value @property def _log_name(self) -> str: return f"perf ({self._type} mode)" def _get_perf_cmd(self) -> List[str]: + # Use period-based sampling if specified, otherwise frequency-based + if self._use_period and self._period_value is not None: + sampling_args = ["-c", str(self._period_value)] + else: + sampling_args = ["-F", str(self._frequency)] + # When using cgroups, perf requires events to be specified before cgroups. # If no explicit events are provided but cgroups are used, add default event. # For multiple cgroups, perf requires one event per cgroup. @@ -180,8 +192,9 @@ def _get_perf_cmd(self) -> List[str]: [ perf_path(), "record", - "-F", - str(self._frequency), + ] + + sampling_args + + [ "-g", "-o", self._output_path, diff --git a/mypy.ini b/mypy.ini index 8eaad0dc4..abb6d6b3a 100644 --- a/mypy.ini +++ b/mypy.ini @@ -30,3 +30,5 @@ ignore_missing_imports = True ignore_missing_imports = True [mypy-humanfriendly.*] ignore_missing_imports = True +[mypy-cpuid.*] +ignore_missing_imports = True diff --git a/requirements.txt b/requirements.txt index 01f0d2033..c05c441dd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,3 +14,4 @@ WMI==1.5.1; sys.platform == "win32" humanfriendly==10.0 beautifulsoup4==4.13.3 backports.tarfile==1.2.0 +cpuid==0.0.11 ; platform_machine == "x86_64" # For CPUID instruction access to detect hypervisor and CPU model (x86 only) diff --git a/scripts/check_pyinstaller.sh b/scripts/check_pyinstaller.sh index 5d44e4bda..348b0dc3e 100755 --- a/scripts/check_pyinstaller.sh +++ b/scripts/check_pyinstaller.sh @@ -17,7 +17,7 @@ set -uo pipefail # grep returns 0 if a match is found and 1 if 
no match is found -result=$(grep "gprofiler\." "build/pyinstaller/warn-pyinstaller.txt" | grep -v 'missing module named wmi' | grep -v 'missing module named pythoncom' | grep -v 'missing module named netifaces') +result=$(grep "gprofiler\." "build/pyinstaller/warn-pyinstaller.txt" | grep -v 'missing module named wmi' | grep -v 'missing module named pythoncom' | grep -v 'missing module named netifaces' | grep -v 'missing module named cpuid') if [ -n "$result" ]; then echo "$result"