From 0725bd0c1546fcebad220eb598eeecaf91d5eeaa Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 5 Apr 2026 22:09:26 +0000 Subject: [PATCH 1/4] Improve performance: cache condition parsing, type hints, and optimize string operations Agent-Logs-Url: https://github.com/SigmaHQ/pySigma/sessions/96b970a7-15bc-4b53-bbeb-51ca1c790721 Co-authored-by: thomaspatzke <1845601+thomaspatzke@users.noreply.github.com> --- sigma/conditions.py | 20 ++++++++++- sigma/modifiers.py | 15 ++++++++- sigma/types.py | 82 +++++++++++++++++++++++++++------------------ 3 files changed, 83 insertions(+), 34 deletions(-) diff --git a/sigma/conditions.py b/sigma/conditions.py index 63bb44e6..7e17b5ba 100644 --- a/sigma/conditions.py +++ b/sigma/conditions.py @@ -1,5 +1,7 @@ from dataclasses import dataclass, field from abc import ABC +import copy +from functools import lru_cache import re from sigma.processing.tracking import ProcessingItemTrackingMixin from pyparsing import ( @@ -10,11 +12,15 @@ opAssoc, ParseResults, ParseException, + ParserElement, ) from typing import ClassVar, Optional, Union, Type, cast, TYPE_CHECKING from sigma.types import SigmaType from sigma.exceptions import SigmaConditionError, SigmaRuleLocation +# Enable packrat parsing for faster parsing of complex condition expressions +ParserElement.enable_packrat(cache_size_limit=128) + if TYPE_CHECKING: from sigma.rule.detection import SigmaDetection, SigmaDetectionItem, SigmaDetections @@ -334,6 +340,17 @@ class ConditionValueExpression(ParentChainMixin): ) +@lru_cache(maxsize=256) +def _parse_condition_string( + condition_str: str, +) -> ConditionItem: + """Parse a condition string using pyparsing, with caching for repeated strings. + + Results are deep-copied on retrieval since postprocessing mutates the parse tree. + """ + return cast(ConditionItem, condition.parse_string(condition_str, parse_all=True)[0]) + + @dataclass class SigmaCondition(ProcessingItemTrackingMixin): condition: str @@ -356,7 +373,8 @@ def parse( "The pipe syntax in Sigma conditions has been deprecated and replaced by Sigma correlations. pySigma doesn't supports this syntax." ) try: - parsed = cast(ConditionItem, condition.parse_string(self.condition, parse_all=True)[0]) + # Use cached parse result, deep-copied since postprocessing mutates the tree + parsed = copy.deepcopy(_parse_condition_string(self.condition)) if postprocess: return parsed.postprocess(self.detections, source=self.source) else: diff --git a/sigma/modifiers.py b/sigma/modifiers.py index ee401bef..e8193bc6 100644 --- a/sigma/modifiers.py +++ b/sigma/modifiers.py @@ -64,9 +64,22 @@ def __init__( self.applied_modifiers = applied_modifiers self.source = source + # Cache for type hints resolved from modify() method, keyed by class + _type_hint_cache: ClassVar[dict[type, Any]] = {} + + def _get_modify_type_hint(self) -> Any: + """Get the type hint for the 'val' parameter of the modify method, with caching per class.""" + cls = type(self) + try: + return SigmaModifier._type_hint_cache[cls] + except KeyError: + th = get_type_hints(self.modify)["val"] + SigmaModifier._type_hint_cache[cls] = th + return th + def type_check(self, val: Any, explicit_type: Optional[Type[Any]] = None) -> bool: th = ( - explicit_type or get_type_hints(self.modify)["val"] + explicit_type or self._get_modify_type_hint() ) # get type annotation from val parameter of apply method or explicit_type parameter if th is Any: return True diff --git a/sigma/types.py b/sigma/types.py index afe99709..45d9f274 100644 --- a/sigma/types.py +++ b/sigma/types.py @@ -335,20 +335,14 @@ def replace_with_placeholder(self, regex: Pattern[str], placeholder_name: str) - def _merge_strs(self) -> "SigmaString": """Merge consecutive plain strings in self.s.""" - src = list(reversed(self.s)) - res: list[SigmaStringPartType] = [] - while src: - item = src.pop() - try: - if isinstance(res[-1], str) and isinstance( - item, str - ): # append current item to last result element if both are strings - res[-1] += item - else: - res.append(item) - except IndexError: # first element + if not self.s: + return self + res: list[SigmaStringPartType] = [self.s[0]] + for item in self.s[1:]: + if isinstance(res[-1], str) and isinstance(item, str): + res[-1] += item + else: res.append(item) - self.s = res return self @@ -450,7 +444,7 @@ def endswith(self, val: SigmaStringPartType) -> bool: def contains_special(self) -> bool: """Check if string contains special characters.""" - return any([isinstance(item, SpecialChars) for item in self.s]) + return any(isinstance(item, SpecialChars) for item in self.s) def contains_placeholder( self, include: Optional[list[str]] = None, exclude: Optional[list[str]] = None @@ -572,40 +566,56 @@ def convert( Setting one of the wildcard or multiple parameters to None indicates that this feature is not supported. Appearance of these characters in a string will raise a SigmaValueError. """ - s = "" + result = [] escaped_chars = frozenset((wildcard_multi or "") + (wildcard_single or "") + add_escaped) - - for c in iter(self): - if isinstance(c, str): # c is plain character - if c in filter_chars: # Skip filtered characters - continue - if c in escaped_chars: - s += escape_char - s += c - elif isinstance(c, SpecialChars): # special handling for special characters - if c == SpecialChars.WILDCARD_MULTI: + filter_set = frozenset(filter_chars) if filter_chars else frozenset() + + for part in self.s: + if isinstance(part, str): # part is a plain string segment + if not filter_set and not escaped_chars: + # Fast path: no escaping or filtering needed + result.append(part) + elif not filter_set and escaped_chars: + # Only escaping needed, process character-by-character only if necessary + if any(c in escaped_chars for c in part): + for c in part: + if c in escaped_chars: + result.append(escape_char) + result.append(c) + else: + result.append(part) + else: + # Both filtering and escaping + for c in part: + if c in filter_set: + continue + if c in escaped_chars: + result.append(escape_char) + result.append(c) + elif isinstance(part, SpecialChars): # special handling for special characters + if part == SpecialChars.WILDCARD_MULTI: if wildcard_multi is not None: - s += wildcard_multi + result.append(wildcard_multi) else: raise SigmaValueError( "Multi-character wildcard not specified for conversion" ) - elif c == SpecialChars.WILDCARD_SINGLE: + elif part == SpecialChars.WILDCARD_SINGLE: if wildcard_single is not None: - s += wildcard_single + result.append(wildcard_single) else: raise SigmaValueError( "Single-character wildcard not specified for conversion" ) - elif isinstance(c, Placeholder): + elif isinstance(part, Placeholder): raise SigmaPlaceholderError( - f"Attempt to convert unhandled placeholder '{c.name}' into query." + f"Attempt to convert unhandled placeholder '{part.name}' into query." ) else: raise SigmaValueError( - f"Trying to convert SigmaString containing part of type '{type(c).__name__}'" + f"Trying to convert SigmaString containing part of type '{type(part).__name__}'" ) - return s + return "".join(result) def to_regex(self, custom_escaped: str = "") -> "SigmaRegularExpression": """Convert SigmaString into a regular expression.""" @@ -1011,6 +1021,14 @@ class SigmaExpansion(NoPlainConversionMixin, SigmaType): def sigma_type(v: Optional[Union[int, float, str, bool]]) -> SigmaType: """Return Sigma type from Python value""" + # Check bool before int since bool is a subclass of int in Python + t = type(v) + if t is bool: + return SigmaBool(v) + st = type_map.get(t) + if st is not None: + return st(v) + # Fallback to isinstance checks for subclasses for t, st in type_map.items(): if isinstance(v, t): return st(v) From 8f67463edf924f909eeee9fae6a524258d9b0fb2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 5 Apr 2026 22:11:07 +0000 Subject: [PATCH 2/4] Address review feedback: fix variable shadowing and frozenset creation Agent-Logs-Url: https://github.com/SigmaHQ/pySigma/sessions/96b970a7-15bc-4b53-bbeb-51ca1c790721 Co-authored-by: thomaspatzke <1845601+thomaspatzke@users.noreply.github.com> --- sigma/types.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sigma/types.py b/sigma/types.py index 45d9f274..2b6defd9 100644 --- a/sigma/types.py +++ b/sigma/types.py @@ -568,7 +568,7 @@ def convert( """ result = [] escaped_chars = frozenset((wildcard_multi or "") + (wildcard_single or "") + add_escaped) - filter_set = frozenset(filter_chars) if filter_chars else frozenset() + filter_set = frozenset(filter_chars) for part in self.s: if isinstance(part, str): # part is a plain string segment @@ -1022,12 +1022,12 @@ class SigmaExpansion(NoPlainConversionMixin, SigmaType): def sigma_type(v: Optional[Union[int, float, str, bool]]) -> SigmaType: """Return Sigma type from Python value""" # Check bool before int since bool is a subclass of int in Python - t = type(v) - if t is bool: + vtype = type(v) + if vtype is bool: return SigmaBool(v) - st = type_map.get(t) - if st is not None: - return st(v) + matched = type_map.get(vtype) + if matched is not None: + return matched(v) # Fallback to isinstance checks for subclasses for t, st in type_map.items(): if isinstance(v, t): From a633790f82f6dd4834f16e3e47587ed74afe8a6f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 6 Apr 2026 23:12:18 +0000 Subject: [PATCH 3/4] Fix mypy type checking errors in sigma/types.py Agent-Logs-Url: https://github.com/SigmaHQ/pySigma/sessions/29062a21-3d71-4f22-b83a-15c994c5ead9 Co-authored-by: thomaspatzke <1845601+thomaspatzke@users.noreply.github.com> --- sigma/types.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sigma/types.py b/sigma/types.py index 096fdba0..78171119 100644 --- a/sigma/types.py +++ b/sigma/types.py @@ -582,7 +582,7 @@ def convert( # Only escaping needed, process character-by-character only if necessary if any(c in escaped_chars for c in part): for c in part: - if c in escaped_chars: + if c in escaped_chars and escape_char is not None: result.append(escape_char) result.append(c) else: @@ -592,7 +592,7 @@ def convert( for c in part: if c in filter_set: continue - if c in escaped_chars: + if c in escaped_chars and escape_char is not None: result.append(escape_char) result.append(c) elif isinstance(part, SpecialChars): # special handling for special characters @@ -1025,10 +1025,9 @@ class SigmaExpansion(NoPlainConversionMixin, SigmaType): def sigma_type(v: (int | float | str | bool) | None) -> SigmaType: """Return Sigma type from Python value""" # Check bool before int since bool is a subclass of int in Python - vtype = type(v) - if vtype is bool: + if isinstance(v, bool): return SigmaBool(v) - matched = type_map.get(vtype) + matched = type_map.get(type(v)) if matched is not None: return matched(v) # Fallback to isinstance checks for subclasses From 5c8a3d623884978aedd45b30dabeac24bb0b0058 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 6 Apr 2026 23:13:24 +0000 Subject: [PATCH 4/4] Fix docstring for _parse_condition_string to clarify deep-copy responsibility Agent-Logs-Url: https://github.com/SigmaHQ/pySigma/sessions/29062a21-3d71-4f22-b83a-15c994c5ead9 Co-authored-by: thomaspatzke <1845601+thomaspatzke@users.noreply.github.com> --- sigma/conditions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sigma/conditions.py b/sigma/conditions.py index 88b17106..7a1127ff 100644 --- a/sigma/conditions.py +++ b/sigma/conditions.py @@ -294,7 +294,7 @@ def _parse_condition_string( ) -> ConditionItem: """Parse a condition string using pyparsing, with caching for repeated strings. - Results are deep-copied on retrieval since postprocessing mutates the parse tree. + Callers must deep-copy the returned result since postprocessing mutates the parse tree. """ return cast(ConditionItem, condition.parse_string(condition_str, parse_all=True)[0])