diff --git a/arcflow/main.py b/arcflow/main.py index 3d503c0..b3d6903 100644 --- a/arcflow/main.py +++ b/arcflow/main.py @@ -745,6 +745,73 @@ def _get_nontarget_agent_criteria(self, modified_since=0): return criteria + def _validate_solr_query_part(self, query_part): + """ + Validate a Solr query part to prevent injection attacks. + + This validation protects against both shell command injection and + Solr-specific query injection attacks. While the current code only uses + internally-controlled strings, this defense-in-depth approach prevents + future security issues if user-controlled data is added. + + Args: + query_part (str): A single query part to validate + + Returns: + bool: True if the query part appears safe, False otherwise + """ + if not isinstance(query_part, str): + return False + + # Reject empty or whitespace-only strings + if not query_part.strip(): + return False + + # Check for suspicious patterns that could indicate injection attempts + # This includes both shell injection and Solr-specific patterns + suspicious_patterns = [ + # Shell/command injection patterns + r';\s*\w', # Semicolon followed by word character + r'\$\{', # Variable interpolation attempts (e.g., Log4Shell) + r'`', # Command substitution + r'\|\|', # Shell command chaining (|| operator) + r'&&\s*[a-zA-Z]', # Shell command chaining (but allow Solr AND operator) + + # Solr-specific injection patterns + r'\{!', # LocalParams injection attempts (e.g., {!type=xmlparser}) + r'