From 5810fa9e6b936fc31435dc2208e1d2e6bf3f0589 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Feb 2026 21:35:35 +0000 Subject: [PATCH 1/3] Initial plan From 411b5b5979b3ddccd275028e7de9c8cbaadbfdfd Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Feb 2026 21:38:32 +0000 Subject: [PATCH 2/3] Add input validation for Solr queries to prevent injection attacks Co-authored-by: alexdryden <47127862+alexdryden@users.noreply.github.com> --- arcflow/main.py | 67 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/arcflow/main.py b/arcflow/main.py index 3d503c0..5686d00 100644 --- a/arcflow/main.py +++ b/arcflow/main.py @@ -745,6 +745,61 @@ def _get_nontarget_agent_criteria(self, modified_since=0): return criteria + def _validate_solr_query_part(self, query_part): + """ + Validate a Solr query part to prevent injection attacks. 
+ + Args: + query_part (str): A single query part to validate + + Returns: + bool: True if the query part appears safe, False otherwise + """ + if not isinstance(query_part, str): + return False + + # Reject empty or whitespace-only strings + if not query_part.strip(): + return False + + # Check for suspicious patterns that could indicate injection attempts + # Allow standard Solr query syntax but reject obvious malicious patterns + suspicious_patterns = [ + r';\s*\w', # Semicolon followed by word character (command injection) + r'\$\{', # Variable interpolation attempts + r'`', # Command substitution + r'\|\|', # Shell command chaining (|| operator) + r'&&\s*[a-zA-Z]', # Shell command chaining (but allow Solr AND operator) + ] + + for pattern in suspicious_patterns: + if re.search(pattern, query_part): + self.log.error(f"Rejected suspicious query part: {query_part}") + return False + + return True + + def _validate_solr_field_name(self, field_name): + """ + Validate a Solr field name to prevent injection attacks. + + Args: + field_name (str): A field name to validate + + Returns: + bool: True if the field name appears safe, False otherwise + """ + if not isinstance(field_name, str): + return False + + # Field names should only contain alphanumeric characters, underscores, and hyphens + # Stricter than Solr's actual requirements; fullmatch (unlike match with a trailing '$') also rejects a trailing newline + if not re.fullmatch(r'[a-zA-Z0-9_-]+', field_name): + self.log.error(f"Rejected invalid field name: {field_name}") + return False + + return True + def _execute_solr_query(self, query_parts, solr_url=None, fields=['id'], indent_size=0): """ A generic function to execute a query against the Solr index. 
@@ -765,6 +820,18 @@ def _execute_solr_query(self, query_parts, solr_url=None, fields=['id'], indent_ if not solr_url: solr_url = self.solr_url + # Validate all query parts before constructing the query + for part in query_parts: + if not self._validate_solr_query_part(part): + self.log.error(f"Query validation failed for: {part}") + return [] + + # Validate all field names + for field in fields: + if not self._validate_solr_field_name(field): + self.log.error(f"Field name validation failed for: {field}") + return [] + query_string = " AND ".join(query_parts) self.log.info(f"{indent}Executing Solr query: {query_string}") From 682cb1809a9552e8660e5ac1d488a440b2bb21ca Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 26 Feb 2026 21:40:56 +0000 Subject: [PATCH 3/3] Enhance validation to include Solr-specific injection patterns Co-authored-by: alexdryden <47127862+alexdryden@users.noreply.github.com> --- arcflow/main.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/arcflow/main.py b/arcflow/main.py index 5686d00..b3d6903 100644 --- a/arcflow/main.py +++ b/arcflow/main.py @@ -749,6 +749,11 @@ def _validate_solr_query_part(self, query_part): """ Validate a Solr query part to prevent injection attacks. + This validation protects against both shell command injection and + Solr-specific query injection attacks. While the current code only uses + internally-controlled strings, this defense-in-depth approach prevents + future security issues if user-controlled data is added. 
+ Args: query_part (str): A single query part to validate @@ -763,17 +768,24 @@ def _validate_solr_query_part(self, query_part): return False # Check for suspicious patterns that could indicate injection attempts - # Allow standard Solr query syntax but reject obvious malicious patterns + # This includes both shell injection and Solr-specific patterns suspicious_patterns = [ - r';\s*\w', # Semicolon followed by word character (command injection) - r'\$\{', # Variable interpolation attempts - r'`', # Command substitution - r'\|\|', # Shell command chaining (|| operator) - r'&&\s*[a-zA-Z]', # Shell command chaining (but allow Solr AND operator) + # Shell/command injection patterns + r';\s*\w', # Semicolon followed by word character + r'\$\{', # Variable interpolation attempts (e.g., Log4Shell) + r'`', # Command substitution + r'\|\|', # Shell command chaining (|| operator) + r'&&\s*[a-zA-Z]', # Shell command chaining (but allow Solr AND operator) + + # Solr-specific injection patterns + r'\{!', # LocalParams injection attempts (e.g., {!type=xmlparser}) + r'