Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 19 additions & 23 deletions src/agents/acknowledgment_agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@
Intelligent Acknowledgment Agent for evaluating violation acknowledgment requests.
"""

import logging
from typing import Any

import structlog
from langchain_core.messages import HumanMessage, SystemMessage
from langgraph.graph import StateGraph

from src.agents.acknowledgment_agent.prompts import create_evaluation_prompt, get_system_prompt
from src.agents.base import AgentResult, BaseAgent

logger = logging.getLogger(__name__)
logger = structlog.get_logger()


class AcknowledgmentAgent(BaseAgent):
Expand All @@ -29,7 +29,7 @@ def __init__(self, max_retries: int = 3, timeout: float = 30.0):
# Call super class __init__ first
super().__init__(max_retries=max_retries)
self.timeout = timeout
logger.info(f"🧠 Acknowledgment agent initialized with timeout: {timeout}s")
logger.info("acknowledgment_agent_initialized_with_timeout_s", timeout=timeout)

def _build_graph(self) -> StateGraph:
"""
Expand Down Expand Up @@ -62,7 +62,7 @@ async def _evaluate_node(self, state):
)
return result
except Exception as e:
logger.error(f"🧠 Error in evaluation node: {e}")
logger.error("error_in_evaluation_node", e=e)
return AgentResult(success=False, message=f"Evaluation failed: {str(e)}", data={"error": str(e)})

@staticmethod
Expand All @@ -85,8 +85,8 @@ async def evaluate_acknowledgment(
Intelligently evaluate an acknowledgment request based on rule descriptions and context.
"""
try:
logger.info(f"🧠 Evaluating acknowledgment request from {commenter}")
logger.info(f"🧠 Reason: {acknowledgment_reason}")
logger.info("evaluating_acknowledgment_request_from", commenter=commenter)
logger.info("reason", acknowledgment_reason=acknowledgment_reason)
logger.info(f"🧠 Violations to evaluate: {len(violations)}")
Comment on lines +88 to 90
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Remove raw acknowledgment reason and LLM reasoning from logs.

Both fields may contain sensitive content and should not be emitted verbatim. Keep only safe summaries/metadata.

Safer logging example
-            logger.info("reason", acknowledgment_reason=acknowledgment_reason)
+            logger.info("acknowledgment_reason_received", reason_length=len(acknowledgment_reason))
...
-            logger.info("reasoning", reasoning=structured_result.reasoning)
+            logger.info("acknowledgment_reasoning_generated", reasoning_present=bool(structured_result.reasoning))

Based on learnings "Applies to **/*.py : Strip secrets/PII from agent prompts; scope tools; keep raw reasoning out of logs (store summaries only)".

Also applies to: 184-187

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/agents/acknowledgment_agent/agent.py`, around lines 88-90, the logs
currently emit raw sensitive fields (acknowledgment_reason and the LLM's raw
reasoning). Update the logging in the acknowledgment agent so that it stops
printing this content verbatim.

First, in the block that calls
logger.info("evaluating_acknowledgment_request_from", commenter=commenter) and
logger.info("reason", acknowledgment_reason=acknowledgment_reason), replace the
raw-field logs with non-sensitive metadata (e.g., redacted/summarized flags,
lengths, a hash, or boolean indicators such as has_sensitive_content), and keep
only safe counts such as len(violations).

Second, locate the later emission of the LLM reasoning (in the LLM response
handling around the referenced lines ~184-187) and log only a sanitized summary
or metadata rather than the full LLM output.

Variables/functions to change: acknowledgment_reason, violations, commenter, and
the LLM response handling code in this module.


# Validate inputs
Expand All @@ -101,7 +101,7 @@ async def evaluate_acknowledgment(
evaluation_prompt = create_evaluation_prompt(acknowledgment_reason, violations, pr_data, commenter, rules)

# Get LLM evaluation with structured output
logger.info("🧠 Requesting LLM evaluation with structured output...")
logger.info("requesting_llm_evaluation_with_structured_output")

# Use the same pattern as engine agent: direct structured output
from langchain_openai import ChatOpenAI
Expand All @@ -117,12 +117,12 @@ async def evaluate_acknowledgment(
structured_result = await self._execute_with_timeout(structured_llm.ainvoke(messages), timeout=self.timeout)

if not structured_result:
logger.error("🧠 Empty LLM response received")
logger.error("empty_llm_response_received")
return AgentResult(
success=False, message="Empty response from LLM", data={"error": "LLM returned empty response"}
)

logger.info("🧠 Successfully received structured LLM evaluation result")
logger.info("successfully_received_structured_llm_evaluation_result")

# Map LLM decisions back to original violations using rule_description
acknowledgable_violations = []
Expand All @@ -143,11 +143,9 @@ async def evaluate_acknowledgment(
# Fallback: try to find by rule_description
original_violation = self._find_violation_by_rule_description(rule_description, violations)
if original_violation:
logger.info(f"🧠 Found violation by rule description: '{rule_description}'")
logger.info("found_violation_by_rule_description", rule_description=rule_description)
else:
logger.warning(
f"🧠 LLM returned rule_description '{rule_description}' not found in original violations"
)
logger.warning("llm_returned_ruledescription_not_found_in", rule_description=rule_description)

if original_violation:
violation_copy = original_violation.copy()
Expand All @@ -173,24 +171,22 @@ async def evaluate_acknowledgment(
# Fallback: try to find by rule_description
original_violation = self._find_violation_by_rule_description(rule_description, violations)
if original_violation:
logger.info(f"🧠 Found violation by rule description: '{rule_description}'")
logger.info("found_violation_by_rule_description", rule_description=rule_description)
else:
logger.warning(
f"🧠 LLM returned rule_description '{rule_description}' not found in original violations"
)
logger.warning("llm_returned_ruledescription_not_found_in", rule_description=rule_description)

if original_violation:
violation_copy = original_violation.copy()
# Add fix-specific fields
violation_copy.update({"fix_reason": llm_violation.reason, "priority": llm_violation.priority})
require_fixes.append(violation_copy)

logger.info("🧠 Intelligent evaluation completed:")
logger.info(f" Valid: {structured_result.is_valid}")
logger.info(f" Reasoning: {structured_result.reasoning}")
logger.info("intelligent_evaluation_completed")
logger.info("valid", is_valid=structured_result.is_valid)
logger.info("reasoning", reasoning=structured_result.reasoning)
logger.info(f" Acknowledged violations: {len(acknowledgable_violations)}")
logger.info(f" Require fixes: {len(require_fixes)}")
logger.info(f" Confidence: {structured_result.confidence}")
logger.info("confidence", confidence=structured_result.confidence)

return AgentResult(
success=True,
Expand All @@ -206,7 +202,7 @@ async def evaluate_acknowledgment(
)

except Exception as e:
logger.error(f"🧠 Error in acknowledgment evaluation: {e}")
logger.error("error_in_acknowledgment_evaluation", e=e)
import traceback

logger.error(f"🧠 Traceback: {traceback.format_exc()}")
Expand All @@ -218,5 +214,5 @@ async def execute(self, event_type: str, event_data: dict[str, Any], rules: list
"""
Legacy method for compatibility - not used for acknowledgment evaluation.
"""
logger.warning("🧠 execute() method called on AcknowledgmentAgent - this should not happen")
logger.warning("execute_method_called_on_acknowledgmentagent_this")
return AgentResult(success=False, message="AcknowledgmentAgent does not support execute() method", data={})
22 changes: 11 additions & 11 deletions src/agents/acknowledgment_agent/test_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
"""

import asyncio
import logging

import structlog

from .agent import AcknowledgmentAgent

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger = structlog.get_logger()


async def test_acknowledgment_agent():
Expand Down Expand Up @@ -69,7 +69,7 @@ async def test_acknowledgment_agent():
},
]

logger.info("🧠 Testing Intelligent Acknowledgment Agent...")
logger.info("testing_intelligent_acknowledgment_agent")

try:
# Test evaluation
Expand All @@ -82,7 +82,7 @@ async def test_acknowledgment_agent():
)

if result.success:
logger.info("✅ Acknowledgment evaluation completed successfully")
logger.info("acknowledgment_evaluation_completed_successfully")
logger.info(f" Valid: {result.data.get('is_valid', False)}")
logger.info(f" Reasoning: {result.data.get('reasoning', 'No reasoning')}")
logger.info(f" Acknowledged violations: {len(result.data.get('acknowledgable_violations', []))}")
Expand All @@ -91,25 +91,25 @@ async def test_acknowledgment_agent():

# Print detailed results
if result.data.get("acknowledgable_violations"):
logger.info("\n📋 Acknowledged Violations:")
logger.info("n_acknowledged_violations")
for violation in result.data["acknowledgable_violations"]:
logger.info(f" • {violation.get('rule_name')} - {violation.get('reason')}")

if result.data.get("require_fixes"):
logger.info("\n⚠️ Violations Requiring Fixes:")
logger.info("n_violations_requiring_fixes")
for violation in result.data["require_fixes"]:
logger.info(f" • {violation.get('rule_name')} - {violation.get('reason')}")

if result.data.get("recommendations"):
logger.info("\n💡 Recommendations:")
logger.info("n_recommendations")
for rec in result.data["recommendations"]:
logger.info(f" • {rec}")
logger.info("event", rec=rec)

else:
logger.error(f"❌ Acknowledgment evaluation failed: {result.message}")
logger.error("acknowledgment_evaluation_failed", message=result.message)

except Exception as e:
logger.error(f"❌ Test failed with error: {e}")
logger.error("test_failed_with_error", e=e)


if __name__ == "__main__":
Expand Down
12 changes: 6 additions & 6 deletions src/agents/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
"""

import asyncio
import logging
from abc import ABC, abstractmethod
from typing import Any, TypeVar

import structlog
from langchain_openai import ChatOpenAI

from src.core.config import config

logger = logging.getLogger(__name__)
logger = structlog.get_logger()

T = TypeVar("T")

Expand Down Expand Up @@ -53,7 +53,7 @@ def __init__(self, max_retries: int = 3, retry_delay: float = 1.0):
temperature=config.ai.temperature,
)
self.graph = self._build_graph()
logger.info(f"🔧 {self.__class__.__name__} initialized with max_retries={max_retries}")
logger.info("initialized_with_maxretries", __name__=self.__class__.__name__, max_retries=max_retries)

@abstractmethod
def _build_graph(self):
Expand Down Expand Up @@ -82,15 +82,15 @@ async def _retry_structured_output(self, llm, output_model, prompt, **kwargs) ->
try:
result = await structured_llm.ainvoke(prompt, **kwargs)
if attempt > 0:
logger.info(f"✅ Structured output succeeded on attempt {attempt + 1}")
logger.info("structured_output_succeeded_on_attempt")
return result
except Exception as e:
if attempt == self.max_retries - 1:
logger.error(f"❌ Structured output failed after {self.max_retries} attempts: {e}")
logger.error("structured_output_failed_after_attempts", max_retries=self.max_retries, e=e)
raise Exception(f"Structured output failed after {self.max_retries} attempts: {str(e)}") from e

wait_time = self.retry_delay * (2**attempt)
logger.warning(f"⚠️ Structured output attempt {attempt + 1} failed, retrying in {wait_time}s: {e}")
logger.warning("structured_output_attempt_failed_retrying_in", wait_time=wait_time, e=e)
await asyncio.sleep(wait_time)

raise Exception(f"Structured output failed after {self.max_retries} attempts")
Expand Down
22 changes: 11 additions & 11 deletions src/agents/engine_agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
Focuses on rule descriptions and parameters, using fast validators with LLM reasoning as fallback.
"""

import logging
import time
from typing import Any

import structlog
from langgraph.graph import END, START, StateGraph

from src.agents.base import AgentResult, BaseAgent
Expand All @@ -28,7 +28,7 @@
)
from src.rules.validators import get_validator_descriptions

logger = logging.getLogger(__name__)
logger = structlog.get_logger()


class RuleEngineAgent(BaseAgent):
Expand All @@ -47,9 +47,9 @@ def __init__(self, max_retries: int = 3, timeout: float = 60.0):
super().__init__(max_retries=max_retries)
self.timeout = timeout

logger.info("🔧 Rule Engine agent initializing...")
logger.info("rule_engine_agent_initializing")
logger.info(f"🔧 Available validators: {list(get_validator_descriptions())}")
logger.info("🔧 Validation strategy: Hybrid (validators + LLM fallback)")
logger.info("validation_strategy_hybrid_validators_llm_fallback")

def _build_graph(self) -> StateGraph:
"""Build the LangGraph workflow for hybrid rule evaluation."""
Expand Down Expand Up @@ -102,13 +102,13 @@ async def execute(self, event_type: str, event_data: dict[str, Any], rules: list
llm_usage=0,
)

logger.info("🔧 Rule Engine initial state prepared")
logger.info("rule_engine_initial_state_prepared")

# Run the hybrid graph with timeout
result = await self._execute_with_timeout(self.graph.ainvoke(initial_state), timeout=self.timeout)

execution_time = time.time() - start_time
logger.info(f"🔧 Rule Engine evaluation completed in {execution_time:.2f}s")
logger.info("rule_engine_evaluation_completed_in_s")

# Extract violations from result
violations = result.violations if hasattr(result, "violations") else []
Expand Down Expand Up @@ -143,9 +143,9 @@ async def execute(self, event_type: str, event_data: dict[str, Any], rules: list
llm_usage=result.llm_usage if hasattr(result, "llm_usage") else 0,
)

logger.info("🔧 Rule Engine evaluation completed successfully")
logger.info(f"🔧 Validator usage: {evaluation_result.validator_usage}")
logger.info(f"🔧 LLM usage: {evaluation_result.llm_usage} calls")
logger.info("rule_engine_evaluation_completed_successfully")
logger.info("validator_usage", validator_usage=evaluation_result.validator_usage)
logger.info("llm_usage_calls", llm_usage=evaluation_result.llm_usage)

return AgentResult(
success=len(violations) == 0,
Expand All @@ -160,7 +160,7 @@ async def execute(self, event_type: str, event_data: dict[str, Any], rules: list
)
except Exception as e:
execution_time = time.time() - start_time
logger.error(f"🔧 Error in Rule Engine evaluation: {e}")
logger.error("error_in_rule_engine_evaluation", e=e)
return AgentResult(
success=False,
message=f"Rule Engine evaluation failed: {str(e)}",
Expand Down Expand Up @@ -233,5 +233,5 @@ async def evaluate(

async def evaluate_pull_request(self, rules: list[Any], event_data: dict[str, Any]) -> dict[str, Any]:
"""Legacy method for backwards compatibility."""
logger.warning("evaluate_pull_request is deprecated. Use evaluate() with event_type='pull_request'")
logger.warning("evaluatepullrequest_is_deprecated_use_evaluate_with")
return await self.evaluate("pull_request", rules, event_data, "")
4 changes: 2 additions & 2 deletions src/agents/engine_agent/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
Data models for the Rule Engine Agent.
"""

from enum import Enum
from enum import StrEnum
from typing import Any

from pydantic import BaseModel, Field


class ValidationStrategy(str, Enum):
class ValidationStrategy(StrEnum):
"""Validation strategies for rule evaluation."""

VALIDATOR = "validator" # Use fast validator
Expand Down
Loading