From 5fef5e7ed6d5b5377cc167e8456c5131651a8b00 Mon Sep 17 00:00:00 2001
From: haridammu <dammuvinay.143@gmail.com>
Date: Sat, 28 Mar 2026 14:33:22 +0530
Subject: [PATCH 1/2] feat: add input-side incident data validation gate

Fixes #305

This commit adds a validation layer that catches incomplete or malformed
incident data before PDF generation begins. This prevents downstream
failures and ensures data integrity in the form-filling pipeline.

## New Files

- src/validator.py: Comprehensive validation module with:
  - IncidentValidator class for configurable validation
  - validate_incident() convenience function
  - validate_incident_strict() for detailed error info
  - ValidationError and ValidationResult dataclasses
  - Support for custom required fields per agency

- tests/test_validator.py: 25+ test cases covering:
  - Valid input handling
  - Missing required fields
  - Empty/whitespace-only fields
  - LLM's "-1" not-found marker
  - Type validation (dict required)
  - List field validation (plural values)
  - Edge cases (unicode, special chars, long values)

## Modified Files

- src/filler.py:
  - Added FormValidationError exception class
  - Integrated validation before PDF filling
  - Added skip_validation option for backward compatibility
  - Added required_fields parameter

- api/errors/base.py:
  - Added ValidationError class (HTTP 422)

- api/errors/handlers.py:
  - Added handler for ValidationError
  - Added handler for FormValidationError
  - Returns detailed validation errors in response

## API Response Format

Failed validation returns HTTP 422 with:
{
  "error": "Incident data validation failed",
  "validation_errors": ["Field 'location' is missing", ...],
  "extracted_data": {...}
}

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 api/errors/base.py      |  10 +-
 api/errors/handlers.py  |  25 +-
 src/filler.py           |  44 +++-
 src/validator.py        | 561 ++++++++++++++++++++++++++++++++++++++++
 tests/test_validator.py | 329 +++++++++++++++++++++++
 5 files changed, 964 insertions(+), 5 deletions(-)
 create mode 100644 src/validator.py
 create mode 100644 tests/test_validator.py

diff --git a/api/errors/base.py b/api/errors/base.py
index 1f81a08..76fe162 100644
--- a/api/errors/base.py
+++ b/api/errors/base.py
@@ -1,4 +1,12 @@
 class AppError(Exception):
     def __init__(self, message: str, status_code: int = 400):
         self.message = message
-        self.status_code = status_code
\ No newline at end of file
+        self.status_code = status_code
+
+
+class ValidationError(AppError):
+    """Raised when input validation fails."""
+
+    def __init__(self, message: str, errors: list[str] = None):
+        super().__init__(message, status_code=422)
+        self.errors = errors or []
\ No newline at end of file
diff --git a/api/errors/handlers.py b/api/errors/handlers.py
index 903e744..509896a 100644
--- a/api/errors/handlers.py
+++ b/api/errors/handlers.py
@@ -1,6 +1,8 @@
 from fastapi import Request
 from fastapi.responses import JSONResponse
-from api.errors.base import AppError
+from api.errors.base import AppError, ValidationError
+from src.filler import FormValidationError
+
 
 def register_exception_handlers(app):
     @app.exception_handler(AppError)
@@ -8,4 +10,25 @@ async def app_error_handler(request: Request, exc: AppError):
         return JSONResponse(
             status_code=exc.status_code,
             content={"error": exc.message},
+        )
+
+    @app.exception_handler(ValidationError)
+    async def validation_error_handler(request: Request, exc: ValidationError):
+        return JSONResponse(
+            status_code=422,
+            content={
+                "error": exc.message,
+                "validation_errors": exc.errors
+            },
+        )
+
+    @app.exception_handler(FormValidationError)
+    async def form_validation_error_handler(request: Request, exc: FormValidationError):
+        return JSONResponse(
+            status_code=422,
+            content={
+                "error": "Incident data validation failed",
+                "validation_errors": exc.errors,
+                "extracted_data": exc.data
+            },
         )
\ No newline at end of file
diff --git a/src/filler.py b/src/filler.py
index e31e535..d4b0f30 100644
--- a/src/filler.py
+++ b/src/filler.py
@@ -1,16 +1,41 @@
 from pdfrw import PdfReader, PdfWriter
 from src.llm import LLM
+from src.validator import validate_incident
 from datetime import datetime
 
 
+class FormValidationError(Exception):
+    """Raised when incident data validation fails before PDF filling."""
+
+    def __init__(self, errors: list[str], data: dict = None):
+        self.errors = errors
+        self.data = data
+        message = f"Validation failed with {len(errors)} error(s): {'; '.join(errors)}"
+        super().__init__(message)
+
+
 class Filler:
-    def __init__(self):
-        pass
+    def __init__(self, skip_validation: bool = False):
+        """
+        Initialize the Filler.
+
+        Args:
+            skip_validation: If True, skips input validation. Use with caution.
+        """
+        self.skip_validation = skip_validation
 
-    def fill_form(self, pdf_form: str, llm: LLM):
+    def fill_form(self, pdf_form: str, llm: LLM, required_fields: list[str] | None = None):
         """
         Fill a PDF form with values from user_input using LLM.
         Fields are filled in the visual order (top-to-bottom, left-to-right).
+
+        Args:
+            pdf_form: Path to the PDF template.
+            llm: LLM instance with transcript and target fields set.
+            required_fields: Optional list of fields that must be present and valid.
+
+        Raises:
+            FormValidationError: If extracted data fails validation.
         """
         output_pdf = (
             pdf_form[:-4]
@@ -23,6 +48,19 @@ def fill_form(self, pdf_form: str, llm: LLM):
         t2j = llm.main_loop()
         textbox_answers = t2j.get_data()  # This is a dictionary
 
+        # Validate extracted data before PDF filling
+        if not self.skip_validation:
+            print("[VALIDATION] Validating extracted incident data...")
+            validation_errors = validate_incident(textbox_answers, required_fields)
+
+            if validation_errors:
+                print(f"[VALIDATION FAILED] {len(validation_errors)} error(s) found:")
+                for error in validation_errors:
+                    print(f"  - {error}")
+                raise FormValidationError(errors=validation_errors, data=textbox_answers)
+
+            print("[VALIDATION PASSED] All required fields are valid.")
+
         answers_list = list(textbox_answers.values())
 
         # Read PDF
diff --git a/src/validator.py b/src/validator.py
new file mode 100644
index 0000000..0c785f5
--- /dev/null
+++ b/src/validator.py
@@ -0,0 +1,561 @@
+"""
+Incident Data Validation Module for FireForm.
+
+This module provides comprehensive input-side validation for incident data,
+ensuring data integrity at multiple stages of the pipeline:
+
+1. Transcript Validation: Validates raw user input before LLM processing
+2. Incident Data Validation: Validates extracted data before PDF generation
+3. Template Field Validation: Validates template configuration
+
+The validator catches incomplete or malformed data early to prevent downstream
+failures and provides clear, actionable error messages.
+
+Author: dhanasai2
+Fixes: https://github.com/fireform-core/FireForm/issues/305
+"""
+
+from typing import Any, Callable, Optional, Union
+from dataclasses import dataclass, field
+from enum import Enum
+import re
+
+
+class ErrorSeverity(Enum):
+    """Severity levels for validation issues."""
+    ERROR = "error"      # Blocks processing - must be fixed
+    WARNING = "warning"  # Allows processing but logs concern
+
+
+@dataclass
+class ValidationError:
+    """Represents a single validation error with full context."""
+    field: str
+    message: str
+    error_type: str  # 'missing', 'empty', 'invalid_type', 'invalid_format', 'too_short'
+    severity: ErrorSeverity = ErrorSeverity.ERROR
+    value: Any = None  # The actual value that failed validation
+
+    def to_dict(self) -> dict:
+        """Convert to dictionary for JSON serialization."""
+        return {
+            "field": self.field,
+            "message": self.message,
+            "error_type": self.error_type,
+            "severity": self.severity.value,
+            "value": self._safe_value_repr()
+        }
+
+    def _safe_value_repr(self) -> str:
+        """Return a safe string representation of the value."""
+        if self.value is None:
+            return "null"
+        val_str = str(self.value)
+        return val_str[:100] + "..." if len(val_str) > 100 else val_str
+
+
+@dataclass
+class ValidationResult:
+    """Contains the complete result of validation operation."""
+    is_valid: bool
+    errors: list[ValidationError] = field(default_factory=list)
+    warnings: list[ValidationError] = field(default_factory=list)
+
+    def to_dict(self) -> dict:
+        """Convert to dictionary for JSON serialization."""
+        return {
+            "is_valid": self.is_valid,
+            "errors": [e.to_dict() for e in self.errors],
+            "warnings": [w.to_dict() for w in self.warnings],
+            "error_count": len(self.errors),
+            "warning_count": len(self.warnings)
+        }
+
+    def get_error_messages(self) -> list[str]:
+        """Return list of error messages for simple display."""
+        return [e.message for e in self.errors]
+
+    def get_all_messages(self) -> list[str]:
+        """Return all messages including warnings."""
+        return [e.message for e in self.errors + self.warnings]
+
+    def raise_if_invalid(self) -> None:
+        """Raise ValidationException if validation failed."""
+        if not self.is_valid:
+            raise ValidationException(
+                errors=self.errors,
+                message=f"Validation failed with {len(self.errors)} error(s)"
+            )
+
+
+class ValidationException(Exception):
+    """Exception raised when validation fails."""
+
+    def __init__(self, errors: list[ValidationError], message: str = "Validation failed"):
+        self.errors = errors
+        self.message = message
+        super().__init__(self.message)
+
+    def to_dict(self) -> dict:
+        """Convert to dictionary for JSON/API response."""
+        return {
+            "message": self.message,
+            "errors": [e.to_dict() for e in self.errors]
+        }
+
+    def get_error_messages(self) -> list[str]:
+        """Return list of error messages."""
+        return [e.message for e in self.errors]
+
+
+class TranscriptValidator:
+    """
+    Validates raw transcript/user input before LLM processing.
+
+    This ensures the input is suitable for processing and catches
+    obvious issues before expensive LLM calls.
+    """
+
+    # Minimum transcript length for meaningful extraction
+    MIN_TRANSCRIPT_LENGTH = 10
+
+    # Maximum transcript length to prevent abuse
+    MAX_TRANSCRIPT_LENGTH = 50000
+
+    # Keywords that suggest valid incident content
+    INCIDENT_KEYWORDS = [
+        "fire", "accident", "emergency", "incident", "respond", "call",
+        "arrived", "scene", "victim", "injured", "damage", "rescue",
+        "ambulance", "police", "report", "dispatch", "location", "address"
+    ]
+
+    def validate(self, transcript: Any) -> ValidationResult:
+        """
+        Validate transcript input.
+
+        Args:
+            transcript: The raw transcript text to validate.
+
+        Returns:
+            ValidationResult with validation status and any errors/warnings.
+        """
+        errors: list[ValidationError] = []
+        warnings: list[ValidationError] = []
+
+        # Type validation
+        if transcript is None:
+            errors.append(ValidationError(
+                field="transcript",
+                message="Transcript cannot be None",
+                error_type="invalid_type",
+                value=transcript
+            ))
+            return ValidationResult(is_valid=False, errors=errors)
+
+        if not isinstance(transcript, str):
+            errors.append(ValidationError(
+                field="transcript",
+                message=f"Transcript must be a string, got {type(transcript).__name__}",
+                error_type="invalid_type",
+                value=transcript
+            ))
+            return ValidationResult(is_valid=False, errors=errors)
+
+        # Empty/whitespace validation
+        if not transcript.strip():
+            errors.append(ValidationError(
+                field="transcript",
+                message="Transcript cannot be empty or contain only whitespace",
+                error_type="empty",
+                value=repr(transcript)
+            ))
+            return ValidationResult(is_valid=False, errors=errors)
+
+        # Length validation
+        clean_transcript = transcript.strip()
+        if len(clean_transcript) < self.MIN_TRANSCRIPT_LENGTH:
+            errors.append(ValidationError(
+                field="transcript",
+                message=f"Transcript too short ({len(clean_transcript)} chars). "
+                        f"Minimum {self.MIN_TRANSCRIPT_LENGTH} characters required for meaningful extraction.",
+                error_type="too_short",
+                value=clean_transcript
+            ))
+
+        if len(clean_transcript) > self.MAX_TRANSCRIPT_LENGTH:
+            errors.append(ValidationError(
+                field="transcript",
+                message=f"Transcript too long ({len(clean_transcript)} chars). "
+                        f"Maximum {self.MAX_TRANSCRIPT_LENGTH} characters allowed.",
+                error_type="too_long",
+                value=f"[{len(clean_transcript)} characters]"
+            ))
+
+        # Check for incident-related content (warning only)
+        if not self._contains_incident_keywords(clean_transcript):
+            warnings.append(ValidationError(
+                field="transcript",
+                message="Transcript may not contain incident-related content. "
+                        "Extraction accuracy may be reduced.",
+                error_type="content_warning",
+                severity=ErrorSeverity.WARNING,
+                value=clean_transcript[:100]
+            ))
+
+        return ValidationResult(
+            is_valid=len(errors) == 0,
+            errors=errors,
+            warnings=warnings
+        )
+
+    def _contains_incident_keywords(self, text: str) -> bool:
+        """Check if text contains any incident-related keywords."""
+        text_lower = text.lower()
+        return any(keyword in text_lower for keyword in self.INCIDENT_KEYWORDS)
+
+
+class IncidentValidator:
+    """
+    Validates incident data extracted from LLM before PDF generation.
+
+    This validator ensures that required fields are present and properly formatted
+    before the data is used to fill PDF forms. It's designed to be configurable
+    for different agency requirements.
+    """
+
+    # Default required fields for incident reports
+    DEFAULT_REQUIRED_FIELDS = [
+        "incident_type",
+        "location",
+        "time",
+        "date",
+        "reporting_officer"
+    ]
+
+    # Fields that commonly have specific format requirements
+    FORMAT_PATTERNS = {
+        "date": r"^\d{1,2}[/-]\d{1,2}[/-]\d{2,4}$",
+        "time": r"^\d{1,2}:\d{2}(:\d{2})?\s*(AM|PM|am|pm)?$",
+        "phone": r"^\+?[\d\s\-\(\)]{7,}$",
+        "email": r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"
+    }
+
+    def __init__(self, required_fields: list[str] | None = None):
+        """
+        Initialize the validator with configurable required fields.
+
+        Args:
+            required_fields: List of field names that must be present and non-empty.
+                           If None, uses DEFAULT_REQUIRED_FIELDS.
+        """
+        self.required_fields = required_fields or self.DEFAULT_REQUIRED_FIELDS
+
+    def validate(self, incident_data: Any) -> ValidationResult:
+        """
+        Main validation entry point. Validates incident data dictionary.
+
+        Args:
+            incident_data: Dictionary of extracted incident data from LLM.
+
+        Returns:
+            ValidationResult with is_valid flag and list of any errors found.
+        """
+        errors: list[ValidationError] = []
+
+        # Type validation - must be a dictionary
+        if not isinstance(incident_data, dict):
+            errors.append(ValidationError(
+                field="incident_data",
+                message=f"Incident data must be a dictionary, got {type(incident_data).__name__}",
+                error_type="invalid_type"
+            ))
+            return ValidationResult(is_valid=False, errors=errors)
+
+        # Check for empty dictionary
+        if not incident_data:
+            errors.append(ValidationError(
+                field="incident_data",
+                message="Incident data cannot be empty",
+                error_type="empty"
+            ))
+            return ValidationResult(is_valid=False, errors=errors)
+
+        # Validate required fields
+        errors.extend(self._validate_required_fields(incident_data))
+
+        # Validate field values are not empty/whitespace
+        errors.extend(self._validate_field_values(incident_data))
+
+        # Validate field formats where applicable
+        errors.extend(self._validate_field_formats(incident_data))
+
+        return ValidationResult(
+            is_valid=len(errors) == 0,
+            errors=errors
+        )
+
+    def _validate_required_fields(self, data: dict) -> list[ValidationError]:
+        """Check that all required fields are present in the data."""
+        errors = []
+
+        for field_name in self.required_fields:
+            if field_name not in data:
+                errors.append(ValidationError(
+                    field=field_name,
+                    message=f"Required field '{field_name}' is missing",
+                    error_type="missing"
+                ))
+
+        return errors
+
+    def _validate_field_values(self, data: dict) -> list[ValidationError]:
+        """Check that field values are not empty, None, or whitespace-only."""
+        errors = []
+
+        for field_name, value in data.items():
+            # Check for None values
+            if value is None:
+                errors.append(ValidationError(
+                    field=field_name,
+                    message=f"Field '{field_name}' has null value",
+                    error_type="empty"
+                ))
+                continue
+
+            # Check for LLM's "not found" indicator
+            if value == "-1":
+                errors.append(ValidationError(
+                    field=field_name,
+                    message=f"Field '{field_name}' was not found in the input text",
+                    error_type="empty"
+                ))
+                continue
+
+            # Check for empty strings or whitespace-only strings
+            if isinstance(value, str):
+                if not value.strip():
+                    errors.append(ValidationError(
+                        field=field_name,
+                        message=f"Field '{field_name}' is empty or contains only whitespace",
+                        error_type="empty"
+                    ))
+
+            # Check for empty lists
+            elif isinstance(value, list):
+                if len(value) == 0:
+                    errors.append(ValidationError(
+                        field=field_name,
+                        message=f"Field '{field_name}' is an empty list",
+                        error_type="empty"
+                    ))
+                # Check for lists containing only empty/whitespace strings
+                elif all(isinstance(v, str) and not v.strip() for v in value):
+                    errors.append(ValidationError(
+                        field=field_name,
+                        message=f"Field '{field_name}' contains only empty values",
+                        error_type="empty"
+                    ))
+
+        return errors
+
+    def _validate_field_formats(self, data: dict) -> list[ValidationError]:
+        """
+        Validate field formats for known field types.
+
+        Note: This is optional validation - format errors are warnings,
+        not hard failures, since LLM output can vary.
+        """
+        import re
+        errors = []
+
+        for field_name, pattern in self.FORMAT_PATTERNS.items():
+            if field_name in data:
+                value = data[field_name]
+                if isinstance(value, str) and value.strip() and value != "-1":
+                    if not re.match(pattern, value.strip()):
+                        # Log warning but don't fail - LLM output format varies
+                        print(f"[VALIDATION WARNING] Field '{field_name}' has unexpected format: {value}")
+
+        return errors
+
+
+def validate_incident(incident_data: Any, required_fields: list[str] | None = None) -> list[str]:
+    """
+    Convenience function to validate incident data.
+
+    This is the primary interface for the validation module, matching
+    the specification in Issue #305.
+
+    Args:
+        incident_data: Dictionary of extracted incident data.
+        required_fields: Optional list of required field names.
+
+    Returns:
+        Empty list if valid, list of error messages if invalid.
+
+    Example:
+        >>> errors = validate_incident({"incident_type": "Fire", "location": "123 Main St"})
+        >>> if errors:
+        ...     print("Validation failed:", errors)
+    """
+    validator = IncidentValidator(required_fields=required_fields)
+    result = validator.validate(incident_data)
+    return result.get_error_messages()
+
+
+def validate_incident_strict(incident_data: Any, required_fields: list[str] | None = None) -> ValidationResult:
+    """
+    Strict validation that returns full ValidationResult with error details.
+
+    Use this when you need detailed error information including field names
+    and error types for programmatic handling.
+
+    Args:
+        incident_data: Dictionary of extracted incident data.
+        required_fields: Optional list of required field names.
+
+    Returns:
+        ValidationResult object with is_valid flag and detailed errors.
+    """
+    validator = IncidentValidator(required_fields=required_fields)
+    return validator.validate(incident_data)
+
+
+def validate_transcript(transcript: Any) -> list[str]:
+    """
+    Validate raw transcript input before LLM processing.
+
+    Args:
+        transcript: The raw transcript text to validate.
+
+    Returns:
+        Empty list if valid, list of error messages if invalid.
+
+    Example:
+        >>> errors = validate_transcript("Fire at 123 Main St, 2 victims")
+        >>> if errors:
+        ...     print("Invalid transcript:", errors)
+    """
+    validator = TranscriptValidator()
+    result = validator.validate(transcript)
+    return result.get_error_messages()
+
+
+def validate_transcript_strict(transcript: Any) -> ValidationResult:
+    """
+    Strict transcript validation returning full ValidationResult.
+
+    Args:
+        transcript: The raw transcript text to validate.
+
+    Returns:
+        ValidationResult with is_valid flag, errors, and warnings.
+    """
+    validator = TranscriptValidator()
+    return validator.validate(transcript)
+
+
+def validate_template_fields(fields: Any) -> list[str]:
+    """
+    Validate template field configuration.
+
+    Args:
+        fields: Template fields (dict or list) to validate.
+
+    Returns:
+        Empty list if valid, list of error messages if invalid.
+    """
+    errors = []
+
+    if fields is None:
+        errors.append("Template fields cannot be None")
+    elif not isinstance(fields, (dict, list)):
+        errors.append(f"Template fields must be dict or list, got {type(fields).__name__}")
+    elif len(fields) == 0:
+        errors.append("Template fields cannot be empty")
+    elif isinstance(fields, dict):
+        # Validate each field has a valid name
+        for key in fields.keys():
+            if not isinstance(key, str) or not key.strip():
+                errors.append(f"Field name must be a non-empty string, got: {repr(key)}")
+
+    return errors
+
+
+def validate_pdf_path(pdf_path: Any) -> list[str]:
+    """
+    Validate PDF file path.
+
+    Args:
+        pdf_path: Path to the PDF file.
+
+    Returns:
+        Empty list if valid, list of error messages if invalid.
+    """
+    import os
+    errors = []
+
+    if pdf_path is None:
+        errors.append("PDF path cannot be None")
+    elif not isinstance(pdf_path, str):
+        errors.append(f"PDF path must be a string, got {type(pdf_path).__name__}")
+    elif not pdf_path.strip():
+        errors.append("PDF path cannot be empty")
+    elif not pdf_path.lower().endswith('.pdf'):
+        errors.append("PDF path must end with .pdf extension")
+    elif not os.path.exists(pdf_path):
+        errors.append(f"PDF file not found: {pdf_path}")
+
+    return errors
+
+
+def validate_all_inputs(
+    transcript: str,
+    fields: dict,
+    pdf_path: str
+) -> ValidationResult:
+    """
+    Validate all inputs before starting the form filling pipeline.
+
+    This is a convenience function that validates transcript, fields, and
+    PDF path in one call, returning a combined result.
+
+    Args:
+        transcript: The raw transcript/user input text.
+        fields: Template fields configuration.
+        pdf_path: Path to the PDF template file.
+
+    Returns:
+        ValidationResult with combined errors from all validations.
+    """
+    all_errors: list[ValidationError] = []
+    all_warnings: list[ValidationError] = []
+
+    # Validate transcript
+    transcript_result = validate_transcript_strict(transcript)
+    all_errors.extend(transcript_result.errors)
+    all_warnings.extend(transcript_result.warnings)
+
+    # Validate fields
+    field_errors = validate_template_fields(fields)
+    for msg in field_errors:
+        all_errors.append(ValidationError(
+            field="fields",
+            message=msg,
+            error_type="invalid_config"
+        ))
+
+    # Validate PDF path
+    pdf_errors = validate_pdf_path(pdf_path)
+    for msg in pdf_errors:
+        all_errors.append(ValidationError(
+            field="pdf_path",
+            message=msg,
+            error_type="invalid_path"
+        ))
+
+    return ValidationResult(
+        is_valid=len(all_errors) == 0,
+        errors=all_errors,
+        warnings=all_warnings
+    )
diff --git a/tests/test_validator.py b/tests/test_validator.py
new file mode 100644
index 0000000..e618df2
--- /dev/null
+++ b/tests/test_validator.py
@@ -0,0 +1,329 @@
+"""
+Comprehensive tests for the incident data validation module.
+
+These tests verify that the validation gate correctly catches
+incomplete or malformed incident data before PDF generation.
+
+Author: dhanasai2
+Tests for: Issue #305
+"""
+
+import pytest
+from src.validator import (
+    validate_incident,
+    validate_incident_strict,
+    IncidentValidator,
+    ValidationError,
+    ValidationResult,
+)
+from src.filler import FormValidationError
+
+
+class TestValidateIncidentFunction:
+    """Tests for the main validate_incident() convenience function."""
+
+    def test_valid_incident_returns_empty_list(self):
+        """Valid data with all required fields should return empty error list."""
+        valid_data = {
+            "incident_type": "Structure Fire",
+            "location": "123 Main Street, Springfield",
+            "time": "14:30",
+            "date": "03/28/2026",
+            "reporting_officer": "Captain John Smith"
+        }
+        errors = validate_incident(valid_data)
+        assert errors == []
+
+    def test_missing_required_field_returns_error(self):
+        """Missing required fields should return appropriate error messages."""
+        data_missing_location = {
+            "incident_type": "Medical Emergency",
+            "time": "09:15",
+            "date": "03/28/2026",
+            "reporting_officer": "Lt. Jane Doe"
+        }
+        errors = validate_incident(data_missing_location)
+        assert len(errors) > 0
+        assert any("location" in error.lower() for error in errors)
+
+    def test_empty_field_returns_error(self):
+        """Empty string fields should return validation errors."""
+        data_with_empty = {
+            "incident_type": "",
+            "location": "456 Oak Avenue",
+            "time": "10:00",
+            "date": "03/28/2026",
+            "reporting_officer": "Sgt. Bob Wilson"
+        }
+        errors = validate_incident(data_with_empty)
+        assert len(errors) > 0
+        assert any("incident_type" in error.lower() for error in errors)
+
+    def test_whitespace_only_field_returns_error(self):
+        """Whitespace-only fields should be treated as empty."""
+        data_with_whitespace = {
+            "incident_type": "   ",
+            "location": "789 Pine Road",
+            "time": "11:30",
+            "date": "03/28/2026",
+            "reporting_officer": "Chief Mary Johnson"
+        }
+        errors = validate_incident(data_with_whitespace)
+        assert len(errors) > 0
+        assert any("incident_type" in error.lower() for error in errors)
+
+    def test_invalid_type_returns_error(self):
+        """Non-dictionary input should return type error."""
+        errors = validate_incident("not a dictionary")
+        assert len(errors) > 0
+        assert any("dictionary" in error.lower() for error in errors)
+
+        errors = validate_incident(["list", "of", "items"])
+        assert len(errors) > 0
+
+        errors = validate_incident(None)
+        assert len(errors) > 0
+
+    def test_empty_dict_returns_error(self):
+        """Empty dictionary should return validation error."""
+        errors = validate_incident({})
+        assert len(errors) > 0
+        assert any("empty" in error.lower() for error in errors)
+
+    def test_custom_required_fields(self):
+        """Custom required fields should be validated correctly."""
+        custom_fields = ["name", "phone", "address"]
+        data = {
+            "name": "John Doe",
+            "phone": "555-1234"
+            # address is missing
+        }
+        errors = validate_incident(data, required_fields=custom_fields)
+        assert len(errors) > 0
+        assert any("address" in error.lower() for error in errors)
+
+    def test_llm_not_found_marker_treated_as_empty(self):
+        """The LLM's '-1' not-found marker should be treated as empty."""
+        data_with_not_found = {
+            "incident_type": "Fire",
+            "location": "-1",  # LLM couldn't find this
+            "time": "15:00",
+            "date": "03/28/2026",
+            "reporting_officer": "Officer Smith"
+        }
+        errors = validate_incident(data_with_not_found)
+        assert len(errors) > 0
+        assert any("location" in error.lower() for error in errors)
+
+
+class TestValidateIncidentStrict:
+    """Tests for the strict validation function with detailed results."""
+
+    def test_valid_data_returns_valid_result(self):
+        """Valid data should return ValidationResult with is_valid=True."""
+        valid_data = {
+            "incident_type": "Vehicle Accident",
+            "location": "Highway 101, Mile Marker 42",
+            "time": "08:45",
+            "date": "03/28/2026",
+            "reporting_officer": "Paramedic Lisa Chen"
+        }
+        result = validate_incident_strict(valid_data)
+        assert isinstance(result, ValidationResult)
+        assert result.is_valid is True
+        assert len(result.errors) == 0
+
+    def test_invalid_data_returns_detailed_errors(self):
+        """Invalid data should return ValidationResult with error details."""
+        invalid_data = {
+            "incident_type": "",
+            "time": "10:00"
+        }
+        result = validate_incident_strict(invalid_data)
+        assert result.is_valid is False
+        assert len(result.errors) > 0
+
+        # Check that errors have proper structure
+        for error in result.errors:
+            assert isinstance(error, ValidationError)
+            assert hasattr(error, 'field')
+            assert hasattr(error, 'message')
+            assert hasattr(error, 'error_type')
+
+    def test_result_to_dict_serialization(self):
+        """ValidationResult should serialize to dictionary properly."""
+        data = {"incident_type": ""}  # Will fail validation
+        result = validate_incident_strict(data)
+
+        result_dict = result.to_dict()
+        assert "is_valid" in result_dict
+        assert "errors" in result_dict
+        assert isinstance(result_dict["errors"], list)
+
+
+class TestIncidentValidator:
+    """Tests for the IncidentValidator class."""
+
+    def test_default_required_fields(self):
+        """Validator should have sensible default required fields."""
+        validator = IncidentValidator()
+        assert "incident_type" in validator.required_fields
+        assert "location" in validator.required_fields
+        assert "time" in validator.required_fields
+
+    def test_custom_required_fields_initialization(self):
+        """Validator should accept custom required fields."""
+        custom_fields = ["field_a", "field_b", "field_c"]
+        validator = IncidentValidator(required_fields=custom_fields)
+        assert validator.required_fields == custom_fields
+
+    def test_validate_method_returns_validation_result(self):
+        """The validate method should return a ValidationResult object."""
+        validator = IncidentValidator()
+        result = validator.validate({"test": "data"})
+        assert isinstance(result, ValidationResult)
+
+    def test_validates_list_fields(self):
+        """Validator should handle list values (plural fields from LLM)."""
+        validator = IncidentValidator(required_fields=["items"])
+
+        # Non-empty list should pass
+        result = validator.validate({"items": ["item1", "item2"]})
+        # May have errors for other missing default fields, but items should be OK
+        items_errors = [e for e in result.errors if e.field == "items"]
+        assert len(items_errors) == 0
+
+        # Empty list should fail
+        result = validator.validate({"items": []})
+        items_errors = [e for e in result.errors if e.field == "items"]
+        assert len(items_errors) > 0
+
+    def test_validates_none_values(self):
+        """Validator should catch None values."""
+        validator = IncidentValidator(required_fields=["field"])
+        result = validator.validate({"field": None})
+        assert result.is_valid is False
+        assert any(e.field == "field" for e in result.errors)
+
+
+class TestValidationError:
+    """Tests for the ValidationError dataclass."""
+
+    def test_error_creation(self):
+        """ValidationError should store field, message, and error_type."""
+        error = ValidationError(
+            field="test_field",
+            message="Test error message",
+            error_type="missing"
+        )
+        assert error.field == "test_field"
+        assert error.message == "Test error message"
+        assert error.error_type == "missing"
+
+    def test_error_to_dict(self):
+        """ValidationError should convert to dictionary properly."""
+        error = ValidationError(
+            field="location",
+            message="Location is required",
+            error_type="missing"
+        )
+        error_dict = error.to_dict()
+        assert error_dict["field"] == "location"
+        assert error_dict["message"] == "Location is required"
+        assert error_dict["error_type"] == "missing"
+
+
+class TestFormValidationError:
+    """Tests for the FormValidationError exception."""
+
+    def test_exception_creation(self):
+        """FormValidationError should store errors and optional data."""
+        errors = ["Field 'name' is missing", "Field 'date' is empty"]
+        data = {"partial": "data"}
+
+        exc = FormValidationError(errors=errors, data=data)
+        assert exc.errors == errors
+        assert exc.data == data
+        assert "2 error(s)" in str(exc)
+
+    def test_exception_without_data(self):
+        """FormValidationError should work without data parameter."""
+        errors = ["Single error"]
+        exc = FormValidationError(errors=errors)
+        assert exc.errors == errors
+        assert exc.data is None
+
+    def test_exception_is_raisable(self):
+        """FormValidationError should be raisable and catchable."""
+        with pytest.raises(FormValidationError) as exc_info:
+            raise FormValidationError(errors=["Test error"])
+
+        assert "Test error" in str(exc_info.value)
+
+
+class TestEdgeCases:
+    """Tests for edge cases and boundary conditions."""
+
+    def test_very_long_field_value(self):
+        """Validator should handle very long string values."""
+        long_value = "A" * 10000
+        data = {
+            "incident_type": long_value,
+            "location": "Test Location",
+            "time": "12:00",
+            "date": "03/28/2026",
+            "reporting_officer": "Test Officer"
+        }
+        errors = validate_incident(data)
+        assert errors == []  # Long values are valid
+
+    def test_special_characters_in_values(self):
+        """Validator should handle special characters in field values."""
+        data = {
+            "incident_type": "Fire/Rescue & Medical",
+            "location": "123 Main St. #4B, O'Brien's Corner",
+            "time": "12:00",
+            "date": "03/28/2026",
+            "reporting_officer": "José García-López"
+        }
+        errors = validate_incident(data)
+        assert errors == []
+
+    def test_unicode_values(self):
+        """Validator should handle unicode characters properly."""
+        data = {
+            "incident_type": "火災対応",  # Japanese: Fire response
+            "location": "東京都渋谷区",  # Japanese address
+            "time": "12:00",
+            "date": "03/28/2026",
+            "reporting_officer": "田中太郎"
+        }
+        errors = validate_incident(data)
+        assert errors == []
+
+    def test_numeric_values_as_strings(self):
+        """Validator should accept numeric values stored as strings."""
+        data = {
+            "incident_type": "Code 10-54",
+            "location": "12345",  # Numeric address
+            "time": "0800",  # Military time as string
+            "date": "20260328",
+            "reporting_officer": "Unit 42"
+        }
+        errors = validate_incident(data)
+        assert errors == []
+
+    def test_extra_fields_ignored(self):
+        """Extra fields beyond required ones should not cause errors."""
+        data = {
+            "incident_type": "Fire",
+            "location": "123 Main St",
+            "time": "12:00",
+            "date": "03/28/2026",
+            "reporting_officer": "Officer Smith",
+            "extra_field_1": "extra value 1",
+            "extra_field_2": "extra value 2",
+            "custom_notes": "These are custom notes"
+        }
+        errors = validate_incident(data)
+        assert errors == []

From cf5aea10d1a3e238f318f29001f8bfa9844621af Mon Sep 17 00:00:00 2001
From: haridammu <dammuvinay.143@gmail.com>
Date: Sat, 28 Mar 2026 14:39:48 +0530
Subject: [PATCH 2/2] feat: implement input-side incident data validation

This comprehensive PR implements validation at multiple stages:

1. TranscriptValidator - Validates raw user input before LLM processing
   - Type checking for string input
   - Length validation (10-50,000 characters)
   - Content validation with incident keyword detection
   - Early error feedback to prevent wasted API calls

2. IncidentValidator - Validates extracted incident data before PDF generation
   - Required field presence checking
   - Empty, null, and whitespace-only value detection
   - LLM not-found indicator ('-1') handling
   - List/array value validation for plural fields
   - Optional format validation with warnings

3. Pipeline Integration
   - FileManipulator.fill_form() validates inputs early
   - Filler.fill_form() validates extracted data before PDF generation
   - API routes handle validation errors with clear responses
   - All errors include field name, message, type, and value

4. Comprehensive Testing
   - 47+ test cases covering all validation scenarios
   - Type validation, required fields, empty values
   - Whitespace handling, unicode support
   - LLM '-1' indicator handling
   - Edge cases and boundary conditions
   - All tests passing

Files changed:
- src/validator.py (NEW): 400+ lines of validation logic
- src/file_manipulator.py: Added transcript & template field validation
- api/routes/forms.py: Added validation error handling
- tests/test_validator.py: Added 47+ comprehensive test cases

Benefits:
- Prevents wasted API calls on invalid transcripts
- Catches data issues before expensive PDF generation
- Provides clear, actionable error messages
- Zero overhead for valid data
- Fully backward compatible
- Configurable for different agencies

Fixes #305

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 PR_CONTENT.md           |  66 +++++++++++
 PR_VALIDATION.md        | 104 ++++++++++++++++++
 api/routes/forms.py     |  29 ++++-
 src/file_manipulator.py |  29 ++++-
 tests/test_validator.py | 237 ++++++++++++++++++++++++++++++++++++++++
 5 files changed, 459 insertions(+), 6 deletions(-)
 create mode 100644 PR_CONTENT.md
 create mode 100644 PR_VALIDATION.md

diff --git a/PR_CONTENT.md b/PR_CONTENT.md
new file mode 100644
index 0000000..aec80ba
--- /dev/null
+++ b/PR_CONTENT.md
@@ -0,0 +1,66 @@
+# Pull Request: Fix Exception Handler Registration
+
+## Title
+fix: register custom exception handlers in FastAPI app
+
+---
+
+## Description (Copy below)
+
+### What's the problem?
+
+While exploring the FireForm codebase, I noticed that the custom error handling system wasn't working as intended. The `AppError` exception class and its handler exist in the codebase (`api/errors/base.py` and `api/errors/handlers.py`), but they were never actually connected to the FastAPI application.
+
+This means when code like this runs:
+```python
+raise AppError("Template not found", status_code=404)
+```
+
+Instead of returning a clean **404 Not Found** response, the API returns a generic **500 Internal Server Error** - which is confusing for API consumers and makes debugging harder.
+
+### The Fix
+
+A simple 2-line addition to `api/main.py`:
+
+```python
+from api.errors.handlers import register_exception_handlers
+
+app = FastAPI()
+register_exception_handlers(app)  # This was missing!
+```
+
+### What I Changed
+
+| File | Change |
+|------|--------|
+| `api/main.py` | Added import and called `register_exception_handlers(app)` |
+| `tests/test_error_handlers.py` | New test file with 4 comprehensive test cases |
+
+### Testing
+
+I added tests that verify:
+- `AppError` with `status_code=404` actually returns 404 (not 500)
+- `AppError` with default status code returns 400
+- Error messages are properly included in JSON response
+- Exception handlers are correctly registered in the main app
+
+### Before & After
+
+| Scenario | Before | After |
+|----------|--------|-------|
+| `raise AppError("Not found", 404)` | ❌ Returns 500 | ✅ Returns 404 |
+| `raise AppError("Bad request")` | ❌ Returns 500 | ✅ Returns 400 |
+| Error message in response | ❌ Generic error | ✅ `{"error": "Not found"}` |
+
+### Why This Matters
+
+This fix ensures that:
+1. API consumers get meaningful error codes they can handle programmatically
+2. The existing error handling code actually works as designed
+3. Debugging becomes easier with proper status codes
+
+Fixes #295
+
+---
+
+*Happy to make any changes if needed!*
diff --git a/PR_VALIDATION.md b/PR_VALIDATION.md
new file mode 100644
index 0000000..fe28e16
--- /dev/null
+++ b/PR_VALIDATION.md
@@ -0,0 +1,104 @@
+# Pull Request: Input-Side Incident Data Validation (#305)
+
+## Overview
+
+This PR implements comprehensive input-side validation for incident data, catching incomplete or malformed data early in the pipeline before expensive PDF generation operations.
+
+**Issue:** https://github.com/fireform-core/FireForm/issues/305
+
+---
+
+## What's the Problem?
+
+FireForm was missing critical validation at the input stage:
+
+1. **Transcript Input**: No validation before LLM processing
+2. **Extracted Data**: No validation before PDF generation  
+3. **Template Configuration**: No validation of field setup
+
+This resulted in wasted resources, unclear errors, and poor user experience.
+
+---
+
+## The Solution
+
+A comprehensive multi-stage validation system:
+
+### TranscriptValidator
+- Type checking (must be string)
+- Length validation (10-50,000 chars)
+- Content validation (detects incident keywords)
+- Early error feedback before LLM calls
+
+### IncidentValidator  
+- Required field presence checking
+- Empty/null/whitespace detection
+- LLM not-found indicator handling ("-1")
+- List value validation
+- Optional format validation
+
+### Pipeline Integration
+- `FileManipulator.fill_form()` - Validates inputs early
+- `Filler.fill_form()` - Validates extracted data
+- `API routes` - Validation with error handling
+
+---
+
+## Changes Summary
+
+| File | Changes |
+|------|---------|
+| **src/validator.py** | New: 400+ lines of validation logic |
+| **src/file_manipulator.py** | Enhanced: Transcript & field validation |
+| **api/routes/forms.py** | Enhanced: Validation in endpoint |
+| **tests/test_validator.py** | New: 50+ test cases |
+
+---
+
+## Testing
+
+- **47+ comprehensive test cases**
+- Type validation, required fields, empty values
+- Whitespace handling, LLM "-1" indicator
+- List/array validation, unicode support
+- Edge cases and boundary conditions
+- All tests passing
+
+---
+
+## Usage
+
+**Simple interface:**
+```python
+from src.validator import validate_incident, validate_transcript
+
+errors = validate_transcript(user_input)
+if errors:
+    return {"error": "Invalid input", "details": errors}
+
+errors = validate_incident(extracted_data)  
+if errors:
+    raise FormValidationError(errors)
+```
+
+**Strict interface:**
+```python
+result = validate_incident_strict(data)
+if not result.is_valid:
+    result.raise_if_invalid()  # Raises ValidationException
+```
+
+---
+
+## Benefits
+
+✓ Prevent wasted API calls on invalid transcripts
+✓ Catch data issues before PDF generation
+✓ Clear, actionable error messages
+✓ Flexible & configurable per agency
+✓ Zero overhead for valid data
+✓ Backward compatible
+
+---
+
+Fixes #305
diff --git a/api/routes/forms.py b/api/routes/forms.py
index f3430ed..31f0a90 100644
--- a/api/routes/forms.py
+++ b/api/routes/forms.py
@@ -4,20 +4,41 @@
 from api.schemas.forms import FormFill, FormFillResponse
 from api.db.repositories import create_form, get_template
 from api.db.models import FormSubmission
-from api.errors.base import AppError
+from api.errors.base import AppError, ValidationError
 from src.controller import Controller
+from src.validator import validate_transcript, validate_template_fields
 
 router = APIRouter(prefix="/forms", tags=["forms"])
 
 @router.post("/fill", response_model=FormFillResponse)
 def fill_form(form: FormFill, db: Session = Depends(get_db)):
-    if not get_template(db, form.template_id):
-        raise AppError("Template not found", status_code=404)
+    # Validate transcript input before processing
+    transcript_errors = validate_transcript(form.input_text)
+    if transcript_errors:
+        raise ValidationError(
+            message="Invalid transcript input",
+            errors=transcript_errors
+        )
 
+    # Check if template exists
     fetched_template = get_template(db, form.template_id)
+    if not fetched_template:
+        raise AppError("Template not found", status_code=404)
+
+    # Validate template fields
+    field_errors = validate_template_fields(fetched_template.fields)
+    if field_errors:
+        raise ValidationError(
+            message="Invalid template configuration",
+            errors=field_errors
+        )
 
     controller = Controller()
-    path = controller.fill_form(user_input=form.input_text, fields=fetched_template.fields, pdf_form_path=fetched_template.pdf_path)
+    path = controller.fill_form(
+        user_input=form.input_text,
+        fields=fetched_template.fields,
+        pdf_form_path=fetched_template.pdf_path
+    )
 
     submission = FormSubmission(**form.model_dump(), output_pdf_path=path)
     return create_form(db, submission)
diff --git a/src/file_manipulator.py b/src/file_manipulator.py
index b7815cc..69b874f 100644
--- a/src/file_manipulator.py
+++ b/src/file_manipulator.py
@@ -1,6 +1,13 @@
 import os
 from src.filler import Filler
 from src.llm import LLM
+from src.validator import (
+    validate_transcript,
+    validate_template_fields,
+    validate_all_inputs,
+    ValidationException,
+    ValidationError as ValidatorError
+)
 from commonforms import prepare_form
 
 
@@ -21,15 +28,33 @@ def fill_form(self, user_input: str, fields: list, pdf_form_path: str):
         """
         It receives the raw data, runs the PDF filling logic,
         and returns the path to the newly created file.
+
+        Validates all inputs before processing to catch errors early.
         """
         print("[1] Received request from frontend.")
-        print(f"[2] PDF template path: {pdf_form_path}")
+        print("[2] Validating inputs...")
+
+        # Validate transcript/user input
+        transcript_errors = validate_transcript(user_input)
+        if transcript_errors:
+            error_msg = f"Invalid transcript: {'; '.join(transcript_errors)}"
+            print(f"[ERROR] {error_msg}")
+            raise ValueError(error_msg)
+
+        # Validate template fields
+        field_errors = validate_template_fields(fields)
+        if field_errors:
+            error_msg = f"Invalid template fields: {'; '.join(field_errors)}"
+            print(f"[ERROR] {error_msg}")
+            raise ValueError(error_msg)
+
+        print(f"[3] PDF template path: {pdf_form_path}")
 
         if not os.path.exists(pdf_form_path):
             print(f"Error: PDF template not found at {pdf_form_path}")
             return None  # Or raise an exception
 
-        print("[3] Starting extraction and PDF filling process...")
+        print("[4] Starting extraction and PDF filling process...")
         try:
             self.llm._target_fields = fields
             self.llm._transcript_text = user_input
diff --git a/tests/test_validator.py b/tests/test_validator.py
index e618df2..641164d 100644
--- a/tests/test_validator.py
+++ b/tests/test_validator.py
@@ -12,9 +12,16 @@
 from src.validator import (
     validate_incident,
     validate_incident_strict,
+    validate_transcript,
+    validate_transcript_strict,
+    validate_template_fields,
+    validate_all_inputs,
     IncidentValidator,
+    TranscriptValidator,
     ValidationError,
     ValidationResult,
+    ValidationException,
+    ErrorSeverity,
 )
 from src.filler import FormValidationError
 
@@ -327,3 +334,233 @@ def test_extra_fields_ignored(self):
         }
         errors = validate_incident(data)
         assert errors == []
+
+
+class TestTranscriptValidation:
+    """Tests for transcript/user input validation."""
+
+    def test_valid_transcript_returns_empty_list(self):
+        """Valid transcript should return no errors."""
+        transcript = "Fire reported at 123 Main Street at 14:30. Two victims rescued by Engine 5."
+        errors = validate_transcript(transcript)
+        assert errors == []
+
+    def test_empty_transcript_returns_error(self):
+        """Empty transcript should return error."""
+        errors = validate_transcript("")
+        assert len(errors) > 0
+        assert any("empty" in e.lower() for e in errors)
+
+    def test_whitespace_only_transcript_returns_error(self):
+        """Whitespace-only transcript should return error."""
+        errors = validate_transcript("   \t\n   ")
+        assert len(errors) > 0
+
+    def test_none_transcript_returns_error(self):
+        """None transcript should return error."""
+        errors = validate_transcript(None)
+        assert len(errors) > 0
+
+    def test_non_string_transcript_returns_error(self):
+        """Non-string transcript should return type error."""
+        errors = validate_transcript(12345)
+        assert len(errors) > 0
+        assert any("string" in e.lower() for e in errors)
+
+    def test_short_transcript_returns_error(self):
+        """Very short transcript should return error."""
+        errors = validate_transcript("Hi")
+        assert len(errors) > 0
+        assert any("short" in e.lower() for e in errors)
+
+    def test_minimum_valid_length_passes(self):
+        """Transcript at minimum length should pass."""
+        transcript = "Fire at 123"  # 11 chars > 10 min
+        errors = validate_transcript(transcript)
+        assert errors == []
+
+
+class TestTranscriptValidatorStrict:
+    """Tests for strict transcript validation with warnings."""
+
+    def test_returns_validation_result(self):
+        """Should return ValidationResult instance."""
+        result = validate_transcript_strict("Test transcript content here")
+        assert isinstance(result, ValidationResult)
+
+    def test_non_incident_content_returns_warning(self):
+        """Non-incident content should generate warning, not error."""
+        transcript = "The weather is nice today and I went shopping at the mall"
+        result = validate_transcript_strict(transcript)
+
+        # Should still be valid (warnings don't block)
+        assert result.is_valid is True
+        # Should have warnings about content
+        assert len(result.warnings) > 0
+
+    def test_incident_content_no_warning(self):
+        """Incident-related content should not generate content warning."""
+        transcript = "Fire emergency reported at location 123 Main St with 2 victims"
+        result = validate_transcript_strict(transcript)
+
+        assert result.is_valid is True
+        content_warnings = [w for w in result.warnings if "incident" in w.message.lower()]
+        assert len(content_warnings) == 0
+
+
+class TestTranscriptValidator:
+    """Tests for the TranscriptValidator class."""
+
+    def test_incident_keyword_detection(self):
+        """Should detect incident-related keywords."""
+        validator = TranscriptValidator()
+
+        assert validator._contains_incident_keywords("There was a fire at the building")
+        assert validator._contains_incident_keywords("Emergency response needed")
+        assert validator._contains_incident_keywords("Accident reported on highway")
+        assert validator._contains_incident_keywords("Victim found at scene")
+        assert not validator._contains_incident_keywords("Nice weather today")
+        assert not validator._contains_incident_keywords("Going to the store")
+
+
+class TestTemplateFieldValidation:
+    """Tests for template field configuration validation."""
+
+    def test_valid_dict_fields(self):
+        """Valid dict fields should return empty error list."""
+        fields = {"incident_type": "", "location": "", "time": ""}
+        errors = validate_template_fields(fields)
+        assert errors == []
+
+    def test_valid_list_fields(self):
+        """Valid list fields should return empty error list."""
+        fields = ["incident_type", "location", "time"]
+        errors = validate_template_fields(fields)
+        assert errors == []
+
+    def test_none_fields_returns_error(self):
+        """None fields should return error."""
+        errors = validate_template_fields(None)
+        assert len(errors) > 0
+
+    def test_empty_dict_returns_error(self):
+        """Empty dict should return error."""
+        errors = validate_template_fields({})
+        assert len(errors) > 0
+
+    def test_empty_list_returns_error(self):
+        """Empty list should return error."""
+        errors = validate_template_fields([])
+        assert len(errors) > 0
+
+    def test_invalid_type_returns_error(self):
+        """Invalid type should return error."""
+        errors = validate_template_fields("not a dict or list")
+        assert len(errors) > 0
+
+
+class TestValidateAllInputs:
+    """Tests for the combined validation function."""
+
+    def test_returns_validation_result(self):
+        """Should return ValidationResult instance."""
+        result = validate_all_inputs(
+            transcript="Fire at location with emergency response",
+            fields={"field1": "val"},
+            pdf_path="test.pdf"
+        )
+        assert isinstance(result, ValidationResult)
+
+    def test_invalid_transcript_fails(self):
+        """Invalid transcript should cause failure."""
+        result = validate_all_inputs(
+            transcript="",
+            fields={"field1": "val"},
+            pdf_path="test.pdf"
+        )
+        assert result.is_valid is False
+        assert any("transcript" in e.field for e in result.errors)
+
+    def test_invalid_fields_fails(self):
+        """Invalid fields should cause failure."""
+        result = validate_all_inputs(
+            transcript="Valid transcript with enough content for processing",
+            fields=None,
+            pdf_path="test.pdf"
+        )
+        assert result.is_valid is False
+        assert any("fields" in e.field for e in result.errors)
+
+
+class TestValidationResult:
+    """Tests for ValidationResult class functionality."""
+
+    def test_raise_if_invalid_raises_exception(self):
+        """Should raise ValidationException when invalid."""
+        error = ValidationError(
+            field="test",
+            message="Test error",
+            error_type="test_error"
+        )
+        result = ValidationResult(is_valid=False, errors=[error])
+
+        with pytest.raises(ValidationException):
+            result.raise_if_invalid()
+
+    def test_raise_if_invalid_silent_when_valid(self):
+        """Should not raise when valid."""
+        result = ValidationResult(is_valid=True, errors=[])
+        result.raise_if_invalid()  # Should not raise
+
+    def test_get_all_messages_includes_warnings(self):
+        """Should include both errors and warnings."""
+        error = ValidationError(field="e", message="Error msg", error_type="err")
+        warning = ValidationError(
+            field="w",
+            message="Warning msg",
+            error_type="warn",
+            severity=ErrorSeverity.WARNING
+        )
+        result = ValidationResult(
+            is_valid=False,
+            errors=[error],
+            warnings=[warning]
+        )
+
+        messages = result.get_all_messages()
+        assert "Error msg" in messages
+        assert "Warning msg" in messages
+
+
+class TestValidationException:
+    """Tests for ValidationException class."""
+
+    def test_exception_contains_errors(self):
+        """Exception should contain error list."""
+        errors = [
+            ValidationError(field="f1", message="Error 1", error_type="test"),
+            ValidationError(field="f2", message="Error 2", error_type="test")
+        ]
+        exc = ValidationException(errors=errors)
+        assert len(exc.errors) == 2
+
+    def test_to_dict_serialization(self):
+        """Should serialize to dict."""
+        errors = [ValidationError(field="f", message="Msg", error_type="t")]
+        exc = ValidationException(errors=errors, message="Test failure")
+
+        result = exc.to_dict()
+        assert result["message"] == "Test failure"
+        assert len(result["errors"]) == 1
+
+    def test_get_error_messages(self):
+        """Should return list of error messages."""
+        errors = [
+            ValidationError(field="f1", message="First error", error_type="t"),
+            ValidationError(field="f2", message="Second error", error_type="t")
+        ]
+        exc = ValidationException(errors=errors)
+
+        messages = exc.get_error_messages()
+        assert "First error" in messages
+        assert "Second error" in messages