diff --git a/plugins/deploy-on-aws/scripts/lib/post_process_drawio.py b/plugins/deploy-on-aws/scripts/lib/post_process_drawio.py index a668449..fe70a9c 100644 --- a/plugins/deploy-on-aws/scripts/lib/post_process_drawio.py +++ b/plugins/deploy-on-aws/scripts/lib/post_process_drawio.py @@ -17,6 +17,8 @@ import defusedxml.ElementTree as ET from pathlib import Path +MAX_FILE_SIZE = 2 * 1024 * 1024 # 2 MB + # Import sibling modules by explicit file path (avoids sys.path manipulation # that could allow module shadowing — see CWE-426) SCRIPT_DIR = Path(__file__).parent @@ -300,53 +302,84 @@ def main() -> None: # Not a drawio file, exit silently (hook compatibility) sys.exit(0) + path = Path(file_path) + + # Reject symlinks to prevent symlink-follow write attacks + if path.is_symlink(): + print(f"Skipping symlink: {file_path}", file=sys.stderr) + sys.exit(0) + + # Reject files exceeding size limit before parsing + try: + file_size = path.stat().st_size + except OSError as e: + print(f"Cannot stat {file_path}: {e}", file=sys.stderr) + sys.exit(1) + if file_size > MAX_FILE_SIZE: + print( + f"Skipping: file too large ({file_size // 1024}KB > " + f"{MAX_FILE_SIZE // 1024 // 1024}MB limit)", + file=sys.stderr, + ) + sys.exit(0) + try: tree = ET.parse(file_path) except (ET.ParseError, FileNotFoundError) as e: print(f"Error parsing {file_path}: {e}", file=sys.stderr) sys.exit(1) - changes = [] - - # 0. Fix Region container nesting (MUST run first — changes coordinates) - regions_fixed = fix_nesting(tree, verbose=args.verbose) - if regions_fixed > 0: - changes.append(f"nesting: {regions_fixed} regions flattened") - - # 1. Fix icon fill colors (before badge/layout fixes) - icons_fixed = fix_icon_colors(tree, verbose=args.verbose) - if icons_fixed > 0: - changes.append(f"icons: {icons_fixed} colors corrected") - - # 2. Fix step badge overlaps (15px clearance for visual breathing room) - badges_moved = fix_badges(tree, clearance=15.0, verbose=args.verbose) - if badges_moved > 0: - changes.append(f"badges: {badges_moved} moved") - - # 3. Fix external actor placement (below title + outside AWS Cloud) - actors_moved = fix_placement(tree, verbose=args.verbose) - if actors_moved > 0: - changes.append(f"placement: {actors_moved} actors repositioned") - - # 4. Fix legend panel sizing (match diagram height) - legend_resized = fix_legend_size(tree, verbose=args.verbose) - if legend_resized > 0: - changes.append("legend: resized to match diagram height") - - if changes: - summary = "; ".join(changes) - print(f"Post-processing: {summary}") - if not args.dry_run: - # Note: XML indentation skipped — defusedxml doesn't expose indent() - # and importing stdlib xml.etree.ElementTree triggers security scanners. - # Output is valid but not pretty-printed. If human-readable XML is needed, - # add a custom indent helper that walks the element tree without stdlib import. - tree.write(file_path, encoding="unicode", xml_declaration=False) - print(f"Written: {file_path}") + # Top-level try/except prevents unhandled exception tracebacks from + # leaking file paths and source code lines into the hook systemMessage + # (stderr is captured via 2>&1 in validate-drawio.sh). + try: + changes = [] + + # 0. Fix Region container nesting (MUST run first — changes coordinates) + regions_fixed = fix_nesting(tree, verbose=args.verbose) + if regions_fixed > 0: + changes.append(f"nesting: {regions_fixed} regions flattened") + + # 1. Fix icon fill colors (before badge/layout fixes) + icons_fixed = fix_icon_colors(tree, verbose=args.verbose) + if icons_fixed > 0: + changes.append(f"icons: {icons_fixed} colors corrected") + + # 2. Fix step badge overlaps (15px clearance for visual breathing room) + badges_moved = fix_badges(tree, clearance=15.0, verbose=args.verbose) + if badges_moved > 0: + changes.append(f"badges: {badges_moved} moved") + + # 3. Fix external actor placement (below title + outside AWS Cloud) + actors_moved = fix_placement(tree, verbose=args.verbose) + if actors_moved > 0: + changes.append(f"placement: {actors_moved} actors repositioned") + + # 4. Fix legend panel sizing (match diagram height) + legend_resized = fix_legend_size(tree, verbose=args.verbose) + if legend_resized > 0: + changes.append("legend: resized to match diagram height") + + if changes: + summary = "; ".join(changes) + print(f"Post-processing: {summary}") + if not args.dry_run: + # Note: XML indentation skipped — defusedxml doesn't expose indent() + # and importing stdlib xml.etree.ElementTree triggers security scanners. + # Output is valid but not pretty-printed. If human-readable XML is needed, + # add a custom indent helper that walks the element tree without stdlib import. + tree.write(file_path, encoding="unicode", xml_declaration=False) + print(f"Written: {file_path}") + else: + print("(dry run, no changes written)") else: - print("(dry run, no changes written)") - else: - print("Post-processing: no changes needed") + print("Post-processing: no changes needed") + except Exception: + # Generic message only — do not include exception details or tracebacks, + # as they would leak internal file paths and source lines into the agent + # context via the hook's systemMessage. + print("Post-processing: internal error during fixers. Run manually for details.") + sys.exit(1) if __name__ == "__main__": diff --git a/plugins/deploy-on-aws/scripts/lib/validate_drawio.py b/plugins/deploy-on-aws/scripts/lib/validate_drawio.py index b457377..44ba4ab 100755 --- a/plugins/deploy-on-aws/scripts/lib/validate_drawio.py +++ b/plugins/deploy-on-aws/scripts/lib/validate_drawio.py @@ -8,12 +8,17 @@ 5. Geometry validation (vertices have mxGeometry) """ +import io import json import re import sys import defusedxml.ElementTree as ET from pathlib import Path +MAX_FILE_SIZE = 2 * 1024 * 1024 # 2 MB +MAX_XML_DEPTH = 50 +MAX_XML_ELEMENTS = 50_000 + # Load valid AWS4 shapes SCRIPT_DIR = Path(__file__).parent shapes_data = json.loads((SCRIPT_DIR / "aws4-shapes.json").read_text()) @@ -41,13 +46,51 @@ def _sanitize_attr(value: str, max_len: int = 80) -> str: return sanitized +def _check_xml_limits(xml_text: str) -> str | None: + """Pre-flight check for element depth and count using streaming parse. + + Uses iterparse to process elements one at a time without building the + full tree in memory, avoiding C-stack overflow on deeply nested files. + Returns an error message string, or None if within limits. + """ + depth = 0 + count = 0 + for event, _ in ET.iterparse( + io.BytesIO(xml_text.encode("utf-8")), events=("start", "end") + ): + if event == "start": + depth += 1 + count += 1 + if depth > MAX_XML_DEPTH: + return f"XML nesting depth exceeds {MAX_XML_DEPTH} levels" + if count > MAX_XML_ELEMENTS: + return f"XML element count exceeds {MAX_XML_ELEMENTS:,}" + else: + depth -= 1 + return None + + def validate(file_path): errors = [] warnings = [] # 1. Read file + path = Path(file_path) try: - xml_text = Path(file_path).read_text(encoding="utf-8") + file_size = path.stat().st_size + except OSError as e: + errors.append(f"Cannot stat file: {e}") + return errors, warnings + + if file_size > MAX_FILE_SIZE: + errors.append( + f"File too large ({file_size // 1024}KB > " + f"{MAX_FILE_SIZE // 1024 // 1024}MB limit)" + ) + return errors, warnings + + try: + xml_text = path.read_text(encoding="utf-8") except Exception as e: errors.append(f"Cannot read file: {e}") return errors, warnings @@ -56,6 +99,12 @@ def validate(file_path): errors.append("File is empty") return errors, warnings + # Pre-flight: check depth and element count before full parse + limit_error = _check_xml_limits(xml_text) + if limit_error: + errors.append(limit_error) + return errors, warnings + # Parse XML try: root = ET.fromstring(xml_text) @@ -217,7 +266,15 @@ def main(): sys.exit(1) file_path = sys.argv[1] - errors, warnings = validate(file_path) + + # Top-level try/except prevents unhandled exception tracebacks from + # leaking file paths and source code lines into the hook systemMessage + # (stderr is captured via 2>&1 in validate-drawio.sh). + try: + errors, warnings = validate(file_path) + except Exception: + print("VALIDATION FAILED: internal error during validation. Run manually for details.") + sys.exit(1) if errors: print(f"VALIDATION FAILED for {file_path}:") diff --git a/plugins/deploy-on-aws/scripts/validate-drawio.sh b/plugins/deploy-on-aws/scripts/validate-drawio.sh index 919f263..26307f0 100755 --- a/plugins/deploy-on-aws/scripts/validate-drawio.sh +++ b/plugins/deploy-on-aws/scripts/validate-drawio.sh @@ -44,10 +44,11 @@ fi # Step 0: Run post-processing fixers BEFORE validation # This fixes badge overlaps, external actor placement, and legend sizing -POST_RESULT=$(python3 "$SCRIPT_DIR/lib/post_process_drawio.py" "$FILE_PATH" 2>&1) || true +# timeout prevents runaway processes from blocking the hook indefinitely +POST_RESULT=$(timeout 10 python3 "$SCRIPT_DIR/lib/post_process_drawio.py" "$FILE_PATH" 2>&1) || true # Step 1: Run the Python validator on the post-processed file -VALIDATE_RESULT=$(python3 "$SCRIPT_DIR/lib/validate_drawio.py" "$FILE_PATH" 2>&1) || true +VALIDATE_RESULT=$(timeout 10 python3 "$SCRIPT_DIR/lib/validate_drawio.py" "$FILE_PATH" 2>&1) || true VALIDATION_PASSED=false if echo "$VALIDATE_RESULT" | grep -q "VALIDATION PASSED"; then VALIDATION_PASSED=true @@ -56,7 +57,7 @@ fi # Step 2: Only generate draw.io preview URL AFTER validation passes URL_RESULT="" if [[ "$VALIDATION_PASSED" == "true" ]]; then - URL_RESULT=$(python3 "$SCRIPT_DIR/lib/drawio_url.py" "$FILE_PATH" 2>/dev/null) || true + URL_RESULT=$(timeout 5 python3 "$SCRIPT_DIR/lib/drawio_url.py" "$FILE_PATH" 2>/dev/null) || true fi # Build the response message