From ff342d1bc60ca0f8c5e6ea5269c644dfbecf2892 Mon Sep 17 00:00:00 2001 From: arpannookala-12 Date: Wed, 4 Mar 2026 18:57:55 -0600 Subject: [PATCH 01/13] chore: add environment variable template Documents all required and optional env vars for inference endpoint configuration, LLM settings, CORS, and SSL verification. --- .env.example | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 .env.example diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..2f0accf --- /dev/null +++ b/.env.example @@ -0,0 +1,94 @@ +# ============================================================ +# CodeTrans — Environment Configuration +# ============================================================ + +# Backend port +BACKEND_PORT=5001 + +# ============================================================ +# Inference Provider +# ============================================================ +# "remote" — Cloud or enterprise OpenAI-compatible API (e.g. 
CodeLlama via gateway) +# "ollama" — Local Ollama running natively on the host machine (recommended for Mac) +INFERENCE_PROVIDER=remote + +# ============================================================ +# Option A: Remote OpenAI-compatible API (INFERENCE_PROVIDER=remote) +# ============================================================ +# INFERENCE_API_ENDPOINT: Base URL of your inference service (no /v1 suffix) +# - GenAI Gateway: https://genai-gateway.example.com +# - APISIX Gateway: https://apisix-gateway.example.com/CodeLlama-34b-Instruct-hf +INFERENCE_API_ENDPOINT=https://your-api-endpoint.com/deployment +INFERENCE_API_TOKEN=your-pre-generated-token-here +INFERENCE_MODEL_NAME=codellama/CodeLlama-34b-Instruct-hf + +# ============================================================ +# Option B: Ollama — native host inference (INFERENCE_PROVIDER=ollama) +# ============================================================ +# +# IMPORTANT — Why Ollama runs on the host, NOT in Docker: +# On macOS (Apple Silicon / M-series), running Ollama as a Docker container +# bypasses Metal GPU acceleration. The model falls back to CPU-only inference +# which is dramatically slower. Ollama must be installed natively so the Metal +# Performance Shaders (MPS) backend is used for hardware-accelerated inference. +# +# Setup: +# 1. Install Ollama: https://ollama.com/download +# 2. Pull your model (see options below) +# 3. Ollama starts automatically; confirm it is running: +# curl http://localhost:11434/api/tags +# 4. Set the variables below in your .env +# +# The backend container reaches host-side Ollama via the special DNS name +# `host.docker.internal` which Docker Desktop resolves to the Mac host. +# (On Linux with Docker Engine this requires the extra_hosts entry in docker-compose.yaml, +# which is already configured.) 
+# +# --- Production / high-quality translation --- +# INFERENCE_PROVIDER=ollama +# INFERENCE_API_ENDPOINT=http://host.docker.internal:11434 +# INFERENCE_MODEL_NAME=codellama:34b +# ollama pull codellama:34b # ~20 GB, best quality +# +# --- Testing / SLM performance benchmarking --- +# INFERENCE_PROVIDER=ollama +# INFERENCE_API_ENDPOINT=http://host.docker.internal:11434 +# INFERENCE_MODEL_NAME=codellama:7b +# ollama pull codellama:7b # ~4 GB, fast — use this for gauging SLM perf +# +# --- Other recommended code models --- +# ollama pull deepseek-coder:6.7b # ~4 GB, strong at code tasks +# ollama pull qwen2.5-coder:7b # ~4 GB, excellent multilingual code +# ollama pull codellama:13b # ~8 GB, good balance of speed vs quality +# +# Note: INFERENCE_API_TOKEN is not required when using Ollama. + +# ============================================================ +# LLM Settings +# ============================================================ +LLM_TEMPERATURE=0.2 +LLM_MAX_TOKENS=4096 + +# ============================================================ +# Code Translation Settings +# ============================================================ +MAX_CODE_LENGTH=8000 +MAX_FILE_SIZE=10485760 + +# ============================================================ +# CORS Configuration +# ============================================================ +CORS_ALLOW_ORIGINS=["http://localhost:5173", "http://localhost:3000"] + +# ============================================================ +# Local URL Endpoint +# ============================================================ +# Only needed if your remote API endpoint is a private domain mapped in /etc/hosts. +# Otherwise leave as "not-needed". +LOCAL_URL_ENDPOINT=not-needed + +# ============================================================ +# SSL Verification +# ============================================================ +# Set to false only for development with self-signed certificates. 
+VERIFY_SSL=true From c87d4e74391107c396abd90ea6ba070861ad2d0d Mon Sep 17 00:00:00 2001 From: arpannookala-12 Date: Wed, 4 Mar 2026 18:57:59 -0600 Subject: [PATCH 02/13] feat(api): add FastAPI backend service Implements code translation across Java, C, C++, Python, Rust, and Go via CodeLlama inference endpoints. Includes PDF code extraction, token- based auth for GenAI/APISIX gateways, input validation, and health check. --- api/.dockerignore | 29 +++++ api/Dockerfile | 25 ++++ api/config.py | 49 ++++++++ api/models.py | 69 +++++++++++ api/requirements.txt | 9 ++ api/server.py | 234 ++++++++++++++++++++++++++++++++++++ api/services/__init__.py | 13 ++ api/services/api_client.py | 141 ++++++++++++++++++++++ api/services/pdf_service.py | 128 ++++++++++++++++++++ 9 files changed, 697 insertions(+) create mode 100644 api/.dockerignore create mode 100644 api/Dockerfile create mode 100644 api/config.py create mode 100644 api/models.py create mode 100644 api/requirements.txt create mode 100644 api/server.py create mode 100644 api/services/__init__.py create mode 100644 api/services/api_client.py create mode 100644 api/services/pdf_service.py diff --git a/api/.dockerignore b/api/.dockerignore new file mode 100644 index 0000000..bd6b932 --- /dev/null +++ b/api/.dockerignore @@ -0,0 +1,29 @@ +__pycache__ +*.pyc +*.pyo +*.pyd +.Python +env/ +venv/ +.venv/ +pip-log.txt +pip-delete-this-directory.txt +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.log +.git +.gitignore +.mypy_cache +.pytest_cache +.hypothesis +*.swp +*.swo +*~ +.DS_Store +.env +.env.local diff --git a/api/Dockerfile b/api/Dockerfile new file mode 100644 index 0000000..c431d5c --- /dev/null +++ b/api/Dockerfile @@ -0,0 +1,25 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Copy requirements first for better caching +COPY requirements.txt . + +# Install dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY . . 
+ +# Create a non-root user and change ownership +RUN useradd -m -u 1000 appuser && \ + chown -R appuser:appuser /app + +# Switch to non-root user +USER appuser + +# Expose port +EXPOSE 5001 + +# Run the application +CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "5001"] diff --git a/api/config.py b/api/config.py new file mode 100644 index 0000000..7fccfa8 --- /dev/null +++ b/api/config.py @@ -0,0 +1,49 @@ +""" +Configuration settings for Code Translation API +""" + +import os +from dotenv import load_dotenv + +# Load environment variables from .env file +load_dotenv() + +# Inference Provider: "remote" (OpenAI-compatible) or "ollama" (local) +INFERENCE_PROVIDER = os.getenv("INFERENCE_PROVIDER", "remote") + +# Inference API Configuration +INFERENCE_API_ENDPOINT = os.getenv( + "INFERENCE_API_ENDPOINT", + "http://host.docker.internal:11434" if os.getenv("INFERENCE_PROVIDER", "remote") == "ollama" else None +) +INFERENCE_API_TOKEN = os.getenv("INFERENCE_API_TOKEN") +INFERENCE_MODEL_NAME = os.getenv( + "INFERENCE_MODEL_NAME", + "codellama:34b" if os.getenv("INFERENCE_PROVIDER", "remote") == "ollama" else "codellama/CodeLlama-34b-Instruct-hf" +) + +# Application Settings +APP_TITLE = "CodeTrans API" +APP_DESCRIPTION = "AI-powered code translation service" +APP_VERSION = "2.0.0" + +# File Upload Settings +MAX_FILE_SIZE = 10 * 1024 * 1024 # 10MB +ALLOWED_EXTENSIONS = {".pdf"} + +# Code Translation Settings +SUPPORTED_LANGUAGES = ["java", "c", "cpp", "python", "rust", "go"] +# MAX_CODE_LENGTH: For Enterprise Inference with CodeLlama-34b (max tokens: 5196) +# Set to 4000 characters to stay safely under the token limit with prompt overhead +MAX_CODE_LENGTH = int(os.getenv("MAX_CODE_LENGTH", "4000")) # characters +LLM_TEMPERATURE = float(os.getenv("LLM_TEMPERATURE", "0.2")) # Lower temperature for more deterministic code generation +LLM_MAX_TOKENS = int(os.getenv("LLM_MAX_TOKENS", "4096")) + +# SSL Verification Settings +VERIFY_SSL = os.getenv("VERIFY_SSL", 
"true").lower() == "true" + +# CORS Settings +CORS_ALLOW_ORIGINS = ["*"] # Update with specific origins in production +CORS_ALLOW_CREDENTIALS = True +CORS_ALLOW_METHODS = ["*"] +CORS_ALLOW_HEADERS = ["*"] diff --git a/api/models.py b/api/models.py new file mode 100644 index 0000000..3e00230 --- /dev/null +++ b/api/models.py @@ -0,0 +1,69 @@ +""" +Pydantic models for request/response validation +""" + +from pydantic import BaseModel, Field +from typing import Optional + + +class TranslateRequest(BaseModel): + """Request model for code translation""" + source_code: str = Field(..., min_length=1, description="Source code to translate") + source_language: str = Field(..., description="Source programming language") + target_language: str = Field(..., description="Target programming language") + + class Config: + json_schema_extra = { + "example": { + "source_code": "def hello():\n print('Hello World')", + "source_language": "python", + "target_language": "java" + } + } + + +class TranslateResponse(BaseModel): + """Response model for code translation""" + translated_code: str = Field(..., description="Translated code") + source_language: str = Field(..., description="Source language") + target_language: str = Field(..., description="Target language") + original_code: str = Field(..., description="Original source code") + + class Config: + json_schema_extra = { + "example": { + "translated_code": "public class Main {\n public static void main(String[] args) {\n System.out.println(\"Hello World\");\n }\n}", + "source_language": "python", + "target_language": "java", + "original_code": "def hello():\n print('Hello World')" + } + } + + +class UploadPdfResponse(BaseModel): + """Response model for PDF upload""" + message: str = Field(..., description="Success message") + extracted_code: str = Field(..., description="Extracted code from PDF") + status: str = Field(..., description="Operation status") + + class Config: + json_schema_extra = { + "example": { + "message": 
"Successfully extracted code from 'code.pdf'", + "extracted_code": "def hello():\n print('Hello World')", + "status": "success" + } + } + + +class HealthResponse(BaseModel): + """Response model for health check""" + status: str = Field(..., description="Health status") + model_configured: bool = Field(..., description="Whether model is configured") + inference_authenticated: bool = Field(..., description="Whether inference API auth is successful") + inference_provider: Optional[str] = Field(None, description="Active inference provider (remote or ollama)") + + +class SupportedLanguagesResponse(BaseModel): + """Response model for supported languages""" + languages: list[str] = Field(..., description="List of supported programming languages") diff --git a/api/requirements.txt b/api/requirements.txt new file mode 100644 index 0000000..e18ac5f --- /dev/null +++ b/api/requirements.txt @@ -0,0 +1,9 @@ +fastapi==0.115.5 +uvicorn==0.32.1 +pydantic==2.10.3 +pydantic-settings==2.6.1 +python-multipart>=0.0.18 +requests==2.32.3 +httpx==0.28.1 +openai==1.57.2 +pypdf==6.1.1 diff --git a/api/server.py b/api/server.py new file mode 100644 index 0000000..40f4c46 --- /dev/null +++ b/api/server.py @@ -0,0 +1,234 @@ +""" +FastAPI server with routes for Code Translation API +""" + +import os +import tempfile +import logging +from contextlib import asynccontextmanager +from fastapi import FastAPI, File, UploadFile, HTTPException, status +from fastapi.middleware.cors import CORSMiddleware + +import config +from models import ( + TranslateRequest, TranslateResponse, UploadPdfResponse, + HealthResponse, SupportedLanguagesResponse +) +from services import ( + get_api_client, extract_code_from_pdf, validate_pdf_file +) + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' +) +logger = logging.getLogger(__name__) + + +@asynccontextmanager +async def lifespan(app: FastAPI): + """Lifespan context manager for FastAPI 
@app.post("/translate", response_model=TranslateResponse)
def translate_code_endpoint(request: TranslateRequest):
    """
    Translate code from one language to another

    - **source_code**: Code to translate
    - **source_language**: Source programming language (java, c, cpp, python, rust, go)
    - **target_language**: Target programming language (java, c, cpp, python, rust, go)
    """
    # The lifespan hook sets api_client to None when initialization failed.
    if app.state.api_client is None:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="API client not initialized. Check inference API configuration."
        )

    # Reject any language outside the supported set (compared case-insensitively,
    # source first so its error takes precedence when both are invalid).
    supported = ", ".join(config.SUPPORTED_LANGUAGES)
    for role, lang in (("Source", request.source_language), ("Target", request.target_language)):
        if lang.lower() not in config.SUPPORTED_LANGUAGES:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=f"{role} language '{lang}' not supported. Supported: {supported}"
            )

    # Guard the model's context window.
    if len(request.source_code) > config.MAX_CODE_LENGTH:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Code too long. Maximum length is {config.MAX_CODE_LENGTH} characters"
        )

    try:
        logger.info(f"Translating code from {request.source_language} to {request.target_language}")

        result = app.state.api_client.translate_code(
            source_code=request.source_code,
            source_lang=request.source_language,
            target_lang=request.target_language
        )

        # An empty string from the client means the model produced no usable output.
        if not result:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Translation failed. No output received from model."
            )

        logger.info(f"Successfully translated code")

        return TranslateResponse(
            translated_code=result,
            source_language=request.source_language,
            target_language=request.target_language,
            original_code=request.source_code
        )

    except HTTPException:
        # Re-raise our own HTTP errors untouched.
        raise
    except Exception as e:
        logger.error(f"Error translating code: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error translating code: {str(e)}"
        )
class APIClient:
    """
    Client for handling inference API calls.

    Supports remote OpenAI-compatible APIs (e.g. CodeLlama via enterprise
    gateway) and local Ollama instances.
    """

    def __init__(self):
        # Endpoint, token and provider all come from environment-driven config.
        self.endpoint = config.INFERENCE_API_ENDPOINT
        self.token = config.INFERENCE_API_TOKEN
        self.provider = config.INFERENCE_PROVIDER
        # Shared HTTP client so VERIFY_SSL is honoured for every request.
        # Ollama doesn't need auth; a dummy key keeps the OpenAI SDK happy.
        self.http_client = httpx.Client(verify=config.VERIFY_SSL)

    def get_inference_client(self):
        """
        Get an OpenAI-compatible client configured for the active provider.

        The OpenAI SDK requires an api_key even for Ollama, which ignores it,
        so "ollama" is used as a placeholder when no token is configured.
        """
        from openai import OpenAI

        api_key = self.token if self.token else "ollama"
        return OpenAI(
            api_key=api_key,
            base_url=f"{self.endpoint}/v1",
            http_client=self.http_client
        )

    def translate_code(self, source_code: str, source_lang: str, target_lang: str) -> str:
        """
        Translate code from one language to another.

        Uses text completions for remote providers (e.g. CodeLlama enterprise gateway)
        and chat completions for Ollama (more reliable with local models).
        """
        client = self.get_inference_client()

        if self.provider == "ollama":
            return self._translate_via_chat(client, source_code, source_lang, target_lang)
        return self._translate_via_completions(client, source_code, source_lang, target_lang)

    def _translate_via_completions(self, client, source_code: str, source_lang: str, target_lang: str) -> str:
        """Text completions endpoint - for remote OpenAI-compatible gateways."""
        # The trailing ``` opens a code fence; stop=["```"] ends generation
        # when the model closes it, so only the code body is returned.
        prompt = f"""Translate the following {source_lang} code to {target_lang}.
Only output the translated code without any explanations or markdown formatting.

{source_lang} code:
```
{source_code}
```

{target_lang} code:
```"""

        logger.info(f"[remote] Translating {source_lang} → {target_lang} via completions")

        response = client.completions.create(
            model=config.INFERENCE_MODEL_NAME,
            prompt=prompt,
            max_tokens=config.LLM_MAX_TOKENS,
            temperature=config.LLM_TEMPERATURE,
            stop=["```"]
        )

        if hasattr(response, 'choices') and response.choices:
            translated = response.choices[0].text.strip()
            logger.info(f"Translation complete ({len(translated)} chars)")
            return translated

        logger.error(f"Unexpected completions response: {response}")
        return ""

    def _translate_via_chat(self, client, source_code: str, source_lang: str, target_lang: str) -> str:
        """Chat completions endpoint - for Ollama local inference."""
        system_prompt = (
            "You are an expert code translator. "
            "When asked to translate code, output ONLY the translated code with no explanations, "
            "no markdown fences, and no comments unless they were in the original."
        )
        user_prompt = (
            f"Translate this {source_lang} code to {target_lang}:\n\n{source_code}"
        )

        logger.info(f"[ollama] Translating {source_lang} → {target_lang} via chat completions")

        response = client.chat.completions.create(
            model=config.INFERENCE_MODEL_NAME,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            max_tokens=config.LLM_MAX_TOKENS,
            temperature=config.LLM_TEMPERATURE,
        )

        if hasattr(response, 'choices') and response.choices:
            content = response.choices[0].message.content or ""
            # Strip markdown fences if the model still adds them despite the
            # system prompt: drop a leading ```lang line and a trailing ```.
            translated = content.strip()
            if translated.startswith("```"):
                lines = translated.split("\n")
                translated = "\n".join(lines[1:])
            if translated.endswith("```"):
                translated = translated[: translated.rfind("```")].rstrip()
            logger.info(f"Translation complete ({len(translated)} chars)")
            return translated

        logger.error(f"Unexpected chat response: {response}")
        return ""

    def is_authenticated(self) -> bool:
        """For Ollama, always returns True (no auth needed). For remote, checks token."""
        if self.provider == "ollama":
            return True
        return self.token is not None

    def __del__(self):
        # Defensive cleanup: __init__ may have failed before http_client was
        # assigned (the original unguarded access raised AttributeError then),
        # and exceptions escaping __del__ only produce stderr noise — so both
        # the lookup and the close are best-effort.
        client = getattr(self, "http_client", None)
        if client is not None:
            try:
                client.close()
            except Exception:
                pass


# Process-wide singleton so every request shares one httpx connection pool.
_api_client: Optional[APIClient] = None


def get_api_client() -> APIClient:
    """Return the shared APIClient, creating it on first use."""
    global _api_client
    if _api_client is None:
        _api_client = APIClient()
    return _api_client
"""
PDF Code Extraction Service
Extracts code snippets from PDF documents
"""

import logging
import re
from pathlib import Path
from typing import List

logger = logging.getLogger(__name__)


def extract_code_from_pdf(pdf_path: str) -> str:
    """
    Extract code content from a PDF file.

    Args:
        pdf_path: Path to the PDF file

    Returns:
        Extracted code as string (full page text when no code pattern is found)

    Raises:
        Exception: if the PDF cannot be processed
    """
    # Imported lazily so the pure-text helpers below remain usable (and
    # testable) in environments without pypdf installed.
    from pypdf import PdfReader

    try:
        logger.info(f"Extracting code from PDF: {pdf_path}")

        with open(pdf_path, 'rb') as file:
            pdf_reader = PdfReader(file)
            num_pages = len(pdf_reader.pages)
            logger.info(f"PDF has {num_pages} pages")

            # Concatenate the text of every page; pages are read lazily, so
            # this must happen while the file is still open.
            all_text = ""
            for page in pdf_reader.pages:
                all_text += page.extract_text() + "\n"

        logger.info(f"Extracted {len(all_text)} characters from PDF")

        # Prefer recognizable code blocks; fall back to the full page text
        # so a plain-text PDF of code still yields something useful.
        code_content = extract_code_patterns(all_text)
        if not code_content.strip():
            code_content = all_text

        logger.info(f"Extracted code content: {len(code_content)} characters")
        return code_content.strip()

    except Exception as e:
        logger.error(f"Error extracting code from PDF: {str(e)}", exc_info=True)
        # Re-wrap with a stable message; chain the cause for debuggability.
        raise Exception(f"Failed to extract code from PDF: {str(e)}") from e


def extract_code_patterns(text: str) -> str:
    """
    Extract code-looking snippets from free text.

    Args:
        text: Text content to search

    Returns:
        Matched code snippets joined by blank lines, or the original text
        when no pattern matches.
    """
    code_blocks: List[str] = []

    # Pattern 1: fenced markdown code blocks (``` ... ```)
    code_blocks.extend(re.findall(r'```[\w]*\n(.*?)\n```', text, re.DOTALL))

    # Pattern 2: contiguous indented code blocks (4+ leading spaces).
    # The newline is consumed inside the group so the + quantifier can span
    # several consecutive indented lines (with `^...$` alone it never could,
    # because `$` does not consume the newline).
    code_blocks.extend(re.findall(r'(?:^[ ]{4,}.+\n?)+', text, re.MULTILINE))

    # Pattern 3: language keyword heuristics (best-effort, brace-based
    # matches are non-nested and may truncate deeply nested bodies).
    keyword_patterns = [
        r'(?:public|private|protected)?\s*class\s+\w+.*?\{.*?\}',  # Java/C++ classes
        r'def\s+\w+\(.*?\):.*?(?=\n(?!\s))',                       # Python functions
        r'function\s+\w+\(.*?\)\s*\{.*?\}',                        # JavaScript functions
        r'fn\s+\w+\(.*?\)\s*\{.*?\}',                              # Rust functions
        r'func\s+\w+\(.*?\)\s*\{.*?\}',                            # Go functions
    ]
    for pattern in keyword_patterns:
        code_blocks.extend(re.findall(pattern, text, re.DOTALL | re.MULTILINE))

    if code_blocks:
        return '\n\n'.join(code_blocks)

    # Nothing matched: hand back the original text untouched.
    return text


def validate_pdf_file(filename: str, file_size: int, max_size: int) -> None:
    """
    Validate an uploaded PDF file.

    Args:
        filename: Name of the file
        file_size: Size of the file in bytes
        max_size: Maximum allowed file size in bytes

    Raises:
        ValueError: if the extension, size limit, or non-empty check fails
    """
    # Check file extension
    if not filename.lower().endswith('.pdf'):
        raise ValueError("Only PDF files are allowed")

    # Check file size
    if file_size > max_size:
        max_size_mb = max_size / (1024 * 1024)
        raise ValueError(f"File too large. Maximum size is {max_size_mb}MB")

    if file_size == 0:
        raise ValueError("Empty file uploaded")

    # Fixed: the log line previously emitted a literal placeholder instead of
    # the validated filename.
    logger.info(f"PDF file validation passed: {filename} ({file_size / 1024:.2f} KB)")
+.DS_Store +.env +.env.local +.env.production +dist +build +coverage +*.log diff --git a/ui/Dockerfile b/ui/Dockerfile new file mode 100644 index 0000000..015b33d --- /dev/null +++ b/ui/Dockerfile @@ -0,0 +1,41 @@ +# Build stage +FROM node:18-alpine as build + +WORKDIR /app + +# Copy package.json +COPY package.json ./ + +# Install dependencies +RUN npm install + +# Copy application code +COPY . . + +# Build the application +RUN npm run build + +# Production stage +FROM nginx:alpine + +# Copy built assets from build stage +COPY --from=build /app/dist /usr/share/nginx/html + +# Copy nginx configuration +COPY nginx.conf /etc/nginx/conf.d/default.conf + +# Create a non-root user and adjust permissions +RUN adduser -D -u 1000 appuser && \ + chown -R appuser:appuser /usr/share/nginx/html && \ + chown -R appuser:appuser /var/cache/nginx && \ + chown -R appuser:appuser /var/log/nginx && \ + chown -R appuser:appuser /etc/nginx/conf.d && \ + touch /var/run/nginx.pid && \ + chown -R appuser:appuser /var/run/nginx.pid + +# Switch to non-root user +USER appuser + +EXPOSE 8080 + +CMD ["nginx", "-g", "daemon off;"] diff --git a/ui/index.html b/ui/index.html new file mode 100644 index 0000000..d791e45 --- /dev/null +++ b/ui/index.html @@ -0,0 +1,23 @@ + + + + + + + CodeTrans — AI Code Translator + + + +
+
+
+
diff --git a/ui/nginx.conf b/ui/nginx.conf
new file mode 100644
index 0000000..a6980e9
--- /dev/null
+++ b/ui/nginx.conf
@@ -0,0 +1,23 @@
+server {
+    listen 8080;
+    server_name localhost;
+    root /usr/share/nginx/html;
+    index index.html;
+
+    location / {
+        try_files $uri $uri/ /index.html;
+    }
+
+    location /api/ {
+        rewrite ^/api/(.*)$ /$1 break;
+        proxy_pass http://transpiler-api:5001;
+        proxy_http_version 1.1;
+        proxy_set_header Upgrade $http_upgrade;
+        proxy_set_header Connection 'upgrade';
+        proxy_set_header Host $host;
+        proxy_cache_bypass $http_upgrade;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+    }
+}
diff --git a/ui/package.json b/ui/package.json
new file mode 100644
index 0000000..310f586
--- /dev/null
+++ b/ui/package.json
@@ -0,0 +1,31 @@
+{
+  "name": "code-trans-ui",
+  "version": "1.0.0",
+  "private": true,
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "vite build",
+    "preview": "vite preview",
+    "lint": "eslint . --ext js,jsx --report-unused-disable-directives --max-warnings 0"
+  },
+  "dependencies": {
+    "react": "^18.2.0",
+    "react-dom": "^18.2.0",
+    "axios": "^1.6.0",
+    "lucide-react": "^0.294.0"
+  },
+  "devDependencies": {
+    "@types/react": "^18.2.43",
+    "@types/react-dom": "^18.2.17",
+    "@vitejs/plugin-react": "^4.2.1",
+    "autoprefixer": "^10.4.16",
+    "eslint": "^8.55.0",
+    "eslint-plugin-react": "^7.33.2",
+    "eslint-plugin-react-hooks": "^4.6.0",
+    "eslint-plugin-react-refresh": "^0.4.5",
+    "postcss": "^8.4.32",
+    "tailwindcss": "^3.3.6",
+    "vite": "^5.0.8"
+  }
+}
diff --git a/ui/postcss.config.js b/ui/postcss.config.js
new file mode 100644
index 0000000..2e7af2b
--- /dev/null
+++ b/ui/postcss.config.js
@@ -0,0 +1,6 @@
+export default {
+  plugins: {
+    tailwindcss: {},
+    autoprefixer: {},
+  },
+}
diff --git a/ui/src/App.jsx b/ui/src/App.jsx
new file mode 100644
index 0000000..6c72401
--- /dev/null
+++ b/ui/src/App.jsx
@@ -0,0 +1,70 @@
+import { useState, useEffect } from 'react'
+import CodeTranslator from './components/CodeTranslator'
+import PDFUploader from './components/PDFUploader'
+import Header from './components/Header'
+
+function App() {
+  const [darkMode, setDarkMode] = useState(() => {
+    const saved = localStorage.getItem('darkMode')
+    return saved !== null ? JSON.parse(saved) : true
+  })
+  const [translationStatus, setTranslationStatus] = useState('idle')
+  const [sourceLanguage, setSourceLanguage] = useState('python')
+  const [targetLanguage, setTargetLanguage] = useState('java')
+  const [pdfExtractedCode, setPdfExtractedCode] = useState('')
+  const [isUploading, setIsUploading] = useState(false)
+
+  useEffect(() => {
+    localStorage.setItem('darkMode', JSON.stringify(darkMode))
+    if (darkMode) {
+      document.documentElement.classList.add('dark')
+    } else {
+      document.documentElement.classList.remove('dark')
+    }
+  }, [darkMode])
+
+  const handleTranslationStart = () => setTranslationStatus('translating')
+  const handleTranslationSuccess = () => {
+    setTranslationStatus('success')
+    setTimeout(() => setTranslationStatus('idle'), 4000)
+  }
+  const handleTranslationError = () => {
+    setTranslationStatus('error')
+    setTimeout(() => setTranslationStatus('idle'), 4000)
+  }
+  const handlePDFUploadSuccess = (extractedCode) => {
+    setPdfExtractedCode(extractedCode)
+    setIsUploading(false)
+  }
+  const handlePDFUploadStart = () => setIsUploading(true)
+
+  return (
+
setDarkMode(d => !d)} /> + +
+ + +
+ +
+
+
+  )
+}
+
+export default App
diff --git a/ui/src/components/CodeTranslator.jsx b/ui/src/components/CodeTranslator.jsx
new file mode 100644
index 0000000..3463714
--- /dev/null
+++ b/ui/src/components/CodeTranslator.jsx
@@ -0,0 +1,241 @@
+import { useState, useEffect } from 'react'
+import { ArrowRight, ArrowLeftRight, Copy, Check, Loader2, Sparkles, CheckCircle2, XCircle, Clock } from 'lucide-react'
+import axios from 'axios'
+
+const LANGUAGES = ['java', 'c', 'cpp', 'python', 'rust', 'go']
+
+const LANGUAGE_META = {
+  java: { label: 'Java', color: 'text-orange-400', bg: 'dark:bg-orange-900/20 dark:border-orange-700/40 bg-orange-50 border-orange-200' },
+  c: { label: 'C', color: 'text-blue-400', bg: 'dark:bg-blue-900/20 dark:border-blue-700/40 bg-blue-50 border-blue-200' },
+  cpp: { label: 'C++', color: 'text-sky-400', bg: 'dark:bg-sky-900/20 dark:border-sky-700/40 bg-sky-50 border-sky-200' },
+  python: { label: 'Python', color: 'text-yellow-400', bg: 'dark:bg-yellow-900/20 dark:border-yellow-700/40 bg-yellow-50 border-yellow-200' },
+  rust: { label: 'Rust', color: 'text-red-400', bg: 'dark:bg-red-900/20 dark:border-red-700/40 bg-red-50 border-red-200' },
+  go: { label: 'Go', color: 'text-cyan-400', bg: 'dark:bg-cyan-900/20 dark:border-cyan-700/40 bg-cyan-50 border-cyan-200' },
+}
+
+const MAX_CHARS = parseInt(import.meta.env.VITE_MAX_CODE_LENGTH || '8000')
+const API_URL = import.meta.env.VITE_API_URL || '/api'
+
+function StatusPill({ translationStatus, isUploading }) {
+  if (isUploading) return (
+
+      Extracting PDF...
+
+  )
+  if (translationStatus === 'translating') return (
+
+      Translating...
+
+  )
+  if (translationStatus === 'success') return (
+
+      Done
+
+  )
+  if (translationStatus === 'error') return (
+
+      Failed
+
+  )
+  return (
+
+      Ready
+
+  )
+}
+
+function LanguageSelector({ value, onChange, label }) {
+  return (
+
+      {label}
+
+      {LANGUAGES.map(lang => {
+        const meta = LANGUAGE_META[lang]
+        const isActive = value === lang
+        return (
+
+        )
+      })}
+
+  )
+}
+
+export default function CodeTranslator({
+  onTranslationStart, onTranslationSuccess, onTranslationError,
+  translationStatus, isUploading,
+  pdfExtractedCode, sourceLanguage, targetLanguage,
+  onSourceLanguageChange, onTargetLanguageChange
+}) {
+  const [sourceCode, setSourceCode] = useState('')
+  const [translatedCode, setTranslatedCode] = useState('')
+  const [isTranslating, setIsTranslating] = useState(false)
+  const [copied, setCopied] = useState(false)
+  const [errorMsg, setErrorMsg] = useState('')
+
+  useEffect(() => {
+    if (pdfExtractedCode) setSourceCode(pdfExtractedCode)
+  }, [pdfExtractedCode])
+
+  const handleSwapLanguages = () => {
+    const prevSource = sourceLanguage
+    const prevTarget = targetLanguage
+    onSourceLanguageChange(prevTarget)
+    onTargetLanguageChange(prevSource)
+    if (translatedCode) {
+      setSourceCode(translatedCode)
+      setTranslatedCode('')
+    }
+  }
+
+  const handleTranslate = async () => {
+    if (!sourceCode.trim()) { setErrorMsg('Please enter some code to translate.'); return }
+    if (sourceLanguage === targetLanguage) { setErrorMsg('Source and target languages must be different.'); return }
+    if (sourceCode.length > MAX_CHARS) { setErrorMsg(`Code exceeds ${MAX_CHARS.toLocaleString()} character limit.`); return }
+
+    setErrorMsg('')
+    setIsTranslating(true)
+    onTranslationStart()
+
+    try {
+      const response = await axios.post(`${API_URL}/translate`, {
+        source_code: sourceCode,
+        source_language: sourceLanguage,
+        target_language: targetLanguage
+      })
+      setTranslatedCode(response.data.translated_code)
+      onTranslationSuccess()
+    } catch (error) {
+      console.error('Translation error:', error)
+      setErrorMsg(error.response?.data?.detail || 'Translation failed. Please check the backend connection.')
+      onTranslationError()
+    } finally {
+      setIsTranslating(false)
+    }
+  }
+
+  const handleCopy = () => {
+    navigator.clipboard.writeText(translatedCode)
+    setCopied(true)
+    setTimeout(() => setCopied(false), 2000)
+  }
+
+  const charCount = sourceCode.length
+  const overLimit = charCount > MAX_CHARS
+
+  return (
+ {/* Header row */} +
+
+ +

Translate Code

+
+ +
+ + {/* Language selectors */} +
+ + + {/* Swap button */} + + + +
+ + {/* Code panels */} +
+ {/* Source */} +
+
+
+ + {LANGUAGE_META[sourceLanguage].label} + + input +
+ + {charCount.toLocaleString()} / {MAX_CHARS.toLocaleString()} + +
+