diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..2f0accf --- /dev/null +++ b/.env.example @@ -0,0 +1,94 @@ +# ============================================================ +# CodeTrans — Environment Configuration +# ============================================================ + +# Backend port +BACKEND_PORT=5001 + +# ============================================================ +# Inference Provider +# ============================================================ +# "remote" — Cloud or enterprise OpenAI-compatible API (e.g. CodeLlama via gateway) +# "ollama" — Local Ollama running natively on the host machine (recommended for Mac) +INFERENCE_PROVIDER=remote + +# ============================================================ +# Option A: Remote OpenAI-compatible API (INFERENCE_PROVIDER=remote) +# ============================================================ +# INFERENCE_API_ENDPOINT: Base URL of your inference service (no /v1 suffix) +# - GenAI Gateway: https://genai-gateway.example.com +# - APISIX Gateway: https://apisix-gateway.example.com/CodeLlama-34b-Instruct-hf +INFERENCE_API_ENDPOINT=https://your-api-endpoint.com/deployment +INFERENCE_API_TOKEN=your-pre-generated-token-here +INFERENCE_MODEL_NAME=codellama/CodeLlama-34b-Instruct-hf + +# ============================================================ +# Option B: Ollama — native host inference (INFERENCE_PROVIDER=ollama) +# ============================================================ +# +# IMPORTANT — Why Ollama runs on the host, NOT in Docker: +# On macOS (Apple Silicon / M-series), running Ollama as a Docker container +# bypasses Metal GPU acceleration. The model falls back to CPU-only inference +# which is dramatically slower. Ollama must be installed natively so the Metal +# Performance Shaders (MPS) backend is used for hardware-accelerated inference. +# +# Setup: +# 1. Install Ollama: https://ollama.com/download +# 2. Pull your model (see options below) +# 3. 
Ollama starts automatically; confirm it is running: +# curl http://localhost:11434/api/tags +# 4. Set the variables below in your .env +# +# The backend container reaches host-side Ollama via the special DNS name +# `host.docker.internal` which Docker Desktop resolves to the Mac host. +# (On Linux with Docker Engine this requires the extra_hosts entry in docker-compose.yaml, +# which is already configured.) +# +# --- Production / high-quality translation --- +# INFERENCE_PROVIDER=ollama +# INFERENCE_API_ENDPOINT=http://host.docker.internal:11434 +# INFERENCE_MODEL_NAME=codellama:34b +# ollama pull codellama:34b # ~20 GB, best quality +# +# --- Testing / SLM performance benchmarking --- +# INFERENCE_PROVIDER=ollama +# INFERENCE_API_ENDPOINT=http://host.docker.internal:11434 +# INFERENCE_MODEL_NAME=codellama:7b +# ollama pull codellama:7b # ~4 GB, fast — use this for gauging SLM perf +# +# --- Other recommended code models --- +# ollama pull deepseek-coder:6.7b # ~4 GB, strong at code tasks +# ollama pull qwen2.5-coder:7b # ~4 GB, excellent multilingual code +# ollama pull codellama:13b # ~8 GB, good balance of speed vs quality +# +# Note: INFERENCE_API_TOKEN is not required when using Ollama. 
+ +# ============================================================ +# LLM Settings +# ============================================================ +LLM_TEMPERATURE=0.2 +LLM_MAX_TOKENS=4096 + +# ============================================================ +# Code Translation Settings +# ============================================================ +MAX_CODE_LENGTH=8000 +MAX_FILE_SIZE=10485760 + +# ============================================================ +# CORS Configuration +# ============================================================ +CORS_ALLOW_ORIGINS=["http://localhost:5173", "http://localhost:3000"] + +# ============================================================ +# Local URL Endpoint +# ============================================================ +# Only needed if your remote API endpoint is a private domain mapped in /etc/hosts. +# Otherwise leave as "not-needed". +LOCAL_URL_ENDPOINT=not-needed + +# ============================================================ +# SSL Verification +# ============================================================ +# Set to false only for development with self-signed certificates. +VERIFY_SSL=true diff --git a/.github/workflows/code-scans.yaml b/.github/workflows/code-scans.yaml new file mode 100644 index 0000000..2029a2f --- /dev/null +++ b/.github/workflows/code-scans.yaml @@ -0,0 +1,104 @@ +name: SDLE Scans + +on: + workflow_dispatch: + inputs: + PR_number: + description: 'Pull request number' + required: true + push: + branches: [ main ] + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + +concurrency: + group: sdle-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + +# ----------------------------- +# 1) Trivy Scan +# ----------------------------- + trivy_scan: + name: Trivy Vulnerability Scan + runs-on: ubuntu-latest + env: + TRIVY_REPORT_FORMAT: table + TRIVY_SCAN_TYPE: fs + TRIVY_SCAN_PATH: . 
+ TRIVY_EXIT_CODE: '1' + TRIVY_VULN_TYPE: os,library + TRIVY_SEVERITY: CRITICAL,HIGH + steps: + - uses: actions/checkout@v4 + + - name: Create report directory + run: mkdir -p trivy-reports + + - name: Run Trivy FS Scan + uses: aquasecurity/trivy-action@0.24.0 + with: + scan-type: 'fs' + scan-ref: '.' + scanners: 'vuln,misconfig,secret,license' + ignore-unfixed: true + format: 'table' + exit-code: '1' + output: 'trivy-reports/trivy_scan_report.txt' + vuln-type: 'os,library' + severity: 'CRITICAL,HIGH' + + - name: Upload Trivy Report + uses: actions/upload-artifact@v4 + with: + name: trivy-report + path: trivy-reports/trivy_scan_report.txt + + - name: Show Trivy Report in Logs + if: failure() + run: | + echo "========= TRIVY FINDINGS =========" + cat trivy-reports/trivy_scan_report.txt + echo "=================================" + +# ----------------------------- +# 2) Bandit Scan +# ----------------------------- + bandit_scan: + name: Bandit security scan + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + submodules: 'recursive' + fetch-depth: 0 + + - uses: actions/setup-python@v5 + with: + python-version: "3.x" + + - name: Install Bandit + run: pip install bandit + + - name: Create Bandit configuration + shell: bash + run: | + cat > .bandit << 'EOF' + [bandit] + exclude_dirs = tests,test,venv,.venv,node_modules + skips = B101 + EOF + + - name: Run Bandit scan + run: | + bandit -r . -ll -iii -f screen + bandit -r . 
-ll -iii -f html -o bandit-report.html + + - name: Upload Bandit Report + uses: actions/upload-artifact@v4 + with: + name: bandit-report + path: bandit-report.html + retention-days: 30 diff --git a/.gitignore b/.gitignore index 6d7a1cd..a2b9f02 100644 --- a/.gitignore +++ b/.gitignore @@ -63,10 +63,24 @@ temp/ # Python type checker cache .mypy_cache/ +# Testing +.pytest_cache/ +.coverage +htmlcov/ +.tox/ +.cache/ + # Security scan outputs bandit-*.html bandit-*.txt +# Local project references (not part of this repo) +Audify/ + +# Langfuse observability stack (local testing only, never commit) +langfuse/ +api/services/observability.py + # Reference documents (local working files, not part of this repo) *.docx *.docx.pdf diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..38c27b1 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,27 @@ +# Contributing to CodeTrans + +Thank you for your interest in contributing to **CodeTrans — AI-Powered Code Translation** by Cloud2 Labs. + +## Scope of Contributions + +Appropriate contributions include: + +- Documentation improvements +- Bug fixes +- Reference architecture enhancements +- Additional LLM provider configurations +- Educational clarity and examples + +Major feature additions or architectural changes (e.g., new inference backends, +new supported languages, UI framework changes) require prior discussion with the +Cloud2 Labs maintainers. + +## Contribution Guidelines + +- Follow existing coding and documentation standards +- Avoid production-specific assumptions +- Do not introduce sensitive, proprietary, or regulated data into examples or tests +- Ensure any new environment variables are documented in `.env.example` and the README + +By submitting a contribution, you agree that your work may be used, modified, +and redistributed by Cloud2 Labs under the terms of the project license. 
diff --git a/DISCLAIMER.md b/DISCLAIMER.md new file mode 100644 index 0000000..2366bce --- /dev/null +++ b/DISCLAIMER.md @@ -0,0 +1,21 @@ +# Disclaimer + +This blueprint is provided by Cloud2 Labs "as is" and "as available" for +educational and demonstration purposes only. + +The **CodeTrans — AI-Powered Code Translation** blueprint is a reference +implementation and does not constitute a production-ready system or +regulatory-compliant solution. + +This software is not designed to provide professional software engineering, +legal, or compliance advice. All code translations generated by this blueprint +require independent human review and validation before use in any production +system. + +Cloud2 Labs does not assume responsibility or liability for any data loss, +security incident, service disruption, regulatory non-compliance, or adverse +outcome resulting from the use or modification of this blueprint. + +Do not submit confidential, proprietary, or sensitive source code to third-party +inference API providers (OpenAI, Groq, OpenRouter, etc.) without first reviewing +their data handling, privacy, and retention policies. diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..8261ccf --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +MIT License + +© 2026 cld2labs + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..bd23533 --- /dev/null +++ b/README.md @@ -0,0 +1,623 @@ +

+ Company Logo +

+ +# CodeTrans — AI-Powered Code Translation + +An AI-powered full-stack application that translates source code between programming languages. Paste code (or upload a PDF), pick your source and target languages, and get idiomatic translated output in seconds — powered by any OpenAI-compatible LLM endpoint or a locally running Ollama model. + +--- + +## Table of Contents + +- [CodeTrans — AI-Powered Code Translation](#codetrans--ai-powered-code-translation) + - [Table of Contents](#table-of-contents) + - [Project Overview](#project-overview) + - [How It Works](#how-it-works) + - [Architecture](#architecture) + - [Architecture Diagram](#architecture-diagram) + - [Architecture Components](#architecture-components) + - [Service Components](#service-components) + - [Typical Flow](#typical-flow) + - [Get Started](#get-started) + - [Prerequisites](#prerequisites) + - [Verify Installation](#verify-installation) + - [Quick Start (Docker Deployment)](#quick-start-docker-deployment) + - [1. Clone the Repository](#1-clone-the-repository) + - [2. Configure the Environment](#2-configure-the-environment) + - [3. Build and Start the Application](#3-build-and-start-the-application) + - [4. Access the Application](#4-access-the-application) + - [5. Verify Services](#5-verify-services) + - [6. 
Stop the Application](#6-stop-the-application) + - [Local Development Setup](#local-development-setup) + - [Project Structure](#project-structure) + - [Usage Guide](#usage-guide) + - [Performance Tips](#performance-tips) + - [LLM Provider Configuration](#llm-provider-configuration) + - [OpenAI](#openai) + - [Groq](#groq) + - [Ollama](#ollama) + - [OpenRouter](#openrouter) + - [Custom OpenAI-Compatible API](#custom-openai-compatible-api) + - [Switching Providers](#switching-providers) + - [Environment Variables](#environment-variables) + - [Core LLM Configuration](#core-llm-configuration) + - [Generation Parameters](#generation-parameters) + - [File Upload Limits](#file-upload-limits) + - [Session Management](#session-management) + - [Server Configuration](#server-configuration) + - [Technology Stack](#technology-stack) + - [Backend](#backend) + - [Frontend](#frontend) + - [Troubleshooting](#troubleshooting) + - [Common Issues](#common-issues) + - [Debug Mode](#debug-mode) + - [License](#license) + - [Disclaimer](#disclaimer) + +--- + +## Project Overview + +**CodeTrans** demonstrates how code-specialized large language models can be used to translate source code between programming languages. It supports six languages — Java, C, C++, Python, Rust, and Go — and works with any OpenAI-compatible inference endpoint or a locally running Ollama instance. + +This makes CodeTrans suitable for: + +- **Enterprise deployments** — connect to a GenAI Gateway or any managed LLM API +- **Air-gapped environments** — run fully offline with Ollama and a locally hosted model +- **Local experimentation** — quick setup on a laptop with GPU-accelerated inference +- **Hardware benchmarking** — measure SLM throughput on Apple Silicon, CUDA, or Intel Gaudi hardware + +--- + +## How It Works + +1. The user pastes code or uploads a PDF in the browser. +2. The React frontend sends the source code and language selection to the FastAPI backend. +3. 
If a PDF was uploaded, a text extraction service pulls the code out of the document. +4. The backend constructs a structured prompt and calls the configured LLM endpoint (remote API or local Ollama). +5. The LLM returns the translated code, which is displayed in the output panel. +6. The user copies the result with one click. + +All inference logic is abstracted behind a single `INFERENCE_PROVIDER` environment variable — switching between providers requires only a `.env` change and a container restart. + +--- + +## Architecture + +The application follows a modular two-service architecture with a React frontend and a FastAPI backend. The backend handles all inference orchestration, PDF extraction, and optional LLM observability tracing. The inference layer is fully pluggable — any OpenAI-compatible remote endpoint or a locally running Ollama instance can be used without any code changes. + +### Architecture Diagram + +```mermaid +graph TB + subgraph "User Interface (port 3000)" + A[React Frontend] + A1[Code Input] + A2[PDF Upload] + A3[Language Selection] + end + + subgraph "FastAPI Backend (port 5001)" + B[API Server] + C[PDF Service] + D[API Client] + end + + subgraph "Inference - Option A: Remote" + E[OpenAI / Groq / OpenRouter
Enterprise Gateway] + end + + subgraph "Inference - Option B: Local" + F[Ollama on Host
host.docker.internal:11434] + end + + A1 --> B + A2 --> B + A3 --> B + B --> C + C -->|Extracted Code| B + B --> D + D -->|INFERENCE_PROVIDER=remote| E + D -->|INFERENCE_PROVIDER=ollama| F + E -->|Translated Code| D + F -->|Translated Code| D + D --> B + B --> A + + style A fill:#e1f5ff,color:#000 + style B fill:#fff4e1,color:#000 + style E fill:#e1ffe1,color:#000 + style F fill:#f3e5f5,color:#000 +``` + +### Architecture Components + +**Frontend (React + Vite)** +- Side-by-side code editor with language pill selectors for source and target +- PDF drag-and-drop upload that populates the source panel automatically +- Real-time character counter and live status indicator +- Dark mode (default) with `localStorage` persistence and flash prevention +- One-click copy of translated output +- Nginx serves the production build and proxies all `/api/` requests to the backend + +**Backend Services** +- **API Server** (`server.py`): FastAPI application with CORS middleware, request validation, and routing +- **API Client** (`services/api_client.py`): Handles both inference paths — text completions for remote endpoints and chat completions for Ollama — with token-based auth support +- **PDF Service** (`services/pdf_service.py`): Extracts code from uploaded PDF files using pattern recognition + +**External Integration** +- **Remote inference**: Any OpenAI-compatible API (OpenAI, Groq, OpenRouter, GenAI Gateway) +- **Local inference**: Ollama running natively on the host machine, accessed from the container via `host.docker.internal:11434` + +### Service Components + +| Service | Container | Host Port | Description | +|---|---|---|---| +| `transpiler-api` | `transpiler-api` | `5001` | FastAPI backend — input validation, PDF extraction, inference orchestration | +| `transpiler-ui` | `transpiler-ui` | `3000` | React frontend — served by Nginx, proxies `/api/` to the backend | + +> **Ollama is intentionally not a Docker service.** On macOS (Apple Silicon), running Ollama in Docker 
bypasses Metal GPU (MPS) acceleration, resulting in CPU-only inference. Ollama must run natively on the host so the backend container can reach it via `host.docker.internal:11434`. + +### Typical Flow + +1. User enters code or uploads a PDF in the web UI. +2. The backend validates the input; PDF text is extracted if needed. +3. The backend calls the configured inference endpoint (remote API or Ollama). +4. The model returns translated code, which is displayed in the right panel. +5. User copies the result with one click. + +--- + +## Get Started + +### Prerequisites + +Before you begin, ensure you have the following installed and configured: + +- **Docker and Docker Compose** (v2) + - [Install Docker](https://docs.docker.com/get-docker/) + - [Install Docker Compose](https://docs.docker.com/compose/install/) +- An inference endpoint — one of: + - A remote OpenAI-compatible API key (OpenAI, Groq, OpenRouter, or enterprise gateway) + - [Ollama](https://ollama.com/download) installed natively on the host machine + +#### Verify Installation + +```bash +docker --version +docker compose version +docker ps +``` + +### Quick Start (Docker Deployment) + +#### 1. Clone the Repository + +```bash +git clone https://github.com/cld2labs/CodeTrans.git +cd CodeTrans +``` + +#### 2. Configure the Environment + +```bash +cp .env.example .env +``` + +Open `.env` and set `INFERENCE_PROVIDER` plus the corresponding variables for your chosen provider. See [LLM Provider Configuration](#llm-provider-configuration) for per-provider instructions. + +#### 3. Build and Start the Application + +```bash +# Standard (attached) +docker compose up --build + +# Detached (background) +docker compose up -d --build +``` + +#### 4. Access the Application + +Once containers are running: + +- **Frontend UI**: http://localhost:3000 +- **Backend API**: http://localhost:5001 +- **API Docs (Swagger)**: http://localhost:5001/docs + +#### 5. 
Verify Services + +```bash +# Health check +curl http://localhost:5001/health + +# View running containers +docker compose ps +``` + +**View logs:** + +```bash +# All services +docker compose logs -f + +# Backend only +docker compose logs -f transpiler-api + +# Frontend only +docker compose logs -f transpiler-ui +``` + +#### 6. Stop the Application + +```bash +docker compose down +``` + +### Local Development Setup + +Run the backend and frontend directly on the host without Docker. + +**Backend (Python / FastAPI)** + +```bash +cd api +python -m venv .venv +source .venv/bin/activate # Windows: .venv\Scripts\activate +pip install -r requirements.txt +cp ../.env.example ../.env # configure your .env at the repo root +uvicorn server:app --reload --port 5001 +``` + +**Frontend (Node / Vite)** + +```bash +cd ui +npm install +npm run dev +``` + +The Vite dev server proxies `/api/` to `http://localhost:5001`. Open http://localhost:5173. + +--- + +## Project Structure + +``` +CodeTrans/ +├── api/ # FastAPI backend +│ ├── config.py # All environment-driven settings +│ ├── models.py # Pydantic request/response schemas +│ ├── server.py # FastAPI app, routes, and middleware +│ ├── services/ +│ │ ├── api_client.py # LLM inference client (remote + Ollama) +│ │ └── pdf_service.py # PDF text and code extraction +│ ├── Dockerfile +│ └── requirements.txt +├── ui/ # React frontend +│ ├── src/ +│ │ ├── App.jsx +│ │ ├── components/ +│ │ │ ├── CodeTranslator.jsx # Main editor panel +│ │ │ ├── Header.jsx +│ │ │ ├── PDFUploader.jsx +│ │ │ └── StatusBar.jsx +│ │ └── main.jsx +│ ├── Dockerfile +│ └── vite.config.js +├── docs/ +│ └── assets/ # Documentation images +├── docker-compose.yaml # Main orchestration file +├── .env.example # Environment variable reference +└── README.md +``` + +--- + +## Usage Guide + +**Translate code:** + +1. Open the application at http://localhost:3000. +2. Select the source language using the pill buttons at the top-left. +3. 
Select the target language using the pill buttons at the top-right. +4. Paste or type your code in the left panel. +5. Click **Translate Code**. +6. View the result in the right panel and click **Copy** to copy it to the clipboard. + +**Upload a PDF:** + +1. Scroll to the **Upload PDF** section below the code panels. +2. Drag and drop a PDF file, or click to browse. +3. Code is extracted automatically and placed in the source panel. +4. Select your languages and translate as normal. + +**Dark mode:** + +The app defaults to dark mode. Click the theme toggle in the header to switch to light mode. Your preference is saved in `localStorage`. + +--- + +## Performance Tips + +- **Use the largest model your hardware can sustain.** `codellama:34b` produces the best translation quality; `codellama:7b` is faster and good for benchmarking. +- **Lower `LLM_TEMPERATURE`** (e.g., `0.1`) for more deterministic, literal translations. Raise it slightly (e.g., `0.3–0.5`) if you want more idiomatic rewrites. +- **Keep inputs under `MAX_CODE_LENGTH`.** Shorter, focused snippets translate more accurately than entire files. Split large files by class or function. +- **On Apple Silicon**, always run Ollama natively — never inside Docker. The MPS (Metal) GPU backend delivers 5–10x the throughput of CPU-only inference. +- **On Linux with an NVIDIA GPU**, set `CUDA_VISIBLE_DEVICES` before starting Ollama to target a specific GPU. +- **For enterprise remote APIs**, choose a model with a large context window (≥16k tokens) to avoid truncation on longer inputs. + +--- + +## LLM Provider Configuration + +All providers are configured via the `.env` file. Set `INFERENCE_PROVIDER=remote` for any cloud or API-based provider, and `INFERENCE_PROVIDER=ollama` for local inference. + +### OpenAI + +```bash +INFERENCE_PROVIDER=remote +INFERENCE_API_ENDPOINT=https://api.openai.com +INFERENCE_API_TOKEN=sk-... +INFERENCE_MODEL_NAME=gpt-4o +``` + +Recommended models: `gpt-4o`, `gpt-4o-mini`, `gpt-4-turbo`. 
+ +### Groq + +Groq provides OpenAI-compatible endpoints with extremely fast inference (LPU hardware). + +```bash +INFERENCE_PROVIDER=remote +INFERENCE_API_ENDPOINT=https://api.groq.com/openai +INFERENCE_API_TOKEN=gsk_... +INFERENCE_MODEL_NAME=llama3-70b-8192 +``` + +Recommended models: `llama3-70b-8192`, `mixtral-8x7b-32768`, `llama-3.1-8b-instant`. + +### Ollama + +Runs inference locally on the host machine with full GPU acceleration. + +1. Install Ollama: https://ollama.com/download +2. Pull a model: + + ```bash + # Production — best translation quality (~20 GB) + ollama pull codellama:34b + + # Testing / SLM benchmarking (~4 GB, fast) + ollama pull codellama:7b + + # Other strong code models + ollama pull deepseek-coder:6.7b + ollama pull qwen2.5-coder:7b + ollama pull codellama:13b + ``` + +3. Confirm Ollama is running: + + ```bash + curl http://localhost:11434/api/tags + ``` + +4. Configure `.env`: + + ```bash + INFERENCE_PROVIDER=ollama + INFERENCE_API_ENDPOINT=http://host.docker.internal:11434 + INFERENCE_MODEL_NAME=codellama:7b + # INFERENCE_API_TOKEN is not required for Ollama + ``` + +### OpenRouter + +OpenRouter provides a unified API across hundreds of models from different providers. + +```bash +INFERENCE_PROVIDER=remote +INFERENCE_API_ENDPOINT=https://openrouter.ai/api +INFERENCE_API_TOKEN=sk-or-... +INFERENCE_MODEL_NAME=meta-llama/llama-3.1-70b-instruct +``` + +Recommended models: `meta-llama/llama-3.1-70b-instruct`, `deepseek/deepseek-coder`, `qwen/qwen-2.5-coder-32b-instruct`. + +### Custom OpenAI-Compatible API + +Any enterprise gateway that exposes an OpenAI-compatible `/v1/completions` or `/v1/chat/completions` endpoint works without code changes. 
+ +**GenAI Gateway (LiteLLM-backed):** + +```bash +INFERENCE_PROVIDER=remote +INFERENCE_API_ENDPOINT=https://genai-gateway.example.com +INFERENCE_API_TOKEN=your-litellm-master-key +INFERENCE_MODEL_NAME=codellama/CodeLlama-34b-Instruct-hf +``` + +If the endpoint uses a private domain mapped in `/etc/hosts`, also set: + +```bash +LOCAL_URL_ENDPOINT=your-private-domain.internal +``` + +### Switching Providers + +1. Edit `.env` with the new provider's values. +2. Recreate the backend container so it picks up the new values (`docker compose restart` alone does not re-apply environment variable changes): + + ```bash + docker compose up -d --force-recreate transpiler-api + ``` + +No rebuild is needed — all settings are injected at runtime via environment variables. + +--- + +## Environment Variables + +All variables are defined in `.env` (copied from `.env.example`). The backend reads them at startup via `python-dotenv`. + +### Core LLM Configuration + +| Variable | Description | Default | Type | +|---|---|---|---| +| `INFERENCE_PROVIDER` | `remote` for any OpenAI-compatible API; `ollama` for local inference | `remote` | string | +| `INFERENCE_API_ENDPOINT` | Base URL of the inference service (no `/v1` suffix) | — | string | +| `INFERENCE_API_TOKEN` | Bearer token / API key. Not required for Ollama | — | string | +| `INFERENCE_MODEL_NAME` | Model identifier passed to the API | `codellama/CodeLlama-34b-Instruct-hf` | string | + +### Generation Parameters + +| Variable | Description | Default | Type | +|---|---|---|---| +| `LLM_TEMPERATURE` | Sampling temperature. 
Lower = more deterministic output (0.0–2.0) | `0.2` | float | +| `LLM_MAX_TOKENS` | Maximum tokens in the translated output | `4096` | integer | +| `MAX_CODE_LENGTH` | Maximum input code length in characters | `4000` | integer | + +### File Upload Limits + +| Variable | Description | Default | Type | +|---|---|---|---| +| `MAX_FILE_SIZE` | Maximum PDF upload size in bytes (default: 10 MB) | `10485760` | integer | + +### Session Management + +| Variable | Description | Default | Type | +|---|---|---|---| +| `CORS_ALLOW_ORIGINS` | Allowed CORS origins as a JSON array, e.g. `["http://localhost:3000"]` (see `.env.example`); `["*"]` allows any origin. Restrict in production | `["*"]` | string | + +### Server Configuration + +| Variable | Description | Default | Type | +|---|---|---|---| +| `BACKEND_PORT` | Port the FastAPI server listens on | `5001` | integer | +| `LOCAL_URL_ENDPOINT` | Private domain in `/etc/hosts` the container must resolve. Leave as `not-needed` if not applicable | `not-needed` | string | +| `VERIFY_SSL` | Set `false` only for environments with self-signed certificates | `true` | boolean | + +--- + +## Technology Stack + +### Backend + +- **Framework**: FastAPI (Python 3.11+) with Uvicorn ASGI server +- **LLM Integration**: `openai` Python SDK — works with any OpenAI-compatible endpoint (remote or Ollama) +- **Local Inference**: Ollama — runs natively on host with full Metal (MPS) or CUDA GPU acceleration +- **PDF Processing**: PyMuPDF (`fitz`) for text and code extraction from uploaded documents +- **Config Management**: `python-dotenv` for environment variable injection at startup +- **Data Validation**: Pydantic v2 for request/response schema enforcement + +### Frontend + +- **Framework**: React 18 with Vite (fast HMR and production bundler) +- **Styling**: Tailwind CSS v3 with custom `surface-*` dark mode color palette +- **Production Server**: Nginx — serves the built assets and proxies `/api/` to the backend container +- **UI Features**: Language pill selectors, side-by-side code editor, drag-and-drop PDF upload, 
real-time character counter, one-click copy, dark/light theme toggle + +--- + +## Troubleshooting + +For common issues and solutions, see [TROUBLESHOOTING.md](./TROUBLESHOOTING.md). + +### Common Issues + +**Issue: Backend returns 500 on translate** + +```bash +# Check backend logs for error details +docker compose logs transpiler-api + +# Verify the inference endpoint and token are set correctly +grep INFERENCE .env +``` + +- Confirm `INFERENCE_API_ENDPOINT` is reachable from your machine. +- Verify `INFERENCE_API_TOKEN` is valid and has the correct permissions. + +**Issue: Ollama connection refused** + +```bash +# Confirm Ollama is running on the host +curl http://localhost:11434/api/tags + +# If not running, start it +ollama serve +``` + +**Issue: Ollama is slow / appears to be CPU-only** + +- Ensure Ollama is running natively on the host, **not** inside Docker. +- On macOS, verify the Ollama app is using MPS in Activity Monitor (GPU History). +- See the [Ollama](#ollama) section for correct setup. + +**Issue: SSL certificate errors** + +```bash +# In .env +VERIFY_SSL=false + +# Recreate the backend so the new value is applied +docker compose up -d --force-recreate transpiler-api +``` + +**Issue: PDF upload fails or returns no code** + +- Max file size: 10 MB (`MAX_FILE_SIZE`) +- Supported format: PDF only (text-based; scanned image PDFs are not supported) +- Ensure the file is not corrupted or password-protected + +**Issue: Frontend cannot connect to API** + +```bash +# Verify both containers are running +docker compose ps + +# Check CORS settings +grep CORS .env +``` + +Ensure `CORS_ALLOW_ORIGINS` includes the frontend origin (e.g., `http://localhost:3000`). + +**Issue: Private domain not resolving inside container** + +Set `LOCAL_URL_ENDPOINT=your-private-domain.internal` in `.env` — this adds the host-gateway mapping for the container. 
+ +### Debug Mode + +Enable verbose logging for deeper inspection: + +```bash +# Not a built-in env var — increase FastAPI log level via Uvicorn +# Edit docker-compose.yaml command or run locally: +uvicorn server:app --reload --port 5001 --log-level debug +``` + +Or view real-time container logs: + +```bash +docker compose logs -f transpiler-api +``` + +--- + +## License + +This project is licensed under the MIT License. See the [LICENSE](./LICENSE.md) file for details. + +--- + +## Disclaimer + +**CodeTrans** is provided as-is for demonstration and educational purposes. While we strive for accuracy: + +- Translated code should be reviewed by a qualified engineer before use in production systems +- Do not rely solely on AI-generated translations without testing and validation +- Do not submit confidential or proprietary code to third-party API providers without reviewing their data handling policies +- The quality of translation depends on the underlying model and may vary across language pairs and code complexity + +For full disclaimer details, see [DISCLAIMER.md](./DISCLAIMER.md). diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..db61969 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,34 @@ +# Security Policy + +The **CodeTrans — AI-Powered Code Translation** blueprint does not include +production-grade security controls. + +This repository is not secure by default and must not be used in production +without a comprehensive security review. + +## Known Considerations + +- **API tokens**: `INFERENCE_API_TOKEN` is loaded from `.env`. + Never commit `.env` to version control. +- **CORS**: `CORS_ALLOW_ORIGINS` defaults to `*`. Restrict to specific origins in + any non-local deployment. +- **SSL verification**: `VERIFY_SSL=false` disables certificate validation. Only + use this in controlled development environments. +- **Source code privacy**: Code submitted for translation is sent to the configured + inference endpoint. 
Do not use third-party cloud APIs with proprietary or + sensitive source code. + +## User Responsibilities + +Users are responsible for implementing appropriate: + +- Authentication and authorization mechanisms +- Encryption and secure data storage +- Network-level access controls and firewall rules +- Monitoring, logging, and auditing +- Regulatory and compliance safeguards relevant to their deployment environment + +## Reporting a Vulnerability + +If you discover a security vulnerability in this blueprint, please report it +privately to the Cloud2 Labs maintainers rather than opening a public issue. diff --git a/TERMS_AND_CONDITIONS.md b/TERMS_AND_CONDITIONS.md new file mode 100644 index 0000000..ec8b053 --- /dev/null +++ b/TERMS_AND_CONDITIONS.md @@ -0,0 +1,17 @@ +# Terms and Conditions + +This repository contains the **CodeTrans — AI-Powered Code Translation** blueprint +maintained by Cloud2 Labs. + +By accessing or using this blueprint, you acknowledge and agree that: + +- This blueprint is provided solely for educational and demonstration purposes +- You are solely responsible for deployment, configuration, and usage +- You are responsible for all data handling, security controls, and compliance +- You are responsible for reviewing the data handling and privacy policies of any + third-party inference API provider (OpenAI, Groq, OpenRouter, etc.) before + submitting code to their endpoints +- Cloud2 Labs provides no warranties or guarantees of any kind + +Cloud2 Labs does not support or recommend production deployment of this blueprint +without a thorough security review and appropriate hardening. diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md new file mode 100644 index 0000000..4ed2c93 --- /dev/null +++ b/TROUBLESHOOTING.md @@ -0,0 +1,131 @@ +# Troubleshooting Guide + +This document contains all common issues encountered during development and their solutions. 
+ +## Table of Contents + +- [API Common Issues](#api-common-issues) +- [UI Common Issues](#ui-common-issues) + +### API Common Issues + +#### "API client not initialized. Check inference API configuration." + +**Solution**: + +1. Create a `.env` file in the root directory +2. Add your inference API credentials: + ``` + INFERENCE_API_ENDPOINT=https://your-api-endpoint.com/deployment + INFERENCE_API_TOKEN=your-pre-generated-token-here + INFERENCE_MODEL_NAME=codellama/CodeLlama-34b-Instruct-hf + ``` +3. Restart the server + +#### "Code too long. Maximum length is 4000 characters" + +**Solution**: + +- The limit exists due to model context window constraints + - CodeLlama-34b on Enterprise Inference has a max token limit of 5196 + - 4000 characters stays safely under the token limit including prompt overhead +- Break your code into smaller modules +- Translate one class or function at a time +- Or adjust `MAX_CODE_LENGTH` in `.env` if your deployment supports higher limits + +#### "Source language not supported" + +**Solution**: + +- Only 6 languages are supported: Java, C, C++, Python, Rust, Go +- Check the `/languages` endpoint for the current list +- Use the identifiers returned by `/languages` (e.g., "cpp" rather than "C++"); matching is case-insensitive + +#### Import errors + +**Solution**: + +1. Ensure all dependencies are installed: `pip install -r requirements.txt` +2. Verify you're using Python 3.10 or higher: `python --version` +3. Activate your virtual environment if using one + +#### Server won't start + +**Solution**: + +1. Check if port 5001 is already in use: `lsof -i :5001` (Unix) or `netstat -ano | findstr :5001` (Windows) +2. Use a different port by updating `BACKEND_PORT` in `.env` +3. Check the logs for specific error messages + +#### PDF upload fails + +**Solution**: + +1. Verify the file is a valid PDF +2. Check file size (must be under 10MB by default) +3. Ensure the PDF contains extractable text (not just images) +4. 
Check server logs for detailed error messages + +#### Translation returns empty result + +**Solution**: + +1. Verify inference API authentication is working (check `/health` endpoint) +2. Check if the model endpoint is accessible +3. Verify INFERENCE_API_TOKEN is valid and not expired +4. Try with simpler code first +5. Check server logs for API errors + +#### "No module named 'pypdf'" + +**Solution**: + +```bash +pip install pypdf +``` + +## UI Common Issues + +### API Connection Issues + +**Problem**: "Failed to translate" or "Failed to upload PDF" + +**Solution**: + +1. Ensure the API server is running on `http://localhost:5001` +2. Check browser console for detailed errors +3. Verify CORS is enabled in the API +4. Test API directly: `curl http://localhost:5001/health` + +### Build Issues + +**Problem**: Build fails with dependency errors + +**Solution**: + +```bash +# Clear node_modules and reinstall +rm -rf node_modules package-lock.json +npm install +``` + +### Styling Issues + +**Problem**: Styles not applying + +**Solution**: + +```bash +# Rebuild Tailwind CSS +npm run dev +``` + +### Character Counter Not Updating + +**Problem**: Character counter shows 0 / 4,000 even with code + +**Solution**: + +1. Clear browser cache +2. Hard refresh (Ctrl+Shift+R or Cmd+Shift+R) +3. 
Restart the dev server diff --git a/api/.dockerignore b/api/.dockerignore new file mode 100644 index 0000000..bd6b932 --- /dev/null +++ b/api/.dockerignore @@ -0,0 +1,29 @@ +__pycache__ +*.pyc +*.pyo +*.pyd +.Python +env/ +venv/ +.venv/ +pip-log.txt +pip-delete-this-directory.txt +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.log +.git +.gitignore +.mypy_cache +.pytest_cache +.hypothesis +*.swp +*.swo +*~ +.DS_Store +.env +.env.local diff --git a/api/Dockerfile b/api/Dockerfile new file mode 100644 index 0000000..c431d5c --- /dev/null +++ b/api/Dockerfile @@ -0,0 +1,25 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Copy requirements first for better caching +COPY requirements.txt . + +# Install dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY . . + +# Create a non-root user and change ownership +RUN useradd -m -u 1000 appuser && \ + chown -R appuser:appuser /app + +# Switch to non-root user +USER appuser + +# Expose port +EXPOSE 5001 + +# Run the application +CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "5001"] diff --git a/api/config.py b/api/config.py new file mode 100644 index 0000000..7fccfa8 --- /dev/null +++ b/api/config.py @@ -0,0 +1,49 @@ +""" +Configuration settings for Code Translation API +""" + +import os +from dotenv import load_dotenv + +# Load environment variables from .env file +load_dotenv() + +# Inference Provider: "remote" (OpenAI-compatible) or "ollama" (local) +INFERENCE_PROVIDER = os.getenv("INFERENCE_PROVIDER", "remote") + +# Inference API Configuration +INFERENCE_API_ENDPOINT = os.getenv( + "INFERENCE_API_ENDPOINT", + "http://host.docker.internal:11434" if os.getenv("INFERENCE_PROVIDER", "remote") == "ollama" else None +) +INFERENCE_API_TOKEN = os.getenv("INFERENCE_API_TOKEN") +INFERENCE_MODEL_NAME = os.getenv( + "INFERENCE_MODEL_NAME", + "codellama:34b" if os.getenv("INFERENCE_PROVIDER", "remote") == "ollama" else 
"codellama/CodeLlama-34b-Instruct-hf" +) + +# Application Settings +APP_TITLE = "CodeTrans API" +APP_DESCRIPTION = "AI-powered code translation service" +APP_VERSION = "2.0.0" + +# File Upload Settings +MAX_FILE_SIZE = 10 * 1024 * 1024 # 10MB +ALLOWED_EXTENSIONS = {".pdf"} + +# Code Translation Settings +SUPPORTED_LANGUAGES = ["java", "c", "cpp", "python", "rust", "go"] +# MAX_CODE_LENGTH: For Enterprise Inference with CodeLlama-34b (max tokens: 5196) +# Set to 4000 characters to stay safely under the token limit with prompt overhead +MAX_CODE_LENGTH = int(os.getenv("MAX_CODE_LENGTH", "4000")) # characters +LLM_TEMPERATURE = float(os.getenv("LLM_TEMPERATURE", "0.2")) # Lower temperature for more deterministic code generation +LLM_MAX_TOKENS = int(os.getenv("LLM_MAX_TOKENS", "4096")) + +# SSL Verification Settings +VERIFY_SSL = os.getenv("VERIFY_SSL", "true").lower() == "true" + +# CORS Settings +CORS_ALLOW_ORIGINS = ["*"] # Update with specific origins in production +CORS_ALLOW_CREDENTIALS = True +CORS_ALLOW_METHODS = ["*"] +CORS_ALLOW_HEADERS = ["*"] diff --git a/api/models.py b/api/models.py new file mode 100644 index 0000000..3e00230 --- /dev/null +++ b/api/models.py @@ -0,0 +1,69 @@ +""" +Pydantic models for request/response validation +""" + +from pydantic import BaseModel, Field +from typing import Optional + + +class TranslateRequest(BaseModel): + """Request model for code translation""" + source_code: str = Field(..., min_length=1, description="Source code to translate") + source_language: str = Field(..., description="Source programming language") + target_language: str = Field(..., description="Target programming language") + + class Config: + json_schema_extra = { + "example": { + "source_code": "def hello():\n print('Hello World')", + "source_language": "python", + "target_language": "java" + } + } + + +class TranslateResponse(BaseModel): + """Response model for code translation""" + translated_code: str = Field(..., description="Translated code") + 
source_language: str = Field(..., description="Source language") + target_language: str = Field(..., description="Target language") + original_code: str = Field(..., description="Original source code") + + class Config: + json_schema_extra = { + "example": { + "translated_code": "public class Main {\n public static void main(String[] args) {\n System.out.println(\"Hello World\");\n }\n}", + "source_language": "python", + "target_language": "java", + "original_code": "def hello():\n print('Hello World')" + } + } + + +class UploadPdfResponse(BaseModel): + """Response model for PDF upload""" + message: str = Field(..., description="Success message") + extracted_code: str = Field(..., description="Extracted code from PDF") + status: str = Field(..., description="Operation status") + + class Config: + json_schema_extra = { + "example": { + "message": "Successfully extracted code from 'code.pdf'", + "extracted_code": "def hello():\n print('Hello World')", + "status": "success" + } + } + + +class HealthResponse(BaseModel): + """Response model for health check""" + status: str = Field(..., description="Health status") + model_configured: bool = Field(..., description="Whether model is configured") + inference_authenticated: bool = Field(..., description="Whether inference API auth is successful") + inference_provider: Optional[str] = Field(None, description="Active inference provider (remote or ollama)") + + +class SupportedLanguagesResponse(BaseModel): + """Response model for supported languages""" + languages: list[str] = Field(..., description="List of supported programming languages") diff --git a/api/requirements.txt b/api/requirements.txt new file mode 100644 index 0000000..e18ac5f --- /dev/null +++ b/api/requirements.txt @@ -0,0 +1,9 @@ +fastapi==0.115.5 +uvicorn==0.32.1 +pydantic==2.10.3 +pydantic-settings==2.6.1 +python-multipart>=0.0.18 +requests==2.32.3 +httpx==0.28.1 +openai==1.57.2 +pypdf==6.1.1 diff --git a/api/server.py b/api/server.py new file mode 100644 
index 0000000..40f4c46 --- /dev/null +++ b/api/server.py @@ -0,0 +1,234 @@ +""" +FastAPI server with routes for Code Translation API +""" + +import os +import tempfile +import logging +from contextlib import asynccontextmanager +from fastapi import FastAPI, File, UploadFile, HTTPException, status +from fastapi.middleware.cors import CORSMiddleware + +import config +from models import ( + TranslateRequest, TranslateResponse, UploadPdfResponse, + HealthResponse, SupportedLanguagesResponse +) +from services import ( + get_api_client, extract_code_from_pdf, validate_pdf_file +) + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' +) +logger = logging.getLogger(__name__) + + +@asynccontextmanager +async def lifespan(app: FastAPI): + """Lifespan context manager for FastAPI app""" + # Startup + try: + api_client = get_api_client() + app.state.api_client = api_client + logger.info("API client initialized with inference endpoint") + except Exception as e: + logger.error(f"Failed to initialize API client: {str(e)}") + app.state.api_client = None + + yield + + # Shutdown + logger.info("Shutting down Code Translation API") + + +# Initialize FastAPI app +app = FastAPI( + title=config.APP_TITLE, + description=config.APP_DESCRIPTION, + version=config.APP_VERSION, + lifespan=lifespan +) + +# Add CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=config.CORS_ALLOW_ORIGINS, + allow_credentials=config.CORS_ALLOW_CREDENTIALS, + allow_methods=config.CORS_ALLOW_METHODS, + allow_headers=config.CORS_ALLOW_HEADERS, +) + + +# ==================== Routes ==================== + +@app.get("/") +def root(): + """Root endpoint""" + return { + "message": "Code Translation API is running", + "version": config.APP_VERSION, + "status": "healthy", + "api_client_authenticated": app.state.api_client is not None + } + + +@app.get("/health", response_model=HealthResponse) +def health_check(): + """Detailed 
health check""" + return HealthResponse( + status="healthy", + model_configured=bool(config.INFERENCE_MODEL_NAME), + inference_authenticated=app.state.api_client is not None and app.state.api_client.is_authenticated(), + inference_provider=config.INFERENCE_PROVIDER + ) + + +@app.get("/languages", response_model=SupportedLanguagesResponse) +def get_supported_languages(): + """Get list of supported programming languages""" + return SupportedLanguagesResponse( + languages=config.SUPPORTED_LANGUAGES + ) + + +@app.post("/translate", response_model=TranslateResponse) +def translate_code_endpoint(request: TranslateRequest): + """ + Translate code from one language to another + + - **source_code**: Code to translate + - **source_language**: Source programming language (java, c, cpp, python, rust, go) + - **target_language**: Target programming language (java, c, cpp, python, rust, go) + """ + if not app.state.api_client: + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail="API client not initialized. Check inference API configuration." + ) + + # Validate languages + if request.source_language.lower() not in config.SUPPORTED_LANGUAGES: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Source language '{request.source_language}' not supported. Supported: {', '.join(config.SUPPORTED_LANGUAGES)}" + ) + + if request.target_language.lower() not in config.SUPPORTED_LANGUAGES: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Target language '{request.target_language}' not supported. Supported: {', '.join(config.SUPPORTED_LANGUAGES)}" + ) + + # Check code length + if len(request.source_code) > config.MAX_CODE_LENGTH: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Code too long. 
Maximum length is {config.MAX_CODE_LENGTH} characters" + ) + + try: + logger.info(f"Translating code from {request.source_language} to {request.target_language}") + + # Translate code using API client + translated_code = app.state.api_client.translate_code( + source_code=request.source_code, + source_lang=request.source_language, + target_lang=request.target_language + ) + + if not translated_code: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Translation failed. No output received from model." + ) + + logger.info(f"Successfully translated code") + + return TranslateResponse( + translated_code=translated_code, + source_language=request.source_language, + target_language=request.target_language, + original_code=request.source_code + ) + + except HTTPException: + raise + except Exception as e: + logger.error(f"Error translating code: {str(e)}", exc_info=True) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Error translating code: {str(e)}" + ) + + +@app.post("/upload-pdf", response_model=UploadPdfResponse) +async def upload_pdf(file: UploadFile = File(...)): + """ + Upload a PDF file and extract code from it + + - **file**: PDF file containing code (max 10MB) + """ + tmp_path = None + try: + # Read file content + content = await file.read() + file_size = len(content) + + # Validate file + validate_pdf_file(file.filename, file_size, config.MAX_FILE_SIZE) + + logger.info(f"Processing PDF: {file.filename} ({file_size / 1024:.2f} KB)") + + # Save to temporary file + with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp: + tmp.write(content) + tmp_path = tmp.name + logger.info(f"Saved to temporary path: {tmp_path}") + + # Extract code from PDF + extracted_code = extract_code_from_pdf(tmp_path) + + if not extracted_code.strip(): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="No code content could be extracted from the PDF" + ) + + 
logger.info(f"Successfully extracted code from PDF: {file.filename}") + + return UploadPdfResponse( + message=f"Successfully extracted code from '{file.filename}'", + extracted_code=extracted_code, + status="success" + ) + + except HTTPException: + raise + except ValueError as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=str(e) + ) + except Exception as e: + logger.error(f"Error processing PDF: {str(e)}", exc_info=True) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Error processing PDF: {str(e)}" + ) + finally: + # Clean up temporary file + if tmp_path and os.path.exists(tmp_path): + try: + os.remove(tmp_path) + logger.info(f"Cleaned up temporary file: {tmp_path}") + except Exception as e: + logger.warning(f"Could not remove temporary file: {str(e)}") + + +# Entry point for running with uvicorn +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=5001) # nosec B104 - Binding to all interfaces is intentional for Docker container diff --git a/api/services/__init__.py b/api/services/__init__.py new file mode 100644 index 0000000..223ab23 --- /dev/null +++ b/api/services/__init__.py @@ -0,0 +1,13 @@ +""" +Services module exports +""" + +from .api_client import get_api_client, APIClient +from .pdf_service import extract_code_from_pdf, validate_pdf_file + +__all__ = [ + 'get_api_client', + 'APIClient', + 'extract_code_from_pdf', + 'validate_pdf_file' +] diff --git a/api/services/api_client.py b/api/services/api_client.py new file mode 100644 index 0000000..7913dd5 --- /dev/null +++ b/api/services/api_client.py @@ -0,0 +1,141 @@ +""" +API Client for inference API calls - supports remote OpenAI-compatible APIs and local Ollama +""" + +import logging +import httpx +from typing import Optional +import config + +logger = logging.getLogger(__name__) + + +class APIClient: + """ + Client for handling inference API calls. + Supports remote OpenAI-compatible APIs (e.g. 
CodeLlama via enterprise gateway) + and local Ollama instances. + """ + + def __init__(self): + self.endpoint = config.INFERENCE_API_ENDPOINT + self.token = config.INFERENCE_API_TOKEN + self.provider = config.INFERENCE_PROVIDER + # Ollama doesn't need auth; use dummy key for OpenAI client compatibility + self.http_client = httpx.Client(verify=config.VERIFY_SSL) + + def get_inference_client(self): + """ + Get OpenAI-compatible client configured for the active provider + """ + from openai import OpenAI + + api_key = self.token if self.token else "ollama" + return OpenAI( + api_key=api_key, + base_url=f"{self.endpoint}/v1", + http_client=self.http_client + ) + + def translate_code(self, source_code: str, source_lang: str, target_lang: str) -> str: + """ + Translate code from one language to another. + + Uses text completions for remote providers (e.g. CodeLlama enterprise gateway) + and chat completions for Ollama (more reliable with local models). + """ + client = self.get_inference_client() + + if self.provider == "ollama": + return self._translate_via_chat(client, source_code, source_lang, target_lang) + else: + return self._translate_via_completions(client, source_code, source_lang, target_lang) + + def _translate_via_completions(self, client, source_code: str, source_lang: str, target_lang: str) -> str: + """Text completions endpoint - for remote OpenAI-compatible gateways""" + prompt = f"""Translate the following {source_lang} code to {target_lang}. +Only output the translated code without any explanations or markdown formatting. 
+ +{source_lang} code: +``` +{source_code} +``` + +{target_lang} code: +```""" + + logger.info(f"[remote] Translating {source_lang} → {target_lang} via completions") + + response = client.completions.create( + model=config.INFERENCE_MODEL_NAME, + prompt=prompt, + max_tokens=config.LLM_MAX_TOKENS, + temperature=config.LLM_TEMPERATURE, + stop=["```"] + ) + + if hasattr(response, 'choices') and response.choices: + translated = response.choices[0].text.strip() + logger.info(f"Translation complete ({len(translated)} chars)") + return translated + + logger.error(f"Unexpected completions response: {response}") + return "" + + def _translate_via_chat(self, client, source_code: str, source_lang: str, target_lang: str) -> str: + """Chat completions endpoint - for Ollama local inference""" + system_prompt = ( + "You are an expert code translator. " + "When asked to translate code, output ONLY the translated code with no explanations, " + "no markdown fences, and no comments unless they were in the original." 
+ ) + user_prompt = ( + f"Translate this {source_lang} code to {target_lang}:\n\n{source_code}" + ) + + logger.info(f"[ollama] Translating {source_lang} → {target_lang} via chat completions") + + response = client.chat.completions.create( + model=config.INFERENCE_MODEL_NAME, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + max_tokens=config.LLM_MAX_TOKENS, + temperature=config.LLM_TEMPERATURE, + ) + + if hasattr(response, 'choices') and response.choices: + content = response.choices[0].message.content or "" + # Strip markdown fences if the model still adds them + translated = content.strip() + if translated.startswith("```"): + lines = translated.split("\n") + translated = "\n".join(lines[1:]) + if translated.endswith("```"): + translated = translated[: translated.rfind("```")].rstrip() + logger.info(f"Translation complete ({len(translated)} chars)") + return translated + + logger.error(f"Unexpected chat response: {response}") + return "" + + def is_authenticated(self) -> bool: + """For Ollama, always returns True (no auth needed). 
For remote, checks token.""" + if self.provider == "ollama": + return True + return self.token is not None + + def __del__(self): + if self.http_client: + self.http_client.close() + + +_api_client: Optional[APIClient] = None + + +def get_api_client() -> APIClient: + global _api_client + if _api_client is None: + _api_client = APIClient() + return _api_client diff --git a/api/services/pdf_service.py b/api/services/pdf_service.py new file mode 100644 index 0000000..abf857e --- /dev/null +++ b/api/services/pdf_service.py @@ -0,0 +1,128 @@ +""" +PDF Code Extraction Service +Extracts code snippets from PDF documents +""" + +import logging +import re +from pathlib import Path +from typing import List +from pypdf import PdfReader + +logger = logging.getLogger(__name__) + + +def extract_code_from_pdf(pdf_path: str) -> str: + """ + Extract code content from a PDF file + + Args: + pdf_path: Path to the PDF file + + Returns: + Extracted code as string + + Raises: + Exception if PDF cannot be processed + """ + try: + logger.info(f"Extracting code from PDF: {pdf_path}") + + with open(pdf_path, 'rb') as file: + pdf_reader = PdfReader(file) + num_pages = len(pdf_reader.pages) + + logger.info(f"PDF has {num_pages} pages") + + # Extract text from all pages + all_text = "" + for page_num in range(num_pages): + page = pdf_reader.pages[page_num] + text = page.extract_text() + all_text += text + "\n" + + logger.info(f"Extracted {len(all_text)} characters from PDF") + + # Try to identify and extract code blocks + # Look for common code patterns + code_content = extract_code_patterns(all_text) + + if not code_content.strip(): + # If no code patterns found, return all text + code_content = all_text + + logger.info(f"Extracted code content: {len(code_content)} characters") + + return code_content.strip() + + except Exception as e: + logger.error(f"Error extracting code from PDF: {str(e)}", exc_info=True) + raise Exception(f"Failed to extract code from PDF: {str(e)}") + + +def 
extract_code_patterns(text: str) -> str: + """ + Extract code patterns from text + + Args: + text: Text content to search + + Returns: + Extracted code snippets + """ + # Look for code between common delimiters + code_blocks = [] + + # Pattern 1: Code between ``` markers + markdown_code = re.findall(r'```[\w]*\n(.*?)\n```', text, re.DOTALL) + code_blocks.extend(markdown_code) + + # Pattern 2: Indented code blocks (4+ spaces) + indented_code = re.findall(r'(?:^ .+$)+', text, re.MULTILINE) + code_blocks.extend(indented_code) + + # Pattern 3: Code with common keywords (class, def, function, etc.) + keyword_patterns = [ + r'(?:public|private|protected)?\s*class\s+\w+.*?\{.*?\}', # Java/C++ classes + r'def\s+\w+\(.*?\):.*?(?=\n(?!\s))', # Python functions + r'function\s+\w+\(.*?\)\s*\{.*?\}', # JavaScript functions + r'fn\s+\w+\(.*?\)\s*\{.*?\}', # Rust functions + r'func\s+\w+\(.*?\)\s*\{.*?\}', # Go functions + ] + + for pattern in keyword_patterns: + matches = re.findall(pattern, text, re.DOTALL | re.MULTILINE) + code_blocks.extend(matches) + + if code_blocks: + return '\n\n'.join(code_blocks) + + # If no patterns match, return original text + return text + + +def validate_pdf_file(filename: str, file_size: int, max_size: int) -> None: + """ + Validate uploaded PDF file + + Args: + filename: Name of the file + file_size: Size of the file in bytes + max_size: Maximum allowed file size in bytes + + Raises: + ValueError if validation fails + """ + # Check file extension + if not filename.lower().endswith('.pdf'): + raise ValueError("Only PDF files are allowed") + + # Check file size + if file_size > max_size: + max_size_mb = max_size / (1024 * 1024) + raise ValueError(f"File too large. 
Maximum size is {max_size_mb}MB") + + if file_size == 0: + raise ValueError("Empty file uploaded") + + logger.info(f"PDF file validation passed: {filename} ({file_size / 1024:.2f} KB)") \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000..f254cfe --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,63 @@ +services: + transpiler-api: + build: + context: ./api + dockerfile: Dockerfile + container_name: transpiler-api + ports: + - "5001:5001" + env_file: + - .env + environment: + - INFERENCE_PROVIDER=${INFERENCE_PROVIDER:-remote} + - INFERENCE_API_ENDPOINT=${INFERENCE_API_ENDPOINT} + - INFERENCE_API_TOKEN=${INFERENCE_API_TOKEN} + - INFERENCE_MODEL_NAME=${INFERENCE_MODEL_NAME} + - LLM_TEMPERATURE=${LLM_TEMPERATURE:-0.2} + - LLM_MAX_TOKENS=${LLM_MAX_TOKENS:-4096} + - MAX_CODE_LENGTH=${MAX_CODE_LENGTH:-8000} + - MAX_FILE_SIZE=${MAX_FILE_SIZE:-10485760} + networks: + - transpiler-network + extra_hosts: + # Allows the container to reach a custom domain mapped in /etc/hosts (remote API use case). + # Leave as "not-needed" if your inference endpoint is a public URL. + - "${LOCAL_URL_ENDPOINT:-not-needed}:host-gateway" + # Allows the container to reach Ollama (or any service) running on the Docker host. + # On macOS Docker Desktop this is automatic — the line below is a no-op but harmless. + # On Linux with Docker Engine this is REQUIRED for host.docker.internal to resolve. + - "host.docker.internal:host-gateway" + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:5001/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + + transpiler-ui: + build: + context: ./ui + dockerfile: Dockerfile + container_name: transpiler-ui + ports: + - "3000:8080" + depends_on: + - transpiler-api + networks: + - transpiler-network + restart: unless-stopped + + # NOTE: Ollama is intentionally NOT defined as a Docker service here. 
+ # + # On macOS (Apple Silicon / M-series), running Ollama inside Docker bypasses + # Metal GPU acceleration, resulting in CPU-only inference that is extremely slow. + # Ollama must be installed and run natively on the host machine so it can use + # the Metal Performance Shaders (MPS) backend for full hardware acceleration. + # + # The backend container reaches host-side Ollama via host.docker.internal:11434. + # See the INFERENCE_PROVIDER=ollama section in .env.example for setup instructions. + +networks: + transpiler-network: + driver: bridge diff --git a/docs/assets/InnovationHub-HeaderImage.png b/docs/assets/InnovationHub-HeaderImage.png new file mode 100644 index 0000000..0558e37 Binary files /dev/null and b/docs/assets/InnovationHub-HeaderImage.png differ diff --git a/ui/.dockerignore b/ui/.dockerignore new file mode 100644 index 0000000..bd3f4ad --- /dev/null +++ b/ui/.dockerignore @@ -0,0 +1,12 @@ +node_modules +npm-debug.log +.git +.gitignore +.DS_Store +.env +.env.local +.env.production +dist +build +coverage +*.log diff --git a/ui/Dockerfile b/ui/Dockerfile new file mode 100644 index 0000000..015b33d --- /dev/null +++ b/ui/Dockerfile @@ -0,0 +1,41 @@ +# Build stage +FROM node:18-alpine as build + +WORKDIR /app + +# Copy package.json +COPY package.json ./ + +# Install dependencies +RUN npm install + +# Copy application code +COPY . . 
+ +# Build the application +RUN npm run build + +# Production stage +FROM nginx:alpine + +# Copy built assets from build stage +COPY --from=build /app/dist /usr/share/nginx/html + +# Copy nginx configuration +COPY nginx.conf /etc/nginx/conf.d/default.conf + +# Create a non-root user and adjust permissions +RUN adduser -D -u 1000 appuser && \ + chown -R appuser:appuser /usr/share/nginx/html && \ + chown -R appuser:appuser /var/cache/nginx && \ + chown -R appuser:appuser /var/log/nginx && \ + chown -R appuser:appuser /etc/nginx/conf.d && \ + touch /var/run/nginx.pid && \ + chown -R appuser:appuser /var/run/nginx.pid + +# Switch to non-root user +USER appuser + +EXPOSE 8080 + +CMD ["nginx", "-g", "daemon off;"] diff --git a/ui/index.html b/ui/index.html new file mode 100644 index 0000000..d791e45 --- /dev/null +++ b/ui/index.html @@ -0,0 +1,23 @@ + + + + + + + CodeTrans — AI Code Translator + + + +
+ + + diff --git a/ui/nginx.conf b/ui/nginx.conf new file mode 100644 index 0000000..a6980e9 --- /dev/null +++ b/ui/nginx.conf @@ -0,0 +1,23 @@ +server { + listen 8080; + server_name localhost; + root /usr/share/nginx/html; + index index.html; + + location / { + try_files $uri $uri/ /index.html; + } + + location /api/ { + rewrite ^/api/(.*)$ /$1 break; + proxy_pass http://transpiler-api:5001; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection 'upgrade'; + proxy_set_header Host $host; + proxy_cache_bypass $http_upgrade; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } +} diff --git a/ui/package.json b/ui/package.json new file mode 100644 index 0000000..310f586 --- /dev/null +++ b/ui/package.json @@ -0,0 +1,31 @@ +{ + "name": "code-trans-ui", + "version": "1.0.0", + "private": true, + "type": "module", + "scripts": { + "dev": "vite", + "build": "vite build", + "preview": "vite preview", + "lint": "eslint . 
--ext js,jsx --report-unused-disable-directives --max-warnings 0" + }, + "dependencies": { + "react": "^18.2.0", + "react-dom": "^18.2.0", + "axios": "^1.6.0", + "lucide-react": "^0.294.0" + }, + "devDependencies": { + "@types/react": "^18.2.43", + "@types/react-dom": "^18.2.17", + "@vitejs/plugin-react": "^4.2.1", + "autoprefixer": "^10.4.16", + "eslint": "^8.55.0", + "eslint-plugin-react": "^7.33.2", + "eslint-plugin-react-hooks": "^4.6.0", + "eslint-plugin-react-refresh": "^0.4.5", + "postcss": "^8.4.32", + "tailwindcss": "^3.3.6", + "vite": "^5.0.8" + } +} diff --git a/ui/postcss.config.js b/ui/postcss.config.js new file mode 100644 index 0000000..2e7af2b --- /dev/null +++ b/ui/postcss.config.js @@ -0,0 +1,6 @@ +export default { + plugins: { + tailwindcss: {}, + autoprefixer: {}, + }, +} diff --git a/ui/src/App.jsx b/ui/src/App.jsx new file mode 100644 index 0000000..6c72401 --- /dev/null +++ b/ui/src/App.jsx @@ -0,0 +1,70 @@ +import { useState, useEffect } from 'react' +import CodeTranslator from './components/CodeTranslator' +import PDFUploader from './components/PDFUploader' +import Header from './components/Header' + +function App() { + const [darkMode, setDarkMode] = useState(() => { + const saved = localStorage.getItem('darkMode') + return saved !== null ? 
JSON.parse(saved) : true + }) + const [translationStatus, setTranslationStatus] = useState('idle') + const [sourceLanguage, setSourceLanguage] = useState('python') + const [targetLanguage, setTargetLanguage] = useState('java') + const [pdfExtractedCode, setPdfExtractedCode] = useState('') + const [isUploading, setIsUploading] = useState(false) + + useEffect(() => { + localStorage.setItem('darkMode', JSON.stringify(darkMode)) + if (darkMode) { + document.documentElement.classList.add('dark') + } else { + document.documentElement.classList.remove('dark') + } + }, [darkMode]) + + const handleTranslationStart = () => setTranslationStatus('translating') + const handleTranslationSuccess = () => { + setTranslationStatus('success') + setTimeout(() => setTranslationStatus('idle'), 4000) + } + const handleTranslationError = () => { + setTranslationStatus('error') + setTimeout(() => setTranslationStatus('idle'), 4000) + } + const handlePDFUploadSuccess = (extractedCode) => { + setPdfExtractedCode(extractedCode) + setIsUploading(false) + } + const handlePDFUploadStart = () => setIsUploading(true) + + return ( +
+
setDarkMode(d => !d)} /> + +
+ + +
+ +
+
+
+ ) +} + +export default App diff --git a/ui/src/components/CodeTranslator.jsx b/ui/src/components/CodeTranslator.jsx new file mode 100644 index 0000000..3463714 --- /dev/null +++ b/ui/src/components/CodeTranslator.jsx @@ -0,0 +1,241 @@ +import { useState, useEffect } from 'react' +import { ArrowRight, ArrowLeftRight, Copy, Check, Loader2, Sparkles, CheckCircle2, XCircle, Clock } from 'lucide-react' +import axios from 'axios' + +const LANGUAGES = ['java', 'c', 'cpp', 'python', 'rust', 'go'] + +const LANGUAGE_META = { + java: { label: 'Java', color: 'text-orange-400', bg: 'dark:bg-orange-900/20 dark:border-orange-700/40 bg-orange-50 border-orange-200' }, + c: { label: 'C', color: 'text-blue-400', bg: 'dark:bg-blue-900/20 dark:border-blue-700/40 bg-blue-50 border-blue-200' }, + cpp: { label: 'C++', color: 'text-sky-400', bg: 'dark:bg-sky-900/20 dark:border-sky-700/40 bg-sky-50 border-sky-200' }, + python: { label: 'Python', color: 'text-yellow-400', bg: 'dark:bg-yellow-900/20 dark:border-yellow-700/40 bg-yellow-50 border-yellow-200' }, + rust: { label: 'Rust', color: 'text-red-400', bg: 'dark:bg-red-900/20 dark:border-red-700/40 bg-red-50 border-red-200' }, + go: { label: 'Go', color: 'text-cyan-400', bg: 'dark:bg-cyan-900/20 dark:border-cyan-700/40 bg-cyan-50 border-cyan-200' }, +} + +const MAX_CHARS = parseInt(import.meta.env.VITE_MAX_CODE_LENGTH || '8000') +const API_URL = import.meta.env.VITE_API_URL || '/api' + +function StatusPill({ translationStatus, isUploading }) { + if (isUploading) return ( + + Extracting PDF... + + ) + if (translationStatus === 'translating') return ( + + Translating... + + ) + if (translationStatus === 'success') return ( + + Done + + ) + if (translationStatus === 'error') return ( + + Failed + + ) + return ( + + Ready + + ) +} + +function LanguageSelector({ value, onChange, label }) { + return ( +
+ {label} +
+ {LANGUAGES.map(lang => { + const meta = LANGUAGE_META[lang] + const isActive = value === lang + return ( + + ) + })} +
+
+ ) +} + +export default function CodeTranslator({ + onTranslationStart, onTranslationSuccess, onTranslationError, + translationStatus, isUploading, + pdfExtractedCode, sourceLanguage, targetLanguage, + onSourceLanguageChange, onTargetLanguageChange +}) { + const [sourceCode, setSourceCode] = useState('') + const [translatedCode, setTranslatedCode] = useState('') + const [isTranslating, setIsTranslating] = useState(false) + const [copied, setCopied] = useState(false) + const [errorMsg, setErrorMsg] = useState('') + + useEffect(() => { + if (pdfExtractedCode) setSourceCode(pdfExtractedCode) + }, [pdfExtractedCode]) + + const handleSwapLanguages = () => { + const prevSource = sourceLanguage + const prevTarget = targetLanguage + onSourceLanguageChange(prevTarget) + onTargetLanguageChange(prevSource) + if (translatedCode) { + setSourceCode(translatedCode) + setTranslatedCode('') + } + } + + const handleTranslate = async () => { + if (!sourceCode.trim()) { setErrorMsg('Please enter some code to translate.'); return } + if (sourceLanguage === targetLanguage) { setErrorMsg('Source and target languages must be different.'); return } + if (sourceCode.length > MAX_CHARS) { setErrorMsg(`Code exceeds ${MAX_CHARS.toLocaleString()} character limit.`); return } + + setErrorMsg('') + setIsTranslating(true) + onTranslationStart() + + try { + const response = await axios.post(`${API_URL}/translate`, { + source_code: sourceCode, + source_language: sourceLanguage, + target_language: targetLanguage + }) + setTranslatedCode(response.data.translated_code) + onTranslationSuccess() + } catch (error) { + console.error('Translation error:', error) + setErrorMsg(error.response?.data?.detail || 'Translation failed. 
Please check the backend connection.') + onTranslationError() + } finally { + setIsTranslating(false) + } + } + + const handleCopy = () => { + navigator.clipboard.writeText(translatedCode) + setCopied(true) + setTimeout(() => setCopied(false), 2000) + } + + const charCount = sourceCode.length + const overLimit = charCount > MAX_CHARS + + return ( +
+ {/* Header row */} +
+
+ +

Translate Code

+
+ +
+ + {/* Language selectors */} +
+ + + {/* Swap button */} + + + +
+ + {/* Code panels */} +
+ {/* Source */} +
+
+
+ + {LANGUAGE_META[sourceLanguage].label} + + input +
+ + {charCount.toLocaleString()} / {MAX_CHARS.toLocaleString()} + +
+