diff --git a/src/youtube_extension.egg-info/PKG-INFO b/src/youtube_extension.egg-info/PKG-INFO index ebb6d87ca..40de3e1ac 100644 --- a/src/youtube_extension.egg-info/PKG-INFO +++ b/src/youtube_extension.egg-info/PKG-INFO @@ -54,6 +54,7 @@ Requires-Dist: python-dotenv>=1.0.0 Requires-Dist: PyYAML>=6.0.0 Requires-Dist: requests>=2.31.0 Requires-Dist: Pillow>=10.0.0 +Requires-Dist: google-genai>=1.0.0 Requires-Dist: google-generativeai>=0.3.0 Requires-Dist: opentelemetry-distro>=0.40b0 Requires-Dist: opentelemetry-exporter-otlp>=1.20.0 @@ -98,6 +99,8 @@ Requires-Dist: gitpython>=3.1.0; extra == "youtube" Requires-Dist: cachetools>=5.0.0; extra == "youtube" Requires-Dist: google-cloud-storage>=2.13.0; extra == "youtube" Requires-Dist: google-cloud-pubsub>=2.19.0; extra == "youtube" +Requires-Dist: google-cloud-vision>=3.7.0; extra == "youtube" +Requires-Dist: google-cloud-videointelligence>=2.13.0; extra == "youtube" Provides-Extra: postgres Requires-Dist: psycopg2-binary>=2.9.0; extra == "postgres" Requires-Dist: asyncpg>=0.29.0; extra == "postgres" @@ -118,392 +121,186 @@ Requires-Dist: numpy>=1.24.0; extra == "ml" Requires-Dist: pandas>=2.0.0; extra == "ml" Dynamic: license-file -# 🎯 EventRelay β€” Agentic Video Execution Platform +# 🎯 EventRelay β€” AI Video Processing & Event Extraction Platform -AI-powered transcript capture, event extraction, and agent execution for YouTube content. EventRelay ships a FastAPI backend, a React dashboard, Gemini/Veo hybrid orchestration, and an agent workflow that mirrors what happens in the videoβ€”transcribing every scene into natural language, grounding it in RAG, and dispatching MCP/A2A agents to take real follow-up actions. 
+[![CI](https://github.com/groupthinking/EventRelay/actions/workflows/ci.yml/badge.svg)](https://github.com/groupthinking/EventRelay/actions/workflows/ci.yml) +[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE) +![Node >= 20](https://img.shields.io/badge/Node-%3E%3D20-green) +![Python >= 3.11](https://img.shields.io/badge/Python-%3E%3D3.11-blue) -## πŸ“˜ Overview +AI-powered video transcript capture, structured event extraction, and agent execution for YouTube content. Paste a URL β†’ get a word-for-word transcript, typed events, actionable tasks, and AI-driven insights. -- **What it solves:** Automates end-to-end execution from YouTube videosβ€”capturing word-for-word transcripts, extracting concrete events, and wiring them into agent runtimes that can build code, create tickets, or trigger workflows. -- **Why it matters:** Eliminates manual note-taking, keeps teams aligned on factual video-derived events, and exposes a programmable API for dispatching agents that act on what was actually said and shown. -- **Status:** Production-ready backend + frontend with ongoing instrumentation and MCP ecosystem integration. -- **Learning loop:** Every transcript is grounded into the RAG store and fed back into agents’ skill adapters so subsequent runs refine their prompts, tooling choices, and dispatch heuristics. -- **Implementation Guide:** See [`docs/MASTER_IMPLEMENTATION_GUIDE.md`](docs/MASTER_IMPLEMENTATION_GUIDE.md) for the complete "Framework First" prompt-driven implementation plan. +## Architecture -## πŸ” Mandatory Context Verification - -Before contributing or running automation, review **all lines** in the following governance artifacts: - -- `~/.claude/CLAUDE.md` -- `~/CLAUDE.md` -- `/Users/garvey/CLAUDE_CODE_GOVERNANCE.md` - -## πŸ§‘β€πŸ’» Contributor Guide - -Implementation details, coding standards, and testing workflows for EventRelay live in [AGENTS.md](AGENTS.md). 
Review that file before modifying backend, frontend, or MCP modules so new changes stay aligned with the shared agent guidelines. - -## πŸ–ΌοΈ Visual Context - -- Architecture diagram: [`docs/visuals/architecture.md`](docs/visuals/architecture.md) -- Add product screenshots (`png/jpg/gif`) under `docs/visuals/` and link them from this section when ready. - -## βš™οΈ Prerequisites - -- Python >= 3.9 (see `pyproject.toml`) -- Node.js >= 18 and npm >= 8 (`package.json` engines) -- Google Cloud project configured for Speech-to-Text v2 (optional but required for long videos) -- Valid API credentials (YouTube Data API, Gemini, OpenAI, optional Anthropic/Grok) - -## πŸš€ Installation & Setup - -1. **Clone & create virtual env** - - ```bash - python -m venv .venv - source .venv/bin/activate - pip install -e .[dev,youtube,ml] - ``` - -2. **Install frontend dependencies** - - ```bash - npm install --prefix apps/web - ``` - -3. **Setup API keys** (choose one method): - - **Option A: Interactive Setup (Recommended)** - - ```bash - python3 scripts/setup_env.py - ``` - - This guided CLI will: - - - Create `.env` from template - - Prompt for each API key with help URLs - - Validate your configuration - - Show where to get each key - - **Option B: Manual Setup** - - ```bash - cp .env.example .env - # Edit .env and add your API keys (lines 5-25 have instructions) - ``` - - **Required Keys** (need at least ONE): - - - `GEMINI_API_KEY` - Get from [Google AI Studio](https://aistudio.google.com/app/apikey) (recommended) - - `OPENAI_API_KEY` - Get from [OpenAI Platform](https://platform.openai.com/api-keys) - - **Optional Keys** (recommended): - - - `YOUTUBE_API_KEY` - Get from [Google Cloud Console](https://console.cloud.google.com/apis/credentials) - - `ANTHROPIC_API_KEY` - Get from [Anthropic Console](https://console.anthropic.com/settings/keys) - - **Validate Configuration**: - - ```bash - python3 scripts/validate_env.py - ``` - - See [Environment Monitoring 
Guide](docs/ENV_MONITORING.md) for details on monitoring, validation, and MCP integration. - -4. **Boot the backend** - - ```bash - uvicorn uvai.api.main:app --reload --port 8000 - ``` - -5. **Boot the frontend** - - ```bash - npm start --prefix apps/web - ``` - -## πŸ”§ Configuration - -- **Environment files:** `.env`, `.env.local`, `.env.production` (create as needed); keep secrets out of source control. -- **Real Mode:** Set `REAL_MODE_ONLY=true` in production to disable all simulated behaviors (fake delays, mock responses) and enforce strict security checks. -- **Setup tools:** - - `python3 scripts/setup_env.py` - Interactive environment setup - - `python3 scripts/validate_env.py` - Validate API key configuration - - `python3 scripts/monitor_env.py` - Monitor .env changes in real-time (development) -- **MCP tooling:** - - Core MCP servers configured in `.github/mcp-servers.json` for video processing - - **Optional**: [GitHub MCP Server](https://github.com/github/github-mcp-server) for AI-assisted development (see `.github/mcp-config.md`) - - Align `~/.cursor/mcp.json` with configuration before enabling MCP-based agents -- **Optional providers:** `ANTHROPIC_API_KEY`, `GROK_API_KEY`, `LIVEKIT_*`, `REDIS_URL`, and `OTEL_EXPORTER_OTLP_ENDPOINT` unlock additional integrations. -- **Speech-to-Text batch:** Ensure your Google project has access to the configured GCS bucket for >30 minute videos. -- **Full documentation:** See [Environment Monitoring Guide](docs/ENV_MONITORING.md) for complete API key setup, validation, and monitoring details. 
- -## πŸ› οΈ Usage - -- **CLI helpers** (`youtube-extension`): - - `youtube-extension serve --host 0.0.0.0 --port 8000` – start FastAPI dev server - - `youtube-extension test -v --coverage` – run pytest with optional coverage (expects `tests/` directory) - - `youtube-extension lint` / `format` – run Ruff + mypy, or Black + isort -- **REST APIs:** Once the backend is running, visit `http://localhost:8000/docs` for FastAPI Swagger UI. - - - Transcript workflow example: - - ```bash - curl -X POST http://127.0.0.1:8000/api/v1/transcript-action \ - -H "Content-Type: application/json" \ - -d '{"video_url":"https://www.youtube.com/watch?v=m0XAPRAOJ8A","language":"en"}' - ``` - - - Video-to-Software category discovery: - - ```bash - curl -s -X POST http://127.0.0.1:8000/api/video-to-software/by-category \ - -H "Content-Type: application/json" \ - -d '{"category":"react frontend","project_type":"web_app","deployment_target":"vercel","published_within_days":14}' - ``` - - - Cloud AI analysis endpoints live under `/api/v1/cloud-ai/*` (see [API Reference](#-api-reference)). - -- **Frontend dashboard:** `npm start --prefix apps/web` launches the React UI with hot reload and proxying to the backend. -- **Sample payloads:** `transcript_action_sample.json` illustrates the end-to-end response (event log, execution graph, task dispatch) for the transcript workflow. - -## πŸ€– GitHub Copilot Custom Agents - -EventRelay includes specialized GitHub Copilot agents for different development tasks. These agents provide expert guidance and code generation for specific domains: - -### Available Agents - -Invoke agents using the `@agent-name` pattern in GitHub Copilot Chat: - -- **@python-backend** - FastAPI development, async services, database operations - - ``` - @python-backend How do I create a new API endpoint? 
- @python-backend Add authentication to this route - ``` - -- **@frontend** - React components, hooks, TypeScript, API integration - - ``` - @frontend Create a hook to fetch video data - @frontend Build a video player component - ``` - -- **@testing** - Unit tests, integration tests, mocking, coverage - - ``` - @testing Write tests for the video processor - @testing Add test fixtures for API endpoints - ``` +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Next.js Frontend (apps/web) localhost:3000 β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Dashboard β”‚ β”‚ /api/video β”‚ β”‚ /api/extract- β”‚ β”‚ +β”‚ β”‚ (React + │──│ (proxy to │──│ events β”‚ β”‚ +β”‚ β”‚ Zustand) β”‚ β”‚ backend) β”‚ β”‚ (OpenAI β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ Responses API) β”‚ β”‚ +β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ +β”‚ β”‚ /api/transcribe β”‚ β”‚ OpenAI STT fallback β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ FastAPI Backend (src/) localhost:8000 β”‚ +β”‚ β”‚ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ /api/v1/transcript-action 
β”‚ β”‚ +β”‚ β”‚ YouTube transcript β†’ 3 Gemini agents: β”‚ β”‚ +β”‚ β”‚ β€’ transcript_action (summary + tasks) β”‚ β”‚ +β”‚ β”‚ β€’ personality_agent (intent analysis) β”‚ β”‚ +β”‚ β”‚ β€’ strategy_agent (strategic insights) β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ +β”‚ /api/v1/health /api/v1/capabilities /api/v1/videos β”‚ +β”‚ /api/v1/events /api/v1/agents /api/v1/chat β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` -- **@mcp** - Model Context Protocol, agent orchestration, JSON-RPC +**Hybrid AI:** Gemini handles deep analysis (personality, strategy), OpenAI Responses API handles structured event/action extraction with JSON Schema strict mode, and OpenAI STT provides transcription fallback when YouTube captions are unavailable. - ``` - @mcp Show me how to implement an MCP tool - @mcp Create an agent workflow for video processing - ``` +## Quick Start -- **@documentation** - Technical writing, API docs, tutorials +### Prerequisites - ``` - @documentation Document this API endpoint - @documentation Create a setup guide for new contributors - ``` +- Python >= 3.11 +- Node.js >= 20 +- API keys: `GEMINI_API_KEY` and `OPENAI_API_KEY` -- **@video-processing** - Video analysis, transcription, event extraction +### Setup - ``` - @video-processing How do I extract events from a transcript? - @video-processing Optimize video processing performance - ``` +```bash +# Clone +git clone https://github.com/groupthinking/EventRelay.git +cd EventRelay -### Agent Configuration +# Backend +python3 -m venv .venv && source .venv/bin/activate +pip install -e .[dev] -Agent definitions are stored in `.github/agents/*.agent.md`. 
Each agent has: +# Frontend +npm install -- Specialized expertise and best practices -- Project-specific context and patterns -- Boundaries defining what they can/cannot modify -- Tool access and capabilities +# API keys (add to shell profile or .env) +export GEMINI_API_KEY="your-key" +export OPENAI_API_KEY="your-key" +``` -For details, see [.github/agents/README.md](.github/agents/README.md). +### Run -## πŸ’° Revenue Pipeline Testing +```bash +# Terminal 1: Backend +PYTHONPATH=src python3 -m uvicorn youtube_extension.main:app --port 8000 -The Revenue Pipeline transforms YouTube videos into deployed web applications: +# Terminal 2: Frontend +cd apps/web && BACKEND_URL=http://localhost:8000 npx next dev --port 3000 +``` -**YouTube URL β†’ Video Processing β†’ AI Code Generation β†’ Vercel Deployment** +Open http://localhost:3000/dashboard β€” paste a YouTube URL and watch it process. -### Quick Test +## How It Works -```bash -# Check prerequisites -python3 scripts/check_revenue_pipeline_prerequisites.py +1. **Paste URL** β†’ Dashboard sends to `/api/video` +2. **Transcribe** β†’ Backend fetches YouTube transcript (falls back to OpenAI STT if unavailable) +3. **Analyze** β†’ 3 Gemini agents run: summary, personality mapping, strategy +4. **Extract** β†’ OpenAI Responses API returns structured events, actions, topics via strict JSON Schema +5. 
**Display** β†’ Dashboard shows everything in tabs: insights, transcript, events, agents -# Test pipeline (generation only, 30-60 seconds) -python3 scripts/test_revenue_pipeline.py -``` +## API Endpoints -### Requirements +### Frontend Routes (Next.js) -- **API Keys**: At least one of `GEMINI_API_KEY`, `GOOGLE_API_KEY`, or `OPENAI_API_KEY` -- **Python**: 3.9+ -- **Disk Space**: 5GB+ free -- **Optional**: Vercel CLI for deployment testing +| Method | Route | Description | +|--------|-------|-------------| +| POST | `/api/video` | Process YouTube URL β†’ transcript + AI analysis | +| POST | `/api/extract-events` | Structured event/action extraction (OpenAI) | +| POST | `/api/transcribe` | Transcription with YouTube/OpenAI STT fallback | +| POST | `/api/chat` | Chat with AI about video content | +| GET | `/api/dashboard` | Backend health check proxy | -### Documentation +### Backend Routes (FastAPI) -- **[REVENUE_PIPELINE.md](REVENUE_PIPELINE.md)** - Architecture and design -- **[REVENUE_PIPELINE_TESTING.md](REVENUE_PIPELINE_TESTING.md)** - Complete testing guide -- **Scripts**: - - `scripts/check_revenue_pipeline_prerequisites.py` - Prerequisite validator - - `scripts/test_revenue_pipeline.py` - End-to-end pipeline test +| Method | Route | Description | +|--------|-------|-------------| +| POST | `/api/v1/transcript-action` | Core pipeline: transcript β†’ agents β†’ results | +| GET | `/api/v1/health` | Service health check | +| GET | `/api/v1/capabilities` | Available features and providers | +| POST | `/api/v1/videos/process` | Async video processing job | +| GET | `/api/v1/videos/{job_id}/status` | Job status polling | +| POST | `/api/v1/events/extract` | Backend event extraction | +| POST | `/api/v1/agents/dispatch` | Dispatch agent execution | +| POST | `/api/v1/chat` | Conversational AI about videos | -See [REVENUE_PIPELINE_TESTING.md](REVENUE_PIPELINE_TESTING.md) for detailed setup, troubleshooting, and examples. 
+Full API docs at http://localhost:8000/docs (Swagger UI). -## πŸ—‚οΈ Project Structure +## Project Structure ``` -youtube_extension/ -β”œβ”€β”€ src/youtube_extension/ -β”‚ β”œβ”€β”€ backend/ # FastAPI routers, deployment helpers -β”‚ β”œβ”€β”€ services/ # Agents, workflows, deployment manager -β”‚ β”œβ”€β”€ integrations/ # Cloud AI, external providers -β”‚ β”œβ”€β”€ mcp/ # MCP ecosystem coordinator and servers -β”‚ └── main.py # FastAPI entry point -β”œβ”€β”€ apps/web/ # Next.js frontend + MCP-aware hooks +EventRelay/ +β”œβ”€β”€ apps/web/ # Next.js frontend β”‚ └── src/ -β”‚ β”œβ”€β”€ components/ # UI components + tests -β”‚ β”œβ”€β”€ hooks/ # Data fetching & integration hooks -β”‚ └── services/ # API clients / stores -β”‚ └── (Note: `frontend/` references in legacy code point here) -β”œβ”€β”€ docs/ # Living documentation & status reports -β”œβ”€β”€ deployment/ # Production assembly tooling -β”œβ”€β”€ scripts/ # Credential checks, monitoring, utilities -β”œβ”€β”€ Dockerfile* / docker-compose.*.yml -β”œβ”€β”€ pyproject.toml / package.json -└── LICENSE +β”‚ β”œβ”€β”€ app/ +β”‚ β”‚ β”œβ”€β”€ dashboard/page.tsx # Main dashboard UI +β”‚ β”‚ └── api/ # API routes (video, extract-events, transcribe, chat) +β”‚ β”œβ”€β”€ components/ # TranscriptViewer, EventList, AgentDashboard, ResultsViewer +β”‚ β”œβ”€β”€ store/ # Zustand state management +β”‚ └── lib/ # API client, services, types +β”œβ”€β”€ src/youtube_extension/ # FastAPI backend +β”‚ β”œβ”€β”€ main.py # App entry point +β”‚ └── backend/ +β”‚ β”œβ”€β”€ api/v1/ # Router + Pydantic models +β”‚ └── services/ai/ # Gemini service, health monitoring +β”œβ”€β”€ tests/unit/ # Python unit tests +β”œβ”€β”€ docs/ # Documentation +β”œβ”€β”€ .github/ # CI workflows, Copilot agent configs +β”œβ”€β”€ Dockerfile # Production container +└── package.json # Monorepo root (npm workspaces) ``` -## 🧠 Adaptive Learning & Memory - -- **Transcript memory:** Each transcription is persisted to `database/` tables and 
`youtube_processed_videos/`, then vectorized through the RAG workers in `src/rag/` for factual recall. -- **Agent skill adapters:** The MCP orchestration in `mcp_youtube-0.2.0/` and `scripts/youtube_innovation_mcp_server.py` captures execution traces so subsequent runs reuse the highest scoring tools and prompts. -- **Feedback signals:** Task outcomes and competitive insights are logged via `scripts/youtube_innovation_learning_database.py`, producing pattern tables (`learning_outcomes`, `pattern_database`) that agents query before acting. -- **Custom model loops:** Fine-tuning recipes in `fastvlm_gemini_hybrid/` and `fine_tuned_execution/` let you continually improve planners and dispatch heuristics once enough labeled events accumulate. - -## 🧾 API Reference - -- `GET /` – server metadata and feature list -- `GET /health` – service heartbeat -- `POST /api/video-to-software` – legacy endpoint (deprecated) -- `POST /api/v1/generate` – **Primary**: Transform YouTube video into deployed infrastructure (Revenue Pipeline) -- `POST /api/video-to-software/by-category` – auto-discover a fresh video within a category and run the same pipeline -- `POST /api/v1/transcript-action` – transcript β†’ event extraction β†’ agent dispatch -- `POST /api/v1/process-video` – placeholder for legacy workflow -- `GET /api/v1/cloud-ai/providers/status` – provider availability snapshot -- `POST /api/v1/cloud-ai/analyze/video` – single video multi-provider analysis -- `POST /api/v1/cloud-ai/analyze/batch` – batch analysis with provider fallback -- `POST /api/v1/cloud-ai/analyze/multi-provider` – parallel provider invocation -- `GET /api/v1/cloud-ai/analysis-types` – supported analysis enumerations -- Full REST schema is discoverable via FastAPI docs (`/docs`, `/redoc`). 
- -## πŸ§ͺ Testing - -- **Backend:** - - ```bash - pytest tests/unit/test_gemini_service_model_selection.py \ - tests/unit/test_hybrid_processor_cloud.py \ - tests/unit/test_transcript_action_workflow.py -q - ``` - - _Heads-up: the repository currently references `tests/` in several scripts, but the folder may be missing in some branchesβ€”recreate or restore before running the suite._ +## Testing -- **Frontend:** - - ```bash - npm test -- --watch=false --prefix apps/web - ``` - - Unit specs live under `apps/web/src/components/__tests__/` and smoke tests under `apps/web/src/__tests__/`. - -- **Lint & type-check:** - - ```bash - youtube-extension lint - youtube-extension format - ``` - -## 🚒 Deployment - -- **Local production:** - - ```bash - docker compose -f docker-compose.full.yml up --build - ``` - - Or run tailored stacks (e.g., `docker-compose.youtube-packager.yml`) for scoped deployments. - -- **Containers:** Dockerfiles exist for backend, orchestrator, MCP server, and frontend (`Dockerfile.production`, `Dockerfile.youtube-packager`, etc.). -- **Environments:** Keep secrets in your orchestrator (Fly.io, Vercel, etc.) and mirror the environment variables from the setup section. - -## 🧰 Troubleshooting - -- **`ModuleNotFoundError` on startup:** Verify the virtual environment is active before running CLI commands. -- **`GOOGLE_SPEECH_*` errors:** Re-export credentials or copy `.env.example` to `.env` and populate required keys. -- **Port 8000/3000 already in use:** Stop existing services (`lsof -i :8000`) or override the port flags. -- **Frontend `npm start` fails:** Remove `node_modules`, clear npm cache, and reinstall with `npm install --prefix apps/web`. -- **Missing tests directory:** Some branches omit `tests/`; recreate from templates in `docs/status/` before running pytest. - -## 🀝 Contributing - -- Follow the Python style guide (4-space indent, type hints, Black/Isort/Ruff) and React conventions (PascalCase components, camelCase hooks). 
-- Run `youtube-extension lint` and `youtube-extension test` before opening a PR. -- Document new agents in `development/agents/` and wire feature flags through `src/youtube_extension/services/agents/`. -- Review `AGENTS.md` and `development/agents/architecture/*` when extending the agent stack. -- Use imperative commit messages and include motivation, implementation notes, and test evidence in PR descriptions. - -## πŸ“’ Support & Contact - -- File issues or requests through the repository issue tracker. -- For security concerns, reference `SECURITY.md` (currently a templateβ€”update with final contact details when available). -- Internal teams can document playbooks and escalation paths under `docs/status/`. - -## πŸ” Security - -- Never commit secretsβ€”`.env.example` is provided as a template only. -- Rotate API credentials stored in your shell profile or secret manager regularly. -- Align with the guidance in `SECURITY.md` and `/Users/garvey/CLAUDE_CODE_GOVERNANCE.md` before enabling production agents. -- Rate limiting and circuit breakers are enforced in `mcp_servers/youtube_api_proxy.py`; keep defaults unless you understand provider quotas. - -## πŸ“¦ Dependencies +```bash +# Python unit tests (15 tests) +PYTHONPATH=src python3 -m pytest tests/unit/test_api_v1_models.py -v --override-ini="addopts=" -- Python packages are declared in `pyproject.toml`; install optional extras with `pip install -e .[dev,youtube,ml]` as needed. -- Frontend dependencies live in `frontend/package.json`; Node 18+ is required by the `engines` constraint. -- Docker images reference `Dockerfile.production` and `Dockerfile.youtube-packager` for backend and packaging workloads respectively. +# Frontend build check +npm run build:web -## πŸ“ˆ Monitoring & Observability +# Lint +cd apps/web && npx next lint +``` -- Health checks: `GET /health` and per-container Docker health probes (see `docker-compose.full.yml`). 
-- Metrics service tracks transcript fallback success, provider latency, and quota usage—wire into dashboards via `metrics_service`. -- `scripts/check_credentials.py` audits required keys across `.env` files. -- Processed artifacts persist in `.runtime/`, `youtube_processed_videos/`, and configured cloud buckets. +## Environment Variables -## 📄 License +| Variable | Required | Description | +|----------|----------|-------------| +| `GEMINI_API_KEY` | Yes | Google AI Studio key for Gemini agents | +| `OPENAI_API_KEY` | Yes | OpenAI key for event extraction + STT | +| `BACKEND_URL` | No | Backend URL (default: `http://localhost:8000`) | +| `YOUTUBE_API_KEY` | No | YouTube Data API for enhanced metadata | -Released under the MIT License. See `LICENSE` for full terms. +## Deployment -## 🔄 Changelog & Roadmap +```bash +# Docker +docker build -t eventrelay . +docker run -p 8000:8000 -e GEMINI_API_KEY=... -e OPENAI_API_KEY=... eventrelay -- Operational status reports live in `docs/status/` and historical plans in `PLAN.md`. -- Release history is tracked in [`docs/changelog/CHANGELOG.md`](docs/changelog/CHANGELOG.md); update the "Unreleased" section as features land and cut tagged releases for production drops. +# Vercel (frontend) +vercel deploy --prod +``` -## 📊 Status Badges +## Contributing -_Add CI/Test/Coverage badges here once your pipelines are active._ +- Follow [Conventional Commits](https://www.conventionalcommits.org/): `feat:`, `fix:`, `chore:`, etc. +- Run tests before opening PRs +- See [CONTRIBUTING.md](CONTRIBUTING.md) and [AGENTS.md](AGENTS.md) for detailed guidelines ---- +## License -Built for agentic video understanding, transcript automation, and actionable execution planning. -Supports deployment to Google Cloud Run. See [CLOUD_RUN_DEPLOYMENT.md](CLOUD_RUN_DEPLOYMENT.md) for details. 
+MIT — see [LICENSE](LICENSE) diff --git a/src/youtube_extension.egg-info/SOURCES.txt b/src/youtube_extension.egg-info/SOURCES.txt index 4b8412c9c..ab3d0c6a6 100644 --- a/src/youtube_extension.egg-info/SOURCES.txt +++ b/src/youtube_extension.egg-info/SOURCES.txt @@ -35,6 +35,7 @@ src/agents/specialized/architecture_agent.py src/agents/specialized/code_generator.py src/agents/specialized/performance_agent.py src/agents/specialized/personality_agent.py +src/agents/specialized/precision_extractor.py src/agents/specialized/quality_agent.py src/agents/specialized/security_agent.py src/agents/specialized/strategy_agent.py @@ -48,14 +49,17 @@ src/connectors/__init__.py src/connectors/mcp_base.py src/core/__init__.py src/core/canary_router.py +src/core/collections.py src/core/feature_flags.py src/core/model_router.py src/integration/__init__.py src/integration/api_gateway.py +src/integration/cloudevents_publisher.py src/integration/component_registry.py src/integration/context_manager.py src/integration/data_pipeline.py src/integration/gemini_video.py +src/integration/gemini_video_fix.py src/integration/health_checker.py src/integration/mcp_orchestrator.py src/integration/openai_voice.py @@ -63,6 +67,7 @@ src/integration/performance_monitor.py src/integration/routes.py src/integration/stripe_payments.py src/integration/supabase_db.py +src/integration/temporal_video_analysis.py src/integration/vercel_deploy.py src/integration/youtube_api.py src/mcp/bridge.py @@ -77,6 +82,7 @@ src/unified_ai_sdk/__init__.py src/unified_ai_sdk/rate_limiter.py src/unified_ai_sdk/unified_ai_sdk.py src/utils/__init__.py +src/utils/notebooklm_ingest.py src/utils/path_utils.py src/uvai/main_v2.py src/uvai/server.py @@ -104,20 +110,14 @@ src/youtube_extension/backend/cloud_ai_routes.py src/youtube_extension/backend/code_generator.py src/youtube_extension/backend/deployment_manager.py src/youtube_extension/backend/enhanced_video_processor.py -src/youtube_extension/backend/http_server.py 
src/youtube_extension/backend/main.py -src/youtube_extension/backend/main_legacy.py -src/youtube_extension/backend/main_refactored.py src/youtube_extension/backend/real_api_endpoints.py -src/youtube_extension/backend/real_video_processor.py -src/youtube_extension/backend/revenue_pipeline.py -src/youtube_extension/backend/test_real_pipeline.py src/youtube_extension/backend/video_processor_factory.py src/youtube_extension/backend/video_processor_interface.py src/youtube_extension/backend/worker.py src/youtube_extension/backend/api/__init__.py +src/youtube_extension/backend/api/advanced_video_routes.py src/youtube_extension/backend/api/event_routes.py -src/youtube_extension/backend/api/generator_routes.py src/youtube_extension/backend/api/mcp_bridge.py src/youtube_extension/backend/api/v1/__init__.py src/youtube_extension/backend/api/v1/models.py @@ -133,6 +133,7 @@ src/youtube_extension/backend/deploy/fly.py src/youtube_extension/backend/deploy/netlify.py src/youtube_extension/backend/deploy/vercel.py src/youtube_extension/backend/middleware/__init__.py +src/youtube_extension/backend/middleware/api_key_auth.py src/youtube_extension/backend/middleware/error_handling_middleware.py src/youtube_extension/backend/middleware/rate_limiting.py src/youtube_extension/backend/middleware/security_headers.py @@ -204,12 +205,10 @@ src/youtube_extension/integrations/cloud_ai/providers/azure_vision.py src/youtube_extension/integrations/cloud_ai/providers/google_cloud.py src/youtube_extension/intelligence/__init__.py src/youtube_extension/mcp/enterprise_mcp_server.py +src/youtube_extension/mcp/notebooklm_processor.py src/youtube_extension/orchestrator/main.py src/youtube_extension/processors/__init__.py -src/youtube_extension/processors/autonomous_processor.py src/youtube_extension/processors/enhanced_extractor.py -src/youtube_extension/processors/scoring_engine.py -src/youtube_extension/processors/simple_real_processor.py src/youtube_extension/processors/strategies.py 
src/youtube_extension/processors/video_processor.py src/youtube_extension/services/__init__.py diff --git a/src/youtube_extension.egg-info/requires.txt b/src/youtube_extension.egg-info/requires.txt index f494cc2ab..21dbfaedc 100644 --- a/src/youtube_extension.egg-info/requires.txt +++ b/src/youtube_extension.egg-info/requires.txt @@ -25,6 +25,7 @@ python-dotenv>=1.0.0 PyYAML>=6.0.0 requests>=2.31.0 Pillow>=10.0.0 +google-genai>=1.0.0 google-generativeai>=0.3.0 opentelemetry-distro>=0.40b0 opentelemetry-exporter-otlp>=1.20.0 @@ -93,3 +94,5 @@ gitpython>=3.1.0 cachetools>=5.0.0 google-cloud-storage>=2.13.0 google-cloud-pubsub>=2.19.0 +google-cloud-vision>=3.7.0 +google-cloud-videointelligence>=2.13.0 diff --git a/src/youtube_extension/backend/deployment_manager.py b/src/youtube_extension/backend/deployment_manager.py index b1b3834d7..d9face6e6 100644 --- a/src/youtube_extension/backend/deployment_manager.py +++ b/src/youtube_extension/backend/deployment_manager.py @@ -552,12 +552,15 @@ async def _upload_to_github(self, project_path: str, repo_name: str) -> dict[str } async with httpx.AsyncClient() as client: - # Get user info + # Get user info β€” check status before parsing to surface auth errors clearly user_response = await client.get("https://api.github.com/user", headers=headers) + if user_response.status_code != 200: + raise Exception( + f"GitHub authentication failed: {user_response.status_code} - {user_response.text}" + ) user_data = user_response.json() username = user_data["login"] - uploaded_files = [] project_path_obj = Path(project_path) # Directories to exclude from GitHub upload (standard .gitignore patterns) @@ -567,55 +570,100 @@ def should_skip_path(path: Path) -> bool: """Check if any parent directory is in the exclusion list""" return any(part in EXCLUDED_DIRS for part in path.parts) - # Read all files to upload concurrently to improve performance further - upload_tasks = [] - - async def upload_file(client, file_path, relative_path): + # Collect 
file list up-front; coroutines are created lazily inside workers + files_to_upload = [ + (file_path, file_path.relative_to(project_path_obj)) + for file_path in project_path_obj.rglob("*") + if not should_skip_path(file_path.relative_to(project_path_obj)) + and file_path.is_file() + and not file_path.name.startswith('.') + ] + + async def upload_file(file_path: Path, relative_path: Path) -> Optional[str]: + """Upload a single file with retry/backoff for GitHub rate limits.""" try: - # Read file content - with open(file_path, 'rb') as f: - content = f.read() - - # Encode content + # Offload blocking disk I/O to a thread to avoid stalling the event loop + content = await asyncio.to_thread(file_path.read_bytes) encoded_content = base64.b64encode(content).decode('utf-8') - # Upload file file_data = { "message": f"Add {relative_path}", "content": encoded_content } upload_url = f"https://api.github.com/repos/{username}/{repo_name}/contents/{relative_path}" - response = await client.put(upload_url, headers=headers, json=file_data) - if response.status_code in [201, 200]: - return str(relative_path) - else: - logger.warning(f"Failed to upload {relative_path}: {response.text}") - return None + # Retry with exponential backoff on 403/429 (rate limit / abuse detection). + # max_retries=3 means 1 initial attempt + 3 retries = 4 total requests. 
+ max_retries = 3 + for attempt in range(max_retries + 1): + response = await client.put(upload_url, headers=headers, json=file_data) + if response.status_code in [200, 201]: + return str(relative_path) + elif response.status_code in [403, 429]: + if attempt == max_retries: + break # exhausted all retries, no point sleeping + try: + retry_after = int(response.headers.get("Retry-After", 2 ** (attempt + 1))) + except (ValueError, TypeError): + retry_after = 2 ** (attempt + 1) + logger.warning( + f"Rate limited uploading {relative_path} " + f"(attempt {attempt + 1}/{max_retries + 1}), retrying in {retry_after}s" + ) + await asyncio.sleep(retry_after) + else: + logger.warning( + f"Failed to upload {relative_path}: " + f"{response.status_code} - {response.text}" + ) + return None + + logger.error(f"Exhausted retries uploading {relative_path} due to rate limiting") + return None + except asyncio.CancelledError: + # Propagate cancellation so calling code can handle shutdown correctly + raise except Exception as e: logger.warning(f"Error uploading {file_path}: {e}") return None - # Collect tasks - for file_path in project_path_obj.rglob("*"): - # Skip excluded directories and dotfiles - if should_skip_path(file_path.relative_to(project_path_obj)): - continue - if file_path.is_file() and not file_path.name.startswith('.'): - relative_path = file_path.relative_to(project_path_obj) - upload_tasks.append(upload_file(client, file_path, relative_path)) - - # Run uploads concurrently with a semaphore to avoid overwhelming the GitHub API - # Secondary rate limit for GitHub is generally not strictly documented for concurrent writes but 10-20 concurrent requests is a safe maximum. 
- semaphore = asyncio.Semaphore(10) - - async def run_with_semaphore(coro): - async with semaphore: - return await coro - - results = await asyncio.gather(*(run_with_semaphore(task) for task in upload_tasks)) - uploaded_files = [res for res in results if res is not None] + # Bounded worker-pool: only MAX_WORKERS coroutines exist at a time, keeping + # memory usage O(workers) rather than O(files) for large repositories. + # maxsize=MAX_WORKERS*2 keeps the queue bounded so producers don't outpace consumers. + MAX_WORKERS = 10 + queue: asyncio.Queue = asyncio.Queue(maxsize=MAX_WORKERS * 2) + uploaded_files: list[str] = [] + lock = asyncio.Lock() + + async def worker() -> None: + while True: + item = await queue.get() + try: + if item is None: + return + file_path, relative_path = item + result = await upload_file(file_path, relative_path) + if result is not None: + async with lock: + uploaded_files.append(result) + finally: + queue.task_done() + + # Start workers before filling the queue so they begin consuming immediately + worker_tasks = [asyncio.create_task(worker()) for _ in range(MAX_WORKERS)] + + for item in files_to_upload: + await queue.put(item) + # Sentinel values to signal each worker to stop + for _ in range(MAX_WORKERS): + await queue.put(None) + + # return_exceptions=True prevents one unexpected worker error from cancelling others + results = await asyncio.gather(*worker_tasks, return_exceptions=True) + for exc in results: + if isinstance(exc, Exception): + logger.error(f"Unexpected error in upload worker: {exc}") return { "files_uploaded": len(uploaded_files),