From 9795da76d2e248494f32c8e6e120dc8fa54e994c Mon Sep 17 00:00:00 2001 From: Roman Zydyk Date: Tue, 17 Mar 2026 11:26:25 +0100 Subject: [PATCH 1/2] Support OpenRouter as Embedder --- README.md | 50 ++++++++++++++++++++++++++++++++++++---- api/README.md | 4 ++-- api/config.py | 21 +++++++++++++---- api/config/embedder.json | 12 ++++++++++ api/tools/embedder.py | 6 ++++- 5 files changed, 82 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index ba8fd1a88..fc0e32ec0 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ - **Ask Feature**: Chat with your repository using RAG-powered AI to get accurate answers - **DeepResearch**: Multi-turn research process that thoroughly investigates complex topics - **Multiple Model Providers**: Support for Google Gemini, OpenAI, OpenRouter, and local Ollama models -- **Flexible Embeddings**: Choose between OpenAI, Google AI, or local Ollama embeddings for optimal performance +- **Flexible Embeddings**: Choose between OpenAI, Google AI, OpenRouter, or local Ollama embeddings for optimal performance ## 🚀 Quick Start (Super Easy!) @@ -341,6 +341,7 @@ docker-compose up |------|-------------|------------------|-------| | `openai` | OpenAI embeddings (default) | `OPENAI_API_KEY` | Uses `text-embedding-3-small` model | | `google` | Google AI embeddings | `GOOGLE_API_KEY` | Uses `text-embedding-004` model | +| `openrouter` | OpenRouter embeddings | `OPENROUTER_API_KEY` | Uses `openai/text-embedding-3-small` via OpenRouter | | `ollama` | Local Ollama embeddings | None | Requires local Ollama installation | ### Why Use Google AI Embeddings? 
@@ -361,12 +362,52 @@ export DEEPWIKI_EMBEDDER_TYPE=openai # Use Google AI embeddings export DEEPWIKI_EMBEDDER_TYPE=google +# Use OpenRouter embeddings +export DEEPWIKI_EMBEDDER_TYPE=openrouter + # Use local Ollama embeddings export DEEPWIKI_EMBEDDER_TYPE=ollama ``` **Note**: When switching embedders, you may need to regenerate your repository embeddings as different models produce different vector spaces. +## 🔀 Using OpenRouter Embeddings + +DeepWiki supports OpenRouter as an embedding provider. OpenRouter exposes an OpenAI-compatible embeddings endpoint, so any embedding model available on OpenRouter can be used. + +### How to Enable OpenRouter Embeddings + +Set the embedder type and your OpenRouter API key in your `.env` file: + +```bash +OPENROUTER_API_KEY=your_openrouter_api_key +DEEPWIKI_EMBEDDER_TYPE=openrouter +``` + +**Docker:** + +```bash +docker run -p 8001:8001 -p 3000:3000 \ + -e OPENROUTER_API_KEY=your_openrouter_api_key \ + -e DEEPWIKI_EMBEDDER_TYPE=openrouter \ + -v ~/.adalflow:/root/.adalflow \ + ghcr.io/asyncfuncai/deepwiki-open:latest +``` + +### Changing the Embedding Model + +The default model is `openai/text-embedding-3-small`. To use a different model, edit `api/config/embedder.json` and change the `model` field under `embedder_openrouter`: + +```json +"embedder_openrouter": { + "model_kwargs": { + "model": "openai/text-embedding-3-large" + } +} +``` + +Any embedding model listed on [OpenRouter](https://openrouter.ai/models) that exposes an OpenAI-compatible embeddings endpoint can be used here. + ### Logging DeepWiki uses Python's built-in `logging` module for diagnostic output. 
You can configure the verbosity and log file destination via environment variables: @@ -411,7 +452,7 @@ docker-compose up |----------------------|--------------------------------------------------------------|----------|----------------------------------------------------------------------------------------------------------| | `GOOGLE_API_KEY` | Google Gemini API key for AI generation and embeddings | No | Required for Google Gemini models and Google AI embeddings | `OPENAI_API_KEY` | OpenAI API key for embeddings and models | Conditional | Required if using OpenAI embeddings or models | -| `OPENROUTER_API_KEY` | OpenRouter API key for alternative models | No | Required only if you want to use OpenRouter models | +| `OPENROUTER_API_KEY` | OpenRouter API key for alternative models and embeddings | No | Required if using OpenRouter models or `DEEPWIKI_EMBEDDER_TYPE=openrouter` | | `AWS_ACCESS_KEY_ID` | AWS access key ID for Bedrock | No | Required for Bedrock if not using instance/role-based credentials | | `AWS_SECRET_ACCESS_KEY` | AWS secret access key for Bedrock | No | Required for Bedrock if not using instance/role-based credentials | | `AWS_SESSION_TOKEN` | AWS session token for Bedrock (STS) | No | Required when using temporary credentials | @@ -421,7 +462,7 @@ docker-compose up | `AZURE_OPENAI_ENDPOINT` | Azure OpenAI endpoint | No | Required only if you want to use Azure OpenAI models | | `AZURE_OPENAI_VERSION` | Azure OpenAI version | No | Required only if you want to use Azure OpenAI models | | `OLLAMA_HOST` | Ollama Host (default: http://localhost:11434) | No | Required only if you want to use external Ollama server | -| `DEEPWIKI_EMBEDDER_TYPE` | Embedder type: `openai`, `google`, `ollama`, or `bedrock` (default: `openai`) | No | Controls which embedding provider to use | +| `DEEPWIKI_EMBEDDER_TYPE` | Embedder type: `openai`, `google`, `ollama`, `bedrock`, or `openrouter` (default: `openai`) | No | Controls which embedding provider to use | | `PORT` | 
Port for the API server (default: 8001) | No | If you host API and frontend on the same machine, make sure change port of `SERVER_BASE_URL` accordingly | | `SERVER_BASE_URL` | Base URL for the API server (default: http://localhost:8001) | No | | `DEEPWIKI_AUTH_MODE` | Set to `true` or `1` to enable authorization mode. | No | Defaults to `false`. If enabled, `DEEPWIKI_AUTH_CODE` is required. | @@ -429,7 +470,8 @@ docker-compose up **API Key Requirements:** - If using `DEEPWIKI_EMBEDDER_TYPE=openai` (default): `OPENAI_API_KEY` is required -- If using `DEEPWIKI_EMBEDDER_TYPE=google`: `GOOGLE_API_KEY` is required +- If using `DEEPWIKI_EMBEDDER_TYPE=google`: `GOOGLE_API_KEY` is required +- If using `DEEPWIKI_EMBEDDER_TYPE=openrouter`: `OPENROUTER_API_KEY` is required - If using `DEEPWIKI_EMBEDDER_TYPE=ollama`: No API key required (local processing) - If using `DEEPWIKI_EMBEDDER_TYPE=bedrock`: AWS credentials (or role-based credentials) are required diff --git a/api/README.md b/api/README.md index 65e82a2dc..9f2c3bd3a 100644 --- a/api/README.md +++ b/api/README.md @@ -29,7 +29,7 @@ GOOGLE_API_KEY=your_google_api_key # Required for Google Gemini models OPENAI_API_KEY=your_openai_api_key # Required for embeddings and OpenAI models # Optional API Keys -OPENROUTER_API_KEY=your_openrouter_api_key # Required only if using OpenRouter models +OPENROUTER_API_KEY=your_openrouter_api_key # Required if using OpenRouter models or embeddings (DEEPWIKI_EMBEDDER_TYPE=openrouter) # AWS Bedrock Configuration AWS_ACCESS_KEY_ID=your_aws_access_key_id # Required for AWS Bedrock models @@ -47,7 +47,7 @@ OLLAMA_HOST=https://your_ollama_host" # Optional: Add Ollama host if not local. PORT=8001 # Optional, defaults to 8001 ``` -If you're not using Ollama mode, you need to configure an OpenAI API key for embeddings. Other API keys are only required when configuring and using models from the corresponding providers. 
+The embedder type is controlled by `DEEPWIKI_EMBEDDER_TYPE` (`openai`, `google`, `openrouter`, `ollama`, or `bedrock`; default: `openai`). The chosen embedder's credentials are required: an API key for `openai`, `google`, and `openrouter`, AWS credentials for `bedrock`, and none for `ollama`. Other API keys are only required when configuring and using models from the corresponding providers. > 💡 **Where to get these keys:** > - Get a Google API key from [Google AI Studio](https://makersuite.google.com/app/apikey) diff --git a/api/config.py b/api/config.py index 49dfcf7b0..a68b99814 100644 --- a/api/config.py +++ b/api/config.py @@ -152,7 +152,7 @@ def load_embedder_config(): embedder_config = load_json_config("embedder.json") # Process client classes - for key in ["embedder", "embedder_ollama", "embedder_google", "embedder_bedrock"]: + for key in ["embedder", "embedder_ollama", "embedder_google", "embedder_bedrock", "embedder_openrouter"]: if key in embedder_config and "client_class" in embedder_config[key]: class_name = embedder_config[key]["client_class"] if class_name in CLIENT_CLASSES: @@ -174,6 +174,8 @@ def get_embedder_config(): return configs.get("embedder_google", {}) elif embedder_type == 'ollama' and 'embedder_ollama' in configs: return configs.get("embedder_ollama", {}) + elif embedder_type == 'openrouter' and 'embedder_openrouter' in configs: + return configs.get("embedder_openrouter", {}) else: return configs.get("embedder", {}) @@ -235,12 +237,21 @@ def is_bedrock_embedder(): client_class = embedder_config.get("client_class", "") return client_class == "BedrockClient" +def is_openrouter_embedder(): + """ + Check if the current embedder configuration uses OpenRouterClient or OpenAIClient pointed at OpenRouter. + + Returns: + bool: True if using OpenRouter for embeddings, False otherwise + """ + return EMBEDDER_TYPE == 'openrouter' and 'embedder_openrouter' in configs + def get_embedder_type(): """ Get the current embedder type based on configuration. 
- + Returns: - str: 'bedrock', 'ollama', 'google', or 'openai' (default) + str: 'bedrock', 'ollama', 'google', 'openrouter', or 'openai' (default) """ if is_bedrock_embedder(): return 'bedrock' @@ -248,6 +259,8 @@ def get_embedder_type(): return 'ollama' elif is_google_embedder(): return 'google' + elif is_openrouter_embedder(): + return 'openrouter' else: return 'openai' @@ -341,7 +354,7 @@ def load_lang_config(): # Update embedder configuration if embedder_config: - for key in ["embedder", "embedder_ollama", "embedder_google", "embedder_bedrock", "retriever", "text_splitter"]: + for key in ["embedder", "embedder_ollama", "embedder_google", "embedder_bedrock", "embedder_openrouter", "retriever", "text_splitter"]: if key in embedder_config: configs[key] = embedder_config[key] diff --git a/api/config/embedder.json b/api/config/embedder.json index e64cfdcbf..47b43e589 100644 --- a/api/config/embedder.json +++ b/api/config/embedder.json @@ -30,6 +30,18 @@ "dimensions": 256 } }, + "embedder_openrouter": { + "client_class": "OpenAIClient", + "initialize_kwargs": { + "base_url": "https://openrouter.ai/api/v1", + "env_api_key_name": "OPENROUTER_API_KEY" + }, + "batch_size": 500, + "model_kwargs": { + "model": "openai/text-embedding-3-small", + "encoding_format": "float" + } + }, "retriever": { "top_k": 20 }, diff --git a/api/tools/embedder.py b/api/tools/embedder.py index 050d63547..59aa0401d 100644 --- a/api/tools/embedder.py +++ b/api/tools/embedder.py @@ -9,7 +9,7 @@ def get_embedder(is_local_ollama: bool = False, use_google_embedder: bool = Fals Args: is_local_ollama: Legacy parameter for Ollama embedder use_google_embedder: Legacy parameter for Google embedder - embedder_type: Direct specification of embedder type ('ollama', 'google', 'bedrock', 'openai') + embedder_type: Direct specification of embedder type ('ollama', 'google', 'bedrock', 'openrouter', 'openai') Returns: adal.Embedder: Configured embedder instance @@ -22,6 +22,8 @@ def get_embedder(is_local_ollama: 
bool = False, use_google_embedder: bool = Fals embedder_config = configs["embedder_google"] elif embedder_type == 'bedrock': embedder_config = configs["embedder_bedrock"] + elif embedder_type == 'openrouter': + embedder_config = configs["embedder_openrouter"] else: # default to openai embedder_config = configs["embedder"] elif is_local_ollama: @@ -37,6 +39,8 @@ def get_embedder(is_local_ollama: bool = False, use_google_embedder: bool = Fals embedder_config = configs["embedder_ollama"] elif current_type == 'google': embedder_config = configs["embedder_google"] + elif current_type == 'openrouter': + embedder_config = configs["embedder_openrouter"] else: embedder_config = configs["embedder"] From 244f0cd9524e603f9a78998baf71084f842657a5 Mon Sep 17 00:00:00 2001 From: Roman Zydyk Date: Tue, 17 Mar 2026 12:54:00 +0100 Subject: [PATCH 2/2] Adjust based on CR --- api/config.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/api/config.py b/api/config.py index a68b99814..be2c3aca5 100644 --- a/api/config.py +++ b/api/config.py @@ -239,12 +239,29 @@ def is_bedrock_embedder(): def is_openrouter_embedder(): """ - Check if the current embedder configuration uses OpenRouterClient or OpenAIClient pointed at OpenRouter. + Check if the current embedder configuration uses OpenRouter for embeddings. + + Detects OpenRouter by inspecting the loaded configuration's initialize_kwargs + for a base_url pointing to openrouter.ai, consistent with how other is_*_embedder + helpers inspect the config rather than reading environment variables directly. 
Returns: bool: True if using OpenRouter for embeddings, False otherwise """ - return EMBEDDER_TYPE == 'openrouter' and 'embedder_openrouter' in configs + embedder_config = get_embedder_config() + if not embedder_config: + return False + + initialize_kwargs = embedder_config.get("initialize_kwargs", {}) + base_url = initialize_kwargs.get("base_url", "") + if base_url and "openrouter.ai" in base_url: + return True + + # Fallback: explicit OpenRouterClient class + model_client = embedder_config.get("model_client") + if model_client: + return model_client.__name__ == "OpenRouterClient" + return embedder_config.get("client_class", "") == "OpenRouterClient" def get_embedder_type(): """