From c80332d056298b978f036c5106136f80ce7a1dea Mon Sep 17 00:00:00 2001 From: Vinit Sutar Date: Sun, 5 Apr 2026 21:37:48 +0530 Subject: [PATCH 1/2] feat(litellm): add support for local proxy without API key - Add litellm to interactive provider selection menu - Support LITELLM_BASE_URL for local proxy deployments (no API key required) - Auto-add openai/ prefix when using api_base for proper LiteLLM routing - Add dummy API key for local proxies (OpenAI SDK requirement) - Add validation and tests for litellm provider configuration Co-Authored-By: Claude Opus 4.6 --- packages/cli/src/repowise/cli/helpers.py | 44 +++- packages/cli/src/repowise/cli/ui.py | 32 ++- .../repowise/core/providers/llm/litellm.py | 39 +++- tests/unit/cli/test_helpers.py | 27 +++ .../test_providers/test_litellm_provider.py | 199 ++++++++++++++++++ 5 files changed, 324 insertions(+), 17 deletions(-) create mode 100644 tests/unit/test_providers/test_litellm_provider.py diff --git a/packages/cli/src/repowise/cli/helpers.py b/packages/cli/src/repowise/cli/helpers.py index 68518c3..d1ae01f 100644 --- a/packages/cli/src/repowise/cli/helpers.py +++ b/packages/cli/src/repowise/cli/helpers.py @@ -251,6 +251,12 @@ def resolve_provider( kwargs["api_key"] = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY") elif provider_name == "ollama" and os.environ.get("OLLAMA_BASE_URL"): kwargs["base_url"] = os.environ["OLLAMA_BASE_URL"] + elif provider_name == "litellm": + # LiteLLM: API key for cloud, base URL for local proxy + if os.environ.get("LITELLM_API_KEY"): + kwargs["api_key"] = os.environ["LITELLM_API_KEY"] + if os.environ.get("LITELLM_BASE_URL"): + kwargs["api_base"] = os.environ["LITELLM_BASE_URL"] return get_provider(provider_name, **kwargs) @@ -282,10 +288,26 @@ def resolve_provider( api_key = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY") kwargs = {"model": model, "api_key": api_key} if model else {"api_key": api_key} return get_provider("gemini", **kwargs) + # LiteLLM: check for API key (cloud) or base URL (local proxy) + if os.environ.get("LITELLM_API_KEY") and os.environ["LITELLM_API_KEY"].strip(): + kwargs = ( + {"model": model, "api_key": os.environ["LITELLM_API_KEY"]} + if model + else {"api_key": os.environ["LITELLM_API_KEY"]} + ) + return get_provider("litellm", **kwargs) + if os.environ.get("LITELLM_BASE_URL") and os.environ["LITELLM_BASE_URL"].strip(): + kwargs = ( + {"model": model, "api_base": os.environ["LITELLM_BASE_URL"]} + if model + else {"api_base": os.environ["LITELLM_BASE_URL"]} + ) + return get_provider("litellm", **kwargs) raise click.ClickException( "No provider configured. Use --provider, set REPOWISE_PROVIDER, " - "or set ANTHROPIC_API_KEY / OPENAI_API_KEY / OLLAMA_BASE_URL / GEMINI_API_KEY / GOOGLE_API_KEY." + "or set ANTHROPIC_API_KEY / OPENAI_API_KEY / OLLAMA_BASE_URL / GEMINI_API_KEY / " + "LITELLM_API_KEY / LITELLM_BASE_URL." 
) @@ -321,7 +343,10 @@ def _is_env_var_exists(var_name: str) -> bool: "openai": ["OPENAI_API_KEY"], "gemini": ["GEMINI_API_KEY", "GOOGLE_API_KEY"], # Either one "ollama": ["OLLAMA_BASE_URL"], - "litellm": ["LITELLM_API_KEY"], # May need others depending on backend + "litellm": [ + "LITELLM_API_KEY", + "LITELLM_BASE_URL", + ], # Either one (API key for cloud, base URL for local) } if provider_name: @@ -337,6 +362,10 @@ def _is_env_var_exists(var_name: str) -> bool: # Special case: either GEMINI_API_KEY or GOOGLE_API_KEY if not (_is_env_var_set("GEMINI_API_KEY") or _is_env_var_set("GOOGLE_API_KEY")): missing_vars = env_vars + elif provider_name == "litellm": + # Special case: LITELLM_API_KEY (cloud) OR LITELLM_BASE_URL (local proxy) + if not (_is_env_var_set("LITELLM_API_KEY") or _is_env_var_set("LITELLM_BASE_URL")): + missing_vars = env_vars else: for var in env_vars: if not _is_env_var_set(var): @@ -359,6 +388,17 @@ def _is_env_var_exists(var_name: str) -> bool: ) continue + if name == "litellm": + # Special case: LITELLM_API_KEY (cloud) OR LITELLM_BASE_URL (local proxy) + # Only warn if explicitly requested and neither is set + if os.environ.get("REPOWISE_PROVIDER") == "litellm" and not ( + _is_env_var_set("LITELLM_API_KEY") or _is_env_var_set("LITELLM_BASE_URL") + ): + warnings.append( + "Provider 'litellm' requires LITELLM_API_KEY or LITELLM_BASE_URL environment variable" + ) + continue + missing = [var for var in env_vars if not _is_env_var_set(var)] if missing: # Only warn if this provider is explicitly requested OR diff --git a/packages/cli/src/repowise/cli/ui.py b/packages/cli/src/repowise/cli/ui.py index 3c500d1..ec6b21f 100644 --- a/packages/cli/src/repowise/cli/ui.py +++ b/packages/cli/src/repowise/cli/ui.py @@ -84,11 +84,14 @@ def print_phase_header( "litellm": "groq/llama-3.1-70b-versatile", } +# For most providers, a single env var indicates configuration. +# litellm is special: can use LITELLM_API_KEY (cloud) OR LITELLM_BASE_URL (local proxy). _PROVIDER_ENV: dict[str, str] = { "gemini": "GEMINI_API_KEY", "openai": "OPENAI_API_KEY", "anthropic": "ANTHROPIC_API_KEY", "ollama": "OLLAMA_BASE_URL", + "litellm": "LITELLM_API_KEY", # Also checks LITELLM_BASE_URL in _detect_provider_status } _PROVIDER_SIGNUP: dict[str, str] = { @@ -96,6 +99,7 @@ def print_phase_header( "openai": "https://platform.openai.com/api-keys", "anthropic": "https://console.anthropic.com/settings/keys", "ollama": "https://ollama.com/download", + "litellm": "https://docs.litellm.ai/docs/proxy/proxy", } @@ -226,6 +230,10 @@ def _detect_provider_status() -> dict[str, str]: if prov == "gemini": if os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY"): status[prov] = env_var + elif prov == "litellm": + # litellm can be configured via API key (cloud) OR base URL (local proxy) + if os.environ.get("LITELLM_API_KEY") or os.environ.get("LITELLM_BASE_URL"): + status[prov] = env_var elif os.environ.get(env_var): status[prov] = env_var return status @@ -292,14 +300,22 @@ def interactive_provider_select( env_var = _PROVIDER_ENV[chosen] signup_url = _PROVIDER_SIGNUP.get(chosen, "") console.print() - console.print(f" [bold]{chosen}[/bold] requires [cyan]{env_var}[/cyan].") - if signup_url: - console.print(f" Get your API key here: [{BRAND}]{signup_url}[/]") - console.print() - key = _prompt_api_key(console, chosen, env_var, repo_path=repo_path) - if not key: - console.print(f" [{WARN}]Skipped. 
Please select another provider.[/]") - return interactive_provider_select(console, model_flag, repo_path=repo_path) + # Special case: litellm local proxy doesn't need an API key + if chosen == "litellm" and os.environ.get("LITELLM_BASE_URL"): + console.print( + f" [{OK}]✓ Using LiteLLM proxy at[/] [{BRAND}]{os.environ['LITELLM_BASE_URL']}[/]" + ) + console.print(" [dim]No API key required for local proxy.[/dim]") + console.print() + else: + console.print(f" [bold]{chosen}[/bold] requires [cyan]{env_var}[/cyan].") + if signup_url: + console.print(f" Get your API key here: [{BRAND}]{signup_url}[/]") + console.print() + key = _prompt_api_key(console, chosen, env_var, repo_path=repo_path) + if not key: + console.print(f" [{WARN}]Skipped. Please select another provider.[/]") + return interactive_provider_select(console, model_flag, repo_path=repo_path) # --- model --- default_model = _PROVIDER_DEFAULTS.get(chosen, "") diff --git a/packages/core/src/repowise/core/providers/llm/litellm.py b/packages/core/src/repowise/core/providers/llm/litellm.py index 0692cd9..5b36677 100644 --- a/packages/core/src/repowise/core/providers/llm/litellm.py +++ b/packages/core/src/repowise/core/providers/llm/litellm.py @@ -19,13 +19,16 @@ from __future__ import annotations +from collections.abc import AsyncIterator +from typing import Any + import structlog from tenacity import ( + RetryError, retry, retry_if_exception_type, stop_after_attempt, wait_exponential_jitter, - RetryError, ) from repowise.core.providers.llm.base import ( @@ -36,8 +39,6 @@ ProviderError, RateLimitError, ) - -from typing import Any, AsyncIterator from repowise.core.rate_limiter import RateLimiter log = structlog.get_logger(__name__) @@ -52,9 +53,13 @@ class LiteLLMProvider(BaseProvider): Args: model: LiteLLM model string (e.g., "groq/llama-3.1-70b-versatile"). + When using api_base (local proxy), just use the model name + (e.g., "zai.glm-5") - the provider will auto-add "openai/" prefix. api_key: API key for the target provider. Some providers read from environment variables (e.g., GROQ_API_KEY, TOGETHER_API_KEY). - api_base: Optional custom API base URL (e.g., for self-hosted deployments). + For local proxies without auth, a dummy key is used. + api_base: Optional custom API base URL for self-hosted LiteLLM proxy. + When set, the model is treated as OpenAI-compatible. rate_limiter: Optional RateLimiter instance. """ @@ -70,6 +75,13 @@ def __init__( self._api_base = api_base self._rate_limiter = rate_limiter + # When using a custom api_base (proxy), treat model as OpenAI-compatible. + # LiteLLM requires "openai/" prefix to route to custom endpoints. + if api_base and not model.startswith("openai/"): + self._litellm_model = f"openai/{model}" + else: + self._litellm_model = model + @property def provider_name(self) -> str: return "litellm" @@ -125,7 +137,7 @@ async def _generate_with_retry( litellm.suppress_debug_info = True call_kwargs: dict[str, object] = { - "model": self._model, + "model": self._litellm_model, "messages": [ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}, @@ -137,6 +149,10 @@ async def _generate_with_retry( call_kwargs["api_key"] = self._api_key if self._api_base: call_kwargs["api_base"] = self._api_base + # Local proxy without auth: OpenAI SDK still requires a key. + # Use a dummy key if none provided. 
+ if not self._api_key: + call_kwargs["api_key"] = "sk-dummy" try: response = await litellm.acompletion(**call_kwargs) @@ -177,6 +193,7 @@ async def stream_chat( tool_executor: Any | None = None, ) -> AsyncIterator[ChatStreamEvent]: import json as _json + import litellm # type: ignore[import-untyped] litellm.set_verbose = False @@ -184,7 +201,7 @@ async def stream_chat( full_messages = [{"role": "system", "content": system_prompt}, *messages] call_kwargs: dict[str, Any] = { - "model": self._model, + "model": self._litellm_model, "messages": full_messages, "temperature": temperature, "max_tokens": max_tokens, @@ -196,6 +213,10 @@ async def stream_chat( call_kwargs["api_key"] = self._api_key if self._api_base: call_kwargs["api_base"] = self._api_base + # Local proxy without auth: OpenAI SDK still requires a key. + # Use a dummy key if none provided. + if not self._api_key: + call_kwargs["api_key"] = "sk-dummy" try: stream = await litellm.acompletion(**call_kwargs) @@ -222,7 +243,11 @@ async def stream_chat( for tc_delta in delta.tool_calls: idx = tc_delta.index if idx not in tool_calls_acc: - tool_calls_acc[idx] = {"id": getattr(tc_delta, "id", "") or "", "name": "", "arguments": ""} + tool_calls_acc[idx] = { + "id": getattr(tc_delta, "id", "") or "", + "name": "", + "arguments": "", + } acc = tool_calls_acc[idx] if getattr(tc_delta, "id", None): acc["id"] = tc_delta.id diff --git a/tests/unit/cli/test_helpers.py b/tests/unit/cli/test_helpers.py index 1444ac2..07845c3 100644 --- a/tests/unit/cli/test_helpers.py +++ b/tests/unit/cli/test_helpers.py @@ -231,3 +231,30 @@ def test_anthropic_empty_key_auto_detect(self, monkeypatch): assert len(warnings) == 1 assert "anthropic" in warnings[0] assert "ANTHROPIC_API_KEY" in warnings[0] + + # --- litellm tests --- + + def test_litellm_with_api_key(self, monkeypatch): + monkeypatch.setenv("LITELLM_API_KEY", "test-key") + monkeypatch.setenv("REPOWISE_PROVIDER", "litellm") + + assert validate_provider_config() == [] + + def test_litellm_with_base_url(self, monkeypatch): + """Local proxy without API key should be valid.""" + monkeypatch.delenv("LITELLM_API_KEY", raising=False) + monkeypatch.setenv("LITELLM_BASE_URL", "http://localhost:4000/v1") + monkeypatch.setenv("REPOWISE_PROVIDER", "litellm") + + assert validate_provider_config() == [] + + def test_litellm_missing_both(self, monkeypatch): + """Should warn when neither API key nor base URL is set.""" + monkeypatch.delenv("LITELLM_API_KEY", raising=False) + monkeypatch.delenv("LITELLM_BASE_URL", raising=False) + monkeypatch.setenv("REPOWISE_PROVIDER", "litellm") + + warnings = validate_provider_config() + assert len(warnings) == 1 + assert "litellm" in warnings[0] + assert "LITELLM_API_KEY" in warnings[0] or "LITELLM_BASE_URL" in warnings[0] diff --git a/tests/unit/test_providers/test_litellm_provider.py b/tests/unit/test_providers/test_litellm_provider.py new file mode 100644 index 0000000..bf0bab4 --- /dev/null +++ b/tests/unit/test_providers/test_litellm_provider.py @@ -0,0 +1,199 @@ +"""Unit tests for LiteLLMProvider. + +All tests mock the litellm.acompletion call — no real API calls are made. 
+""" + +from __future__ import annotations + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +pytest.importorskip("litellm", reason="litellm SDK not installed") + +from repowise.core.providers.llm.base import GeneratedResponse, ProviderError +from repowise.core.providers.llm.litellm import LiteLLMProvider + +# --------------------------------------------------------------------------- +# Construction +# --------------------------------------------------------------------------- + + +def test_provider_name(): + p = LiteLLMProvider(model="gpt-4o", api_key="sk-test") + assert p.provider_name == "litellm" + + +def test_default_model(): + p = LiteLLMProvider(model="groq/llama-3.1-70b-versatile", api_key="sk-test") + assert p.model_name == "groq/llama-3.1-70b-versatile" + + +def test_model_without_api_base(): + """Without api_base, model should be passed through unchanged.""" + p = LiteLLMProvider(model="groq/llama-3.1-70b-versatile", api_key="sk-test") + assert p._litellm_model == "groq/llama-3.1-70b-versatile" + + +def test_model_with_api_base_adds_openai_prefix(): + """With api_base (local proxy), model should get openai/ prefix.""" + p = LiteLLMProvider( + model="zai.glm-5", + api_base="http://localhost:4000/v1", + ) + assert p._litellm_model == "openai/zai.glm-5" + assert p.model_name == "zai.glm-5" # Public property shows original name + + +def test_model_with_api_base_and_existing_prefix(): + """If model already has openai/ prefix, don't add another.""" + p = LiteLLMProvider( + model="openai/gpt-4o", + api_base="http://localhost:4000/v1", + ) + assert p._litellm_model == "openai/gpt-4o" + + +def test_no_api_key_or_base(): + """Provider can be created without API key or base (for some backends).""" + p = LiteLLMProvider(model="groq/llama-3.1-70b-versatile") + assert p._api_key is None + assert p._api_base is None + + +# --------------------------------------------------------------------------- +# Successful generation +# --------------------------------------------------------------------------- + + +def _make_mock_response(text: str = "# Doc\nContent.") -> MagicMock: + usage = MagicMock() + usage.prompt_tokens = 120 + usage.completion_tokens = 60 + + choice = MagicMock() + choice.message.content = text + + response = MagicMock() + response.choices = [choice] + response.usage = usage + return response + + +async def test_generate_returns_generated_response(): + provider = LiteLLMProvider(model="gpt-4o", api_key="sk-test") + mock_response = _make_mock_response("Hello from LiteLLM") + + with patch("litellm.acompletion", new_callable=AsyncMock) as mock_acompletion: + mock_acompletion.return_value = mock_response + result = await provider.generate("sys", "user") + + assert isinstance(result, GeneratedResponse) + assert result.content == "Hello from LiteLLM" + + +async def test_generate_token_counts(): + provider = LiteLLMProvider(model="gpt-4o", api_key="sk-test") + mock_response = _make_mock_response() + + with patch("litellm.acompletion", new_callable=AsyncMock) as mock_acompletion: + mock_acompletion.return_value = mock_response + result = await provider.generate("sys", "user") + + assert result.input_tokens == 120 + assert result.output_tokens == 60 + + +async def test_generate_sends_correct_kwargs(): + provider = LiteLLMProvider( + model="groq/llama-3.1-70b-versatile", + api_key="sk-test", + ) + mock_response = _make_mock_response() + captured_kwargs: list[dict] = [] + + async def fake_acompletion(**kwargs): + captured_kwargs.append(kwargs) + return 
mock_response + + with patch("litellm.acompletion", new_callable=AsyncMock) as mock_acompletion: + mock_acompletion.side_effect = fake_acompletion + await provider.generate("system msg", "user msg", max_tokens=2048, temperature=0.5) + + kw = captured_kwargs[0] + assert kw["model"] == "groq/llama-3.1-70b-versatile" + assert kw["max_tokens"] == 2048 + assert kw["temperature"] == 0.5 + assert kw["api_key"] == "sk-test" + messages = kw["messages"] + assert messages[0] == {"role": "system", "content": "system msg"} + assert messages[1] == {"role": "user", "content": "user msg"} + + +async def test_generate_with_api_base(): + """With api_base (local proxy), should pass api_base and dummy key.""" + provider = LiteLLMProvider( + model="zai.glm-5", + api_base="http://localhost:4000/v1", + ) + mock_response = _make_mock_response() + captured_kwargs: list[dict] = [] + + async def fake_acompletion(**kwargs): + captured_kwargs.append(kwargs) + return mock_response + + with patch("litellm.acompletion", new_callable=AsyncMock) as mock_acompletion: + mock_acompletion.side_effect = fake_acompletion + await provider.generate("sys", "user") + + kw = captured_kwargs[0] + # Model should have openai/ prefix for proxy routing + assert kw["model"] == "openai/zai.glm-5" + assert kw["api_base"] == "http://localhost:4000/v1" + # Dummy key should be added when using api_base without api_key + assert kw["api_key"] == "sk-dummy" + + +async def test_generate_with_api_base_and_api_key(): + """With both api_base and api_key, should use provided key.""" + provider = LiteLLMProvider( + model="zai.glm-5", + api_key="sk-real-key", + api_base="http://localhost:4000/v1", + ) + mock_response = _make_mock_response() + captured_kwargs: list[dict] = [] + + async def fake_acompletion(**kwargs): + captured_kwargs.append(kwargs) + return mock_response + + with patch("litellm.acompletion", new_callable=AsyncMock) as mock_acompletion: + mock_acompletion.side_effect = fake_acompletion + await provider.generate("sys", "user") + + kw = captured_kwargs[0] + assert kw["api_key"] == "sk-real-key" + assert kw["api_base"] == "http://localhost:4000/v1" + + +# --------------------------------------------------------------------------- +# Error handling +# --------------------------------------------------------------------------- + + +async def test_api_error(): + import litellm + + provider = LiteLLMProvider(model="gpt-4o", api_key="sk-test") + + with patch("litellm.acompletion", new_callable=AsyncMock) as mock_acompletion: + mock_acompletion.side_effect = litellm.APIError( + message="server error", + llm_provider="openai", + model="gpt-4o", + status_code=500, + ) + with pytest.raises(ProviderError): + await provider.generate("sys", "user") \ No newline at end of file From 168bc8f978c8756740c74aac3e57636b41b7d58a Mon Sep 17 00:00:00 2001 From: Vinit Sutar Date: Mon, 6 Apr 2026 15:42:20 +0530 Subject: [PATCH 2/2] fix(litellm): add inline comment for sk-dummy to avoid secret scanner false positives Co-Authored-By: Claude Opus 4.6 --- packages/core/src/repowise/core/providers/llm/litellm.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/packages/core/src/repowise/core/providers/llm/litellm.py b/packages/core/src/repowise/core/providers/llm/litellm.py index 5b36677..cc89ab0 100644 --- a/packages/core/src/repowise/core/providers/llm/litellm.py +++ b/packages/core/src/repowise/core/providers/llm/litellm.py @@ -149,10 +149,8 @@ async def _generate_with_retry( call_kwargs["api_key"] = self._api_key if self._api_base: 
call_kwargs["api_base"] = self._api_base - # Local proxy without auth: OpenAI SDK still requires a key. - # Use a dummy key if none provided. if not self._api_key: - call_kwargs["api_key"] = "sk-dummy" + call_kwargs["api_key"] = "sk-dummy" # LiteLLM requires a non-empty key even for unauthenticated local proxies (OpenAI SDK requirement) try: response = await litellm.acompletion(**call_kwargs) @@ -213,10 +211,8 @@ async def stream_chat( call_kwargs["api_key"] = self._api_key if self._api_base: call_kwargs["api_base"] = self._api_base - # Local proxy without auth: OpenAI SDK still requires a key. - # Use a dummy key if none provided. if not self._api_key: - call_kwargs["api_key"] = "sk-dummy" + call_kwargs["api_key"] = "sk-dummy" # LiteLLM requires a non-empty key even for unauthenticated local proxies (OpenAI SDK requirement) try: stream = await litellm.acompletion(**call_kwargs)
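
Usage sketch (illustrative, not part of the patch): the snippet below walks the local-proxy flow end to end, assuming a LiteLLM proxy is listening at http://localhost:4000/v1 and serving a model named "zai.glm-5" (the same illustrative values the new tests use). With api_base set and no api_key, the provider auto-adds the "openai/" prefix for routing and sends the "sk-dummy" placeholder key.

    import asyncio

    from repowise.core.providers.llm.litellm import LiteLLMProvider

    async def main() -> None:
        # No api_key given: the provider injects "sk-dummy" to satisfy
        # the OpenAI SDK's non-empty-key requirement.
        provider = LiteLLMProvider(
            model="zai.glm-5",                    # sent to LiteLLM as "openai/zai.glm-5"
            api_base="http://localhost:4000/v1",  # local proxy, no auth
        )
        result = await provider.generate(
            "You are a concise assistant.",   # system prompt
            "Summarize this repository.",     # user prompt
            max_tokens=2048,
            temperature=0.5,
        )
        print(result.content)
        print(f"tokens: {result.input_tokens} in / {result.output_tokens} out")

    asyncio.run(main())

The same configuration is reachable from the CLI by exporting LITELLM_BASE_URL=http://localhost:4000/v1 with no LITELLM_API_KEY set, which resolve_provider now picks up.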