diff --git a/.github/workflows/e2e_tests_providers.yaml b/.github/workflows/e2e_tests_providers.yaml index 82886d6a2..0673f262e 100644 --- a/.github/workflows/e2e_tests_providers.yaml +++ b/.github/workflows/e2e_tests_providers.yaml @@ -2,9 +2,10 @@ name: E2E Inference Provider Tests on: - schedule: - - cron: "0 0 * * *" # Runs once a day at midnight UTC - workflow_dispatch: + push + # schedule: + # - cron: "0 0 * * *" # Runs once a day at midnight UTC + # workflow_dispatch: jobs: e2e_tests: @@ -259,11 +260,17 @@ jobs: exit 1 } + # Wait for watsonx library mode to finish before running server mode + # watsonx has a rate limit of 2 calls / second + - name: Wait for watsonx library mode to finish + if: matrix.environment == 'watsonx' && matrix.mode == 'server' + run: sleep 2400 # 40 minutes + # watsonx has a different convention than "/" - name: Set watsonx test overrides if: matrix.environment == 'watsonx' run: | - echo "E2E_DEFAULT_MODEL_OVERRIDE=watsonx/watsonx/meta-llama/llama-3-3-70b-instruct" >> $GITHUB_ENV + echo "E2E_DEFAULT_MODEL_OVERRIDE=meta-llama/llama-4-maverick-17b-128e-instruct-fp8" >> $GITHUB_ENV echo "E2E_DEFAULT_PROVIDER_OVERRIDE=watsonx" >> $GITHUB_ENV - name: Run e2e tests diff --git a/.github/workflows/e2e_tests_rhaiis.yaml b/.github/workflows/e2e_tests_rhaiis.yaml index 54a0080e2..85b2f2e56 100644 --- a/.github/workflows/e2e_tests_rhaiis.yaml +++ b/.github/workflows/e2e_tests_rhaiis.yaml @@ -2,9 +2,10 @@ name: RHAIIS E2E Tests on: - schedule: - - cron: "0 0 * * *" # Runs once a day at midnight UTC - workflow_dispatch: + push + # schedule: + # - cron: "0 0 * * *" # Runs once a day at midnight UTC + # workflow_dispatch: jobs: @@ -26,6 +27,8 @@ jobs: RHAIIS_API_KEY: ${{ secrets.RHAIIS_API_KEY }} RHAIIS_MODEL: ${{ vars.RHAIIS_MODEL }} FAISS_VECTOR_STORE_ID: ${{ vars.FAISS_VECTOR_STORE_ID }} + E2E_DEFAULT_MODEL_OVERRIDE: ${{ vars.RHAIIS_MODEL }} + E2E_DEFAULT_PROVIDER_OVERRIDE: vllm steps: - uses: actions/checkout@v4 diff --git a/Makefile b/Makefile 
index e9ec83739..a2db5a2df 100644 --- a/Makefile +++ b/Makefile @@ -30,7 +30,7 @@ test-integration: ## Run integration tests tests COVERAGE_FILE="${ARTIFACT_DIR}/.coverage.integration" uv run python -m pytest tests/integration --cov=src --cov-report term-missing --cov-report "json:${ARTIFACT_DIR}/coverage_integration.json" --junit-xml="${ARTIFACT_DIR}/junit_integration.xml" --cov-fail-under=10 test-e2e: ## Run end to end tests for the service - uv run behave --color --format pretty --tags=-skip -D dump_errors=true @tests/e2e/test_list.txt + script -q -e -c "uv run behave --color --format pretty --tags=-skip -D dump_errors=true @tests/e2e/test_list.txt" test-e2e-local: ## Run end to end tests for the service uv run behave --color --format pretty --tags=-skip -D dump_errors=true @tests/e2e/test_list.txt diff --git a/docker-compose-library.yaml b/docker-compose-library.yaml index 3c198c0a8..3a77fc1c8 100644 --- a/docker-compose-library.yaml +++ b/docker-compose-library.yaml @@ -67,6 +67,7 @@ services: - WATSONX_BASE_URL=${WATSONX_BASE_URL:-} - WATSONX_PROJECT_ID=${WATSONX_PROJECT_ID:-} - WATSONX_API_KEY=${WATSONX_API_KEY:-} + - LITELLM_DROP_PARAMS=true # Enable debug logging if needed - LLAMA_STACK_LOGGING=${LLAMA_STACK_LOGGING:-} # FAISS test diff --git a/docker-compose.yaml b/docker-compose.yaml index 4ee0d30c1..99e744c37 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -59,6 +59,7 @@ services: - WATSONX_BASE_URL=${WATSONX_BASE_URL:-} - WATSONX_PROJECT_ID=${WATSONX_PROJECT_ID:-} - WATSONX_API_KEY=${WATSONX_API_KEY:-} + - LITELLM_DROP_PARAMS=true # Enable debug logging if needed - LLAMA_STACK_LOGGING=${LLAMA_STACK_LOGGING:-} # FAISS test diff --git a/examples/azure-run.yaml b/examples/azure-run.yaml index 25dfe1e22..894e24528 100644 --- a/examples/azure-run.yaml +++ b/examples/azure-run.yaml @@ -23,7 +23,7 @@ providers: provider_type: remote::azure config: api_key: ${env.AZURE_API_KEY} - api_base: https://ols-test.openai.azure.com/ + base_url: 
https://ols-test.openai.azure.com/openai/v1 api_version: 2024-02-15-preview - provider_id: openai provider_type: remote::openai @@ -50,14 +50,17 @@ providers: provider_id: basic provider_type: inline::basic tool_runtime: - - config: {} + - config: {} # Enable the RAG tool provider_id: rag-runtime provider_type: inline::rag-runtime + - config: {} # Enable MCP (Model Context Protocol) support + provider_id: model-context-protocol + provider_type: remote::model-context-protocol vector_io: - config: persistence: namespace: vector_io::faiss - backend: kv_default + backend: kv_rag provider_id: faiss provider_type: inline::faiss agents: @@ -105,7 +108,10 @@ storage: backends: kv_default: type: kv_sqlite - db_path: ${env.KV_STORE_PATH:=~/.llama/storage/rag/kv_store.db} + db_path: ${env.KV_STORE_PATH:=~/.llama/storage/kv_store.db} + kv_rag: # Define the storage backend type for RAG + type: kv_sqlite + db_path: ${env.KV_RAG_PATH:=~/.llama/storage/rag/kv_store.db} sql_default: type: sql_sqlite db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db} @@ -130,10 +136,21 @@ registered_resources: provider_id: azure model_type: llm provider_model_id: gpt-4o-mini + - model_id: all-mpnet-base-v2 + model_type: embedding + provider_id: sentence-transformers + provider_model_id: all-mpnet-base-v2 + metadata: + embedding_dimension: 768 shields: - shield_id: llama-guard provider_id: llama-guard provider_shield_id: openai/gpt-4o-mini + vector_stores: + - embedding_dimension: 768 + embedding_model: sentence-transformers/all-mpnet-base-v2 + provider_id: faiss + vector_store_id: ${env.FAISS_VECTOR_STORE_ID} datasets: [] scoring_fns: [] benchmarks: [] @@ -144,6 +161,6 @@ vector_stores: default_provider_id: faiss default_embedding_model: provider_id: sentence-transformers - model_id: nomic-ai/nomic-embed-text-v1.5 + model_id: all-mpnet-base-v2 safety: default_shield_id: llama-guard diff --git a/examples/vertexai-run.yaml b/examples/vertexai-run.yaml index 6ce7cbdad..6a49e350f 100644 --- 
a/examples/vertexai-run.yaml +++ b/examples/vertexai-run.yaml @@ -50,14 +50,17 @@ providers: provider_id: basic provider_type: inline::basic tool_runtime: - - config: {} + - config: {} # Enable the RAG tool provider_id: rag-runtime provider_type: inline::rag-runtime + - config: {} # Enable MCP (Model Context Protocol) support + provider_id: model-context-protocol + provider_type: remote::model-context-protocol vector_io: - config: persistence: namespace: vector_io::faiss - backend: kv_default + backend: kv_rag provider_id: faiss provider_type: inline::faiss agents: @@ -105,7 +108,10 @@ storage: backends: kv_default: type: kv_sqlite - db_path: ${env.KV_STORE_PATH:=~/.llama/storage/rag/kv_store.db} + db_path: ${env.KV_STORE_PATH:=~/.llama/storage/kv_store.db} + kv_rag: # Define the storage backend type for RAG + type: kv_sqlite + db_path: ${env.KV_RAG_PATH:=~/.llama/storage/rag/kv_store.db} sql_default: type: sql_sqlite db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db} @@ -125,11 +131,22 @@ storage: namespace: prompts backend: kv_default registered_resources: - models: [] + models: + - model_id: all-mpnet-base-v2 + model_type: embedding + provider_id: sentence-transformers + provider_model_id: all-mpnet-base-v2 + metadata: + embedding_dimension: 768 shields: - shield_id: llama-guard provider_id: llama-guard provider_shield_id: openai/gpt-4o-mini + vector_stores: + - embedding_dimension: 768 + embedding_model: sentence-transformers/all-mpnet-base-v2 + provider_id: faiss + vector_store_id: ${env.FAISS_VECTOR_STORE_ID} datasets: [] scoring_fns: [] benchmarks: [] @@ -140,8 +157,6 @@ vector_stores: default_provider_id: faiss default_embedding_model: provider_id: sentence-transformers - model_id: nomic-ai/nomic-embed-text-v1.5 + model_id: all-mpnet-base-v2 safety: default_shield_id: llama-guard -telemetry: - enabled: true diff --git a/examples/watsonx-run.yaml b/examples/watsonx-run.yaml index c848e2ce2..ec7c988c4 100644 --- a/examples/watsonx-run.yaml +++ 
b/examples/watsonx-run.yaml @@ -22,7 +22,7 @@ providers: - provider_id: watsonx provider_type: remote::watsonx config: - url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} + base_url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} api_key: ${env.WATSONX_API_KEY:=key-not-set} project_id: ${env.WATSONX_PROJECT_ID:=project-not-set} timeout: 1200 @@ -61,11 +61,14 @@ providers: - config: {} # Enable the RAG tool provider_id: rag-runtime provider_type: inline::rag-runtime + - config: {} # Enable MCP (Model Context Protocol) support + provider_id: model-context-protocol + provider_type: remote::model-context-protocol vector_io: - config: # Define the storage backend for RAG persistence: namespace: vector_io::faiss - backend: kv_default + backend: kv_rag provider_id: faiss provider_type: inline::faiss agents: @@ -111,12 +114,15 @@ server: port: 8321 storage: backends: - kv_default: # Define the storage backend type for RAG, in this case registry and RAG are unified i.e. information on registered resources (e.g. 
models, vector_stores) are saved together with the RAG chunks + kv_default: + type: kv_sqlite + db_path: ${env.KV_STORE_PATH:=~/.llama/storage/kv_store.db} + kv_rag: # Define the storage backend type for RAG type: kv_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/rag/kv_store.db} + db_path: ${env.KV_RAG_PATH:=~/.llama/storage/rag/kv_store.db} sql_default: type: sql_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/sql_store.db} + db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db} stores: metadata: namespace: registry @@ -138,11 +144,21 @@ registered_resources: provider_id: watsonx model_type: llm provider_model_id: watsonx/meta-llama/llama-3-3-70b-instruct + - model_id: all-mpnet-base-v2 + model_type: embedding + provider_id: sentence-transformers + provider_model_id: all-mpnet-base-v2 + metadata: + embedding_dimension: 768 shields: - shield_id: llama-guard provider_id: llama-guard provider_shield_id: openai/gpt-4o-mini - vector_stores: [] + vector_stores: + - embedding_dimension: 768 + embedding_model: sentence-transformers/all-mpnet-base-v2 + provider_id: faiss + vector_store_id: ${env.FAISS_VECTOR_STORE_ID} datasets: [] scoring_fns: [] benchmarks: [] @@ -153,4 +169,4 @@ vector_stores: default_provider_id: faiss default_embedding_model: # Define the default embedding model for RAG provider_id: sentence-transformers - model_id: nomic-ai/nomic-embed-text-v1.5 + model_id: all-mpnet-base-v2 diff --git a/src/utils/responses.py b/src/utils/responses.py index 48a20e412..903322e50 100644 --- a/src/utils/responses.py +++ b/src/utils/responses.py @@ -13,6 +13,9 @@ OpenAIResponseInputMessageContent as InputMessageContent, OpenAIResponseInputMessageContentFile as InputFilePart, OpenAIResponseInputMessageContentText as InputTextPart, + OpenAIResponseInputTool as InputTool, + OpenAIResponseInputToolChoice as ToolChoice, + OpenAIResponseInputToolChoiceMode as ToolChoiceMode, OpenAIResponseInputToolFileSearch as InputToolFileSearch, 
OpenAIResponseInputToolMCP as InputToolMCP, OpenAIResponseMCPApprovalRequest as MCPApprovalRequest, @@ -28,17 +31,14 @@ OpenAIResponseOutputMessageMCPListTools as MCPListTools, OpenAIResponseOutputMessageWebSearchToolCall as WebSearchCall, OpenAIResponseUsage as ResponseUsage, - OpenAIResponseInputTool as InputTool, OpenAIResponseUsageInputTokensDetails as UsageInputTokensDetails, OpenAIResponseUsageOutputTokensDetails as UsageOutputTokensDetails, - OpenAIResponseInputToolChoiceMode as ToolChoiceMode, - OpenAIResponseInputToolChoice as ToolChoice, ) from llama_stack_client import APIConnectionError, APIStatusError, AsyncLlamaStackClient -from client import AsyncLlamaStackClientHolder import constants import metrics +from client import AsyncLlamaStackClientHolder from configuration import configuration from constants import DEFAULT_RAG_TOOL from log import get_logger @@ -334,7 +334,6 @@ async def prepare_responses_params( # pylint: disable=too-many-arguments,too-ma # Build x-llamastack-provider-data header from MCP tool headers extra_headers = _build_provider_data_headers(tools) - return ResponsesApiParams( input=input_text, model=model, @@ -1006,6 +1005,12 @@ async def check_model_configured( for model in models: if model.id == model_id: return True + + # Workaround for a llama-stack watsonx bug + if model_id.startswith("watsonx/") and model.id == model_id.removeprefix( + "watsonx/" + ): + return True return False except APIStatusError as e: response = InternalServerErrorResponse.generic() @@ -1080,6 +1085,14 @@ async def select_model_for_responses( model = llm_models[0] logger.info("Selected first LLM model: %s", model.id) + + # Workaround for a llama-stack watsonx bug + # the model id needs the "watsonx/" prefix in the responses request + metadata = model.custom_metadata or {} + if metadata.get("provider_id") == "watsonx": + provider_resource_id = metadata.get("provider_resource_id") + if isinstance(provider_resource_id, str): + return provider_resource_id return model.id diff 
--git a/test.containerfile b/test.containerfile index ecfc54313..884fd8525 100644 --- a/test.containerfile +++ b/test.containerfile @@ -20,7 +20,8 @@ COPY src ./src RUN uv sync --locked --no-install-project --group llslibdev # Add virtual environment to PATH for llama command -ENV PATH="/opt/app-root/.venv/bin:$PATH" +ENV PATH="/opt/app-root/.venv/bin:$PATH" \ + PYTHONPATH="/opt/app-root/src" # Set HOME directory so llama-stack uses /opt/app-root/src/.llama ENV HOME="/opt/app-root/src" diff --git a/tests/e2e/configs/run-azure.yaml b/tests/e2e/configs/run-azure.yaml index bca3e4583..ffa265fa2 100644 --- a/tests/e2e/configs/run-azure.yaml +++ b/tests/e2e/configs/run-azure.yaml @@ -23,8 +23,9 @@ providers: provider_type: remote::azure config: api_key: ${env.AZURE_API_KEY} - api_base: https://ols-test.openai.azure.com/ + base_url: https://ols-test.openai.azure.com/openai/v1 api_version: 2024-02-15-preview + allowed_models: ["gpt-4o-mini"] - provider_id: openai provider_type: remote::openai config: @@ -50,9 +51,12 @@ providers: provider_id: basic provider_type: inline::basic tool_runtime: - - config: {} + - config: {} # Enable the RAG tool provider_id: rag-runtime provider_type: inline::rag-runtime + - config: {} # Enable MCP (Model Context Protocol) support + provider_id: model-context-protocol + provider_type: remote::model-context-protocol vector_io: - config: persistence: diff --git a/tests/e2e/configs/run-rhaiis.yaml b/tests/e2e/configs/run-rhaiis.yaml index d37720c91..8e613bec0 100644 --- a/tests/e2e/configs/run-rhaiis.yaml +++ b/tests/e2e/configs/run-rhaiis.yaml @@ -26,6 +26,10 @@ providers: api_token: ${env.RHAIIS_API_KEY} tls_verify: false max_tokens: 2048 + - provider_id: openai + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY} - config: {} provider_id: sentence-transformers provider_type: inline::sentence-transformers @@ -142,7 +146,7 @@ registered_resources: shields: - shield_id: llama-guard provider_id: llama-guard - 
provider_shield_id: vllm/${env.RHAIIS_MODEL} + provider_shield_id: openai/gpt-4o-mini vector_stores: - embedding_dimension: 768 embedding_model: sentence-transformers/all-mpnet-base-v2 diff --git a/tests/e2e/configs/run-vertexai.yaml b/tests/e2e/configs/run-vertexai.yaml index bfa69b40a..6a49e350f 100644 --- a/tests/e2e/configs/run-vertexai.yaml +++ b/tests/e2e/configs/run-vertexai.yaml @@ -50,9 +50,12 @@ providers: provider_id: basic provider_type: inline::basic tool_runtime: - - config: {} + - config: {} # Enable the RAG tool provider_id: rag-runtime provider_type: inline::rag-runtime + - config: {} # Enable MCP (Model Context Protocol) support + provider_id: model-context-protocol + provider_type: remote::model-context-protocol vector_io: - config: persistence: diff --git a/tests/e2e/configs/run-watsonx.yaml b/tests/e2e/configs/run-watsonx.yaml index 4f02853f7..fdf26b2dc 100644 --- a/tests/e2e/configs/run-watsonx.yaml +++ b/tests/e2e/configs/run-watsonx.yaml @@ -22,7 +22,7 @@ providers: - provider_id: watsonx provider_type: remote::watsonx config: - url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} + base_url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} api_key: ${env.WATSONX_API_KEY:=key-not-set} project_id: ${env.WATSONX_PROJECT_ID:=project-not-set} timeout: 1200 @@ -61,6 +61,9 @@ providers: - config: {} # Enable the RAG tool provider_id: rag-runtime provider_type: inline::rag-runtime + - config: {} # Enable MCP (Model Context Protocol) support + provider_id: model-context-protocol + provider_type: remote::model-context-protocol vector_io: - config: # Define the storage backend for RAG persistence: @@ -137,10 +140,6 @@ storage: backend: kv_default registered_resources: models: - - model_id: custom-watsonx-model - provider_id: watsonx - model_type: llm - provider_model_id: watsonx/meta-llama/llama-3-3-70b-instruct - model_id: all-mpnet-base-v2 model_type: embedding provider_id: sentence-transformers @@ -156,7 +155,6 @@ 
registered_resources: embedding_model: sentence-transformers/all-mpnet-base-v2 provider_id: faiss vector_store_id: ${env.FAISS_VECTOR_STORE_ID} - vector_stores: [] datasets: [] scoring_fns: [] benchmarks: [] diff --git a/tests/e2e/utils/utils.py b/tests/e2e/utils/utils.py index 580250bff..ec80b0d28 100644 --- a/tests/e2e/utils/utils.py +++ b/tests/e2e/utils/utils.py @@ -73,7 +73,7 @@ def validate_json(message: Any, schema: Any) -> None: assert False, "The provided schema is faulty:" + str(e) -def wait_for_container_health(container_name: str, max_attempts: int = 3) -> None: +def wait_for_container_health(container_name: str, max_attempts: int = 6) -> None: """Wait for container to be healthy. Polls a Docker container until its health status becomes `healthy` or the