From 91b10993b128c25de9afd64af4fbb5336af55072 Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Thu, 19 Mar 2026 09:56:32 +0100 Subject: [PATCH 01/16] Run tests on push --- .github/workflows/e2e_tests_providers.yaml | 7 ++++--- .github/workflows/e2e_tests_rhaiis.yaml | 7 ++++--- .github/workflows/e2e_tests_rhelai.yaml | 7 ++++--- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/.github/workflows/e2e_tests_providers.yaml b/.github/workflows/e2e_tests_providers.yaml index 82886d6a2..5e44a765e 100644 --- a/.github/workflows/e2e_tests_providers.yaml +++ b/.github/workflows/e2e_tests_providers.yaml @@ -2,9 +2,10 @@ name: E2E Inference Provider Tests on: - schedule: - - cron: "0 0 * * *" # Runs once a day at midnight UTC - workflow_dispatch: + push + # schedule: + # - cron: "0 0 * * *" # Runs once a day at midnight UTC + # workflow_dispatch: jobs: e2e_tests: diff --git a/.github/workflows/e2e_tests_rhaiis.yaml b/.github/workflows/e2e_tests_rhaiis.yaml index 54a0080e2..652a23807 100644 --- a/.github/workflows/e2e_tests_rhaiis.yaml +++ b/.github/workflows/e2e_tests_rhaiis.yaml @@ -2,9 +2,10 @@ name: RHAIIS E2E Tests on: - schedule: - - cron: "0 0 * * *" # Runs once a day at midnight UTC - workflow_dispatch: + push + # schedule: + # - cron: "0 0 * * *" # Runs once a day at midnight UTC + # workflow_dispatch: jobs: diff --git a/.github/workflows/e2e_tests_rhelai.yaml b/.github/workflows/e2e_tests_rhelai.yaml index c9717bf62..6b03f8755 100644 --- a/.github/workflows/e2e_tests_rhelai.yaml +++ b/.github/workflows/e2e_tests_rhelai.yaml @@ -2,9 +2,10 @@ name: RHEL AI E2E Tests on: - schedule: - - cron: "0 0 * * *" # Runs once a day at midnight UTC - workflow_dispatch: + push + # schedule: + # - cron: "0 0 * * *" # Runs once a day at midnight UTC + # workflow_dispatch: jobs: e2e_tests: From cd2429de295064ed24bcf295a422bb4b56577b06 Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Thu, 19 Mar 2026 09:58:27 +0100 Subject: [PATCH 02/16] Fix vertex ai --- tests/e2e/configs/run-vertexai.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/e2e/configs/run-vertexai.yaml b/tests/e2e/configs/run-vertexai.yaml index bfa69b40a..6a49e350f 100644 --- a/tests/e2e/configs/run-vertexai.yaml +++ b/tests/e2e/configs/run-vertexai.yaml @@ -50,9 +50,12 @@ providers: provider_id: basic provider_type: inline::basic tool_runtime: - - config: {} + - config: {} # Enable the RAG tool provider_id: rag-runtime provider_type: inline::rag-runtime + - config: {} # Enable MCP (Model Context Protocol) support + provider_id: model-context-protocol + provider_type: remote::model-context-protocol vector_io: - config: persistence: From 1cb8cbd9119eeb9707a1540acdd9bc5d65ff88ca Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Thu, 19 Mar 2026 09:58:48 +0100 Subject: [PATCH 03/16] Fix constants module not found --- test.containerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test.containerfile b/test.containerfile index ecfc54313..884fd8525 100644 --- a/test.containerfile +++ b/test.containerfile @@ -20,7 +20,8 @@ COPY src ./src RUN uv sync --locked --no-install-project --group llslibdev # Add virtual environment to PATH for llama command -ENV PATH="/opt/app-root/.venv/bin:$PATH" +ENV PATH="/opt/app-root/.venv/bin:$PATH" \ + PYTHONPATH="/opt/app-root/src" # Set HOME directory so llama-stack uses /opt/app-root/src/.llama ENV HOME="/opt/app-root/src" From 00d15fe5d5e4ef2a8295624da980827987dc315d Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Thu, 19 Mar 2026 10:36:59 +0100 Subject: [PATCH 04/16] Fix watsonx, azure config change --- examples/azure-run.yaml | 2 +- examples/watsonx-run.yaml | 2 +- tests/e2e/configs/run-azure.yaml | 2 +- tests/e2e/configs/run-watsonx.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/azure-run.yaml b/examples/azure-run.yaml index 25dfe1e22..bc23c008d 100644 --- a/examples/azure-run.yaml +++ b/examples/azure-run.yaml @@ -23,7 +23,7 @@ providers: provider_type: remote::azure config: api_key: ${env.AZURE_API_KEY} - api_base: https://ols-test.openai.azure.com/ + base_url: https://ols-test.openai.azure.com/ api_version: 2024-02-15-preview - provider_id: openai provider_type: remote::openai diff --git a/examples/watsonx-run.yaml b/examples/watsonx-run.yaml index c848e2ce2..e40579f4d 100644 --- a/examples/watsonx-run.yaml +++ b/examples/watsonx-run.yaml @@ -22,7 +22,7 @@ providers: - provider_id: watsonx provider_type: remote::watsonx config: - url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} + base_url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} api_key: ${env.WATSONX_API_KEY:=key-not-set} project_id: ${env.WATSONX_PROJECT_ID:=project-not-set} timeout: 1200 diff --git a/tests/e2e/configs/run-azure.yaml b/tests/e2e/configs/run-azure.yaml index bca3e4583..d4779b111 100644 --- a/tests/e2e/configs/run-azure.yaml +++ b/tests/e2e/configs/run-azure.yaml @@ -23,7 +23,7 @@ providers: provider_type: remote::azure config: api_key: ${env.AZURE_API_KEY} - api_base: https://ols-test.openai.azure.com/ + base_url: https://ols-test.openai.azure.com/ api_version: 2024-02-15-preview - provider_id: openai provider_type: remote::openai diff --git a/tests/e2e/configs/run-watsonx.yaml b/tests/e2e/configs/run-watsonx.yaml index 4f02853f7..3392189e7 100644 --- a/tests/e2e/configs/run-watsonx.yaml +++ b/tests/e2e/configs/run-watsonx.yaml @@ -22,7 +22,7 @@ providers: - provider_id: watsonx provider_type: remote::watsonx config: - url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} + base_url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} api_key: ${env.WATSONX_API_KEY:=key-not-set} project_id: ${env.WATSONX_PROJECT_ID:=project-not-set} timeout: 1200 From 7fe5a4b79bf9cef7464bea2d6f7cd16e2c7aadf5 Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Thu, 19 Mar 2026 11:07:12 +0100 Subject: [PATCH 05/16] Fix watson and vertex --- tests/e2e/configs/run-azure.yaml | 5 ++++- tests/e2e/configs/run-watsonx.yaml | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/e2e/configs/run-azure.yaml b/tests/e2e/configs/run-azure.yaml index d4779b111..e09a9fb3a 100644 --- a/tests/e2e/configs/run-azure.yaml +++ b/tests/e2e/configs/run-azure.yaml @@ -50,9 +50,12 @@ providers: provider_id: basic provider_type: inline::basic tool_runtime: - - config: {} + - config: {} # Enable the RAG tool provider_id: rag-runtime provider_type: inline::rag-runtime + - config: {} # Enable MCP (Model Context Protocol) support + provider_id: model-context-protocol + provider_type: remote::model-context-protocol vector_io: - config: persistence: diff --git a/tests/e2e/configs/run-watsonx.yaml b/tests/e2e/configs/run-watsonx.yaml index 3392189e7..44af4f367 100644 --- a/tests/e2e/configs/run-watsonx.yaml +++ b/tests/e2e/configs/run-watsonx.yaml @@ -61,6 +61,9 @@ providers: - config: {} # Enable the RAG tool provider_id: rag-runtime provider_type: inline::rag-runtime + - config: {} # Enable MCP (Model Context Protocol) support + provider_id: model-context-protocol + provider_type: remote::model-context-protocol vector_io: - config: # Define the storage backend for RAG persistence: From e29492c38290e8a7c151a66dbe6f25ad5954ebf6 Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Thu, 19 Mar 2026 11:58:22 +0100 Subject: [PATCH 06/16] Fix Azure --- examples/azure-run.yaml | 27 ++++++++++++++++++++++----- examples/vertexai-run.yaml | 29 ++++++++++++++++++++++------- examples/watsonx-run.yaml | 27 ++++++++++++++++++++++----- tests/e2e/configs/run-azure.yaml | 2 +- 4 files changed, 67 insertions(+), 18 deletions(-) diff --git a/examples/azure-run.yaml b/examples/azure-run.yaml index bc23c008d..894e24528 100644 --- a/examples/azure-run.yaml +++ b/examples/azure-run.yaml @@ -23,7 +23,7 @@ providers: provider_type: remote::azure config: api_key: ${env.AZURE_API_KEY} - base_url: https://ols-test.openai.azure.com/ + base_url: https://ols-test.openai.azure.com/openai/v1 api_version: 2024-02-15-preview - provider_id: openai provider_type: remote::openai @@ -50,14 +50,17 @@ providers: provider_id: basic provider_type: inline::basic tool_runtime: - - config: {} + - config: {} # Enable the RAG tool provider_id: rag-runtime provider_type: inline::rag-runtime + - config: {} # Enable MCP (Model Context Protocol) support + provider_id: model-context-protocol + provider_type: remote::model-context-protocol vector_io: - config: persistence: namespace: vector_io::faiss - backend: kv_default + backend: kv_rag provider_id: faiss provider_type: inline::faiss agents: @@ -105,7 +108,10 @@ storage: backends: kv_default: type: kv_sqlite - db_path: ${env.KV_STORE_PATH:=~/.llama/storage/rag/kv_store.db} + db_path: ${env.KV_STORE_PATH:=~/.llama/storage/kv_store.db} + kv_rag: # Define the storage backend type for RAG + type: kv_sqlite + db_path: ${env.KV_RAG_PATH:=~/.llama/storage/rag/kv_store.db} sql_default: type: sql_sqlite db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db} @@ -130,10 +136,21 @@ registered_resources: provider_id: azure model_type: llm provider_model_id: gpt-4o-mini + - model_id: all-mpnet-base-v2 + model_type: embedding + provider_id: sentence-transformers + provider_model_id: all-mpnet-base-v2 + metadata: + embedding_dimension: 768 shields: - shield_id: llama-guard provider_id: llama-guard provider_shield_id: openai/gpt-4o-mini + vector_stores: + - embedding_dimension: 768 + embedding_model: sentence-transformers/all-mpnet-base-v2 + provider_id: faiss + vector_store_id: ${env.FAISS_VECTOR_STORE_ID} datasets: [] scoring_fns: [] benchmarks: [] @@ -144,6 +161,6 @@ vector_stores: default_provider_id: faiss default_embedding_model: provider_id: sentence-transformers - model_id: nomic-ai/nomic-embed-text-v1.5 + model_id: all-mpnet-base-v2 safety: default_shield_id: llama-guard diff --git a/examples/vertexai-run.yaml b/examples/vertexai-run.yaml index 6ce7cbdad..6a49e350f 100644 --- a/examples/vertexai-run.yaml +++ b/examples/vertexai-run.yaml @@ -50,14 +50,17 @@ providers: provider_id: basic provider_type: inline::basic tool_runtime: - - config: {} + - config: {} # Enable the RAG tool provider_id: rag-runtime provider_type: inline::rag-runtime + - config: {} # Enable MCP (Model Context Protocol) support + provider_id: model-context-protocol + provider_type: remote::model-context-protocol vector_io: - config: persistence: namespace: vector_io::faiss - backend: kv_default + backend: kv_rag provider_id: faiss provider_type: inline::faiss agents: @@ -105,7 +108,10 @@ storage: backends: kv_default: type: kv_sqlite - db_path: ${env.KV_STORE_PATH:=~/.llama/storage/rag/kv_store.db} + db_path: ${env.KV_STORE_PATH:=~/.llama/storage/kv_store.db} + kv_rag: # Define the storage backend type for RAG + type: kv_sqlite + db_path: ${env.KV_RAG_PATH:=~/.llama/storage/rag/kv_store.db} sql_default: type: sql_sqlite db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db} @@ -125,11 +131,22 @@ storage: namespace: prompts backend: kv_default registered_resources: - models: [] + models: + - model_id: all-mpnet-base-v2 + model_type: embedding + provider_id: sentence-transformers + provider_model_id: all-mpnet-base-v2 + metadata: + embedding_dimension: 768 shields: - shield_id: llama-guard provider_id: llama-guard provider_shield_id: openai/gpt-4o-mini + vector_stores: + - embedding_dimension: 768 + embedding_model: sentence-transformers/all-mpnet-base-v2 + provider_id: faiss + vector_store_id: ${env.FAISS_VECTOR_STORE_ID} datasets: [] scoring_fns: [] benchmarks: [] @@ -140,8 +157,6 @@ vector_stores: default_provider_id: faiss default_embedding_model: provider_id: sentence-transformers - model_id: nomic-ai/nomic-embed-text-v1.5 + model_id: all-mpnet-base-v2 safety: default_shield_id: llama-guard -telemetry: - enabled: true diff --git a/examples/watsonx-run.yaml b/examples/watsonx-run.yaml index e40579f4d..44af4f367 100644 --- a/examples/watsonx-run.yaml +++ b/examples/watsonx-run.yaml @@ -61,11 +61,14 @@ providers: - config: {} # Enable the RAG tool provider_id: rag-runtime provider_type: inline::rag-runtime + - config: {} # Enable MCP (Model Context Protocol) support + provider_id: model-context-protocol + provider_type: remote::model-context-protocol vector_io: - config: # Define the storage backend for RAG persistence: namespace: vector_io::faiss - backend: kv_default + backend: kv_rag provider_id: faiss provider_type: inline::faiss agents: @@ -111,12 +114,15 @@ server: port: 8321 storage: backends: - kv_default: # Define the storage backend type for RAG, in this case registry and RAG are unified i.e. information on registered resources (e.g. models, vector_stores) are saved together with the RAG chunks + kv_default: + type: kv_sqlite + db_path: ${env.KV_STORE_PATH:=~/.llama/storage/kv_store.db} + kv_rag: # Define the storage backend type for RAG type: kv_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/rag/kv_store.db} + db_path: ${env.KV_RAG_PATH:=~/.llama/storage/rag/kv_store.db} sql_default: type: sql_sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/sql_store.db} + db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db} stores: metadata: namespace: registry @@ -138,10 +144,21 @@ registered_resources: provider_id: watsonx model_type: llm provider_model_id: watsonx/meta-llama/llama-3-3-70b-instruct + - model_id: all-mpnet-base-v2 + model_type: embedding + provider_id: sentence-transformers + provider_model_id: all-mpnet-base-v2 + metadata: + embedding_dimension: 768 shields: - shield_id: llama-guard provider_id: llama-guard provider_shield_id: openai/gpt-4o-mini + vector_stores: + - embedding_dimension: 768 + embedding_model: sentence-transformers/all-mpnet-base-v2 + provider_id: faiss + vector_store_id: ${env.FAISS_VECTOR_STORE_ID} vector_stores: [] datasets: [] scoring_fns: [] @@ -153,4 +170,4 @@ vector_stores: default_provider_id: faiss default_embedding_model: # Define the default embedding model for RAG provider_id: sentence-transformers - model_id: nomic-ai/nomic-embed-text-v1.5 + model_id: all-mpnet-base-v2 diff --git a/tests/e2e/configs/run-azure.yaml b/tests/e2e/configs/run-azure.yaml index e09a9fb3a..894e24528 100644 --- a/tests/e2e/configs/run-azure.yaml +++ b/tests/e2e/configs/run-azure.yaml @@ -23,7 +23,7 @@ providers: provider_type: remote::azure config: api_key: ${env.AZURE_API_KEY} - base_url: https://ols-test.openai.azure.com/ + base_url: https://ols-test.openai.azure.com/openai/v1 api_version: 2024-02-15-preview - provider_id: openai provider_type: remote::openai From 621b5c895b615f172d83bf6cbf727d8058822214 Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Fri, 20 Mar 2026 14:21:53 +0100 Subject: [PATCH 07/16] Fix watsonx provider --- .github/workflows/e2e_tests_providers.yaml | 2 +- .github/workflows/e2e_tests_rhaiis.yaml | 7 +++---- .github/workflows/e2e_tests_rhelai.yaml | 7 +++---- Makefile | 2 +- docker-compose-library.yaml | 1 + docker-compose.yaml | 1 + examples/watsonx-run.yaml | 1 - src/utils/responses.py | 14 ++++++++++---- tests/e2e/configs/run-watsonx.yaml | 5 ----- 9 files changed, 20 insertions(+), 20 deletions(-) diff --git a/.github/workflows/e2e_tests_providers.yaml b/.github/workflows/e2e_tests_providers.yaml index 5e44a765e..d0e5fd2a9 100644 --- a/.github/workflows/e2e_tests_providers.yaml +++ b/.github/workflows/e2e_tests_providers.yaml @@ -264,7 +264,7 @@ jobs: - name: Set watsonx test overrides if: matrix.environment == 'watsonx' run: | - echo "E2E_DEFAULT_MODEL_OVERRIDE=watsonx/watsonx/meta-llama/llama-3-3-70b-instruct" >> $GITHUB_ENV + echo "E2E_DEFAULT_MODEL_OVERRIDE=meta-llama/llama-4-maverick-17b-128e-instruct-fp8" >> $GITHUB_ENV echo "E2E_DEFAULT_PROVIDER_OVERRIDE=watsonx" >> $GITHUB_ENV - name: Run e2e tests diff --git a/.github/workflows/e2e_tests_rhaiis.yaml b/.github/workflows/e2e_tests_rhaiis.yaml index 652a23807..54a0080e2 100644 --- a/.github/workflows/e2e_tests_rhaiis.yaml +++ b/.github/workflows/e2e_tests_rhaiis.yaml @@ -2,10 +2,9 @@ name: RHAIIS E2E Tests on: - push - # schedule: - # - cron: "0 0 * * *" # Runs once a day at midnight UTC - # workflow_dispatch: + schedule: + - cron: "0 0 * * *" # Runs once a day at midnight UTC + workflow_dispatch: jobs: diff --git a/.github/workflows/e2e_tests_rhelai.yaml b/.github/workflows/e2e_tests_rhelai.yaml index 6b03f8755..c9717bf62 100644 --- a/.github/workflows/e2e_tests_rhelai.yaml +++ b/.github/workflows/e2e_tests_rhelai.yaml @@ -2,10 +2,9 @@ name: RHEL AI E2E Tests on: - push - # schedule: - # - cron: "0 0 * * *" # Runs once a day at midnight UTC - # workflow_dispatch: + schedule: + - cron: "0 0 * * *" # Runs once a day at midnight UTC + workflow_dispatch: jobs: e2e_tests: diff --git a/Makefile b/Makefile index e9ec83739..a2db5a2df 100644 --- a/Makefile +++ b/Makefile @@ -30,7 +30,7 @@ test-integration: ## Run integration tests tests COVERAGE_FILE="${ARTIFACT_DIR}/.coverage.integration" uv run python -m pytest tests/integration --cov=src --cov-report term-missing --cov-report "json:${ARTIFACT_DIR}/coverage_integration.json" --junit-xml="${ARTIFACT_DIR}/junit_integration.xml" --cov-fail-under=10 test-e2e: ## Run end to end tests for the service - uv run behave --color --format pretty --tags=-skip -D dump_errors=true @tests/e2e/test_list.txt + script -q -e -c "uv run behave --color --format pretty --tags=-skip -D dump_errors=true @tests/e2e/test_list.txt" test-e2e-local: ## Run end to end tests for the service uv run behave --color --format pretty --tags=-skip -D dump_errors=true @tests/e2e/test_list.txt diff --git a/docker-compose-library.yaml b/docker-compose-library.yaml index 3c198c0a8..3a77fc1c8 100644 --- a/docker-compose-library.yaml +++ b/docker-compose-library.yaml @@ -67,6 +67,7 @@ services: - WATSONX_BASE_URL=${WATSONX_BASE_URL:-} - WATSONX_PROJECT_ID=${WATSONX_PROJECT_ID:-} - WATSONX_API_KEY=${WATSONX_API_KEY:-} + - LITELLM_DROP_PARAMS=true # Enable debug logging if needed - LLAMA_STACK_LOGGING=${LLAMA_STACK_LOGGING:-} # FAISS test diff --git a/docker-compose.yaml b/docker-compose.yaml index 4ee0d30c1..99e744c37 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -59,6 +59,7 @@ services: - WATSONX_BASE_URL=${WATSONX_BASE_URL:-} - WATSONX_PROJECT_ID=${WATSONX_PROJECT_ID:-} - WATSONX_API_KEY=${WATSONX_API_KEY:-} + - LITELLM_DROP_PARAMS=true # Enable debug logging if needed - LLAMA_STACK_LOGGING=${LLAMA_STACK_LOGGING:-} # FAISS test diff --git a/examples/watsonx-run.yaml b/examples/watsonx-run.yaml index 44af4f367..ec7c988c4 100644 --- a/examples/watsonx-run.yaml +++ b/examples/watsonx-run.yaml @@ -159,7 +159,6 @@ registered_resources: embedding_model: sentence-transformers/all-mpnet-base-v2 provider_id: faiss vector_store_id: ${env.FAISS_VECTOR_STORE_ID} - vector_stores: [] datasets: [] scoring_fns: [] benchmarks: [] diff --git a/src/utils/responses.py b/src/utils/responses.py index 48a20e412..f49f3f8b1 100644 --- a/src/utils/responses.py +++ b/src/utils/responses.py @@ -13,6 +13,9 @@ OpenAIResponseInputMessageContent as InputMessageContent, OpenAIResponseInputMessageContentFile as InputFilePart, OpenAIResponseInputMessageContentText as InputTextPart, + OpenAIResponseInputTool as InputTool, + OpenAIResponseInputToolChoice as ToolChoice, + OpenAIResponseInputToolChoiceMode as ToolChoiceMode, OpenAIResponseInputToolFileSearch as InputToolFileSearch, OpenAIResponseInputToolMCP as InputToolMCP, OpenAIResponseMCPApprovalRequest as MCPApprovalRequest, @@ -28,17 +31,14 @@ OpenAIResponseOutputMessageMCPListTools as MCPListTools, OpenAIResponseOutputMessageWebSearchToolCall as WebSearchCall, OpenAIResponseUsage as ResponseUsage, - OpenAIResponseInputTool as InputTool, OpenAIResponseUsageInputTokensDetails as UsageInputTokensDetails, OpenAIResponseUsageOutputTokensDetails as UsageOutputTokensDetails, - OpenAIResponseInputToolChoiceMode as ToolChoiceMode, - OpenAIResponseInputToolChoice as ToolChoice, ) from llama_stack_client import APIConnectionError, APIStatusError, AsyncLlamaStackClient -from client import AsyncLlamaStackClientHolder import constants import metrics +from client import AsyncLlamaStackClientHolder from configuration import configuration from constants import DEFAULT_RAG_TOOL from log import get_logger @@ -1006,6 +1006,12 @@ async def check_model_configured( for model in models: if model.id == model_id: return True + # Workaround to llama-stack bug + # TODO(are-ces): fix upstream + if model_id.startswith("watsonx/") and model.id == model_id.removeprefix( + "watsonx/" + ): + return True return False except APIStatusError as e: response = InternalServerErrorResponse.generic() diff --git a/tests/e2e/configs/run-watsonx.yaml b/tests/e2e/configs/run-watsonx.yaml index 44af4f367..fdf26b2dc 100644 --- a/tests/e2e/configs/run-watsonx.yaml +++ b/tests/e2e/configs/run-watsonx.yaml @@ -140,10 +140,6 @@ storage: backend: kv_default registered_resources: models: - - model_id: custom-watsonx-model - provider_id: watsonx - model_type: llm - provider_model_id: watsonx/meta-llama/llama-3-3-70b-instruct - model_id: all-mpnet-base-v2 model_type: embedding provider_id: sentence-transformers @@ -159,7 +155,6 @@ registered_resources: embedding_model: sentence-transformers/all-mpnet-base-v2 provider_id: faiss vector_store_id: ${env.FAISS_VECTOR_STORE_ID} - vector_stores: [] datasets: [] scoring_fns: [] benchmarks: [] From 0aa2612725d0fcc914ab673aa9b535bf9aaaa891 Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Tue, 24 Mar 2026 10:31:41 +0100 Subject: [PATCH 08/16] Add allowed_models to Azure provider config Co-Authored-By: Claude Opus 4.6 --- tests/e2e/configs/run-azure.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/e2e/configs/run-azure.yaml b/tests/e2e/configs/run-azure.yaml index 894e24528..ffa265fa2 100644 --- a/tests/e2e/configs/run-azure.yaml +++ b/tests/e2e/configs/run-azure.yaml @@ -25,6 +25,7 @@ providers: api_key: ${env.AZURE_API_KEY} base_url: https://ols-test.openai.azure.com/openai/v1 api_version: 2024-02-15-preview + allowed_models: ["gpt-4o-mini"] - provider_id: openai provider_type: remote::openai config: From 0f3f5c8220e63195e1524389331eb40f8f07921f Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Tue, 24 Mar 2026 10:57:57 +0100 Subject: [PATCH 09/16] Increase default health check attempts after container restart Bump max_attempts in wait_for_container_health from 3 to 6 (30s instead of 15s) to prevent ConnectionResetError after config switch restarts in library mode. Co-Authored-By: Claude Opus 4.6 --- tests/e2e/utils/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/e2e/utils/utils.py b/tests/e2e/utils/utils.py index 580250bff..ec80b0d28 100644 --- a/tests/e2e/utils/utils.py +++ b/tests/e2e/utils/utils.py @@ -73,7 +73,7 @@ def validate_json(message: Any, schema: Any) -> None: assert False, "The provided schema is faulty:" + str(e) -def wait_for_container_health(container_name: str, max_attempts: int = 3) -> None: +def wait_for_container_health(container_name: str, max_attempts: int = 6) -> None: """Wait for container to be healthy. Polls a Docker container until its health status becomes `healthy` or the From f4a8e56d8300cb9658b6ed89672bc304a903e768 Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Tue, 24 Mar 2026 13:20:11 +0100 Subject: [PATCH 10/16] Add watsonx model selection workaround in select_model_for_responses WatsonX models are registered without the provider prefix in llama-stack. Add workaround to return provider_resource_id when the selected model is a watsonx model. Co-Authored-By: Claude Opus 4.6 --- src/utils/responses.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/utils/responses.py b/src/utils/responses.py index f49f3f8b1..903322e50 100644 --- a/src/utils/responses.py +++ b/src/utils/responses.py @@ -334,7 +334,6 @@ async def prepare_responses_params( # pylint: disable=too-many-arguments,too-ma # Build x-llamastack-provider-data header from MCP tool headers extra_headers = _build_provider_data_headers(tools) - return ResponsesApiParams( input=input_text, model=model, @@ -1006,8 +1005,8 @@ async def check_model_configured( for model in models: if model.id == model_id: return True - # Workaround to llama-stack bug - # TODO(are-ces): fix upstream + + # Workaround to llama-stack watsonx bug if model_id.startswith("watsonx/") and model.id == model_id.removeprefix( "watsonx/" ): @@ -1086,6 +1085,14 @@ async def select_model_for_responses( model = llm_models[0] logger.info("Selected first LLM model: %s", model.id) + + # Workaround to llama-stack bug for watsonx + # model needs to be "watsonx/" in the response request + metadata = model.custom_metadata or {} + if metadata.get("provider_id") == "watsonx": + provider_resource_id = metadata.get("provider_resource_id") + if isinstance(provider_resource_id, str): + return provider_resource_id return model.id From 1aafc714c0108a067504c76b78bdc2d5bb81c21b Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Wed, 25 Mar 2026 09:32:34 +0100 Subject: [PATCH 11/16] Run watsonx server mode e2e tests after library mode Add a 40-minute wait for watsonx server mode to avoid concurrent requests hitting the Lite plan rate limit. Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/e2e_tests_providers.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/e2e_tests_providers.yaml b/.github/workflows/e2e_tests_providers.yaml index d0e5fd2a9..0673f262e 100644 --- a/.github/workflows/e2e_tests_providers.yaml +++ b/.github/workflows/e2e_tests_providers.yaml @@ -260,6 +260,12 @@ jobs: exit 1 } + # Wait for watsonx library mode to finish before running server mode + # watsonx has a rate limit of 2 calls / second + - name: Wait for watsonx library mode to finish + if: matrix.environment == 'watsonx' && matrix.mode == 'server' + run: sleep 2400 # 40 minutes + # watsonx has a different convention than "/" - name: Set watsonx test overrides if: matrix.environment == 'watsonx' From 5351082e0b606989f2c9cb99a3ada06b3e47d673 Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Wed, 25 Mar 2026 10:28:10 +0100 Subject: [PATCH 12/16] Enable RHAIIS e2e tests on push Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/e2e_tests_rhaiis.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/e2e_tests_rhaiis.yaml b/.github/workflows/e2e_tests_rhaiis.yaml index 54a0080e2..652a23807 100644 --- a/.github/workflows/e2e_tests_rhaiis.yaml +++ b/.github/workflows/e2e_tests_rhaiis.yaml @@ -2,9 +2,10 @@ name: RHAIIS E2E Tests on: - schedule: - - cron: "0 0 * * *" # Runs once a day at midnight UTC - workflow_dispatch: + push + # schedule: + # - cron: "0 0 * * *" # Runs once a day at midnight UTC + # workflow_dispatch: jobs: From e00a9000ad2d327c20a3f62a8ed3051d3f755ca5 Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Wed, 25 Mar 2026 12:49:33 +0100 Subject: [PATCH 13/16] Add OpenAI provider to rhaiis config and use it for llama-guard Co-Authored-By: Claude Sonnet 4.6 --- tests/e2e/configs/run-rhaiis.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/e2e/configs/run-rhaiis.yaml b/tests/e2e/configs/run-rhaiis.yaml index d37720c91..8e613bec0 100644 --- a/tests/e2e/configs/run-rhaiis.yaml +++ b/tests/e2e/configs/run-rhaiis.yaml @@ -26,6 +26,10 @@ providers: api_token: ${env.RHAIIS_API_KEY} tls_verify: false max_tokens: 2048 + - provider_id: openai + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY} - config: {} provider_id: sentence-transformers provider_type: inline::sentence-transformers @@ -142,7 +146,7 @@ registered_resources: shields: - shield_id: llama-guard provider_id: llama-guard - provider_shield_id: vllm/${env.RHAIIS_MODEL} + provider_shield_id: openai/gpt-4o-mini vector_stores: - embedding_dimension: 768 embedding_model: sentence-transformers/all-mpnet-base-v2 From 3ae9527bd64be9c09f0c211fec81cb295cac9d02 Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Thu, 26 Mar 2026 17:39:11 +0100 Subject: [PATCH 14/16] Register openai/gpt-4o-mini model in rhaiis config The library-mode default inference uses openai/gpt-4o-mini but the model was not registered in Llama Stack, causing requests to be misrouted to the vLLM provider which rejects them as a non-chat model. Co-Authored-By: Claude Opus 4.6 --- tests/e2e/configs/run-rhaiis.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/e2e/configs/run-rhaiis.yaml b/tests/e2e/configs/run-rhaiis.yaml index 8e613bec0..9a76eae9f 100644 --- a/tests/e2e/configs/run-rhaiis.yaml +++ b/tests/e2e/configs/run-rhaiis.yaml @@ -137,6 +137,10 @@ registered_resources: provider_id: vllm model_type: llm provider_model_id: ${env.RHAIIS_MODEL} + - model_id: openai/gpt-4o-mini + provider_id: openai + model_type: llm + provider_model_id: gpt-4o-mini - model_id: all-mpnet-base-v2 model_type: embedding provider_id: sentence-transformers From a4f15638c76ee903d6d5519c08db7babfb9bfdd5 Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Thu, 26 Mar 2026 17:49:28 +0100 Subject: [PATCH 15/16] Revert "Register openai/gpt-4o-mini model in rhaiis config" This reverts commit 3ae9527bd64be9c09f0c211fec81cb295cac9d02. --- tests/e2e/configs/run-rhaiis.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/e2e/configs/run-rhaiis.yaml b/tests/e2e/configs/run-rhaiis.yaml index 9a76eae9f..8e613bec0 100644 --- a/tests/e2e/configs/run-rhaiis.yaml +++ b/tests/e2e/configs/run-rhaiis.yaml @@ -137,10 +137,6 @@ registered_resources: provider_id: vllm model_type: llm provider_model_id: ${env.RHAIIS_MODEL} - - model_id: openai/gpt-4o-mini - provider_id: openai - model_type: llm - provider_model_id: gpt-4o-mini - model_id: all-mpnet-base-v2 model_type: embedding provider_id: sentence-transformers From 6b16a6ddf92a9ce900688f962bd6f1d9a9969430 Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Thu, 26 Mar 2026 17:51:39 +0100 Subject: [PATCH 16/16] Fix e2e model detection in library mode for RHAIIS In library mode, Llama Stack auto-discovers 80 OpenAI models and the test framework picks babbage-002 (a completions-only model) as the default, causing all query tests to fail with "not a chat model". Set E2E_DEFAULT_MODEL_OVERRIDE and E2E_DEFAULT_PROVIDER_OVERRIDE to bypass auto-detection and use the correct RHAIIS vLLM model. Co-Authored-By: Claude Opus 4.6 --- .github/workflows/e2e_tests_rhaiis.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/e2e_tests_rhaiis.yaml b/.github/workflows/e2e_tests_rhaiis.yaml index 652a23807..85b2f2e56 100644 --- a/.github/workflows/e2e_tests_rhaiis.yaml +++ b/.github/workflows/e2e_tests_rhaiis.yaml @@ -27,6 +27,8 @@ jobs: RHAIIS_API_KEY: ${{ secrets.RHAIIS_API_KEY }} RHAIIS_MODEL: ${{ vars.RHAIIS_MODEL }} FAISS_VECTOR_STORE_ID: ${{ vars.FAISS_VECTOR_STORE_ID }} + E2E_DEFAULT_MODEL_OVERRIDE: ${{ vars.RHAIIS_MODEL }} + E2E_DEFAULT_PROVIDER_OVERRIDE: vllm steps: - uses: actions/checkout@v4