From 91b10993b128c25de9afd64af4fbb5336af55072 Mon Sep 17 00:00:00 2001
From: are-ces <195810094+are-ces@users.noreply.github.com>
Date: Thu, 19 Mar 2026 09:56:32 +0100
Subject: [PATCH 01/16] Run tests on push

---
 .github/workflows/e2e_tests_providers.yaml | 7 ++++---
 .github/workflows/e2e_tests_rhaiis.yaml    | 7 ++++---
 .github/workflows/e2e_tests_rhelai.yaml    | 7 ++++---
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/e2e_tests_providers.yaml b/.github/workflows/e2e_tests_providers.yaml
index 82886d6a2..5e44a765e 100644
--- a/.github/workflows/e2e_tests_providers.yaml
+++ b/.github/workflows/e2e_tests_providers.yaml
@@ -2,9 +2,10 @@
 name: E2E Inference Provider Tests
 
 on: 
-  schedule:
-    - cron: "0 0 * * *"  # Runs once a day at midnight UTC
-  workflow_dispatch:
+  push
+  # schedule:
+  #   - cron: "0 0 * * *"  # Runs once a day at midnight UTC
+  # workflow_dispatch:
 
 jobs:
   e2e_tests:
diff --git a/.github/workflows/e2e_tests_rhaiis.yaml b/.github/workflows/e2e_tests_rhaiis.yaml
index 54a0080e2..652a23807 100644
--- a/.github/workflows/e2e_tests_rhaiis.yaml
+++ b/.github/workflows/e2e_tests_rhaiis.yaml
@@ -2,9 +2,10 @@
 name: RHAIIS E2E Tests
 
 on:
-  schedule:
-    - cron: "0 0 * * *"  # Runs once a day at midnight UTC
-  workflow_dispatch:
+  push
+  # schedule:
+  #   - cron: "0 0 * * *"  # Runs once a day at midnight UTC
+  # workflow_dispatch:
 
 
 jobs:
diff --git a/.github/workflows/e2e_tests_rhelai.yaml b/.github/workflows/e2e_tests_rhelai.yaml
index c9717bf62..6b03f8755 100644
--- a/.github/workflows/e2e_tests_rhelai.yaml
+++ b/.github/workflows/e2e_tests_rhelai.yaml
@@ -2,9 +2,10 @@
 name: RHEL AI E2E Tests
 
 on:
-  schedule:
-    - cron: "0 0 * * *"  # Runs once a day at midnight UTC
-  workflow_dispatch:
+  push
+  # schedule:
+  #   - cron: "0 0 * * *"  # Runs once a day at midnight UTC
+  # workflow_dispatch:
 
 jobs:
   e2e_tests:

From cd2429de295064ed24bcf295a422bb4b56577b06 Mon Sep 17 00:00:00 2001
From: are-ces <195810094+are-ces@users.noreply.github.com>
Date: Thu, 19 Mar 2026 09:58:27 +0100
Subject: [PATCH 02/16] Fix vertex ai

---
 tests/e2e/configs/run-vertexai.yaml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/e2e/configs/run-vertexai.yaml b/tests/e2e/configs/run-vertexai.yaml
index bfa69b40a..6a49e350f 100644
--- a/tests/e2e/configs/run-vertexai.yaml
+++ b/tests/e2e/configs/run-vertexai.yaml
@@ -50,9 +50,12 @@ providers:
     provider_id: basic
     provider_type: inline::basic
   tool_runtime:
-  - config: {}
+  - config: {} # Enable the RAG tool
     provider_id: rag-runtime
     provider_type: inline::rag-runtime
+  - config: {} # Enable MCP (Model Context Protocol) support
+    provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
   vector_io:
   - config:
       persistence:

From 1cb8cbd9119eeb9707a1540acdd9bc5d65ff88ca Mon Sep 17 00:00:00 2001
From: are-ces <195810094+are-ces@users.noreply.github.com>
Date: Thu, 19 Mar 2026 09:58:48 +0100
Subject: [PATCH 03/16] Fix constants module not found

---
 test.containerfile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test.containerfile b/test.containerfile
index ecfc54313..884fd8525 100644
--- a/test.containerfile
+++ b/test.containerfile
@@ -20,7 +20,8 @@ COPY src ./src
 RUN uv sync --locked --no-install-project --group llslibdev
 
 # Add virtual environment to PATH for llama command
-ENV PATH="/opt/app-root/.venv/bin:$PATH"
+ENV PATH="/opt/app-root/.venv/bin:$PATH" \
+    PYTHONPATH="/opt/app-root/src"
 
 # Set HOME directory so llama-stack uses /opt/app-root/src/.llama
 ENV HOME="/opt/app-root/src"

From 00d15fe5d5e4ef2a8295624da980827987dc315d Mon Sep 17 00:00:00 2001
From: are-ces <195810094+are-ces@users.noreply.github.com>
Date: Thu, 19 Mar 2026 10:36:59 +0100
Subject: [PATCH 04/16] Fix watsonx, azure config change

---
 examples/azure-run.yaml            | 2 +-
 examples/watsonx-run.yaml          | 2 +-
 tests/e2e/configs/run-azure.yaml   | 2 +-
 tests/e2e/configs/run-watsonx.yaml | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/azure-run.yaml b/examples/azure-run.yaml
index 25dfe1e22..bc23c008d 100644
--- a/examples/azure-run.yaml
+++ b/examples/azure-run.yaml
@@ -23,7 +23,7 @@ providers:
     provider_type: remote::azure
     config: 
       api_key: ${env.AZURE_API_KEY}
-      api_base: https://ols-test.openai.azure.com/
+      base_url: https://ols-test.openai.azure.com/
       api_version: 2024-02-15-preview
   - provider_id: openai
     provider_type: remote::openai
diff --git a/examples/watsonx-run.yaml b/examples/watsonx-run.yaml
index c848e2ce2..e40579f4d 100644
--- a/examples/watsonx-run.yaml
+++ b/examples/watsonx-run.yaml
@@ -22,7 +22,7 @@ providers:
   - provider_id: watsonx
     provider_type: remote::watsonx
     config:
-      url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
+      base_url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
       api_key: ${env.WATSONX_API_KEY:=key-not-set}
       project_id: ${env.WATSONX_PROJECT_ID:=project-not-set}
       timeout: 1200
diff --git a/tests/e2e/configs/run-azure.yaml b/tests/e2e/configs/run-azure.yaml
index bca3e4583..d4779b111 100644
--- a/tests/e2e/configs/run-azure.yaml
+++ b/tests/e2e/configs/run-azure.yaml
@@ -23,7 +23,7 @@ providers:
     provider_type: remote::azure
     config: 
       api_key: ${env.AZURE_API_KEY}
-      api_base: https://ols-test.openai.azure.com/
+      base_url: https://ols-test.openai.azure.com/
       api_version: 2024-02-15-preview
   - provider_id: openai
     provider_type: remote::openai
diff --git a/tests/e2e/configs/run-watsonx.yaml b/tests/e2e/configs/run-watsonx.yaml
index 4f02853f7..3392189e7 100644
--- a/tests/e2e/configs/run-watsonx.yaml
+++ b/tests/e2e/configs/run-watsonx.yaml
@@ -22,7 +22,7 @@ providers:
   - provider_id: watsonx
     provider_type: remote::watsonx
     config:
-      url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
+      base_url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
       api_key: ${env.WATSONX_API_KEY:=key-not-set}
       project_id: ${env.WATSONX_PROJECT_ID:=project-not-set}
       timeout: 1200

From 7fe5a4b79bf9cef7464bea2d6f7cd16e2c7aadf5 Mon Sep 17 00:00:00 2001
From: are-ces <195810094+are-ces@users.noreply.github.com>
Date: Thu, 19 Mar 2026 11:07:12 +0100
Subject: [PATCH 05/16] Fix watsonx and azure

---
 tests/e2e/configs/run-azure.yaml   | 5 ++++-
 tests/e2e/configs/run-watsonx.yaml | 3 +++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/tests/e2e/configs/run-azure.yaml b/tests/e2e/configs/run-azure.yaml
index d4779b111..e09a9fb3a 100644
--- a/tests/e2e/configs/run-azure.yaml
+++ b/tests/e2e/configs/run-azure.yaml
@@ -50,9 +50,12 @@ providers:
     provider_id: basic
     provider_type: inline::basic
   tool_runtime:
-  - config: {}
+  - config: {} # Enable the RAG tool
     provider_id: rag-runtime
     provider_type: inline::rag-runtime
+  - config: {} # Enable MCP (Model Context Protocol) support
+    provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
   vector_io:
   - config:
       persistence:
diff --git a/tests/e2e/configs/run-watsonx.yaml b/tests/e2e/configs/run-watsonx.yaml
index 3392189e7..44af4f367 100644
--- a/tests/e2e/configs/run-watsonx.yaml
+++ b/tests/e2e/configs/run-watsonx.yaml
@@ -61,6 +61,9 @@ providers:
   - config: {} # Enable the RAG tool
     provider_id: rag-runtime
     provider_type: inline::rag-runtime
+  - config: {} # Enable MCP (Model Context Protocol) support
+    provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
   vector_io:
   - config: # Define the storage backend for RAG
       persistence:

From e29492c38290e8a7c151a66dbe6f25ad5954ebf6 Mon Sep 17 00:00:00 2001
From: are-ces <195810094+are-ces@users.noreply.github.com>
Date: Thu, 19 Mar 2026 11:58:22 +0100
Subject: [PATCH 06/16] Fix Azure

---
 examples/azure-run.yaml          | 27 ++++++++++++++++++++++-----
 examples/vertexai-run.yaml       | 29 ++++++++++++++++++++++-------
 examples/watsonx-run.yaml        | 27 ++++++++++++++++++++++-----
 tests/e2e/configs/run-azure.yaml |  2 +-
 4 files changed, 67 insertions(+), 18 deletions(-)

diff --git a/examples/azure-run.yaml b/examples/azure-run.yaml
index bc23c008d..894e24528 100644
--- a/examples/azure-run.yaml
+++ b/examples/azure-run.yaml
@@ -23,7 +23,7 @@ providers:
     provider_type: remote::azure
     config: 
       api_key: ${env.AZURE_API_KEY}
-      base_url: https://ols-test.openai.azure.com/
+      base_url: https://ols-test.openai.azure.com/openai/v1
       api_version: 2024-02-15-preview
   - provider_id: openai
     provider_type: remote::openai
@@ -50,14 +50,17 @@ providers:
     provider_id: basic
     provider_type: inline::basic
   tool_runtime:
-  - config: {}
+  - config: {} # Enable the RAG tool
     provider_id: rag-runtime
     provider_type: inline::rag-runtime
+  - config: {} # Enable MCP (Model Context Protocol) support
+    provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
   vector_io:
   - config:
       persistence:
         namespace: vector_io::faiss
-        backend: kv_default
+        backend: kv_rag
     provider_id: faiss
     provider_type: inline::faiss
   agents:
@@ -105,7 +108,10 @@ storage:
   backends:
     kv_default:
       type: kv_sqlite
-      db_path: ${env.KV_STORE_PATH:=~/.llama/storage/rag/kv_store.db}
+      db_path: ${env.KV_STORE_PATH:=~/.llama/storage/kv_store.db}
+    kv_rag: # Define the storage backend type for RAG
+      type: kv_sqlite
+      db_path: ${env.KV_RAG_PATH:=~/.llama/storage/rag/kv_store.db}
     sql_default:
       type: sql_sqlite
       db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db}
@@ -130,10 +136,21 @@ registered_resources:
     provider_id: azure
     model_type: llm
     provider_model_id: gpt-4o-mini
+  - model_id: all-mpnet-base-v2
+    model_type: embedding
+    provider_id: sentence-transformers
+    provider_model_id: all-mpnet-base-v2
+    metadata:
+      embedding_dimension: 768
   shields:
   - shield_id: llama-guard
     provider_id: llama-guard
     provider_shield_id: openai/gpt-4o-mini
+  vector_stores: 
+  - embedding_dimension: 768
+    embedding_model: sentence-transformers/all-mpnet-base-v2
+    provider_id: faiss
+    vector_store_id: ${env.FAISS_VECTOR_STORE_ID}
   datasets: []
   scoring_fns: []
   benchmarks: []
@@ -144,6 +161,6 @@ vector_stores:
   default_provider_id: faiss
   default_embedding_model:
     provider_id: sentence-transformers
-    model_id: nomic-ai/nomic-embed-text-v1.5
+    model_id: all-mpnet-base-v2
 safety:
   default_shield_id: llama-guard
diff --git a/examples/vertexai-run.yaml b/examples/vertexai-run.yaml
index 6ce7cbdad..6a49e350f 100644
--- a/examples/vertexai-run.yaml
+++ b/examples/vertexai-run.yaml
@@ -50,14 +50,17 @@ providers:
     provider_id: basic
     provider_type: inline::basic
   tool_runtime:
-  - config: {}
+  - config: {} # Enable the RAG tool
     provider_id: rag-runtime
     provider_type: inline::rag-runtime
+  - config: {} # Enable MCP (Model Context Protocol) support
+    provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
   vector_io:
   - config:
       persistence:
         namespace: vector_io::faiss
-        backend: kv_default
+        backend: kv_rag
     provider_id: faiss
     provider_type: inline::faiss
   agents:
@@ -105,7 +108,10 @@ storage:
   backends:
     kv_default:
       type: kv_sqlite
-      db_path: ${env.KV_STORE_PATH:=~/.llama/storage/rag/kv_store.db}
+      db_path: ${env.KV_STORE_PATH:=~/.llama/storage/kv_store.db}
+    kv_rag: # Define the storage backend type for RAG
+      type: kv_sqlite
+      db_path: ${env.KV_RAG_PATH:=~/.llama/storage/rag/kv_store.db}
     sql_default:
       type: sql_sqlite
       db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db}
@@ -125,11 +131,22 @@ storage:
       namespace: prompts
       backend: kv_default
 registered_resources:
-  models: []
+  models:
+  - model_id: all-mpnet-base-v2
+    model_type: embedding
+    provider_id: sentence-transformers
+    provider_model_id: all-mpnet-base-v2
+    metadata:
+      embedding_dimension: 768
   shields:
   - shield_id: llama-guard
     provider_id: llama-guard
     provider_shield_id: openai/gpt-4o-mini
+  vector_stores: 
+  - embedding_dimension: 768
+    embedding_model: sentence-transformers/all-mpnet-base-v2
+    provider_id: faiss
+    vector_store_id: ${env.FAISS_VECTOR_STORE_ID}
   datasets: []
   scoring_fns: []
   benchmarks: []
@@ -140,8 +157,6 @@ vector_stores:
   default_provider_id: faiss
   default_embedding_model:
     provider_id: sentence-transformers
-    model_id: nomic-ai/nomic-embed-text-v1.5
+    model_id: all-mpnet-base-v2
 safety:
   default_shield_id: llama-guard
-telemetry:
-  enabled: true
diff --git a/examples/watsonx-run.yaml b/examples/watsonx-run.yaml
index e40579f4d..44af4f367 100644
--- a/examples/watsonx-run.yaml
+++ b/examples/watsonx-run.yaml
@@ -61,11 +61,14 @@ providers:
   - config: {} # Enable the RAG tool
     provider_id: rag-runtime
     provider_type: inline::rag-runtime
+  - config: {} # Enable MCP (Model Context Protocol) support
+    provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
   vector_io:
   - config: # Define the storage backend for RAG
       persistence:
         namespace: vector_io::faiss
-        backend: kv_default
+        backend: kv_rag
     provider_id: faiss
     provider_type: inline::faiss
   agents:
@@ -111,12 +114,15 @@ server:
   port: 8321
 storage:
   backends:
-    kv_default: # Define the storage backend type for RAG, in this case registry and RAG are unified i.e. information on registered resources (e.g. models, vector_stores) are saved together with the RAG chunks
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.KV_STORE_PATH:=~/.llama/storage/kv_store.db}
+    kv_rag: # Define the storage backend type for RAG
       type: kv_sqlite
-      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/rag/kv_store.db}
+      db_path: ${env.KV_RAG_PATH:=~/.llama/storage/rag/kv_store.db}
     sql_default:
       type: sql_sqlite
-      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/storage/sql_store.db}
+      db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db}
   stores:
     metadata:
       namespace: registry
@@ -138,10 +144,21 @@ registered_resources:
     provider_id: watsonx
     model_type: llm
     provider_model_id: watsonx/meta-llama/llama-3-3-70b-instruct
+  - model_id: all-mpnet-base-v2
+    model_type: embedding
+    provider_id: sentence-transformers
+    provider_model_id: all-mpnet-base-v2
+    metadata:
+      embedding_dimension: 768
   shields:
   - shield_id: llama-guard
     provider_id: llama-guard
     provider_shield_id: openai/gpt-4o-mini
+  vector_stores: 
+  - embedding_dimension: 768
+    embedding_model: sentence-transformers/all-mpnet-base-v2
+    provider_id: faiss
+    vector_store_id: ${env.FAISS_VECTOR_STORE_ID}
   vector_stores: []
   datasets: []
   scoring_fns: []
@@ -153,4 +170,4 @@ vector_stores:
   default_provider_id: faiss
   default_embedding_model: # Define the default embedding model for RAG
     provider_id: sentence-transformers
-    model_id: nomic-ai/nomic-embed-text-v1.5
+    model_id: all-mpnet-base-v2
diff --git a/tests/e2e/configs/run-azure.yaml b/tests/e2e/configs/run-azure.yaml
index e09a9fb3a..894e24528 100644
--- a/tests/e2e/configs/run-azure.yaml
+++ b/tests/e2e/configs/run-azure.yaml
@@ -23,7 +23,7 @@ providers:
     provider_type: remote::azure
     config: 
       api_key: ${env.AZURE_API_KEY}
-      base_url: https://ols-test.openai.azure.com/
+      base_url: https://ols-test.openai.azure.com/openai/v1
       api_version: 2024-02-15-preview
   - provider_id: openai
     provider_type: remote::openai

From 621b5c895b615f172d83bf6cbf727d8058822214 Mon Sep 17 00:00:00 2001
From: are-ces <195810094+are-ces@users.noreply.github.com>
Date: Fri, 20 Mar 2026 14:21:53 +0100
Subject: [PATCH 07/16] Fix watsonx provider

---
 .github/workflows/e2e_tests_providers.yaml |  2 +-
 .github/workflows/e2e_tests_rhaiis.yaml    |  7 +++----
 .github/workflows/e2e_tests_rhelai.yaml    |  7 +++----
 Makefile                                   |  2 +-
 docker-compose-library.yaml                |  1 +
 docker-compose.yaml                        |  1 +
 examples/watsonx-run.yaml                  |  1 -
 src/utils/responses.py                     | 14 ++++++++++----
 tests/e2e/configs/run-watsonx.yaml         |  5 -----
 9 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/.github/workflows/e2e_tests_providers.yaml b/.github/workflows/e2e_tests_providers.yaml
index 5e44a765e..d0e5fd2a9 100644
--- a/.github/workflows/e2e_tests_providers.yaml
+++ b/.github/workflows/e2e_tests_providers.yaml
@@ -264,7 +264,7 @@ jobs:
       - name: Set watsonx test overrides
         if: matrix.environment == 'watsonx'
         run: |
-          echo "E2E_DEFAULT_MODEL_OVERRIDE=watsonx/watsonx/meta-llama/llama-3-3-70b-instruct" >> $GITHUB_ENV
+          echo "E2E_DEFAULT_MODEL_OVERRIDE=meta-llama/llama-4-maverick-17b-128e-instruct-fp8" >> $GITHUB_ENV
           echo "E2E_DEFAULT_PROVIDER_OVERRIDE=watsonx" >> $GITHUB_ENV
 
       - name: Run e2e tests
diff --git a/.github/workflows/e2e_tests_rhaiis.yaml b/.github/workflows/e2e_tests_rhaiis.yaml
index 652a23807..54a0080e2 100644
--- a/.github/workflows/e2e_tests_rhaiis.yaml
+++ b/.github/workflows/e2e_tests_rhaiis.yaml
@@ -2,10 +2,9 @@
 name: RHAIIS E2E Tests
 
 on:
-  push
-  # schedule:
-  #   - cron: "0 0 * * *"  # Runs once a day at midnight UTC
-  # workflow_dispatch:
+  schedule:
+    - cron: "0 0 * * *"  # Runs once a day at midnight UTC
+  workflow_dispatch:
 
 
 jobs:
diff --git a/.github/workflows/e2e_tests_rhelai.yaml b/.github/workflows/e2e_tests_rhelai.yaml
index 6b03f8755..c9717bf62 100644
--- a/.github/workflows/e2e_tests_rhelai.yaml
+++ b/.github/workflows/e2e_tests_rhelai.yaml
@@ -2,10 +2,9 @@
 name: RHEL AI E2E Tests
 
 on:
-  push
-  # schedule:
-  #   - cron: "0 0 * * *"  # Runs once a day at midnight UTC
-  # workflow_dispatch:
+  schedule:
+    - cron: "0 0 * * *"  # Runs once a day at midnight UTC
+  workflow_dispatch:
 
 jobs:
   e2e_tests:
diff --git a/Makefile b/Makefile
index e9ec83739..a2db5a2df 100644
--- a/Makefile
+++ b/Makefile
@@ -30,7 +30,7 @@ test-integration: ## Run integration tests tests
 	COVERAGE_FILE="${ARTIFACT_DIR}/.coverage.integration" uv run python -m pytest tests/integration --cov=src --cov-report term-missing --cov-report "json:${ARTIFACT_DIR}/coverage_integration.json" --junit-xml="${ARTIFACT_DIR}/junit_integration.xml" --cov-fail-under=10
 
 test-e2e: ## Run end to end tests for the service
-	uv run behave --color --format pretty --tags=-skip -D dump_errors=true @tests/e2e/test_list.txt
+	script -q -e -c "uv run behave --color --format pretty --tags=-skip -D dump_errors=true @tests/e2e/test_list.txt"
 
 test-e2e-local: ## Run end to end tests for the service
 	uv run behave --color --format pretty --tags=-skip -D dump_errors=true @tests/e2e/test_list.txt
diff --git a/docker-compose-library.yaml b/docker-compose-library.yaml
index 3c198c0a8..3a77fc1c8 100644
--- a/docker-compose-library.yaml
+++ b/docker-compose-library.yaml
@@ -67,6 +67,7 @@ services:
       - WATSONX_BASE_URL=${WATSONX_BASE_URL:-}
       - WATSONX_PROJECT_ID=${WATSONX_PROJECT_ID:-}
       - WATSONX_API_KEY=${WATSONX_API_KEY:-}
+      - LITELLM_DROP_PARAMS=true
       # Enable debug logging if needed
       - LLAMA_STACK_LOGGING=${LLAMA_STACK_LOGGING:-}
       # FAISS test
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 4ee0d30c1..99e744c37 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -59,6 +59,7 @@ services:
       - WATSONX_BASE_URL=${WATSONX_BASE_URL:-}
       - WATSONX_PROJECT_ID=${WATSONX_PROJECT_ID:-}
       - WATSONX_API_KEY=${WATSONX_API_KEY:-}
+      - LITELLM_DROP_PARAMS=true
       # Enable debug logging if needed
       - LLAMA_STACK_LOGGING=${LLAMA_STACK_LOGGING:-}
       # FAISS test
diff --git a/examples/watsonx-run.yaml b/examples/watsonx-run.yaml
index 44af4f367..ec7c988c4 100644
--- a/examples/watsonx-run.yaml
+++ b/examples/watsonx-run.yaml
@@ -159,7 +159,6 @@ registered_resources:
     embedding_model: sentence-transformers/all-mpnet-base-v2
     provider_id: faiss
     vector_store_id: ${env.FAISS_VECTOR_STORE_ID}
-  vector_stores: []
   datasets: []
   scoring_fns: []
   benchmarks: []
diff --git a/src/utils/responses.py b/src/utils/responses.py
index 48a20e412..f49f3f8b1 100644
--- a/src/utils/responses.py
+++ b/src/utils/responses.py
@@ -13,6 +13,9 @@
     OpenAIResponseInputMessageContent as InputMessageContent,
     OpenAIResponseInputMessageContentFile as InputFilePart,
     OpenAIResponseInputMessageContentText as InputTextPart,
+    OpenAIResponseInputTool as InputTool,
+    OpenAIResponseInputToolChoice as ToolChoice,
+    OpenAIResponseInputToolChoiceMode as ToolChoiceMode,
     OpenAIResponseInputToolFileSearch as InputToolFileSearch,
     OpenAIResponseInputToolMCP as InputToolMCP,
     OpenAIResponseMCPApprovalRequest as MCPApprovalRequest,
@@ -28,17 +31,14 @@
     OpenAIResponseOutputMessageMCPListTools as MCPListTools,
     OpenAIResponseOutputMessageWebSearchToolCall as WebSearchCall,
     OpenAIResponseUsage as ResponseUsage,
-    OpenAIResponseInputTool as InputTool,
     OpenAIResponseUsageInputTokensDetails as UsageInputTokensDetails,
     OpenAIResponseUsageOutputTokensDetails as UsageOutputTokensDetails,
-    OpenAIResponseInputToolChoiceMode as ToolChoiceMode,
-    OpenAIResponseInputToolChoice as ToolChoice,
 )
 from llama_stack_client import APIConnectionError, APIStatusError, AsyncLlamaStackClient
 
-from client import AsyncLlamaStackClientHolder
 import constants
 import metrics
+from client import AsyncLlamaStackClientHolder
 from configuration import configuration
 from constants import DEFAULT_RAG_TOOL
 from log import get_logger
@@ -1006,6 +1006,12 @@ async def check_model_configured(
         for model in models:
             if model.id == model_id:
                 return True
+            # Workaround to llama-stack bug
+            # TODO(are-ces): fix upstream
+            if model_id.startswith("watsonx/") and model.id == model_id.removeprefix(
+                "watsonx/"
+            ):
+                return True
         return False
     except APIStatusError as e:
         response = InternalServerErrorResponse.generic()
diff --git a/tests/e2e/configs/run-watsonx.yaml b/tests/e2e/configs/run-watsonx.yaml
index 44af4f367..fdf26b2dc 100644
--- a/tests/e2e/configs/run-watsonx.yaml
+++ b/tests/e2e/configs/run-watsonx.yaml
@@ -140,10 +140,6 @@ storage:
       backend: kv_default
 registered_resources:
   models: 
-  - model_id: custom-watsonx-model
-    provider_id: watsonx
-    model_type: llm
-    provider_model_id: watsonx/meta-llama/llama-3-3-70b-instruct
   - model_id: all-mpnet-base-v2
     model_type: embedding
     provider_id: sentence-transformers
@@ -159,7 +155,6 @@ registered_resources:
     embedding_model: sentence-transformers/all-mpnet-base-v2
     provider_id: faiss
     vector_store_id: ${env.FAISS_VECTOR_STORE_ID}
-  vector_stores: []
   datasets: []
   scoring_fns: []
   benchmarks: []

From 0aa2612725d0fcc914ab673aa9b535bf9aaaa891 Mon Sep 17 00:00:00 2001
From: are-ces <195810094+are-ces@users.noreply.github.com>
Date: Tue, 24 Mar 2026 10:31:41 +0100
Subject: [PATCH 08/16] Add allowed_models to Azure provider config

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 tests/e2e/configs/run-azure.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/e2e/configs/run-azure.yaml b/tests/e2e/configs/run-azure.yaml
index 894e24528..ffa265fa2 100644
--- a/tests/e2e/configs/run-azure.yaml
+++ b/tests/e2e/configs/run-azure.yaml
@@ -25,6 +25,7 @@ providers:
       api_key: ${env.AZURE_API_KEY}
       base_url: https://ols-test.openai.azure.com/openai/v1
       api_version: 2024-02-15-preview
+      allowed_models: ["gpt-4o-mini"]
   - provider_id: openai
     provider_type: remote::openai
     config:

From 0f3f5c8220e63195e1524389331eb40f8f07921f Mon Sep 17 00:00:00 2001
From: are-ces <195810094+are-ces@users.noreply.github.com>
Date: Tue, 24 Mar 2026 10:57:57 +0100
Subject: [PATCH 09/16] Increase default health check attempts after container
 restart

Bump max_attempts in wait_for_container_health from 3 to 6 (30s instead
of 15s) to prevent ConnectionResetError after config switch restarts
in library mode.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 tests/e2e/utils/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/e2e/utils/utils.py b/tests/e2e/utils/utils.py
index 580250bff..ec80b0d28 100644
--- a/tests/e2e/utils/utils.py
+++ b/tests/e2e/utils/utils.py
@@ -73,7 +73,7 @@ def validate_json(message: Any, schema: Any) -> None:
         assert False, "The provided schema is faulty:" + str(e)
 
 
-def wait_for_container_health(container_name: str, max_attempts: int = 3) -> None:
+def wait_for_container_health(container_name: str, max_attempts: int = 6) -> None:
     """Wait for container to be healthy.
 
     Polls a Docker container until its health status becomes `healthy` or the

From f4a8e56d8300cb9658b6ed89672bc304a903e768 Mon Sep 17 00:00:00 2001
From: are-ces <195810094+are-ces@users.noreply.github.com>
Date: Tue, 24 Mar 2026 13:20:11 +0100
Subject: [PATCH 10/16] Add watsonx model selection workaround in
 select_model_for_responses

WatsonX models are registered without the provider prefix in
llama-stack. Add workaround to return provider_resource_id when the
selected model is a watsonx model.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/utils/responses.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/utils/responses.py b/src/utils/responses.py
index f49f3f8b1..903322e50 100644
--- a/src/utils/responses.py
+++ b/src/utils/responses.py
@@ -334,7 +334,6 @@ async def prepare_responses_params(  # pylint: disable=too-many-arguments,too-ma
 
     # Build x-llamastack-provider-data header from MCP tool headers
     extra_headers = _build_provider_data_headers(tools)
-
     return ResponsesApiParams(
         input=input_text,
         model=model,
@@ -1006,8 +1005,8 @@ async def check_model_configured(
         for model in models:
             if model.id == model_id:
                 return True
-            # Workaround to llama-stack bug
-            # TODO(are-ces): fix upstream
+
+            # Workaround for the llama-stack watsonx bug
             if model_id.startswith("watsonx/") and model.id == model_id.removeprefix(
                 "watsonx/"
             ):
@@ -1086,6 +1085,14 @@ async def select_model_for_responses(
 
     model = llm_models[0]
     logger.info("Selected first LLM model: %s", model.id)
+
+    # Workaround for a llama-stack bug affecting watsonx:
+    # the model needs to be "watsonx/<model_id>" in the responses request
+    metadata = model.custom_metadata or {}
+    if metadata.get("provider_id") == "watsonx":
+        provider_resource_id = metadata.get("provider_resource_id")
+        if isinstance(provider_resource_id, str):
+            return provider_resource_id
     return model.id
 
 

From 1aafc714c0108a067504c76b78bdc2d5bb81c21b Mon Sep 17 00:00:00 2001
From: are-ces <195810094+are-ces@users.noreply.github.com>
Date: Wed, 25 Mar 2026 09:32:34 +0100
Subject: [PATCH 11/16] Run watsonx server mode e2e tests after library mode

Add a 40-minute wait for watsonx server mode to avoid concurrent
requests hitting the Lite plan rate limit.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .github/workflows/e2e_tests_providers.yaml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/.github/workflows/e2e_tests_providers.yaml b/.github/workflows/e2e_tests_providers.yaml
index d0e5fd2a9..0673f262e 100644
--- a/.github/workflows/e2e_tests_providers.yaml
+++ b/.github/workflows/e2e_tests_providers.yaml
@@ -260,6 +260,12 @@ jobs:
             exit 1
           }
 
+      # Delay watsonx server mode by a fixed 40 minutes (no completion check) so it
+      # is unlikely to overlap with library mode; watsonx has a rate limit of 2 calls / second
+      - name: Wait for watsonx library mode to finish
+        if: matrix.environment == 'watsonx' && matrix.mode == 'server'
+        run: sleep 2400  # 40 minutes
+
       # watsonx has a different convention than "<provider>/<model>"
       - name: Set watsonx test overrides
         if: matrix.environment == 'watsonx'

From 5351082e0b606989f2c9cb99a3ada06b3e47d673 Mon Sep 17 00:00:00 2001
From: are-ces <195810094+are-ces@users.noreply.github.com>
Date: Wed, 25 Mar 2026 10:28:10 +0100
Subject: [PATCH 12/16] Enable RHAIIS e2e tests on push

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .github/workflows/e2e_tests_rhaiis.yaml | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/e2e_tests_rhaiis.yaml b/.github/workflows/e2e_tests_rhaiis.yaml
index 54a0080e2..652a23807 100644
--- a/.github/workflows/e2e_tests_rhaiis.yaml
+++ b/.github/workflows/e2e_tests_rhaiis.yaml
@@ -2,9 +2,10 @@
 name: RHAIIS E2E Tests
 
 on:
-  schedule:
-    - cron: "0 0 * * *"  # Runs once a day at midnight UTC
-  workflow_dispatch:
+  push
+  # schedule:
+  #   - cron: "0 0 * * *"  # Runs once a day at midnight UTC
+  # workflow_dispatch:
 
 
 jobs:

From e00a9000ad2d327c20a3f62a8ed3051d3f755ca5 Mon Sep 17 00:00:00 2001
From: are-ces <195810094+are-ces@users.noreply.github.com>
Date: Wed, 25 Mar 2026 12:49:33 +0100
Subject: [PATCH 13/16] Add OpenAI provider to rhaiis config and use it for
 llama-guard

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 tests/e2e/configs/run-rhaiis.yaml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/e2e/configs/run-rhaiis.yaml b/tests/e2e/configs/run-rhaiis.yaml
index d37720c91..8e613bec0 100644
--- a/tests/e2e/configs/run-rhaiis.yaml
+++ b/tests/e2e/configs/run-rhaiis.yaml
@@ -26,6 +26,10 @@ providers:
       api_token: ${env.RHAIIS_API_KEY}
       tls_verify: false
       max_tokens: 2048
+  - provider_id: openai
+    provider_type: remote::openai
+    config:
+      api_key: ${env.OPENAI_API_KEY}
   - config: {}
     provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
@@ -142,7 +146,7 @@ registered_resources:
   shields:
   - shield_id: llama-guard
     provider_id: llama-guard
-    provider_shield_id: vllm/${env.RHAIIS_MODEL}
+    provider_shield_id: openai/gpt-4o-mini
   vector_stores: 
   - embedding_dimension: 768
     embedding_model: sentence-transformers/all-mpnet-base-v2

From 3ae9527bd64be9c09f0c211fec81cb295cac9d02 Mon Sep 17 00:00:00 2001
From: are-ces <195810094+are-ces@users.noreply.github.com>
Date: Thu, 26 Mar 2026 17:39:11 +0100
Subject: [PATCH 14/16] Register openai/gpt-4o-mini model in rhaiis config

The library-mode default inference uses openai/gpt-4o-mini but the model
was not registered in Llama Stack, causing requests to be misrouted to
the vLLM provider which rejects them as a non-chat model.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 tests/e2e/configs/run-rhaiis.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/e2e/configs/run-rhaiis.yaml b/tests/e2e/configs/run-rhaiis.yaml
index 8e613bec0..9a76eae9f 100644
--- a/tests/e2e/configs/run-rhaiis.yaml
+++ b/tests/e2e/configs/run-rhaiis.yaml
@@ -137,6 +137,10 @@ registered_resources:
     provider_id: vllm
     model_type: llm
     provider_model_id: ${env.RHAIIS_MODEL}
+  - model_id: openai/gpt-4o-mini
+    provider_id: openai
+    model_type: llm
+    provider_model_id: gpt-4o-mini
   - model_id: all-mpnet-base-v2
     model_type: embedding
     provider_id: sentence-transformers

From a4f15638c76ee903d6d5519c08db7babfb9bfdd5 Mon Sep 17 00:00:00 2001
From: are-ces <195810094+are-ces@users.noreply.github.com>
Date: Thu, 26 Mar 2026 17:49:28 +0100
Subject: [PATCH 15/16] Revert "Register openai/gpt-4o-mini model in rhaiis
 config"

This reverts commit 3ae9527bd64be9c09f0c211fec81cb295cac9d02.
---
 tests/e2e/configs/run-rhaiis.yaml | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tests/e2e/configs/run-rhaiis.yaml b/tests/e2e/configs/run-rhaiis.yaml
index 9a76eae9f..8e613bec0 100644
--- a/tests/e2e/configs/run-rhaiis.yaml
+++ b/tests/e2e/configs/run-rhaiis.yaml
@@ -137,10 +137,6 @@ registered_resources:
     provider_id: vllm
     model_type: llm
     provider_model_id: ${env.RHAIIS_MODEL}
-  - model_id: openai/gpt-4o-mini
-    provider_id: openai
-    model_type: llm
-    provider_model_id: gpt-4o-mini
   - model_id: all-mpnet-base-v2
     model_type: embedding
     provider_id: sentence-transformers

From 6b16a6ddf92a9ce900688f962bd6f1d9a9969430 Mon Sep 17 00:00:00 2001
From: are-ces <195810094+are-ces@users.noreply.github.com>
Date: Thu, 26 Mar 2026 17:51:39 +0100
Subject: [PATCH 16/16] Fix e2e model detection in library mode for RHAIIS

In library mode, Llama Stack auto-discovers 80 OpenAI models and the
test framework picks babbage-002 (a completions-only model) as the
default, causing all query tests to fail with "not a chat model".

Set E2E_DEFAULT_MODEL_OVERRIDE and E2E_DEFAULT_PROVIDER_OVERRIDE to
bypass auto-detection and use the correct RHAIIS vLLM model.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .github/workflows/e2e_tests_rhaiis.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/e2e_tests_rhaiis.yaml b/.github/workflows/e2e_tests_rhaiis.yaml
index 652a23807..85b2f2e56 100644
--- a/.github/workflows/e2e_tests_rhaiis.yaml
+++ b/.github/workflows/e2e_tests_rhaiis.yaml
@@ -27,6 +27,8 @@ jobs:
       RHAIIS_API_KEY: ${{ secrets.RHAIIS_API_KEY }}
       RHAIIS_MODEL: ${{ vars.RHAIIS_MODEL }}
       FAISS_VECTOR_STORE_ID: ${{ vars.FAISS_VECTOR_STORE_ID }}
+      E2E_DEFAULT_MODEL_OVERRIDE: ${{ vars.RHAIIS_MODEL }}
+      E2E_DEFAULT_PROVIDER_OVERRIDE: vllm
 
     steps:
       - uses: actions/checkout@v4