From e8cf465345821fc21939e9e5b6335c0a725aca6f Mon Sep 17 00:00:00 2001
From: Drew Minnear <dminnear@redhat.com>
Date: Wed, 12 Mar 2025 18:39:47 -0400
Subject: [PATCH] add MS SQL Server as a DB Provider for RAG backend

---
 Makefile                                      |  14 +-
 .../create-gpu-machineset-azure.yaml          |  65 +++
 ...ne-set.yaml => create-gpu-machineset.yaml} |   2 +-
 .../templates/gpu-machineset-azure.j2         |  44 ++
 ...{gpu-machine-sets.j2 => gpu-machineset.j2} |   2 +-
 charts/all/llm-serving-service/Chart.yaml     |  24 --
 .../templates/_helpers.tpl                    |  62 ---
 .../templates/download-model.yaml             |  55 ---
 .../templates/inference-service.yaml          |  37 --
 .../templates/model-pvc.yaml                  |  11 -
 .../templates/serving-runtime.yaml            |  68 ---
 charts/all/llm-serving-service/values.yaml    |   2 -
 charts/all/minio/.helmignore                  |  23 -
 .../all/minio/templates/external-secret.yaml  |  13 -
 charts/all/minio/templates/setup-minio.yaml   | 397 ------------------
 charts/all/minio/values.yaml                  |   6 -
 .../templates/node-feature-discovery.yaml     |   4 +-
 .../nvidia-cluster-policy-config.yaml         |   2 -
 .../all/rag-llm/charts/azure-sql/Chart.yaml   |   6 +
 .../azure-sql/templates/external-secret.yaml  |  36 ++
 .../all/rag-llm/charts/azure-sql/values.yaml  |   7 +
 .../elastic/templates/elasticsearch.yaml      |   2 +-
 .../charts/mssql}/Chart.yaml                  |   7 +-
 .../charts/mssql}/templates/_helpers.tpl      |  28 +-
 .../charts/mssql/templates/deployment.yaml    |  76 ++++
 .../mssql/templates/external-secret.yaml      |  31 ++
 .../charts/mssql/templates/mssqlconfig.yaml   |  25 ++
 .../rag-llm/charts/mssql/templates/pvc.yaml   |  53 +++
 .../charts/mssql/templates/service.yaml       |  16 +
 charts/all/rag-llm/charts/mssql/values.yaml   |  52 +++
 charts/all/rag-llm/files/config.yaml          |   8 +-
 charts/all/rag-llm/templates/deployment.yaml  |  40 +-
 .../templates/populate-vectordb-job.yaml      |  48 ++-
 charts/all/rag-llm/values.yaml                |  64 +--
 charts/all/rhods/Chart.yaml                   |   2 +-
 charts/all/rhods/templates/_helpers.tpl       |  20 +-
 charts/all/rhods/templates/dsc.yaml           |   3 +-
 charts/all/tgis-server/.helmignore            |  23 -
 charts/all/tgis-server/Chart.yaml             |  24 --
 .../all/tgis-server/templates/deployment.yaml |  99 -----
 charts/all/tgis-server/templates/hpa.yaml     |  32 --
 charts/all/tgis-server/templates/pvc.yaml     |  28 --
 charts/all/tgis-server/templates/service.yaml |  20 -
 .../tgis-server/templates/serviceaccount.yaml |  13 -
 charts/all/tgis-server/values.yaml            | 130 ------
 .../.helmignore                               |   0
 charts/all/vllm-inference-service/Chart.yaml  |   6 +
 .../templates/_helpers.tpl                    |  20 +-
 .../templates/accelerator-profile.yaml        |   6 +-
 .../templates/inference-service.yaml          |  60 +++
 .../templates/route.yaml                      |  21 +
 .../templates/serving-runtime.yaml            |  56 +++
 charts/all/vllm-inference-service/values.yaml |  61 +++
 charts/region/.keep                           |   0
 overrides/values-AWS.yaml                     |  26 --
 overrides/values-Azure.yaml                   |  17 +
 overrides/values-IBMCloud.yaml                |  10 -
 values-global.yaml                            |  15 +-
 values-group-one.yaml                         | 106 -----
 values-hub.yaml                               |  82 +---
 values-rag-llm-gitops.yaml                    |   0
 values-secret.yaml.template                   |  21 +-
 62 files changed, 849 insertions(+), 1382 deletions(-)
 create mode 100644 ansible/playbooks/create-gpu-machineset-azure.yaml
 rename ansible/playbooks/{create-gpu-machine-set.yaml => create-gpu-machineset.yaml} (98%)
 create mode 100644 ansible/playbooks/templates/gpu-machineset-azure.j2
 rename ansible/playbooks/templates/{gpu-machine-sets.j2 => gpu-machineset.j2} (99%)
 delete mode 100644 charts/all/llm-serving-service/Chart.yaml
 delete mode 100644 charts/all/llm-serving-service/templates/_helpers.tpl
 delete mode 100644 charts/all/llm-serving-service/templates/download-model.yaml
 delete mode 100644 charts/all/llm-serving-service/templates/inference-service.yaml
 delete mode 100644 charts/all/llm-serving-service/templates/model-pvc.yaml
 delete mode 100644 charts/all/llm-serving-service/templates/serving-runtime.yaml
 delete mode 100644 charts/all/llm-serving-service/values.yaml
 delete mode 100644 charts/all/minio/.helmignore
 delete mode 100644 charts/all/minio/templates/external-secret.yaml
 delete mode 100644 charts/all/minio/templates/setup-minio.yaml
 delete mode 100644 charts/all/minio/values.yaml
 create mode 100644 charts/all/rag-llm/charts/azure-sql/Chart.yaml
 create mode 100644 charts/all/rag-llm/charts/azure-sql/templates/external-secret.yaml
 create mode 100644 charts/all/rag-llm/charts/azure-sql/values.yaml
 rename charts/all/{minio => rag-llm/charts/mssql}/Chart.yaml (92%)
 rename charts/all/{minio => rag-llm/charts/mssql}/templates/_helpers.tpl (72%)
 create mode 100644 charts/all/rag-llm/charts/mssql/templates/deployment.yaml
 create mode 100644 charts/all/rag-llm/charts/mssql/templates/external-secret.yaml
 create mode 100644 charts/all/rag-llm/charts/mssql/templates/mssqlconfig.yaml
 create mode 100644 charts/all/rag-llm/charts/mssql/templates/pvc.yaml
 create mode 100644 charts/all/rag-llm/charts/mssql/templates/service.yaml
 create mode 100644 charts/all/rag-llm/charts/mssql/values.yaml
 delete mode 100644 charts/all/tgis-server/.helmignore
 delete mode 100644 charts/all/tgis-server/Chart.yaml
 delete mode 100644 charts/all/tgis-server/templates/deployment.yaml
 delete mode 100644 charts/all/tgis-server/templates/hpa.yaml
 delete mode 100644 charts/all/tgis-server/templates/pvc.yaml
 delete mode 100644 charts/all/tgis-server/templates/service.yaml
 delete mode 100644 charts/all/tgis-server/templates/serviceaccount.yaml
 delete mode 100644 charts/all/tgis-server/values.yaml
 rename charts/all/{llm-serving-service => vllm-inference-service}/.helmignore (100%)
 create mode 100644 charts/all/vllm-inference-service/Chart.yaml
 rename charts/all/{tgis-server => vllm-inference-service}/templates/_helpers.tpl (70%)
 rename charts/all/{llm-serving-service => vllm-inference-service}/templates/accelerator-profile.yaml (63%)
 create mode 100644 charts/all/vllm-inference-service/templates/inference-service.yaml
 create mode 100644 charts/all/vllm-inference-service/templates/route.yaml
 create mode 100644 charts/all/vllm-inference-service/templates/serving-runtime.yaml
 create mode 100644 charts/all/vllm-inference-service/values.yaml
 delete mode 100644 charts/region/.keep
 delete mode 100644 overrides/values-AWS.yaml
 create mode 100644 overrides/values-Azure.yaml
 delete mode 100644 overrides/values-IBMCloud.yaml
 delete mode 100644 values-group-one.yaml
 delete mode 100644 values-rag-llm-gitops.yaml

diff --git a/Makefile b/Makefile
index 61426ed5..52850dab 100644
--- a/Makefile
+++ b/Makefile
@@ -1,3 +1,8 @@
+# Azure gpu vars
+GPU_VM_SIZE ?= Standard_NC8as_T4_v3
+GPU_REPLICAS ?= 1
+OVERRIDE_ZONE ?=
+
 .PHONY: default
 default: help
 
@@ -16,8 +21,13 @@ install: operator-deploy post-install ## installs the pattern and loads the secr
 	@echo "Installed"
 
 .PHONY: create-gpu-machineset
-create-gpu-machineset: ## Creates a gpu machineset
-	ansible-playbook ansible/playbooks/create-gpu-machine-set.yaml
+create-gpu-machineset: ## Creates a gpu machineset for AWS
+	ansible-playbook ansible/playbooks/create-gpu-machineset.yaml
+
+.PHONY: create-gpu-machineset-azure
+create-gpu-machineset-azure: ## Creates an Azure GPU machineset (overrides: GPU_VM_SIZE, GPU_REPLICAS, OVERRIDE_ZONE)
+	ansible-playbook ansible/playbooks/create-gpu-machineset-azure.yaml \
+		-e "gpu_vm_size=$(GPU_VM_SIZE) gpu_replicas=$(GPU_REPLICAS) override_zone=$(OVERRIDE_ZONE)"
 
 .PHONY: post-install
 post-install: ## Post-install tasks
diff --git a/ansible/playbooks/create-gpu-machineset-azure.yaml b/ansible/playbooks/create-gpu-machineset-azure.yaml
new file mode 100644
index 00000000..5a96896d
--- /dev/null
+++ b/ansible/playbooks/create-gpu-machineset-azure.yaml
@@ -0,0 +1,65 @@
+- name: Generate GPU MachineSet for Azure-based clusters
+  hosts: localhost
+  connection: local
+  gather_facts: false
+  vars:  # defaults; the Makefile target overrides the first three through -e
+    gpu_vm_size: Standard_NC8as_T4_v3
+    gpu_replicas: 1
+    override_zone: ""
+    namespace: openshift-machine-api
+  tasks:
+    - name: Get cluster infrastructure object
+      kubernetes.core.k8s_info:
+        api_version: config.openshift.io/v1
+        kind: Infrastructure
+        name: cluster
+      register: infra_info
+
+    - name: Save cluster ID
+      ansible.builtin.set_fact:
+        cluster_id: "{{ infra_info.resources[0].status.infrastructureName }}"
+
+    - name: Gather all MachineSets
+      kubernetes.core.k8s_info:
+        api_version: machine.openshift.io/v1beta1
+        kind: MachineSet
+        namespace: "{{ namespace }}"
+      register: ms_list
+
+    - name: Pick the first *worker* MachineSet as a template
+      ansible.builtin.set_fact:
+        base_ms: "{{ item }}"
+      loop: "{{ ms_list.resources | sort(attribute='metadata.name') }}"
+      when:
+        - base_ms is not defined  # set_fact runs per item; without this guard the LAST match would win
+        - "'worker' in (item.metadata.labels['machine.openshift.io/cluster-api-machine-role'] | default(''))"
+
+    - name: Extract provider-specific details from the base MachineSet
+      ansible.builtin.set_fact:
+        azure_location: "{{ base_ms.spec.template.spec.providerSpec.value.location }}"
+        base_zone: "{{ base_ms.spec.template.spec.providerSpec.value.zone }}"
+        resource_group: "{{ base_ms.spec.template.spec.providerSpec.value.resourceGroup }}"
+        network_resource_group: "{{ base_ms.spec.template.spec.providerSpec.value.networkResourceGroup }}"
+        vnet: "{{ base_ms.spec.template.spec.providerSpec.value.vnet }}"
+        subnet: "{{ base_ms.spec.template.spec.providerSpec.value.subnet }}"
+        image: "{{ base_ms.spec.template.spec.providerSpec.value.image }}"
+        os_disk: "{{ base_ms.spec.template.spec.providerSpec.value.osDisk }}"
+        user_data: "{{ base_ms.spec.template.spec.providerSpec.value.userDataSecret }}"
+        cred_secret: "{{ base_ms.spec.template.spec.providerSpec.value.credentialsSecret }}"
+        public_ip: "{{ base_ms.spec.template.spec.providerSpec.value.publicIP | default(false) }}"
+
+    - name: Decide which availability zone to use
+      ansible.builtin.set_fact:
+        gpu_zone: "{{ (override_zone | trim) | default(base_zone, true) }}"  # blank override -> base MachineSet zone
+
+    - name: Render GPU MachineSet manifest
+      ansible.builtin.template:
+        src: templates/gpu-machineset-azure.j2
+        dest: /tmp/gpu-machineset-azure.yaml
+      vars:
+        ms_name: "nvidia-worker-{{ azure_location | replace(' ', '') }}{{ gpu_zone }}"
+
+    - name: Apply the GPU MachineSet
+      kubernetes.core.k8s:
+        state: present
+        src: /tmp/gpu-machineset-azure.yaml
diff --git a/ansible/playbooks/create-gpu-machine-set.yaml b/ansible/playbooks/create-gpu-machineset.yaml
similarity index 98%
rename from ansible/playbooks/create-gpu-machine-set.yaml
rename to ansible/playbooks/create-gpu-machineset.yaml
index 3e4d894e..62c8d437 100644
--- a/ansible/playbooks/create-gpu-machine-set.yaml
+++ b/ansible/playbooks/create-gpu-machineset.yaml
@@ -72,7 +72,7 @@
 
     - name: "[create-gpu-machine-set] Generate machineset"
       ansible.builtin.template:
-        src: templates/gpu-machine-sets.j2
+        src: templates/gpu-machineset.j2
         dest: /tmp/gpu-machineset.yaml
 
     - name: "[create-gpu-machine-set] Apply machineset to cluster {{ clusterId }}"
diff --git a/ansible/playbooks/templates/gpu-machineset-azure.j2 b/ansible/playbooks/templates/gpu-machineset-azure.j2
new file mode 100644
index 00000000..15cfe224
--- /dev/null
+++ b/ansible/playbooks/templates/gpu-machineset-azure.j2
@@ -0,0 +1,44 @@
+apiVersion: machine.openshift.io/v1beta1
+kind: MachineSet
+metadata:
+  name: {{ ms_name | to_json }}
+  namespace: {{ namespace | default('openshift-machine-api') | to_json }}
+  labels:
+    machine.openshift.io/cluster-api-cluster: {{ cluster_id | to_json }}
+spec:
+  replicas: {{ gpu_replicas }}
+  selector:
+    matchLabels:
+      machine.openshift.io/cluster-api-cluster: {{ cluster_id | to_json }}
+      machine.openshift.io/cluster-api-machineset: {{ ms_name | to_json }}
+  template:
+    metadata:
+      labels:
+        machine.openshift.io/cluster-api-cluster: {{ cluster_id | to_json }}
+        machine.openshift.io/cluster-api-machine-role: worker
+        machine.openshift.io/cluster-api-machine-type: worker
+        machine.openshift.io/cluster-api-machineset: {{ ms_name | to_json }}
+    spec:
+      taints:  # NoSchedule: only pods that tolerate odh-notebook are scheduled on these nodes
+        - key: odh-notebook
+          value: "true"
+          effect: NoSchedule
+      metadata:
+        labels:
+          node-role.kubernetes.io/odh-notebook: ''
+      providerSpec:
+        value:  # everything except zone and vmSize is copied from the base worker MachineSet
+          apiVersion: machine.openshift.io/v1beta1
+          kind: AzureMachineProviderSpec
+          credentialsSecret: {{ cred_secret | to_json }}
+          location: {{ azure_location | to_json }}
+          zone: {{ gpu_zone | to_json }}
+          resourceGroup: {{ resource_group | to_json }}
+          networkResourceGroup: {{ network_resource_group | to_json }}
+          vnet: {{ vnet | to_json }}
+          subnet: {{ subnet | to_json }}
+          vmSize: {{ gpu_vm_size | to_json }}
+          image: {{ image | to_json }}
+          osDisk: {{ os_disk | to_json }}
+          publicIP: {{ public_ip | to_json }}
+          userDataSecret: {{ user_data | to_json }}
diff --git a/ansible/playbooks/templates/gpu-machine-sets.j2 b/ansible/playbooks/templates/gpu-machineset.j2
similarity index 99%
rename from ansible/playbooks/templates/gpu-machine-sets.j2
rename to ansible/playbooks/templates/gpu-machineset.j2
index 00ef4063..801ad2ea 100644
--- a/ansible/playbooks/templates/gpu-machine-sets.j2
+++ b/ansible/playbooks/templates/gpu-machineset.j2
@@ -11,7 +11,7 @@ metadata:
   name: {{ clusterId }}-gpu-{{ cloudRegion }}
   namespace: openshift-machine-api
 spec:
-  replicas: 3
+  replicas: 1
   selector:
     matchLabels:
       machine.openshift.io/cluster-api-cluster: {{ clusterId }}
diff --git a/charts/all/llm-serving-service/Chart.yaml b/charts/all/llm-serving-service/Chart.yaml
deleted file mode 100644
index 2cd9a568..00000000
--- a/charts/all/llm-serving-service/Chart.yaml
+++ /dev/null
@@ -1,24 +0,0 @@
-apiVersion: v2
-name: llm-service
-description: A Helm chart for Kubernetes
-
-# A chart can be either an 'application' or a 'library' chart.
-#
-# Application charts are a collection of templates that can be packaged into versioned archives
-# to be deployed.
-#
-# Library charts provide useful utilities or functions for the chart developer. They're included as
-# a dependency of application charts to inject those utilities and functions into the rendering
-# pipeline. Library charts do not define any templates and therefore cannot be deployed.
-type: application
-
-# This is the chart version. This version number should be incremented each time you make changes
-# to the chart and its templates, including the app version.
-# Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.1.0
-
-# This is the version number of the application being deployed. This version number should be
-# incremented each time you make changes to the application. Versions are not expected to
-# follow Semantic Versioning. They should reflect the version the application is using.
-# It is recommended to use it with quotes.
-appVersion: "1.16.0"
diff --git a/charts/all/llm-serving-service/templates/_helpers.tpl b/charts/all/llm-serving-service/templates/_helpers.tpl
deleted file mode 100644
index f415bbf2..00000000
--- a/charts/all/llm-serving-service/templates/_helpers.tpl
+++ /dev/null
@@ -1,62 +0,0 @@
-{{/*
-Expand the name of the chart.
-*/}}
-{{- define "minio.name" -}}
-{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
-{{- end }}
-
-{{/*
-Create a default fully qualified app name.
-We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
-If release name contains chart name it will be used as a full name.
-*/}}
-{{- define "minio.fullname" -}}
-{{- if .Values.fullnameOverride }}
-{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
-{{- else }}
-{{- $name := default .Chart.Name .Values.nameOverride }}
-{{- if contains $name .Release.Name }}
-{{- .Release.Name | trunc 63 | trimSuffix "-" }}
-{{- else }}
-{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
-{{- end }}
-{{- end }}
-{{- end }}
-
-{{/*
-Create chart name and version as used by the chart label.
-*/}}
-{{- define "minio.chart" -}}
-{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
-{{- end }}
-
-{{/*
-Common labels
-*/}}
-{{- define "minio.labels" -}}
-helm.sh/chart: {{ include "minio.chart" . }}
-{{ include "minio.selectorLabels" . }}
-{{- if .Chart.AppVersion }}
-app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
-{{- end }}
-app.kubernetes.io/managed-by: {{ .Release.Service }}
-{{- end }}
-
-{{/*
-Selector labels
-*/}}
-{{- define "minio.selectorLabels" -}}
-app.kubernetes.io/name: {{ include "minio.name" . }}
-app.kubernetes.io/instance: {{ .Release.Name }}
-{{- end }}
-
-{{/*
-Create the name of the service account to use
-*/}}
-{{- define "minio.serviceAccountName" -}}
-{{- if .Values.serviceAccount.create }}
-{{- default (include "minio.fullname" .) .Values.serviceAccount.name }}
-{{- else }}
-{{- default "default" .Values.serviceAccount.name }}
-{{- end }}
-{{- end }}
diff --git a/charts/all/llm-serving-service/templates/download-model.yaml b/charts/all/llm-serving-service/templates/download-model.yaml
deleted file mode 100644
index 2c36351f..00000000
--- a/charts/all/llm-serving-service/templates/download-model.yaml
+++ /dev/null
@@ -1,55 +0,0 @@
-apiVersion: batch/v1
-kind: Job
-metadata:
-  name: download-model
-spec:
-  selector: {}
-  template:
-    spec:
-      containers:
-        - args:
-            - -ec
-            - |-
-              pip install huggingface_hub;
-              export HF_HOME=/tmp/cache/
-              cat << 'EOF' | python3
-              from huggingface_hub import snapshot_download
-              from pathlib import Path
-              from huggingface_hub import login
-              import subprocess, os
-
-              # Get the environment variable 'hftoken'
-              hf_token = os.getenv('hftoken')
-              # Get model id
-              modelid = os.getenv('modelId')
-              model_id = modelid.split('/')[-1]
-
-              def run_command(command):
-                  """Run a shell command and check for errors."""
-                  result = subprocess.run(command, shell=True, check=True, text=True, capture_output=True)
-                  print(result.stdout)
-                  if result.stderr:
-                      print(result.stderr)
-
-              if hf_token is not None and hf_token.strip() != "None":
-                print("hftoken is set.")
-                login(token=hf_token)
-              mistral_models_path = "/cache/models"
-              snapshot_download(repo_id=modelid, local_dir=mistral_models_path)
-              EOF
-          command:
-            - /bin/bash
-          envFrom:
-            - secretRef:
-                name: huggingface-secret
-          image: registry.access.redhat.com/ubi9/python-39
-          imagePullPolicy: IfNotPresent
-          name: download-model
-          volumeMounts:
-            - mountPath: /cache/models
-              name: models
-      volumes:
-        - name: models
-          persistentVolumeClaim:
-            claimName: model-pvc
-      restartPolicy: OnFailure
diff --git a/charts/all/llm-serving-service/templates/inference-service.yaml b/charts/all/llm-serving-service/templates/inference-service.yaml
deleted file mode 100644
index 49490152..00000000
--- a/charts/all/llm-serving-service/templates/inference-service.yaml
+++ /dev/null
@@ -1,37 +0,0 @@
-apiVersion: serving.kserve.io/v1beta1
-kind: InferenceService
-metadata:
-  annotations:
-    openshift.io/display-name: ibm-granite-instruct
-    serving.knative.openshift.io/enablePassthrough: 'true'
-    sidecar.istio.io/inject: 'true'
-    sidecar.istio.io/rewriteAppHTTPProbers: 'true'
-  name: ibm-granite-instruct
-  namespace: rag-llm
-  labels:
-    opendatahub.io/dashboard: 'true'
-spec:
-  predictor:
-    annotations:
-      serving.knative.dev/progress-deadline: 30m
-    maxReplicas: 1
-    minReplicas: 1
-    model:
-      modelFormat:
-        name: vLLM
-      name: ''
-      resources:
-        limits:
-          cpu: '8'
-          memory: 10Gi
-          nvidia.com/gpu: '1'
-        requests:
-          cpu: '2'
-          memory: 8Gi
-          nvidia.com/gpu: '1'
-      runtime: ibm-granite-instruct
-    restartPolicy: OnFailure
-    tolerations:
-      - effect: NoSchedule
-        key: odh-notebook
-        operator: Exists
\ No newline at end of file
diff --git a/charts/all/llm-serving-service/templates/model-pvc.yaml b/charts/all/llm-serving-service/templates/model-pvc.yaml
deleted file mode 100644
index 34c31812..00000000
--- a/charts/all/llm-serving-service/templates/model-pvc.yaml
+++ /dev/null
@@ -1,11 +0,0 @@
-kind: PersistentVolumeClaim
-apiVersion: v1
-metadata:
-  name: model-pvc
-spec:
-  accessModes:
-    - ReadWriteOnce
-  resources:
-    requests:
-      storage: 50Gi
-  volumeMode: Filesystem
\ No newline at end of file
diff --git a/charts/all/llm-serving-service/templates/serving-runtime.yaml b/charts/all/llm-serving-service/templates/serving-runtime.yaml
deleted file mode 100644
index 9ab06125..00000000
--- a/charts/all/llm-serving-service/templates/serving-runtime.yaml
+++ /dev/null
@@ -1,68 +0,0 @@
-apiVersion: serving.kserve.io/v1alpha1
-kind: ServingRuntime
-metadata:
-  annotations:
-    opendatahub.io/accelerator-name: nvidia-gpu
-    opendatahub.io/apiProtocol: REST
-    opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]'
-    openshift.io/display-name: ibm-granite-instruct
-  name: ibm-granite-instruct
-  namespace: rag-llm
-  labels:
-    opendatahub.io/dashboard: 'true'
-spec:
-  annotations:
-    prometheus.io/path: /metrics
-    prometheus.io/port: '8080'
-  containers:
-    - args:
-        - '--port=8080'
-        - '--model=/cache/models'
-        - '--distributed-executor-backend=mp'
-        - '--served-model-name=ibm-granite-instruct'
-        - '--max-model-len=4096'
-        - '--dtype=half'
-        - '--gpu-memory-utilization'
-        - '0.98'
-        - '--enforce-eager'
-      command:
-        - python
-        - '-m'
-        - vllm.entrypoints.openai.api_server
-      env:
-        - name: HF_HOME
-          value: /cache
-        - name: HF_TOKEN
-          valueFrom:
-            secretKeyRef:
-              key: hftoken
-              name: huggingface-secret
-        - name: MODEL_ID
-          valueFrom:
-            secretKeyRef:
-              key: modelId
-              name: huggingface-secret
-        - name: HF_HUB_OFFLINE
-          value: '0'
-      image: 'quay.io/modh/vllm@sha256:c86ff1e89c86bc9821b75d7f2bbc170b3c13e3ccf538bf543b1110f23e056316'
-      name: kserve-container
-      ports:
-        - containerPort: 8080
-          protocol: TCP
-      volumeMounts:
-        - mountPath: /dev/shm
-          name: shm
-        - mountPath: /cache/models
-          name: models
-  multiModel: false
-  supportedModelFormats:
-    - autoSelect: true
-      name: vLLM
-  volumes:
-    - emptyDir:
-        medium: Memory
-        sizeLimit: 2Gi
-      name: shm
-    - name: models
-      persistentVolumeClaim:
-        claimName: model-pvc
diff --git a/charts/all/llm-serving-service/values.yaml b/charts/all/llm-serving-service/values.yaml
deleted file mode 100644
index 3a23d362..00000000
--- a/charts/all/llm-serving-service/values.yaml
+++ /dev/null
@@ -1,2 +0,0 @@
-hfmodel:
-  key: secret/data/hub/hfmodel
diff --git a/charts/all/minio/.helmignore b/charts/all/minio/.helmignore
deleted file mode 100644
index 0e8a0eb3..00000000
--- a/charts/all/minio/.helmignore
+++ /dev/null
@@ -1,23 +0,0 @@
-# Patterns to ignore when building packages.
-# This supports shell glob matching, relative path matching, and
-# negation (prefixed with !). Only one pattern per line.
-.DS_Store
-# Common VCS dirs
-.git/
-.gitignore
-.bzr/
-.bzrignore
-.hg/
-.hgignore
-.svn/
-# Common backup files
-*.swp
-*.bak
-*.tmp
-*.orig
-*~
-# Various IDEs
-.project
-.idea/
-*.tmproj
-.vscode/
diff --git a/charts/all/minio/templates/external-secret.yaml b/charts/all/minio/templates/external-secret.yaml
deleted file mode 100644
index 0317f063..00000000
--- a/charts/all/minio/templates/external-secret.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
-apiVersion: "external-secrets.io/v1beta1"
-kind: ExternalSecret
-metadata:
-  name: minio-secret
-  namespace: rag-llm
-spec:
-  refreshInterval: 15s
-  secretStoreRef:
-    name: {{ .Values.secretStore.name }}
-    kind: {{ .Values.secretStore.kind }}
-  dataFrom:
-  - extract:
-      key: {{ .Values.minio.key }}
\ No newline at end of file
diff --git a/charts/all/minio/templates/setup-minio.yaml b/charts/all/minio/templates/setup-minio.yaml
deleted file mode 100644
index c54c8331..00000000
--- a/charts/all/minio/templates/setup-minio.yaml
+++ /dev/null
@@ -1,397 +0,0 @@
----
-apiVersion: v1
-kind: ServiceAccount
-metadata:
-  name: demo-setup
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRoleBinding
-metadata:
-  name: demo-setup-cluster-reader
-subjects:
-- kind: ServiceAccount
-  name: demo-setup
-  namespace: rag-llm
-roleRef:
-  kind: ClusterRole
-  name: cluster-reader
-  apiGroup: rbac.authorization.k8s.io
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: RoleBinding
-metadata:
-  name: demo-setup-edit
-roleRef:
-  apiGroup: rbac.authorization.k8s.io
-  kind: ClusterRole
-  name: edit
-subjects:
-  - kind: ServiceAccount
-    name: demo-setup
----
-kind: PersistentVolumeClaim
-apiVersion: v1
-metadata:
-  name: minio-pvc
-spec:
-  accessModes:
-    - ReadWriteOnce
-  resources:
-    requests:
-      storage: 50Gi
-  volumeMode: Filesystem
----
-kind: Deployment
-apiVersion: apps/v1
-metadata:
-  name: minio
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: minio
-  template:
-    metadata:
-      creationTimestamp: null
-      labels:
-        app: minio
-    spec:
-      volumes:
-        - name: data
-          persistentVolumeClaim:
-            claimName: minio-pvc
-      containers:
-        - resources:
-            limits:
-              cpu: 250m
-              memory: 1Gi
-            requests:
-              cpu: 20m
-              memory: 100Mi
-          readinessProbe:
-            tcpSocket:
-              port: 9000
-            initialDelaySeconds: 5
-            timeoutSeconds: 1
-            periodSeconds: 5
-            successThreshold: 1
-            failureThreshold: 3
-          terminationMessagePath: /dev/termination-log
-          name: minio
-          livenessProbe:
-            tcpSocket:
-              port: 9000
-            initialDelaySeconds: 30
-            timeoutSeconds: 1
-            periodSeconds: 5
-            successThreshold: 1
-            failureThreshold: 3
-          env:
-            - name: MINIO_ROOT_USER
-              valueFrom:
-                secretKeyRef:
-                  name: minio-secret
-                  key: MINIO_ROOT_USER
-            - name: MINIO_ROOT_PASSWORD
-              valueFrom:
-                secretKeyRef:
-                  name: minio-secret
-                  key: MINIO_ROOT_PASSWORD
-          ports:
-            - containerPort: 9000
-              protocol: TCP
-            - containerPort: 9090
-              protocol: TCP
-          imagePullPolicy: IfNotPresent
-          volumeMounts:
-            - name: data
-              mountPath: /data
-              subPath: minio
-          terminationMessagePolicy: File
-          image: >-
-            quay.io/minio/minio:latest
-          args:
-            - server
-            - /data
-            - --console-address
-            - :9090
-      restartPolicy: Always
-      terminationGracePeriodSeconds: 30
-      dnsPolicy: ClusterFirst
-      securityContext: {}
-      schedulerName: default-scheduler
-  strategy:
-    type: Recreate
-  revisionHistoryLimit: 10
-  progressDeadlineSeconds: 600
----
-kind: Service
-apiVersion: v1
-metadata:
-  name: minio-service
-spec:
-  ipFamilies:
-    - IPv4
-  ports:
-    - name: api
-      protocol: TCP
-      port: 9000
-      targetPort: 9000
-    - name: ui
-      protocol: TCP
-      port: 9090
-      targetPort: 9090
-  internalTrafficPolicy: Cluster
-  type: ClusterIP
-  ipFamilyPolicy: SingleStack
-  sessionAffinity: None
-  selector:
-    app: minio
----
-kind: Route
-apiVersion: route.openshift.io/v1
-metadata:
-  name: minio-api
-spec:
-  to:
-    kind: Service
-    name: minio-service
-    weight: 100
-  port:
-    targetPort: api
-  wildcardPolicy: None
-  tls:
-    termination: edge
-    insecureEdgeTerminationPolicy: Redirect
----
-kind: Route
-apiVersion: route.openshift.io/v1
-metadata:
-  name: minio-ui
-spec:
-  to:
-    kind: Service
-    name: minio-service
-    weight: 100
-  port:
-    targetPort: ui
-  wildcardPolicy: None
-  tls:
-    termination: edge
-    insecureEdgeTerminationPolicy: Redirect
-# ---
-# apiVersion: batch/v1
-# kind: Job
-# metadata:
-#   labels:
-#     app.kubernetes.io/component: minio
-#     app.kubernetes.io/instance: minio
-#     app.kubernetes.io/name: minio
-#     app.kubernetes.io/part-of: minio
-#     component: minio
-#   name: create-minio-buckets
-# spec:
-#   selector: {}
-#   template:
-#     metadata:
-#       labels:
-#         app.kubernetes.io/component: minio
-#         app.kubernetes.io/instance: minio
-#         app.kubernetes.io/name: minio
-#         app.kubernetes.io/part-of: minio
-#         component: minio
-#     spec:
-#       containers:
-#         - args:
-#             - -ec
-#             - |-
-#               env | grep MINIO
-#               pip install minio;
-#               cat << 'EOF' | python3
-#               from minio import Minio
-#               import os
-#               client = Minio(
-#                   "minio-service:9000",
-#                   access_key=os.getenv("MINIO_ROOT_USER"),
-#                   secret_key=os.getenv("MINIO_ROOT_PASSWORD"),
-#                   secure=False
-#               )
-#               bucket = 'models'
-              
-#               print('creating models bucket')
-#               if client.bucket_exists(bucket):
-#                 print("bucket 'models' exists")
-#               else:
-#                 client.make_bucket(bucket)
-#                 print("bucket 'models' created successfully")
-#               EOF
-#           command:
-#             - /bin/bash
-#           envFrom:
-#             - secretRef:
-#                 name: minio-secret
-#           image: registry.access.redhat.com/ubi8/python-38
-#           imagePullPolicy: IfNotPresent
-#           name: create-buckets
-#       initContainers:
-#         - args:
-#             - -ec
-#             - |-
-#               echo -n 'Waiting for minio root user secret'
-#               while ! oc get secret minio-secret 2>/dev/null | grep -qF minio-secret; do
-#               echo -n .
-#               sleep 5
-#               done; echo
-
-#               echo -n 'Waiting for minio deployment'
-#               while ! oc get deployment minio 2>/dev/null | grep -qF minio; do
-#                 echo -n .
-#                 sleep 5
-#               done; echo
-#               oc wait --for=condition=available --timeout=60s deployment/minio
-#               sleep 10
-#           command:
-#             - /bin/bash
-#           image: image-registry.openshift-image-registry.svc:5000/openshift/tools:latest
-#           imagePullPolicy: IfNotPresent
-#           name: wait-for-minio
-#       restartPolicy: Never
-#       serviceAccountName: demo-setup
-# ---
-# apiVersion: batch/v1
-# kind: Job
-# metadata:
-#   labels:
-#     app.kubernetes.io/component: minio
-#     app.kubernetes.io/instance: minio
-#     app.kubernetes.io/name: minio
-#     app.kubernetes.io/part-of: minio
-#     component: minio
-#   name: load-model-set
-# spec:
-#   selector: {}
-#   template:
-#     metadata:
-#       labels:
-#         app.kubernetes.io/component: minio
-#         app.kubernetes.io/instance: minio
-#         app.kubernetes.io/name: minio
-#         app.kubernetes.io/part-of: minio
-#         component: minio
-#     spec:
-#       containers:
-#         - args:
-#             - -ec
-#             - |-
-#               pip install huggingface_hub boto3;
-#               cat << 'EOF' | python3
-#               import boto3, os, botocore, subprocess
-#               from huggingface_hub import snapshot_download
-#               from pathlib import Path
-#               from huggingface_hub import login
-
-#               # Get the environment variable 'hftoken'
-#               hf_token = os.getenv('hftoken')
-#               # Get model id
-#               modelid = os.getenv('modelId')
-#               model_id = modelid.split('/')[-1]
-
-#               def run_command(command):
-#                   """Run a shell command and check for errors."""
-#                   result = subprocess.run(command, shell=True, check=True, text=True, capture_output=True)
-#                   print(result.stdout)
-#                   if result.stderr:
-#                       print(result.stderr)
-
-#               # Upload Model to bucket
-#               endpoint_url = "http://minio-service:9000"
-#               aws_access_key_id = os.getenv("MINIO_ROOT_USER")
-#               aws_secret_access_key = os.getenv("MINIO_ROOT_PASSWORD")
-#               region_name = "us"
-#               bucket_name = "models"
-
-#               if not all([aws_access_key_id, aws_secret_access_key, endpoint_url, region_name, bucket_name]):
-#                   raise ValueError("One or data connection variables are empty.  "
-#                                   "Please check your data connection to an S3 bucket.")
-
-#               session = boto3.session.Session(aws_access_key_id=aws_access_key_id,
-#                                               aws_secret_access_key=aws_secret_access_key)
-
-#               s3_resource = session.resource(
-#                   's3',
-#                   config=botocore.client.Config(signature_version='s3v4'),
-#                   endpoint_url=endpoint_url,
-#                   region_name=region_name)
-
-#               bucket = s3_resource.Bucket(bucket_name)
-
-#               def upload_directory_to_s3(local_directory, s3_prefix):
-#                   num_files = 0
-#                   for root, dirs, files in os.walk(local_directory):
-#                       for filename in files:
-#                           file_path = os.path.join(root, filename)
-#                           relative_path = os.path.relpath(file_path, local_directory)
-#                           s3_key = os.path.join(s3_prefix, relative_path)
-#                           print(f"{file_path} -> {s3_key}")
-#                           bucket.upload_file(file_path, s3_key)
-#                           num_files += 1
-#                   return num_files
-
-
-#               def list_objects(prefix):
-#                   filter = bucket.objects.filter(Prefix=prefix)
-#                   for obj in filter.all():
-#                       print(obj.key)
-
-#               if hf_token is not None and hf_token.strip() != "None":
-#                 # If 'hftoken' is not None and not empty, execute this code
-#                 print("hftoken is set.")
-#                 login(token=hf_token)
-#               mistral_models_path = Path.home().joinpath(model_id)
-#               mistral_models_path.mkdir(parents=True, exist_ok=True)
-#               snapshot_download(repo_id=modelid, local_dir=mistral_models_path)
-
-#               list_objects("models")
-#               print(model_id)
-#               num_files = upload_directory_to_s3(model_id, "llm-models/" + model_id)
-
-#               if num_files == 0:
-#                   raise ValueError("No files uploaded.")
-
-#               list_objects("models")
-
-#               EOF
-#           command:
-#             - /bin/bash
-#           envFrom:
-#             - secretRef:
-#                 name: minio-secret
-#             - secretRef:
-#                 name: huggingface-secret
-#           image: registry.access.redhat.com/ubi8/python-38
-#           imagePullPolicy: IfNotPresent
-#           name: download-model
-#       initContainers:
-#         - args:
-#             - -ec
-#             - |-
-#               echo -n 'Waiting for minio root user secret'
-#               while ! oc get secret minio-secret 2>/dev/null | grep -qF minio-secret; do
-#               echo -n .
-#               sleep 5
-#               done; echo
-
-#               echo -n 'Waiting for minio deployment'
-#               while ! oc get deployment minio 2>/dev/null | grep -qF minio; do
-#                 echo -n .
-#                 sleep 5
-#               done; echo
-#               oc wait --for=condition=available --timeout=600s deployment/minio
-#               sleep 10
-#           command:
-#             - /bin/bash
-#           image: image-registry.openshift-image-registry.svc:5000/openshift/tools:latest
-#           imagePullPolicy: IfNotPresent
-#           name: wait-for-minio
-#       restartPolicy: Never
-#       serviceAccountName: demo-setup
diff --git a/charts/all/minio/values.yaml b/charts/all/minio/values.yaml
deleted file mode 100644
index 6473bd1b..00000000
--- a/charts/all/minio/values.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-secretStore:
-  name: vault-backend
-  kind: ClusterSecretStore
-
-minio:
-  key: secret/data/hub/minio
\ No newline at end of file
diff --git a/charts/all/nfd-config/templates/node-feature-discovery.yaml b/charts/all/nfd-config/templates/node-feature-discovery.yaml
index 484fbad1..de543668 100644
--- a/charts/all/nfd-config/templates/node-feature-discovery.yaml
+++ b/charts/all/nfd-config/templates/node-feature-discovery.yaml
@@ -6,7 +6,7 @@ metadata:
 spec:
   operand:
     image: >-
-      registry.redhat.io/openshift4/ose-node-feature-discovery-rhel9:v{{ .Values.global.clusterVersion }}
+      registry.redhat.io/openshift4/ose-node-feature-discovery{{ ternary "-rhel9" "" (semverCompare ">=4.15.0" .Values.global.clusterVersion) }}:v{{ .Values.global.clusterVersion }}
     servicePort: 12000
   workerConfig:
     configData: |
@@ -24,7 +24,7 @@ spec:
       #    stderrthreshold: 2
       #    v: 0
       #    vmodule:
-      ##   NOTE: the following options are not dynamically run-time 
+      ##   NOTE: the following options are not dynamically run-time
       ##          configurable and require a nfd-worker restart to take effect
       ##          after being changed
       #    logDir:
diff --git a/charts/all/nvidia-gpu-config/templates/nvidia-cluster-policy-config.yaml b/charts/all/nvidia-gpu-config/templates/nvidia-cluster-policy-config.yaml
index 61ad7972..98c91233 100644
--- a/charts/all/nvidia-gpu-config/templates/nvidia-cluster-policy-config.yaml
+++ b/charts/all/nvidia-gpu-config/templates/nvidia-cluster-policy-config.yaml
@@ -32,8 +32,6 @@ spec:
     licensingConfig:
       configMapName: ''
       nlsEnabled: false
-    repoConfig:
-      configMapName: ''
     upgradePolicy:
       autoUpgrade: true
       drain:
diff --git a/charts/all/rag-llm/charts/azure-sql/Chart.yaml b/charts/all/rag-llm/charts/azure-sql/Chart.yaml
new file mode 100644
index 00000000..331fe1e3
--- /dev/null
+++ b/charts/all/rag-llm/charts/azure-sql/Chart.yaml
@@ -0,0 +1,6 @@
+apiVersion: v2
+name: azuresql
+description: Helpers for using Azure SQL server as a RAG DB backend
+type: application
+version: 0.1.0
+appVersion: 0.1.0
diff --git a/charts/all/rag-llm/charts/azure-sql/templates/external-secret.yaml b/charts/all/rag-llm/charts/azure-sql/templates/external-secret.yaml
new file mode 100644
index 00000000..da4d6d56
--- /dev/null
+++ b/charts/all/rag-llm/charts/azure-sql/templates/external-secret.yaml
@@ -0,0 +1,36 @@
+{{- if eq .Values.global.db.type "AZURESQL" }}
+apiVersion: "external-secrets.io/v1beta1"
+kind: ExternalSecret
+metadata:
+  name: azuresql-external-secret
+spec:
+  refreshInterval: 15s
+  secretStoreRef:
+    name: {{ .Values.secretStore.name }}
+    kind: {{ .Values.secretStore.kind }}
+  target:
+    name: azuresql-secret
+    template:
+      type: Opaque
+      engineVersion: v2
+      data:
+        CONNECTION_STRING: >-  # ">-" folds the lines and strips the trailing newline from the secret value
+          Driver={{ printf "{%s}" .Values.driver }};
+          Server={{ "{{ .server }}" }},1433;
+          Database={{ .Values.databaseName }};
+          UID={{ "{{ .user }}" }};
+          PWD={{ "{{ .password }}" }};
+  data:
+  - secretKey: user
+    remoteRef:
+      key: {{ .Values.secretStore.key }}
+      property: "user"
+  - secretKey: password
+    remoteRef:
+      key: {{ .Values.secretStore.key }}
+      property: "password"
+  - secretKey: server
+    remoteRef:
+      key: {{ .Values.secretStore.key }}
+      property: "server"
+{{- end }}
diff --git a/charts/all/rag-llm/charts/azure-sql/values.yaml b/charts/all/rag-llm/charts/azure-sql/values.yaml
new file mode 100644
index 00000000..1717686a
--- /dev/null
+++ b/charts/all/rag-llm/charts/azure-sql/values.yaml
@@ -0,0 +1,7 @@
+secretStore:
+  name: vault-backend
+  kind: ClusterSecretStore
+  key: secret/data/hub/azuresql
+
+driver: "ODBC Driver 18 for SQL Server"
+databaseName: embeddings
diff --git a/charts/all/rag-llm/charts/elastic/templates/elasticsearch.yaml b/charts/all/rag-llm/charts/elastic/templates/elasticsearch.yaml
index 80b9bd12..f20b3f7d 100644
--- a/charts/all/rag-llm/charts/elastic/templates/elasticsearch.yaml
+++ b/charts/all/rag-llm/charts/elastic/templates/elasticsearch.yaml
@@ -12,7 +12,7 @@ spec:
   nodeSets:
     - config:
         node.store.allow_mmap: false
-      count: 1
+      count: 3
       name: default
       podTemplate:
         metadata:
diff --git a/charts/all/minio/Chart.yaml b/charts/all/rag-llm/charts/mssql/Chart.yaml
similarity index 92%
rename from charts/all/minio/Chart.yaml
rename to charts/all/rag-llm/charts/mssql/Chart.yaml
index 50fbb849..73d66be1 100644
--- a/charts/all/minio/Chart.yaml
+++ b/charts/all/rag-llm/charts/mssql/Chart.yaml
@@ -1,5 +1,5 @@
 apiVersion: v2
-name: minio
+name: mssql
 description: A Helm chart for Kubernetes
 
 # A chart can be either an 'application' or a 'library' chart.
@@ -15,10 +15,9 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.1.0
+version: 0.1.1
 
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
 # follow Semantic Versioning. They should reflect the version the application is using.
-# It is recommended to use it with quotes.
-appVersion: "1.16.0"
+appVersion: 1.16.0
diff --git a/charts/all/minio/templates/_helpers.tpl b/charts/all/rag-llm/charts/mssql/templates/_helpers.tpl
similarity index 72%
rename from charts/all/minio/templates/_helpers.tpl
rename to charts/all/rag-llm/charts/mssql/templates/_helpers.tpl
index f415bbf2..5420b1ae 100644
--- a/charts/all/minio/templates/_helpers.tpl
+++ b/charts/all/rag-llm/charts/mssql/templates/_helpers.tpl
@@ -1,7 +1,7 @@
 {{/*
 Expand the name of the chart.
 */}}
-{{- define "minio.name" -}}
+{{- define "mssql.name" -}}
 {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
 {{- end }}
 
@@ -10,7 +10,7 @@ Create a default fully qualified app name.
 We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
 If release name contains chart name it will be used as a full name.
 */}}
-{{- define "minio.fullname" -}}
+{{- define "mssql.fullname" -}}
 {{- if .Values.fullnameOverride }}
 {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
 {{- else }}
@@ -26,16 +26,16 @@ If release name contains chart name it will be used as a full name.
 {{/*
 Create chart name and version as used by the chart label.
 */}}
-{{- define "minio.chart" -}}
+{{- define "mssql.chart" -}}
 {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
 {{- end }}
 
 {{/*
 Common labels
 */}}
-{{- define "minio.labels" -}}
-helm.sh/chart: {{ include "minio.chart" . }}
-{{ include "minio.selectorLabels" . }}
+{{- define "mssql.labels" -}}
+helm.sh/chart: {{ include "mssql.chart" . }}
+{{ include "mssql.selectorLabels" . }}
 {{- if .Chart.AppVersion }}
 app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
 {{- end }}
@@ -45,18 +45,26 @@ app.kubernetes.io/managed-by: {{ .Release.Service }}
 {{/*
 Selector labels
 */}}
-{{- define "minio.selectorLabels" -}}
-app.kubernetes.io/name: {{ include "minio.name" . }}
+{{- define "mssql.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "mssql.name" . }}
 app.kubernetes.io/instance: {{ .Release.Name }}
 {{- end }}
 
 {{/*
 Create the name of the service account to use
 */}}
-{{- define "minio.serviceAccountName" -}}
+{{- define "mssql.serviceAccountName" -}}
 {{- if .Values.serviceAccount.create }}
-{{- default (include "minio.fullname" .) .Values.serviceAccount.name }}
+{{- default (include "mssql.fullname" .) .Values.serviceAccount.name }}
 {{- else }}
 {{- default "default" .Values.serviceAccount.name }}
 {{- end }}
 {{- end }}
+
+
+{{/*
+Create the name for the SA password secret key.
+*/}}
+{{- define "mssql.sapassword" -}}
+  sa_password
+{{- end -}}
diff --git a/charts/all/rag-llm/charts/mssql/templates/deployment.yaml b/charts/all/rag-llm/charts/mssql/templates/deployment.yaml
new file mode 100644
index 00000000..a62e67c0
--- /dev/null
+++ b/charts/all/rag-llm/charts/mssql/templates/deployment.yaml
@@ -0,0 +1,82 @@
+{{- /* SQL Server Deployment; rendered only when global.db.type is "MSSQL". */ -}}
+{{- if eq .Values.global.db.type "MSSQL" }}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "mssql.fullname" . }}
+  labels:
+    {{- include "mssql.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicas}}
+  # The pod mounts PVCs that default to ReadWriteOnce, so the old pod must
+  # release them before its replacement starts. With the default RollingUpdate
+  # strategy the new pod can hang waiting for volumes the old pod still holds.
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      {{- include "mssql.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "mssql.selectorLabels" . | nindent 8 }}
+    spec:
+      hostname: {{ .Values.hostname}}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: {{ .Chart.Name }}
+          command:
+            - /bin/bash
+            - -c
+            - cp /var/opt/config/mssql.conf /var/opt/mssql/mssql.conf && /opt/mssql/bin/sqlservr
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          securityContext:
+          {{- toYaml .Values.containerSecurityContext | nindent 12 }}
+          ports:
+            - containerPort: {{ .Values.containers.ports.containerPort}}
+          env:
+           - name: MSSQL_PID
+             value: "{{ .Values.MSSQL_PID.value}}"
+           - name: ACCEPT_EULA
+             value: "{{ .Values.ACCEPT_EULA.value | upper}}"
+           - name: MSSQL_AGENT_ENABLED
+             value: "{{ .Values.MSSQL_AGENT_ENABLED.value}}"
+           - name: SA_PASSWORD
+             valueFrom:
+              secretKeyRef:
+               name: mssql-secret
+               key: SA_PASSWORD
+          volumeMounts:
+           - name: mssqldb
+             mountPath: /var/opt/mssql
+           - name: mssqluserdb
+             mountPath: /var/opt/mssql/userdata
+           - name: mssqllog
+             mountPath: /var/opt/mssql/userlog
+           - name: mssqltemp
+             mountPath: /var/opt/mssql/tempdb
+           - name: mssql-config-volume
+             mountPath: /var/opt/config
+      volumes:
+       - name: mssqldb
+         persistentVolumeClaim:
+          claimName: mssql-data
+       - name: mssqluserdb
+         persistentVolumeClaim:
+          claimName: mssql-userdb
+       - name: mssqllog
+         persistentVolumeClaim:
+          claimName: mssql-log
+       - name: mssqltemp
+         persistentVolumeClaim:
+          claimName: mssql-temp
+       - name: mssql-config-volume
+         configMap:
+          name: mssql-config
+{{- end }}
diff --git a/charts/all/rag-llm/charts/mssql/templates/external-secret.yaml b/charts/all/rag-llm/charts/mssql/templates/external-secret.yaml
new file mode 100644
index 00000000..d67f4394
--- /dev/null
+++ b/charts/all/rag-llm/charts/mssql/templates/external-secret.yaml
@@ -0,0 +1,31 @@
+{{- if eq .Values.global.db.type "MSSQL" }}
+apiVersion: "external-secrets.io/v1beta1"
+kind: ExternalSecret
+metadata:
+  name: mssql-external-secret
+spec:
+  refreshInterval: 15s
+  secretStoreRef:
+    name: {{ .Values.secretStore.name }}
+    kind: {{ .Values.secretStore.kind }}
+  target:
+    name: mssql-secret
+    template:
+      type: Opaque
+      engineVersion: v2
+      data:
+        CONNECTION_STRING: >-  # ">-" folds the lines and strips the trailing newline from the secret value
+          Driver={{ printf "{%s}" .Values.driver }};
+          Server={{ include "mssql.fullname" . }},{{ toString .Values.service.port }};
+          Database={{ .Values.databaseName }};
+          UID=sa;
+          PWD={{ "{{ .sapass }}" }};
+          TrustServerCertificate=yes;
+          Encrypt=no;
+        SA_PASSWORD: "{{ `{{ .sapass }}` }}"
+  data:
+  - secretKey: sapass
+    remoteRef:
+      key: {{ .Values.secretStore.mssqlSecretKey }}
+      property: "sa-pass"
+{{- end }}
diff --git a/charts/all/rag-llm/charts/mssql/templates/mssqlconfig.yaml b/charts/all/rag-llm/charts/mssql/templates/mssqlconfig.yaml
new file mode 100644
index 00000000..f890a62d
--- /dev/null
+++ b/charts/all/rag-llm/charts/mssql/templates/mssqlconfig.yaml
@@ -0,0 +1,25 @@
+{{- if eq .Values.global.db.type "MSSQL" }}
+kind: ConfigMap
+apiVersion: v1
+metadata:
+  name: mssql-config
+data:
+  mssql.conf: |
+    [EULA]
+    accepteula = Y
+    accepteulaml = Y
+
+    [coredump]
+    captureminiandfull = true
+    coredumptype = full
+
+    [hadr]
+    hadrenabled = 1
+
+    [language]
+    lcid = 1033
+
+    [filelocation]
+    defaultdatadir = /var/opt/mssql/userdata
+    defaultlogdir = /var/opt/mssql/userlog
+{{- end }}
diff --git a/charts/all/rag-llm/charts/mssql/templates/pvc.yaml b/charts/all/rag-llm/charts/mssql/templates/pvc.yaml
new file mode 100644
index 00000000..e5e62c1f
--- /dev/null
+++ b/charts/all/rag-llm/charts/mssql/templates/pvc.yaml
@@ -0,0 +1,53 @@
+{{- if eq .Values.global.db.type "MSSQL" }}
+kind: PersistentVolumeClaim
+apiVersion: v1
+metadata:
+  name: mssql-data
+spec:
+  # spec.storageClassName supersedes the deprecated volume.beta.kubernetes.io/storage-class annotation.
+  storageClassName: {{ .Values.global.storageClass }}
+  accessModes:
+  - {{ .Values.pvc.mssqldataaccessMode | quote}}
+  resources:
+    requests:
+      storage: {{ .Values.pvc.mssqldbsize}}
+---
+kind: PersistentVolumeClaim
+apiVersion: v1
+metadata:
+  name: mssql-userdb
+spec:
+  # Claim mounted at /var/opt/mssql/userdata by the mssql Deployment.
+  storageClassName: {{ .Values.global.storageClass }}
+  accessModes:
+  - {{ .Values.pvc.userdbaccessMode | quote}}
+  resources:
+    requests:
+      storage: {{ .Values.pvc.userdbsize}}
+---
+kind: PersistentVolumeClaim
+apiVersion: v1
+metadata:
+  name: mssql-log
+spec:
+  # Claim mounted at /var/opt/mssql/userlog by the mssql Deployment.
+  storageClassName: {{ .Values.global.storageClass }}
+  accessModes:
+  - {{ .Values.pvc.userlogaccessMode | quote}}
+  resources:
+    requests:
+      storage: {{ .Values.pvc.userlogsize}}
+---
+kind: PersistentVolumeClaim
+apiVersion: v1
+metadata:
+  name: mssql-temp
+spec:
+  # Claim mounted at /var/opt/mssql/tempdb by the mssql Deployment.
+  storageClassName: {{ .Values.global.storageClass }}
+  accessModes:
+  - {{ .Values.pvc.tempdbaccessMode | quote}}
+  resources:
+    requests:
+      storage: {{ .Values.pvc.tempsize}}
+{{- end }}
diff --git a/charts/all/rag-llm/charts/mssql/templates/service.yaml b/charts/all/rag-llm/charts/mssql/templates/service.yaml
new file mode 100644
index 00000000..6a98ab74
--- /dev/null
+++ b/charts/all/rag-llm/charts/mssql/templates/service.yaml
@@ -0,0 +1,16 @@
+{{- if eq .Values.global.db.type "MSSQL" }}
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "mssql.fullname" . }}
+  labels:
+    {{- include "mssql.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  ports:
+    - port: {{ .Values.service.port }}  # port clients connect to (also used in the connection string)
+      targetPort: {{ .Values.containers.ports.containerPort }}  # port the container declares, so overriding service.port cannot break routing
+      protocol: TCP
+  selector:
+    {{- include "mssql.selectorLabels" . | nindent 4 }}
+{{- end }}
diff --git a/charts/all/rag-llm/charts/mssql/values.yaml b/charts/all/rag-llm/charts/mssql/values.yaml
new file mode 100644
index 00000000..6a92a526
--- /dev/null
+++ b/charts/all/rag-llm/charts/mssql/values.yaml
@@ -0,0 +1,52 @@
+global:
+  storageClass: gp3-csi  # storage class requested by the chart's PVCs
+
+secretStore:
+  name: vault-backend
+  kind: ClusterSecretStore
+  mssqlSecretKey: secret/data/hub/mssql
+
+replicas: 1
+
+image:
+  repository: mcr.microsoft.com/mssql/rhel/server
+  tag: 2025-latest
+  pullPolicy: IfNotPresent
+
+ACCEPT_EULA:
+  value: "y"  # quoted so it stays a string; the Deployment upper-cases it
+MSSQL_PID:
+  value: "Developer"
+MSSQL_AGENT_ENABLED:
+  value: "true"  # quoted: container env values must be strings
+hostname: mssqllatest
+containers:
+  ports:
+    containerPort: 1433
+
+podAnnotations: {}
+
+podSecurityContext:
+  fsGroupChangePolicy: OnRootMismatch
+
+containerSecurityContext:
+  capabilities:
+    add:
+      - NET_BIND_SERVICE
+
+service:
+  type: ClusterIP
+  port: 1433
+
+pvc:
+  userdbaccessMode: ReadWriteOnce
+  userdbsize: 5Gi
+  userlogaccessMode: ReadWriteOnce
+  userlogsize: 5Gi
+  tempdbaccessMode: ReadWriteOnce
+  tempsize: 2Gi
+  mssqldataaccessMode: ReadWriteOnce
+  mssqldbsize: 2Gi
+
+driver: "ODBC Driver 18 for SQL Server"
+databaseName: embeddings
diff --git a/charts/all/rag-llm/files/config.yaml b/charts/all/rag-llm/files/config.yaml
index 6a9975aa..8f50809e 100644
--- a/charts/all/rag-llm/files/config.yaml
+++ b/charts/all/rag-llm/files/config.yaml
@@ -2,10 +2,10 @@ llm_providers:
   - name: "OpenShift AI (vLLM)"
     enabled: True
     models:
-      - name: ibm-granite-instruct
+      - name: {{ (split "/" .Values.global.model.vllm)._1 }}
         weight: 1
         enabled: True
-        url: https://ibm-granite-instruct-{{ .Values.llmui.namespace }}.{{ coalesce .Values.global.localClusterDomain .Values.global.hubClusterDomain }}/v1
+        url: {{ printf "https://vllm-inference-service-predictor-%s.%s/v1" .Release.Namespace .Values.global.localClusterDomain }}
         params:
           - name: max_new_tokens
             value: 1024
@@ -22,6 +22,6 @@ llm_providers:
           - name: typical_p
             value: 0.95
 default_provider: "OpenShift AI (vLLM)"
-default_model: {{ .Values.global.model.modelId }}
+default_model: {{ .Values.global.model.vllm }}
 # type values=(default, round_robin,  all)
-type: all
\ No newline at end of file
+type: all
diff --git a/charts/all/rag-llm/templates/deployment.yaml b/charts/all/rag-llm/templates/deployment.yaml
index 08e3b50a..1534476b 100644
--- a/charts/all/rag-llm/templates/deployment.yaml
+++ b/charts/all/rag-llm/templates/deployment.yaml
@@ -36,11 +36,17 @@ spec:
           env:
             - name: HUGGINGFACE_HUB_CACHE
               value: /tmp/.cache
+          {{- range .Values.volumeMounts }}
+          {{- if eq .name "providerconfig" }}
             - name: CONFIG_FILE
-              value: /app-root/config/config.yaml
+              value: {{ .mountPath }}/config.yaml
+          {{- end }}
+          {{- end }}
             - name: APP_TITLE
               value: Talk with your documentation
-        {{- if eq .Values.global.db.type "REDIS" }}
+            - name: EMBEDDING_MODEL
+              value: {{ .Values.global.model.embedding }}
+          {{- if eq .Values.global.db.type "REDIS" }}
             - name: DB_TYPE
               value: REDIS
             - name: REDIS_URL
@@ -49,8 +55,8 @@ spec:
               value: {{ .Values.global.db.index }}
             - name: REDIS_SCHEMA
               value: redis_schema.yaml
-        {{- end }}
-        {{- if eq .Values.global.db.type "EDB" }}
+          {{- end }}
+          {{- if eq .Values.global.db.type "EDB" }}
             - name: DB_TYPE
               value: PGVECTOR
             - name: DB_USERNAME
@@ -87,8 +93,8 @@ spec:
               value: 'postgresql+psycopg://$(DB_USERNAME):$(DB_PASS)@$(DB_HOST):$(DB_PORT)/$(DB_NAME)'
             - name: PGVECTOR_COLLECTION_NAME
               value: {{ .Values.global.db.index }}
-        {{- end }}
-        {{- if eq .Values.global.db.type "ELASTIC" }}
+          {{- end }}
+          {{- if eq .Values.global.db.type "ELASTIC" }}
             - name: DB_TYPE
               value: "ELASTIC"
             - name: ELASTIC_INDEX
@@ -103,6 +109,28 @@ spec:
                   name: es-vectordb-es-elastic-user
                   key: elastic
           {{- end }}
+          {{- if eq .Values.global.db.type "MSSQL" }}
+            - name: DB_TYPE
+              value: MSSQL
+            - name: MSSQL_CONNECTION_STRING
+              valueFrom:
+                secretKeyRef:
+                  name: mssql-secret
+                  key: CONNECTION_STRING
+            - name: MSSQL_TABLE
+              value: {{ .Values.global.db.index }}
+          {{- end }}
+          {{- if eq .Values.global.db.type "AZURESQL" }}
+            - name: DB_TYPE
+              value: MSSQL
+            - name: MSSQL_CONNECTION_STRING
+              valueFrom:
+                secretKeyRef:
+                  name: azuresql-secret
+                  key: CONNECTION_STRING
+            - name: MSSQL_TABLE
+              value: {{ .Values.global.db.index }}
+          {{- end }}
           securityContext:
             {{- toYaml .Values.securityContext | nindent 12 }}
           image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
diff --git a/charts/all/rag-llm/templates/populate-vectordb-job.yaml b/charts/all/rag-llm/templates/populate-vectordb-job.yaml
index 989fd474..46b0813d 100644
--- a/charts/all/rag-llm/templates/populate-vectordb-job.yaml
+++ b/charts/all/rag-llm/templates/populate-vectordb-job.yaml
@@ -26,16 +26,20 @@ spec:
         imagePullPolicy: {{ .Values.populateDbJob.image.pullPolicy }}
         name: populate-vectordb
         env:
-        - name: TRANSFORMERS_CACHE
-          value: '/cache/.cache'
-        - name: SENTENCE_TRANSFORMERS_HOME
-          value: '/cache/.cache'
-        - name: DOC_GIT_REPO
-          value: {{ .Values.populateDbJob.doc_git_repo | quote }}
-        - name: DOC_LOCATION
-          value:  {{ .Values.populateDbJob.doc_location | quote }}
         - name: TEMP_DIR
-          value: {{ .Values.populateDbJob.doc_dir | quote }}
+          value: {{ .Values.populateDbJob.tempDir }}
+        - name: LOG_LEVEL
+          value: {{ .Values.populateDbJob.logLevel }}
+        - name: REPO_SOURCES
+          value: {{ .Values.populateDbJob.repoSources | toJson | quote }}
+        - name: WEB_SOURCES
+          value: {{ .Values.populateDbJob.webSources | toJson | quote }}
+        - name: CHUNK_SIZE
+          value: {{ .Values.populateDbJob.chunking.size | quote }}
+        - name: CHUNK_OVERLAP
+          value: {{ .Values.populateDbJob.chunking.overlap | quote }}
+        - name: EMBEDDING_MODEL
+          value: {{ .Values.global.model.embedding }}
       {{- if eq .Values.global.db.type "REDIS" }}
         - name: DB_TYPE
           value: "REDIS"
@@ -98,8 +102,32 @@ spec:
             secretKeyRef:
               name: es-vectordb-es-elastic-user
               key: elastic
+      {{- end }}
+      {{- if eq .Values.global.db.type "MSSQL" }}
+        - name: DB_TYPE
+          value: MSSQL
+        - name: MSSQL_CONNECTION_STRING
+          valueFrom:
+            secretKeyRef:
+              name: mssql-secret
+              key: CONNECTION_STRING
+        - name: MSSQL_TABLE
+          value: {{ .Values.global.db.index }}
+      {{- end }}
+      {{- if eq .Values.global.db.type "AZURESQL" }}
+        - name: DB_TYPE
+          value: MSSQL
+        - name: MSSQL_CONNECTION_STRING
+          valueFrom:
+            secretKeyRef:
+              name: azuresql-secret
+              key: CONNECTION_STRING
+        - name: MSSQL_TABLE
+          value: {{ .Values.global.db.index }}
+      {{- end }}
+        {{- if .Values.populateDbJob.command }}
+        command: {{ .Values.populateDbJob.command | toJson }}  # toJson keeps list syntax; a bare list renders as Go's "[a b]"
         {{- end }}
-        command: ["/usr/bin/bash", "/app/entrypoint.sh"]
         {{- if .Values.populateDbJob.args }}
         args: {{ .Values.populateDbJob.args }}
         {{- end }}
diff --git a/charts/all/rag-llm/values.yaml b/charts/all/rag-llm/values.yaml
index b8476cf6..e20629ad 100644
--- a/charts/all/rag-llm/values.yaml
+++ b/charts/all/rag-llm/values.yaml
@@ -1,12 +1,14 @@
-
 global:
-  localClusterDomain: example.com
+  localClusterDomain: apps.example.com
   hubClusterDomain: example.com
   db:
     index: docs
     type: EDB
   model:
-    modelId: ibm-granite/granite-3.1-8b-instruct
+    vllm: ibm-granite/granite-3.3-8b-instruct
+    embedding: sentence-transformers/all-mpnet-base-v2
+  storageClass: gp3-csi  # sits directly under global: the mssql PVC templates read .Values.global.storageClass
+
 llmui:
   namespace: "rag-llm"
 
@@ -14,10 +16,10 @@ llmui:
 replicaCount: 1
 
 image:
-  repository: 'quay.io/ecosystem-appeng/rag-llm-ui'
-  pullPolicy: IfNotPresent
+  repository: quay.io/dminnear/gradio-tgi-multi-model-rag
+  pullPolicy: Always
   # Overrides the image tag whose default is the chart appVersion.
-  tag: "1.1"
+  tag: latest
 
 imagePullSecrets: []
 nameOverride: ""
@@ -69,7 +71,7 @@ resources:
 
 livenessProbe:
   httpGet:
-    path: /queue/status
+    path: /
     port: http
     scheme: HTTP
   timeoutSeconds: 8
@@ -79,7 +81,7 @@ livenessProbe:
 
 readinessProbe:
   httpGet:
-    path: /queue/status
+    path: /
     port: http
     scheme: HTTP
   timeoutSeconds: 5
@@ -89,7 +91,7 @@ readinessProbe:
 
 startupProbe:
   httpGet:
-    path: /queue/status
+    path: /
     port: http
     scheme: HTTP
   timeoutSeconds: 1
@@ -128,7 +130,7 @@ volumes:
 # Additional volumeMounts on the output Deployment definition.
 volumeMounts:
   - name: providerconfig
-    mountPath: /app-root/config
+    mountPath: /opt/app-root/config
   - name: redis-schema
     mountPath: /opt/app-root/src/redis_schema.yaml
     subPath: redis_schema.yaml
@@ -161,23 +163,35 @@ route:
 populateDbJob:
   ## Job image
   image:
-    repository: "quay.io/ecosystem-appeng/embeddingjob"
-    tag: "0.0.4"
-    pullPolicy: IfNotPresent
-
-  command: ["/usr/bin/bash", "/app/entrypoint.sh"]
-  #args: ["echo 'consuming a message'; sleep 5"]
-
-  ## Define env
-  # env:
+    repository: quay.io/hybridcloudpatterns/vector-embedder
+    tag: latest
+    pullPolicy: Always
 
   ## Job configurations
   backoffLimit: 10
   restartPolicy: Never
 
-  doc_git_repo: https://github.com/RHEcosystemAppEng/llm-on-openshift.git
-  doc_location: examples/notebooks/langchain/rhods-doc
-  doc_dir: /docs
+  # Environment overrides
+  tempDir: /docs
+  logLevel: info
+  repoSources:
+    - repo: https://github.com/RHEcosystemAppEng/llm-on-openshift.git
+      globs:
+        - examples/notebooks/langchain/rhods-doc/*.pdf
+  webSources:
+    - https://ai-on-openshift.io/getting-started/openshift/
+    - https://ai-on-openshift.io/getting-started/opendatahub/
+    - https://ai-on-openshift.io/getting-started/openshift-ai/
+    - https://ai-on-openshift.io/odh-rhoai/configuration/
+    - https://ai-on-openshift.io/odh-rhoai/custom-notebooks/
+    - https://ai-on-openshift.io/odh-rhoai/nvidia-gpus/
+    - https://ai-on-openshift.io/odh-rhoai/custom-runtime-triton/
+    - https://ai-on-openshift.io/odh-rhoai/openshift-group-management/
+    - https://ai-on-openshift.io/tools-and-applications/minio/minio/
+  chunking:
+    size: 1024
+    overlap: 40
+  embeddingModel: sentence-transformers/all-mpnet-base-v2
 
 
   # By default, fullname uses '{{ .Release.Name }}-{{ .Chart.Name }}'. This
@@ -201,7 +215,7 @@ populateDbJob:
   #    memory: 500Mi
 
   securityContext:
-    runAsUser: 
+    runAsUser:
     runAsGroup:
     fsGroup:
 
@@ -210,7 +224,7 @@ populateDbJob:
       name: cache-volume
     - mountPath: /docs/
       name: doc-volume
-  
+
   volumes:
     - name: cache-volume
       emptyDir:
@@ -228,4 +242,4 @@ hfmodel:
 
 # Create NetworkPolicy to allow traffic from all namespaces to allow monitoring. Set to false if monitoring is not needed
 customnetworkpolicy:
-  enabled: true
\ No newline at end of file
+  enabled: true
diff --git a/charts/all/rhods/Chart.yaml b/charts/all/rhods/Chart.yaml
index 50fbb849..a7834eac 100644
--- a/charts/all/rhods/Chart.yaml
+++ b/charts/all/rhods/Chart.yaml
@@ -1,5 +1,5 @@
 apiVersion: v2
-name: minio
+name: rhods
 description: A Helm chart for Kubernetes
 
 # A chart can be either an 'application' or a 'library' chart.
diff --git a/charts/all/rhods/templates/_helpers.tpl b/charts/all/rhods/templates/_helpers.tpl
index f415bbf2..c7539c12 100644
--- a/charts/all/rhods/templates/_helpers.tpl
+++ b/charts/all/rhods/templates/_helpers.tpl
@@ -1,7 +1,7 @@
 {{/*
 Expand the name of the chart.
 */}}
-{{- define "minio.name" -}}
+{{- define "rhods.name" -}}
 {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
 {{- end }}
 
@@ -10,7 +10,7 @@ Create a default fully qualified app name.
 We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
 If release name contains chart name it will be used as a full name.
 */}}
-{{- define "minio.fullname" -}}
+{{- define "rhods.fullname" -}}
 {{- if .Values.fullnameOverride }}
 {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
 {{- else }}
@@ -26,16 +26,16 @@ If release name contains chart name it will be used as a full name.
 {{/*
 Create chart name and version as used by the chart label.
 */}}
-{{- define "minio.chart" -}}
+{{- define "rhods.chart" -}}
 {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
 {{- end }}
 
 {{/*
 Common labels
 */}}
-{{- define "minio.labels" -}}
-helm.sh/chart: {{ include "minio.chart" . }}
-{{ include "minio.selectorLabels" . }}
+{{- define "rhods.labels" -}}
+helm.sh/chart: {{ include "rhods.chart" . }}
+{{ include "rhods.selectorLabels" . }}
 {{- if .Chart.AppVersion }}
 app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
 {{- end }}
@@ -45,17 +45,17 @@ app.kubernetes.io/managed-by: {{ .Release.Service }}
 {{/*
 Selector labels
 */}}
-{{- define "minio.selectorLabels" -}}
-app.kubernetes.io/name: {{ include "minio.name" . }}
+{{- define "rhods.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "rhods.name" . }}
 app.kubernetes.io/instance: {{ .Release.Name }}
 {{- end }}
 
 {{/*
 Create the name of the service account to use
 */}}
-{{- define "minio.serviceAccountName" -}}
+{{- define "rhods.serviceAccountName" -}}
 {{- if .Values.serviceAccount.create }}
-{{- default (include "minio.fullname" .) .Values.serviceAccount.name }}
+{{- default (include "rhods.fullname" .) .Values.serviceAccount.name }}
 {{- else }}
 {{- default "default" .Values.serviceAccount.name }}
 {{- end }}
diff --git a/charts/all/rhods/templates/dsc.yaml b/charts/all/rhods/templates/dsc.yaml
index af226cda..b21fb19c 100644
--- a/charts/all/rhods/templates/dsc.yaml
+++ b/charts/all/rhods/templates/dsc.yaml
@@ -3,7 +3,7 @@ kind: DataScienceCluster
 metadata:
   name: default-dsc
   annotations:
-    argocd.argoproj.io/sync-wave: "20"
+    argocd.argoproj.io/sync-wave: "10"
 spec:
   components:
     dashboard:
@@ -28,3 +28,4 @@ spec:
             type: SelfSigned
         managementState: Managed
         name: knative-serving
+      rawDeploymentServiceConfig: Headed
diff --git a/charts/all/tgis-server/.helmignore b/charts/all/tgis-server/.helmignore
deleted file mode 100644
index 0e8a0eb3..00000000
--- a/charts/all/tgis-server/.helmignore
+++ /dev/null
@@ -1,23 +0,0 @@
-# Patterns to ignore when building packages.
-# This supports shell glob matching, relative path matching, and
-# negation (prefixed with !). Only one pattern per line.
-.DS_Store
-# Common VCS dirs
-.git/
-.gitignore
-.bzr/
-.bzrignore
-.hg/
-.hgignore
-.svn/
-# Common backup files
-*.swp
-*.bak
-*.tmp
-*.orig
-*~
-# Various IDEs
-.project
-.idea/
-*.tmproj
-.vscode/
diff --git a/charts/all/tgis-server/Chart.yaml b/charts/all/tgis-server/Chart.yaml
deleted file mode 100644
index 6fae49d1..00000000
--- a/charts/all/tgis-server/Chart.yaml
+++ /dev/null
@@ -1,24 +0,0 @@
-apiVersion: v2
-name: tgis-server
-description: A Helm chart for Kubernetes
-
-# A chart can be either an 'application' or a 'library' chart.
-#
-# Application charts are a collection of templates that can be packaged into versioned archives
-# to be deployed.
-#
-# Library charts provide useful utilities or functions for the chart developer. They're included as
-# a dependency of application charts to inject those utilities and functions into the rendering
-# pipeline. Library charts do not define any templates and therefore cannot be deployed.
-type: application
-
-# This is the chart version. This version number should be incremented each time you make changes
-# to the chart and its templates, including the app version.
-# Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.1.0
-
-# This is the version number of the application being deployed. This version number should be
-# incremented each time you make changes to the application. Versions are not expected to
-# follow Semantic Versioning. They should reflect the version the application is using.
-# It is recommended to use it with quotes.
-appVersion: "1.16.0"
diff --git a/charts/all/tgis-server/templates/deployment.yaml b/charts/all/tgis-server/templates/deployment.yaml
deleted file mode 100644
index 7a0fed19..00000000
--- a/charts/all/tgis-server/templates/deployment.yaml
+++ /dev/null
@@ -1,99 +0,0 @@
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: hf-text-generation-inference-server-model1
-  labels:
-    {{- include "tgis-server.labels" . | nindent 4 }}
-spec:
-  {{- if not .Values.autoscaling.enabled }}
-  replicas: {{ .Values.replicaCount }}
-  {{- end }}
-  selector:
-    matchLabels:
-      {{- include "tgis-server.selectorLabels" . | nindent 6 }}
-  template:
-    metadata:
-      {{- with .Values.podAnnotations }}
-      annotations:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      labels:
-        {{- include "tgis-server.labels" . | nindent 8 }}
-        {{- with .Values.podLabels }}
-        {{- toYaml . | nindent 8 }}
-        {{- end }}
-    spec:
-      restartPolicy: Always
-      schedulerName: default-scheduler
-      {{- with .Values.imagePullSecrets }}
-      imagePullSecrets:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      serviceAccountName: {{ include "tgis-server.serviceAccountName" . }}
-      terminationGracePeriodSeconds: 120
-      securityContext:
-        {{- toYaml .Values.podSecurityContext | nindent 8 }}
-      containers:
-        - name: {{ .Chart.Name }}
-          env:
-            - name: MODEL_ID
-              value: {{ .Values.env.MODEL_ID }}
-            - name: MAX_INPUT_LENGTH
-              value: {{ .Values.env.MAX_INPUT_LENGTH | quote }}
-            - name: MAX_TOTAL_TOKENS
-              value:  {{ .Values.env.MAX_TOTAL_TOKENS | quote }}
-            - name: HUGGINGFACE_HUB_CACHE
-              value: {{ .Values.env.HUGGINGFACE_HUB_CACHE }}
-            - name: PORT
-              value: {{ .Values.env.PORT | quote }}
-            - name: HOST
-              value:  {{ .Values.env.HOST | quote }}
-            {{- with .Values.env.HF_TOKEN }}
-            - name: HF_TOKEN
-              {{- toYaml .Values.env.HF_TOKEN | nindent 14 }}
-            {{- end }}
-          securityContext:
-            {{- toYaml .Values.securityContext | nindent 12 }}
-          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
-          imagePullPolicy: {{ .Values.image.pullPolicy }}
-          ports:
-            - name: http
-              containerPort: {{ .Values.service.port }}
-              protocol: TCP
-          livenessProbe:
-            {{- toYaml .Values.livenessProbe | nindent 12 }}
-          readinessProbe:
-            {{- toYaml .Values.readinessProbe | nindent 12 }}
-          resources:
-            {{- toYaml .Values.resources | nindent 12 }}
-          startupProbe:
-            {{- toYaml .Values.livenessProbe | nindent 12 }}
-          volumeMounts:
-            - name: models-cache
-              mountPath: /models-cache
-            - name: shm
-              mountPath: /dev/shm
-          terminationMessagePolicy: File
-      volumes:
-        - name: models-cache
-          persistentVolumeClaim:
-            claimName: {{ include "tgis-server.fullname" . }}
-        - name: shm
-          emptyDir:
-            medium: Memory
-            sizeLimit: 1Gi
-      {{- with .Values.nodeSelector }}
-      nodeSelector:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      {{- with .Values.affinity }}
-      affinity:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      dnsPolicy: ClusterFirst
-      {{- with .Values.tolerations }}
-      tolerations:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-  strategy:
-    type: Recreate
diff --git a/charts/all/tgis-server/templates/hpa.yaml b/charts/all/tgis-server/templates/hpa.yaml
deleted file mode 100644
index 1f78fa9c..00000000
--- a/charts/all/tgis-server/templates/hpa.yaml
+++ /dev/null
@@ -1,32 +0,0 @@
-{{- if .Values.autoscaling.enabled }}
-apiVersion: autoscaling/v2
-kind: HorizontalPodAutoscaler
-metadata:
-  name: {{ include "tgis-server.fullname" . }}
-  labels:
-    {{- include "tgis-server.labels" . | nindent 4 }}
-spec:
-  scaleTargetRef:
-    apiVersion: apps/v1
-    kind: Deployment
-    name: {{ include "tgis-server.fullname" . }}
-  minReplicas: {{ .Values.autoscaling.minReplicas }}
-  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
-  metrics:
-    {{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
-    - type: Resource
-      resource:
-        name: cpu
-        target:
-          type: Utilization
-          averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
-    {{- end }}
-    {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
-    - type: Resource
-      resource:
-        name: memory
-        target:
-          type: Utilization
-          averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
-    {{- end }}
-{{- end }}
diff --git a/charts/all/tgis-server/templates/pvc.yaml b/charts/all/tgis-server/templates/pvc.yaml
deleted file mode 100644
index d0fdc41e..00000000
--- a/charts/all/tgis-server/templates/pvc.yaml
+++ /dev/null
@@ -1,28 +0,0 @@
-kind: PersistentVolumeClaim
-apiVersion: v1
-metadata:
-  name: {{ include "tgis-server.fullname" . }}
-  namespace: {{ .Release.Namespace }}
-{{- with .Values.persistence.annotations  }}
-  annotations:
-{{ toYaml . | indent 4 }}
-{{- end }}
-  labels:
-    app: {{ include "tgis-server.fullname" . }}
-    chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
-    release: "{{ .Release.Name }}"
-    heritage: "{{ .Release.Service }}"
-spec:
-  accessModes:
-    - {{ .Values.persistence.accessMode | quote }}
-  volumeMode: {{ .Values.persistence.volumeMode | quote }}
-  resources:
-    requests:
-      storage: {{ .Values.persistence.size | quote }}
-{{- if .Values.persistence.storageClass }}
-{{- if (eq "-" .Values.persistence.storageClass) }}
-  storageClassName: ""
-{{- else }}
-  storageClassName: "{{ .Values.persistence.storageClass }}"
-{{- end }}
-{{- end }}
diff --git a/charts/all/tgis-server/templates/service.yaml b/charts/all/tgis-server/templates/service.yaml
deleted file mode 100644
index 3c68f038..00000000
--- a/charts/all/tgis-server/templates/service.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: hf-text-generation-inference-server-model1
-  labels:
-    {{- include "tgis-server.labels" . | nindent 4 }}
-spec:
-  clusterIP: None
-  ipFamilies:
-    - IPv4
-  type: {{ .Values.service.type }}
-  ports:
-    - port: {{ .Values.service.port }}
-      targetPort: http
-      protocol: TCP
-      name: http
-  internalTrafficPolicy: Cluster
-  selector:
-    {{- include "tgis-server.selectorLabels" . | nindent 4 }}
-
diff --git a/charts/all/tgis-server/templates/serviceaccount.yaml b/charts/all/tgis-server/templates/serviceaccount.yaml
deleted file mode 100644
index d33d948d..00000000
--- a/charts/all/tgis-server/templates/serviceaccount.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
-{{- if .Values.serviceAccount.create -}}
-apiVersion: v1
-kind: ServiceAccount
-metadata:
-  name: {{ include "tgis-server.serviceAccountName" . }}
-  labels:
-    {{- include "tgis-server.labels" . | nindent 4 }}
-  {{- with .Values.serviceAccount.annotations }}
-  annotations:
-    {{- toYaml . | nindent 4 }}
-  {{- end }}
-automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
-{{- end }}
diff --git a/charts/all/tgis-server/values.yaml b/charts/all/tgis-server/values.yaml
deleted file mode 100644
index 91bcda39..00000000
--- a/charts/all/tgis-server/values.yaml
+++ /dev/null
@@ -1,130 +0,0 @@
-# Default values for tgis-server.
-# This is a YAML-formatted file.
-# Declare variables to be passed into your templates.
-
-replicaCount: 1
-image:
-  repository: ghcr.io/huggingface/text-generation-inference
-  pullPolicy: IfNotPresent
-  # Overrides the image tag whose default is the chart appVersion.
-  tag: "1.3.3"
-
-imagePullSecrets: []
-nameOverride: ""
-fullnameOverride: ""
-
-serviceAccount:
-  # Specifies whether a service account should be created
-  create: true
-  # Automatically mount a ServiceAccount's API credentials?
-  automount: true
-  # Annotations to add to the service account
-  annotations: {}
-  # The name of the service account to use.
-  # If not set and create is true, a name is generated using the fullname template
-  name: ""
-
-podAnnotations: {}
-podLabels: {}
-
-podSecurityContext: {}
-  # fsGroup: 2000
-
-securityContext:
-  capabilities:
-    drop:
-      - ALL
-  runAsNonRoot: true
-  allowPrivilegeEscalation: false
-  seccompProfile:
-    type: RuntimeDefault
-
-
-service:
-  type: ClusterIP
-  port: 3000
-
-env:
-  MODEL_ID: mistral-community/Mistral-7B-v0.2
-  MAX_INPUT_LENGTH: '1024'
-  MAX_TOTAL_TOKENS: '2048'
-  HUGGINGFACE_HUB_CACHE: /models-cache
-  PORT: '3000'
-  HOST: 0.0.0.0
-  HF_TOKEN: {}
-    # valueFrom:
-    #   secretKeyRef:
-    #     name: hf_token_secret
-    #     key: hf_token
-
-  # We usually recommend not to specify default resources and to leave this as a conscious
-  # choice for the user. This also increases chances charts run on environments with little
-  # resources, such as Minikube. If you do want to specify resources, uncomment the following
-  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
-resources:
-  limits:
-    cpu: '2'
-    memory: 16Gi
-    nvidia.com/gpu: '1'
-  requests:
-    cpu: '2'
-
-livenessProbe:
-  httpGet:
-    path: /health
-    port: http
-    scheme: HTTP
-  timeoutSeconds: 8
-  periodSeconds: 100
-  successThreshold: 1
-  failureThreshold: 3
-
-readinessProbe:
-  httpGet:
-    path: /health
-    port: http
-    scheme: HTTP
-  timeoutSeconds: 5
-  periodSeconds: 30
-  successThreshold: 1
-  failureThreshold: 3
-
-startupProbe:
-  httpGet:
-    path: /health
-    port: http
-    scheme: HTTP
-  timeoutSeconds: 1
-  periodSeconds: 30
-  successThreshold: 1
-  failureThreshold: 24
-
-autoscaling:
-  enabled: false
-  minReplicas: 1
-  maxReplicas: 100
-  targetCPUUtilizationPercentage: 80
-  # targetMemoryUtilizationPercentage: 80
-
-nodeSelector: {}
-
-tolerations:
-  - key: odh-notebook
-    value: 'true'
-    effect: NoSchedule
-
-persistence:
-  accessMode: ReadWriteOnce
-  size: 30Gi
-  annotations: {}
-  volumeMode: Filesystem
-
-affinity: {}
-  # nodeAffinity:
-  #   requiredDuringSchedulingIgnoredDuringExecution:
-  #     nodeSelectorTerms:
-  #       - matchExpressions:
-  #           - key: nvidia.com/gpu.present
-  #             operator: In
-  #             values:
-  #               - "true"
diff --git a/charts/all/llm-serving-service/.helmignore b/charts/all/vllm-inference-service/.helmignore
similarity index 100%
rename from charts/all/llm-serving-service/.helmignore
rename to charts/all/vllm-inference-service/.helmignore
diff --git a/charts/all/vllm-inference-service/Chart.yaml b/charts/all/vllm-inference-service/Chart.yaml
new file mode 100644
index 00000000..12dd68c4
--- /dev/null
+++ b/charts/all/vllm-inference-service/Chart.yaml
@@ -0,0 +1,6 @@
+apiVersion: v2
+name: vllm-inference-service  # default base for the chart's name/fullname helpers (.Chart.Name)
+description: A Helm chart for Kubernetes
+type: application
+version: 0.1.0  # chart version; combined with the chart name in the helm.sh/chart label
+appVersion: "1.16.0"  # emitted as the app.kubernetes.io/version label by the labels helper
diff --git a/charts/all/tgis-server/templates/_helpers.tpl b/charts/all/vllm-inference-service/templates/_helpers.tpl
similarity index 70%
rename from charts/all/tgis-server/templates/_helpers.tpl
rename to charts/all/vllm-inference-service/templates/_helpers.tpl
index 2a68af77..075ef98d 100644
--- a/charts/all/tgis-server/templates/_helpers.tpl
+++ b/charts/all/vllm-inference-service/templates/_helpers.tpl
@@ -1,7 +1,7 @@
 {{/*
 Expand the name of the chart.
 */}}
-{{- define "tgis-server.name" -}}
+{{- define "vllm-inference-service.name" -}}
 {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
 {{- end }}
 
@@ -10,7 +10,7 @@ Create a default fully qualified app name.
 We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
 If release name contains chart name it will be used as a full name.
 */}}
-{{- define "tgis-server.fullname" -}}
+{{- define "vllm-inference-service.fullname" -}}
 {{- if .Values.fullnameOverride }}
 {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
 {{- else }}
@@ -26,16 +26,16 @@ If release name contains chart name it will be used as a full name.
 {{/*
 Create chart name and version as used by the chart label.
 */}}
-{{- define "tgis-server.chart" -}}
+{{- define "vllm-inference-service.chart" -}}
 {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
 {{- end }}
 
 {{/*
 Common labels
 */}}
-{{- define "tgis-server.labels" -}}
-helm.sh/chart: {{ include "tgis-server.chart" . }}
-{{ include "tgis-server.selectorLabels" . }}
+{{- define "vllm-inference-service.labels" -}}
+helm.sh/chart: {{ include "vllm-inference-service.chart" . }}
+{{ include "vllm-inference-service.selectorLabels" . }}
 {{- if .Chart.AppVersion }}
 app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
 {{- end }}
@@ -45,17 +45,17 @@ app.kubernetes.io/managed-by: {{ .Release.Service }}
 {{/*
 Selector labels
 */}}
-{{- define "tgis-server.selectorLabels" -}}
-app.kubernetes.io/name: {{ include "tgis-server.name" . }}
+{{- define "vllm-inference-service.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "vllm-inference-service.name" . }}
 app.kubernetes.io/instance: {{ .Release.Name }}
 {{- end }}
 
 {{/*
 Create the name of the service account to use
 */}}
-{{- define "tgis-server.serviceAccountName" -}}
+{{- define "vllm-inference-service.serviceAccountName" -}}
 {{- if .Values.serviceAccount.create }}
-{{- default (include "tgis-server.fullname" .) .Values.serviceAccount.name }}
+{{- default (include "vllm-inference-service.fullname" .) .Values.serviceAccount.name }}
 {{- else }}
 {{- default "default" .Values.serviceAccount.name }}
 {{- end }}
diff --git a/charts/all/llm-serving-service/templates/accelerator-profile.yaml b/charts/all/vllm-inference-service/templates/accelerator-profile.yaml
similarity index 63%
rename from charts/all/llm-serving-service/templates/accelerator-profile.yaml
rename to charts/all/vllm-inference-service/templates/accelerator-profile.yaml
index 4f26e8e0..ebcafd19 100644
--- a/charts/all/llm-serving-service/templates/accelerator-profile.yaml
+++ b/charts/all/vllm-inference-service/templates/accelerator-profile.yaml
@@ -1,3 +1,4 @@
+{{- if .Values.acceleratorProfile.enabled }}
 apiVersion: dashboard.opendatahub.io/v1
 kind: AcceleratorProfile
 metadata:
@@ -8,6 +9,5 @@ spec:
   enabled: true
   identifier: nvidia.com/gpu
   tolerations:
-  - effect: NoSchedule
-    key: odh-notebook
-    operator: Exists
+    {{- toYaml .Values.vllmInferenceService.tolerations | nindent 4 }}
+{{- end }}
diff --git a/charts/all/vllm-inference-service/templates/inference-service.yaml b/charts/all/vllm-inference-service/templates/inference-service.yaml
new file mode 100644
index 00000000..8d63b817
--- /dev/null
+++ b/charts/all/vllm-inference-service/templates/inference-service.yaml
@@ -0,0 +1,60 @@
+apiVersion: serving.kserve.io/v1beta1
+kind: InferenceService  # predictor for the vLLM model; spec.predictor.model.runtime points at the chart's ServingRuntime
+metadata:
+  annotations:
+    {{- toYaml .Values.vllmInferenceService.annotations | nindent 4 }}
+  name: {{ include "vllm-inference-service.fullname" . }}
+  labels:
+    opendatahub.io/dashboard: 'true'
+spec:
+  predictor:
+    annotations:
+      {{- toYaml .Values.vllmInferenceService.predictor.annotations | nindent 6 }}
+    maxReplicas: {{ .Values.vllmInferenceService.predictor.replicas }}  # max and min read the same value, so the replica count is fixed
+    minReplicas: {{ .Values.vllmInferenceService.predictor.replicas }}
+    model:
+      modelFormat:
+        name: vLLM
+      name: ''
+      resources:
+        {{- toYaml .Values.vllmInferenceService.predictor.resources | nindent 8 }}
+      runtime: {{ include "vllm-inference-service.fullname" . }}  # same helper that names the ServingRuntime in serving-runtime.yaml
+    restartPolicy: Always
+    tolerations:
+      {{- toYaml .Values.vllmInferenceService.tolerations | nindent 6 }}
+    affinity:
+      {{- toYaml .Values.vllmInferenceService.predictor.affinity | nindent 6 }}
+    initContainers:
+      - name: download-model  # downloads the MODEL_ID snapshot into /cache/models
+        image: registry.access.redhat.com/ubi9/python-39
+        imagePullPolicy: IfNotPresent
+        command: ["/bin/bash", "-ec"]  # -e: stop at the first failing command; -c: run the script given in args
+        args:
+          - |
+            pip install --no-cache-dir huggingface_hub
+            python - <<'PY'
+            from huggingface_hub import snapshot_download, login
+            import os
+            token = os.environ.get("HF_TOKEN")
+            model = os.environ.get("MODEL_ID")
+            login(token=token)
+            snapshot_download(
+                repo_id=model,
+                local_dir="/cache/models"
+            )
+            PY
+        env:
+          - name: HF_HOME
+            value: /cache
+          - name: HF_TOKEN  # read from key "hftoken" of the Secret "huggingface-secret"
+            valueFrom:
+              secretKeyRef:
+                name: huggingface-secret
+                key: hftoken
+          - name: MODEL_ID
+            value: {{ .Values.global.model.vllm | quote }}
+        volumeMounts:  # "models" and "cache" are declared in serving-runtime.yaml, not in this manifest
+          - name: models
+            mountPath: /cache/models
+          - name: cache
+            mountPath: /cache
diff --git a/charts/all/vllm-inference-service/templates/route.yaml b/charts/all/vllm-inference-service/templates/route.yaml
new file mode 100644
index 00000000..dc33f50e
--- /dev/null
+++ b/charts/all/vllm-inference-service/templates/route.yaml
@@ -0,0 +1,21 @@
+apiVersion: route.openshift.io/v1
+kind: Route  # edge-terminated route in front of the "<fullname>-predictor" Service
+metadata:
+  name: {{ include "vllm-inference-service.fullname" . }}
+  labels:
+    {{- include "vllm-inference-service.labels" . | nindent 4 }}
+  annotations:
+    haproxy.router.openshift.io/timeout: 5m  # router-side timeout for this route
+    argocd.argoproj.io/sync-wave: "30"  # later wave than the "20" set on the InferenceService/ServingRuntime annotations in values.yaml
+spec:
+  host: {{ printf "%s-predictor-%s.%s" (include "vllm-inference-service.fullname" .) .Release.Namespace .Values.global.localClusterDomain }}  # <fullname>-predictor-<namespace>.<global.localClusterDomain>
+  port:
+    targetPort: http
+  tls:
+    insecureEdgeTerminationPolicy: Allow  # NOTE(review): plain HTTP is accepted alongside HTTPS; confirm this is intended ("Redirect" would force TLS)
+    termination: edge
+  to:
+    kind: Service
+    name: {{ printf "%s-predictor" (include "vllm-inference-service.fullname" .) }}
+    weight: 100
+  wildcardPolicy: None
diff --git a/charts/all/vllm-inference-service/templates/serving-runtime.yaml b/charts/all/vllm-inference-service/templates/serving-runtime.yaml
new file mode 100644
index 00000000..9c18f6fd
--- /dev/null
+++ b/charts/all/vllm-inference-service/templates/serving-runtime.yaml
@@ -0,0 +1,56 @@
+apiVersion: serving.kserve.io/v1alpha1
+kind: ServingRuntime
+metadata:
+  annotations:
+    {{- toYaml .Values.vllmServingRuntime.annotations | nindent 4 }}
+  name: {{ include "vllm-inference-service.fullname" . }}
+  labels:
+    opendatahub.io/dashboard: 'true'
+spec:
+  annotations:
+    prometheus.io/path: /metrics
+    prometheus.io/port: '8080'
+  containers:
+    - args:
+        {{- toYaml .Values.vllmServingRuntime.args | nindent 8 }}
+        - {{ printf "--served-model-name=%s" (.Values.global.model.vllm | splitList "/" | last) }}  # final path segment of the model ID; also correct when the ID has no "org/" prefix
+        - {{ printf "--port=%d" (int .Values.vllmServingRuntime.port) }}  # same value as containerPort below
+      command:
+        {{- toYaml .Values.vllmServingRuntime.command | nindent 8 }}
+      env:
+        - name: HF_HOME
+          value: /cache
+        - name: HF_TOKEN
+          valueFrom:
+            secretKeyRef:
+              key: hftoken
+              name: huggingface-secret
+        - name: MODEL_ID
+          value: {{ .Values.global.model.vllm | quote }}  # quoted so the env value is always a YAML string, as in inference-service.yaml
+        - name: HF_HUB_OFFLINE
+          value: '0'
+      image: "{{ .Values.vllmServingRuntime.image.repository }}:{{ .Values.vllmServingRuntime.image.tag }}"
+      name: kserve-container
+      ports:
+        - containerPort: {{ .Values.vllmServingRuntime.port }}
+          protocol: TCP
+      volumeMounts:
+        - mountPath: /dev/shm
+          name: shm
+        - mountPath: /cache/models
+          name: models
+        - mountPath: /cache
+          name: cache
+  multiModel: false
+  supportedModelFormats:
+    - autoSelect: true
+      name: vLLM
+  volumes:
+    - name: shm
+      emptyDir:
+        medium: Memory
+        sizeLimit: 2Gi
+    - name: models
+      emptyDir: {}
+    - name: cache
+      emptyDir: {}
diff --git a/charts/all/vllm-inference-service/values.yaml b/charts/all/vllm-inference-service/values.yaml
new file mode 100644
index 00000000..8131abc1
--- /dev/null
+++ b/charts/all/vllm-inference-service/values.yaml
@@ -0,0 +1,61 @@
+global:
+  model:
+    vllm: ibm-granite/granite-3.3-8b-instruct  # HuggingFace repo ID: downloaded by the init container; the part after "/" becomes --served-model-name
+
+vllmInferenceService:
+  annotations:
+    openshift.io/display-name: vllm-inference
+    serving.kserve.io/deploymentMode: RawDeployment
+    argocd.argoproj.io/sync-wave: "20"
+
+  predictor:
+    annotations:
+      serving.knative.dev/progress-deadline: 30m
+    replicas: 1  # rendered as both minReplicas and maxReplicas
+    resources:
+      limits:
+        nvidia.com/gpu: '1'
+    affinity:
+      nodeAffinity:
+        requiredDuringSchedulingIgnoredDuringExecution:
+          nodeSelectorTerms:
+            - matchExpressions:
+                - key: nvidia.com/gpu.present
+                  operator: In
+                  values: ["true"]
+
+  tolerations:  # rendered into both the predictor spec and the AcceleratorProfile
+    - effect: NoSchedule
+      key: odh-notebook
+      operator: Exists
+
+vllmServingRuntime:
+  annotations:
+    opendatahub.io/accelerator-name: nvidia-gpu
+    opendatahub.io/apiProtocol: REST
+    opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]'
+    openshift.io/display-name: vllm-inference
+    argocd.argoproj.io/sync-wave: "20"
+
+  args:  # the ServingRuntime template appends --served-model-name and --port after this list
+    - "--model=/cache/models"  # directory the download-model init container writes the snapshot to
+    - "--distributed-executor-backend=mp"
+    - "--max-model-len=4096"
+    - "--dtype=half"
+    - "--gpu-memory-utilization"
+    - "0.98"
+    - "--enforce-eager"
+
+  command:
+    - python
+    - '-m'
+    - vllm.entrypoints.openai.api_server
+
+  image:
+    repository: quay.io/modh/vllm
+    tag: rhoai-2.20-cuda
+
+  port: 8080  # used for both the --port argument and the containerPort
+
+acceleratorProfile:
+  enabled: true  # false skips rendering the AcceleratorProfile template
diff --git a/charts/region/.keep b/charts/region/.keep
deleted file mode 100644
index e69de29b..00000000
diff --git a/overrides/values-AWS.yaml b/overrides/values-AWS.yaml
deleted file mode 100644
index 03fa0775..00000000
--- a/overrides/values-AWS.yaml
+++ /dev/null
@@ -1,26 +0,0 @@
-# The following snippet can be commented out in oroder
-# to enable letsencrypt certificates on API endpoint and default
-# ingress of the cluster
-# It is currently very experimental and unsupported.
-# PLEASE read https://github.com/hybrid-cloud-patterns/common/tree/main/letsencrypt#readme
-# for all the limitations around it
-
-
-# letsencrypt:
-#   enabled: true
-#   api_endpoint: true
-#   # FIXME: tweak this to match your region
-#   region: eu-central-1
-#   server: https://acme-v02.api.letsencrypt.org/directory
-#   # server: https://acme-staging-v02.api.letsencrypt.org/directory
-#   # FIXME: set this to your correct email
-#   email: iwashere@iwashere.com
-#
-# clusterGroup:
-#   applications:
-#     letsencrypt:
-#       name: letsencrypt
-#       namespace: letsencrypt
-#       # Using 'default' as that exists everywhere
-#       project: default
-#       path: common/letsencrypt
diff --git a/overrides/values-Azure.yaml b/overrides/values-Azure.yaml
new file mode 100644
index 00000000..ab1af5b0
--- /dev/null
+++ b/overrides/values-Azure.yaml
@@ -0,0 +1,17 @@
+global:
+  db:
+    type: MSSQL  # MS SQL Server from the local chart, per the db.type list in values-global.yaml
+  model:
+    vllm: solidrust/Mistral-7B-Instruct-v0.3-AWQ
+    embedding: sentence-transformers/distiluse-base-multilingual-cased
+  storageClass: azurefile-csi
+
+vllmServingRuntime:
+  args:  # Helm replaces lists instead of merging them: this repeats the chart defaults with --dtype=half swapped for --quantization=awq
+    - "--model=/cache/models"
+    - "--distributed-executor-backend=mp"
+    - "--max-model-len=4096"
+    - "--quantization=awq"
+    - "--gpu-memory-utilization"
+    - "0.98"
+    - "--enforce-eager"
diff --git a/overrides/values-IBMCloud.yaml b/overrides/values-IBMCloud.yaml
deleted file mode 100644
index 38d7be76..00000000
--- a/overrides/values-IBMCloud.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-# When using IBM ROKS the route certificates are signed by letsencrypt
-# By default the ESO configuration uses the kube-root-ca.crt configmap
-# to validate the connection to vault. Since this configmap will not contain
-# the letsencrypt CA, ESO will be unable to connect to the vault and return an
-# x509 CA unknown error.
-# Uncomment the following if you are using IBM ROKS (IPI installs on IBM Cloud are unaffected)
-
-# golangExternalSecrets:
-#   caProvider:
-#     enabled: false
diff --git a/values-global.yaml b/values-global.yaml
index b07de26c..8943afab 100644
--- a/values-global.yaml
+++ b/values-global.yaml
@@ -5,13 +5,22 @@ global:
     useCSV: false
     syncPolicy: Automatic
     installPlanApproval: Automatic
-# Possible value for db.type = [REDIS, EDB, ELASTIC]
+  # Possible values for RAG vector DB db.type:
+  #   REDIS    -> Redis (Local chart deploy)
+  #   EDB      -> PGVector (Local chart deploy)
+  #   ELASTIC  -> Elasticsearch (Local chart deploy)
+  #   MSSQL    -> MS SQL Server (Local chart deploy)
+  #   AZURESQL -> Azure SQL (Pre-existing in Azure)
   db:
     index: docs
     type: EDB
-# Add for model ID
+  # Models used by the inference service (should be a HuggingFace model ID)
   model:
-      modelId: ibm-granite/granite-3.1-8b-instruct
+    vllm: ibm-granite/granite-3.3-8b-instruct
+    embedding: sentence-transformers/all-mpnet-base-v2
+
+  storageClass: gp3-csi
+
 main:
   clusterGroupName: hub
   multiSourceConfig:
diff --git a/values-group-one.yaml b/values-group-one.yaml
deleted file mode 100644
index 536313f6..00000000
--- a/values-group-one.yaml
+++ /dev/null
@@ -1,106 +0,0 @@
-global:
-  options:
-    useCSV: False
-    syncPolicy: Automatic
-    installPlanApproval: Automatic
-clusterGroup:
-  name: group-one
-  isHubCluster: false
-  namespaces:
-    - rag-llm
-    - golang-external-secrets
-  subscriptions:
-  projects:
-    - eso
-    - rag-llm
-    - llm-monitoring
-  applications:
-    golang-external-secrets:
-      name: golang-external-secrets
-      namespace: golang-external-secrets
-      project: eso
-      path: common/golang-external-secrets
-    rag-llm:
-      name: rag-llm
-      namespace: rag-llm
-      project: rag-llm
-      path: charts/all/rag-llm
-    llm-monitoring:
-      name: llm-monitoring
-      namespace: llm-monitoring
-      project: llm-monitoring
-      kustomize: true
-      path: charts/all/llm-monitoring/kustomize/overlays/dev
-  imperative:
-    # NOTE: We *must* use lists and not hashes. As hashes lose ordering once parsed by helm
-    # The default schedule is every 10 minutes: imperative.schedule
-    # Total timeout of all jobs is 1h: imperative.activeDeadlineSeconds
-    # imagePullPolicy is set to always: imperative.imagePullPolicy
-    # For additional overrides that apply to the jobs, please refer to
-    # https://hybrid-cloud-patterns.io/imperative-actions/#additional-job-customizations
-    jobs:
-      - name: hello-world
-        # ansible playbook to be run
-        playbook: common/ansible/playbooks/hello-world/hello-world.yaml
-        # per playbook timeout in seconds
-        timeout: 234
-        # verbosity: "-v"
-  # Explicitly mention the cluster-state based overrides we plan to use for this pattern.
-  # We can use self-referential variables because the chart calls the tpl function with these variables defined
-  sharedValueFiles:
-    - '/overrides/values-{{ $.Values.global.clusterPlatform }}.yaml'
-    # To mirror the "Classic" magic include structure, the clusterGroup would need all of these:
-    # sharedValueFiles:
-    #   - '/overrides/values-{{ $.Values.global.clusterPlatform }}.yaml'
-    #   - '/overrides/values-{{ $.Values.global.clusterPlatform }}-{{ $.Values.global.clusterVersion }}.yaml'
-    #   - '/overrides/values-{{ $.Values.global.clusterPlatform }}-{{ $.Values.clusterGroup.name }}.yaml'
-    #   - '/overrides/values-{{ $.Values.global.clusterVersion }}-{{ $.Values.clusterGroup.name }}.yaml"
-#  To have apps in multiple flavors, use namespaces and use helm overrides as appropriate
-#
-#    pipelines:
-#      name: pipelines
-#      namespace: production
-#      project: datacenter
-#      path: applications/pipeline
-#      repoURL: https://github.com/you/applications.git
-#      targetRevision: stable
-#      overrides:
-#      - name: myparam
-#        value: myparam
-#
-#    pipelines_staging:
-#    - name: pipelines
-#      namespace: staging
-#      project: datacenter
-#      path: applications/pipeline
-#      repoURL: https://github.com/you/applications.git
-#      targetRevision: main
-#
-#   Additional applications
-#   Be sure to include additional resources your apps will require
-#   +X machines
-#   +Y RAM
-#   +Z CPU
-#    vendor-app:
-#      name: vendor-app
-#      namespace: default
-#      project: vendor
-#      path: path/to/myapp
-#      repoURL: https://github.com/vendor/applications.git
-#      targetRevision: main
-
-#  managedSites:
-#    factory:
-#      name: factory
-#      # repoURL: https://github.com/dagger-refuse-cool/manuela-factory.git
-#      targetRevision: main
-#      path: applications/factory
-#      helmOverrides:
-#      - name: site.isHubCluster
-#        value: false
-#      clusterSelector:
-#        matchExpressions:
-#        - key: vendor
-#          operator: In
-#          values:
-#            - OpenShift
\ No newline at end of file
diff --git a/values-hub.yaml b/values-hub.yaml
index d9c6a8be..1a483092 100644
--- a/values-hub.yaml
+++ b/values-hub.yaml
@@ -1,6 +1,7 @@
 clusterGroup:
   name: hub
   isHubCluster: true
+
   namespaces:
     - open-cluster-management
     - vault
@@ -12,7 +13,7 @@ clusterGroup:
         targetNamespaces: []
     - rag-llm:
         operatorGroup: true
-        targetNamespaces: 
+        targetNamespaces:
           - rag-llm
         labels:
           opendatahub.io/dashboard: "true"
@@ -20,71 +21,43 @@ clusterGroup:
     - openshift-serverless:
         operatorGroup: true
         targetNamespaces: []
+
   subscriptions:
-    # Don't install RHOAI via validated patterns
-    # RHOAI must be installed after service mesh and other dependencies are installed or it will break. 
-    # The RHOAI Application will install the operator after a validation check has passed
     nfd:
       name: nfd
       namespace: openshift-nfd
-      channel: stable
     nvidia:
       name: gpu-operator-certified
       namespace: nvidia-gpu-operator
-      channel: v24.6
       source: certified-operators
     edb:
       name: cloud-native-postgresql
       namespace: openshift-operators
-      channel: stable-v1.23
       source: certified-operators
     elastic:
       name: elasticsearch-eck-operator-certified
       namespace: rag-llm
-      channel: stable
       source: certified-operators
-      sourceNamespace: openshift-marketplace
     serverless:
       name: serverless-operator
       namespace: openshift-serverless
-      channel: stable
     servicemesh:
       name: servicemeshoperator
       namespace: openshift-operators
-      channel: stable
     rhoai:
       name: rhods-operator
       namespace: redhat-ods-operator
-      channel: stable-2.19
-      source: redhat-operators
-      sourceNamespace: openshift-marketplace
+
   projects:
     - hub
     - rag-llm
     - llm-monitoring
     - gpu-config
     - openshift-ai
-  # Explicitly mention the cluster-state based overrides we plan to use for this pattern.
-  # We can use self-referential variables because the chart calls the tpl function with these variables defined
+
   sharedValueFiles:
     - '/overrides/values-{{ $.Values.global.clusterPlatform }}.yaml'
-    - 'values-rag-llm-gitops.yaml'
-  # sharedValueFiles is a flexible mechanism that will add the listed valuefiles to every app defined in the
-  # applications section. We intend this to supplement and possibly even replace previous "magic" mechanisms, though
-  # we do not at present have a target date for removal.
-  #
-  # To replicate the "classic" magic include structure, the clusterGroup would need all of these
-  # sharedValueFiles, in this order:
-  #   - '/overrides/values-{{ $.Values.global.clusterPlatform }}.yaml'
-  #   - '/overrides/values-{{ $.Values.global.clusterPlatform }}-{{ $.Values.global.clusterVersion }}.yaml'
-  #   - '/overrides/values-{{ $.Values.global.clusterPlatform }}-{{ $.Values.clusterGroup.name }}.yaml'
-  #   - '/overrides/values-{{ $.Values.global.clusterVersion }}-{{ $.Values.clusterGroup.name }}.yaml"
-  #   - '/overrides/values-{{ $.Values.global.localClusterName }}.yaml'
 
-  # This kind of variable substitution will work with any of the variables the Validated Patterns operator knows
-  # about and sets, so this is also possible, for example:
-  #   - '/overrides/values-{{ $.Values.global.hubClusterDomain }}.yaml'
-  #   - '/overrides/values-{{ $.Values.global.localClusterDomain }}.yaml'
   applications:
     vault:
       name: vault
@@ -98,20 +71,16 @@ clusterGroup:
       project: hub
       chart: golang-external-secrets
       chartVersion: 0.1.*
-    minio:
-      name: minio
-      namespace: rag-llm
-      project: hub
-      path: charts/all/minio
-    llm-serving-service:
-      name: llm-serving-service
+    vllm-inference-service:
+      name: vllm-inference-service
       namespace: rag-llm
       project: hub
-      path: charts/all/llm-serving-service
+      path: charts/all/vllm-inference-service
       syncPolicy:
-        automated: {}
+        automated:
+          selfHeal: true
         retry:
-          limit: 50
+          limit: 20
     rag-llm:
       name: rag-llm
       namespace: rag-llm
@@ -123,57 +92,28 @@ clusterGroup:
       project: llm-monitoring
       kustomize: true
       path: charts/all/llm-monitoring/kustomize/overlays/dev
-
     nfd-config:
       name: nfd-config
       namespace: openshift-cfd
       project: gpu-config
       path: charts/all/nfd-config
-
     nvidia-config:
       name: nvidia-config
       namespace: nvidia-network-operator
       project: gpu-config
       path: charts/all/nvidia-gpu-config
-
     llm-ui-config:
       name: rag-llm-ui-config
       namespace: rag-llm
       project: gpu-config
       path: charts/all/rag-llm-ui-config
-
     llm-monitoring-config:
       name: grafana-ui-config
       namespace: llm-monitoring
       project: gpu-config
       path: charts/all/llm-monitoring-config
-
     openshift-ai:
       name: openshift-ai
       namespace: redhat-ods-operator
       project: openshift-ai
       path: charts/all/rhods
-
-  imperative:
-    # NOTE: We *must* use lists and not hashes. As hashes lose ordering once parsed by helm
-    # The default schedule is every 10 minutes: imperative.schedule
-    # Total timeout of all jobs is 1h: imperative.activeDeadlineSeconds
-    # imagePullPolicy is set to always: imperative.imagePullPolicy
-    # For additional overrides that apply to the jobs, please refer to
-    # https://hybrid-cloud-patterns.io/imperative-actions/#additional-job-customizations
-    jobs:
-      - name: hello-world
-        # ansible playbook to be run
-        playbook: rhvp.cluster_utils.hello_world
-        # per playbook timeout in seconds
-        timeout: 234
-        # verbosity: "-v"
-  managedClusterGroups:
-    exampleRegion:
-      name: group-one
-      acmlabels:
-        - name: clusterGroup
-          value: group-one
-      helmOverrides:
-        - name: clusterGroup.isHubCluster
-          value: false
diff --git a/values-rag-llm-gitops.yaml b/values-rag-llm-gitops.yaml
deleted file mode 100644
index e69de29b..00000000
diff --git a/values-secret.yaml.template b/values-secret.yaml.template
index b9e9d6d5..cc2ac68a 100644
--- a/values-secret.yaml.template
+++ b/values-secret.yaml.template
@@ -15,12 +15,21 @@ secrets:
     fields:
     - name: hftoken
       value: null
-    - name: modelId
-      value: "ibm-granite/granite-3.1-8b-instruct"
-  - name: minio
+  - name: mssql
     fields:
-    - name: MINIO_ROOT_USER
-      value: minio
-    - name: MINIO_ROOT_PASSWORD
+    - name: sa-pass
       value: null
       onMissingValue: generate
+      description: mssql password for sa user
+  - name: azuresql
+    fields:
+    - name: user
+      value: adminuser
+      description: server admin user for azure sql
+    - name: password
+      value: null
+      onMissingValue: generate
+      description: server admin password for azure sql
+    - name: server
+      value: rag-llm-gitops.database.windows.net
+      description: server hostname