From e8cf465345821fc21939e9e5b6335c0a725aca6f Mon Sep 17 00:00:00 2001 From: Drew Minnear Date: Wed, 12 Mar 2025 18:39:47 -0400 Subject: [PATCH] add MS SQL Server as a DB Provider for RAG backend --- Makefile | 14 +- .../create-gpu-machineset-azure.yaml | 65 +++ ...ne-set.yaml => create-gpu-machineset.yaml} | 2 +- .../templates/gpu-machineset-azure.j2 | 44 ++ ...{gpu-machine-sets.j2 => gpu-machineset.j2} | 2 +- charts/all/llm-serving-service/Chart.yaml | 24 -- .../templates/_helpers.tpl | 62 --- .../templates/download-model.yaml | 55 --- .../templates/inference-service.yaml | 37 -- .../templates/model-pvc.yaml | 11 - .../templates/serving-runtime.yaml | 68 --- charts/all/llm-serving-service/values.yaml | 2 - charts/all/minio/.helmignore | 23 - .../all/minio/templates/external-secret.yaml | 13 - charts/all/minio/templates/setup-minio.yaml | 397 ------------------ charts/all/minio/values.yaml | 6 - .../templates/node-feature-discovery.yaml | 4 +- .../nvidia-cluster-policy-config.yaml | 2 - .../all/rag-llm/charts/azure-sql/Chart.yaml | 6 + .../azure-sql/templates/external-secret.yaml | 36 ++ .../all/rag-llm/charts/azure-sql/values.yaml | 7 + .../elastic/templates/elasticsearch.yaml | 2 +- .../charts/mssql}/Chart.yaml | 7 +- .../charts/mssql}/templates/_helpers.tpl | 28 +- .../charts/mssql/templates/deployment.yaml | 76 ++++ .../mssql/templates/external-secret.yaml | 31 ++ .../charts/mssql/templates/mssqlconfig.yaml | 25 ++ .../rag-llm/charts/mssql/templates/pvc.yaml | 53 +++ .../charts/mssql/templates/service.yaml | 16 + charts/all/rag-llm/charts/mssql/values.yaml | 52 +++ charts/all/rag-llm/files/config.yaml | 8 +- charts/all/rag-llm/templates/deployment.yaml | 40 +- .../templates/populate-vectordb-job.yaml | 48 ++- charts/all/rag-llm/values.yaml | 64 +-- charts/all/rhods/Chart.yaml | 2 +- charts/all/rhods/templates/_helpers.tpl | 20 +- charts/all/rhods/templates/dsc.yaml | 3 +- charts/all/tgis-server/.helmignore | 23 - charts/all/tgis-server/Chart.yaml | 24 -- 
.../all/tgis-server/templates/deployment.yaml | 99 ----- charts/all/tgis-server/templates/hpa.yaml | 32 -- charts/all/tgis-server/templates/pvc.yaml | 28 -- charts/all/tgis-server/templates/service.yaml | 20 - .../tgis-server/templates/serviceaccount.yaml | 13 - charts/all/tgis-server/values.yaml | 130 ------ .../.helmignore | 0 charts/all/vllm-inference-service/Chart.yaml | 6 + .../templates/_helpers.tpl | 20 +- .../templates/accelerator-profile.yaml | 6 +- .../templates/inference-service.yaml | 60 +++ .../templates/route.yaml | 21 + .../templates/serving-runtime.yaml | 56 +++ charts/all/vllm-inference-service/values.yaml | 61 +++ charts/region/.keep | 0 overrides/values-AWS.yaml | 26 -- overrides/values-Azure.yaml | 17 + overrides/values-IBMCloud.yaml | 10 - values-global.yaml | 15 +- values-group-one.yaml | 106 ----- values-hub.yaml | 82 +--- values-rag-llm-gitops.yaml | 0 values-secret.yaml.template | 21 +- 62 files changed, 849 insertions(+), 1382 deletions(-) create mode 100644 ansible/playbooks/create-gpu-machineset-azure.yaml rename ansible/playbooks/{create-gpu-machine-set.yaml => create-gpu-machineset.yaml} (98%) create mode 100644 ansible/playbooks/templates/gpu-machineset-azure.j2 rename ansible/playbooks/templates/{gpu-machine-sets.j2 => gpu-machineset.j2} (99%) delete mode 100644 charts/all/llm-serving-service/Chart.yaml delete mode 100644 charts/all/llm-serving-service/templates/_helpers.tpl delete mode 100644 charts/all/llm-serving-service/templates/download-model.yaml delete mode 100644 charts/all/llm-serving-service/templates/inference-service.yaml delete mode 100644 charts/all/llm-serving-service/templates/model-pvc.yaml delete mode 100644 charts/all/llm-serving-service/templates/serving-runtime.yaml delete mode 100644 charts/all/llm-serving-service/values.yaml delete mode 100644 charts/all/minio/.helmignore delete mode 100644 charts/all/minio/templates/external-secret.yaml delete mode 100644 charts/all/minio/templates/setup-minio.yaml delete 
mode 100644 charts/all/minio/values.yaml create mode 100644 charts/all/rag-llm/charts/azure-sql/Chart.yaml create mode 100644 charts/all/rag-llm/charts/azure-sql/templates/external-secret.yaml create mode 100644 charts/all/rag-llm/charts/azure-sql/values.yaml rename charts/all/{minio => rag-llm/charts/mssql}/Chart.yaml (92%) rename charts/all/{minio => rag-llm/charts/mssql}/templates/_helpers.tpl (72%) create mode 100644 charts/all/rag-llm/charts/mssql/templates/deployment.yaml create mode 100644 charts/all/rag-llm/charts/mssql/templates/external-secret.yaml create mode 100644 charts/all/rag-llm/charts/mssql/templates/mssqlconfig.yaml create mode 100644 charts/all/rag-llm/charts/mssql/templates/pvc.yaml create mode 100644 charts/all/rag-llm/charts/mssql/templates/service.yaml create mode 100644 charts/all/rag-llm/charts/mssql/values.yaml delete mode 100644 charts/all/tgis-server/.helmignore delete mode 100644 charts/all/tgis-server/Chart.yaml delete mode 100644 charts/all/tgis-server/templates/deployment.yaml delete mode 100644 charts/all/tgis-server/templates/hpa.yaml delete mode 100644 charts/all/tgis-server/templates/pvc.yaml delete mode 100644 charts/all/tgis-server/templates/service.yaml delete mode 100644 charts/all/tgis-server/templates/serviceaccount.yaml delete mode 100644 charts/all/tgis-server/values.yaml rename charts/all/{llm-serving-service => vllm-inference-service}/.helmignore (100%) create mode 100644 charts/all/vllm-inference-service/Chart.yaml rename charts/all/{tgis-server => vllm-inference-service}/templates/_helpers.tpl (70%) rename charts/all/{llm-serving-service => vllm-inference-service}/templates/accelerator-profile.yaml (63%) create mode 100644 charts/all/vllm-inference-service/templates/inference-service.yaml create mode 100644 charts/all/vllm-inference-service/templates/route.yaml create mode 100644 charts/all/vllm-inference-service/templates/serving-runtime.yaml create mode 100644 charts/all/vllm-inference-service/values.yaml delete 
mode 100644 charts/region/.keep delete mode 100644 overrides/values-AWS.yaml create mode 100644 overrides/values-Azure.yaml delete mode 100644 overrides/values-IBMCloud.yaml delete mode 100644 values-group-one.yaml delete mode 100644 values-rag-llm-gitops.yaml diff --git a/Makefile b/Makefile index 61426ed5..52850dab 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,8 @@ +# Azure gpu vars +GPU_VM_SIZE ?= Standard_NC8as_T4_v3 +GPU_REPLICAS ?= 1 +OVERRIDE_ZONE ?= + .PHONY: default default: help @@ -16,8 +21,13 @@ install: operator-deploy post-install ## installs the pattern and loads the secr @echo "Installed" .PHONY: create-gpu-machineset -create-gpu-machineset: ## Creates a gpu machineset - ansible-playbook ansible/playbooks/create-gpu-machine-set.yaml +create-gpu-machineset: ## Creates a gpu machineset for AWS + ansible-playbook ansible/playbooks/create-gpu-machineset.yaml + +.PHONY: create-gpu-machineset-azure +create-gpu-machineset-azure: ## Creates an Azure GPU machineset (overrides: GPU_VM_SIZE, GPU_REPLICAS, OVERRIDE_ZONE) + ansible-playbook ansible/playbooks/create-gpu-machineset-azure.yaml \ + -e "gpu_vm_size=$(GPU_VM_SIZE) gpu_replicas=$(GPU_REPLICAS) override_zone=$(OVERRIDE_ZONE)" .PHONY: post-install post-install: ## Post-install tasks diff --git a/ansible/playbooks/create-gpu-machineset-azure.yaml b/ansible/playbooks/create-gpu-machineset-azure.yaml new file mode 100644 index 00000000..5a96896d --- /dev/null +++ b/ansible/playbooks/create-gpu-machineset-azure.yaml @@ -0,0 +1,65 @@ +- name: Generate GPU MachineSet for Azure-based clusters + hosts: localhost + connection: local + gather_facts: false + vars: + gpu_vm_size: Standard_NC8as_T4_v3 + gpu_replicas: 1 + override_zone: "" + namespace: openshift-machine-api + tasks: + - name: Get cluster infrastructure object + kubernetes.core.k8s_info: + api_version: config.openshift.io/v1 + kind: Infrastructure + name: cluster + register: infra_info + + - name: Save cluster ID + set_fact: + cluster_id: "{{ 
infra_info.resources[0].status.infrastructureName }}" + + - name: Gather all MachineSets + kubernetes.core.k8s_info: + api_version: machine.openshift.io/v1beta1 + kind: MachineSet + namespace: "{{ namespace }}" + register: ms_list + + - name: Pick the first *worker* MachineSet as a template + set_fact: + base_ms: "{{ item }}" + loop: "{{ ms_list.resources | sort(attribute='metadata.name') }}" + when: + - "'worker' in (item.metadata.labels['machine.openshift.io/cluster-api-machine-role'] | default(''))" + run_once: true + + - name: Extract provider-specific details from the base MachineSet + set_fact: + azure_location: "{{ base_ms.spec.template.spec.providerSpec.value.location }}" + base_zone: "{{ base_ms.spec.template.spec.providerSpec.value.zone }}" + resource_group: "{{ base_ms.spec.template.spec.providerSpec.value.resourceGroup }}" + network_resource_group: "{{ base_ms.spec.template.spec.providerSpec.value.networkResourceGroup }}" + vnet: "{{ base_ms.spec.template.spec.providerSpec.value.vnet }}" + subnet: "{{ base_ms.spec.template.spec.providerSpec.value.subnet }}" + image: "{{ base_ms.spec.template.spec.providerSpec.value.image }}" + os_disk: "{{ base_ms.spec.template.spec.providerSpec.value.osDisk }}" + user_data: "{{ base_ms.spec.template.spec.providerSpec.value.userDataSecret }}" + cred_secret: "{{ base_ms.spec.template.spec.providerSpec.value.credentialsSecret }}" + public_ip: "{{ base_ms.spec.template.spec.providerSpec.value.publicIP | default(false) }}" + + - name: Decide which availability zone to use + set_fact: + gpu_zone: "{{ (override_zone | trim) | default(base_zone, true) }}" + + - name: Render GPU MachineSet manifest + template: + src: templates/gpu-machineset-azure.j2 + dest: /tmp/gpu-machineset-azure.yaml + vars: + ms_name: "nvidia-worker-{{ azure_location | replace(' ', '') }}{{ gpu_zone }}" + + - name: Apply the GPU MachineSet + kubernetes.core.k8s: + state: present + src: /tmp/gpu-machineset-azure.yaml diff --git 
a/ansible/playbooks/create-gpu-machine-set.yaml b/ansible/playbooks/create-gpu-machineset.yaml similarity index 98% rename from ansible/playbooks/create-gpu-machine-set.yaml rename to ansible/playbooks/create-gpu-machineset.yaml index 3e4d894e..62c8d437 100644 --- a/ansible/playbooks/create-gpu-machine-set.yaml +++ b/ansible/playbooks/create-gpu-machineset.yaml @@ -72,7 +72,7 @@ - name: "[create-gpu-machine-set] Generate machineset" ansible.builtin.template: - src: templates/gpu-machine-sets.j2 + src: templates/gpu-machineset.j2 dest: /tmp/gpu-machineset.yaml - name: "[create-gpu-machine-set] Apply machineset to cluster {{ clusterId }}" diff --git a/ansible/playbooks/templates/gpu-machineset-azure.j2 b/ansible/playbooks/templates/gpu-machineset-azure.j2 new file mode 100644 index 00000000..15cfe224 --- /dev/null +++ b/ansible/playbooks/templates/gpu-machineset-azure.j2 @@ -0,0 +1,44 @@ +apiVersion: machine.openshift.io/v1beta1 +kind: MachineSet +metadata: + name: {{ ms_name }} + namespace: openshift-machine-api + labels: + machine.openshift.io/cluster-api-cluster: {{ cluster_id }} +spec: + replicas: {{ gpu_replicas }} + selector: + matchLabels: + machine.openshift.io/cluster-api-cluster: {{ cluster_id }} + machine.openshift.io/cluster-api-machineset: {{ ms_name }} + template: + metadata: + labels: + machine.openshift.io/cluster-api-cluster: {{ cluster_id }} + machine.openshift.io/cluster-api-machine-role: worker + machine.openshift.io/cluster-api-machine-type: worker + machine.openshift.io/cluster-api-machineset: {{ ms_name }} + spec: + taints: + - key: odh-notebook + value: "true" + effect: NoSchedule + metadata: + labels: + node-role.kubernetes.io/odh-notebook: '' + providerSpec: + value: + apiVersion: machine.openshift.io/v1beta1 + kind: AzureMachineProviderSpec + credentialsSecret: {{ cred_secret | to_json }} + location: {{ azure_location | to_json }} + zone: {{ gpu_zone | to_json }} + resourceGroup: {{ resource_group | to_json }} + networkResourceGroup: {{ 
network_resource_group | to_json }} + vnet: {{ vnet | to_json }} + subnet: {{ subnet | to_json }} + vmSize: {{ gpu_vm_size | to_json }} + image: {{ image | to_json }} + osDisk: {{ os_disk | to_json }} + publicIP: {{ public_ip | to_json }} + userDataSecret: {{ user_data | to_json }} diff --git a/ansible/playbooks/templates/gpu-machine-sets.j2 b/ansible/playbooks/templates/gpu-machineset.j2 similarity index 99% rename from ansible/playbooks/templates/gpu-machine-sets.j2 rename to ansible/playbooks/templates/gpu-machineset.j2 index 00ef4063..801ad2ea 100644 --- a/ansible/playbooks/templates/gpu-machine-sets.j2 +++ b/ansible/playbooks/templates/gpu-machineset.j2 @@ -11,7 +11,7 @@ metadata: name: {{ clusterId }}-gpu-{{ cloudRegion }} namespace: openshift-machine-api spec: - replicas: 3 + replicas: 1 selector: matchLabels: machine.openshift.io/cluster-api-cluster: {{ clusterId }} diff --git a/charts/all/llm-serving-service/Chart.yaml b/charts/all/llm-serving-service/Chart.yaml deleted file mode 100644 index 2cd9a568..00000000 --- a/charts/all/llm-serving-service/Chart.yaml +++ /dev/null @@ -1,24 +0,0 @@ -apiVersion: v2 -name: llm-service -description: A Helm chart for Kubernetes - -# A chart can be either an 'application' or a 'library' chart. -# -# Application charts are a collection of templates that can be packaged into versioned archives -# to be deployed. -# -# Library charts provide useful utilities or functions for the chart developer. They're included as -# a dependency of application charts to inject those utilities and functions into the rendering -# pipeline. Library charts do not define any templates and therefore cannot be deployed. -type: application - -# This is the chart version. This version number should be incremented each time you make changes -# to the chart and its templates, including the app version. 
-# Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.0 - -# This is the version number of the application being deployed. This version number should be -# incremented each time you make changes to the application. Versions are not expected to -# follow Semantic Versioning. They should reflect the version the application is using. -# It is recommended to use it with quotes. -appVersion: "1.16.0" diff --git a/charts/all/llm-serving-service/templates/_helpers.tpl b/charts/all/llm-serving-service/templates/_helpers.tpl deleted file mode 100644 index f415bbf2..00000000 --- a/charts/all/llm-serving-service/templates/_helpers.tpl +++ /dev/null @@ -1,62 +0,0 @@ -{{/* -Expand the name of the chart. -*/}} -{{- define "minio.name" -}} -{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* -Create a default fully qualified app name. -We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). -If release name contains chart name it will be used as a full name. -*/}} -{{- define "minio.fullname" -}} -{{- if .Values.fullnameOverride }} -{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- $name := default .Chart.Name .Values.nameOverride }} -{{- if contains $name .Release.Name }} -{{- .Release.Name | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} -{{- end }} -{{- end }} -{{- end }} - -{{/* -Create chart name and version as used by the chart label. -*/}} -{{- define "minio.chart" -}} -{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* -Common labels -*/}} -{{- define "minio.labels" -}} -helm.sh/chart: {{ include "minio.chart" . }} -{{ include "minio.selectorLabels" . 
}} -{{- if .Chart.AppVersion }} -app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} -{{- end }} -app.kubernetes.io/managed-by: {{ .Release.Service }} -{{- end }} - -{{/* -Selector labels -*/}} -{{- define "minio.selectorLabels" -}} -app.kubernetes.io/name: {{ include "minio.name" . }} -app.kubernetes.io/instance: {{ .Release.Name }} -{{- end }} - -{{/* -Create the name of the service account to use -*/}} -{{- define "minio.serviceAccountName" -}} -{{- if .Values.serviceAccount.create }} -{{- default (include "minio.fullname" .) .Values.serviceAccount.name }} -{{- else }} -{{- default "default" .Values.serviceAccount.name }} -{{- end }} -{{- end }} diff --git a/charts/all/llm-serving-service/templates/download-model.yaml b/charts/all/llm-serving-service/templates/download-model.yaml deleted file mode 100644 index 2c36351f..00000000 --- a/charts/all/llm-serving-service/templates/download-model.yaml +++ /dev/null @@ -1,55 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - name: download-model -spec: - selector: {} - template: - spec: - containers: - - args: - - -ec - - |- - pip install huggingface_hub; - export HF_HOME=/tmp/cache/ - cat << 'EOF' | python3 - from huggingface_hub import snapshot_download - from pathlib import Path - from huggingface_hub import login - import subprocess, os - - # Get the environment variable 'hftoken' - hf_token = os.getenv('hftoken') - # Get model id - modelid = os.getenv('modelId') - model_id = modelid.split('/')[-1] - - def run_command(command): - """Run a shell command and check for errors.""" - result = subprocess.run(command, shell=True, check=True, text=True, capture_output=True) - print(result.stdout) - if result.stderr: - print(result.stderr) - - if hf_token is not None and hf_token.strip() != "None": - print("hftoken is set.") - login(token=hf_token) - mistral_models_path = "/cache/models" - snapshot_download(repo_id=modelid, local_dir=mistral_models_path) - EOF - command: - - /bin/bash - envFrom: - - secretRef: - 
name: huggingface-secret - image: registry.access.redhat.com/ubi9/python-39 - imagePullPolicy: IfNotPresent - name: download-model - volumeMounts: - - mountPath: /cache/models - name: models - volumes: - - name: models - persistentVolumeClaim: - claimName: model-pvc - restartPolicy: OnFailure diff --git a/charts/all/llm-serving-service/templates/inference-service.yaml b/charts/all/llm-serving-service/templates/inference-service.yaml deleted file mode 100644 index 49490152..00000000 --- a/charts/all/llm-serving-service/templates/inference-service.yaml +++ /dev/null @@ -1,37 +0,0 @@ -apiVersion: serving.kserve.io/v1beta1 -kind: InferenceService -metadata: - annotations: - openshift.io/display-name: ibm-granite-instruct - serving.knative.openshift.io/enablePassthrough: 'true' - sidecar.istio.io/inject: 'true' - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - name: ibm-granite-instruct - namespace: rag-llm - labels: - opendatahub.io/dashboard: 'true' -spec: - predictor: - annotations: - serving.knative.dev/progress-deadline: 30m - maxReplicas: 1 - minReplicas: 1 - model: - modelFormat: - name: vLLM - name: '' - resources: - limits: - cpu: '8' - memory: 10Gi - nvidia.com/gpu: '1' - requests: - cpu: '2' - memory: 8Gi - nvidia.com/gpu: '1' - runtime: ibm-granite-instruct - restartPolicy: OnFailure - tolerations: - - effect: NoSchedule - key: odh-notebook - operator: Exists \ No newline at end of file diff --git a/charts/all/llm-serving-service/templates/model-pvc.yaml b/charts/all/llm-serving-service/templates/model-pvc.yaml deleted file mode 100644 index 34c31812..00000000 --- a/charts/all/llm-serving-service/templates/model-pvc.yaml +++ /dev/null @@ -1,11 +0,0 @@ -kind: PersistentVolumeClaim -apiVersion: v1 -metadata: - name: model-pvc -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 50Gi - volumeMode: Filesystem \ No newline at end of file diff --git a/charts/all/llm-serving-service/templates/serving-runtime.yaml 
b/charts/all/llm-serving-service/templates/serving-runtime.yaml deleted file mode 100644 index 9ab06125..00000000 --- a/charts/all/llm-serving-service/templates/serving-runtime.yaml +++ /dev/null @@ -1,68 +0,0 @@ -apiVersion: serving.kserve.io/v1alpha1 -kind: ServingRuntime -metadata: - annotations: - opendatahub.io/accelerator-name: nvidia-gpu - opendatahub.io/apiProtocol: REST - opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]' - openshift.io/display-name: ibm-granite-instruct - name: ibm-granite-instruct - namespace: rag-llm - labels: - opendatahub.io/dashboard: 'true' -spec: - annotations: - prometheus.io/path: /metrics - prometheus.io/port: '8080' - containers: - - args: - - '--port=8080' - - '--model=/cache/models' - - '--distributed-executor-backend=mp' - - '--served-model-name=ibm-granite-instruct' - - '--max-model-len=4096' - - '--dtype=half' - - '--gpu-memory-utilization' - - '0.98' - - '--enforce-eager' - command: - - python - - '-m' - - vllm.entrypoints.openai.api_server - env: - - name: HF_HOME - value: /cache - - name: HF_TOKEN - valueFrom: - secretKeyRef: - key: hftoken - name: huggingface-secret - - name: MODEL_ID - valueFrom: - secretKeyRef: - key: modelId - name: huggingface-secret - - name: HF_HUB_OFFLINE - value: '0' - image: 'quay.io/modh/vllm@sha256:c86ff1e89c86bc9821b75d7f2bbc170b3c13e3ccf538bf543b1110f23e056316' - name: kserve-container - ports: - - containerPort: 8080 - protocol: TCP - volumeMounts: - - mountPath: /dev/shm - name: shm - - mountPath: /cache/models - name: models - multiModel: false - supportedModelFormats: - - autoSelect: true - name: vLLM - volumes: - - emptyDir: - medium: Memory - sizeLimit: 2Gi - name: shm - - name: models - persistentVolumeClaim: - claimName: model-pvc diff --git a/charts/all/llm-serving-service/values.yaml b/charts/all/llm-serving-service/values.yaml deleted file mode 100644 index 3a23d362..00000000 --- a/charts/all/llm-serving-service/values.yaml +++ /dev/null @@ -1,2 +0,0 @@ -hfmodel: - 
key: secret/data/hub/hfmodel diff --git a/charts/all/minio/.helmignore b/charts/all/minio/.helmignore deleted file mode 100644 index 0e8a0eb3..00000000 --- a/charts/all/minio/.helmignore +++ /dev/null @@ -1,23 +0,0 @@ -# Patterns to ignore when building packages. -# This supports shell glob matching, relative path matching, and -# negation (prefixed with !). Only one pattern per line. -.DS_Store -# Common VCS dirs -.git/ -.gitignore -.bzr/ -.bzrignore -.hg/ -.hgignore -.svn/ -# Common backup files -*.swp -*.bak -*.tmp -*.orig -*~ -# Various IDEs -.project -.idea/ -*.tmproj -.vscode/ diff --git a/charts/all/minio/templates/external-secret.yaml b/charts/all/minio/templates/external-secret.yaml deleted file mode 100644 index 0317f063..00000000 --- a/charts/all/minio/templates/external-secret.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: "external-secrets.io/v1beta1" -kind: ExternalSecret -metadata: - name: minio-secret - namespace: rag-llm -spec: - refreshInterval: 15s - secretStoreRef: - name: {{ .Values.secretStore.name }} - kind: {{ .Values.secretStore.kind }} - dataFrom: - - extract: - key: {{ .Values.minio.key }} \ No newline at end of file diff --git a/charts/all/minio/templates/setup-minio.yaml b/charts/all/minio/templates/setup-minio.yaml deleted file mode 100644 index c54c8331..00000000 --- a/charts/all/minio/templates/setup-minio.yaml +++ /dev/null @@ -1,397 +0,0 @@ ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: demo-setup ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: demo-setup-cluster-reader -subjects: -- kind: ServiceAccount - name: demo-setup - namespace: rag-llm -roleRef: - kind: ClusterRole - name: cluster-reader - apiGroup: rbac.authorization.k8s.io ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: demo-setup-edit -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: edit -subjects: - - kind: ServiceAccount - name: demo-setup ---- 
-kind: PersistentVolumeClaim -apiVersion: v1 -metadata: - name: minio-pvc -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 50Gi - volumeMode: Filesystem ---- -kind: Deployment -apiVersion: apps/v1 -metadata: - name: minio -spec: - replicas: 1 - selector: - matchLabels: - app: minio - template: - metadata: - creationTimestamp: null - labels: - app: minio - spec: - volumes: - - name: data - persistentVolumeClaim: - claimName: minio-pvc - containers: - - resources: - limits: - cpu: 250m - memory: 1Gi - requests: - cpu: 20m - memory: 100Mi - readinessProbe: - tcpSocket: - port: 9000 - initialDelaySeconds: 5 - timeoutSeconds: 1 - periodSeconds: 5 - successThreshold: 1 - failureThreshold: 3 - terminationMessagePath: /dev/termination-log - name: minio - livenessProbe: - tcpSocket: - port: 9000 - initialDelaySeconds: 30 - timeoutSeconds: 1 - periodSeconds: 5 - successThreshold: 1 - failureThreshold: 3 - env: - - name: MINIO_ROOT_USER - valueFrom: - secretKeyRef: - name: minio-secret - key: MINIO_ROOT_USER - - name: MINIO_ROOT_PASSWORD - valueFrom: - secretKeyRef: - name: minio-secret - key: MINIO_ROOT_PASSWORD - ports: - - containerPort: 9000 - protocol: TCP - - containerPort: 9090 - protocol: TCP - imagePullPolicy: IfNotPresent - volumeMounts: - - name: data - mountPath: /data - subPath: minio - terminationMessagePolicy: File - image: >- - quay.io/minio/minio:latest - args: - - server - - /data - - --console-address - - :9090 - restartPolicy: Always - terminationGracePeriodSeconds: 30 - dnsPolicy: ClusterFirst - securityContext: {} - schedulerName: default-scheduler - strategy: - type: Recreate - revisionHistoryLimit: 10 - progressDeadlineSeconds: 600 ---- -kind: Service -apiVersion: v1 -metadata: - name: minio-service -spec: - ipFamilies: - - IPv4 - ports: - - name: api - protocol: TCP - port: 9000 - targetPort: 9000 - - name: ui - protocol: TCP - port: 9090 - targetPort: 9090 - internalTrafficPolicy: Cluster - type: ClusterIP - ipFamilyPolicy: 
SingleStack - sessionAffinity: None - selector: - app: minio ---- -kind: Route -apiVersion: route.openshift.io/v1 -metadata: - name: minio-api -spec: - to: - kind: Service - name: minio-service - weight: 100 - port: - targetPort: api - wildcardPolicy: None - tls: - termination: edge - insecureEdgeTerminationPolicy: Redirect ---- -kind: Route -apiVersion: route.openshift.io/v1 -metadata: - name: minio-ui -spec: - to: - kind: Service - name: minio-service - weight: 100 - port: - targetPort: ui - wildcardPolicy: None - tls: - termination: edge - insecureEdgeTerminationPolicy: Redirect -# --- -# apiVersion: batch/v1 -# kind: Job -# metadata: -# labels: -# app.kubernetes.io/component: minio -# app.kubernetes.io/instance: minio -# app.kubernetes.io/name: minio -# app.kubernetes.io/part-of: minio -# component: minio -# name: create-minio-buckets -# spec: -# selector: {} -# template: -# metadata: -# labels: -# app.kubernetes.io/component: minio -# app.kubernetes.io/instance: minio -# app.kubernetes.io/name: minio -# app.kubernetes.io/part-of: minio -# component: minio -# spec: -# containers: -# - args: -# - -ec -# - |- -# env | grep MINIO -# pip install minio; -# cat << 'EOF' | python3 -# from minio import Minio -# import os -# client = Minio( -# "minio-service:9000", -# access_key=os.getenv("MINIO_ROOT_USER"), -# secret_key=os.getenv("MINIO_ROOT_PASSWORD"), -# secure=False -# ) -# bucket = 'models' - -# print('creating models bucket') -# if client.bucket_exists(bucket): -# print("bucket 'models' exists") -# else: -# client.make_bucket(bucket) -# print("bucket 'models' created successfully") -# EOF -# command: -# - /bin/bash -# envFrom: -# - secretRef: -# name: minio-secret -# image: registry.access.redhat.com/ubi8/python-38 -# imagePullPolicy: IfNotPresent -# name: create-buckets -# initContainers: -# - args: -# - -ec -# - |- -# echo -n 'Waiting for minio root user secret' -# while ! oc get secret minio-secret 2>/dev/null | grep -qF minio-secret; do -# echo -n . 
-# sleep 5 -# done; echo - -# echo -n 'Waiting for minio deployment' -# while ! oc get deployment minio 2>/dev/null | grep -qF minio; do -# echo -n . -# sleep 5 -# done; echo -# oc wait --for=condition=available --timeout=60s deployment/minio -# sleep 10 -# command: -# - /bin/bash -# image: image-registry.openshift-image-registry.svc:5000/openshift/tools:latest -# imagePullPolicy: IfNotPresent -# name: wait-for-minio -# restartPolicy: Never -# serviceAccountName: demo-setup -# --- -# apiVersion: batch/v1 -# kind: Job -# metadata: -# labels: -# app.kubernetes.io/component: minio -# app.kubernetes.io/instance: minio -# app.kubernetes.io/name: minio -# app.kubernetes.io/part-of: minio -# component: minio -# name: load-model-set -# spec: -# selector: {} -# template: -# metadata: -# labels: -# app.kubernetes.io/component: minio -# app.kubernetes.io/instance: minio -# app.kubernetes.io/name: minio -# app.kubernetes.io/part-of: minio -# component: minio -# spec: -# containers: -# - args: -# - -ec -# - |- -# pip install huggingface_hub boto3; -# cat << 'EOF' | python3 -# import boto3, os, botocore, subprocess -# from huggingface_hub import snapshot_download -# from pathlib import Path -# from huggingface_hub import login - -# # Get the environment variable 'hftoken' -# hf_token = os.getenv('hftoken') -# # Get model id -# modelid = os.getenv('modelId') -# model_id = modelid.split('/')[-1] - -# def run_command(command): -# """Run a shell command and check for errors.""" -# result = subprocess.run(command, shell=True, check=True, text=True, capture_output=True) -# print(result.stdout) -# if result.stderr: -# print(result.stderr) - -# # Upload Model to bucket -# endpoint_url = "http://minio-service:9000" -# aws_access_key_id = os.getenv("MINIO_ROOT_USER") -# aws_secret_access_key = os.getenv("MINIO_ROOT_PASSWORD") -# region_name = "us" -# bucket_name = "models" - -# if not all([aws_access_key_id, aws_secret_access_key, endpoint_url, region_name, bucket_name]): -# raise 
ValueError("One or data connection variables are empty. " -# "Please check your data connection to an S3 bucket.") - -# session = boto3.session.Session(aws_access_key_id=aws_access_key_id, -# aws_secret_access_key=aws_secret_access_key) - -# s3_resource = session.resource( -# 's3', -# config=botocore.client.Config(signature_version='s3v4'), -# endpoint_url=endpoint_url, -# region_name=region_name) - -# bucket = s3_resource.Bucket(bucket_name) - -# def upload_directory_to_s3(local_directory, s3_prefix): -# num_files = 0 -# for root, dirs, files in os.walk(local_directory): -# for filename in files: -# file_path = os.path.join(root, filename) -# relative_path = os.path.relpath(file_path, local_directory) -# s3_key = os.path.join(s3_prefix, relative_path) -# print(f"{file_path} -> {s3_key}") -# bucket.upload_file(file_path, s3_key) -# num_files += 1 -# return num_files - - -# def list_objects(prefix): -# filter = bucket.objects.filter(Prefix=prefix) -# for obj in filter.all(): -# print(obj.key) - -# if hf_token is not None and hf_token.strip() != "None": -# # If 'hftoken' is not None and not empty, execute this code -# print("hftoken is set.") -# login(token=hf_token) -# mistral_models_path = Path.home().joinpath(model_id) -# mistral_models_path.mkdir(parents=True, exist_ok=True) -# snapshot_download(repo_id=modelid, local_dir=mistral_models_path) - -# list_objects("models") -# print(model_id) -# num_files = upload_directory_to_s3(model_id, "llm-models/" + model_id) - -# if num_files == 0: -# raise ValueError("No files uploaded.") - -# list_objects("models") - -# EOF -# command: -# - /bin/bash -# envFrom: -# - secretRef: -# name: minio-secret -# - secretRef: -# name: huggingface-secret -# image: registry.access.redhat.com/ubi8/python-38 -# imagePullPolicy: IfNotPresent -# name: download-model -# initContainers: -# - args: -# - -ec -# - |- -# echo -n 'Waiting for minio root user secret' -# while ! 
oc get secret minio-secret 2>/dev/null | grep -qF minio-secret; do -# echo -n . -# sleep 5 -# done; echo - -# echo -n 'Waiting for minio deployment' -# while ! oc get deployment minio 2>/dev/null | grep -qF minio; do -# echo -n . -# sleep 5 -# done; echo -# oc wait --for=condition=available --timeout=600s deployment/minio -# sleep 10 -# command: -# - /bin/bash -# image: image-registry.openshift-image-registry.svc:5000/openshift/tools:latest -# imagePullPolicy: IfNotPresent -# name: wait-for-minio -# restartPolicy: Never -# serviceAccountName: demo-setup diff --git a/charts/all/minio/values.yaml b/charts/all/minio/values.yaml deleted file mode 100644 index 6473bd1b..00000000 --- a/charts/all/minio/values.yaml +++ /dev/null @@ -1,6 +0,0 @@ -secretStore: - name: vault-backend - kind: ClusterSecretStore - -minio: - key: secret/data/hub/minio \ No newline at end of file diff --git a/charts/all/nfd-config/templates/node-feature-discovery.yaml b/charts/all/nfd-config/templates/node-feature-discovery.yaml index 484fbad1..de543668 100644 --- a/charts/all/nfd-config/templates/node-feature-discovery.yaml +++ b/charts/all/nfd-config/templates/node-feature-discovery.yaml @@ -6,7 +6,7 @@ metadata: spec: operand: image: >- - registry.redhat.io/openshift4/ose-node-feature-discovery-rhel9:v{{ .Values.global.clusterVersion }} + registry.redhat.io/openshift4/ose-node-feature-discovery{{ ternary "-rhel9" "" (semverCompare ">=4.15.0" .Values.global.clusterVersion) }}:v{{ .Values.global.clusterVersion }} servicePort: 12000 workerConfig: configData: | @@ -24,7 +24,7 @@ spec: # stderrthreshold: 2 # v: 0 # vmodule: - ## NOTE: the following options are not dynamically run-time + ## NOTE: the following options are not dynamically run-time ## configurable and require a nfd-worker restart to take effect ## after being changed # logDir: diff --git a/charts/all/nvidia-gpu-config/templates/nvidia-cluster-policy-config.yaml b/charts/all/nvidia-gpu-config/templates/nvidia-cluster-policy-config.yaml 
index 61ad7972..98c91233 100644 --- a/charts/all/nvidia-gpu-config/templates/nvidia-cluster-policy-config.yaml +++ b/charts/all/nvidia-gpu-config/templates/nvidia-cluster-policy-config.yaml @@ -32,8 +32,6 @@ spec: licensingConfig: configMapName: '' nlsEnabled: false - repoConfig: - configMapName: '' upgradePolicy: autoUpgrade: true drain: diff --git a/charts/all/rag-llm/charts/azure-sql/Chart.yaml b/charts/all/rag-llm/charts/azure-sql/Chart.yaml new file mode 100644 index 00000000..331fe1e3 --- /dev/null +++ b/charts/all/rag-llm/charts/azure-sql/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +name: azuresql +description: Helpers for using Azure SQL server as a RAG DB backend +type: application +version: 0.1.0 +appVersion: 0.1.0 diff --git a/charts/all/rag-llm/charts/azure-sql/templates/external-secret.yaml b/charts/all/rag-llm/charts/azure-sql/templates/external-secret.yaml new file mode 100644 index 00000000..da4d6d56 --- /dev/null +++ b/charts/all/rag-llm/charts/azure-sql/templates/external-secret.yaml @@ -0,0 +1,36 @@ +{{- if eq .Values.global.db.type "AZURESQL" }} +apiVersion: "external-secrets.io/v1beta1" +kind: ExternalSecret +metadata: + name: azuresql-external-secret +spec: + refreshInterval: 15s + secretStoreRef: + name: {{ .Values.secretStore.name }} + kind: {{ .Values.secretStore.kind }} + target: + name: azuresql-secret + template: + type: Opaque + engineVersion: v2 + data: + CONNECTION_STRING: > + Driver={{ printf "{%s}" .Values.driver }}; + Server={{ "{{ .server }}" }},1433; + Database={{ .Values.databaseName }}; + UID={{ "{{ .user }}" }}; + PWD={{ "{{ .password }}" }}; + data: + - secretKey: user + remoteRef: + key: {{ .Values.secretStore.key }} + property: "user" + - secretKey: password + remoteRef: + key: {{ .Values.secretStore.key }} + property: "password" + - secretKey: server + remoteRef: + key: {{ .Values.secretStore.key }} + property: "server" +{{- end }} diff --git a/charts/all/rag-llm/charts/azure-sql/values.yaml 
b/charts/all/rag-llm/charts/azure-sql/values.yaml new file mode 100644 index 00000000..1717686a --- /dev/null +++ b/charts/all/rag-llm/charts/azure-sql/values.yaml @@ -0,0 +1,7 @@ +secretStore: + name: vault-backend + kind: ClusterSecretStore + key: secret/data/hub/azuresql + +driver: "ODBC Driver 18 for SQL Server" +databaseName: embeddings diff --git a/charts/all/rag-llm/charts/elastic/templates/elasticsearch.yaml b/charts/all/rag-llm/charts/elastic/templates/elasticsearch.yaml index 80b9bd12..f20b3f7d 100644 --- a/charts/all/rag-llm/charts/elastic/templates/elasticsearch.yaml +++ b/charts/all/rag-llm/charts/elastic/templates/elasticsearch.yaml @@ -12,7 +12,7 @@ spec: nodeSets: - config: node.store.allow_mmap: false - count: 1 + count: 3 name: default podTemplate: metadata: diff --git a/charts/all/minio/Chart.yaml b/charts/all/rag-llm/charts/mssql/Chart.yaml similarity index 92% rename from charts/all/minio/Chart.yaml rename to charts/all/rag-llm/charts/mssql/Chart.yaml index 50fbb849..73d66be1 100644 --- a/charts/all/minio/Chart.yaml +++ b/charts/all/rag-llm/charts/mssql/Chart.yaml @@ -1,5 +1,5 @@ apiVersion: v2 -name: minio +name: mssql description: A Helm chart for Kubernetes # A chart can be either an 'application' or a 'library' chart. @@ -15,10 +15,9 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.0 +version: 0.1.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. -# It is recommended to use it with quotes. 
-appVersion: "1.16.0" +appVersion: 1.16.0 diff --git a/charts/all/minio/templates/_helpers.tpl b/charts/all/rag-llm/charts/mssql/templates/_helpers.tpl similarity index 72% rename from charts/all/minio/templates/_helpers.tpl rename to charts/all/rag-llm/charts/mssql/templates/_helpers.tpl index f415bbf2..5420b1ae 100644 --- a/charts/all/minio/templates/_helpers.tpl +++ b/charts/all/rag-llm/charts/mssql/templates/_helpers.tpl @@ -1,7 +1,7 @@ {{/* Expand the name of the chart. */}} -{{- define "minio.name" -}} +{{- define "mssql.name" -}} {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} {{- end }} @@ -10,7 +10,7 @@ Create a default fully qualified app name. We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). If release name contains chart name it will be used as a full name. */}} -{{- define "minio.fullname" -}} +{{- define "mssql.fullname" -}} {{- if .Values.fullnameOverride }} {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} {{- else }} @@ -26,16 +26,16 @@ If release name contains chart name it will be used as a full name. {{/* Create chart name and version as used by the chart label. */}} -{{- define "minio.chart" -}} +{{- define "mssql.chart" -}} {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} {{- end }} {{/* Common labels */}} -{{- define "minio.labels" -}} -helm.sh/chart: {{ include "minio.chart" . }} -{{ include "minio.selectorLabels" . }} +{{- define "mssql.labels" -}} +helm.sh/chart: {{ include "mssql.chart" . }} +{{ include "mssql.selectorLabels" . }} {{- if .Chart.AppVersion }} app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} {{- end }} @@ -45,18 +45,26 @@ app.kubernetes.io/managed-by: {{ .Release.Service }} {{/* Selector labels */}} -{{- define "minio.selectorLabels" -}} -app.kubernetes.io/name: {{ include "minio.name" . 
}} +{{- define "mssql.selectorLabels" -}} +app.kubernetes.io/name: {{ include "mssql.name" . }} app.kubernetes.io/instance: {{ .Release.Name }} {{- end }} {{/* Create the name of the service account to use */}} -{{- define "minio.serviceAccountName" -}} +{{- define "mssql.serviceAccountName" -}} {{- if .Values.serviceAccount.create }} -{{- default (include "minio.fullname" .) .Values.serviceAccount.name }} +{{- default (include "mssql.fullname" .) .Values.serviceAccount.name }} {{- else }} {{- default "default" .Values.serviceAccount.name }} {{- end }} {{- end }} + + +{{/* +Create the name for the SA password secret key. +*/}} +{{- define "mssql.sapassword" -}} + sa_password +{{- end -}} diff --git a/charts/all/rag-llm/charts/mssql/templates/deployment.yaml b/charts/all/rag-llm/charts/mssql/templates/deployment.yaml new file mode 100644 index 00000000..a62e67c0 --- /dev/null +++ b/charts/all/rag-llm/charts/mssql/templates/deployment.yaml @@ -0,0 +1,76 @@ +{{- if eq .Values.global.db.type "MSSQL" }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "mssql.fullname" . }} + labels: + {{- include "mssql.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicas}} + selector: + matchLabels: + {{- include "mssql.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "mssql.selectorLabels" . 
| nindent 8 }} + spec: + hostname: {{ .Values.hostname}} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Chart.Name }} + command: + - /bin/bash + - -c + - cp /var/opt/config/mssql.conf /var/opt/mssql/mssql.conf && /opt/mssql/bin/sqlservr + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + securityContext: + {{- toYaml .Values.containerSecurityContext | nindent 12 }} + ports: + - containerPort: {{ .Values.containers.ports.containerPort}} + env: + - name: MSSQL_PID + value: "{{ .Values.MSSQL_PID.value}}" + - name: ACCEPT_EULA + value: "{{ .Values.ACCEPT_EULA.value | upper}}" + - name: MSSQL_AGENT_ENABLED + value: "{{ .Values.MSSQL_AGENT_ENABLED.value}}" + - name: MSSQL_SA_PASSWORD + valueFrom: + secretKeyRef: + name: mssql-secret + key: SA_PASSWORD + volumeMounts: + - name: mssqldb + mountPath: /var/opt/mssql + - name: mssqluserdb + mountPath: /var/opt/mssql/userdata + - name: mssqllog + mountPath: /var/opt/mssql/userlog + - name: mssqltemp + mountPath: /var/opt/mssql/tempdb + - name: mssql-config-volume + mountPath: /var/opt/config + volumes: + - name: mssqldb + persistentVolumeClaim: + claimName: mssql-data + - name: mssqluserdb + persistentVolumeClaim: + claimName: mssql-userdb + - name: mssqllog + persistentVolumeClaim: + claimName: mssql-log + - name: mssqltemp + persistentVolumeClaim: + claimName: mssql-temp + - name: mssql-config-volume + configMap: + name: mssql-config +{{- end }} diff --git a/charts/all/rag-llm/charts/mssql/templates/external-secret.yaml b/charts/all/rag-llm/charts/mssql/templates/external-secret.yaml new file mode 100644 index 00000000..d67f4394 --- /dev/null +++ b/charts/all/rag-llm/charts/mssql/templates/external-secret.yaml @@ -0,0 +1,31 @@ +{{- if eq .Values.global.db.type "MSSQL" }} +apiVersion: "external-secrets.io/v1beta1" +kind: ExternalSecret +metadata: + name: mssql-external-secret +spec: +
refreshInterval: 15s + secretStoreRef: + name: {{ .Values.secretStore.name }} + kind: {{ .Values.secretStore.kind }} + target: + name: mssql-secret + template: + type: Opaque + engineVersion: v2 + data: + CONNECTION_STRING: > + Driver={{ printf "{%s}" .Values.driver }}; + Server={{ include "mssql.fullname" . }},{{ toString .Values.service.port }}; + Database={{ .Values.databaseName }}; + UID=sa; + PWD={{ "{{ .sapass }}" }}; + TrustServerCertificate=yes; + Encrypt=no; + SA_PASSWORD: "{{ `{{ .sapass }}` }}" + data: + - secretKey: sapass + remoteRef: + key: {{ .Values.secretStore.mssqlSecretKey }} + property: "sa-pass" +{{- end }} diff --git a/charts/all/rag-llm/charts/mssql/templates/mssqlconfig.yaml b/charts/all/rag-llm/charts/mssql/templates/mssqlconfig.yaml new file mode 100644 index 00000000..f890a62d --- /dev/null +++ b/charts/all/rag-llm/charts/mssql/templates/mssqlconfig.yaml @@ -0,0 +1,25 @@ +{{- if eq .Values.global.db.type "MSSQL" }} +kind: ConfigMap +apiVersion: v1 +metadata: + name: mssql-config +data: + mssql.conf: | + [EULA] + accepteula = Y + accepteulaml = Y + + [coredump] + captureminiandfull = true + coredumptype = full + + [hadr] + hadrenabled = 1 + + [language] + lcid = 1033 + + [filelocation] + defaultdatadir = /var/opt/mssql/userdata + defaultlogdir = /var/opt/mssql/userlog +{{- end }} diff --git a/charts/all/rag-llm/charts/mssql/templates/pvc.yaml b/charts/all/rag-llm/charts/mssql/templates/pvc.yaml new file mode 100644 index 00000000..e5e62c1f --- /dev/null +++ b/charts/all/rag-llm/charts/mssql/templates/pvc.yaml @@ -0,0 +1,53 @@ +{{- if eq .Values.global.db.type "MSSQL" }} +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: mssql-data + annotations: + volume.beta.kubernetes.io/storage-class: {{ .Values.global.storageClass }} +spec: + accessModes: + - {{ .Values.pvc.mssqldataaccessMode | quote}} + resources: + requests: + storage: {{ .Values.pvc.mssqldbsize}} +--- +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: 
mssql-userdb + annotations: + volume.beta.kubernetes.io/storage-class: {{ .Values.global.storageClass }} +spec: + accessModes: + - {{ .Values.pvc.userdbaccessMode | quote}} + resources: + requests: + storage: {{ .Values.pvc.userdbsize}} +--- +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: mssql-log + annotations: + volume.beta.kubernetes.io/storage-class: {{ .Values.global.storageClass }} +spec: + accessModes: + - {{ .Values.pvc.userlogaccessMode | quote}} + resources: + requests: + storage: {{ .Values.pvc.userlogsize}} +--- +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: mssql-temp + annotations: + volume.beta.kubernetes.io/storage-class: {{ .Values.global.storageClass }} +spec: + accessModes: + - {{ .Values.pvc.tempdbaccessMode | quote}} + resources: + requests: + storage: {{ .Values.pvc.tempsize}} +{{- end }} diff --git a/charts/all/rag-llm/charts/mssql/templates/service.yaml b/charts/all/rag-llm/charts/mssql/templates/service.yaml new file mode 100644 index 00000000..6a98ab74 --- /dev/null +++ b/charts/all/rag-llm/charts/mssql/templates/service.yaml @@ -0,0 +1,16 @@ +{{- if eq .Values.global.db.type "MSSQL" }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "mssql.fullname" . }} + labels: + {{- include "mssql.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: {{ .Values.service.port }} + protocol: TCP + selector: + {{- include "mssql.selectorLabels" . 
| nindent 4 }} +{{- end }} diff --git a/charts/all/rag-llm/charts/mssql/values.yaml b/charts/all/rag-llm/charts/mssql/values.yaml new file mode 100644 index 00000000..6a92a526 --- /dev/null +++ b/charts/all/rag-llm/charts/mssql/values.yaml @@ -0,0 +1,52 @@ +global: + storageClass: gp3-csi + +secretStore: + name: vault-backend + kind: ClusterSecretStore + mssqlSecretKey: secret/data/hub/mssql + +replicas: 1 + +image: + repository: mcr.microsoft.com/mssql/rhel/server + tag: 2025-latest + pullPolicy: IfNotPresent + +ACCEPT_EULA: + value: "y" +MSSQL_PID: + value: "Developer" +MSSQL_AGENT_ENABLED: + value: "true" +hostname: mssqllatest +containers: + ports: + containerPort: 1433 + +podAnnotations: {} + +podSecurityContext: + fsGroupChangePolicy: OnRootMismatch + +containerSecurityContext: + capabilities: + add: + - NET_BIND_SERVICE + +service: + type: ClusterIP + port: 1433 + +pvc: + userdbaccessMode: ReadWriteOnce + userdbsize: 5Gi + userlogaccessMode: ReadWriteOnce + userlogsize: 5Gi + tempdbaccessMode: ReadWriteOnce + tempsize: 2Gi + mssqldataaccessMode: ReadWriteOnce + mssqldbsize: 2Gi + +driver: "ODBC Driver 18 for SQL Server" +databaseName: embeddings diff --git a/charts/all/rag-llm/files/config.yaml b/charts/all/rag-llm/files/config.yaml index 6a9975aa..8f50809e 100644 --- a/charts/all/rag-llm/files/config.yaml +++ b/charts/all/rag-llm/files/config.yaml @@ -2,10 +2,10 @@ llm_providers: - name: "OpenShift AI (vLLM)" enabled: True models: - - name: ibm-granite-instruct + - name: {{ (split "/" .Values.global.model.vllm)._1 }} weight: 1 enabled: True - url: https://ibm-granite-instruct-{{ .Values.llmui.namespace }}.{{ coalesce .Values.global.localClusterDomain .Values.global.hubClusterDomain }}/v1 + url: {{ printf "https://vllm-inference-service-predictor-%s.%s/v1" .Release.Namespace .Values.global.localClusterDomain }} params: - name: max_new_tokens value: 1024 @@ -22,6 +22,6 @@ llm_providers: - name: typical_p value: 0.95 default_provider: "OpenShift AI (vLLM)" 
-default_model: {{ .Values.global.model.modelId }} +default_model: {{ .Values.global.model.vllm }} # type values=(default, round_robin, all) -type: all \ No newline at end of file +type: all diff --git a/charts/all/rag-llm/templates/deployment.yaml b/charts/all/rag-llm/templates/deployment.yaml index 08e3b50a..1534476b 100644 --- a/charts/all/rag-llm/templates/deployment.yaml +++ b/charts/all/rag-llm/templates/deployment.yaml @@ -36,11 +36,17 @@ spec: env: - name: HUGGINGFACE_HUB_CACHE value: /tmp/.cache + {{- range .Values.volumeMounts }} + {{- if eq .name "providerconfig" }} - name: CONFIG_FILE - value: /app-root/config/config.yaml + value: {{ .mountPath }}/config.yaml + {{- end }} + {{- end }} - name: APP_TITLE value: Talk with your documentation - {{- if eq .Values.global.db.type "REDIS" }} + - name: EMBEDDING_MODEL + value: {{ .Values.global.model.embedding }} + {{- if eq .Values.global.db.type "REDIS" }} - name: DB_TYPE value: REDIS - name: REDIS_URL @@ -49,8 +55,8 @@ spec: value: {{ .Values.global.db.index }} - name: REDIS_SCHEMA value: redis_schema.yaml - {{- end }} - {{- if eq .Values.global.db.type "EDB" }} + {{- end }} + {{- if eq .Values.global.db.type "EDB" }} - name: DB_TYPE value: PGVECTOR - name: DB_USERNAME @@ -87,8 +93,8 @@ spec: value: 'postgresql+psycopg://$(DB_USERNAME):$(DB_PASS)@$(DB_HOST):$(DB_PORT)/$(DB_NAME)' - name: PGVECTOR_COLLECTION_NAME value: {{ .Values.global.db.index }} - {{- end }} - {{- if eq .Values.global.db.type "ELASTIC" }} + {{- end }} + {{- if eq .Values.global.db.type "ELASTIC" }} - name: DB_TYPE value: "ELASTIC" - name: ELASTIC_INDEX @@ -103,6 +109,28 @@ spec: name: es-vectordb-es-elastic-user key: elastic {{- end }} + {{- if eq .Values.global.db.type "MSSQL" }} + - name: DB_TYPE + value: MSSQL + - name: MSSQL_CONNECTION_STRING + valueFrom: + secretKeyRef: + name: mssql-secret + key: CONNECTION_STRING + - name: MSSQL_TABLE + value: {{ .Values.global.db.index }} + {{- end }} + {{- if eq .Values.global.db.type "AZURESQL" }} 
+ - name: DB_TYPE + value: MSSQL + - name: MSSQL_CONNECTION_STRING + valueFrom: + secretKeyRef: + name: azuresql-secret + key: CONNECTION_STRING + - name: MSSQL_TABLE + value: {{ .Values.global.db.index }} + {{- end }} securityContext: {{- toYaml .Values.securityContext | nindent 12 }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" diff --git a/charts/all/rag-llm/templates/populate-vectordb-job.yaml b/charts/all/rag-llm/templates/populate-vectordb-job.yaml index 989fd474..46b0813d 100644 --- a/charts/all/rag-llm/templates/populate-vectordb-job.yaml +++ b/charts/all/rag-llm/templates/populate-vectordb-job.yaml @@ -26,16 +26,20 @@ spec: imagePullPolicy: {{ .Values.populateDbJob.image.pullPolicy }} name: populate-vectordb env: - - name: TRANSFORMERS_CACHE - value: '/cache/.cache' - - name: SENTENCE_TRANSFORMERS_HOME - value: '/cache/.cache' - - name: DOC_GIT_REPO - value: {{ .Values.populateDbJob.doc_git_repo | quote }} - - name: DOC_LOCATION - value: {{ .Values.populateDbJob.doc_location | quote }} - name: TEMP_DIR - value: {{ .Values.populateDbJob.doc_dir | quote }} + value: {{ .Values.populateDbJob.tempDir | quote }} + - name: LOG_LEVEL + value: {{ .Values.populateDbJob.logLevel | quote }} + - name: REPO_SOURCES + value: {{ .Values.populateDbJob.repoSources | toJson | quote }} + - name: WEB_SOURCES + value: {{ .Values.populateDbJob.webSources | toJson | quote }} + - name: CHUNK_SIZE + value: {{ .Values.populateDbJob.chunking.size | quote }} + - name: CHUNK_OVERLAP + value: {{ .Values.populateDbJob.chunking.overlap | quote }} + - name: EMBEDDING_MODEL + value: {{ .Values.global.model.embedding | quote }} {{- if eq .Values.global.db.type "REDIS" }} - name: DB_TYPE value: "REDIS" @@ -98,8 +102,32 @@ spec: secretKeyRef: name: es-vectordb-es-elastic-user key: elastic + {{- end }} + {{- if eq .Values.global.db.type "MSSQL" }} + - name: DB_TYPE + value: MSSQL + - name: MSSQL_CONNECTION_STRING + valueFrom: + secretKeyRef: + name: mssql-secret + key:
CONNECTION_STRING + - name: MSSQL_TABLE + value: {{ .Values.global.db.index }} + {{- end }} + {{- if eq .Values.global.db.type "AZURESQL" }} + - name: DB_TYPE + value: MSSQL + - name: MSSQL_CONNECTION_STRING + valueFrom: + secretKeyRef: + name: azuresql-secret + key: CONNECTION_STRING + - name: MSSQL_TABLE + value: {{ .Values.global.db.index }} + {{- end }} + {{- if .Values.populateDbJob.command }} + command: {{ .Values.populateDbJob.command | toJson }} {{- end }} - command: ["/usr/bin/bash", "/app/entrypoint.sh"] {{- if .Values.populateDbJob.args }} args: {{ .Values.populateDbJob.args }} {{- end }} diff --git a/charts/all/rag-llm/values.yaml b/charts/all/rag-llm/values.yaml index b8476cf6..e20629ad 100644 --- a/charts/all/rag-llm/values.yaml +++ b/charts/all/rag-llm/values.yaml @@ -1,12 +1,14 @@ - global: - localClusterDomain: example.com + localClusterDomain: apps.example.com hubClusterDomain: example.com db: index: docs type: EDB model: - modelId: ibm-granite/granite-3.1-8b-instruct + vllm: ibm-granite/granite-3.3-8b-instruct + embedding: sentence-transformers/all-mpnet-base-v2 + storageClass: gp3-csi + llmui: namespace: "rag-llm" @@ -14,10 +16,10 @@ llmui: replicaCount: 1 image: - repository: 'quay.io/ecosystem-appeng/rag-llm-ui' - pullPolicy: IfNotPresent + repository: quay.io/dminnear/gradio-tgi-multi-model-rag + pullPolicy: Always # Overrides the image tag whose default is the chart appVersion. - tag: "1.1" + tag: latest imagePullSecrets: [] nameOverride: "" @@ -69,7 +71,7 @@ resources: livenessProbe: httpGet: - path: /queue/status + path: / port: http scheme: HTTP timeoutSeconds: 8 @@ -79,7 +81,7 @@ livenessProbe: readinessProbe: httpGet: - path: /queue/status + path: / port: http scheme: HTTP timeoutSeconds: 5 @@ -89,7 +91,7 @@ readinessProbe: startupProbe: httpGet: - path: /queue/status + path: / port: http scheme: HTTP timeoutSeconds: 1 @@ -128,7 +130,7 @@ volumes: # Additional volumeMounts on the output Deployment definition.
volumeMounts: - name: providerconfig - mountPath: /app-root/config + mountPath: /opt/app-root/config - name: redis-schema mountPath: /opt/app-root/src/redis_schema.yaml subPath: redis_schema.yaml @@ -161,23 +163,35 @@ route: populateDbJob: ## Job image image: - repository: "quay.io/ecosystem-appeng/embeddingjob" - tag: "0.0.4" - pullPolicy: IfNotPresent - - command: ["/usr/bin/bash", "/app/entrypoint.sh"] - #args: ["echo 'consuming a message'; sleep 5"] - - ## Define env - # env: + repository: quay.io/hybridcloudpatterns/vector-embedder + tag: latest + pullPolicy: Always ## Job configurations backoffLimit: 10 restartPolicy: Never - doc_git_repo: https://github.com/RHEcosystemAppEng/llm-on-openshift.git - doc_location: examples/notebooks/langchain/rhods-doc - doc_dir: /docs + # Environment overrides + tempDir: /docs + logLevel: info + repoSources: + - repo: https://github.com/RHEcosystemAppEng/llm-on-openshift.git + globs: + - examples/notebooks/langchain/rhods-doc/*.pdf + webSources: + - https://ai-on-openshift.io/getting-started/openshift/ + - https://ai-on-openshift.io/getting-started/opendatahub/ + - https://ai-on-openshift.io/getting-started/openshift-ai/ + - https://ai-on-openshift.io/odh-rhoai/configuration/ + - https://ai-on-openshift.io/odh-rhoai/custom-notebooks/ + - https://ai-on-openshift.io/odh-rhoai/nvidia-gpus/ + - https://ai-on-openshift.io/odh-rhoai/custom-runtime-triton/ + - https://ai-on-openshift.io/odh-rhoai/openshift-group-management/ + - https://ai-on-openshift.io/tools-and-applications/minio/minio/ + chunking: + size: 1024 + overlap: 40 + embeddingModel: sentence-transformers/all-mpnet-base-v2 # By default, fullname uses '{{ .Release.Name }}-{{ .Chart.Name }}'. 
This @@ -201,7 +215,7 @@ populateDbJob: # memory: 500Mi securityContext: - runAsUser: + runAsUser: runAsGroup: fsGroup: @@ -210,7 +224,7 @@ populateDbJob: name: cache-volume - mountPath: /docs/ name: doc-volume - + volumes: - name: cache-volume emptyDir: @@ -228,4 +242,4 @@ hfmodel: # Create NetworkPolicy to allow traffic from all namespaces to allow monitoring. Set to false if monitoring is not needed customnetworkpolicy: - enabled: true \ No newline at end of file + enabled: true diff --git a/charts/all/rhods/Chart.yaml b/charts/all/rhods/Chart.yaml index 50fbb849..a7834eac 100644 --- a/charts/all/rhods/Chart.yaml +++ b/charts/all/rhods/Chart.yaml @@ -1,5 +1,5 @@ apiVersion: v2 -name: minio +name: rhods description: A Helm chart for Kubernetes # A chart can be either an 'application' or a 'library' chart. diff --git a/charts/all/rhods/templates/_helpers.tpl b/charts/all/rhods/templates/_helpers.tpl index f415bbf2..c7539c12 100644 --- a/charts/all/rhods/templates/_helpers.tpl +++ b/charts/all/rhods/templates/_helpers.tpl @@ -1,7 +1,7 @@ {{/* Expand the name of the chart. */}} -{{- define "minio.name" -}} +{{- define "rhods.name" -}} {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} {{- end }} @@ -10,7 +10,7 @@ Create a default fully qualified app name. We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). If release name contains chart name it will be used as a full name. */}} -{{- define "minio.fullname" -}} +{{- define "rhods.fullname" -}} {{- if .Values.fullnameOverride }} {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} {{- else }} @@ -26,16 +26,16 @@ If release name contains chart name it will be used as a full name. {{/* Create chart name and version as used by the chart label. 
*/}} -{{- define "minio.chart" -}} +{{- define "rhods.chart" -}} {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} {{- end }} {{/* Common labels */}} -{{- define "minio.labels" -}} -helm.sh/chart: {{ include "minio.chart" . }} -{{ include "minio.selectorLabels" . }} +{{- define "rhods.labels" -}} +helm.sh/chart: {{ include "rhods.chart" . }} +{{ include "rhods.selectorLabels" . }} {{- if .Chart.AppVersion }} app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} {{- end }} @@ -45,17 +45,17 @@ app.kubernetes.io/managed-by: {{ .Release.Service }} {{/* Selector labels */}} -{{- define "minio.selectorLabels" -}} -app.kubernetes.io/name: {{ include "minio.name" . }} +{{- define "rhods.selectorLabels" -}} +app.kubernetes.io/name: {{ include "rhods.name" . }} app.kubernetes.io/instance: {{ .Release.Name }} {{- end }} {{/* Create the name of the service account to use */}} -{{- define "minio.serviceAccountName" -}} +{{- define "rhods.serviceAccountName" -}} {{- if .Values.serviceAccount.create }} -{{- default (include "minio.fullname" .) .Values.serviceAccount.name }} +{{- default (include "rhods.fullname" .) 
.Values.serviceAccount.name }} {{- else }} {{- default "default" .Values.serviceAccount.name }} {{- end }} diff --git a/charts/all/rhods/templates/dsc.yaml b/charts/all/rhods/templates/dsc.yaml index af226cda..b21fb19c 100644 --- a/charts/all/rhods/templates/dsc.yaml +++ b/charts/all/rhods/templates/dsc.yaml @@ -3,7 +3,7 @@ kind: DataScienceCluster metadata: name: default-dsc annotations: - argocd.argoproj.io/sync-wave: "20" + argocd.argoproj.io/sync-wave: "10" spec: components: dashboard: @@ -28,3 +28,4 @@ spec: type: SelfSigned managementState: Managed name: knative-serving + rawDeploymentServiceConfig: Headed diff --git a/charts/all/tgis-server/.helmignore b/charts/all/tgis-server/.helmignore deleted file mode 100644 index 0e8a0eb3..00000000 --- a/charts/all/tgis-server/.helmignore +++ /dev/null @@ -1,23 +0,0 @@ -# Patterns to ignore when building packages. -# This supports shell glob matching, relative path matching, and -# negation (prefixed with !). Only one pattern per line. -.DS_Store -# Common VCS dirs -.git/ -.gitignore -.bzr/ -.bzrignore -.hg/ -.hgignore -.svn/ -# Common backup files -*.swp -*.bak -*.tmp -*.orig -*~ -# Various IDEs -.project -.idea/ -*.tmproj -.vscode/ diff --git a/charts/all/tgis-server/Chart.yaml b/charts/all/tgis-server/Chart.yaml deleted file mode 100644 index 6fae49d1..00000000 --- a/charts/all/tgis-server/Chart.yaml +++ /dev/null @@ -1,24 +0,0 @@ -apiVersion: v2 -name: tgis-server -description: A Helm chart for Kubernetes - -# A chart can be either an 'application' or a 'library' chart. -# -# Application charts are a collection of templates that can be packaged into versioned archives -# to be deployed. -# -# Library charts provide useful utilities or functions for the chart developer. They're included as -# a dependency of application charts to inject those utilities and functions into the rendering -# pipeline. Library charts do not define any templates and therefore cannot be deployed. 
-type: application - -# This is the chart version. This version number should be incremented each time you make changes -# to the chart and its templates, including the app version. -# Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.0 - -# This is the version number of the application being deployed. This version number should be -# incremented each time you make changes to the application. Versions are not expected to -# follow Semantic Versioning. They should reflect the version the application is using. -# It is recommended to use it with quotes. -appVersion: "1.16.0" diff --git a/charts/all/tgis-server/templates/deployment.yaml b/charts/all/tgis-server/templates/deployment.yaml deleted file mode 100644 index 7a0fed19..00000000 --- a/charts/all/tgis-server/templates/deployment.yaml +++ /dev/null @@ -1,99 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: hf-text-generation-inference-server-model1 - labels: - {{- include "tgis-server.labels" . | nindent 4 }} -spec: - {{- if not .Values.autoscaling.enabled }} - replicas: {{ .Values.replicaCount }} - {{- end }} - selector: - matchLabels: - {{- include "tgis-server.selectorLabels" . | nindent 6 }} - template: - metadata: - {{- with .Values.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - {{- include "tgis-server.labels" . | nindent 8 }} - {{- with .Values.podLabels }} - {{- toYaml . | nindent 8 }} - {{- end }} - spec: - restartPolicy: Always - schedulerName: default-scheduler - {{- with .Values.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - serviceAccountName: {{ include "tgis-server.serviceAccountName" . 
}} - terminationGracePeriodSeconds: 120 - securityContext: - {{- toYaml .Values.podSecurityContext | nindent 8 }} - containers: - - name: {{ .Chart.Name }} - env: - - name: MODEL_ID - value: {{ .Values.env.MODEL_ID }} - - name: MAX_INPUT_LENGTH - value: {{ .Values.env.MAX_INPUT_LENGTH | quote }} - - name: MAX_TOTAL_TOKENS - value: {{ .Values.env.MAX_TOTAL_TOKENS | quote }} - - name: HUGGINGFACE_HUB_CACHE - value: {{ .Values.env.HUGGINGFACE_HUB_CACHE }} - - name: PORT - value: {{ .Values.env.PORT | quote }} - - name: HOST - value: {{ .Values.env.HOST | quote }} - {{- with .Values.env.HF_TOKEN }} - - name: HF_TOKEN - {{- toYaml .Values.env.HF_TOKEN | nindent 14 }} - {{- end }} - securityContext: - {{- toYaml .Values.securityContext | nindent 12 }} - image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.image.pullPolicy }} - ports: - - name: http - containerPort: {{ .Values.service.port }} - protocol: TCP - livenessProbe: - {{- toYaml .Values.livenessProbe | nindent 12 }} - readinessProbe: - {{- toYaml .Values.readinessProbe | nindent 12 }} - resources: - {{- toYaml .Values.resources | nindent 12 }} - startupProbe: - {{- toYaml .Values.livenessProbe | nindent 12 }} - volumeMounts: - - name: models-cache - mountPath: /models-cache - - name: shm - mountPath: /dev/shm - terminationMessagePolicy: File - volumes: - - name: models-cache - persistentVolumeClaim: - claimName: {{ include "tgis-server.fullname" . }} - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi - {{- with .Values.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.affinity }} - affinity: - {{- toYaml . | nindent 8 }} - {{- end }} - dnsPolicy: ClusterFirst - {{- with .Values.tolerations }} - tolerations: - {{- toYaml . 
| nindent 8 }} - {{- end }} - strategy: - type: Recreate diff --git a/charts/all/tgis-server/templates/hpa.yaml b/charts/all/tgis-server/templates/hpa.yaml deleted file mode 100644 index 1f78fa9c..00000000 --- a/charts/all/tgis-server/templates/hpa.yaml +++ /dev/null @@ -1,32 +0,0 @@ -{{- if .Values.autoscaling.enabled }} -apiVersion: autoscaling/v2 -kind: HorizontalPodAutoscaler -metadata: - name: {{ include "tgis-server.fullname" . }} - labels: - {{- include "tgis-server.labels" . | nindent 4 }} -spec: - scaleTargetRef: - apiVersion: apps/v1 - kind: Deployment - name: {{ include "tgis-server.fullname" . }} - minReplicas: {{ .Values.autoscaling.minReplicas }} - maxReplicas: {{ .Values.autoscaling.maxReplicas }} - metrics: - {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} - - type: Resource - resource: - name: cpu - target: - type: Utilization - averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} - {{- end }} - {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} - - type: Resource - resource: - name: memory - target: - type: Utilization - averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} - {{- end }} -{{- end }} diff --git a/charts/all/tgis-server/templates/pvc.yaml b/charts/all/tgis-server/templates/pvc.yaml deleted file mode 100644 index d0fdc41e..00000000 --- a/charts/all/tgis-server/templates/pvc.yaml +++ /dev/null @@ -1,28 +0,0 @@ -kind: PersistentVolumeClaim -apiVersion: v1 -metadata: - name: {{ include "tgis-server.fullname" . }} - namespace: {{ .Release.Namespace }} -{{- with .Values.persistence.annotations }} - annotations: -{{ toYaml . | indent 4 }} -{{- end }} - labels: - app: {{ include "tgis-server.fullname" . 
}} - chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" - release: "{{ .Release.Name }}" - heritage: "{{ .Release.Service }}" -spec: - accessModes: - - {{ .Values.persistence.accessMode | quote }} - volumeMode: {{ .Values.persistence.volumeMode | quote }} - resources: - requests: - storage: {{ .Values.persistence.size | quote }} -{{- if .Values.persistence.storageClass }} -{{- if (eq "-" .Values.persistence.storageClass) }} - storageClassName: "" -{{- else }} - storageClassName: "{{ .Values.persistence.storageClass }}" -{{- end }} -{{- end }} diff --git a/charts/all/tgis-server/templates/service.yaml b/charts/all/tgis-server/templates/service.yaml deleted file mode 100644 index 3c68f038..00000000 --- a/charts/all/tgis-server/templates/service.yaml +++ /dev/null @@ -1,20 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: hf-text-generation-inference-server-model1 - labels: - {{- include "tgis-server.labels" . | nindent 4 }} -spec: - clusterIP: None - ipFamilies: - - IPv4 - type: {{ .Values.service.type }} - ports: - - port: {{ .Values.service.port }} - targetPort: http - protocol: TCP - name: http - internalTrafficPolicy: Cluster - selector: - {{- include "tgis-server.selectorLabels" . | nindent 4 }} - diff --git a/charts/all/tgis-server/templates/serviceaccount.yaml b/charts/all/tgis-server/templates/serviceaccount.yaml deleted file mode 100644 index d33d948d..00000000 --- a/charts/all/tgis-server/templates/serviceaccount.yaml +++ /dev/null @@ -1,13 +0,0 @@ -{{- if .Values.serviceAccount.create -}} -apiVersion: v1 -kind: ServiceAccount -metadata: - name: {{ include "tgis-server.serviceAccountName" . }} - labels: - {{- include "tgis-server.labels" . | nindent 4 }} - {{- with .Values.serviceAccount.annotations }} - annotations: - {{- toYaml . 
| nindent 4 }} - {{- end }} -automountServiceAccountToken: {{ .Values.serviceAccount.automount }} -{{- end }} diff --git a/charts/all/tgis-server/values.yaml b/charts/all/tgis-server/values.yaml deleted file mode 100644 index 91bcda39..00000000 --- a/charts/all/tgis-server/values.yaml +++ /dev/null @@ -1,130 +0,0 @@ -# Default values for tgis-server. -# This is a YAML-formatted file. -# Declare variables to be passed into your templates. - -replicaCount: 1 -image: - repository: ghcr.io/huggingface/text-generation-inference - pullPolicy: IfNotPresent - # Overrides the image tag whose default is the chart appVersion. - tag: "1.3.3" - -imagePullSecrets: [] -nameOverride: "" -fullnameOverride: "" - -serviceAccount: - # Specifies whether a service account should be created - create: true - # Automatically mount a ServiceAccount's API credentials? - automount: true - # Annotations to add to the service account - annotations: {} - # The name of the service account to use. - # If not set and create is true, a name is generated using the fullname template - name: "" - -podAnnotations: {} -podLabels: {} - -podSecurityContext: {} - # fsGroup: 2000 - -securityContext: - capabilities: - drop: - - ALL - runAsNonRoot: true - allowPrivilegeEscalation: false - seccompProfile: - type: RuntimeDefault - - -service: - type: ClusterIP - port: 3000 - -env: - MODEL_ID: mistral-community/Mistral-7B-v0.2 - MAX_INPUT_LENGTH: '1024' - MAX_TOTAL_TOKENS: '2048' - HUGGINGFACE_HUB_CACHE: /models-cache - PORT: '3000' - HOST: 0.0.0.0 - HF_TOKEN: {} - # valueFrom: - # secretKeyRef: - # name: hf_token_secret - # key: hf_token - - # We usually recommend not to specify default resources and to leave this as a conscious - # choice for the user. This also increases chances charts run on environments with little - # resources, such as Minikube. If you do want to specify resources, uncomment the following - # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
-resources: - limits: - cpu: '2' - memory: 16Gi - nvidia.com/gpu: '1' - requests: - cpu: '2' - -livenessProbe: - httpGet: - path: /health - port: http - scheme: HTTP - timeoutSeconds: 8 - periodSeconds: 100 - successThreshold: 1 - failureThreshold: 3 - -readinessProbe: - httpGet: - path: /health - port: http - scheme: HTTP - timeoutSeconds: 5 - periodSeconds: 30 - successThreshold: 1 - failureThreshold: 3 - -startupProbe: - httpGet: - path: /health - port: http - scheme: HTTP - timeoutSeconds: 1 - periodSeconds: 30 - successThreshold: 1 - failureThreshold: 24 - -autoscaling: - enabled: false - minReplicas: 1 - maxReplicas: 100 - targetCPUUtilizationPercentage: 80 - # targetMemoryUtilizationPercentage: 80 - -nodeSelector: {} - -tolerations: - - key: odh-notebook - value: 'true' - effect: NoSchedule - -persistence: - accessMode: ReadWriteOnce - size: 30Gi - annotations: {} - volumeMode: Filesystem - -affinity: {} - # nodeAffinity: - # requiredDuringSchedulingIgnoredDuringExecution: - # nodeSelectorTerms: - # - matchExpressions: - # - key: nvidia.com/gpu.present - # operator: In - # values: - # - "true" diff --git a/charts/all/llm-serving-service/.helmignore b/charts/all/vllm-inference-service/.helmignore similarity index 100% rename from charts/all/llm-serving-service/.helmignore rename to charts/all/vllm-inference-service/.helmignore diff --git a/charts/all/vllm-inference-service/Chart.yaml b/charts/all/vllm-inference-service/Chart.yaml new file mode 100644 index 00000000..12dd68c4 --- /dev/null +++ b/charts/all/vllm-inference-service/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +name: vllm-inference-service +description: A Helm chart for Kubernetes +type: application +version: 0.1.0 +appVersion: "1.16.0" diff --git a/charts/all/tgis-server/templates/_helpers.tpl b/charts/all/vllm-inference-service/templates/_helpers.tpl similarity index 70% rename from charts/all/tgis-server/templates/_helpers.tpl rename to charts/all/vllm-inference-service/templates/_helpers.tpl 
index 2a68af77..075ef98d 100644 --- a/charts/all/tgis-server/templates/_helpers.tpl +++ b/charts/all/vllm-inference-service/templates/_helpers.tpl @@ -1,7 +1,7 @@ {{/* Expand the name of the chart. */}} -{{- define "tgis-server.name" -}} +{{- define "vllm-inference-service.name" -}} {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} {{- end }} @@ -10,7 +10,7 @@ Create a default fully qualified app name. We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). If release name contains chart name it will be used as a full name. */}} -{{- define "tgis-server.fullname" -}} +{{- define "vllm-inference-service.fullname" -}} {{- if .Values.fullnameOverride }} {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} {{- else }} @@ -26,16 +26,16 @@ If release name contains chart name it will be used as a full name. {{/* Create chart name and version as used by the chart label. */}} -{{- define "tgis-server.chart" -}} +{{- define "vllm-inference-service.chart" -}} {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} {{- end }} {{/* Common labels */}} -{{- define "tgis-server.labels" -}} -helm.sh/chart: {{ include "tgis-server.chart" . }} -{{ include "tgis-server.selectorLabels" . }} +{{- define "vllm-inference-service.labels" -}} +helm.sh/chart: {{ include "vllm-inference-service.chart" . }} +{{ include "vllm-inference-service.selectorLabels" . }} {{- if .Chart.AppVersion }} app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} {{- end }} @@ -45,17 +45,17 @@ app.kubernetes.io/managed-by: {{ .Release.Service }} {{/* Selector labels */}} -{{- define "tgis-server.selectorLabels" -}} -app.kubernetes.io/name: {{ include "tgis-server.name" . }} +{{- define "vllm-inference-service.selectorLabels" -}} +app.kubernetes.io/name: {{ include "vllm-inference-service.name" . 
}} app.kubernetes.io/instance: {{ .Release.Name }} {{- end }} {{/* Create the name of the service account to use */}} -{{- define "tgis-server.serviceAccountName" -}} +{{- define "vllm-inference-service.serviceAccountName" -}} {{- if .Values.serviceAccount.create }} -{{- default (include "tgis-server.fullname" .) .Values.serviceAccount.name }} +{{- default (include "vllm-inference-service.fullname" .) .Values.serviceAccount.name }} {{- else }} {{- default "default" .Values.serviceAccount.name }} {{- end }} diff --git a/charts/all/llm-serving-service/templates/accelerator-profile.yaml b/charts/all/vllm-inference-service/templates/accelerator-profile.yaml similarity index 63% rename from charts/all/llm-serving-service/templates/accelerator-profile.yaml rename to charts/all/vllm-inference-service/templates/accelerator-profile.yaml index 4f26e8e0..ebcafd19 100644 --- a/charts/all/llm-serving-service/templates/accelerator-profile.yaml +++ b/charts/all/vllm-inference-service/templates/accelerator-profile.yaml @@ -1,3 +1,4 @@ +{{- if .Values.acceleratorProfile.enabled }} apiVersion: dashboard.opendatahub.io/v1 kind: AcceleratorProfile metadata: @@ -8,6 +9,5 @@ spec: enabled: true identifier: nvidia.com/gpu tolerations: - - effect: NoSchedule - key: odh-notebook - operator: Exists + {{- toYaml .Values.vllmInferenceService.tolerations | nindent 4 }} +{{- end }} diff --git a/charts/all/vllm-inference-service/templates/inference-service.yaml b/charts/all/vllm-inference-service/templates/inference-service.yaml new file mode 100644 index 00000000..8d63b817 --- /dev/null +++ b/charts/all/vllm-inference-service/templates/inference-service.yaml @@ -0,0 +1,60 @@ +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + annotations: + {{- toYaml .Values.vllmInferenceService.annotations | nindent 4 }} + name: {{ include "vllm-inference-service.fullname" . 
}} + labels: + opendatahub.io/dashboard: 'true' +spec: + predictor: + annotations: + {{- toYaml .Values.vllmInferenceService.predictor.annotations | nindent 6 }} + maxReplicas: {{ .Values.vllmInferenceService.predictor.replicas }} + minReplicas: {{ .Values.vllmInferenceService.predictor.replicas }} + model: + modelFormat: + name: vLLM + name: '' + resources: + {{- toYaml .Values.vllmInferenceService.predictor.resources | nindent 8 }} + runtime: {{ include "vllm-inference-service.fullname" . }} + restartPolicy: Always + tolerations: + {{- toYaml .Values.vllmInferenceService.tolerations | nindent 6 }} + affinity: + {{- toYaml .Values.vllmInferenceService.predictor.affinity | nindent 6 }} + initContainers: + - name: download-model + image: registry.access.redhat.com/ubi9/python-39 + imagePullPolicy: IfNotPresent + command: ["/bin/bash", "-ec"] + args: + - | + pip install --no-cache-dir huggingface_hub + python - <<'PY' + from huggingface_hub import snapshot_download, login + import os + token = os.environ.get("HF_TOKEN") + model = os.environ.get("MODEL_ID") + login(token=token) + snapshot_download( + repo_id=model, + local_dir="/cache/models" + ) + PY + env: + - name: HF_HOME + value: /cache + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: huggingface-secret + key: hftoken + - name: MODEL_ID + value: {{ .Values.global.model.vllm | quote }} + volumeMounts: + - name: models + mountPath: /cache/models + - name: cache + mountPath: /cache diff --git a/charts/all/vllm-inference-service/templates/route.yaml b/charts/all/vllm-inference-service/templates/route.yaml new file mode 100644 index 00000000..dc33f50e --- /dev/null +++ b/charts/all/vllm-inference-service/templates/route.yaml @@ -0,0 +1,21 @@ +apiVersion: route.openshift.io/v1 +kind: Route +metadata: + name: {{ include "vllm-inference-service.fullname" . }} + labels: + {{- include "vllm-inference-service.labels" . 
| nindent 4 }} + annotations: + haproxy.router.openshift.io/timeout: 5m + argocd.argoproj.io/sync-wave: "30" +spec: + host: {{ printf "%s-predictor-%s.%s" (include "vllm-inference-service.fullname" .) .Release.Namespace .Values.global.localClusterDomain }} + port: + targetPort: http + tls: + insecureEdgeTerminationPolicy: Allow + termination: edge + to: + kind: Service + name: {{ printf "%s-predictor" (include "vllm-inference-service.fullname" .) }} + weight: 100 + wildcardPolicy: None diff --git a/charts/all/vllm-inference-service/templates/serving-runtime.yaml b/charts/all/vllm-inference-service/templates/serving-runtime.yaml new file mode 100644 index 00000000..9c18f6fd --- /dev/null +++ b/charts/all/vllm-inference-service/templates/serving-runtime.yaml @@ -0,0 +1,56 @@ +apiVersion: serving.kserve.io/v1alpha1 +kind: ServingRuntime +metadata: + annotations: + {{- toYaml .Values.vllmServingRuntime.annotations | nindent 4 }} + name: {{ include "vllm-inference-service.fullname" . }} + labels: + opendatahub.io/dashboard: 'true' +spec: + annotations: + prometheus.io/path: /metrics + prometheus.io/port: '8080' + containers: + - args: + {{- toYaml .Values.vllmServingRuntime.args | nindent 8 }} + - {{ printf "--served-model-name=%s" ((split "/" .Values.global.model.vllm)._1) }} + - {{ printf "--port=%d" (int .Values.vllmServingRuntime.port) }} + command: + {{- toYaml .Values.vllmServingRuntime.command | nindent 8 }} + env: + - name: HF_HOME + value: /cache + - name: HF_TOKEN + valueFrom: + secretKeyRef: + key: hftoken + name: huggingface-secret + - name: MODEL_ID + value: {{ .Values.global.model.vllm }} + - name: HF_HUB_OFFLINE + value: '0' + image: "{{ .Values.vllmServingRuntime.image.repository }}:{{ .Values.vllmServingRuntime.image.tag }}" + name: kserve-container + ports: + - containerPort: {{ .Values.vllmServingRuntime.port }} + protocol: TCP + volumeMounts: + - mountPath: /dev/shm + name: shm + - mountPath: /cache/models + name: models + - mountPath: /cache + name: 
cache + multiModel: false + supportedModelFormats: + - autoSelect: true + name: vLLM + volumes: + - name: shm + emptyDir: + medium: Memory + sizeLimit: 2Gi + - name: models + emptyDir: {} + - name: cache + emptyDir: {} diff --git a/charts/all/vllm-inference-service/values.yaml b/charts/all/vllm-inference-service/values.yaml new file mode 100644 index 00000000..8131abc1 --- /dev/null +++ b/charts/all/vllm-inference-service/values.yaml @@ -0,0 +1,61 @@ +global: + model: + vllm: ibm-granite/granite-3.3-8b-instruct + +vllmInferenceService: + annotations: + openshift.io/display-name: vllm-inference + serving.kserve.io/deploymentMode: RawDeployment + argocd.argoproj.io/sync-wave: "20" + + predictor: + annotations: + serving.knative.dev/progress-deadline: 30m + replicas: 1 + resources: + limits: + nvidia.com/gpu: '1' + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: nvidia.com/gpu.present + operator: In + values: ["true"] + + tolerations: + - effect: NoSchedule + key: odh-notebook + operator: Exists + +vllmServingRuntime: + annotations: + opendatahub.io/accelerator-name: nvidia-gpu + opendatahub.io/apiProtocol: REST + opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]' + openshift.io/display-name: vllm-inference + argocd.argoproj.io/sync-wave: "20" + + args: + - "--model=/cache/models" + - "--distributed-executor-backend=mp" + - "--max-model-len=4096" + - "--dtype=half" + - "--gpu-memory-utilization" + - "0.98" + - "--enforce-eager" + + command: + - python + - '-m' + - vllm.entrypoints.openai.api_server + + image: + repository: quay.io/modh/vllm + tag: rhoai-2.20-cuda + + port: 8080 + +acceleratorProfile: + enabled: true diff --git a/charts/region/.keep b/charts/region/.keep deleted file mode 100644 index e69de29b..00000000 diff --git a/overrides/values-AWS.yaml b/overrides/values-AWS.yaml deleted file mode 100644 index 03fa0775..00000000 --- a/overrides/values-AWS.yaml +++ /dev/null 
@@ -1,26 +0,0 @@ -# The following snippet can be commented out in oroder -# to enable letsencrypt certificates on API endpoint and default -# ingress of the cluster -# It is currently very experimental and unsupported. -# PLEASE read https://github.com/hybrid-cloud-patterns/common/tree/main/letsencrypt#readme -# for all the limitations around it - - -# letsencrypt: -# enabled: true -# api_endpoint: true -# # FIXME: tweak this to match your region -# region: eu-central-1 -# server: https://acme-v02.api.letsencrypt.org/directory -# # server: https://acme-staging-v02.api.letsencrypt.org/directory -# # FIXME: set this to your correct email -# email: iwashere@iwashere.com -# -# clusterGroup: -# applications: -# letsencrypt: -# name: letsencrypt -# namespace: letsencrypt -# # Using 'default' as that exists everywhere -# project: default -# path: common/letsencrypt diff --git a/overrides/values-Azure.yaml b/overrides/values-Azure.yaml new file mode 100644 index 00000000..ab1af5b0 --- /dev/null +++ b/overrides/values-Azure.yaml @@ -0,0 +1,17 @@ +global: + db: + type: MSSQL + model: + vllm: solidrust/Mistral-7B-Instruct-v0.3-AWQ + embedding: sentence-transformers/distiluse-base-multilingual-cased + storageClass: azurefile-csi + +vllmServingRuntime: + args: + - "--model=/cache/models" + - "--distributed-executor-backend=mp" + - "--max-model-len=4096" + - "--quantization=awq" + - "--gpu-memory-utilization" + - "0.98" + - "--enforce-eager" diff --git a/overrides/values-IBMCloud.yaml b/overrides/values-IBMCloud.yaml deleted file mode 100644 index 38d7be76..00000000 --- a/overrides/values-IBMCloud.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# When using IBM ROKS the route certificates are signed by letsencrypt -# By default the ESO configuration uses the kube-root-ca.crt configmap -# to validate the connection to vault. Since this configmap will not contain -# the letsencrypt CA, ESO will be unable to connect to the vault and return an -# x509 CA unknown error. 
-# Uncomment the following if you are using IBM ROKS (IPI installs on IBM Cloud are unaffected) - -# golangExternalSecrets: -# caProvider: -# enabled: false diff --git a/values-global.yaml b/values-global.yaml index b07de26c..8943afab 100644 --- a/values-global.yaml +++ b/values-global.yaml @@ -5,13 +5,22 @@ global: useCSV: false syncPolicy: Automatic installPlanApproval: Automatic -# Possible value for db.type = [REDIS, EDB, ELASTIC] + # Possible values for RAG vector DB db.type: + # REDIS -> Redis (Local chart deploy) + # EDB -> PGVector (Local chart deploy) + # ELASTIC -> Elasticsearch (Local chart deploy) + # MSSQL -> MS SQL Server (Local chart deploy) + # AZURESQL -> Azure SQL (Pre-existing in Azure) db: index: docs type: EDB -# Add for model ID + # Models used by the inference service (should be a HuggingFace model ID) model: - modelId: ibm-granite/granite-3.1-8b-instruct + vllm: ibm-granite/granite-3.3-8b-instruct + embedding: sentence-transformers/all-mpnet-base-v2 + + storageClass: gp3-csi + main: clusterGroupName: hub multiSourceConfig: diff --git a/values-group-one.yaml b/values-group-one.yaml deleted file mode 100644 index 536313f6..00000000 --- a/values-group-one.yaml +++ /dev/null @@ -1,106 +0,0 @@ -global: - options: - useCSV: False - syncPolicy: Automatic - installPlanApproval: Automatic -clusterGroup: - name: group-one - isHubCluster: false - namespaces: - - rag-llm - - golang-external-secrets - subscriptions: - projects: - - eso - - rag-llm - - llm-monitoring - applications: - golang-external-secrets: - name: golang-external-secrets - namespace: golang-external-secrets - project: eso - path: common/golang-external-secrets - rag-llm: - name: rag-llm - namespace: rag-llm - project: rag-llm - path: charts/all/rag-llm - llm-monitoring: - name: llm-monitoring - namespace: llm-monitoring - project: llm-monitoring - kustomize: true - path: charts/all/llm-monitoring/kustomize/overlays/dev - imperative: - # NOTE: We *must* use lists and not hashes. 
As hashes lose ordering once parsed by helm - # The default schedule is every 10 minutes: imperative.schedule - # Total timeout of all jobs is 1h: imperative.activeDeadlineSeconds - # imagePullPolicy is set to always: imperative.imagePullPolicy - # For additional overrides that apply to the jobs, please refer to - # https://hybrid-cloud-patterns.io/imperative-actions/#additional-job-customizations - jobs: - - name: hello-world - # ansible playbook to be run - playbook: common/ansible/playbooks/hello-world/hello-world.yaml - # per playbook timeout in seconds - timeout: 234 - # verbosity: "-v" - # Explicitly mention the cluster-state based overrides we plan to use for this pattern. - # We can use self-referential variables because the chart calls the tpl function with these variables defined - sharedValueFiles: - - '/overrides/values-{{ $.Values.global.clusterPlatform }}.yaml' - # To mirror the "Classic" magic include structure, the clusterGroup would need all of these: - # sharedValueFiles: - # - '/overrides/values-{{ $.Values.global.clusterPlatform }}.yaml' - # - '/overrides/values-{{ $.Values.global.clusterPlatform }}-{{ $.Values.global.clusterVersion }}.yaml' - # - '/overrides/values-{{ $.Values.global.clusterPlatform }}-{{ $.Values.clusterGroup.name }}.yaml' - # - '/overrides/values-{{ $.Values.global.clusterVersion }}-{{ $.Values.clusterGroup.name }}.yaml" -# To have apps in multiple flavors, use namespaces and use helm overrides as appropriate -# -# pipelines: -# name: pipelines -# namespace: production -# project: datacenter -# path: applications/pipeline -# repoURL: https://github.com/you/applications.git -# targetRevision: stable -# overrides: -# - name: myparam -# value: myparam -# -# pipelines_staging: -# - name: pipelines -# namespace: staging -# project: datacenter -# path: applications/pipeline -# repoURL: https://github.com/you/applications.git -# targetRevision: main -# -# Additional applications -# Be sure to include additional resources your apps 
will require -# +X machines -# +Y RAM -# +Z CPU -# vendor-app: -# name: vendor-app -# namespace: default -# project: vendor -# path: path/to/myapp -# repoURL: https://github.com/vendor/applications.git -# targetRevision: main - -# managedSites: -# factory: -# name: factory -# # repoURL: https://github.com/dagger-refuse-cool/manuela-factory.git -# targetRevision: main -# path: applications/factory -# helmOverrides: -# - name: site.isHubCluster -# value: false -# clusterSelector: -# matchExpressions: -# - key: vendor -# operator: In -# values: -# - OpenShift \ No newline at end of file diff --git a/values-hub.yaml b/values-hub.yaml index d9c6a8be..1a483092 100644 --- a/values-hub.yaml +++ b/values-hub.yaml @@ -1,6 +1,7 @@ clusterGroup: name: hub isHubCluster: true + namespaces: - open-cluster-management - vault @@ -12,7 +13,7 @@ clusterGroup: targetNamespaces: [] - rag-llm: operatorGroup: true - targetNamespaces: + targetNamespaces: - rag-llm labels: opendatahub.io/dashboard: "true" @@ -20,71 +21,43 @@ clusterGroup: - openshift-serverless: operatorGroup: true targetNamespaces: [] + subscriptions: - # Don't install RHOAI via validated patterns - # RHOAI must be installed after service mesh and other dependencies are installed or it will break. 
- # The RHOAI Application will install the operator after a validation check has passed nfd: name: nfd namespace: openshift-nfd - channel: stable nvidia: name: gpu-operator-certified namespace: nvidia-gpu-operator - channel: v24.6 source: certified-operators edb: name: cloud-native-postgresql namespace: openshift-operators - channel: stable-v1.23 source: certified-operators elastic: name: elasticsearch-eck-operator-certified namespace: rag-llm - channel: stable source: certified-operators - sourceNamespace: openshift-marketplace serverless: name: serverless-operator namespace: openshift-serverless - channel: stable servicemesh: name: servicemeshoperator namespace: openshift-operators - channel: stable rhoai: name: rhods-operator namespace: redhat-ods-operator - channel: stable-2.19 - source: redhat-operators - sourceNamespace: openshift-marketplace + projects: - hub - rag-llm - llm-monitoring - gpu-config - openshift-ai - # Explicitly mention the cluster-state based overrides we plan to use for this pattern. - # We can use self-referential variables because the chart calls the tpl function with these variables defined + sharedValueFiles: - '/overrides/values-{{ $.Values.global.clusterPlatform }}.yaml' - - 'values-rag-llm-gitops.yaml' - # sharedValueFiles is a flexible mechanism that will add the listed valuefiles to every app defined in the - # applications section. We intend this to supplement and possibly even replace previous "magic" mechanisms, though - # we do not at present have a target date for removal. 
- # - # To replicate the "classic" magic include structure, the clusterGroup would need all of these - # sharedValueFiles, in this order: - # - '/overrides/values-{{ $.Values.global.clusterPlatform }}.yaml' - # - '/overrides/values-{{ $.Values.global.clusterPlatform }}-{{ $.Values.global.clusterVersion }}.yaml' - # - '/overrides/values-{{ $.Values.global.clusterPlatform }}-{{ $.Values.clusterGroup.name }}.yaml' - # - '/overrides/values-{{ $.Values.global.clusterVersion }}-{{ $.Values.clusterGroup.name }}.yaml" - # - '/overrides/values-{{ $.Values.global.localClusterName }}.yaml' - # This kind of variable substitution will work with any of the variables the Validated Patterns operator knows - # about and sets, so this is also possible, for example: - # - '/overrides/values-{{ $.Values.global.hubClusterDomain }}.yaml' - # - '/overrides/values-{{ $.Values.global.localClusterDomain }}.yaml' applications: vault: name: vault @@ -98,20 +71,16 @@ clusterGroup: project: hub chart: golang-external-secrets chartVersion: 0.1.* - minio: - name: minio - namespace: rag-llm - project: hub - path: charts/all/minio - llm-serving-service: - name: llm-serving-service + vllm-inference-service: + name: vllm-inference-service namespace: rag-llm project: hub - path: charts/all/llm-serving-service + path: charts/all/vllm-inference-service syncPolicy: - automated: {} + automated: + selfHeal: true retry: - limit: 50 + limit: 20 rag-llm: name: rag-llm namespace: rag-llm @@ -123,57 +92,28 @@ clusterGroup: project: llm-monitoring kustomize: true path: charts/all/llm-monitoring/kustomize/overlays/dev - nfd-config: name: nfd-config namespace: openshift-cfd project: gpu-config path: charts/all/nfd-config - nvidia-config: name: nvidia-config namespace: nvidia-network-operator project: gpu-config path: charts/all/nvidia-gpu-config - llm-ui-config: name: rag-llm-ui-config namespace: rag-llm project: gpu-config path: charts/all/rag-llm-ui-config - llm-monitoring-config: name: grafana-ui-config 
namespace: llm-monitoring project: gpu-config path: charts/all/llm-monitoring-config - openshift-ai: name: openshift-ai namespace: redhat-ods-operator project: openshift-ai path: charts/all/rhods - - imperative: - # NOTE: We *must* use lists and not hashes. As hashes lose ordering once parsed by helm - # The default schedule is every 10 minutes: imperative.schedule - # Total timeout of all jobs is 1h: imperative.activeDeadlineSeconds - # imagePullPolicy is set to always: imperative.imagePullPolicy - # For additional overrides that apply to the jobs, please refer to - # https://hybrid-cloud-patterns.io/imperative-actions/#additional-job-customizations - jobs: - - name: hello-world - # ansible playbook to be run - playbook: rhvp.cluster_utils.hello_world - # per playbook timeout in seconds - timeout: 234 - # verbosity: "-v" - managedClusterGroups: - exampleRegion: - name: group-one - acmlabels: - - name: clusterGroup - value: group-one - helmOverrides: - - name: clusterGroup.isHubCluster - value: false diff --git a/values-rag-llm-gitops.yaml b/values-rag-llm-gitops.yaml deleted file mode 100644 index e69de29b..00000000 diff --git a/values-secret.yaml.template b/values-secret.yaml.template index b9e9d6d5..cc2ac68a 100644 --- a/values-secret.yaml.template +++ b/values-secret.yaml.template @@ -15,12 +15,21 @@ secrets: fields: - name: hftoken value: null - - name: modelId - value: "ibm-granite/granite-3.1-8b-instruct" - - name: minio + - name: mssql fields: - - name: MINIO_ROOT_USER - value: minio - - name: MINIO_ROOT_PASSWORD + - name: sa-pass value: null onMissingValue: generate + description: mssql password for sa user + - name: azuresql + fields: + - name: user + value: adminuser + description: server admin user for azure sql + - name: password + value: null + onMissingValue: generate + description: server admin password for azure sql + - name: server + value: rag-llm-gitops.database.windows.net + description: server hostname