Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
230 changes: 181 additions & 49 deletions docs/operator-public-documentation/preview/operations/upgrades.md

Large diffs are not rendered by default.

19 changes: 19 additions & 0 deletions operator/documentdb-helm-chart/crds/documentdb.io_dbs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1264,6 +1264,25 @@ spec:
required:
- storage
type: object
schemaVersion:
description: |-
SchemaVersion controls the desired schema version for the DocumentDB extension.

This field decouples the extension binary (image) update from the schema update
(ALTER EXTENSION documentdb UPDATE), providing a rollback-safe upgrade window.

Values:
- "" (empty, default): Two-phase mode. Image upgrades happen automatically,
but ALTER EXTENSION UPDATE does NOT run. Users must explicitly set this
field to finalize the schema upgrade. This is the safest option for production
as it allows rollback by reverting the image before committing the schema change.
- "auto": Schema automatically updates to match the binary version whenever
the binary is upgraded. This is the simplest mode but provides no rollback
safety window. Recommended for development and testing environments.
- "<version>" (e.g. "0.112.0"): Schema updates to exactly this version.
Must be <= the binary version.
pattern: ^(auto|[0-9]+\.[0-9]+\.[0-9]+)?$
type: string
sidecarInjectorPluginName:
description: SidecarInjectorPluginName is the name of the sidecar
injector plugin to use.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,16 @@ spec:
- name: documentdb-operator
image: "{{ .Values.image.documentdbk8soperator.repository }}:{{ .Values.image.documentdbk8soperator.tag | default .Chart.AppVersion }}"
imagePullPolicy: "{{ .Values.image.documentdbk8soperator.pullPolicy }}"
args:
- --webhook-cert-path=/tmp/k8s-webhook-server/serving-certs
ports:
- containerPort: 9443
name: webhook-server
protocol: TCP
volumeMounts:
- mountPath: /tmp/k8s-webhook-server/serving-certs
name: webhook-cert
readOnly: true
env:
- name: GATEWAY_PORT
value: "10260"
Expand All @@ -37,3 +47,8 @@ spec:
- name: DOCUMENTDB_IMAGE_PULL_POLICY
value: "{{ .Values.documentDbImagePullPolicy }}"
{{- end }}
volumes:
- name: webhook-cert
secret:
secretName: documentdb-webhook-tls
defaultMode: 420
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
{{- $ns := .Values.namespace | default .Release.Namespace -}}
# Self-signed Issuer for webhook TLS certificates in the operator namespace.
# The sidecar injector uses a separate Issuer in cnpg-system; this one is for the operator.
apiVersion: cert-manager.io/v1
kind: Issuer
metadata:
name: documentdb-operator-selfsigned-issuer
namespace: {{ $ns }}
labels:
app.kubernetes.io/name: {{ include "documentdb-chart.name" . }}
app.kubernetes.io/managed-by: "Helm"
spec:
selfSigned: {}
---
# TLS certificate for the validating webhook server.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: documentdb-webhook-cert
namespace: {{ $ns }}
labels:
app.kubernetes.io/name: {{ include "documentdb-chart.name" . }}
app.kubernetes.io/managed-by: "Helm"
spec:
commonName: documentdb-webhook
dnsNames:
- documentdb-webhook-service.{{ $ns }}.svc
- documentdb-webhook-service.{{ $ns }}.svc.cluster.local
duration: 2160h # 90 days
renewBefore: 360h # 15 days
isCA: false
issuerRef:
group: cert-manager.io
kind: Issuer
name: documentdb-operator-selfsigned-issuer
secretName: documentdb-webhook-tls
usages:
- server auth
---
# Service fronting the operator's webhook server on port 9443.
apiVersion: v1
kind: Service
metadata:
name: documentdb-webhook-service
namespace: {{ $ns }}
labels:
app.kubernetes.io/name: {{ include "documentdb-chart.name" . }}
app.kubernetes.io/managed-by: "Helm"
spec:
ports:
- port: 443
protocol: TCP
targetPort: 9443
selector:
app: {{ .Release.Name }}
---
# ValidatingWebhookConfiguration for DocumentDB resources.
# cert-manager injects the CA bundle automatically via the annotation.
apiVersion: admissionregistration.k8s.io/v1
kind: ValidatingWebhookConfiguration
metadata:
name: documentdb-validating-webhook
labels:
app.kubernetes.io/name: {{ include "documentdb-chart.name" . }}
app.kubernetes.io/managed-by: "Helm"
annotations:
cert-manager.io/inject-ca-from: {{ $ns }}/documentdb-webhook-cert
webhooks:
- name: vdocumentdb.kb.io
admissionReviewVersions:
- v1
clientConfig:
service:
name: documentdb-webhook-service
namespace: {{ $ns }}
path: /validate-documentdb-io-preview-documentdb
failurePolicy: Fail
rules:
- apiGroups:
- documentdb.io
apiVersions:
- preview
operations:
- CREATE
- UPDATE
resources:
- dbs
sideEffects: None
25 changes: 25 additions & 0 deletions operator/src/api/preview/documentdb_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,31 @@ type DocumentDBSpec struct {
// +kubebuilder:validation:XValidation:rule="self.all(key, key in ['ChangeStreams'])",message="unsupported feature gate key; allowed keys: ChangeStreams"
FeatureGates map[string]bool `json:"featureGates,omitempty"`

// SchemaVersion controls the desired schema version for the DocumentDB extension.
//
// The operator never changes your database schema unless you ask:
// - Set documentDBVersion → updates the binary (safe to roll back)
// - Set schemaVersion → updates the database schema (irreversible)
// - Set schemaVersion: "auto" → schema auto-updates with binary
//
// Once the schema has been updated, the operator blocks image rollback below the
// installed schema version to prevent running an untested binary/schema combination.
//
// Values:
// - "" (empty, default): Two-phase mode. Image upgrades happen automatically,
// but ALTER EXTENSION UPDATE does NOT run. Users must explicitly set this
// field to finalize the schema upgrade. This is the safest option for production
// as it allows rollback by reverting the image before committing the schema change.
// - "auto": Schema automatically updates to match the binary version whenever
// the binary is upgraded. This is the simplest mode but provides no rollback
// safety window.
// - "<version>" (e.g. "0.112.0"): Schema updates to exactly this version.
// Must be <= the binary version.
//
// +kubebuilder:validation:Pattern=`^(auto|[0-9]+\.[0-9]+\.[0-9]+)?$`
// +optional
SchemaVersion string `json:"schemaVersion,omitempty"`

// Affinity/Anti-affinity rules for Pods (cnpg passthrough)
// +optional
Affinity cnpgv1.AffinityConfiguration `json:"affinity,omitempty"`
Expand Down
7 changes: 7 additions & 0 deletions operator/src/cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1"
dbpreview "github.com/documentdb/documentdb-operator/api/preview"
"github.com/documentdb/documentdb-operator/internal/controller"
webhookhandler "github.com/documentdb/documentdb-operator/internal/webhook"
fleetv1alpha1 "go.goms.io/fleet-networking/api/v1alpha1"
// +kubebuilder:scaffold:imports
)
Expand Down Expand Up @@ -248,6 +249,12 @@ func main() {

// +kubebuilder:scaffold:builder

// Register the DocumentDB validating webhook
if err = (&webhookhandler.DocumentDBValidator{}).SetupWebhookWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "DocumentDB")
os.Exit(1)
}

if metricsCertWatcher != nil {
setupLog.Info("Adding metrics certificate watcher to manager")
if err := mgr.Add(metricsCertWatcher); err != nil {
Expand Down
19 changes: 19 additions & 0 deletions operator/src/config/crd/bases/documentdb.io_dbs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1264,6 +1264,25 @@ spec:
required:
- storage
type: object
schemaVersion:
description: |-
SchemaVersion controls the desired schema version for the DocumentDB extension.

This field decouples the extension binary (image) update from the schema update
(ALTER EXTENSION documentdb UPDATE), providing a rollback-safe upgrade window.

Values:
- "" (empty, default): Two-phase mode. Image upgrades happen automatically,
but ALTER EXTENSION UPDATE does NOT run. Users must explicitly set this
field to finalize the schema upgrade. This is the safest option for production
as it allows rollback by reverting the image before committing the schema change.
- "auto": Schema automatically updates to match the binary version whenever
the binary is upgraded. This is the simplest mode but provides no rollback
safety window. Recommended for development and testing environments.
- "<version>" (e.g. "0.112.0"): Schema updates to exactly this version.
Must be <= the binary version.
pattern: ^(auto|[0-9]+\.[0-9]+\.[0-9]+)?$
type: string
sidecarInjectorPluginName:
description: SidecarInjectorPluginName is the name of the sidecar
injector plugin to use.
Expand Down
5 changes: 3 additions & 2 deletions operator/src/internal/cnpg/cnpg_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,9 @@ func GetCnpgClusterSpec(req ctrl.Request, documentdb *dbpreview.DocumentDB, docu
},
Spec: func() cnpgv1.ClusterSpec {
spec := cnpgv1.ClusterSpec{
Instances: documentdb.Spec.InstancesPerNode,
ImageName: documentdb.Spec.PostgresImage,
Instances: documentdb.Spec.InstancesPerNode,
ImageName: documentdb.Spec.PostgresImage,
PrimaryUpdateMethod: cnpgv1.PrimaryUpdateMethodSwitchover,
StorageConfiguration: cnpgv1.StorageConfiguration{
StorageClass: storageClassPointer, // Use configured storage class or default
Size: documentdb.Spec.Resource.Storage.PvcSize,
Expand Down
109 changes: 101 additions & 8 deletions operator/src/internal/controller/documentdb_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -835,8 +835,9 @@ func parseExtensionVersionsFromOutput(output string) (defaultVersion, installedV

// upgradeDocumentDBIfNeeded handles the complete DocumentDB image upgrade process:
// 1. Checks if extension image and/or gateway image need updating (builds a single JSON Patch)
// 1b. (No controller-side check here: extension image rollback below the installed schema version is rejected by the validating webhook at admission time)
// 2. If images changed, applies the patch atomically (triggers one CNPG rolling restart) and returns
// 3. After rolling restart completes, runs ALTER EXTENSION documentdb UPDATE if needed
// 3. After rolling restart completes, checks spec.schemaVersion to decide whether to run ALTER EXTENSION
// 4. Updates the DocumentDB status with the new extension version
func (r *DocumentDBReconciler) upgradeDocumentDBIfNeeded(ctx context.Context, currentCluster, desiredCluster *cnpgv1.Cluster, documentdb *dbpreview.DocumentDB) error {
logger := log.FromContext(ctx)
Expand All @@ -852,6 +853,9 @@ func (r *DocumentDBReconciler) upgradeDocumentDBIfNeeded(ctx context.Context, cu
return fmt.Errorf("failed to build image patch operations: %w", err)
}

// Note: Image rollback below installed schema version is blocked by the validating
// webhook at admission time. The controller trusts that persisted specs are valid.

// Step 2: Apply patch if any images need updating
if len(patchOps) > 0 {
patchBytes, err := json.Marshal(patchOps)
Expand Down Expand Up @@ -951,7 +955,10 @@ func (r *DocumentDBReconciler) upgradeDocumentDBIfNeeded(ctx context.Context, cu
return nil
}

// Step 5b: Rollback detection — check if the new binary is older than the installed schema
// Step 5b: Rollback detection — secondary safety net.
// The primary rollback block is the validating webhook, which rejects image rollback below
// the installed schema version at admission time.
// This handles edge cases where the binary version is already running but older than schema
// (e.g., if the rollback took effect before the webhook was in place).
cmp, err := util.CompareExtensionVersions(defaultVersion, installedVersion)
if err != nil {
logger.Error(err, "Failed to compare extension versions, skipping ALTER EXTENSION as a safety measure",
Expand All @@ -976,27 +983,39 @@ func (r *DocumentDBReconciler) upgradeDocumentDBIfNeeded(ctx context.Context, cu
return nil
}

// Step 6: Run ALTER EXTENSION to upgrade (cmp > 0: defaultVersion > installedVersion)
// Step 6: Determine schema target based on spec.schemaVersion (two-phase upgrade logic)
//
// Three modes:
// - "" (empty): Two-phase mode — do NOT auto-update schema. User must explicitly
// set spec.schemaVersion to finalize. This provides a rollback-safe window.
// - "auto": Auto-finalize — update schema to match binary version automatically.
// - "<version>": Explicit pin — update schema to exactly this version.
schemaTarget, updateSQL := r.determineSchemaTarget(ctx, documentdb, defaultVersion, installedVersion)
if schemaTarget == "" {
// Two-phase mode or validation failure — do not run ALTER EXTENSION
return nil
}

// Step 7: Run ALTER EXTENSION to upgrade
logger.Info("Upgrading DocumentDB extension",
"fromVersion", installedVersion,
"toVersion", defaultVersion)
"toVersion", schemaTarget)

updateSQL := "ALTER EXTENSION documentdb UPDATE"
if _, err := r.SQLExecutor(ctx, currentCluster, updateSQL); err != nil {
return fmt.Errorf("failed to run ALTER EXTENSION documentdb UPDATE: %w", err)
}

logger.Info("Successfully upgraded DocumentDB extension",
"fromVersion", installedVersion,
"toVersion", defaultVersion)
"toVersion", schemaTarget)

// Step 7: Update DocumentDB schema version in status after upgrade
// Step 8: Update DocumentDB schema version in status after upgrade
// Re-fetch to get latest resourceVersion before status update
if err := r.Get(ctx, types.NamespacedName{Name: documentdb.Name, Namespace: documentdb.Namespace}, documentdb); err != nil {
return fmt.Errorf("failed to refetch DocumentDB after schema upgrade: %w", err)
}
// Convert from pg_available_extensions format ("0.110-0") to semver ("0.110.0")
documentdb.Status.SchemaVersion = util.ExtensionVersionToSemver(defaultVersion)
documentdb.Status.SchemaVersion = util.ExtensionVersionToSemver(schemaTarget)
if err := r.Status().Update(ctx, documentdb); err != nil {
logger.Error(err, "Failed to update DocumentDB status after schema upgrade")
return fmt.Errorf("failed to update DocumentDB status after schema upgrade: %w", err)
Expand All @@ -1005,6 +1024,80 @@ func (r *DocumentDBReconciler) upgradeDocumentDBIfNeeded(ctx context.Context, cu
return nil
}

// determineSchemaTarget maps spec.schemaVersion onto the schema upgrade to perform, if any.
//
// It returns the target version (in pg_available_extensions format) together with the
// SQL statement that performs the upgrade. Both results are empty when no schema update
// should run: two-phase mode (empty spec.schemaVersion), a failed version comparison,
// a requested version above the binary version, or a requested version that is not
// newer than the installed one.
func (r *DocumentDBReconciler) determineSchemaTarget(
	ctx context.Context,
	documentdb *dbpreview.DocumentDB,
	binaryVersion string,
	installedVersion string,
) (string, string) {
	logger := log.FromContext(ctx)
	requested := documentdb.Spec.SchemaVersion

	// "auto": follow the binary version with an untargeted update.
	if requested == "auto" {
		return binaryVersion, "ALTER EXTENSION documentdb UPDATE"
	}

	// "" (two-phase mode): leave the schema alone and only tell the user that an
	// update is waiting for an explicit spec.schemaVersion.
	if requested == "" {
		logger.Info("Schema update available but not requested (two-phase mode). "+
			"Set spec.schemaVersion to finalize the upgrade.",
			"binaryVersion", binaryVersion,
			"installedVersion", installedVersion)
		if r.Recorder != nil {
			binarySemver := util.ExtensionVersionToSemver(binaryVersion)
			installedSemver := util.ExtensionVersionToSemver(installedVersion)
			r.Recorder.Event(documentdb, corev1.EventTypeNormal, "SchemaUpdateAvailable",
				fmt.Sprintf(
					"Schema update available: binary version is %s, schema is at %s. "+
						"Set spec.schemaVersion to %q or \"auto\" to finalize the upgrade.",
					binarySemver,
					installedSemver,
					binarySemver))
		}
		return "", ""
	}

	// Anything else is an explicit version pin. The validating webhook already enforces
	// schemaVersion <= binary version at admission time; the checks below are
	// defense-in-depth.
	target := util.SemverToExtensionVersion(requested)

	// The pinned version must not be ahead of the binary.
	vsBinary, err := util.CompareExtensionVersions(target, binaryVersion)
	if err != nil {
		logger.Error(err, "Failed to compare target schema version with binary version",
			"targetVersion", requested,
			"binaryVersion", binaryVersion)
		return "", ""
	}
	if vsBinary > 0 {
		logger.Info("Skipping schema update: schemaVersion exceeds binary version (should have been rejected by webhook)",
			"schemaVersion", requested,
			"binaryVersion", util.ExtensionVersionToSemver(binaryVersion))
		return "", ""
	}

	// The pinned version must be strictly newer than what is installed; otherwise
	// there is nothing to do.
	vsInstalled, err := util.CompareExtensionVersions(target, installedVersion)
	if err != nil {
		logger.Error(err, "Failed to compare target schema version with installed version",
			"targetVersion", requested,
			"installedVersion", installedVersion)
		return "", ""
	}
	if vsInstalled <= 0 {
		logger.V(1).Info("Schema already at or beyond requested version",
			"requestedVersion", requested,
			"installedVersion", installedVersion)
		return "", ""
	}

	return target, fmt.Sprintf("ALTER EXTENSION documentdb UPDATE TO '%s'", target)
}

// buildImagePatchOps compares the current and desired CNPG cluster specs and returns
// JSON Patch operations for any image differences (extension image settings and/or gateway image).
// This is a pure function with no API calls. Returns:
Expand Down
Loading
Loading