Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ const (
serverBootConfigControllerPxe = "serverbootconfigpxe"
httpBootConfigController = "httpbootconfig"
serverBootConfigControllerHttp = "serverbootconfighttp"
serverBootConfigReadiness = "serverbootconfigreadiness"
)

func init() {
Expand Down Expand Up @@ -108,6 +109,7 @@ func main() {
serverBootConfigControllerPxe,
serverBootConfigControllerHttp,
httpBootConfigController,
serverBootConfigReadiness,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Don't make readiness independently disable-able.

ServerBootConfigurationReadinessReconciler is now the only writer of ServerBootConfiguration.Status.State. Any deployment that passes an explicit --controllers= allow-list and doesn't include this new switch will keep running the HTTP/PXE reconcilers, but Status.State will stop changing entirely. Please auto-enable readiness whenever either mode controller is enabled, or fail fast on that invalid combination.

Also applies to: 289-299

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@cmd/main.go` at line 112, The readiness controller (serverBootConfigReadiness
/ ServerBootConfigurationReadinessReconciler) must not be independently
disable-able because it's the sole writer of
ServerBootConfiguration.Status.State; update cmd/main.go so that when either
mode controller (HTTP or PXE reconcilers) is enabled the readiness controller is
automatically enabled, or else validate the --controllers allow-list at startup
and exit with an error if a mode controller is enabled while
serverBootConfigReadiness is disabled; modify the controller initialization or
add a startup check that references serverBootConfigReadiness and the mode
controller flags and either force-enable readiness or fail fast on the invalid
combination.

)

flag.Var(controllers, "controllers",
Expand Down Expand Up @@ -284,6 +286,18 @@ func main() {
}
}

if controllers.Enabled(serverBootConfigReadiness) {
if err = (&controller.ServerBootConfigurationReadinessReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
RequireHTTPBoot: controllers.Enabled(serverBootConfigControllerHttp),
RequireIPXEBoot: controllers.Enabled(serverBootConfigControllerPxe),
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "ServerBootConfigReadiness")
os.Exit(1)
}
}

//+kubebuilder:scaffold:builder

if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
Expand Down
23 changes: 23 additions & 0 deletions internal/controller/serverbootconfig_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,3 +245,26 @@ func PatchServerBootConfigWithError(

return c.Status().Patch(ctx, &cur, client.MergeFrom(base))
}

// PatchServerBootConfigCondition patches a single condition on the ServerBootConfiguration status.
// Callers should only set condition types they own.
func PatchServerBootConfigCondition(
ctx context.Context,
c client.Client,
namespacedName types.NamespacedName,
condition metav1.Condition,
) error {
var cur metalv1alpha1.ServerBootConfiguration
if fetchErr := c.Get(ctx, namespacedName, &cur); fetchErr != nil {
return fmt.Errorf("failed to fetch ServerBootConfiguration: %w", fetchErr)
}
base := cur.DeepCopy()

// Default to current generation if caller didn't set it.
if condition.ObservedGeneration == 0 {
condition.ObservedGeneration = cur.Generation
}
apimeta.SetStatusCondition(&cur.Status.Conditions, condition)

return c.Status().Patch(ctx, &cur, client.MergeFrom(base))
Comment on lines +251 to +269
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

# First, locate the HTTP and PXE controller files mentioned in the review
find . -type f -name "*http_controller.go" -o -name "*pxe_controller.go" | head -20

Repository: ironcore-dev/boot-operator

Length of output: 198


🏁 Script executed:

# Search for the PatchServerBootConfigCondition usage
rg "PatchServerBootConfigCondition" -A 3 -B 3

Repository: ironcore-dev/boot-operator

Length of output: 2368


🏁 Script executed:

# Look for condition-related patches in the reconcilers
rg "SetStatusCondition|apimeta" -A 2 -B 2 --type go

Repository: ironcore-dev/boot-operator

Length of output: 7688


🏁 Script executed:

# Check if there are other direct status patch calls
rg "Status\(\)\.Patch" -A 3 -B 3 --type go

Repository: ironcore-dev/boot-operator

Length of output: 5060


🏁 Script executed:

# Read the HTTP controller helper method that has the open-coded patch
rg "apimeta.SetStatusCondition.*Status.Conditions" -B 20 "internal/controller/serverbootconfiguration_http_controller.go" | head -100

Repository: ironcore-dev/boot-operator

Length of output: 776


🏁 Script executed:

# Read the PXE controller helper method that has the open-coded patch  
rg "apimeta.SetStatusCondition.*Status.Conditions" -B 20 "internal/controller/serverbootconfiguration_pxe_controller.go" | head -100

Repository: ironcore-dev/boot-operator

Length of output: 760


🏁 Script executed:

# Get the complete HTTP controller helper function to see the full patch logic
sed -n '/^func.*HTTP.*Condition/,/^}/p' internal/controller/serverbootconfiguration_http_controller.go

Repository: ironcore-dev/boot-operator

Length of output: 1250


🏁 Script executed:

# Get the complete PXE controller helper function to see the full patch logic
sed -n '/^func.*PXE.*Condition/,/^}/p' internal/controller/serverbootconfiguration_pxe_controller.go

Repository: ironcore-dev/boot-operator

Length of output: 1242


🏁 Script executed:

# Check the structure of ServerBootConfiguration to understand if multiple conditions can exist
rg "ServerBootConfiguration" api/ --type go -A 5 | grep -A 30 "type ServerBootConfiguration"

Repository: ironcore-dev/boot-operator

Length of output: 52


🏁 Script executed:

# Find the API definition files
find . -type f -name "*.go" -path "*/api/*" | grep -i serverbootconfig

Repository: ironcore-dev/boot-operator

Length of output: 52


🏁 Script executed:

# Search for ServerBootConfiguration type definition
rg "type ServerBootConfiguration struct" -A 20 --type go

Repository: ironcore-dev/boot-operator

Length of output: 52


🏁 Script executed:

# Check how conditions are defined in the status
rg "Status.*Conditions" api/ --type go -B 2 -A 2

Repository: ironcore-dev/boot-operator

Length of output: 52


🏁 Script executed:

# Find all Go files that might contain the API definition
find . -type f -name "*.go" | xargs grep -l "ServerBootConfiguration" | head -10

Repository: ironcore-dev/boot-operator

Length of output: 525


🏁 Script executed:

# Search for the type definition more broadly
rg "type ServerBootConfiguration" --type go

Repository: ironcore-dev/boot-operator

Length of output: 422


🏁 Script executed:

# Look for the api directory structure
find . -type d -name "api"

Repository: ironcore-dev/boot-operator

Length of output: 75


🏁 Script executed:

# List files in the api directory
find ./api -type f -name "*.go"

Repository: ironcore-dev/boot-operator

Length of output: 273


🏁 Script executed:

# Search for ServerBootConfiguration struct definition in api directory
rg "type ServerBootConfiguration" ./api --type go -A 30

Repository: ironcore-dev/boot-operator

Length of output: 52


🏁 Script executed:

# Search for Conditions field in Status
rg "Conditions.*\[\]" ./api --type go -B 3 -A 3

Repository: ironcore-dev/boot-operator

Length of output: 1094


🏁 Script executed:

# Search for ServerBootConfiguration across all Go files
rg "type ServerBootConfiguration " --type go -A 15

Repository: ironcore-dev/boot-operator

Length of output: 52


🏁 Script executed:

# Check the imports in the controller files to see where ServerBootConfiguration comes from
rg "import.*metalv1alpha1" --type go -A 20 | head -40

Repository: ironcore-dev/boot-operator

Length of output: 52


🏁 Script executed:

# Look for metalv1alpha1 package location
find . -type f -name "*.go" -path "*metalv1alpha1*" | head -5

Repository: ironcore-dev/boot-operator

Length of output: 52


🏁 Script executed:

# Check imports in the controller file to see where metalv1alpha1 comes from
head -50 internal/controller/serverbootconfig_helpers.go | grep -A 5 -B 5 "metalv1alpha1"

Repository: ironcore-dev/boot-operator

Length of output: 450


🏁 Script executed:

# Search for go.mod to understand the dependency
cat go.mod | grep -i metal

Repository: ironcore-dev/boot-operator

Length of output: 182


🏁 Script executed:

# Check what the review comment is actually about - verify the merge-patch behavior
# Look for documentation or comments about client.MergeFrom and slice handling
rg "MergeFrom|merge.*patch" --type go -B 2 -A 2 | head -50

Repository: ironcore-dev/boot-operator

Length of output: 3584


🏁 Script executed:

# Check how many reconcilers can be writing to ServerBootConfiguration simultaneously
rg "ServerBootConfigurationHTTPReconciler|ServerBootConfigurationPXEReconciler|ServerBootConfigurationReadinessReconciler" --type go -B 5 | grep -i "reconcile\|Reconcile"

Repository: ironcore-dev/boot-operator

Length of output: 6997


🏁 Script executed:

# Verify the actual concurrent behavior - check if HTTP and PXE reconcilers run in parallel
rg "Reconcile" internal/controller/serverbootconfiguration_http_controller.go -A 1
rg "Reconcile" internal/controller/serverbootconfiguration_pxe_controller.go -A 1

Repository: ironcore-dev/boot-operator

Length of output: 5047


🏁 Script executed:

# Look for the main setup to see if both reconcilers are registered
rg "SetupWithManager" internal/controller/ --type go -l

Repository: ironcore-dev/boot-operator

Length of output: 394


🏁 Script executed:

# Check SetupWithManager to see if both controllers watch the same resource
sed -n '/ServerBootConfigurationHTTPReconciler.*SetupWithManager/,/^}/p' internal/controller/serverbootconfiguration_http_controller.go

Repository: ironcore-dev/boot-operator

Length of output: 435


🏁 Script executed:

# Check SetupWithManager for PXE reconciler
sed -n '/ServerBootConfigurationPXEReconciler.*SetupWithManager/,/^}/p' internal/controller/serverbootconfiguration_pxe_controller.go

Repository: ironcore-dev/boot-operator

Length of output: 423


🏁 Script executed:

# Search for the actual CRD type definition in metal-operator dependency
# Check if there's a deepcopy file that might show merge strategies
rg "ServerBootConfiguration" --type go | grep -i "deepcopy\|merge" | head -20

Repository: ironcore-dev/boot-operator

Length of output: 52


🏁 Script executed:

# Look at actual patch call patterns - verify the concern is valid
# Check if there are any tests that show concurrent condition writes
rg "patchHTTPBootReadyConditionFromHTTPState\|patchIPXEBootReadyConditionFromIPXEState" -A 5 -B 5 --type go

Repository: ironcore-dev/boot-operator

Length of output: 52


🏁 Script executed:

# Look more carefully at the actual reconcile methods to see if there are any other direct Status().Patch calls
rg "Status\(\)\.Patch" internal/controller/serverbootconfiguration_http_controller.go -B 10 -A 2

Repository: ironcore-dev/boot-operator

Length of output: 468


🏁 Script executed:

# Same for PXE controller
rg "Status\(\)\.Patch" internal/controller/serverbootconfiguration_pxe_controller.go -B 10 -A 2

Repository: ironcore-dev/boot-operator

Length of output: 468


🏁 Script executed:

# Verify the merge-patch behavior with Kubernetes documentation
# Check if there are comments or tests that explain the expected behavior
rg "merge.*patch\|MergeFrom" internal/controller/ --type go -B 3 -A 3 | grep -i "slice\|condition\|race\|concurrent"

Repository: ironcore-dev/boot-operator

Length of output: 52


🏁 Script executed:

# Check the readiness controller to see if it also patches conditions
rg "Status\(\)\.Patch" internal/controller/serverbootconfiguration_readiness_controller.go -B 10 -A 5

Repository: ironcore-dev/boot-operator

Length of output: 400


🏁 Script executed:

# Let me check if there's any usage of resourceVersion or optimistic locking in the codebase
rg "resourceVersion\|ResourceVersion" internal/controller/ --type go | head -20

Repository: ironcore-dev/boot-operator

Length of output: 52


🏁 Script executed:

# Check if there are tests that verify concurrent condition writes
find . -name "*test.go" -type f | xargs grep -l "condition" | head -5

Repository: ironcore-dev/boot-operator

Length of output: 208


🏁 Script executed:

# Read a test file to see if there's any test coverage for concurrent patches
head -100 internal/controller/serverbootconfig_helpers_test.go

Repository: ironcore-dev/boot-operator

Length of output: 3290


🏁 Script executed:

# Verify the exact concern: check the helper function test
sed -n '/TestPatchServerBootConfigCondition/,/^}/p' internal/controller/serverbootconfig_helpers_test.go

Repository: ironcore-dev/boot-operator

Length of output: 52


🏁 Script executed:

# One final verification - check if there's any test that shows the race condition
cat internal/controller/serverbootconfig_helpers_test.go | grep -A 30 "PatchServerBootConfigCondition"

Repository: ironcore-dev/boot-operator

Length of output: 52


🏁 Script executed:

# Check if the readiness controller also writes conditions (it only patches State, not Conditions)
grep -A 20 "func.*Reconcile" internal/controller/serverbootconfiguration_readiness_controller.go | head -30

Repository: ironcore-dev/boot-operator

Length of output: 1000


🏁 Script executed:

# Verify the final state - confirm both reconcilers are independent and run concurrently
grep -A 3 "SetupWithManager\|For\(" internal/controller/serverbootconfiguration_http_controller.go internal/controller/serverbootconfiguration_pxe_controller.go | grep -E "For|Owns"

Repository: ironcore-dev/boot-operator

Length of output: 93


🏁 Script executed:

# Simpler verification - check what resource both controllers watch
grep "For(" internal/controller/serverbootconfiguration_http_controller.go internal/controller/serverbootconfiguration_pxe_controller.go

Repository: ironcore-dev/boot-operator

Length of output: 292


🏁 Script executed:

# Now let's understand the actual merge-patch issue by checking Kubernetes documentation
# Let's see if there are any comments about merge semantics in the code
rg "merge\|Merge" internal/controller/serverbootconfig_helpers.go -B 3 -A 3

Repository: ironcore-dev/boot-operator

Length of output: 52


🏁 Script executed:

# Check if both HTTP and PXE patch functions are called during the same reconciliation or in separate ones
grep -n "patchHTTPBootReadyConditionFromHTTPState\|patchIPXEBootReadyConditionFromIPXEState" internal/controller/serverbootconfiguration_http_controller.go internal/controller/serverbootconfiguration_pxe_controller.go

Repository: ironcore-dev/boot-operator

Length of output: 933


🏁 Script executed:

# Let me check the PR context - what does the review mention about "dual-mode lost-update race"
# Look for any existing issues or comments in the code
rg "dual-mode\|race\|concurrent\|lost.*update" --type go | head -20

Repository: ironcore-dev/boot-operator

Length of output: 52


🏁 Script executed:

# Check the exact patch operations - verify the concern is about merging slices
# Read the helper function one more time to confirm the issue
sed -n '251,269p' internal/controller/serverbootconfig_helpers.go

Repository: ironcore-dev/boot-operator

Length of output: 717


🏁 Script executed:

# Verify: are there any retry mechanisms or conflict handling?
rg "conflict\|Conflict\|retry\|Retry" internal/controller/ --type go | head -10

Repository: ironcore-dev/boot-operator

Length of output: 52


Make condition patches conflict-safe.

client.MergeFrom(base) replaces the entire status.conditions slice rather than merging individual condition entries. Because ServerBootConfigurationHTTPReconciler and ServerBootConfigurationPXEReconciler both watch and reconcile the same ServerBootConfiguration resource independently and in parallel, concurrent writes will silently overwrite each other's conditions instead of detecting a conflict—reintroducing the dual-mode lost-update race. Switch to a conflict-aware strategy: use strategic merge patch (which understands condition list semantics), server-side apply (SSA), or optimistic locking with resourceVersion checks. Apply this fix to the helper function PatchServerBootConfigCondition (lines 251-269) and to the open-coded condition patches in patchHTTPBootReadyConditionFromHTTPState and patchIPXEBootReadyConditionFromIPXEState.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@internal/controller/serverbootconfig_helpers.go` around lines 251 - 269,
PatchServerBootConfigCondition currently does a single MergeFrom status patch
which can silently overwrite concurrent condition updates; change it to an
optimistic retry-on-conflict loop: fetch the latest ServerBootConfiguration, set
condition.ObservedGeneration if zero, call apimeta.SetStatusCondition on the
fetched object's Status.Conditions, attempt c.Status().Patch(ctx, &obj,
client.MergeFrom(base)), and if you get a conflict (apierrors.IsConflict)
re-fetch and retry a few times before failing. Apply the same conflict-retry
pattern to the two open-coded condition updates in
patchHTTPBootReadyConditionFromHTTPState and
patchIPXEBootReadyConditionFromIPXEState so each updater merges the latest
conditions and retries on conflict instead of silently overwriting.

}
44 changes: 29 additions & 15 deletions internal/controller/serverbootconfiguration_http_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,16 @@ func (r *ServerBootConfigurationHTTPReconciler) reconcile(ctx context.Context, l
ukiURL, err := r.constructUKIURL(ctx, config.Spec.Image)
if err != nil {
log.Error(err, "Failed to construct UKI URL")
if patchErr := PatchServerBootConfigWithError(ctx, r.Client,
types.NamespacedName{Name: config.Name, Namespace: config.Namespace}, err); patchErr != nil {
return ctrl.Result{}, fmt.Errorf("failed to patch state to error: %w (original error: %w)", patchErr, err)
if patchErr := PatchServerBootConfigCondition(ctx, r.Client,
types.NamespacedName{Name: config.Name, Namespace: config.Namespace},
metav1.Condition{
Type: HTTPBootReadyConditionType,
Status: metav1.ConditionFalse,
Reason: "UKIURLConstructionFailed",
Message: err.Error(),
ObservedGeneration: config.Generation,
}); patchErr != nil {
return ctrl.Result{}, fmt.Errorf("failed to patch %s condition: %w (original error: %w)", HTTPBootReadyConditionType, patchErr, err)
}
return ctrl.Result{}, err
}
Expand Down Expand Up @@ -134,36 +141,43 @@ func (r *ServerBootConfigurationHTTPReconciler) reconcile(ctx context.Context, l
return ctrl.Result{}, fmt.Errorf("failed to get HTTPBoot config: %w", err)
}

if err := r.patchConfigStateFromHTTPState(ctx, httpBootConfig, config); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to patch server boot config state to %s: %w", httpBootConfig.Status.State, err)
if err := r.patchHTTPBootReadyConditionFromHTTPState(ctx, httpBootConfig, config); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to patch %s condition from HTTPBootConfig state %s: %w", HTTPBootReadyConditionType, httpBootConfig.Status.State, err)
}
log.V(1).Info("Patched server boot config state")
log.V(1).Info("Patched server boot config condition", "condition", HTTPBootReadyConditionType)
Comment on lines +144 to +147
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

# First, locate and examine the serverbootconfiguration_http_controller.go file
fd -t f "serverbootconfiguration_http_controller.go" | head -5

Repository: ironcore-dev/boot-operator

Length of output: 132


🏁 Script executed:

# Get the full file to understand the context
cat -n internal/controller/serverbootconfiguration_http_controller.go | head -200

Repository: ironcore-dev/boot-operator

Length of output: 9707


🏁 Script executed:

# Focus on the critical sections mentioned in the review
sed -n '85,105p' internal/controller/serverbootconfiguration_http_controller.go

Repository: ironcore-dev/boot-operator

Length of output: 1014


🏁 Script executed:

# Check lines around 135-181 (child apply and status handling)
sed -n '130,185p' internal/controller/serverbootconfiguration_http_controller.go

Repository: ironcore-dev/boot-operator

Length of output: 2617


🏁 Script executed:

# Find the HTTPBootConfig API definition
fd -t f ".*http.*boot.*" | grep -i "api\|types" | head -10

Repository: ironcore-dev/boot-operator

Length of output: 106


🏁 Script executed:

# Search for HTTPBootConfig definition
rg "type HTTPBootConfig" -A 20

Repository: ironcore-dev/boot-operator

Length of output: 6726


🏁 Script executed:

# Look for the status structure
rg "HTTPBootConfigStatus" -A 15

Repository: ironcore-dev/boot-operator

Length of output: 10881


🏁 Script executed:

# Examine the HTTPBootConfig controller reconciliation
cat -n internal/controller/httpbootconfig_controller.go | head -100

Repository: ironcore-dev/boot-operator

Length of output: 4489


🏁 Script executed:

# Look for where the HTTPBootConfig controller sets status
rg "Status\.State\|SetStatusCondition\|ObservedGeneration" internal/controller/httpbootconfig_controller.go -B 2 -A 2

Repository: ironcore-dev/boot-operator

Length of output: 52


🏁 Script executed:

# Continue reading the patchStatus method
sed -n '97,120p' internal/controller/httpbootconfig_controller.go

Repository: ironcore-dev/boot-operator

Length of output: 885


🏁 Script executed:

# Check if there's any ObservedGeneration tracking at all in the HTTPBootConfig controller
rg "Generation" internal/controller/httpbootconfig_controller.go

Repository: ironcore-dev/boot-operator

Length of output: 52


🏁 Script executed:

# Look at the full patchStatus implementation
sed -n '97,115p' internal/controller/httpbootconfig_controller.go

Repository: ironcore-dev/boot-operator

Length of output: 728


Don't reuse old HTTPBootConfig status for the current SBC generation.

This path executes right after applying the child spec, so HTTPBootConfig.Status.State may still reflect the previous generation's status. The parent condition is then stamped with the current ServerBootConfiguration.Generation, making stale Ready/Error data appear current after a spec change. Until the HTTPBootConfig controller has reconciled the new spec, write HTTPBootReady=Unknown instead.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@internal/controller/serverbootconfiguration_http_controller.go` around lines
144 - 147, The code currently uses HTTPBootConfig.Status.State directly when
patching the parent condition, which can be stale if the child HTTPBootConfig
hasn't reconciled the new generation; update the logic so that when calling or
inside patchHTTPBootReadyConditionFromHTTPState you compare
httpBootConfig.ObjectMeta.Generation (or httpBootConfig.Generation) with the
parent ServerBootConfiguration.Generation (config.Generation) and treat the
child's state as Unknown when they differ. Concretely: modify
patchHTTPBootReadyConditionFromHTTPState (or its call) to detect generation
mismatch and map any non-current-child state to metav1.ConditionUnknown (or the
equivalent Unknown/Unknown constant) before creating/updating the
HTTPBootReadyCondition (HTTPBootReadyConditionType), ensuring the parent
condition is set to Unknown until the child reconciles.


log.V(1).Info("Reconciled ServerBootConfiguration")
return ctrl.Result{}, nil
}

func (r *ServerBootConfigurationHTTPReconciler) patchConfigStateFromHTTPState(ctx context.Context, httpBootConfig *bootv1alpha1.HTTPBootConfig, cfg *metalv1alpha1.ServerBootConfiguration) error {
func (r *ServerBootConfigurationHTTPReconciler) patchHTTPBootReadyConditionFromHTTPState(ctx context.Context, httpBootConfig *bootv1alpha1.HTTPBootConfig, cfg *metalv1alpha1.ServerBootConfiguration) error {
key := types.NamespacedName{Name: cfg.Name, Namespace: cfg.Namespace}
var cur metalv1alpha1.ServerBootConfiguration
if err := r.Get(ctx, key, &cur); err != nil {
return err
}
base := cur.DeepCopy()

cond := metav1.Condition{
Type: HTTPBootReadyConditionType,
ObservedGeneration: cur.Generation,
}
switch httpBootConfig.Status.State {
case bootv1alpha1.HTTPBootConfigStateReady:
cur.Status.State = metalv1alpha1.ServerBootConfigurationStateReady
// Remove ImageValidation condition when transitioning to Ready
apimeta.RemoveStatusCondition(&cur.Status.Conditions, "ImageValidation")
cond.Status = metav1.ConditionTrue
cond.Reason = "BootConfigReady"
cond.Message = "HTTP boot configuration is ready."
case bootv1alpha1.HTTPBootConfigStateError:
cur.Status.State = metalv1alpha1.ServerBootConfigurationStateError
}

for _, c := range httpBootConfig.Status.Conditions {
apimeta.SetStatusCondition(&cur.Status.Conditions, c)
cond.Status = metav1.ConditionFalse
cond.Reason = "BootConfigError"
cond.Message = "HTTPBootConfig reported an error."
default:
cond.Status = metav1.ConditionUnknown
cond.Reason = "BootConfigPending"
cond.Message = "Waiting for HTTPBootConfig to become Ready."
}

apimeta.SetStatusCondition(&cur.Status.Conditions, cond)
return r.Status().Patch(ctx, &cur, client.MergeFrom(base))
}

Expand Down
53 changes: 36 additions & 17 deletions internal/controller/serverbootconfiguration_pxe_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,16 @@ func (r *ServerBootConfigurationPXEReconciler) reconcile(ctx context.Context, lo

kernelURL, initrdURL, squashFSURL, err := r.getImageDetailsFromConfig(ctx, bootConfig)
if err != nil {
if patchErr := PatchServerBootConfigWithError(ctx, r.Client,
types.NamespacedName{Name: bootConfig.Name, Namespace: bootConfig.Namespace}, err); patchErr != nil {
return ctrl.Result{}, fmt.Errorf("failed to patch server boot config state: %w (original error: %w)", patchErr, err)
if patchErr := PatchServerBootConfigCondition(ctx, r.Client,
types.NamespacedName{Name: bootConfig.Name, Namespace: bootConfig.Namespace},
metav1.Condition{
Type: IPXEBootReadyConditionType,
Status: metav1.ConditionFalse,
Reason: "ImageDetailsFailed",
Message: err.Error(),
ObservedGeneration: bootConfig.Generation,
}); patchErr != nil {
return ctrl.Result{}, fmt.Errorf("failed to patch %s condition: %w (original error: %w)", IPXEBootReadyConditionType, patchErr, err)
}
return ctrl.Result{}, err
}
Expand Down Expand Up @@ -153,32 +160,44 @@ func (r *ServerBootConfigurationPXEReconciler) reconcile(ctx context.Context, lo
return ctrl.Result{}, fmt.Errorf("failed to get IPXE config: %w", err)
}

if err := r.patchConfigStateFromIPXEState(ctx, config, bootConfig); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to patch server boot config state to %s: %w", config.Status.State, err)
if err := r.patchIPXEBootReadyConditionFromIPXEState(ctx, config, bootConfig); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to patch %s condition from IPXEBootConfig state %s: %w", IPXEBootReadyConditionType, config.Status.State, err)
}
log.V(1).Info("Patched server boot config state")
log.V(1).Info("Patched server boot config condition", "condition", IPXEBootReadyConditionType)
Comment on lines +163 to +166
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

# First, let me examine the file to understand the full context
cat -n internal/controller/serverbootconfiguration_pxe_controller.go | head -250

Repository: ironcore-dev/boot-operator

Length of output: 12084


🏁 Script executed:

# Let me see more of the implementation, particularly around line 159 onwards
sed -n '154,201p' internal/controller/serverbootconfiguration_pxe_controller.go | cat -n

Repository: ironcore-dev/boot-operator

Length of output: 2474


🏁 Script executed:

# Let me check if IPXEBootConfig has an ObservedGeneration field in its status
fd -t f ".*\.go$" . | xargs grep -l "IPXEBootConfig" | head -5

Repository: ironcore-dev/boot-operator

Length of output: 220


🏁 Script executed:

# Check the IPXEBootConfig type definition
cat -n ./api/v1alpha1/ipxebootconfig_types.go

Repository: ironcore-dev/boot-operator

Length of output: 4420


🏁 Script executed:

# Now let me check what the ServerBootConfiguration looks like to understand ObservedGeneration usage
fd -t f "serverbootconfiguration.*types.go" . | head -1 | xargs cat | head -150

Repository: ironcore-dev/boot-operator

Length of output: 52


🏁 Script executed:

# ServerBootConfiguration is from metal-operator, let me look for how ObservedGeneration is used in this controller
# Let's check the entire patchIPXEBootReadyConditionFromIPXEState function and surrounding context
sed -n '172,201p' internal/controller/serverbootconfiguration_pxe_controller.go

Repository: ironcore-dev/boot-operator

Length of output: 1242


🏁 Script executed:

# Let me check if there's a controller for IPXEBootConfig that would update its status
fd -t f ".*controller.*\.go$" . | xargs grep -l "IPXEBootConfig" | head -5

Repository: ironcore-dev/boot-operator

Length of output: 309


🏁 Script executed:

# Check the IPXEBootConfig controller to see when Status.State is updated
cat -n ./internal/controller/ipxebootconfig_controller.go | head -300

Repository: ironcore-dev/boot-operator

Length of output: 6352


🏁 Script executed:

# Let me understand the timing: after Apply, does Get() fetch the newly applied object?
# Let me check if there's any test that shows the expected behavior
grep -n "Patch\|Get" internal/controller/serverbootconfiguration_pxe_controller_test.go | head -30

Repository: ironcore-dev/boot-operator

Length of output: 52


🏁 Script executed:

# Let me check the IPXEBootConfig Status more carefully for any ObservedGeneration tracking
grep -A 20 "type IPXEBootConfigStatus" api/v1alpha1/ipxebootconfig_types.go

Repository: ironcore-dev/boot-operator

Length of output: 907


🏁 Script executed:

# Let me trace the full flow: when is IPXEBootConfigReconciler's reconcile() triggered?
# And does it run synchronously with the Apply, or asynchronously?
# Let me check how the controller-runtime Patch() works with Apply

# First, let's see if there's any documentation or comments about the Apply semantics
grep -B 5 -A 5 "RawPatch.*ApplyPatchType" internal/controller/serverbootconfiguration_pxe_controller.go

Repository: ironcore-dev/boot-operator

Length of output: 607


🏁 Script executed:

# Let me check when IPXEBootConfig status gets updated relative to the Apply
# Look for where State is set in the IPXEBootConfig controller

grep -n "Status.State\|patchStatus" internal/controller/ipxebootconfig_controller.go

Repository: ironcore-dev/boot-operator

Length of output: 286


🏁 Script executed:

# Let me understand the reconciliation trigger model - when IPXEBootConfig is applied, 
# does the IPXEBootConfigReconciler get queued immediately?
# Let's see if there are any watches or event handlers

grep -A 10 "SetupWithManager" internal/controller/serverbootconfiguration_pxe_controller.go

Repository: ironcore-dev/boot-operator

Length of output: 484


🏁 Script executed:

# Let me check if there's any mechanism to track whether IPXEBootConfig has processed the new spec
# Check if IPXEBootConfig status has any generation-related fields or version tracking

grep -n "Generation\|ObservedGeneration" api/v1alpha1/ipxebootconfig_types.go

Repository: ironcore-dev/boot-operator

Length of output: 52


🏁 Script executed:

# Let me verify the exact flow: Get the IPXEBootConfig after Apply but check what state it has
# Let me also understand: does the IPXEBootConfig have a Generation field (it should, as all k8s objects do)?

grep -n "Generation" api/v1alpha1/ipxebootconfig_types.go

Repository: ironcore-dev/boot-operator

Length of output: 52


🏁 Script executed:

# Let me check if IPXEBootConfig has an ObservedGeneration in its status section
# by looking at the full type definition

cat api/v1alpha1/ipxebootconfig_types.go

Repository: ironcore-dev/boot-operator

Length of output: 3776


Don't stamp the current SBC generation onto stale IPXEBootConfig status.

This runs immediately after applying the child spec (line 154), so IPXEBootConfig.Status.State at line 184 can still reflect the pending state from before the Apply. The IPXEBootConfigReconciler runs asynchronously in a separate reconciliation loop, so by the time you read the status, it may not have observed the new spec yet. Writing that stale status back with ObservedGeneration: cur.Generation makes an old Ready/Error/Pending state falsely appear current for the new ServerBootConfiguration generation. Only promote Ready/Error once the child has reconciled the new spec; otherwise keep IPXEBootReady in an unknown/pending state.

Since IPXEBootConfig.Status lacks an ObservedGeneration field, consider adding one to track whether the child has observed the current spec generation, or skip this condition update until a subsequent reconciliation pass when the child has had time to process the new spec.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@internal/controller/serverbootconfiguration_pxe_controller.go` around lines
163 - 166, The code patches IPXEBootReadyCondition from the child IPXEBootConfig
immediately after Apply, but may read a stale IPXEBootConfig.Status.State and
incorrectly stamp ObservedGeneration for the new ServerBootConfiguration; update
patchIPXEBootReadyConditionFromIPXEState (and its call site) to avoid promoting
Ready/Error from a stale child status by either (A) adding an ObservedGeneration
field to IPXEBootConfig.Status and only applying the child's Ready/Error when
IPXEBootConfig.Status.ObservedGeneration == ipxe.Spec.Generation (or otherwise
indicates the child has observed the new spec), or (B) skip setting
ObservedGeneration/Ready/Error in ServerBootConfiguration until a subsequent
reconcile when the child status shows it has processed the spec; reference the
functions/methods patchIPXEBootReadyConditionFromIPXEState,
IPXEBootConfig.Status and the ServerBootConfiguration reconciliation path where
Apply is called to locate and implement the change.


log.V(1).Info("Reconciled ServerBootConfiguration")
return ctrl.Result{}, nil
}

func (r *ServerBootConfigurationPXEReconciler) patchConfigStateFromIPXEState(ctx context.Context, config *v1alpha1.IPXEBootConfig, bootConfig *metalv1alpha1.ServerBootConfiguration) error {
bootConfigBase := bootConfig.DeepCopy()
func (r *ServerBootConfigurationPXEReconciler) patchIPXEBootReadyConditionFromIPXEState(ctx context.Context, config *v1alpha1.IPXEBootConfig, bootConfig *metalv1alpha1.ServerBootConfiguration) error {
key := types.NamespacedName{Name: bootConfig.Name, Namespace: bootConfig.Namespace}
var cur metalv1alpha1.ServerBootConfiguration
if err := r.Get(ctx, key, &cur); err != nil {
return err
}
base := cur.DeepCopy()

cond := metav1.Condition{
Type: IPXEBootReadyConditionType,
ObservedGeneration: cur.Generation,
}
switch config.Status.State {
case v1alpha1.IPXEBootConfigStateReady:
bootConfig.Status.State = metalv1alpha1.ServerBootConfigurationStateReady
// Remove ImageValidation condition when transitioning to Ready
apimeta.RemoveStatusCondition(&bootConfig.Status.Conditions, "ImageValidation")
cond.Status = metav1.ConditionTrue
cond.Reason = "BootConfigReady"
cond.Message = "IPXE boot configuration is ready."
case v1alpha1.IPXEBootConfigStateError:
bootConfig.Status.State = metalv1alpha1.ServerBootConfigurationStateError
}

for _, c := range config.Status.Conditions {
apimeta.SetStatusCondition(&bootConfig.Status.Conditions, c)
cond.Status = metav1.ConditionFalse
cond.Reason = "BootConfigError"
cond.Message = "IPXEBootConfig reported an error."
default:
cond.Status = metav1.ConditionUnknown
cond.Reason = "BootConfigPending"
cond.Message = "Waiting for IPXEBootConfig to become Ready."
}

return r.Status().Patch(ctx, bootConfig, client.MergeFrom(bootConfigBase))
apimeta.SetStatusCondition(&cur.Status.Conditions, cond)
return r.Status().Patch(ctx, &cur, client.MergeFrom(base))
}

func (r *ServerBootConfigurationPXEReconciler) getSystemUUIDFromBootConfig(ctx context.Context, config *metalv1alpha1.ServerBootConfiguration) (string, error) {
Expand Down
103 changes: 103 additions & 0 deletions internal/controller/serverbootconfiguration_readiness_controller.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// SPDX-FileCopyrightText: 2026 SAP SE or an SAP affiliate company and IronCore contributors
// SPDX-License-Identifier: Apache-2.0

package controller

import (
"context"

apimeta "k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"

metalv1alpha1 "github.com/ironcore-dev/metal-operator/api/v1alpha1"
)

const (
// Condition types written by the mode-specific converters.
HTTPBootReadyConditionType = "HTTPBootReady"
IPXEBootReadyConditionType = "IPXEBootReady"
)

// ServerBootConfigurationReadinessReconciler aggregates mode-specific readiness conditions and is the
// single writer of ServerBootConfiguration.Status.State.
type ServerBootConfigurationReadinessReconciler struct {
client.Client
Scheme *runtime.Scheme

// RequireHTTPBoot/RequireIPXEBoot are derived from boot-operator CLI controller enablement.
// There is currently no per-SBC spec hint for which boot modes should be considered active.
RequireHTTPBoot bool
RequireIPXEBoot bool
}

//+kubebuilder:rbac:groups=metal.ironcore.dev,resources=serverbootconfigurations,verbs=get;list;watch
//+kubebuilder:rbac:groups=metal.ironcore.dev,resources=serverbootconfigurations/status,verbs=get;update;patch

func (r *ServerBootConfigurationReadinessReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
cfg := &metalv1alpha1.ServerBootConfiguration{}
if err := r.Get(ctx, req.NamespacedName, cfg); err != nil {
return ctrl.Result{}, client.IgnoreNotFound(err)
}

// If no boot modes are required (because their converters are disabled), do not mutate status.
if !r.RequireHTTPBoot && !r.RequireIPXEBoot {
return ctrl.Result{}, nil
}

desired := metalv1alpha1.ServerBootConfigurationStatePending

allReady := true
hasError := false

if r.RequireHTTPBoot {
c := apimeta.FindStatusCondition(cfg.Status.Conditions, HTTPBootReadyConditionType)
switch {
case c == nil:
allReady = false
case c.Status == metav1.ConditionFalse:
hasError = true
case c.Status != metav1.ConditionTrue:
allReady = false
}
}

if r.RequireIPXEBoot {
c := apimeta.FindStatusCondition(cfg.Status.Conditions, IPXEBootReadyConditionType)
switch {
case c == nil:
allReady = false
case c.Status == metav1.ConditionFalse:
hasError = true
case c.Status != metav1.ConditionTrue:
allReady = false
}
Comment on lines +55 to +76
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

cat -n internal/controller/serverbootconfiguration_readiness_controller.go

Repository: ironcore-dev/boot-operator

Length of output: 3972


🏁 Script executed:

# Find the ServerBootConfiguration type definition
find . -name "*.go" -type f | xargs grep -l "type ServerBootConfiguration struct" | head -5

Repository: ironcore-dev/boot-operator

Length of output: 52


🏁 Script executed:

# Search for HTTPBootReadyConditionType and IPXEBootReadyConditionType usage
rg "HTTPBootReadyConditionType|IPXEBootReadyConditionType" -A 3 -B 3

Repository: ironcore-dev/boot-operator

Length of output: 8224


🏁 Script executed:

# Search for other uses of FindStatusCondition to see if there's a pattern
rg "FindStatusCondition" -B 2 -A 5

Repository: ironcore-dev/boot-operator

Length of output: 2501


🏁 Script executed:

# Search for .Generation usage in the codebase
rg "\.Generation" -B 2 -A 2 | head -50

Repository: ironcore-dev/boot-operator

Length of output: 3489


Treat stale mode conditions as pending.

The aggregator ignores Condition.ObservedGeneration. After a ServerBootConfiguration spec change, old HTTPBootReady / IPXEBootReady values can keep Status.State at Ready or Error until the mode controllers rewrite them. ObservedGeneration != cfg.Generation should be handled the same as a missing condition.

Suggested guard
 	if r.RequireHTTPBoot {
 		c := apimeta.FindStatusCondition(cfg.Status.Conditions, HTTPBootReadyConditionType)
 		switch {
-		case c == nil:
+		case c == nil || c.ObservedGeneration != cfg.Generation:
 			allReady = false
 		case c.Status == metav1.ConditionFalse:
 			hasError = true
 		case c.Status != metav1.ConditionTrue:
 			allReady = false
 		}
 	}

 	if r.RequireIPXEBoot {
 		c := apimeta.FindStatusCondition(cfg.Status.Conditions, IPXEBootReadyConditionType)
 		switch {
-		case c == nil:
+		case c == nil || c.ObservedGeneration != cfg.Generation:
 			allReady = false
 		case c.Status == metav1.ConditionFalse:
 			hasError = true
 		case c.Status != metav1.ConditionTrue:
 			allReady = false
 		}
 	}
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if r.RequireHTTPBoot {
c := apimeta.FindStatusCondition(cfg.Status.Conditions, HTTPBootReadyConditionType)
switch {
case c == nil:
allReady = false
case c.Status == metav1.ConditionFalse:
hasError = true
case c.Status != metav1.ConditionTrue:
allReady = false
}
}
if r.RequireIPXEBoot {
c := apimeta.FindStatusCondition(cfg.Status.Conditions, IPXEBootReadyConditionType)
switch {
case c == nil:
allReady = false
case c.Status == metav1.ConditionFalse:
hasError = true
case c.Status != metav1.ConditionTrue:
allReady = false
}
if r.RequireHTTPBoot {
c := apimeta.FindStatusCondition(cfg.Status.Conditions, HTTPBootReadyConditionType)
switch {
case c == nil || c.ObservedGeneration != cfg.Generation:
allReady = false
case c.Status == metav1.ConditionFalse:
hasError = true
case c.Status != metav1.ConditionTrue:
allReady = false
}
}
if r.RequireIPXEBoot {
c := apimeta.FindStatusCondition(cfg.Status.Conditions, IPXEBootReadyConditionType)
switch {
case c == nil || c.ObservedGeneration != cfg.Generation:
allReady = false
case c.Status == metav1.ConditionFalse:
hasError = true
case c.Status != metav1.ConditionTrue:
allReady = false
}
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@internal/controller/serverbootconfiguration_readiness_controller.go` around
lines 55 - 76, When evaluating HTTPBootReadyConditionType and
IPXEBootReadyConditionType (in the RequireHTTPBoot / RequireIPXEBoot blocks)
treat conditions whose ObservedGeneration != cfg.Generation as if they were
missing: after calling apimeta.FindStatusCondition(cfg.Status.Conditions, ...),
consider the condition nil if c == nil OR c.ObservedGeneration !=
cfg.Generation, and then set allReady = false for that case; keep the existing
handling for c.Status == metav1.ConditionFalse (set hasError = true) and
c.Status != metav1.ConditionTrue (set allReady = false). This ensures stale
conditions are treated as pending until the mode controllers update them.

}

switch {
case hasError:
desired = metalv1alpha1.ServerBootConfigurationStateError
case allReady:
desired = metalv1alpha1.ServerBootConfigurationStateReady
}

if cfg.Status.State == desired {
return ctrl.Result{}, nil
}

base := cfg.DeepCopy()
cfg.Status.State = desired
if err := r.Status().Patch(ctx, cfg, client.MergeFrom(base)); err != nil {
return ctrl.Result{}, err
}

return ctrl.Result{}, nil
}

func (r *ServerBootConfigurationReadinessReconciler) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
For(&metalv1alpha1.ServerBootConfiguration{}).
Complete(r)
}
7 changes: 7 additions & 0 deletions internal/controller/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,13 @@ func SetupTest() *corev1.Namespace {
RegistryValidator: registryValidator,
}).SetupWithManager(k8sManager)).To(Succeed())

Expect((&ServerBootConfigurationReadinessReconciler{
Client: k8sManager.GetClient(),
Scheme: k8sManager.GetScheme(),
RequireHTTPBoot: true,
RequireIPXEBoot: true,
}).SetupWithManager(k8sManager)).To(Succeed())

go func() {
defer GinkgoRecover()
Expect(k8sManager.Start(mgrCtx)).To(Succeed(), "failed to start manager")
Expand Down
Loading