Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pkg/operator/controller/gateway-labeler/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ func NewUnmanaged(mgr manager.Manager) (controller.Controller, error) {
client: mgr.GetClient(),
recorder: mgr.GetEventRecorderFor(controllerName),
}
c, err := controller.NewUnmanaged(controllerName, controller.Options{Reconciler: reconciler})
options := controller.Options{Reconciler: reconciler}
options.DefaultFromConfig(mgr.GetControllerOptions())
c, err := controller.NewUnmanaged(controllerName, options)
if err != nil {
return nil, err
}
Expand Down
4 changes: 3 additions & 1 deletion pkg/operator/controller/gateway-service-dns/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,9 @@ func NewUnmanaged(mgr manager.Manager, config Config) (controller.Controller, er
cache: operatorCache,
recorder: mgr.GetEventRecorderFor(controllerName),
}
c, err := controller.NewUnmanaged(controllerName, controller.Options{Reconciler: reconciler})
options := controller.Options{Reconciler: reconciler}
options.DefaultFromConfig(mgr.GetControllerOptions())
c, err := controller.NewUnmanaged(controllerName, options)
if err != nil {
return nil, err
}
Expand Down
61 changes: 52 additions & 9 deletions pkg/operator/controller/gatewayclass/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"sync"

configv1 "github.com/openshift/api/config/v1"
logf "github.com/openshift/cluster-ingress-operator/pkg/log"
operatorcontroller "github.com/openshift/cluster-ingress-operator/pkg/operator/controller"

Expand Down Expand Up @@ -71,6 +72,9 @@ const (
// can be specified. This annotation is only intended for use by
// OpenShift developers.
istioVersionOverrideAnnotationKey = "unsupported.do-not-use.openshift.io/istio-version"

// gatewayclassControllerIndexFieldName is the name of the index for gatewayclass by controller name.
gatewayclassControllerIndexFieldName = "spec.controllerName"
)

var log = logf.Logger.WithName(controllerName)
Expand All @@ -82,15 +86,19 @@ var gatewayClassController controller.Controller
func NewUnmanaged(mgr manager.Manager, config Config) (controller.Controller, error) {
operatorCache := mgr.GetCache()
reconciler := &reconciler{
config: config,
client: mgr.GetClient(),
cache: operatorCache,
recorder: mgr.GetEventRecorderFor(controllerName),
config: config,
client: mgr.GetClient(),
cache: operatorCache,
fieldIndexer: mgr.GetFieldIndexer(),
recorder: mgr.GetEventRecorderFor(controllerName),
}
c, err := controller.NewUnmanaged(controllerName, controller.Options{Reconciler: reconciler})
options := controller.Options{Reconciler: reconciler}
options.DefaultFromConfig(mgr.GetControllerOptions())
c, err := controller.NewUnmanaged(controllerName, options)
if err != nil {
return nil, err
}

isOurGatewayClass := predicate.NewPredicateFuncs(func(o client.Object) bool {
class := o.(*gatewayapiv1.GatewayClass)
return class.Spec.ControllerName == operatorcontroller.OpenShiftGatewayClassControllerName
Expand Down Expand Up @@ -146,6 +154,12 @@ func NewUnmanaged(mgr manager.Manager, config Config) (controller.Controller, er
return nil, err
}

// Watch the cluster infrastructure config in case the infrastructure
// topology changes.
if err := c.Watch(source.Kind[client.Object](operatorCache, &configv1.Infrastructure{}, reconciler.enqueueRequestForSomeGatewayClass())); err != nil {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question here is, if you change Sail Operator parameter will it reflect on Pilot config?
Also, in case this parameter is also used for Gateway replica deployment, will Sail reconcile all the gateways and HPA?

(Giving some thoughts for test)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

answering myself:
the change of HPA parameter from pilot does not impact on the GatewayClass, it needs its own definition from spec.infrastructure.paramRefs

return nil, err
}

gatewayClassController = c
return c, nil
}
Expand All @@ -172,11 +186,15 @@ type Config struct {
type reconciler struct {
config Config

client client.Client
cache cache.Cache
recorder record.EventRecorder
client client.Client
cache cache.Cache
fieldIndexer client.FieldIndexer
recorder record.EventRecorder

startIstioWatch sync.Once
// startGatewayclassControllerIndex ensures we create the gatewayclasses index at
// most once.
startGatewayclassControllerIndex sync.Once
}

// enqueueRequestForSomeGatewayClass enqueues a reconciliation request for the
Expand Down Expand Up @@ -230,12 +248,36 @@ func (r *reconciler) enqueueRequestForSomeGatewayClass() handler.EventHandler {
func (r *reconciler) Reconcile(ctx context.Context, request reconcile.Request) (reconcile.Result, error) {
log.Info("reconciling", "request", request)

var infraConfig configv1.Infrastructure
if err := r.client.Get(ctx, types.NamespacedName{Name: "cluster"}, &infraConfig); err != nil {
return reconcile.Result{}, err
}

var gatewayclass gatewayapiv1.GatewayClass
if err := r.cache.Get(ctx, request.NamespacedName, &gatewayclass); err != nil {
return reconcile.Result{}, err
}

var errs []error

// Create the Index gatewayclasses once on first reconciliation. The
// index cannot be created in NewUnmanaged as the CRD might not exist
// when NewUnmanaged is called, and so we create the index here.
r.startGatewayclassControllerIndex.Do(func() {
gatewayclassControllerIndexFn := client.IndexerFunc(func(o client.Object) []string {
gatewayclass, ok := o.(*gatewayapiv1.GatewayClass)
if !ok {
return []string{}
}

return []string{string(gatewayclass.Spec.ControllerName)}
})
if err := r.fieldIndexer.IndexField(context.Background(), &gatewayapiv1.GatewayClass{}, gatewayclassControllerIndexFieldName, gatewayclassControllerIndexFn); err != nil {
log.Error(err, "failed to create index for gatewayclasses", "request", request)
errs = append(errs, err)
}
})
Comment on lines +263 to +279
Copy link

@coderabbitai coderabbitai bot Mar 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Index creation failure is not retriable due to sync.Once.

If IndexField fails transiently (e.g., temporary API server issue), the sync.Once prevents any retry. Subsequent reconciliations will consistently fail when ensureIstio attempts to list with the non-existent index.

While the error is logged and the reconciliation will fail with an error (surfacing the issue), the system cannot self-heal without operator restart.

Consider using a different synchronization pattern that allows retry on failure, such as tracking success state explicitly.

🔧 Alternative pattern allowing retry on failure
-	// startGatewayclassControllerIndex ensures we create the gatewayclasses index at
-	// most once.
-	startGatewayclassControllerIndex sync.Once
+	// gatewayclassControllerIndexCreated tracks whether the index was successfully created.
+	gatewayclassControllerIndexCreated bool
+	gatewayclassControllerIndexMu      sync.Mutex
 }

 // In Reconcile:
-	r.startGatewayclassControllerIndex.Do(func() {
+	r.gatewayclassControllerIndexMu.Lock()
+	if !r.gatewayclassControllerIndexCreated {
 		gatewayclassControllerIndexFn := client.IndexerFunc(func(o client.Object) []string {
 			gatewayclass, ok := o.(*gatewayapiv1.GatewayClass)
 			if !ok {
 				return []string{}
 			}
 			return []string{string(gatewayclass.Spec.ControllerName)}
 		})
 		if err := r.fieldIndexer.IndexField(context.Background(), &gatewayapiv1.GatewayClass{}, gatewayclassControllerIndexFieldName, gatewayclassControllerIndexFn); err != nil {
 			log.Error(err, "failed to create index for gatewayclasses", "request", request)
 			errs = append(errs, err)
+		} else {
+			r.gatewayclassControllerIndexCreated = true
 		}
-	})
+	}
+	r.gatewayclassControllerIndexMu.Unlock()
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@pkg/operator/controller/gatewayclass/controller.go` around lines 263 - 279,
The current use of r.startGatewayclassControllerIndex.Do(...) prevents retries
if fieldIndexer.IndexField fails; change the pattern so the index is only marked
"done" after a successful IndexField call (e.g., replace sync.Once.Do with a
small protected boolean or atomic flag plus a mutex or atomic.CompareAndSwap):
run gatewayclassControllerIndexFn and call r.fieldIndexer.IndexField(...) and if
it returns nil set the success flag (e.g., r.gatewayclassIndexInitialized =
true) so future reconciliations skip creation, but if it returns an error do not
set the flag and return/append the error so the reconcile can retry; keep
references to gatewayclassControllerIndexFn,
gatewayclassControllerIndexFieldName and r.fieldIndexer.IndexField when
implementing this change.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a fair point. On the one hand, the current logic for initializing the index follows pattern we established with the logic for initializing the watch on istios. On the other hand, this seems simple enough to fix, and I expect the watch on istios will go away in a couple releases. I would like to allow the current CI run to finish, but I can update the logic after that.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Miciah, sounds good! The suggested mutex+boolean pattern is straightforward to apply whenever you're ready. Since the startIstioWatch watch initialization has the same limitation, you may want to apply a similar retry-capable pattern there as well — though as you noted, that code is expected to be removed in a future release, so it may not be worth the effort.


ossmCatalog := r.config.GatewayAPIOperatorCatalog
if v, ok := gatewayclass.Annotations[subscriptionCatalogOverrideAnnotationKey]; ok {
ossmCatalog = v
Expand All @@ -258,7 +300,8 @@ func (r *reconciler) Reconcile(ctx context.Context, request reconcile.Request) (
if v, ok := gatewayclass.Annotations[istioVersionOverrideAnnotationKey]; ok {
istioVersion = v
}
if _, _, err := r.ensureIstio(ctx, &gatewayclass, istioVersion); err != nil {

if _, _, err := r.ensureIstio(ctx, &gatewayclass, istioVersion, &infraConfig); err != nil {
errs = append(errs, err)
} else {
// The OSSM operator installs the istios.sailoperator.io CRD.
Expand Down
Loading