diff --git a/apis/placement/v1/binding_types.go b/apis/placement/v1/binding_types.go index 7f5b78aac..55f1c96d2 100644 --- a/apis/placement/v1/binding_types.go +++ b/apis/placement/v1/binding_types.go @@ -13,8 +13,9 @@ import ( // +kubebuilder:object:root=true // +kubebuilder:resource:scope=Cluster,categories={fleet,fleet-placement},shortName=rb // +kubebuilder:subresource:status -// +kubebuilder:printcolumn:JSONPath=`.status.conditions[?(@.type=="WorkSynchronized")].status`,name="WorkCreated",type=string +// +kubebuilder:printcolumn:JSONPath=`.status.conditions[?(@.type=="WorkSynchronized")].status`,name="WorkSynchronized",type=string // +kubebuilder:printcolumn:JSONPath=`.status.conditions[?(@.type=="Applied")].status`,name="ResourcesApplied",type=string +// +kubebuilder:printcolumn:JSONPath=`.status.conditions[?(@.type=="Available")].status`,name="ResourceAvailable",priority=1,type=string // +kubebuilder:printcolumn:JSONPath=`.metadata.creationTimestamp`,name="Age",type=date // ClusterResourceBinding represents a scheduling decision that binds a group of resources to a cluster. 
diff --git a/apis/placement/v1beta1/binding_types.go b/apis/placement/v1beta1/binding_types.go index dfb033781..7649a4434 100644 --- a/apis/placement/v1beta1/binding_types.go +++ b/apis/placement/v1beta1/binding_types.go @@ -20,8 +20,9 @@ const ( // +kubebuilder:resource:scope=Cluster,categories={fleet,fleet-placement},shortName=rb // +kubebuilder:subresource:status // +kubebuilder:storageversion -// +kubebuilder:printcolumn:JSONPath=`.status.conditions[?(@.type=="WorkSynchronized")].status`,name="WorkCreated",type=string +// +kubebuilder:printcolumn:JSONPath=`.status.conditions[?(@.type=="WorkSynchronized")].status`,name="WorkSynchronized",type=string // +kubebuilder:printcolumn:JSONPath=`.status.conditions[?(@.type=="Applied")].status`,name="ResourcesApplied",type=string +// +kubebuilder:printcolumn:JSONPath=`.status.conditions[?(@.type=="Available")].status`,name="ResourceAvailable",priority=1,type=string // +kubebuilder:printcolumn:JSONPath=`.metadata.creationTimestamp`,name="Age",type=date // ClusterResourceBinding represents a scheduling decision that binds a group of resources to a cluster. 
diff --git a/config/crd/bases/placement.kubernetes-fleet.io_clusterresourcebindings.yaml b/config/crd/bases/placement.kubernetes-fleet.io_clusterresourcebindings.yaml index 931199c77..8d0a15d8b 100644 --- a/config/crd/bases/placement.kubernetes-fleet.io_clusterresourcebindings.yaml +++ b/config/crd/bases/placement.kubernetes-fleet.io_clusterresourcebindings.yaml @@ -21,11 +21,15 @@ spec: versions: - additionalPrinterColumns: - jsonPath: .status.conditions[?(@.type=="WorkSynchronized")].status - name: WorkCreated + name: WorkSynchronized type: string - jsonPath: .status.conditions[?(@.type=="Applied")].status name: ResourcesApplied type: string + - jsonPath: .status.conditions[?(@.type=="Available")].status + name: ResourceAvailable + priority: 1 + type: string - jsonPath: .metadata.creationTimestamp name: Age type: date @@ -388,11 +392,15 @@ spec: status: {} - additionalPrinterColumns: - jsonPath: .status.conditions[?(@.type=="WorkSynchronized")].status - name: WorkCreated + name: WorkSynchronized type: string - jsonPath: .status.conditions[?(@.type=="Applied")].status name: ResourcesApplied type: string + - jsonPath: .status.conditions[?(@.type=="Available")].status + name: ResourceAvailable + priority: 1 + type: string - jsonPath: .metadata.creationTimestamp name: Age type: date diff --git a/config/crd/bases/placement.kubernetes-fleet.io_clusterresourceoverrides.yaml b/config/crd/bases/placement.kubernetes-fleet.io_clusterresourceoverrides.yaml index 3aa2b04c4..8b393cae7 100644 --- a/config/crd/bases/placement.kubernetes-fleet.io_clusterresourceoverrides.yaml +++ b/config/crd/bases/placement.kubernetes-fleet.io_clusterresourceoverrides.yaml @@ -340,6 +340,10 @@ spec: description: |- Value defines the content to be applied on the target location. Value should be empty when operator is `remove`. + We have reserved a few variables in this field that will be replaced by the actual values. + Those variables all start with `$` and are case sensitive. 
+ Here is the list of currently supported variables: + `${MEMBER-CLUSTER-NAME}`: this will be replaced by the name of the memberCluster CR that represents this cluster. x-kubernetes-preserve-unknown-fields: true required: - op diff --git a/config/crd/bases/placement.kubernetes-fleet.io_clusterresourceoverridesnapshots.yaml b/config/crd/bases/placement.kubernetes-fleet.io_clusterresourceoverridesnapshots.yaml index d89c326a0..51e8ccc4f 100644 --- a/config/crd/bases/placement.kubernetes-fleet.io_clusterresourceoverridesnapshots.yaml +++ b/config/crd/bases/placement.kubernetes-fleet.io_clusterresourceoverridesnapshots.yaml @@ -354,6 +354,10 @@ spec: description: |- Value defines the content to be applied on the target location. Value should be empty when operator is `remove`. + We have reserved a few variables in this field that will be replaced by the actual values. + Those variables all start with `$` and are case sensitive. + Here is the list of currently supported variables: + `${MEMBER-CLUSTER-NAME}`: this will be replaced by the name of the memberCluster CR that represents this cluster. x-kubernetes-preserve-unknown-fields: true required: - op diff --git a/config/crd/bases/placement.kubernetes-fleet.io_resourceoverrides.yaml b/config/crd/bases/placement.kubernetes-fleet.io_resourceoverrides.yaml index 6f1a80678..8f65a189e 100644 --- a/config/crd/bases/placement.kubernetes-fleet.io_resourceoverrides.yaml +++ b/config/crd/bases/placement.kubernetes-fleet.io_resourceoverrides.yaml @@ -255,6 +255,10 @@ spec: description: |- Value defines the content to be applied on the target location. Value should be empty when operator is `remove`. + We have reserved a few variables in this field that will be replaced by the actual values. + Those variables all start with `$` and are case sensitive. + Here is the list of currently supported variables: + `${MEMBER-CLUSTER-NAME}`: this will be replaced by the name of the memberCluster CR that represents this cluster. 
x-kubernetes-preserve-unknown-fields: true required: - op diff --git a/config/crd/bases/placement.kubernetes-fleet.io_resourceoverridesnapshots.yaml b/config/crd/bases/placement.kubernetes-fleet.io_resourceoverridesnapshots.yaml index d502dd686..22f9de081 100644 --- a/config/crd/bases/placement.kubernetes-fleet.io_resourceoverridesnapshots.yaml +++ b/config/crd/bases/placement.kubernetes-fleet.io_resourceoverridesnapshots.yaml @@ -269,6 +269,10 @@ spec: description: |- Value defines the content to be applied on the target location. Value should be empty when operator is `remove`. + We have reserved a few variables in this field that will be replaced by the actual values. + Those variables all start with `$` and are case sensitive. + Here is the list of currently supported variables: + `${MEMBER-CLUSTER-NAME}`: this will be replaced by the name of the memberCluster CR that represents this cluster. x-kubernetes-preserve-unknown-fields: true required: - op diff --git a/pkg/controllers/clusterresourcebindingwatcher/watcher.go b/pkg/controllers/clusterresourcebindingwatcher/watcher.go index 870123d3b..326866eb2 100644 --- a/pkg/controllers/clusterresourcebindingwatcher/watcher.go +++ b/pkg/controllers/clusterresourcebindingwatcher/watcher.go @@ -32,7 +32,8 @@ type Reconciler struct { PlacementController controller.Controller } -// Reconcile reconciles the clusterResourceBinding. +// Reconcile watches the clusterResourceBinding and enqueues the corresponding CRP name for those bindings whose +// status has changed. This is for the CRP controller to update the corresponding placementStatuses. 
func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { bindingRef := klog.KRef("", req.Name) diff --git a/pkg/controllers/rollout/controller.go b/pkg/controllers/rollout/controller.go index cc4148dcc..ac8ce5de5 100644 --- a/pkg/controllers/rollout/controller.go +++ b/pkg/controllers/rollout/controller.go @@ -22,12 +22,10 @@ import ( "k8s.io/client-go/util/workqueue" "k8s.io/klog/v2" runtime "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" ctrl "sigs.k8s.io/controller-runtime/pkg/controller" "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/handler" - "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" fleetv1alpha1 "go.goms.io/fleet/apis/placement/v1alpha1" @@ -161,9 +159,9 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim } klog.V(2).InfoS("Picked the bindings to be updated", "clusterResourcePlacement", crpName, "numberOfBindings", len(toBeUpdatedBindings), "numberOfStaleBindings", len(staleBoundBindings)) + // StaleBindings is the list that contains bindings that need to be updated but are blocked by the rollout strategy. // Update the status first, so that if the rolling out (updateBindings func) fails in the middle, the controller will // recompute the list and the result may be different. - // As far as now, these bindings are blocked by the rollout strategy. 
if err := r.updateStaleBindingsStatus(ctx, staleBoundBindings); err != nil { return runtime.Result{}, err } @@ -357,13 +355,13 @@ func (r *Reconciler) pickBindingsToRoll(ctx context.Context, allBindings []*flee switch binding.Spec.State { case fleetv1beta1.BindingStateUnscheduled: if bindingutils.HasBindingFailed(binding) { - klog.V(3).InfoS("Found a failed to be ready unscheduled binding", "clusterResourcePlacement", crpKObj, "binding", bindingKObj) + klog.V(2).InfoS("Found a failed to be ready unscheduled binding", "clusterResourcePlacement", crpKObj, "binding", bindingKObj) } else { canBeReadyBindings = append(canBeReadyBindings, binding) } waitTime, bindingReady := isBindingReady(binding, readyTimeCutOff) if bindingReady { - klog.V(3).InfoS("Found a ready unscheduled binding", "clusterResourcePlacement", crpKObj, "binding", bindingKObj) + klog.V(2).InfoS("Found a ready unscheduled binding", "clusterResourcePlacement", crpKObj, "binding", bindingKObj) readyBindings = append(readyBindings, binding) } else { allReady = false @@ -373,7 +371,7 @@ func (r *Reconciler) pickBindingsToRoll(ctx context.Context, allBindings []*flee } if binding.DeletionTimestamp.IsZero() { // it's not been deleted yet, so it is a removal candidate - klog.V(3).InfoS("Found a not yet deleted unscheduled binding", "clusterResourcePlacement", crpKObj, "binding", bindingKObj) + klog.V(2).InfoS("Found a not yet deleted unscheduled binding", "clusterResourcePlacement", crpKObj, "binding", bindingKObj) // The desired binding is nil for the removeCandidates. 
removeCandidates = append(removeCandidates, toBeUpdatedBinding{currentBinding: binding}) } else if bindingReady { @@ -395,7 +393,7 @@ func (r *Reconciler) pickBindingsToRoll(ctx context.Context, allBindings []*flee schedulerTargetedBinds = append(schedulerTargetedBinds, binding) waitTime, bindingReady := isBindingReady(binding, readyTimeCutOff) if bindingReady { - klog.V(3).InfoS("Found a ready bound binding", "clusterResourcePlacement", crpKObj, "binding", bindingKObj) + klog.V(2).InfoS("Found a ready bound binding", "clusterResourcePlacement", crpKObj, "binding", bindingKObj) readyBindings = append(readyBindings, binding) } else { allReady = false @@ -405,7 +403,7 @@ func (r *Reconciler) pickBindingsToRoll(ctx context.Context, allBindings []*flee } // check if the binding is failed or still on going if bindingutils.HasBindingFailed(binding) { - klog.V(3).InfoS("Found a failed to be ready bound binding", "clusterResourcePlacement", crpKObj, "binding", bindingKObj) + klog.V(2).InfoS("Found a failed to be ready bound binding", "clusterResourcePlacement", crpKObj, "binding", bindingKObj) bindingFailed = true } else { canBeReadyBindings = append(canBeReadyBindings, binding) @@ -443,7 +441,8 @@ func (r *Reconciler) pickBindingsToRoll(ctx context.Context, allBindings []*flee klog.V(2).InfoS("Calculated the targetNumber", "clusterResourcePlacement", crpKObj, "targetNumber", targetNumber, "readyBindingNumber", len(readyBindings), "canBeUnavailableBindingNumber", len(canBeUnavailableBindings), "canBeReadyBindingNumber", len(canBeReadyBindings), "boundingCandidateNumber", len(boundingCandidates), - "removeCandidateNumber", len(removeCandidates), "updateCandidateNumber", len(updateCandidates), "applyFailedUpdateCandidateNumber", len(applyFailedUpdateCandidates)) + "removeCandidateNumber", len(removeCandidates), "updateCandidateNumber", len(updateCandidates), "applyFailedUpdateCandidateNumber", + len(applyFailedUpdateCandidates), "minWaitTime", minWaitTime) // the list of 
bindings that are to be updated by this rolling phase toBeUpdatedBindingList := make([]toBeUpdatedBinding, 0) @@ -496,7 +495,7 @@ func determineBindingsToUpdate( for ; boundingCandidatesUnselectedIndex < maxNumberToAdd && boundingCandidatesUnselectedIndex < len(boundingCandidates); boundingCandidatesUnselectedIndex++ { toBeUpdatedBindingList = append(toBeUpdatedBindingList, boundingCandidates[boundingCandidatesUnselectedIndex]) } - + // Those are the bindings that are not up to date but not selected to be updated in this round because of the rollout constraints. staleUnselectedBinding := make([]toBeUpdatedBinding, 0) if updateCandidateUnselectedIndex < len(updateCandidates) { staleUnselectedBinding = append(staleUnselectedBinding, updateCandidates[updateCandidateUnselectedIndex:]...) } @@ -580,6 +579,7 @@ func isBindingReady(binding *fleetv1beta1.ClusterResourceBinding, readyTimeCutOf return waitTime, false } // we don't know when the current spec is available yet, return a negative wait time + // since we will reconcile again after the binding status changes return -1, false } @@ -669,17 +669,16 @@ func (r *Reconciler) SetupWithManager(mgr runtime.Manager) error { Watches(&fleetv1beta1.ClusterResourceBinding{}, handler.Funcs{ CreateFunc: func(ctx context.Context, e event.CreateEvent, q workqueue.RateLimitingInterface) { klog.V(2).InfoS("Handling a resourceBinding create event", "resourceBinding", klog.KObj(e.Object)) - handleResourceBinding(e.Object, q) + enqueueResourceBinding(e.Object, q) }, UpdateFunc: func(ctx context.Context, e event.UpdateEvent, q workqueue.RateLimitingInterface) { - klog.V(2).InfoS("Handling a resourceBinding update event", "resourceBinding", klog.KObj(e.ObjectNew)) - handleResourceBinding(e.ObjectNew, q) + handleResourceBindingUpdated(e.ObjectOld, e.ObjectNew, q) }, GenericFunc: func(ctx context.Context, e event.GenericEvent, q workqueue.RateLimitingInterface) { klog.V(2).InfoS("Handling a resourceBinding generic event", "resourceBinding",
klog.KObj(e.Object)) - handleResourceBinding(e.Object, q) + enqueueResourceBinding(e.Object, q) }, - }, builder.WithPredicates(predicate.GenerationChangedPredicate{})). + }). // Aside from ClusterResourceSnapshot and ClusterResourceBinding objects, the rollout // controller also watches ClusterResourcePlacement objects, so that it can push apply // strategy updates to all bindings right away. @@ -795,8 +794,8 @@ func handleResourceSnapshot(snapshot client.Object, q workqueue.RateLimitingInte }) } -// handleResourceBinding parse the binding label and enqueue the CRP name associated with the resource binding -func handleResourceBinding(binding client.Object, q workqueue.RateLimitingInterface) { +// enqueueResourceBinding parses the binding label and enqueues the CRP name associated with the resource binding +func enqueueResourceBinding(binding client.Object, q workqueue.RateLimitingInterface) { bindingRef := klog.KObj(binding) // get the CRP name from the label crp := binding.GetLabels()[fleetv1beta1.CRPTrackingLabel] @@ -812,6 +811,39 @@ func handleResourceBinding(binding client.Object, q workqueue.RateLimitingInterf }) } +// handleResourceBindingUpdated determines the action to take when a resource binding is updated. +// We only care about the Available and DiffReported condition changes. +func handleResourceBindingUpdated(objectOld, objectNew client.Object, q workqueue.RateLimitingInterface) { + // Check if the update event is valid.
+ if objectOld == nil || objectNew == nil { + klog.ErrorS(controller.NewUnexpectedBehaviorError(fmt.Errorf("update event is nil")), "Failed to process update event") + return + } + oldBinding, oldOk := objectOld.(*fleetv1beta1.ClusterResourceBinding) + newBinding, newOk := objectNew.(*fleetv1beta1.ClusterResourceBinding) + if !oldOk || !newOk { + klog.ErrorS(controller.NewUnexpectedBehaviorError(fmt.Errorf("failed to cast runtime objects in update event to cluster resource binding objects")), "Failed to process update event") + return + } + if oldBinding.GetGeneration() != newBinding.GetGeneration() { + klog.V(2).InfoS("The binding spec has changed, need to notify rollout controller", "binding", klog.KObj(newBinding)) + enqueueResourceBinding(newBinding, q) + return + } + // these are the conditions we care about + conditionsToMonitor := []string{string(fleetv1beta1.ResourceBindingDiffReported), string(fleetv1beta1.ResourceBindingAvailable)} + for _, conditionType := range conditionsToMonitor { + oldCond := oldBinding.GetCondition(conditionType) + newCond := newBinding.GetCondition(conditionType) + if !condition.EqualCondition(oldCond, newCond) { + klog.V(2).InfoS("The binding status has changed, need to notify rollout controller", "binding", klog.KObj(newBinding), "conditionType", conditionType) + enqueueResourceBinding(newBinding, q) + return + } + } + klog.V(2).InfoS("A resourceBinding is updated but we don't need to handle it", "resourceBinding", klog.KObj(newBinding)) +} + // updateStaleBindingsStatus updates the status of the stale bindings to indicate that they are blocked by the rollout strategy. // Note: the binding state should be "Scheduled" or "Bound". // The desired binding will be ignored. @@ -893,7 +924,7 @@ func (r *Reconciler) processApplyStrategyUpdates( // Verify if the binding has the latest apply strategy set.
if equality.Semantic.DeepEqual(binding.Spec.ApplyStrategy, applyStrategy) { // The binding already has the latest apply strategy set; no need to push the update. - klog.V(2).InfoS("The binding already has the latest apply strategy; skip the apply strategy update", "clusterResourceBinding", klog.KObj(binding)) + klog.V(3).InfoS("The binding already has the latest apply strategy; skip the apply strategy update", "clusterResourceBinding", klog.KObj(binding)) continue } diff --git a/pkg/controllers/rollout/controller_integration_test.go b/pkg/controllers/rollout/controller_integration_test.go index 912e8b8cd..0a8705a5d 100644 --- a/pkg/controllers/rollout/controller_integration_test.go +++ b/pkg/controllers/rollout/controller_integration_test.go @@ -271,9 +271,9 @@ var _ = Describe("Test the rollout Controller", func() { It("Should rollout the selected and unselected bindings (not trackable resources)", func() { // create CRP - var targetCluster int32 = 11 + var initTargetClusterNum int32 = 11 rolloutCRP = clusterResourcePlacementForTest(testCRPName, - createPlacementPolicyForTest(fleetv1beta1.PickNPlacementType, targetCluster), + createPlacementPolicyForTest(fleetv1beta1.PickNPlacementType, initTargetClusterNum), createPlacementRolloutStrategyForTest(fleetv1beta1.RollingUpdateRolloutStrategyType, generateDefaultRollingUpdateConfig(), nil)) Expect(k8sClient.Create(ctx, rolloutCRP)).Should(Succeed()) // create master resource snapshot that is latest @@ -281,8 +281,8 @@ var _ = Describe("Test the rollout Controller", func() { Expect(k8sClient.Create(ctx, masterSnapshot)).Should(Succeed()) By(fmt.Sprintf("master resource snapshot %s created", masterSnapshot.Name)) // create scheduled bindings for master snapshot on target clusters - clusters := make([]string, targetCluster) - for i := 0; i < int(targetCluster); i++ { + clusters := make([]string, initTargetClusterNum) + for i := 0; i < int(initTargetClusterNum); i++ { clusters[i] = "cluster-" + strconv.Itoa(i) binding := 
generateClusterResourceBinding(fleetv1beta1.BindingStateScheduled, masterSnapshot.Name, clusters[i]) Expect(k8sClient.Create(ctx, binding)).Should(Succeed()) @@ -308,34 +308,34 @@ var _ = Describe("Test the rollout Controller", func() { markBindingAvailable(bindings[i], false) } // simulate another scheduling decision, pick some cluster to unselect from the bottom of the list - var newTarget int32 = 9 - rolloutCRP.Spec.Policy.NumberOfClusters = &newTarget + var newTargetClusterNum int32 = 9 + rolloutCRP.Spec.Policy.NumberOfClusters = &newTargetClusterNum Expect(k8sClient.Update(ctx, rolloutCRP)).Should(Succeed()) secondRoundBindings := make([]*fleetv1beta1.ClusterResourceBinding, 0) deletedBindings := make([]*fleetv1beta1.ClusterResourceBinding, 0) - stillScheduled := 6 + stillScheduledClusterNum := 6 // the amount of clusters that are still scheduled in first round // simulate that some of the bindings are available // moved to before being set to unscheduled, otherwise, the rollout controller will try to delete the bindings before we mark them as available. 
- for i := int(newTarget); i < int(targetCluster); i++ { + for i := int(newTargetClusterNum); i < int(initTargetClusterNum); i++ { markBindingAvailable(bindings[i], false) } - for i := int(targetCluster - 1); i >= stillScheduled; i-- { + for i := int(initTargetClusterNum - 1); i >= stillScheduledClusterNum; i-- { binding := bindings[i] binding.Spec.State = fleetv1beta1.BindingStateUnscheduled Expect(k8sClient.Update(ctx, binding)).Should(Succeed()) By(fmt.Sprintf("resource binding `%s` is marked as not scheduled", binding.Name)) deletedBindings = append(deletedBindings, binding) } - for i := 0; i < stillScheduled; i++ { + for i := 0; i < stillScheduledClusterNum; i++ { secondRoundBindings = append(secondRoundBindings, bindings[i]) } // simulate that some of the bindings are available and not trackable - for i := firstApplied; i < int(newTarget); i++ { + for i := firstApplied; i < int(newTargetClusterNum); i++ { markBindingAvailable(bindings[i], false) } - newScheduled := int(newTarget) - stillScheduled - for i := 0; i < newScheduled; i++ { - binding := generateClusterResourceBinding(fleetv1beta1.BindingStateScheduled, masterSnapshot.Name, "cluster-"+strconv.Itoa(int(targetCluster)+i)) + newlyScheduledClusterNum := int(newTargetClusterNum) - stillScheduledClusterNum + for i := 0; i < newlyScheduledClusterNum; i++ { + binding := generateClusterResourceBinding(fleetv1beta1.BindingStateScheduled, masterSnapshot.Name, "cluster-"+strconv.Itoa(int(initTargetClusterNum)+i)) Expect(k8sClient.Create(ctx, binding)).Should(Succeed()) By(fmt.Sprintf("resource binding %s created", binding.Name)) bindings = append(bindings, binding) @@ -353,12 +353,12 @@ var _ = Describe("Test the rollout Controller", func() { } } return true - }, timeout, interval).Should(BeTrue(), "rollout controller should roll all the bindings to Bound state") + }, 3*defaultUnavailablePeriod*time.Second, interval).Should(BeTrue(), "rollout controller should roll all the bindings to Bound state") // simulate 
that the new bindings are available and not trackable for i := 0; i < len(secondRoundBindings); i++ { markBindingAvailable(secondRoundBindings[i], false) } - // check that the unselected bindings are deleted + // check that the unselected bindings are deleted after 3 times of the default unavailable period Eventually(func() bool { for _, binding := range deletedBindings { if err := k8sClient.Get(ctx, types.NamespacedName{Name: binding.GetName()}, binding); err != nil && !apierrors.IsNotFound(err) { @@ -366,7 +366,7 @@ var _ = Describe("Test the rollout Controller", func() { } } return true - }, timeout, interval).Should(BeTrue(), "rollout controller should delete all the unselected bindings") + }, 3*defaultUnavailablePeriod*time.Second, interval).Should(BeTrue(), "rollout controller should delete all the unselected bindings") }) It("Should rollout both the new scheduling and the new resources (trackable)", func() { @@ -668,7 +668,7 @@ var _ = Describe("Test the rollout Controller", func() { } } return allMatch - }, timeout, interval).Should(BeTrue(), "rollout controller should roll all the bindings to use the latest resource snapshot") + }, 5*defaultUnavailablePeriod*time.Second, interval).Should(BeTrue(), "rollout controller should roll all the bindings to use the latest resource snapshot") }) It("Should wait designated time before rolling out ", func() { @@ -934,7 +934,7 @@ var _ = Describe("Test the rollout Controller", func() { return fmt.Errorf("binding %s is in bound state, which is unexpected", newScheduledBinding.Name) } return nil - }, timeout, interval).Should(BeNil(), "rollout controller shouldn't roll new scheduled binding to bound state") + }, 3*defaultUnavailablePeriod*time.Second, interval).Should(BeNil(), "rollout controller shouldn't roll new scheduled binding to bound state") // simulate eviction by deleting first bound binding. 
firstBoundBinding := 1 @@ -953,7 +953,7 @@ var _ = Describe("Test the rollout Controller", func() { return false } return true - }, timeout, interval).Should(BeTrue(), "rollout controller should roll all remaining bindings to Bound state") + }, 3*defaultUnavailablePeriod*time.Second, interval).Should(BeTrue(), "rollout controller should roll all remaining bindings to Bound state") } }) diff --git a/pkg/controllers/rollout/controller_test.go b/pkg/controllers/rollout/controller_test.go index 7f61b5a6d..97942c903 100644 --- a/pkg/controllers/rollout/controller_test.go +++ b/pkg/controllers/rollout/controller_test.go @@ -66,6 +66,8 @@ var ( return !t1.Time.Add(10 * time.Second).Before(t2.Time) }), } + + defaultUnavailablePeriod = time.Duration(3) ) func serviceScheme(t *testing.T) *runtime.Scheme { @@ -188,9 +190,9 @@ func TestReconcilerHandleResourceBinding(t *testing.T) { for name, tt := range tests { t.Run(name, func(t *testing.T) { queue := &controllertest.Queue{Interface: workqueue.New()} - handleResourceBinding(tt.resourceBinding, queue) + enqueueResourceBinding(tt.resourceBinding, queue) if tt.shouldEnqueue && queue.Len() == 0 { - t.Errorf("handleResourceBinding test `%s` didn't queue the object when it should enqueue", name) + t.Errorf("enqueueResourceBinding test `%s` didn't queue the object when it should enqueue", name) } if !tt.shouldEnqueue && queue.Len() != 0 { t.Errorf("handleResourceSnapshot test `%s` queue the object when it should not enqueue", name) @@ -1094,7 +1096,7 @@ func TestPickBindingsToRoll(t *testing.T) { wantNeedRoll: true, wantWaitTime: 0, }, - "test bound binding with latest resources - rollout blocked": { + "test bound binding with latest resources - rollout not needed": { allBindings: []*fleetv1beta1.ClusterResourceBinding{ generateClusterResourceBinding(fleetv1beta1.BindingStateBound, "snapshot-1", cluster1), }, @@ -1105,7 +1107,6 @@ func TestPickBindingsToRoll(t *testing.T) { wantTobeUpdatedBindings: []int{}, 
wantStaleUnselectedBindings: []int{}, wantNeedRoll: false, - wantWaitTime: time.Second, }, "test failed to apply bound binding, outdated resources - rollout allowed": { allBindings: []*fleetv1beta1.ClusterResourceBinding{ @@ -1124,7 +1125,7 @@ func TestPickBindingsToRoll(t *testing.T) { }, }, wantNeedRoll: true, - wantWaitTime: time.Second, + wantWaitTime: defaultUnavailablePeriod * time.Second, }, "test one failed to apply bound binding and four failed non ready bound bindings, outdated resources with maxUnavailable specified - rollout allowed": { allBindings: []*fleetv1beta1.ClusterResourceBinding{ @@ -1168,7 +1169,7 @@ func TestPickBindingsToRoll(t *testing.T) { }, }, wantNeedRoll: true, - wantWaitTime: time.Second, + wantWaitTime: defaultUnavailablePeriod * time.Second, }, "test three failed to apply bound binding, two ready bound binding, outdated resources with maxUnavailable specified - rollout allowed": { allBindings: []*fleetv1beta1.ClusterResourceBinding{ @@ -1212,7 +1213,7 @@ func TestPickBindingsToRoll(t *testing.T) { }, }, wantNeedRoll: true, - wantWaitTime: time.Second, + wantWaitTime: defaultUnavailablePeriod * time.Second, }, "test bound ready bindings, maxUnavailable is set to zero - rollout blocked": { allBindings: []*fleetv1beta1.ClusterResourceBinding{ @@ -1303,7 +1304,7 @@ func TestPickBindingsToRoll(t *testing.T) { wantNeedRoll: true, wantWaitTime: 0, }, - "test canBeReady bound and scheduled binding - rollout allowed": { + "test canBeReady bound and scheduled binding - rollout allowed with unavailable period wait time": { allBindings: []*fleetv1beta1.ClusterResourceBinding{ generateCanBeReadyClusterResourceBinding(fleetv1beta1.BindingStateBound, "snapshot-1", cluster1), generateClusterResourceBinding(fleetv1beta1.BindingStateScheduled, "snapshot-1", cluster2), @@ -1395,12 +1396,12 @@ func TestPickBindingsToRoll(t *testing.T) { createPlacementRolloutStrategyForTest(fleetv1beta1.RollingUpdateRolloutStrategyType, 
generateDefaultRollingUpdateConfig(), nil)), wantErr: controller.ErrExpectedBehavior, }, - "test bound bindings with different waitTimes": { + "test bound bindings with different waitTimes and check the wait time should be the min of them all": { // want the min wait time of bound bindings that are not ready allBindings: []*fleetv1beta1.ClusterResourceBinding{ - generateNotReadyClusterResourceBinding(fleetv1beta1.BindingStateBound, "snapshot-1", cluster3, metav1.Time{Time: now.Add(-35 * time.Second)}), // notReady, waitTime = t - 35s - generateCanBeReadyClusterResourceBinding(fleetv1beta1.BindingStateBound, "snapshot-1", cluster1), // notReady, no wait time because it does not have available condition yet, - generateReadyClusterResourceBinding(fleetv1beta1.BindingStateBound, "snapshot-2", cluster2), // Ready + generateNotTrackableClusterResourceBinding(fleetv1beta1.BindingStateBound, "snapshot-1", cluster3, metav1.Time{Time: now.Add(-35 * time.Second)}), // notReady, waitTime = t - 35s + generateCanBeReadyClusterResourceBinding(fleetv1beta1.BindingStateBound, "snapshot-1", cluster1), // notReady, no wait time because it does not have available condition yet, + generateReadyClusterResourceBinding(fleetv1beta1.BindingStateBound, "snapshot-2", cluster2), // Ready }, latestResourceSnapshotName: "snapshot-2", crp: clusterResourcePlacementForTest("test", @@ -1433,11 +1434,11 @@ func TestPickBindingsToRoll(t *testing.T) { wantNeedRoll: true, wantWaitTime: 25 * time.Second, // minWaitTime = (t - 35 seconds) - (t - 60 seconds) = 25 seconds }, - "test unscheduled bindings with different waitTimes": { + "test unscheduled bindings with different waitTimes and check the wait time is correct": { // want the min wait time of unscheduled bindings that are not ready allBindings: []*fleetv1beta1.ClusterResourceBinding{ - generateNotReadyClusterResourceBinding(fleetv1beta1.BindingStateUnscheduled, "snapshot-1", cluster2, metav1.Time{Time: now.Add(-1 * time.Minute)}), // NotReady, 
waitTime = t - 60s - generateNotReadyClusterResourceBinding(fleetv1beta1.BindingStateUnscheduled, "snapshot-1", cluster3, metav1.Time{Time: now.Add(-35 * time.Second)}), // NotReady, waitTime = t - 35s + generateNotTrackableClusterResourceBinding(fleetv1beta1.BindingStateUnscheduled, "snapshot-1", cluster2, metav1.Time{Time: now.Add(-1 * time.Minute)}), // NotReady, waitTime = t - 60s + generateNotTrackableClusterResourceBinding(fleetv1beta1.BindingStateUnscheduled, "snapshot-1", cluster3, metav1.Time{Time: now.Add(-35 * time.Second)}), // NotReady, waitTime = t - 35s }, latestResourceSnapshotName: "snapshot-2", crp: clusterResourcePlacementForTest("test", @@ -1468,7 +1469,6 @@ func TestPickBindingsToRoll(t *testing.T) { wantTobeUpdatedBindings: []int{}, wantStaleUnselectedBindings: []int{}, wantNeedRoll: false, - wantWaitTime: time.Second, }, "test policy change with MaxSurge specified, evict resources on unscheduled cluster - rollout allowed for one scheduled binding": { allBindings: []*fleetv1beta1.ClusterResourceBinding{ @@ -1911,7 +1911,7 @@ func TestPickBindingsToRoll(t *testing.T) { Type: intstr.Int, IntVal: 0, }, - UnavailablePeriodSeconds: ptr.To(1), + UnavailablePeriodSeconds: ptr.To(int(defaultUnavailablePeriod)), }, nil)), wantDesiredBindingsSpec: []fleetv1beta1.ResourceBindingSpec{ {}, @@ -1932,7 +1932,7 @@ func TestPickBindingsToRoll(t *testing.T) { wantTobeUpdatedBindings: []int{4, 5}, // no ready unscheduled bindings, so scheduled bindings were chosen. 
wantStaleUnselectedBindings: []int{}, wantNeedRoll: true, - wantWaitTime: time.Second, + wantWaitTime: defaultUnavailablePeriod * time.Second, }, "test downscale with resource snapshot change, evict ready bound binding - rollout allowed for one unscheduled binding": { allBindings: []*fleetv1beta1.ClusterResourceBinding{ @@ -2083,9 +2083,10 @@ func TestPickBindingsToRoll(t *testing.T) { if gotNeedRoll != tt.wantNeedRoll { t.Errorf("pickBindingsToRoll() = needRoll %v, want %v", gotNeedRoll, tt.wantNeedRoll) } - - if gotWaitTime.Round(time.Second) != tt.wantWaitTime { - t.Errorf("pickBindingsToRoll() = waitTime %v, want %v", gotWaitTime, tt.wantWaitTime) + if tt.wantNeedRoll == true { + if gotWaitTime.Round(time.Second) != tt.wantWaitTime { + t.Errorf("pickBindingsToRoll() = waitTime %v, want %v", gotWaitTime, tt.wantWaitTime) + } } }) } @@ -2179,7 +2180,7 @@ func generateReadyClusterResourceBinding(state fleetv1beta1.BindingState, resour return binding } -func generateNotReadyClusterResourceBinding(state fleetv1beta1.BindingState, resourceSnapshotName, targetCluster string, lastTransitionTime metav1.Time) *fleetv1beta1.ClusterResourceBinding { +func generateNotTrackableClusterResourceBinding(state fleetv1beta1.BindingState, resourceSnapshotName, targetCluster string, lastTransitionTime metav1.Time) *fleetv1beta1.ClusterResourceBinding { binding := generateClusterResourceBinding(state, resourceSnapshotName, targetCluster) binding.Status.Conditions = []metav1.Condition{ { @@ -2213,7 +2214,7 @@ func generateDefaultRollingUpdateConfig() *fleetv1beta1.RollingUpdateConfig { Type: intstr.Int, IntVal: 3, }, - UnavailablePeriodSeconds: ptr.To(1), + UnavailablePeriodSeconds: ptr.To(int(defaultUnavailablePeriod)), } } diff --git a/pkg/controllers/updaterun/execution.go b/pkg/controllers/updaterun/execution.go index 2ce101ee4..294a4005d 100644 --- a/pkg/controllers/updaterun/execution.go +++ b/pkg/controllers/updaterun/execution.go @@ -20,6 +20,7 @@ import ( 
"sigs.k8s.io/controller-runtime/pkg/client" placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" + bindingutils "go.goms.io/fleet/pkg/utils/binding" "go.goms.io/fleet/pkg/utils/condition" "go.goms.io/fleet/pkg/utils/controller" ) @@ -174,7 +175,7 @@ func (r *Reconciler) executeUpdatingStage( markStageUpdatingWaiting(updatingStageStatus, updateRun.Generation) klog.V(2).InfoS("The stage has finished all cluster updating", "stage", updatingStageStatus.StageName, "clusterStagedUpdateRun", updateRunRef) // Check if the after stage tasks are ready. - approved, err := r.checkAfterStageTasksStatus(ctx, updatingStageIndex, updateRun) + approved, waitTime, err := r.checkAfterStageTasksStatus(ctx, updatingStageIndex, updateRun) if err != nil { return 0, err } @@ -183,7 +184,11 @@ func (r *Reconciler) executeUpdatingStage( // No need to wait to get to the next stage. return 0, nil } - return stageUpdatingWaitTime, nil + // The after stage tasks are not ready yet. + if waitTime < 0 { + waitTime = stageUpdatingWaitTime + } + return waitTime, nil } return clusterUpdatingWaitTime, nil } @@ -253,22 +258,24 @@ func (r *Reconciler) executeDeleteStage( // checkAfterStageTasksStatus checks if the after stage tasks have finished. // Tt returns if the after stage tasks have finished or error if the after stage tasks failed. -func (r *Reconciler) checkAfterStageTasksStatus(ctx context.Context, updatingStageIndex int, updateRun *placementv1beta1.ClusterStagedUpdateRun) (bool, error) { +// It also returns the time to wait before rechecking the wait type of task. It turns -1 if the task is not a wait type. 
+func (r *Reconciler) checkAfterStageTasksStatus(ctx context.Context, updatingStageIndex int, updateRun *placementv1beta1.ClusterStagedUpdateRun) (bool, time.Duration, error) { updateRunRef := klog.KObj(updateRun) updatingStageStatus := &updateRun.Status.StagesStatus[updatingStageIndex] updatingStage := &updateRun.Status.StagedUpdateStrategySnapshot.Stages[updatingStageIndex] if updatingStage.AfterStageTasks == nil { klog.V(2).InfoS("There is no after stage task for this stage", "stage", updatingStage.Name, "clusterStagedUpdateRun", updateRunRef) - return true, nil + return true, 0, nil } for i, task := range updatingStage.AfterStageTasks { switch task.Type { case placementv1beta1.AfterStageTaskTypeTimedWait: waitStartTime := meta.FindStatusCondition(updatingStageStatus.Conditions, string(placementv1beta1.StageUpdatingConditionProgressing)).LastTransitionTime.Time // Check if the wait time has passed. - if waitStartTime.Add(task.WaitTime.Duration).After(time.Now()) { + waitTime := time.Until(waitStartTime.Add(task.WaitTime.Duration)) + if waitTime > 0 { klog.V(2).InfoS("The after stage task still need to wait", "waitStartTime", waitStartTime, "waitTime", task.WaitTime, "stage", updatingStage.Name, "clusterStagedUpdateRun", updateRunRef) - return false, nil + return false, waitTime, nil } markAfterStageWaitTimeElapsed(&updatingStageStatus.AfterStageTaskStatus[i], updateRun.Generation) klog.V(2).InfoS("The after stage wait task has completed", "stage", updatingStage.Name, "clusterStagedUpdateRun", updateRunRef) @@ -295,43 +302,43 @@ func (r *Reconciler) checkAfterStageTasksStatus(ctx context.Context, updatingSta markAfterStageRequestCreated(&updatingStageStatus.AfterStageTaskStatus[i], updateRun.Generation) if err = r.Client.Get(ctx, client.ObjectKeyFromObject(&approvalRequest), &approvalRequest); err != nil { klog.ErrorS(err, "Failed to get the already existing approval request", "approvalRequest", requestRef, "stage", updatingStage.Name, "clusterStagedUpdateRun", 
updateRunRef) - return false, controller.NewAPIServerError(true, err) + return false, -1, controller.NewAPIServerError(true, err) } if approvalRequest.Spec.TargetStage != updatingStage.Name || approvalRequest.Spec.TargetUpdateRun != updateRun.Name { unexpectedErr := controller.NewUnexpectedBehaviorError(fmt.Errorf("the approval request task `%s` is targeting update run `%s` and stage `%s` ", approvalRequest.Name, approvalRequest.Spec.TargetStage, approvalRequest.Spec.TargetUpdateRun)) klog.ErrorS(unexpectedErr, "Found an approval request targeting wrong stage", "approvalRequestTask", requestRef, "stage", updatingStage.Name, "clusterStagedUpdateRun", updateRunRef) - return false, fmt.Errorf("%w: %s", errStagedUpdatedAborted, unexpectedErr.Error()) + return false, -1, fmt.Errorf("%w: %s", errStagedUpdatedAborted, unexpectedErr.Error()) } approvalAccepted := condition.IsConditionStatusTrue(meta.FindStatusCondition(approvalRequest.Status.Conditions, string(placementv1beta1.ApprovalRequestConditionApprovalAccepted)), approvalRequest.Generation) // Approved state should not change once the approval is accepted. 
if !approvalAccepted && !condition.IsConditionStatusTrue(meta.FindStatusCondition(approvalRequest.Status.Conditions, string(placementv1beta1.ApprovalRequestConditionApproved)), approvalRequest.Generation) { klog.V(2).InfoS("The approval request has not been approved yet", "approvalRequestTask", requestRef, "stage", updatingStage.Name, "clusterStagedUpdateRun", updateRunRef) - return false, nil + return false, -1, nil } klog.V(2).InfoS("The approval request has been approved", "approvalRequestTask", requestRef, "stage", updatingStage.Name, "clusterStagedUpdateRun", updateRunRef) if !approvalAccepted { - if err := r.updateApprovalRequestAccepted(ctx, &approvalRequest); err != nil { + if err = r.updateApprovalRequestAccepted(ctx, &approvalRequest); err != nil { klog.ErrorS(err, "Failed to accept the approved approval request", "approvalRequest", requestRef, "stage", updatingStage.Name, "clusterStagedUpdateRun", updateRunRef) // retriable err - return false, err + return false, 0, err } } markAfterStageRequestApproved(&updatingStageStatus.AfterStageTaskStatus[i], updateRun.Generation) } else { // retriable error klog.ErrorS(err, "Failed to create the approval request", "approvalRequest", requestRef, "stage", updatingStage.Name, "clusterStagedUpdateRun", updateRunRef) - return false, controller.NewAPIServerError(false, err) + return false, -1, controller.NewAPIServerError(false, err) } } else { // The approval request has been created for the first time. klog.V(2).InfoS("The approval request has been created", "approvalRequestTask", requestRef, "stage", updatingStage.Name, "clusterStagedUpdateRun", updateRunRef) markAfterStageRequestCreated(&updatingStageStatus.AfterStageTaskStatus[i], updateRun.Generation) - return false, nil + return false, -1, nil } } } // All the after stage tasks have been finished or the for loop will return before this line. 
- return true, nil + return true, 0, nil } // updateBindingRolloutStarted updates the binding status to indicate the rollout has started. @@ -367,7 +374,7 @@ func (r *Reconciler) updateApprovalRequestAccepted(ctx context.Context, appReq * klog.ErrorS(err, "Failed to update approval request status", "clusterApprovalRequest", klog.KObj(appReq), "condition", cond) return controller.NewUpdateIgnoreConflictError(err) } - klog.V(2).InfoS("Updated binding as rolloutStarted", "clusterApprovalRequest", klog.KObj(appReq), "condition", cond) + klog.V(2).InfoS("Updated approval request as approval accepted", "clusterApprovalRequest", klog.KObj(appReq), "condition", cond) return nil } @@ -407,15 +414,13 @@ func checkClusterUpdateResult( markClusterUpdatingSucceeded(clusterStatus, updateRun.Generation) return true, nil } - for i := condition.OverriddenCondition; i <= condition.AppliedCondition; i++ { - bindingCond := binding.GetCondition(string(i.ResourceBindingConditionType())) - if condition.IsConditionStatusFalse(bindingCond, binding.Generation) { - // We have no way to know if the failed condition is recoverable or not so we just let it run - klog.InfoS("The cluster updating encountered an error", "failedCondition", bindingCond, "cluster", clusterStatus.ClusterName, "stage", updatingStage.StageName, "clusterStagedUpdateRun", klog.KObj(updateRun)) - // TODO(wantjian): identify some non-recoverable error and mark the cluster updating as failed - return false, fmt.Errorf("the cluster updating encountered an error at stage `%s`, err := `%s`", string(i.ResourceBindingConditionType()), bindingCond.Message) - } + if bindingutils.HasBindingFailed(binding) { + // We have no way to know if the failed condition is recoverable or not so we just let it run + klog.InfoS("The cluster updating encountered an error", "cluster", clusterStatus.ClusterName, "stage", updatingStage.StageName, "clusterStagedUpdateRun", klog.KObj(updateRun)) + // TODO(wantjian): identify some non-recoverable error 
and mark the cluster updating as failed + return false, fmt.Errorf("the cluster updating encountered an error at stage `%s`, updateRun := `%s`", updatingStage.StageName, updateRun.Name) } + klog.InfoS("The application on the cluster is in the mid of being updated", "cluster", clusterStatus.ClusterName, "stage", updatingStage.StageName, "clusterStagedUpdateRun", klog.KObj(updateRun)) return false, nil } diff --git a/pkg/controllers/updaterun/execution_integration_test.go b/pkg/controllers/updaterun/execution_integration_test.go index 4160f12dc..bf7ab7379 100644 --- a/pkg/controllers/updaterun/execution_integration_test.go +++ b/pkg/controllers/updaterun/execution_integration_test.go @@ -134,7 +134,7 @@ var _ = Describe("UpdateRun execution tests", func() { By("Creating a new clusterStagedUpdateRun") Expect(k8sClient.Create(ctx, updateRun)).To(Succeed()) - By("Validating the initialization succeededand and the execution started") + By("Validating the initialization succeeded and the execution started") initialized := generateSucceededInitializationStatus(crp, updateRun, policySnapshot, updateStrategy, clusterResourceOverride) wantStatus = generateExecutionStartedStatus(updateRun, initialized) validateClusterStagedUpdateRunStatus(ctx, updateRun, wantStatus, "") diff --git a/pkg/controllers/updaterun/initialization.go b/pkg/controllers/updaterun/initialization.go index 60ff3d234..51e24e277 100644 --- a/pkg/controllers/updaterun/initialization.go +++ b/pkg/controllers/updaterun/initialization.go @@ -192,7 +192,7 @@ func (r *Reconciler) collectScheduledClusters( // no more retries here. 
return nil, nil, fmt.Errorf("%w: %s", errInitializedFailed, stateErr.Error()) } - klog.V(2).InfoS("Found a to-be-deleted binding", "binding", binding.Name, "clusterResourcePlacement", placementName, "latestPolicySnapshot", latestPolicySnapshot.Name, "clusterStagedUpdateRun", updateRunRef) + klog.V(2).InfoS("Found a to-be-deleted binding", "binding", binding.Name, "cluster", binding.Spec.TargetCluster, "clusterResourcePlacement", placementName, "latestPolicySnapshot", latestPolicySnapshot.Name, "clusterStagedUpdateRun", updateRunRef) toBeDeletedBindings = append(toBeDeletedBindings, &bindingList.Items[i]) } } diff --git a/pkg/controllers/workgenerator/controller.go b/pkg/controllers/workgenerator/controller.go index 1453b212a..8fae80bbc 100644 --- a/pkg/controllers/workgenerator/controller.go +++ b/pkg/controllers/workgenerator/controller.go @@ -176,6 +176,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req controllerruntime.Reques if syncErr != nil { klog.ErrorS(syncErr, "Failed to sync all the works", "resourceBinding", bindingRef) + //TODO: check if it's user error and set a different failed reason errorMessage := syncErr.Error() // unwrap will return nil if syncErr is not wrapped // the wrapped error string format is "%w: %s" so that remove ": " from messages @@ -300,7 +301,8 @@ func (r *Reconciler) updateBindingStatusWithRetry(ctx context.Context, resourceB } return nil } - return err + klog.V(2).InfoS("Successfully updated the resourceBinding status", "resourceBinding", bindingRef, "resourceBindingStatus", resourceBinding.Status) + return nil } // handleDelete handle a deleting binding diff --git a/pkg/utils/binding/binding.go b/pkg/utils/binding/binding.go index 0030405ec..f5d6cdd4a 100644 --- a/pkg/utils/binding/binding.go +++ b/pkg/utils/binding/binding.go @@ -6,16 +6,20 @@ Licensed under the MIT license. 
package binding import ( + "k8s.io/klog/v2" + placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" "go.goms.io/fleet/pkg/utils/condition" ) -// HasBindingFailed checks if ClusterResourceBinding has failed based on its applied and available conditions. +// HasBindingFailed checks if ClusterResourceBinding has failed based on its conditions. func HasBindingFailed(binding *placementv1beta1.ClusterResourceBinding) bool { - appliedCondition := binding.GetCondition(string(placementv1beta1.ResourceBindingApplied)) - availableCondition := binding.GetCondition(string(placementv1beta1.ResourceBindingAvailable)) - if condition.IsConditionStatusFalse(appliedCondition, binding.Generation) || condition.IsConditionStatusFalse(availableCondition, binding.Generation) { - return true + for i := condition.OverriddenCondition; i <= condition.AvailableCondition; i++ { + if condition.IsConditionStatusFalse(binding.GetCondition(string(i.ResourceBindingConditionType())), binding.Generation) { + // TODO: parse the reason of the condition to see if the failure is recoverable/retriable or not + klog.V(2).Infof("binding %s has condition %s with status false", binding.Name, string(i.ResourceBindingConditionType())) + return true + } } return false } diff --git a/pkg/utils/binding/binding_test.go b/pkg/utils/binding/binding_test.go index 68e77c459..a94925a84 100644 --- a/pkg/utils/binding/binding_test.go +++ b/pkg/utils/binding/binding_test.go @@ -20,7 +20,7 @@ func TestHasBindingFailed(t *testing.T) { want bool }{ { - name: "apply, available conditions not set for binding", + name: "binding should not fail if no conditions is set for binding", binding: &placementv1beta1.ClusterResourceBinding{ ObjectMeta: metav1.ObjectMeta{ Name: "test-binding", @@ -33,7 +33,7 @@ func TestHasBindingFailed(t *testing.T) { want: false, }, { - name: "apply failed binding", + name: "binding should fail if binding's apply condition is false", binding: &placementv1beta1.ClusterResourceBinding{ ObjectMeta: 
metav1.ObjectMeta{ Name: "test-binding", @@ -58,7 +58,7 @@ func TestHasBindingFailed(t *testing.T) { want: true, }, { - name: "non available binding", + name: "binding should fail if binding's available condition is false", binding: &placementv1beta1.ClusterResourceBinding{ ObjectMeta: metav1.ObjectMeta{ Name: "test-binding", @@ -91,7 +91,7 @@ func TestHasBindingFailed(t *testing.T) { want: true, }, { - name: "available binding", + name: "binding should NOT fail if binding's conditions are true", binding: &placementv1beta1.ClusterResourceBinding{ ObjectMeta: metav1.ObjectMeta{ Name: "test-binding", @@ -123,6 +123,187 @@ func TestHasBindingFailed(t *testing.T) { }, want: false, }, + { + name: "binding with overridden condition false", + binding: &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-binding", + Generation: 1, + }, + Status: placementv1beta1.ResourceBindingStatus{ + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ResourceBindingOverridden), + Status: metav1.ConditionFalse, + LastTransitionTime: metav1.Time{}, + ObservedGeneration: 1, + Reason: "overriddenFailed", + Message: "test message", + }, + }, + }, + }, + want: true, + }, + { + name: "binding with overridden condition true", + binding: &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-binding", + Generation: 1, + }, + Status: placementv1beta1.ResourceBindingStatus{ + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ResourceBindingOverridden), + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.Time{}, + ObservedGeneration: 1, + Reason: "overriddenSucceeded", + Message: "test message", + }, + }, + }, + }, + want: false, + }, + { + name: "binding with multiple conditions including overridden false", + binding: &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-binding", + Generation: 1, + }, + Status: placementv1beta1.ResourceBindingStatus{ 
+ Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ResourceBindingApplied), + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.Time{}, + ObservedGeneration: 1, + Reason: "applySucceeded", + Message: "test message", + }, + { + Type: string(placementv1beta1.ResourceBindingOverridden), + Status: metav1.ConditionFalse, + LastTransitionTime: metav1.Time{}, + ObservedGeneration: 1, + Reason: "overriddenFailed", + Message: "test message", + }, + }, + }, + }, + want: true, + }, + { + name: "binding with multiple conditions all true", + binding: &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-binding", + Generation: 1, + }, + Status: placementv1beta1.ResourceBindingStatus{ + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ResourceBindingApplied), + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.Time{}, + ObservedGeneration: 1, + Reason: "applySucceeded", + Message: "test message", + }, + { + Type: string(placementv1beta1.ResourceBindingOverridden), + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.Time{}, + ObservedGeneration: 1, + Reason: "overriddenSucceeded", + Message: "test message", + }, + { + Type: string(placementv1beta1.ResourceBindingWorkSynchronized), + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.Time{}, + ObservedGeneration: 1, + Reason: "resourceBindingWorkSynchronized", + Message: "test message", + }, + + { + Type: string(placementv1beta1.ResourceBindingAvailable), + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.Time{}, + ObservedGeneration: 1, + Reason: "available", + Message: "test message", + }, + }, + }, + }, + want: false, + }, + { + name: "binding with multiple conditions including overridden false and different generation", + binding: &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-binding", + Generation: 2, + }, + Status: placementv1beta1.ResourceBindingStatus{ + 
Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ResourceBindingApplied), + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.Time{}, + ObservedGeneration: 1, + Reason: "applySucceeded", + Message: "test message", + }, + { + Type: string(placementv1beta1.ResourceBindingOverridden), + Status: metav1.ConditionFalse, + LastTransitionTime: metav1.Time{}, + ObservedGeneration: 1, + Reason: "overriddenFailed", + Message: "test message", + }, + }, + }, + }, + want: false, + }, + { + name: "binding with multiple conditions all true and different generation", + binding: &placementv1beta1.ClusterResourceBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-binding", + Generation: 2, + }, + Status: placementv1beta1.ResourceBindingStatus{ + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ResourceBindingApplied), + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.Time{}, + ObservedGeneration: 1, + Reason: "applySucceeded", + Message: "test message", + }, + { + Type: string(placementv1beta1.ResourceBindingOverridden), + Status: metav1.ConditionTrue, + LastTransitionTime: metav1.Time{}, + ObservedGeneration: 1, + Reason: "overriddenSucceeded", + Message: "test message", + }, + }, + }, + }, + want: false, + }, } for _, tc := range tests { diff --git a/pkg/utils/condition/condition.go b/pkg/utils/condition/condition.go index ea44fd26d..da1fefe02 100644 --- a/pkg/utils/condition/condition.go +++ b/pkg/utils/condition/condition.go @@ -80,6 +80,8 @@ const ( // DiffReportedStatusTrueReason is the reason string of the DiffReported condition when the // diff reporting has been fully completed. DiffReportedStatusTrueReason = "DiffReportingCompleted" + + // TODO: Add a user error reason ) // A group of condition reason string which is used to populate the placement condition per cluster. 
diff --git a/test/e2e/join_and_leave_test.go b/test/e2e/join_and_leave_test.go index 2830ec914..078c4e1bd 100644 --- a/test/e2e/join_and_leave_test.go +++ b/test/e2e/join_and_leave_test.go @@ -137,13 +137,13 @@ var _ = Describe("Test member cluster force delete flow", Ordered, Serial, func( Expect(hubClient.Get(ctx, types.NamespacedName{Name: memberCluster3WestProdName}, &mc)).To(Succeed(), "Failed to get member cluster") Expect(hubClient.Delete(ctx, &mc)).Should(Succeed()) }) - + // we set force delete time at 1 minute in the test env It("Should garbage collect resources owned by member cluster and force delete member cluster CR after force delete wait time", func() { memberClusterNamespace := fmt.Sprintf(utils.NamespaceNameFormat, memberCluster3WestProdName) Eventually(func() bool { var ns corev1.Namespace return apierrors.IsNotFound(hubClient.Get(ctx, types.NamespacedName{Name: memberClusterNamespace}, &ns)) - }, eventuallyDuration, eventuallyInterval).Should(BeTrue(), "Failed to garbage collect resources owned by member cluster") + }, longEventuallyDuration, eventuallyInterval).Should(BeTrue(), "Failed to garbage collect resources owned by member cluster") Eventually(func() bool { return apierrors.IsNotFound(hubClient.Get(ctx, types.NamespacedName{Name: memberCluster3WestProdName}, &clusterv1beta1.MemberCluster{})) diff --git a/test/e2e/placement_cro_test.go b/test/e2e/placement_cro_test.go index 9fba4ce63..117f4f876 100644 --- a/test/e2e/placement_cro_test.go +++ b/test/e2e/placement_cro_test.go @@ -267,7 +267,7 @@ var _ = Describe("creating clusterResourceOverride with different rules for each ClusterSelectorTerms: []placementv1beta1.ClusterSelectorTerm{ { LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{regionLabelName: regionLabelValue1, envLabelName: envLabelValue1}, + MatchLabels: map[string]string{regionLabelName: regionEast, envLabelName: envProd}, }, }, }, @@ -285,7 +285,7 @@ var _ = Describe("creating clusterResourceOverride with 
different rules for each ClusterSelectorTerms: []placementv1beta1.ClusterSelectorTerm{ { LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{regionLabelName: regionLabelValue1, envLabelName: envLabelValue2}, + MatchLabels: map[string]string{regionLabelName: regionEast, envLabelName: envCanary}, }, }, }, @@ -303,7 +303,7 @@ var _ = Describe("creating clusterResourceOverride with different rules for each ClusterSelectorTerms: []placementv1beta1.ClusterSelectorTerm{ { LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{regionLabelName: regionLabelValue2, envLabelName: envLabelValue1}, + MatchLabels: map[string]string{regionLabelName: regionWest, envLabelName: envProd}, }, }, }, @@ -602,7 +602,7 @@ var _ = Describe("creating clusterResourceOverride with delete rules for one clu ClusterSelectorTerms: []placementv1beta1.ClusterSelectorTerm{ { LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{regionLabelName: regionLabelValue1}, + MatchLabels: map[string]string{regionLabelName: regionEast}, }, }, }, @@ -621,7 +621,7 @@ var _ = Describe("creating clusterResourceOverride with delete rules for one clu ClusterSelectorTerms: []placementv1beta1.ClusterSelectorTerm{ { LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{regionLabelName: regionLabelValue2}, + MatchLabels: map[string]string{regionLabelName: regionWest}, }, }, }, diff --git a/test/e2e/placement_pickall_test.go b/test/e2e/placement_pickall_test.go index 9f099be5d..9f11e1af4 100644 --- a/test/e2e/placement_pickall_test.go +++ b/test/e2e/placement_pickall_test.go @@ -131,14 +131,14 @@ var _ = Describe("placing resources using a CRP of PickAll placement type", func { LabelSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{ - regionLabelName: regionLabelValue1, + regionLabelName: regionEast, }, MatchExpressions: []metav1.LabelSelectorRequirement{ { Key: envLabelName, Operator: metav1.LabelSelectorOpIn, Values: []string{ - 
envLabelValue1, + envProd, }, }, }, @@ -201,14 +201,14 @@ var _ = Describe("placing resources using a CRP of PickAll placement type", func { LabelSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{ - regionLabelName: regionLabelValue1, + regionLabelName: regionEast, }, MatchExpressions: []metav1.LabelSelectorRequirement{ { Key: envLabelName, Operator: metav1.LabelSelectorOpIn, Values: []string{ - envLabelValue1, + envProd, }, }, }, @@ -250,14 +250,14 @@ var _ = Describe("placing resources using a CRP of PickAll placement type", func { LabelSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{ - regionLabelName: regionLabelValue2, + regionLabelName: regionWest, }, MatchExpressions: []metav1.LabelSelectorRequirement{ { Key: envLabelName, Operator: metav1.LabelSelectorOpIn, Values: []string{ - envLabelValue1, + envProd, }, }, }, @@ -317,14 +317,14 @@ var _ = Describe("placing resources using a CRP of PickAll placement type", func { LabelSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{ - regionLabelName: regionLabelValue2, + regionLabelName: regionWest, }, MatchExpressions: []metav1.LabelSelectorRequirement{ { Key: envLabelName, Operator: metav1.LabelSelectorOpIn, Values: []string{ - envLabelValue2, + envCanary, }, }, }, @@ -690,7 +690,7 @@ var _ = Describe("placing resources using a CRP of PickAll placement type", func { LabelSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{ - regionLabelName: regionLabelValue1, + regionLabelName: regionEast, }, }, PropertySelector: &placementv1beta1.PropertySelector{ @@ -766,7 +766,7 @@ var _ = Describe("placing resources using a CRP of PickAll placement type", func { LabelSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{ - regionLabelName: regionLabelValue1, + regionLabelName: regionEast, }, }, PropertySelector: &placementv1beta1.PropertySelector{ @@ -826,14 +826,14 @@ var _ = Describe("placing resources using a CRP of PickAll placement type", func { 
LabelSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{ - regionLabelName: regionLabelValue1, + regionLabelName: regionEast, }, MatchExpressions: []metav1.LabelSelectorRequirement{ { Key: envLabelName, Operator: metav1.LabelSelectorOpIn, Values: []string{ - envLabelValue2, + envCanary, }, }, }, @@ -857,14 +857,14 @@ var _ = Describe("placing resources using a CRP of PickAll placement type", func Key: regionLabelName, Operator: metav1.LabelSelectorOpNotIn, Values: []string{ - regionLabelValue2, + regionWest, }, }, { Key: envLabelName, Operator: metav1.LabelSelectorOpIn, Values: []string{ - envLabelValue1, + envProd, }, }, }, @@ -946,7 +946,7 @@ var _ = Describe("placing resources using a CRP of PickAll placement type", func { LabelSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{ - regionLabelName: regionLabelValue1, + regionLabelName: regionEast, }, }, PropertySelector: &placementv1beta1.PropertySelector{ diff --git a/test/e2e/placement_pickn_test.go b/test/e2e/placement_pickn_test.go index 6e5abaafc..e4efe0182 100644 --- a/test/e2e/placement_pickn_test.go +++ b/test/e2e/placement_pickn_test.go @@ -244,7 +244,7 @@ var _ = Describe("placing resources using a CRP of PickN placement", func() { { LabelSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{ - regionLabelName: regionLabelValue1, + regionLabelName: regionEast, }, }, }, @@ -316,7 +316,7 @@ var _ = Describe("placing resources using a CRP of PickN placement", func() { { LabelSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{ - regionLabelName: regionLabelValue1, + regionLabelName: regionEast, }, }, }, @@ -367,7 +367,7 @@ var _ = Describe("placing resources using a CRP of PickN placement", func() { Preference: placementv1beta1.ClusterSelectorTerm{ LabelSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{ - envLabelName: envLabelValue1, + envLabelName: envProd, }, }, }, @@ -385,10 +385,10 @@ var _ = Describe("placing resources using a CRP of PickN 
placement", func() { return hubClient.Update(ctx, crp) }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP with new affinity and topology spread constraints") }) - + // topology spread constraints takes a bit longer to be applied It("should update CRP status as expected", func() { statusUpdatedActual := crpStatusUpdatedActual(workResourceIdentifiers(), []string{memberCluster1EastProdName, memberCluster3WestProdName}, nil, "0") - Eventually(statusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + Eventually(statusUpdatedActual, workloadEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") }) It("should place resources on the newly picked clusters", func() { @@ -445,7 +445,7 @@ var _ = Describe("placing resources using a CRP of PickN placement", func() { { LabelSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{ - regionLabelName: regionLabelValue1, + regionLabelName: regionEast, }, }, }, @@ -750,7 +750,7 @@ var _ = Describe("placing resources using a CRP of PickN placement", func() { Preference: placementv1beta1.ClusterSelectorTerm{ LabelSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{ - regionLabelName: regionLabelValue1, + regionLabelName: regionEast, }, }, PropertySorter: &placementv1beta1.PropertySorter{ @@ -764,7 +764,7 @@ var _ = Describe("placing resources using a CRP of PickN placement", func() { Preference: placementv1beta1.ClusterSelectorTerm{ LabelSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{ - envLabelName: envLabelValue2, + envLabelName: envCanary, }, }, PropertySorter: &placementv1beta1.PropertySorter{ @@ -843,7 +843,7 @@ var _ = Describe("placing resources using a CRP of PickN placement", func() { { LabelSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{ - envLabelName: envLabelValue1, + envLabelName: envProd, }, }, PropertySelector: 
&placementv1beta1.PropertySelector{ diff --git a/test/e2e/placement_ro_test.go b/test/e2e/placement_ro_test.go index 7ad9493ed..140c57032 100644 --- a/test/e2e/placement_ro_test.go +++ b/test/e2e/placement_ro_test.go @@ -259,7 +259,7 @@ var _ = Describe("creating resourceOverride with different rules for each cluste ClusterSelectorTerms: []placementv1beta1.ClusterSelectorTerm{ { LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{regionLabelName: regionLabelValue1, envLabelName: envLabelValue1}, + MatchLabels: map[string]string{regionLabelName: regionEast, envLabelName: envProd}, }, }, }, @@ -277,7 +277,7 @@ var _ = Describe("creating resourceOverride with different rules for each cluste ClusterSelectorTerms: []placementv1beta1.ClusterSelectorTerm{ { LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{regionLabelName: regionLabelValue1, envLabelName: envLabelValue2}, + MatchLabels: map[string]string{regionLabelName: regionEast, envLabelName: envCanary}, }, }, }, @@ -295,7 +295,7 @@ var _ = Describe("creating resourceOverride with different rules for each cluste ClusterSelectorTerms: []placementv1beta1.ClusterSelectorTerm{ { LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{regionLabelName: regionLabelValue2, envLabelName: envLabelValue1}, + MatchLabels: map[string]string{regionLabelName: regionWest, envLabelName: envProd}, }, }, }, @@ -693,7 +693,7 @@ var _ = Describe("creating resourceOverride with delete configMap", Ordered, fun ClusterSelectorTerms: []placementv1beta1.ClusterSelectorTerm{ { LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{regionLabelName: regionLabelValue1}, + MatchLabels: map[string]string{regionLabelName: regionEast}, }, }, }, @@ -711,7 +711,7 @@ var _ = Describe("creating resourceOverride with delete configMap", Ordered, fun ClusterSelectorTerms: []placementv1beta1.ClusterSelectorTerm{ { LabelSelector: &metav1.LabelSelector{ - MatchLabels: 
map[string]string{regionLabelName: regionLabelValue2}, + MatchLabels: map[string]string{regionLabelName: regionWest}, }, }, }, diff --git a/test/e2e/resources_test.go b/test/e2e/resources_test.go index 0df723b2f..988d45a8d 100644 --- a/test/e2e/resources_test.go +++ b/test/e2e/resources_test.go @@ -17,6 +17,7 @@ import ( "k8s.io/utils/ptr" fleetnetworkingv1alpha1 "go.goms.io/fleet-networking/api/v1alpha1" + placementv1alpha1 "go.goms.io/fleet/apis/placement/v1alpha1" placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" ) @@ -37,7 +38,7 @@ const ( updateRunStrategyNameTemplate = "curs-%d" updateRunNameWithSubIndexTemplate = "cur-%d-%d" - customDeletionBlockerFinalizer = "custom-deletion-blocker-finalizer" + customDeletionBlockerFinalizer = "kubernetes-fleet.io/custom-deletion-blocker-finalizer" workNamespaceLabelName = "process" ) diff --git a/test/e2e/rollout_test.go b/test/e2e/rollout_test.go index 8fb3253ef..91c38b365 100644 --- a/test/e2e/rollout_test.go +++ b/test/e2e/rollout_test.go @@ -91,7 +91,7 @@ var _ = Describe("placing wrapped resources using a CRP", Ordered, func() { It("should update CRP status as expected", func() { crpStatusUpdatedActual := crpStatusUpdatedActual(wantSelectedResources, allMemberClusterNames, nil, "0") - Eventually(crpStatusUpdatedActual, longEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + Eventually(crpStatusUpdatedActual, workloadEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") }) It("should place the resources on all member clusters", func() { @@ -187,7 +187,7 @@ var _ = Describe("placing wrapped resources using a CRP", Ordered, func() { It("should update CRP status as expected", func() { crpStatusUpdatedActual := crpStatusUpdatedActual(wantSelectedResources, allMemberClusterNames, nil, "0") - Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as 
expected") + Eventually(crpStatusUpdatedActual, workloadEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") }) It("should place the resources on all member clusters", func() { @@ -219,7 +219,7 @@ var _ = Describe("placing wrapped resources using a CRP", Ordered, func() { Namespace: testDeployment.Namespace, } crpStatusActual := safeRolloutWorkloadCRPStatusUpdatedActual(wantSelectedResources, failedDeploymentResourceIdentifier, allMemberClusterNames, "1", 2) - Eventually(crpStatusActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + Eventually(crpStatusActual, workloadEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") }) AfterAll(func() { @@ -265,14 +265,14 @@ var _ = Describe("placing wrapped resources using a CRP", Ordered, func() { It("should update CRP status as expected", func() { crpStatusUpdatedActual := customizedCRPStatusUpdatedActual(crpName, wantSelectedResources, allMemberClusterNames, nil, "0", true) - Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + Eventually(crpStatusUpdatedActual, workloadEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") }) It("should place the resources on all member clusters", func() { for idx := range allMemberClusters { memberCluster := allMemberClusters[idx] workResourcesPlacedActual := waitForDaemonSetPlacementToReady(memberCluster, &testDaemonSet) - Eventually(workResourcesPlacedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to place work resources on member cluster %s", memberCluster.ClusterName) + Eventually(workResourcesPlacedActual, eventuallyInterval, eventuallyInterval).Should(Succeed(), "Failed to place work resources on member cluster %s", memberCluster.ClusterName) } }) @@ -285,7 +285,7 @@ var _ = 
Describe("placing wrapped resources using a CRP", Ordered, func() { } testEnvelopeDaemonSet.Data["daemonset.yaml"] = string(daemonSetByte) return hubClient.Update(ctx, &testEnvelopeDaemonSet) - }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to change the image name of daemonset in envelope object") + }, eventuallyInterval, eventuallyInterval).Should(Succeed(), "Failed to change the image name of daemonset in envelope object") }) It("should update CRP status as expected", func() { @@ -302,7 +302,7 @@ var _ = Describe("placing wrapped resources using a CRP", Ordered, func() { }, } crpStatusActual := safeRolloutWorkloadCRPStatusUpdatedActual(wantSelectedResources, failedDaemonSetResourceIdentifier, allMemberClusterNames, "1", 2) - Eventually(crpStatusActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + Eventually(crpStatusActual, workloadEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") }) AfterAll(func() { @@ -348,14 +348,14 @@ var _ = Describe("placing wrapped resources using a CRP", Ordered, func() { It("should update CRP status as expected", func() { crpStatusUpdatedActual := customizedCRPStatusUpdatedActual(crpName, wantSelectedResources, allMemberClusterNames, nil, "0", true) - Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + Eventually(crpStatusUpdatedActual, 2*workloadEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") }) It("should place the resources on all member clusters", func() { for idx := range allMemberClusters { memberCluster := allMemberClusters[idx] workResourcesPlacedActual := waitForStatefulSetPlacementToReady(memberCluster, &testStatefulSet) - Eventually(workResourcesPlacedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to place work resources on member 
cluster %s", memberCluster.ClusterName) + Eventually(workResourcesPlacedActual, eventuallyInterval, eventuallyInterval).Should(Succeed(), "Failed to place work resources on member cluster %s", memberCluster.ClusterName) } }) @@ -368,7 +368,7 @@ var _ = Describe("placing wrapped resources using a CRP", Ordered, func() { } testEnvelopeStatefulSet.Data["statefulset.yaml"] = string(daemonSetByte) return hubClient.Update(ctx, &testEnvelopeStatefulSet) - }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to change the image name in statefulset") + }, eventuallyInterval, eventuallyInterval).Should(Succeed(), "Failed to change the image name in statefulset") }) It("should update CRP status as expected", func() { @@ -385,7 +385,7 @@ var _ = Describe("placing wrapped resources using a CRP", Ordered, func() { }, } crpStatusActual := safeRolloutWorkloadCRPStatusUpdatedActual(wantSelectedResources, failedStatefulSetResourceIdentifier, allMemberClusterNames, "1", 2) - Eventually(crpStatusActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + Eventually(crpStatusActual, workloadEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") }) AfterAll(func() { @@ -431,7 +431,7 @@ var _ = Describe("placing wrapped resources using a CRP", Ordered, func() { It("should update CRP status as expected", func() { crpStatusUpdatedActual := crpStatusUpdatedActual(wantSelectedResources, allMemberClusterNames, nil, "0") - Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + Eventually(crpStatusUpdatedActual, workloadEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") }) It("should place the resources on all member clusters", func() { @@ -464,7 +464,7 @@ var _ = Describe("placing wrapped resources using a CRP", Ordered, func() { } // 
failedResourceObservedGeneration is set to 0 because generation is not populated for service. crpStatusActual := safeRolloutWorkloadCRPStatusUpdatedActual(wantSelectedResources, failedDeploymentResourceIdentifier, allMemberClusterNames, "1", 0) - Eventually(crpStatusActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + Eventually(crpStatusActual, workloadEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") }) AfterAll(func() { @@ -513,7 +513,7 @@ var _ = Describe("placing wrapped resources using a CRP", Ordered, func() { It("should update CRP status as expected", func() { crpStatusUpdatedActual := crpStatusUpdatedActual(wantSelectedResources, allMemberClusterNames, nil, "0") - Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + Eventually(crpStatusUpdatedActual, workloadEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") }) It("should place the resources on all member clusters", func() { @@ -545,7 +545,7 @@ var _ = Describe("placing wrapped resources using a CRP", Ordered, func() { Namespace: testDeployment.Namespace, } crpStatusActual := safeRolloutWorkloadCRPStatusUpdatedActual(wantSelectedResources, failedDeploymentResourceIdentifier, allMemberClusterNames, "1", 2) - Eventually(crpStatusActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + Eventually(crpStatusActual, workloadEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") }) It("update work to trigger a work generator reconcile", func() { @@ -610,7 +610,7 @@ var _ = Describe("placing wrapped resources using a CRP", Ordered, func() { if err != nil { return err } - testDeployment.Spec.Template.Spec.Containers[0].Image = "1.26.2" + 
testDeployment.Spec.Template.Spec.Containers[0].Image = "nginx:1.26.2" return hubClient.Update(ctx, &testDeployment) }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to change the image name in deployment") }) @@ -619,7 +619,7 @@ var _ = Describe("placing wrapped resources using a CRP", Ordered, func() { for idx := range allMemberClusters { memberCluster := allMemberClusters[idx] workResourcesPlacedActual := waitForDeploymentPlacementToReady(memberCluster, &testDeployment) - Eventually(workResourcesPlacedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to place work resources on member cluster %s", memberCluster.ClusterName) + Eventually(workResourcesPlacedActual, workloadEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to place work resources on member cluster %s", memberCluster.ClusterName) } }) @@ -634,7 +634,7 @@ var _ = Describe("placing wrapped resources using a CRP", Ordered, func() { workNamespace := appNamespace() var wantSelectedResources []placementv1beta1.ResourceIdentifier var testJob batchv1.Job - + unAvailablePeriodSeconds := 15 BeforeAll(func() { // Create the test resources. readJobTestManifest(&testJob) @@ -665,13 +665,13 @@ var _ = Describe("placing wrapped resources using a CRP", Ordered, func() { // the job we are trying to propagate takes 10s to complete. MaxUnavailable is set to 1. So setting UnavailablePeriodSeconds to 15s // so that after each rollout phase we only wait for 15s before proceeding to the next since Job is not trackable, // we want rollout to finish in a reasonable time. 
- crp.Spec.Strategy.RollingUpdate.UnavailablePeriodSeconds = ptr.To(15) + crp.Spec.Strategy.RollingUpdate.UnavailablePeriodSeconds = ptr.To(unAvailablePeriodSeconds) Expect(hubClient.Create(ctx, crp)).To(Succeed(), "Failed to create CRP") }) It("should update CRP status as expected", func() { crpStatusUpdatedActual := customizedCRPStatusUpdatedActual(crpName, wantSelectedResources, allMemberClusterNames, nil, "0", false) - Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + Eventually(crpStatusUpdatedActual, 2*time.Duration(unAvailablePeriodSeconds)*time.Second, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") }) It("should place the resources on all member clusters", func() { @@ -693,10 +693,10 @@ var _ = Describe("placing wrapped resources using a CRP", Ordered, func() { return hubClient.Update(ctx, &job) }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to suspend job") }) - + // job is not trackable, so we need to wait for a bit longer for each roll out It("should update CRP status as expected", func() { crpStatusUpdatedActual := customizedCRPStatusUpdatedActual(crpName, wantSelectedResources, allMemberClusterNames, nil, "1", false) - Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + Eventually(crpStatusUpdatedActual, 5*time.Duration(unAvailablePeriodSeconds)*time.Second, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") }) AfterAll(func() { @@ -711,7 +711,7 @@ var _ = Describe("placing wrapped resources using a CRP", Ordered, func() { var wantSelectedResources []placementv1beta1.ResourceIdentifier var testCustomResource testv1alpha1.TestResource var crp *placementv1beta1.ClusterResourcePlacement - + unAvailablePeriodSeconds := 30 BeforeAll(func() { // Create the test resources. 
readTestCustomResource(&testCustomResource) @@ -758,13 +758,13 @@ var _ = Describe("placing wrapped resources using a CRP", Ordered, func() { memberCluster1EastProdName, }, } - crp.Spec.Strategy.RollingUpdate.UnavailablePeriodSeconds = ptr.To(60) + crp.Spec.Strategy.RollingUpdate.UnavailablePeriodSeconds = ptr.To(unAvailablePeriodSeconds) Expect(hubClient.Create(ctx, crp)).To(Succeed(), "Failed to create CRP") }) It("should update CRP status as expected", func() { crpStatusUpdatedActual := customizedCRPStatusUpdatedActual(crpName, wantSelectedResources, []string{memberCluster1EastProdName}, nil, "0", false) - Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + Eventually(crpStatusUpdatedActual, 2*time.Duration(unAvailablePeriodSeconds)*time.Second, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") }) It("should place the resources on member cluster", func() { @@ -803,7 +803,7 @@ var _ = Describe("placing wrapped resources using a CRP", Ordered, func() { It("should update CRP status as expected", func() { crpStatusUpdatedActual := customizedCRPStatusUpdatedActual(crpName, wantSelectedResources, []string{memberCluster1EastProdName}, nil, "1", false) - Eventually(crpStatusUpdatedActual, longEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + Eventually(crpStatusUpdatedActual, 4*time.Duration(unAvailablePeriodSeconds)*time.Second, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") }) AfterAll(func() { @@ -818,6 +818,7 @@ var _ = Describe("placing wrapped resources using a CRP", Ordered, func() { var wantSelectedResources []placementv1beta1.ResourceIdentifier var testCustomResource testv1alpha1.TestResource var crp *placementv1beta1.ClusterResourcePlacement + unAvailablePeriodSeconds := 30 BeforeAll(func() { // Create the test resources. 
@@ -862,13 +863,13 @@ var _ = Describe("placing wrapped resources using a CRP", Ordered, func() { crp.Spec.Policy = &placementv1beta1.PlacementPolicy{ PlacementType: placementv1beta1.PickAllPlacementType, } - crp.Spec.Strategy.RollingUpdate.UnavailablePeriodSeconds = ptr.To(60) + crp.Spec.Strategy.RollingUpdate.UnavailablePeriodSeconds = ptr.To(unAvailablePeriodSeconds) Expect(hubClient.Create(ctx, crp)).To(Succeed(), "Failed to create CRP") }) It("should update CRP status as expected", func() { crpStatusUpdatedActual := customizedCRPStatusUpdatedActual(crpName, wantSelectedResources, allMemberClusterNames, nil, "0", false) - Eventually(crpStatusUpdatedActual, longEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + Eventually(crpStatusUpdatedActual, 2*time.Duration(unAvailablePeriodSeconds)*time.Second, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") }) It("should place the resources on member clusters", func() { @@ -931,7 +932,7 @@ var _ = Describe("placing wrapped resources using a CRP", Ordered, func() { It("should update CRP status as expected", func() { crpStatusUpdatedActual := customizedCRPStatusUpdatedActual(crpName, wantSelectedResources, allMemberClusterNames, nil, "1", false) - Eventually(crpStatusUpdatedActual, longEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + Eventually(crpStatusUpdatedActual, 4*time.Duration(unAvailablePeriodSeconds)*time.Second, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") }) AfterAll(func() { diff --git a/test/e2e/scheduler_watchers_test.go b/test/e2e/scheduler_watchers_test.go index c0d468177..79a406352 100644 --- a/test/e2e/scheduler_watchers_test.go +++ b/test/e2e/scheduler_watchers_test.go @@ -271,11 +271,11 @@ var _ = Describe("responding to specific member cluster changes", func() { return memberCluster3WestProdClient.Create(ctx, node) }, 
eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to create a new node") }) - + // need to wait for the resource change to be propagated back to the hub cluster It("should pick the new cluster", func() { targetClusterNames := []string{memberCluster3WestProdName} crpStatusUpdatedActual := crpStatusUpdatedActual(workResourceIdentifiers(), targetClusterNames, nil, "0") - Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + Eventually(crpStatusUpdatedActual, 2*workloadEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") Consistently(crpStatusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update CRP status as expected") }) @@ -1236,7 +1236,7 @@ var _ = Describe("responding to specific member cluster changes", func() { }) It("can add a new member cluster in a region which would violate the topology spread constraint", func() { - createMemberCluster(fakeClusterName1ForWatcherTests, hubClusterSAName, map[string]string{regionLabelName: regionLabelValue1}, nil) + createMemberCluster(fakeClusterName1ForWatcherTests, hubClusterSAName, map[string]string{regionLabelName: regionEast}, nil) markMemberClusterAsHealthy(fakeClusterName1ForWatcherTests) }) @@ -1247,7 +1247,7 @@ var _ = Describe("responding to specific member cluster changes", func() { }) It("can add a new member cluster in a region which would re-balance the topology spread", func() { - createMemberCluster(fakeClusterName2ForWatcherTests, hubClusterSAName, map[string]string{regionLabelName: regionLabelValue2}, nil) + createMemberCluster(fakeClusterName2ForWatcherTests, hubClusterSAName, map[string]string{regionLabelName: regionWest}, nil) markMemberClusterAsHealthy(fakeClusterName2ForWatcherTests) }) @@ -1353,13 +1353,13 @@ var _ = Describe("responding to specific member cluster changes", func() { corev1.ResourceMemory: 
resource.MustParse("10000Gi"), } return memberCluster3WestProdClient.Status().Update(ctx, node) - }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to create and update a new node") + }, longEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to create and update a new node") }) It("should pick the new cluster", func() { targetClusterNames := []string{memberCluster3WestProdName} crpStatusUpdatedActual := crpStatusUpdatedActual(workResourceIdentifiers(), targetClusterNames, nil, "0") - Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + Eventually(crpStatusUpdatedActual, workloadEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") Consistently(crpStatusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update CRP status as expected") }) @@ -1439,7 +1439,7 @@ var _ = Describe("responding to specific member cluster changes", func() { }) It("can add a new member cluster in a region which would violate the topology spread constraint", func() { - createMemberCluster(fakeClusterName1ForWatcherTests, hubClusterSAName, map[string]string{regionLabelName: regionLabelValue1}, nil) + createMemberCluster(fakeClusterName1ForWatcherTests, hubClusterSAName, map[string]string{regionLabelName: regionEast}, nil) markMemberClusterAsHealthy(fakeClusterName1ForWatcherTests) }) @@ -1694,8 +1694,8 @@ var _ = Describe("responding to specific member cluster changes", func() { createWorkResources() // Create new member clusters. 
- createMemberCluster(fakeClusterName1ForWatcherTests, hubClusterSAName, map[string]string{regionLabelName: regionLabelValue1}, nil) - createMemberCluster(fakeClusterName2ForWatcherTests, hubClusterSAName, map[string]string{regionLabelName: regionLabelValue2}, nil) + createMemberCluster(fakeClusterName1ForWatcherTests, hubClusterSAName, map[string]string{regionLabelName: regionEast}, nil) + createMemberCluster(fakeClusterName2ForWatcherTests, hubClusterSAName, map[string]string{regionLabelName: regionWest}, nil) // Mark the newly created member clusters as healthy. markMemberClusterAsHealthy(fakeClusterName1ForWatcherTests) markMemberClusterAsHealthy(fakeClusterName2ForWatcherTests) diff --git a/test/e2e/setup_test.go b/test/e2e/setup_test.go index e1085ea48..15768528a 100644 --- a/test/e2e/setup_test.go +++ b/test/e2e/setup_test.go @@ -70,8 +70,12 @@ const ( ) const ( - eventuallyDuration = time.Minute * 2 - longEventuallyDuration = time.Minute * 5 + // Do not bump this value unless you have a good reason. This is to safeguard any performance related regressions. + eventuallyDuration = time.Second * 10 + // this is for workload related test cases + workloadEventuallyDuration = time.Second * 45 + // This is for cluster setup. 
+ longEventuallyDuration = time.Minute * 2 eventuallyInterval = time.Millisecond * 250 consistentlyDuration = time.Second * 15 consistentlyInterval = time.Millisecond * 500 @@ -98,29 +102,29 @@ var ( ) var ( - regionLabelName = "region" - regionLabelValue1 = "east" - regionLabelValue2 = "west" - regionLabelValue3 = "south" - envLabelName = "env" - envLabelValue1 = "prod" - envLabelValue2 = "canary" - clusterID1 = "test-cluster-id-1" - clusterID2 = "test-cluster-id-2" - clusterID3 = "test-cluster-id-3" + regionLabelName = "region" + regionEast = "east" + regionWest = "west" + regionSouth = "south" + envLabelName = "env" + envProd = "prod" + envCanary = "canary" + clusterID1 = "test-cluster-id-1" + clusterID2 = "test-cluster-id-2" + clusterID3 = "test-cluster-id-3" labelsByClusterName = map[string]map[string]string{ memberCluster1EastProdName: { - regionLabelName: regionLabelValue1, - envLabelName: envLabelValue1, + regionLabelName: regionEast, + envLabelName: envProd, }, memberCluster2EastCanaryName: { - regionLabelName: regionLabelValue1, - envLabelName: envLabelValue2, + regionLabelName: regionEast, + envLabelName: envCanary, }, memberCluster3WestProdName: { - regionLabelName: regionLabelValue2, - envLabelName: envLabelValue1, + regionLabelName: regionWest, + envLabelName: envProd, }, } annotationsByClusterName = map[string]map[string]string{ @@ -137,13 +141,13 @@ var ( taintTolerationMap = map[string]map[string]string{ memberCluster1EastProdName: { - regionLabelName: regionLabelValue1, + regionLabelName: regionEast, }, memberCluster2EastCanaryName: { - regionLabelName: regionLabelValue2, + regionLabelName: regionWest, }, memberCluster3WestProdName: { - regionLabelName: regionLabelValue3, + regionLabelName: regionSouth, }, } ) diff --git a/test/e2e/taint_toleration_test.go b/test/e2e/taint_toleration_test.go index a17e9d5af..18f1a42d6 100644 --- a/test/e2e/taint_toleration_test.go +++ b/test/e2e/taint_toleration_test.go @@ -254,7 +254,7 @@ var _ = 
Describe("picking N clusters with affinities and topology spread constra { LabelSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{ - regionLabelName: regionLabelValue1, + regionLabelName: regionEast, }, }, }, diff --git a/test/e2e/updaterun_test.go b/test/e2e/updaterun_test.go index 28fec1608..1389be6b0 100644 --- a/test/e2e/updaterun_test.go +++ b/test/e2e/updaterun_test.go @@ -24,10 +24,8 @@ import ( ) const ( - // The current stage wait duration is 1 minute. - // It might not be enough to wait for the updateRun to reach the wanted state with existing eventuallyDurtation, 2 minutes. - // Here we extend it to 3 minutes. - updateRunEventuallyDuration = time.Minute * 3 + // The current stage wait between clusters are 15 seconds + updateRunEventuallyDuration = time.Minute resourceSnapshotIndex1st = "0" resourceSnapshotIndex2nd = "1" @@ -107,15 +105,19 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should rollout resources to member-cluster-2 only and completes stage canary", func() { - checkIfPlacedWorkResourcesOnMemberClusters([]*framework.Cluster{allMemberClusters[1]}) + checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun([]*framework.Cluster{allMemberClusters[1]}) checkIfRemovedWorkResourcesFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[0], allMemberClusters[2]}) - validateAndApproveClusterApprovalRequests(updateRunNames[0], envLabelValue2) + validateAndApproveClusterApprovalRequests(updateRunNames[0], envCanary) }) - It("Should rollout resources to member-cluster-1 and member-cluster-3 too and complete the staged update run successfully", func() { - checkIfPlacedWorkResourcesOnMemberClusters(allMemberClusters) + It("Should rollout resources to member-cluster-1 first because of its name", func() { + checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun([]*framework.Cluster{allMemberClusters[0]}) + }) + + It("Should rollout resources to all the members and complete the staged update run 
successfully", func() { updateRunSucceededActual := updateRunStatusSucceededActual(updateRunNames[0], policySnapshotIndex1st, len(allMemberClusters), nil, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil) Eventually(updateRunSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[0]) + checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) It("Should update the configmap successfully on hub but not change member clusters", func() { @@ -131,13 +133,13 @@ var _ = Describe("test CRP rollout with staged update run", func() { crsList := &placementv1beta1.ClusterResourceSnapshotList{} Eventually(func() error { if err := hubClient.List(ctx, crsList, client.MatchingLabels{placementv1beta1.CRPTrackingLabel: crpName, placementv1beta1.IsLatestSnapshotLabel: "true"}); err != nil { - return fmt.Errorf("Failed to list the resourcesnapshot: %w", err) + return fmt.Errorf("failed to list the resourcesnapshot: %w", err) } if len(crsList.Items) != 1 { - return fmt.Errorf("Got %d latest resourcesnapshots, want 1", len(crsList.Items)) + return fmt.Errorf("got %d latest resourcesnapshots, want 1", len(crsList.Items)) } if crsList.Items[0].Labels[placementv1beta1.ResourceIndexLabel] != resourceSnapshotIndex2nd { - return fmt.Errorf("Got resource snapshot index %s, want %s", crsList.Items[0].Labels[placementv1beta1.ResourceIndexLabel], resourceSnapshotIndex2nd) + return fmt.Errorf("got resource snapshot index %s, want %s", crsList.Items[0].Labels[placementv1beta1.ResourceIndexLabel], resourceSnapshotIndex2nd) } return nil }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed get the new latest resourcensnapshot") @@ -148,18 +150,25 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should rollout resources to member-cluster-2 only and completes stage canary", func() 
{ - checkIfPlacedWorkResourcesOnMemberClusters([]*framework.Cluster{allMemberClusters[1]}) + By("Verify that new the configmap is updated on member-cluster-2") + configMapActual := configMapPlacedOnClusterActual(allMemberClusters[1], &newConfigMap) + Eventually(configMapActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to rollback the configmap change on cluster %s", allMemberClusterNames[1]) + By("Verify that the configmap is not updated on member-cluster-1 and member-cluster-3") for _, cluster := range []*framework.Cluster{allMemberClusters[0], allMemberClusters[2]} { configMapActual := configMapPlacedOnClusterActual(cluster, &oldConfigMap) - Consistently(configMapActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to keep configmap %s data as expected", oldConfigMap.Name) + Consistently(configMapActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to keep configmap %s data as expected", newConfigMap.Name) } - validateAndApproveClusterApprovalRequests(updateRunNames[1], envLabelValue2) + validateAndApproveClusterApprovalRequests(updateRunNames[1], envCanary) }) It("Should rollout resources to member-cluster-1 and member-cluster-3 too and complete the staged update run successfully", func() { - checkIfPlacedWorkResourcesOnMemberClusters(allMemberClusters) updateRunSucceededActual := updateRunStatusSucceededActual(updateRunNames[1], policySnapshotIndex1st, len(allMemberClusters), nil, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil) Eventually(updateRunSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[1]) + By("Verify that new the configmap is updated on all member clusters") + for idx := range allMemberClusters { + configMapActual := configMapPlacedOnClusterActual(allMemberClusters[idx], &newConfigMap) + 
Eventually(configMapActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to rollback the configmap %s data on cluster %s as expected", oldConfigMap.Name, allMemberClusterNames[idx]) + } }) It("Should create a new staged update run with old resourceSnapshotIndex successfully to rollback", func() { @@ -167,20 +176,24 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should rollback resources to member-cluster-2 only and completes stage canary", func() { - checkIfPlacedWorkResourcesOnMemberClusters([]*framework.Cluster{allMemberClusters[1]}) + By("Verify that the configmap is rolled back on member-cluster-2") configMapActual := configMapPlacedOnClusterActual(allMemberClusters[1], &oldConfigMap) Eventually(configMapActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to rollback the configmap change on cluster %s", allMemberClusterNames[1]) - checkIfPlacedWorkResourcesOnMemberClustersConsistently([]*framework.Cluster{allMemberClusters[0], allMemberClusters[2]}) - validateAndApproveClusterApprovalRequests(updateRunNames[2], envLabelValue2) + By("Verify that the configmap is not rolled back on member-cluster-1 and member-cluster-3") + for _, cluster := range []*framework.Cluster{allMemberClusters[0], allMemberClusters[2]} { + configMapActual := configMapPlacedOnClusterActual(cluster, &newConfigMap) + Consistently(configMapActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to keep configmap %s data as expected", newConfigMap.Name) + } + validateAndApproveClusterApprovalRequests(updateRunNames[2], envCanary) }) It("Should rollback resources to member-cluster-1 and member-cluster-3 too and complete the staged update run successfully", func() { + updateRunSucceededActual := updateRunStatusSucceededActual(updateRunNames[2], policySnapshotIndex1st, len(allMemberClusters), nil, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], 
allMemberClusterNames[2]}}, nil, nil, nil) + Eventually(updateRunSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[1]) for idx := range allMemberClusters { configMapActual := configMapPlacedOnClusterActual(allMemberClusters[idx], &oldConfigMap) Eventually(configMapActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to rollback the configmap %s data on cluster %s as expected", oldConfigMap.Name, allMemberClusterNames[idx]) } - updateRunSucceededActual := updateRunStatusSucceededActual(updateRunNames[2], policySnapshotIndex1st, len(allMemberClusters), nil, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil) - Eventually(updateRunSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[1]) }) }) @@ -249,19 +262,19 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should rollout resources to member-cluster-2 only and completes stage canary", func() { - checkIfPlacedWorkResourcesOnMemberClusters([]*framework.Cluster{allMemberClusters[1]}) - validateAndApproveClusterApprovalRequests(updateRunNames[0], envLabelValue2) + checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun([]*framework.Cluster{allMemberClusters[1]}) checkIfRemovedWorkResourcesFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[0], allMemberClusters[2]}) + validateAndApproveClusterApprovalRequests(updateRunNames[0], envCanary) }) It("Should rollout resources to member-cluster-1 too but not member-cluster-3 and complete the staged update run successfully", func() { - checkIfPlacedWorkResourcesOnMemberClusters([]*framework.Cluster{allMemberClusters[0], allMemberClusters[1]}) updateRunSucceededActual := updateRunStatusSucceededActual(updateRunNames[0], policySnapshotIndex1st, 2, nil, 
&strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0]}}, nil, nil, nil) Eventually(updateRunSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[0]) + checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun([]*framework.Cluster{allMemberClusters[0], allMemberClusters[1]}) checkIfRemovedWorkResourcesFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[2]}) }) - It("Should update the crp to pick member-cluster-3 too", func() { + It("Update the crp to pick member-cluster-3 too", func() { Eventually(func() error { crp := &placementv1beta1.ClusterResourcePlacement{} if err := hubClient.Get(ctx, client.ObjectKey{Name: crpName}, crp); err != nil { @@ -281,22 +294,24 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should still have resources on member-cluster-1 and member-cluster-2 only and completes stage canary", func() { - checkIfPlacedWorkResourcesOnMemberClusters([]*framework.Cluster{allMemberClusters[0], allMemberClusters[1]}) - validateAndApproveClusterApprovalRequests(updateRunNames[1], envLabelValue2) + // this check is meaningless as resources were already placed on member-cluster-1 and member-cluster-2 + checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun([]*framework.Cluster{allMemberClusters[0], allMemberClusters[1]}) + // TODO: need a way to check the status of staged update run that are have member-cluster-1 and member-cluster-2 updated checkIfRemovedWorkResourcesFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[2]}) + validateAndApproveClusterApprovalRequests(updateRunNames[1], envCanary) }) It("Should rollout resources to member-cluster-3 too and complete the staged update run successfully", func() { - checkIfPlacedWorkResourcesOnMemberClusters(allMemberClusters) updateRunSucceededActual := updateRunStatusSucceededActual(updateRunNames[1], policySnapshotIndex2nd, 3, nil, 
&strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, nil, nil) Eventually(updateRunSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[1]) + checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) - It("Should update the crp to only keep member-cluster-3", func() { + It("Update the crp to only keep member-cluster-3", func() { Eventually(func() error { crp := &placementv1beta1.ClusterResourcePlacement{} if err := hubClient.Get(ctx, client.ObjectKey{Name: crpName}, crp); err != nil { - return fmt.Errorf("Failed to get the crp: %w", err) + return fmt.Errorf("failed to get the crp: %w", err) } crp.Spec.Policy.ClusterNames = []string{allMemberClusterNames[2]} return hubClient.Update(ctx, crp) @@ -313,14 +328,15 @@ var _ = Describe("test CRP rollout with staged update run", func() { It("Should still have resources on all member clusters and complete stage canary", func() { checkIfPlacedWorkResourcesOnMemberClustersConsistently(allMemberClusters) - validateAndApproveClusterApprovalRequests(updateRunNames[2], envLabelValue2) + validateAndApproveClusterApprovalRequests(updateRunNames[2], envCanary) }) It("Should remove resources on member-cluster-1 and member-cluster-2 and complete the staged update run successfully", func() { + // need to go through two stages + updateRunSucceededActual := updateRunStatusSucceededActual(updateRunNames[2], policySnapshotIndex3rd, 1, nil, &strategy.Spec, [][]string{{}, {allMemberClusterNames[2]}}, []string{allMemberClusterNames[0], allMemberClusterNames[1]}, nil, nil) + Eventually(updateRunSucceededActual, 2*updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[2]) checkIfRemovedWorkResourcesFromMemberClusters([]*framework.Cluster{allMemberClusters[0], allMemberClusters[1]}) 
checkIfPlacedWorkResourcesOnMemberClustersConsistently([]*framework.Cluster{allMemberClusters[2]}) - updateRunSucceededActual := updateRunStatusSucceededActual(updateRunNames[2], policySnapshotIndex3rd, 1, nil, &strategy.Spec, [][]string{{}, {allMemberClusterNames[2]}}, []string{allMemberClusterNames[0], allMemberClusterNames[1]}, nil, nil) - Eventually(updateRunSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunNames[2]) }) }) @@ -349,7 +365,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { ClusterSelectorTerms: []placementv1beta1.ClusterSelectorTerm{ { LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{regionLabelName: regionLabelValue1, envLabelName: envLabelValue1}, // member-cluster-1 + MatchLabels: map[string]string{regionLabelName: regionEast, envLabelName: envProd}, // member-cluster-1 }, }, }, @@ -383,7 +399,7 @@ var _ = Describe("test CRP rollout with staged update run", func() { ClusterSelectorTerms: []placementv1beta1.ClusterSelectorTerm{ { LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{regionLabelName: regionLabelValue1, envLabelName: envLabelValue2}, + MatchLabels: map[string]string{regionLabelName: regionEast, envLabelName: envCanary}, }, }, }, @@ -458,17 +474,17 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) It("Should rollout resources to member-cluster-2 only and completes stage canary", func() { - checkIfPlacedWorkResourcesOnMemberClusters([]*framework.Cluster{allMemberClusters[1]}) + checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun([]*framework.Cluster{allMemberClusters[1]}) checkIfRemovedWorkResourcesFromMemberClustersConsistently([]*framework.Cluster{allMemberClusters[0], allMemberClusters[2]}) - validateAndApproveClusterApprovalRequests(updateRunName, envLabelValue2) + validateAndApproveClusterApprovalRequests(updateRunName, envCanary) }) It("Should rollout 
resources to member-cluster-1 and member-cluster-3 too and complete the staged update run successfully", func() { - checkIfPlacedWorkResourcesOnMemberClusters(allMemberClusters) wantCROs := map[string][]string{allMemberClusterNames[0]: {croName + "-0"}} // with override snapshot index 0 wantROs := map[string][]placementv1beta1.NamespacedName{allMemberClusterNames[1]: {placementv1beta1.NamespacedName{Namespace: roNamespace, Name: roName + "-0"}}} // with override snapshot index 0 updateRunSucceededActual := updateRunStatusSucceededActual(updateRunName, policySnapshotIndex1st, len(allMemberClusters), nil, &strategy.Spec, [][]string{{allMemberClusterNames[1]}, {allMemberClusterNames[0], allMemberClusterNames[2]}}, nil, wantCROs, wantROs) Eventually(updateRunSucceededActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to validate updateRun %s succeeded", updateRunName) + checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) }) It("should have override annotations on the member cluster 1 and member cluster 2", func() { @@ -481,6 +497,18 @@ var _ = Describe("test CRP rollout with staged update run", func() { }) }) +func checkIfPlacedWorkResourcesOnAllMemberClusters() { + checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(allMemberClusters) +} + +func checkIfPlacedWorkResourcesOnMemberClustersInUpdateRun(clusters []*framework.Cluster) { + for idx := range clusters { + memberCluster := clusters[idx] + workResourcesPlacedActual := workNamespaceAndConfigMapPlacedOnClusterActual(memberCluster) + Eventually(workResourcesPlacedActual, updateRunEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to place work resources on member cluster %s", memberCluster.ClusterName) + } +} + func createStagedUpdateStrategySucceed(strategyName string) *placementv1beta1.ClusterStagedUpdateStrategy { strategy := &placementv1beta1.ClusterStagedUpdateStrategy{ ObjectMeta: metav1.ObjectMeta{ @@ -489,10 +517,10 @@ func 
createStagedUpdateStrategySucceed(strategyName string) *placementv1beta1.Cl Spec: placementv1beta1.StagedUpdateStrategySpec{ Stages: []placementv1beta1.StageConfig{ { - Name: envLabelValue2, + Name: envCanary, LabelSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{ - envLabelName: envLabelValue2, // member-cluster-2 + envLabelName: envCanary, // member-cluster-2 }, }, AfterStageTasks: []placementv1beta1.AfterStageTask{ @@ -508,10 +536,10 @@ func createStagedUpdateStrategySucceed(strategyName string) *placementv1beta1.Cl }, }, { - Name: envLabelValue1, + Name: envProd, LabelSelector: &metav1.LabelSelector{ MatchLabels: map[string]string{ - envLabelName: envLabelValue1, // member-cluster-1 and member-cluster-3 + envLabelName: envProd, // member-cluster-1 and member-cluster-3 }, }, }, @@ -529,14 +557,14 @@ func validateLatestPolicySnapshot(crpName, wantPolicySnapshotIndex string) { placementv1beta1.CRPTrackingLabel: crpName, placementv1beta1.IsLatestSnapshotLabel: "true", }); err != nil { - return "", fmt.Errorf("Failed to list the latest scheduling policy snapshot: %w", err) + return "", fmt.Errorf("failed to list the latest scheduling policy snapshot: %w", err) } if len(policySnapshotList.Items) != 1 { - return "", fmt.Errorf("Failed to find the latest scheduling policy snapshot") + return "", fmt.Errorf("failed to find the latest scheduling policy snapshot") } latestPolicySnapshot := policySnapshotList.Items[0] if !condition.IsConditionStatusTrue(latestPolicySnapshot.GetCondition(string(placementv1beta1.PolicySnapshotScheduled)), latestPolicySnapshot.Generation) { - return "", fmt.Errorf("The latest scheduling policy snapshot is not scheduled yet") + return "", fmt.Errorf("the latest scheduling policy snapshot is not scheduled yet") } return latestPolicySnapshot.Labels[placementv1beta1.PolicyIndexLabel], nil }, eventuallyDuration, eventuallyInterval).Should(Equal(wantPolicySnapshotIndex), "Policy snapshot index does not match") @@ -549,10 +577,10 @@ 
func validateLatestResourceSnapshot(crpName, wantResourceSnapshotIndex string) { placementv1beta1.CRPTrackingLabel: crpName, placementv1beta1.IsLatestSnapshotLabel: "true", }); err != nil { - return "", fmt.Errorf("Failed to list the latestresourcesnapshot: %w", err) + return "", fmt.Errorf("failed to list the latestresourcesnapshot: %w", err) } if len(crsList.Items) != 1 { - return "", fmt.Errorf("Got %d resourcesnapshots, want 1", len(crsList.Items)) + return "", fmt.Errorf("got %d resourcesnapshots, want 1", len(crsList.Items)) } return crsList.Items[0].Labels[placementv1beta1.ResourceIndexLabel], nil }, eventuallyDuration, eventuallyInterval).Should(Equal(wantResourceSnapshotIndex), "Resource snapshot index does not match") @@ -579,11 +607,11 @@ func validateAndApproveClusterApprovalRequests(updateRunName, stageName string) placementv1beta1.TargetUpdatingStageNameLabel: stageName, placementv1beta1.TargetUpdateRunLabel: updateRunName, }); err != nil { - return fmt.Errorf("Failed to list approval requests: %w", err) + return fmt.Errorf("failed to list approval requests: %w", err) } if len(appReqList.Items) != 1 { - return fmt.Errorf("Got %d approval requests, want 1", len(appReqList.Items)) + return fmt.Errorf("got %d approval requests, want 1", len(appReqList.Items)) } appReq := &appReqList.Items[0] meta.SetStatusCondition(&appReq.Status.Conditions, metav1.Condition{ diff --git a/test/e2e/utils_test.go b/test/e2e/utils_test.go index 00c73a538..d6bca6cdc 100644 --- a/test/e2e/utils_test.go +++ b/test/e2e/utils_test.go @@ -226,7 +226,7 @@ func checkIfMemberClusterHasJoined(memberCluster *framework.Cluster) { } return nil - }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Member cluster has not joined yet") + }, longEventuallyDuration, eventuallyInterval).Should(Succeed(), "Member cluster has not joined yet") } // checkIfAzurePropertyProviderIsWorking verifies if all member clusters have the Azure property @@ -284,7 +284,7 @@ func 
checkIfAzurePropertyProviderIsWorking() { return fmt.Errorf("member cluster status conditions diff (-got, +want):\n%s", diff) } return nil - }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to confirm that Azure property provider is up and running") + }, longEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to confirm that Azure property provider is up and running") } } @@ -475,7 +475,7 @@ func cleanupInvalidClusters() { } mcObj.Finalizers = []string{} return hubClient.Update(ctx, mcObj) - }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update member cluster object") + }, longEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update member cluster object") Expect(hubClient.Delete(ctx, mcObj)).Should(SatisfyAny(Succeed(), utils.NotFoundMatcher{}), "Failed to delete member cluster object") Eventually(func() error { mcObj := &clusterv1beta1.MemberCluster{} @@ -483,7 +483,7 @@ func cleanupInvalidClusters() { return fmt.Errorf("member cluster still exists or an unexpected error occurred: %w", err) } return nil - }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to check if member cluster is deleted, member cluster still exists") + }, longEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to check if member cluster is deleted, member cluster still exists") } } @@ -683,7 +683,7 @@ func cleanWorkResourcesOnCluster(cluster *framework.Cluster) { Expect(client.IgnoreNotFound(cluster.KubeClient.Delete(ctx, &ns))).To(Succeed(), "Failed to delete namespace %s", ns.Namespace) workResourcesRemovedActual := workNamespaceRemovedFromClusterActual(cluster) - Eventually(workResourcesRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove work resources from %s cluster", cluster.ClusterName) + Eventually(workResourcesRemovedActual, workloadEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove work resources from %s 
cluster", cluster.ClusterName) } // setAllMemberClustersToLeave sets all member clusters to leave the fleet. @@ -711,20 +711,7 @@ func checkIfAllMemberClustersHaveLeft() { } return nil - }, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to delete member cluster") - } -} - -func checkIfPlacedWorkResourcesOnAllMemberClusters() { - checkIfPlacedWorkResourcesOnMemberClusters(allMemberClusters) -} - -func checkIfPlacedWorkResourcesOnMemberClusters(clusters []*framework.Cluster) { - for idx := range clusters { - memberCluster := clusters[idx] - - workResourcesPlacedActual := workNamespaceAndConfigMapPlacedOnClusterActual(memberCluster) - Eventually(workResourcesPlacedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to place work resources on member cluster %s", memberCluster.ClusterName) + }, longEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to delete member cluster") } } @@ -790,9 +777,8 @@ func cleanupCRP(name string) { } // Delete the CRP (again, if applicable). - // // This helps the After All node to run successfully even if the steps above fail early. - if err := hubClient.Delete(ctx, crp); err != nil { + if err = hubClient.Delete(ctx, crp); err != nil { return err } @@ -802,7 +788,7 @@ func cleanupCRP(name string) { // Wait until the CRP is removed. removedActual := crpRemovedActual(name) - Eventually(removedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove CRP %s", name) + Eventually(removedActual, workloadEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove CRP %s", name) } // createResourceOverrides creates a number of resource overrides. 
@@ -924,12 +910,12 @@ func ensureCRPAndRelatedResourcesDeleted(crpName string, memberClusters []*frame memberCluster := memberClusters[idx] workResourcesRemovedActual := workNamespaceRemovedFromClusterActual(memberCluster) - Eventually(workResourcesRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove work resources from member cluster %s", memberCluster.ClusterName) + Eventually(workResourcesRemovedActual, workloadEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove work resources from member cluster %s", memberCluster.ClusterName) } // Verify that related finalizers have been removed from the CRP. finalizerRemovedActual := allFinalizersExceptForCustomDeletionBlockerRemovedFromCRPActual(crpName) - Eventually(finalizerRemovedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove controller finalizers from CRP") + Eventually(finalizerRemovedActual, workloadEventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to remove controller finalizers from CRP") // Remove the custom deletion blocker finalizer from the CRP. cleanupCRP(crpName)