diff --git a/.github/workflows/upgrade.yml b/.github/workflows/upgrade.yml new file mode 100644 index 000000000..09a93e3df --- /dev/null +++ b/.github/workflows/upgrade.yml @@ -0,0 +1,145 @@ +name: ci + +on: + push: + branches: + - main + - release-* + workflow_dispatch: {} + pull_request: + branches: + - main + - release-* + paths-ignore: [docs/**, "**.md", "**.mdx", "**.png", "**.jpg"] + +env: + GO_VERSION: '1.22.12' + +jobs: + detect-noop: + runs-on: ubuntu-latest + outputs: + noop: ${{ steps.noop.outputs.should_skip }} + steps: + - name: Detect No-op Changes + id: noop + uses: fkirc/skip-duplicate-actions@v5.3.1 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + do_not_skip: '["workflow_dispatch", "schedule", "push"]' + concurrent_skipping: false + + hub-agent-backward-compatibility: + runs-on: ubuntu-latest + needs: detect-noop + if: needs.detect-noop.outputs.noop != 'true' + steps: + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: ${{ env.GO_VERSION }} + + - name: Check out code into the Go module directory + uses: actions/checkout@v4 + + - name: Set up Ginkgo CLI + run: | + go install github.com/onsi/ginkgo/v2/ginkgo@v2.19.1 + + - name: Prepare the fleet + run: cd test/upgrade && chmod +x setup.sh && ./setup.sh 3 && cd - + env: + KUBECONFIG: '/home/runner/.kube/config' + HUB_SERVER_URL: 'https://172.19.0.2:6443' + + - name: Run the Before suite + run: cd test/upgrade/before && ginkgo -v -p . && cd - + env: + KUBECONFIG: '/home/runner/.kube/config' + + - name: Upgrade the Fleet hub agent + run: cd test/upgrade && chmod +x upgrade.sh && UPGRADE_HUB_SIDE=true ./upgrade.sh 3 && cd - + env: + KUBECONFIG: '/home/runner/.kube/config' + + - name: Run the After suite + run: cd test/upgrade/after && ginkgo -v -p . 
&& cd - + env: + KUBECONFIG: '/home/runner/.kube/config' + + member-agent-backward-compatibility: + runs-on: ubuntu-latest + needs: detect-noop + if: needs.detect-noop.outputs.noop != 'true' + steps: + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: ${{ env.GO_VERSION }} + + - name: Check out code into the Go module directory + uses: actions/checkout@v4 + + - name: Set up Ginkgo CLI + run: | + go install github.com/onsi/ginkgo/v2/ginkgo@v2.19.1 + + - name: Prepare the fleet + run: cd test/upgrade && chmod +x setup.sh && ./setup.sh 3 && cd - + env: + KUBECONFIG: '/home/runner/.kube/config' + HUB_SERVER_URL: 'https://172.19.0.2:6443' + + - name: Run the Before suite + run: cd test/upgrade/before && ginkgo -v -p . && cd - + env: + KUBECONFIG: '/home/runner/.kube/config' + + - name: Upgrade the Fleet member agent + run: cd test/upgrade && chmod +x upgrade.sh && UPGRADE_MEMBER_SIDE=true ./upgrade.sh 3 && cd - + env: + KUBECONFIG: '/home/runner/.kube/config' + + - name: Run the After suite + run: cd test/upgrade/after && ginkgo -v -p . && cd - + env: + KUBECONFIG: '/home/runner/.kube/config' + + full-backward-compatibility: + runs-on: ubuntu-latest + needs: detect-noop + if: needs.detect-noop.outputs.noop != 'true' + steps: + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: ${{ env.GO_VERSION }} + + - name: Check out code into the Go module directory + uses: actions/checkout@v4 + + - name: Set up Ginkgo CLI + run: | + go install github.com/onsi/ginkgo/v2/ginkgo@v2.19.1 + + - name: Prepare the fleet + run: cd test/upgrade && chmod +x setup.sh && ./setup.sh 3 && cd - + env: + KUBECONFIG: '/home/runner/.kube/config' + HUB_SERVER_URL: 'https://172.19.0.2:6443' + + - name: Run the Before suite + run: cd test/upgrade/before && ginkgo -v -p . 
&& cd - + env: + KUBECONFIG: '/home/runner/.kube/config' + + - name: Upgrade all Fleet agents + run: cd test/upgrade && chmod +x upgrade.sh && UPGRADE_HUB_SIDE=true UPGRADE_MEMBER_SIDE=true ./upgrade.sh 3 && cd - + env: + KUBECONFIG: '/home/runner/.kube/config' + + - name: Run the After suite + run: cd test/upgrade/after && ginkgo -v -p . && cd - + env: + KUBECONFIG: '/home/runner/.kube/config' + diff --git a/test/scripts/snippets.sh b/test/scripts/snippets.sh new file mode 100644 index 000000000..e69de29bb diff --git a/test/upgrade/after/actuals_test.go b/test/upgrade/after/actuals_test.go new file mode 100644 index 000000000..55fc05dc3 --- /dev/null +++ b/test/upgrade/after/actuals_test.go @@ -0,0 +1,923 @@ +package after + +import ( + "fmt" + + "github.com/google/go-cmp/cmp" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + + placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" + "go.goms.io/fleet/pkg/controllers/clusterresourceplacement" + "go.goms.io/fleet/pkg/controllers/work" + scheduler "go.goms.io/fleet/pkg/scheduler/framework" + "go.goms.io/fleet/pkg/utils/condition" + "go.goms.io/fleet/test/e2e/framework" +) + +func resourcePlacementRolloutCompletedConditions(generation int64, resourceIsTrackable bool, hasOverride bool) []metav1.Condition { + availableConditionReason := work.WorkNotTrackableReason + if resourceIsTrackable { + availableConditionReason = condition.AllWorkAvailableReason + } + overrideConditionReason := condition.OverrideNotSpecifiedReason + if hasOverride { + overrideConditionReason = condition.OverriddenSucceededReason + } + + return []metav1.Condition{ + { + Type: string(placementv1beta1.ResourceScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: condition.ScheduleSucceededReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourceRolloutStartedConditionType), + Status: 
metav1.ConditionTrue, + Reason: condition.RolloutStartedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourceOverriddenConditionType), + Status: metav1.ConditionTrue, + Reason: overrideConditionReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourceWorkSynchronizedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.AllWorkSyncedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourcesAppliedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.AllWorkAppliedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourcesAvailableConditionType), + Status: metav1.ConditionTrue, + Reason: availableConditionReason, + ObservedGeneration: generation, + }, + } +} + +func resourcePlacementScheduleFailedConditions(generation int64) []metav1.Condition { + return []metav1.Condition{ + { + Type: string(placementv1beta1.ResourceScheduledConditionType), + Status: metav1.ConditionFalse, + ObservedGeneration: generation, + Reason: clusterresourceplacement.ResourceScheduleFailedReason, + }, + } +} + +func resourcePlacementApplyFailedConditions(generation int64) []metav1.Condition { + return []metav1.Condition{ + { + Type: string(placementv1beta1.ResourceScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: condition.ScheduleSucceededReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourceRolloutStartedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.RolloutStartedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourceOverriddenConditionType), + Status: metav1.ConditionTrue, + Reason: condition.OverrideNotSpecifiedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourceWorkSynchronizedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.AllWorkSyncedReason, + 
ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourcesAppliedConditionType), + Status: metav1.ConditionFalse, + Reason: condition.WorkNotAppliedReason, + ObservedGeneration: generation, + }, + } +} + +func resourcePlacementAvailabilityCheckFailedConditions(generation int64) []metav1.Condition { + return []metav1.Condition{ + { + Type: string(placementv1beta1.ResourceScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: condition.ScheduleSucceededReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourceRolloutStartedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.RolloutStartedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourceOverriddenConditionType), + Status: metav1.ConditionTrue, + Reason: condition.OverrideNotSpecifiedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourceWorkSynchronizedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.AllWorkSyncedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourcesAppliedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.AllWorkAppliedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourcesAvailableConditionType), + Status: metav1.ConditionFalse, + Reason: condition.WorkNotAvailableReason, + ObservedGeneration: generation, + }, + } +} + +func crpRolloutCompletedConditions(generation int64, hasOverride bool) []metav1.Condition { + overrideConditionReason := condition.OverrideNotSpecifiedReason + if hasOverride { + overrideConditionReason = condition.OverriddenSucceededReason + } + return []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterResourcePlacementScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: scheduler.FullyScheduledReason, + ObservedGeneration: generation, + }, + { + Type: 
string(placementv1beta1.ClusterResourcePlacementRolloutStartedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.RolloutStartedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementOverriddenConditionType), + Status: metav1.ConditionTrue, + Reason: overrideConditionReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementWorkSynchronizedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.WorkSynchronizedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementAppliedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.ApplySucceededReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementAvailableConditionType), + Status: metav1.ConditionTrue, + Reason: condition.AvailableReason, + ObservedGeneration: generation, + }, + } +} + +func crpSchedulePartiallyFailedConditions(generation int64) []metav1.Condition { + return []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterResourcePlacementScheduledConditionType), + Status: metav1.ConditionFalse, + ObservedGeneration: generation, + Reason: scheduler.NotFullyScheduledReason, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementRolloutStartedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.RolloutStartedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementOverriddenConditionType), + Status: metav1.ConditionTrue, + Reason: condition.OverrideNotSpecifiedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementWorkSynchronizedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.WorkSynchronizedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementAppliedConditionType), 
+ Status: metav1.ConditionTrue, + Reason: condition.ApplySucceededReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementAvailableConditionType), + Status: metav1.ConditionTrue, + Reason: condition.AvailableReason, + ObservedGeneration: generation, + }, + } +} + +func crpScheduleFailedConditions(generation int64) []metav1.Condition { + return []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterResourcePlacementScheduledConditionType), + Status: metav1.ConditionFalse, + ObservedGeneration: generation, + Reason: scheduler.NotFullyScheduledReason, + }, + } +} + +func crpNotAvailableConditions(generation int64, hasOverride bool) []metav1.Condition { + overrideConditionReason := condition.OverrideNotSpecifiedReason + if hasOverride { + overrideConditionReason = condition.OverriddenSucceededReason + } + return []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterResourcePlacementScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: scheduler.FullyScheduledReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementRolloutStartedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.RolloutStartedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementOverriddenConditionType), + Status: metav1.ConditionTrue, + Reason: overrideConditionReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementWorkSynchronizedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.WorkSynchronizedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementAppliedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.ApplySucceededReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementAvailableConditionType), + Status: 
metav1.ConditionFalse, + Reason: condition.NotAvailableYetReason, + ObservedGeneration: generation, + }, + } +} + +func crpNotAppliedConditions(generation int64) []metav1.Condition { + return []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterResourcePlacementScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: scheduler.FullyScheduledReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementRolloutStartedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.RolloutStartedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementOverriddenConditionType), + Status: metav1.ConditionTrue, + Reason: condition.OverrideNotSpecifiedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementWorkSynchronizedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.WorkSynchronizedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementAppliedConditionType), + Status: metav1.ConditionFalse, + Reason: condition.ApplyFailedReason, + ObservedGeneration: generation, + }, + } +} + +func crpStatusUpdatedActual(crpName string, wantSelectedResourceIdentifiers []placementv1beta1.ResourceIdentifier, wantSelectedClusters, wantUnselectedClusters []string, wantObservedResourceIndex string) func() error { + return customizedCRPStatusUpdatedActual(crpName, wantSelectedResourceIdentifiers, wantSelectedClusters, wantUnselectedClusters, wantObservedResourceIndex, true) +} + +func customizedCRPStatusUpdatedActual(crpName string, + wantSelectedResourceIdentifiers []placementv1beta1.ResourceIdentifier, + wantSelectedClusters, wantUnselectedClusters []string, + wantObservedResourceIndex string, + resourceIsTrackable bool) func() error { + return func() error { + crp := &placementv1beta1.ClusterResourcePlacement{} + if err := hubClient.Get(ctx, 
types.NamespacedName{Name: crpName}, crp); err != nil { + return err + } + + wantPlacementStatus := []placementv1beta1.ResourcePlacementStatus{} + for _, name := range wantSelectedClusters { + wantPlacementStatus = append(wantPlacementStatus, placementv1beta1.ResourcePlacementStatus{ + ClusterName: name, + Conditions: resourcePlacementRolloutCompletedConditions(crp.Generation, resourceIsTrackable, false), + }) + } + for i := 0; i < len(wantUnselectedClusters); i++ { + wantPlacementStatus = append(wantPlacementStatus, placementv1beta1.ResourcePlacementStatus{ + Conditions: resourcePlacementScheduleFailedConditions(crp.Generation), + }) + } + + var wantCRPConditions []metav1.Condition + if len(wantSelectedClusters) > 0 { + wantCRPConditions = crpRolloutCompletedConditions(crp.Generation, false) + } else { + wantCRPConditions = []metav1.Condition{ + // we don't set the remaining resource conditions. + { + Type: string(placementv1beta1.ClusterResourcePlacementScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: scheduler.FullyScheduledReason, + ObservedGeneration: crp.Generation, + }, + } + } + + if len(wantUnselectedClusters) > 0 { + if len(wantSelectedClusters) > 0 { + wantCRPConditions = crpSchedulePartiallyFailedConditions(crp.Generation) + } else { + // we don't set the remaining resource conditions if there is no clusters to select + wantCRPConditions = crpScheduleFailedConditions(crp.Generation) + } + } + + // Note that the CRP controller will only keep decisions regarding unselected clusters for a CRP if: + // + // * The CRP is of the PickN placement type and the required N count cannot be fulfilled; or + // * The CRP is of the PickFixed placement type and the list of target clusters specified cannot be fulfilled. 
+ wantStatus := placementv1beta1.ClusterResourcePlacementStatus{ + Conditions: wantCRPConditions, + PlacementStatuses: wantPlacementStatus, + SelectedResources: wantSelectedResourceIdentifiers, + ObservedResourceIndex: wantObservedResourceIndex, + } + if diff := cmp.Diff(crp.Status, wantStatus, crpStatusCmpOptions...); diff != "" { + return fmt.Errorf("CRP status diff (-got, +want): %s", diff) + } + return nil + } +} + +func crpWithOneFailedAvailabilityCheckStatusUpdatedActual( + crpName string, + wantSelectedResourceIdentifiers []placementv1beta1.ResourceIdentifier, + wantFailedClusters []string, + wantFailedWorkloadResourceIdentifier placementv1beta1.ResourceIdentifier, + wantFailedResourceObservedGeneration int64, + wantAvailableClusters []string, + wantObservedResourceIndex string, +) func() error { + return func() error { + crp := &placementv1beta1.ClusterResourcePlacement{} + if err := hubClient.Get(ctx, types.NamespacedName{Name: crpName}, crp); err != nil { + return err + } + + var wantPlacementStatus []placementv1beta1.ResourcePlacementStatus + + for _, name := range wantFailedClusters { + wantPlacementStatus = append(wantPlacementStatus, placementv1beta1.ResourcePlacementStatus{ + ClusterName: name, + Conditions: resourcePlacementAvailabilityCheckFailedConditions(crp.Generation), + FailedPlacements: []placementv1beta1.FailedResourcePlacement{ + { + ResourceIdentifier: wantFailedWorkloadResourceIdentifier, + Condition: metav1.Condition{ + Type: string(placementv1beta1.ResourcesAvailableConditionType), + Status: metav1.ConditionFalse, + Reason: "ManifestNotAvailableYet", + ObservedGeneration: wantFailedResourceObservedGeneration, + }, + }, + }, + }) + } + + for _, name := range wantAvailableClusters { + wantPlacementStatus = append(wantPlacementStatus, placementv1beta1.ResourcePlacementStatus{ + ClusterName: name, + Conditions: resourcePlacementRolloutCompletedConditions(crp.Generation, true, false), + }) + } + + wantStatus := 
placementv1beta1.ClusterResourcePlacementStatus{ + Conditions: crpNotAvailableConditions(crp.Generation, false), + PlacementStatuses: wantPlacementStatus, + SelectedResources: wantSelectedResourceIdentifiers, + ObservedResourceIndex: wantObservedResourceIndex, + } + + if diff := cmp.Diff(crp.Status, wantStatus, crpStatusCmpOptions...); diff != "" { + return fmt.Errorf("CRP status diff (-got, +want): %s", diff) + } + return nil + } +} + +func crpWithOneFailedApplyOpStatusUpdatedActual( + crpName string, + wantSelectedResourceIdentifiers []placementv1beta1.ResourceIdentifier, + wantFailedClusters []string, + wantFailedWorkloadResourceIdentifier placementv1beta1.ResourceIdentifier, + wantFailedResourceObservedGeneration int64, + wantAvailableClusters []string, + wantObservedResourceIndex string, +) func() error { + return func() error { + crp := &placementv1beta1.ClusterResourcePlacement{} + if err := hubClient.Get(ctx, types.NamespacedName{Name: crpName}, crp); err != nil { + return err + } + + var wantPlacementStatus []placementv1beta1.ResourcePlacementStatus + + for _, name := range wantFailedClusters { + wantPlacementStatus = append(wantPlacementStatus, placementv1beta1.ResourcePlacementStatus{ + ClusterName: name, + Conditions: resourcePlacementApplyFailedConditions(crp.Generation), + FailedPlacements: []placementv1beta1.FailedResourcePlacement{ + { + ResourceIdentifier: wantFailedWorkloadResourceIdentifier, + Condition: metav1.Condition{ + Type: string(placementv1beta1.ResourcesAppliedConditionType), + Status: metav1.ConditionFalse, + // TO-DO (chenyu1): this reason string is subject to change. 
+ Reason: "ManifestApplyFailed", + ObservedGeneration: wantFailedResourceObservedGeneration, + }, + }, + }, + }) + } + + for _, name := range wantAvailableClusters { + wantPlacementStatus = append(wantPlacementStatus, placementv1beta1.ResourcePlacementStatus{ + ClusterName: name, + Conditions: resourcePlacementRolloutCompletedConditions(crp.Generation, true, false), + }) + } + + wantStatus := placementv1beta1.ClusterResourcePlacementStatus{ + Conditions: crpNotAppliedConditions(crp.Generation), + PlacementStatuses: wantPlacementStatus, + SelectedResources: wantSelectedResourceIdentifiers, + ObservedResourceIndex: wantObservedResourceIndex, + } + + if diff := cmp.Diff(crp.Status, wantStatus, crpStatusCmpOptions...); diff != "" { + return fmt.Errorf("CRP status diff (-got, +want): %s", diff) + } + return nil + } +} + +func crpWithStuckRolloutDueToOneFailedAvailabilityCheckStatusUpdatedActual( + crpName string, + wantSelectedResourceIdentifiers []placementv1beta1.ResourceIdentifier, + failedWorkloadResourceIdentifier placementv1beta1.ResourceIdentifier, + failedResourceObservedGeneration int64, + wantSelectedClusters []string, + wantObservedResourceIndex string, +) func() error { + return func() error { + crp := &placementv1beta1.ClusterResourcePlacement{} + if err := hubClient.Get(ctx, types.NamespacedName{Name: crpName}, crp); err != nil { + return err + } + + var wantPlacementStatus []placementv1beta1.ResourcePlacementStatus + // We only expect the deployment to not be available on one cluster. 
+ unavailableResourcePlacementStatus := placementv1beta1.ResourcePlacementStatus{ + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ResourceScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: condition.ScheduleSucceededReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ResourceRolloutStartedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.RolloutStartedReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ResourceOverriddenConditionType), + Status: metav1.ConditionTrue, + Reason: condition.OverrideNotSpecifiedReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ResourceWorkSynchronizedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.AllWorkSyncedReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ResourcesAppliedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.AllWorkAppliedReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ResourcesAvailableConditionType), + Status: metav1.ConditionFalse, + Reason: condition.WorkNotAvailableReason, + ObservedGeneration: crp.Generation, + }, + }, + FailedPlacements: []placementv1beta1.FailedResourcePlacement{ + { + ResourceIdentifier: failedWorkloadResourceIdentifier, + Condition: metav1.Condition{ + Type: string(placementv1beta1.ResourcesAvailableConditionType), + Status: metav1.ConditionFalse, + // TO-DO (chenyu1): this reason string is subject to change. + Reason: "ManifestNotAvailableYet", + ObservedGeneration: failedResourceObservedGeneration, + }, + }, + }, + } + wantPlacementStatus = append(wantPlacementStatus, unavailableResourcePlacementStatus) + + // For all the other connected member clusters rollout will be blocked. 
+ rolloutBlockedPlacementStatus := placementv1beta1.ResourcePlacementStatus{ + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ResourceScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: condition.ScheduleSucceededReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ResourceRolloutStartedConditionType), + Status: metav1.ConditionFalse, + Reason: condition.RolloutNotStartedYetReason, + ObservedGeneration: crp.Generation, + }, + }, + } + + for i := 0; i < len(wantSelectedClusters)-1; i++ { + wantPlacementStatus = append(wantPlacementStatus, rolloutBlockedPlacementStatus) + } + + wantCRPConditions := []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterResourcePlacementScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: scheduler.FullyScheduledReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementRolloutStartedConditionType), + Status: metav1.ConditionFalse, + Reason: condition.RolloutNotStartedYetReason, + ObservedGeneration: crp.Generation, + }, + } + + wantStatus := placementv1beta1.ClusterResourcePlacementStatus{ + Conditions: wantCRPConditions, + PlacementStatuses: wantPlacementStatus, + SelectedResources: wantSelectedResourceIdentifiers, + ObservedResourceIndex: wantObservedResourceIndex, + } + + if diff := cmp.Diff(crp.Status, wantStatus, crpWithStuckRolloutStatusCmpOptions...); diff != "" { + return fmt.Errorf("CRP status diff (-got, +want): %s", diff) + } + return nil + } +} + +func crpWithStuckRolloutDueToOneFailedApplyOpStatusUpdatedActual( + crpName string, + wantSelectedResourceIdentifiers []placementv1beta1.ResourceIdentifier, + failedWorkloadResourceIdentifier placementv1beta1.ResourceIdentifier, + failedResourceObservedGeneration int64, + wantSelectedClusters []string, + wantObservedResourceIndex string, +) func() error { + return func() error { + crp := &placementv1beta1.ClusterResourcePlacement{} + if 
err := hubClient.Get(ctx, types.NamespacedName{Name: crpName}, crp); err != nil { + return err + } + + var wantPlacementStatus []placementv1beta1.ResourcePlacementStatus + // We only expect the deployment to not be available on one cluster. + unavailableResourcePlacementStatus := placementv1beta1.ResourcePlacementStatus{ + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ResourceScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: condition.ScheduleSucceededReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ResourceRolloutStartedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.RolloutStartedReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ResourceOverriddenConditionType), + Status: metav1.ConditionTrue, + Reason: condition.OverrideNotSpecifiedReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ResourceWorkSynchronizedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.AllWorkSyncedReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ResourcesAppliedConditionType), + Status: metav1.ConditionFalse, + Reason: condition.WorkNotAppliedReason, + ObservedGeneration: crp.Generation, + }, + }, + FailedPlacements: []placementv1beta1.FailedResourcePlacement{ + { + ResourceIdentifier: failedWorkloadResourceIdentifier, + Condition: metav1.Condition{ + Type: string(placementv1beta1.ResourcesAppliedConditionType), + Status: metav1.ConditionFalse, + // TO-DO (chenyu1): this reason string is subject to change. + Reason: "ManifestApplyFailed", + ObservedGeneration: failedResourceObservedGeneration, + }, + }, + }, + } + wantPlacementStatus = append(wantPlacementStatus, unavailableResourcePlacementStatus) + + // For all the other connected member clusters rollout will be blocked. 
+ rolloutBlockedPlacementStatus := placementv1beta1.ResourcePlacementStatus{ + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ResourceScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: condition.ScheduleSucceededReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ResourceRolloutStartedConditionType), + Status: metav1.ConditionFalse, + Reason: condition.RolloutNotStartedYetReason, + ObservedGeneration: crp.Generation, + }, + }, + } + + for i := 0; i < len(wantSelectedClusters)-1; i++ { + wantPlacementStatus = append(wantPlacementStatus, rolloutBlockedPlacementStatus) + } + + wantCRPConditions := []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterResourcePlacementScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: scheduler.FullyScheduledReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementRolloutStartedConditionType), + Status: metav1.ConditionFalse, + Reason: condition.RolloutNotStartedYetReason, + ObservedGeneration: crp.Generation, + }, + } + + wantStatus := placementv1beta1.ClusterResourcePlacementStatus{ + Conditions: wantCRPConditions, + PlacementStatuses: wantPlacementStatus, + SelectedResources: wantSelectedResourceIdentifiers, + ObservedResourceIndex: wantObservedResourceIndex, + } + + if diff := cmp.Diff(crp.Status, wantStatus, crpWithStuckRolloutStatusCmpOptions...); diff != "" { + return fmt.Errorf("CRP status diff (-got, +want): %s", diff) + } + return nil + } +} + +func crpWithStuckRolloutDueToUntrackableResourcesStatusUpdatedActual( + crpName string, + wantSelectedResourceIdentifiers []placementv1beta1.ResourceIdentifier, + wantSelectedClusters []string, + wantObservedResourceIndex string, +) func() error { + return func() error { + crp := &placementv1beta1.ClusterResourcePlacement{} + if err := hubClient.Get(ctx, types.NamespacedName{Name: crpName}, crp); err != nil { + return err + } + + var 
wantPlacementStatus []placementv1beta1.ResourcePlacementStatus + // For all the other connected member clusters rollout will be blocked. + rolloutBlockedPlacementStatus := placementv1beta1.ResourcePlacementStatus{ + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ResourceScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: condition.ScheduleSucceededReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ResourceRolloutStartedConditionType), + Status: metav1.ConditionFalse, + Reason: condition.RolloutNotStartedYetReason, + ObservedGeneration: crp.Generation, + }, + }, + } + + for i := 0; i < len(wantSelectedClusters); i++ { + wantPlacementStatus = append(wantPlacementStatus, rolloutBlockedPlacementStatus) + } + + wantCRPConditions := []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterResourcePlacementScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: scheduler.FullyScheduledReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementRolloutStartedConditionType), + Status: metav1.ConditionFalse, + Reason: condition.RolloutNotStartedYetReason, + ObservedGeneration: crp.Generation, + }, + } + + wantStatus := placementv1beta1.ClusterResourcePlacementStatus{ + Conditions: wantCRPConditions, + PlacementStatuses: wantPlacementStatus, + SelectedResources: wantSelectedResourceIdentifiers, + ObservedResourceIndex: wantObservedResourceIndex, + } + + if diff := cmp.Diff(crp.Status, wantStatus, crpWithStuckRolloutStatusCmpOptions...); diff != "" { + return fmt.Errorf("CRP status diff (-got, +want): %s", diff) + } + return nil + } +} + +func validateWorkNamespaceOnCluster(cluster *framework.Cluster, name types.NamespacedName) error { + ns := &corev1.Namespace{} + if err := cluster.KubeClient.Get(ctx, name, ns); err != nil { + return err + } + + // Use the object created in the hub cluster as reference; this helps to avoid the trouble + 
// of having to ignore default fields in the spec. + wantNS := &corev1.Namespace{} + if err := hubClient.Get(ctx, name, wantNS); err != nil { + return err + } + + if diff := cmp.Diff( + ns, wantNS, + ignoreNamespaceStatusField, + ignoreObjectMetaAutoGeneratedFields, + ignoreObjectMetaAnnotationField, + ); diff != "" { + return fmt.Errorf("work namespace diff (-got, +want): %s", diff) + } + return nil +} + +func validateConfigMapOnCluster(cluster *framework.Cluster, name types.NamespacedName) error { + configMap := &corev1.ConfigMap{} + if err := cluster.KubeClient.Get(ctx, name, configMap); err != nil { + return err + } + + // Use the object created in the hub cluster as reference. + wantConfigMap := &corev1.ConfigMap{} + if err := hubClient.Get(ctx, name, wantConfigMap); err != nil { + return err + } + + if diff := cmp.Diff( + configMap, wantConfigMap, + ignoreObjectMetaAutoGeneratedFields, + ignoreObjectMetaAnnotationField, + ); diff != "" { + return fmt.Errorf("app config map diff (-got, +want): %s", diff) + } + + return nil +} + +func workNamespaceAndConfigMapPlacedOnClusterActual(cluster *framework.Cluster, workNamespaceName, appConfigMapName string) func() error { + return func() error { + if err := validateWorkNamespaceOnCluster(cluster, types.NamespacedName{Name: workNamespaceName}); err != nil { + return err + } + + return validateConfigMapOnCluster(cluster, types.NamespacedName{Namespace: workNamespaceName, Name: appConfigMapName}) + } +} + +func validateJobOnCluster(cluster *framework.Cluster, name types.NamespacedName) error { + job := &batchv1.Job{} + if err := cluster.KubeClient.Get(ctx, name, job); err != nil { + return err + } + + // Use the object created in the hub cluster as reference. 
+ wantJob := &batchv1.Job{} + if err := hubClient.Get(ctx, name, wantJob); err != nil { + return err + } + + if diff := cmp.Diff( + job, wantJob, + ignoreObjectMetaAutoGeneratedFields, + ignoreObjectMetaAnnotationField, + ignoreJobSpecSelectorField, + ignorePodTemplateSpecObjectMetaField, + ignoreJobStatusField, + ); diff != "" { + return fmt.Errorf("job diff (-got, +want): %s", diff) + } + + return nil +} + +func validateServiceOnCluster(cluster *framework.Cluster, name types.NamespacedName) error { + service := &corev1.Service{} + if err := cluster.KubeClient.Get(ctx, name, service); err != nil { + return err + } + + // Use the object created in the hub cluster as reference. + wantService := &corev1.Service{} + if err := hubClient.Get(ctx, name, wantService); err != nil { + return err + } + + if diff := cmp.Diff( + service, wantService, + ignoreObjectMetaAutoGeneratedFields, + ignoreObjectMetaAnnotationField, + ignoreServiceStatusField, + ignoreServiceSpecIPAndPolicyFields, + ignoreServicePortNodePortProtocolField, + ); diff != "" { + return fmt.Errorf("service diff (-got, +want): %s", diff) + } + + return nil +} diff --git a/test/upgrade/after/resources_test.go b/test/upgrade/after/resources_test.go new file mode 100644 index 000000000..3b95e43c6 --- /dev/null +++ b/test/upgrade/after/resources_test.go @@ -0,0 +1,26 @@ +/* +Copyright (c) Microsoft Corporation. +Licensed under the MIT license. 
+*/ + +package after + +import ( + placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" +) + +func workResourceIdentifiers(workNamespaceName, appConfigMapName string) []placementv1beta1.ResourceIdentifier { + return []placementv1beta1.ResourceIdentifier{ + { + Kind: "Namespace", + Name: workNamespaceName, + Version: "v1", + }, + { + Kind: "ConfigMap", + Name: appConfigMapName, + Version: "v1", + Namespace: workNamespaceName, + }, + } +} diff --git a/test/upgrade/after/scenarios_test.go b/test/upgrade/after/scenarios_test.go new file mode 100644 index 000000000..5a45075be --- /dev/null +++ b/test/upgrade/after/scenarios_test.go @@ -0,0 +1,451 @@ +/* +Copyright (c) Microsoft Corporation. +Licensed under the MIT license. +*/ + +package after + +import ( + "fmt" + + "github.com/google/go-cmp/cmp" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/utils/ptr" + + placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" +) + +// TO-DO (chenyu1): expand the test specs to check agent liveness after upgrade. + +var _ = Describe("CRP with trackable resources, all available (after upgrade)", Ordered, func() { + // The names specified must match with those in the corresponding node from the before upgrade + // test stage. + crpName := "crp-trackable-available" + workNamespaceName := "work-trackable-available" + appConfigMapName := "app-configmap-trackable-available" + + // Setup is done in the previous step. 
+ + It("should keep CRP status", func() { + crpStatusUpdatedActual := crpStatusUpdatedActual(crpName, workResourceIdentifiers(workNamespaceName, appConfigMapName), allMemberClusterNames, nil, "0") + Consistently(crpStatusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update CRP status as expected") + }) + + It("should keep the state of objects", func() { + checkIfPlacedWorkResourcesOnAllMemberClustersConsistently(workNamespaceName, appConfigMapName) + }) +}) + +var _ = Describe("CRP with untrackable resources, all available (after upgrade)", Ordered, func() { + // The names specified must match with those in the corresponding node from the before upgrade + // test stage. + crpName := "crp-non-trackable-available" + workNamespaceName := "work-non-trackable-available" + jobName := "job-non-trackable-available" + + // Setup is done in the previous step. + + It("should keep CRP status unchanged", func() { + wantResourceIdentifiers := []placementv1beta1.ResourceIdentifier{ + { + Kind: "Namespace", + Name: workNamespaceName, + Version: "v1", + }, + { + Group: "batch", + Kind: "Job", + Name: jobName, + Version: "v1", + Namespace: workNamespaceName, + }, + } + crpStatusUpdatedActual := customizedCRPStatusUpdatedActual(crpName, wantResourceIdentifiers, allMemberClusterNames, nil, "0", false) + Consistently(crpStatusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update CRP status as expected") + }) + + It("should keep the state of objects", func() { + for idx := range allMemberClusters { + memberCluster := allMemberClusters[idx] + // Give the system a bit more breathing room when process resource placement. 
+ Consistently(func() error { + if err := validateWorkNamespaceOnCluster(memberCluster, types.NamespacedName{Name: workNamespaceName}); err != nil { + return err + } + + return validateJobOnCluster(memberCluster, types.NamespacedName{Name: jobName, Namespace: workNamespaceName}) + }, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to place work resources on member cluster %s", memberCluster.ClusterName) + } + }) +}) + +var _ = Describe("CRP with availability failure (after upgrade)", Ordered, func() { + // The names specified must match with those in the corresponding node from the before upgrade + // test stage. + crpName := "crp-availability-failure" + workNamespaceName := "work-availability-failure" + svcName := "svc-availability-failure" + + It("should keep CRP status unchanged", func() { + wantResourceIdentifiers := []placementv1beta1.ResourceIdentifier{ + { + Kind: "Namespace", + Name: workNamespaceName, + Version: "v1", + }, + { + Kind: "Service", + Name: svcName, + Version: "v1", + Namespace: workNamespaceName, + }, + } + wantFailedWorkloadResourceIdentifier := placementv1beta1.ResourceIdentifier{ + Kind: "Service", + Name: svcName, + Version: "v1", + Namespace: workNamespaceName, + } + crpStatusUpdatedActual := crpWithOneFailedAvailabilityCheckStatusUpdatedActual( + crpName, + wantResourceIdentifiers, + []string{memberCluster1EastProdName}, + wantFailedWorkloadResourceIdentifier, 0, + []string{}, + "0", + ) + Consistently(crpStatusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update CRP status as expected") + }) + + It("should keep the state of objects", func() { + // Give the system a bit more breathing room when process resource placement. 
+ Consistently(func() error { + if err := validateWorkNamespaceOnCluster(memberCluster1EastProd, types.NamespacedName{Name: workNamespaceName}); err != nil { + return err + } + + return validateServiceOnCluster(memberCluster1EastProd, types.NamespacedName{Name: svcName, Namespace: workNamespaceName}) + }, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to place work resources on member cluster %s", memberCluster1EastProd.ClusterName) + }) +}) + +var _ = Describe("CRP with apply op failure (after upgrade)", Ordered, func() { + // The names specified must match with those in the corresponding node from the before upgrade + // test stage. + crpName := "crp-apply-failure" + workNamespaceName := "work-apply-failure" + appConfigMapName := "app-configmap-apply-failure" + + It("should keep CRP status unchanged", func() { + wantFailedWorkloadResourceIdentifier := placementv1beta1.ResourceIdentifier{ + Kind: "ConfigMap", + Name: appConfigMapName, + Version: "v1", + Namespace: workNamespaceName, + } + crpStatusUpdatedActual := crpWithOneFailedApplyOpStatusUpdatedActual( + crpName, + workResourceIdentifiers(workNamespaceName, appConfigMapName), + []string{memberCluster1EastProdName}, + wantFailedWorkloadResourceIdentifier, 0, + []string{}, + "0", + ) + Consistently(crpStatusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update CRP status as expected") + }) + + It("should keep the state of objects (no apply op failure)", func() { + // Give the system a bit more breathing room when process resource placement. 
+ Consistently(func() error { + return validateWorkNamespaceOnCluster(memberCluster1EastProd, types.NamespacedName{Name: workNamespaceName}) + }, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to place work resources on member cluster %s", memberCluster1EastProd.ClusterName) + }) + + It("should keep the state of objects (apply op failure)", func() { + // Give the system a bit more breathing room when process resource placement. + Consistently(func() error { + configMap := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: appConfigMapName, + Namespace: workNamespaceName, + }, + Data: map[string]string{ + "data": "foo", + }, + } + + wantConfigMap := &corev1.ConfigMap{} + if err := memberCluster1EastProdClient.Get(ctx, types.NamespacedName{Name: appConfigMapName, Namespace: workNamespaceName}, wantConfigMap); err != nil { + return err + } + + if diff := cmp.Diff( + configMap, wantConfigMap, + ignoreObjectMetaAutoGeneratedFields, + ignoreObjectMetaAnnotationField, + ); diff != "" { + return fmt.Errorf("app config map diff (-got, +want): %s", diff) + } + + return nil + }, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to skip resource placement") + }) +}) + +var _ = Describe("CRP stuck in the rollout process (blocked by availability failure)", Ordered, func() { + // The names specified must match with those in the corresponding node from the before upgrade + // test stage. 
+ crpName := "crp-availability-failure-stuck" + workNamespaceName := "work-availability-failure-stuck" + svcName := "svc-availability-failure-stuck" + + originalSvc := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: svcName, + Namespace: workNamespaceName, + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeClusterIP, + Selector: map[string]string{ + "app": "nginx", + }, + Ports: []corev1.ServicePort{ + { + Port: 80, + TargetPort: intstr.FromInt(80), + }, + }, + }, + } + + It("should keep CRP status unchanged (rollout blocked due to unavailable objects)", func() { + wantSelectedResourceIdentifiers := []placementv1beta1.ResourceIdentifier{ + { + Kind: "Namespace", + Name: workNamespaceName, + Version: "v1", + }, + { + Kind: "Service", + Name: svcName, + Version: "v1", + Namespace: workNamespaceName, + }, + } + failedWorkloadResourceIdentifier := placementv1beta1.ResourceIdentifier{ + Kind: "Service", + Name: svcName, + Version: "v1", + Namespace: workNamespaceName, + } + crpStatusUpdatedActual := crpWithStuckRolloutDueToOneFailedAvailabilityCheckStatusUpdatedActual( + crpName, + wantSelectedResourceIdentifiers, + failedWorkloadResourceIdentifier, 0, + allMemberClusterNames, + "1", + ) + Consistently(crpStatusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update CRP status as expected") + }) + + It("should keeps the state of objects", func() { + // Retrieve the CRP for its status. + crp := &placementv1beta1.ClusterResourcePlacement{} + Expect(hubClient.Get(ctx, types.NamespacedName{Name: crpName}, crp)).To(Succeed(), "Failed to get CRP") + + // Find the clusters that have the updated Service object and those that do not. 
+ clustersWithUpdatedService := map[string]struct{}{} + clustersWithOldService := map[string]struct{}{} + for idx := range crp.Status.PlacementStatuses { + rps := crp.Status.PlacementStatuses[idx] + availableCond := meta.FindStatusCondition(rps.Conditions, string(placementv1beta1.ResourcesAvailableConditionType)) + switch { + case availableCond == nil: + clustersWithOldService[rps.ClusterName] = struct{}{} + case availableCond.Status == metav1.ConditionFalse: + clustersWithUpdatedService[rps.ClusterName] = struct{}{} + default: + Fail(fmt.Sprintf("Found an unexpected availability reporting \n(%v)", rps)) + } + } + + // Validate things on the clusters with updated resources. + Consistently(func() error { + for idx := range allMemberClusters { + memberCluster := allMemberClusters[idx] + if _, ok := clustersWithUpdatedService[memberCluster.ClusterName]; ok { + // No need to validate the NS as it is unchanged. + if err := validateServiceOnCluster(memberCluster, types.NamespacedName{Name: svcName, Namespace: workNamespaceName}); err != nil { + return err + } + } + } + return nil + }, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update objects on clusters") + + // Validate things on the clusters with old resources. 
+ Consistently(func() error { + for idx := range allMemberClusters { + memberCluster := allMemberClusters[idx] + if _, ok := clustersWithOldService[memberCluster.ClusterName]; ok { + svc := &corev1.Service{} + if err := memberCluster.KubeClient.Get(ctx, types.NamespacedName{Name: svcName, Namespace: workNamespaceName}, svc); err != nil { + return fmt.Errorf("failed to retrieve svc on cluster %s: %w", memberCluster.ClusterName, err) + } + + if diff := cmp.Diff( + svc, originalSvc, + ignoreObjectMetaAutoGeneratedFields, + ignoreObjectMetaAnnotationField, + ignoreServiceStatusField, + ignoreServiceSpecIPAndPolicyFields, + ignoreServicePortNodePortProtocolField, + ); diff != "" { + return fmt.Errorf("service diff (-got, +want): %s", diff) + } + } + } + return nil + }, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to leave objects alone on clusters") + }) +}) + +var _ = Describe("CRP stuck in the rollout process (blocked by apply op failure)", Ordered, func() { + // The names specified must match with those in the corresponding node from the after upgrade + // test stage. 
+ crpName := "crp-apply-failure-stuck" + workNamespaceName := "work-apply-failure-stuck" + appConfigMapName := "app-configmap-apply-failure-stuck" + + It("should keep CRP status unchanged (rollout blocked due to apply op failures)", func() { + wantSelectedResourceIdentifiers := []placementv1beta1.ResourceIdentifier{ + { + Kind: "Namespace", + Name: workNamespaceName, + Version: "v1", + }, + { + Kind: "ConfigMap", + Name: appConfigMapName, + Version: "v1", + Namespace: workNamespaceName, + }, + } + failedWorkloadResourceIdentifier := placementv1beta1.ResourceIdentifier{ + Kind: "ConfigMap", + Name: appConfigMapName, + Version: "v1", + Namespace: workNamespaceName, + } + crpStatusUpdatedActual := crpWithStuckRolloutDueToOneFailedApplyOpStatusUpdatedActual( + crpName, + wantSelectedResourceIdentifiers, + failedWorkloadResourceIdentifier, 0, + allMemberClusterNames, + "1", + ) + Consistently(crpStatusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update CRP status as expected") + }) + + It("should keep the state of objects", func() { + // Validate things on the clusters with old resources. 
+ Consistently(func() error { + for idx := range allMemberClusters { + memberCluster := allMemberClusters[idx] + + configMap := &corev1.ConfigMap{} + if err := memberCluster.KubeClient.Get(ctx, types.NamespacedName{Name: appConfigMapName, Namespace: workNamespaceName}, configMap); err != nil { + return fmt.Errorf("failed to retrieve config map on cluster %s: %w", memberCluster.ClusterName, err) + } + + wantConfigMap := configMap.DeepCopy() + wantConfigMap.Data["custom"] = "foo" + if diff := cmp.Diff( + configMap, wantConfigMap, + ignoreObjectMetaAutoGeneratedFields, + ignoreObjectMetaAnnotationField, + ); diff != "" { + return fmt.Errorf("service diff (-got, +want): %s", diff) + } + } + + return nil + }, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to leave objects alone on clusters") + }) +}) + +var _ = Describe("CRP stuck in the rollout process (long wait time)", Ordered, func() { + // The names specified must match with those in the corresponding node from the after upgrade + // test stage. 
+ crpName := "crp-non-trackable-stuck" + workNamespaceName := "work-non-trackable-stuck" + jobName := "job-non-trackable-stuck" + + originalJob := &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: jobName, + Namespace: workNamespaceName, + }, + Spec: batchv1.JobSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Image: "busybox", + ImagePullPolicy: corev1.PullIfNotPresent, + Name: "busybox", + }, + }, + RestartPolicy: corev1.RestartPolicyNever, + }, + }, + BackoffLimit: ptr.To(int32(6)), + }, + } + + It("should keep the CRP status unchanged (rollout blocked due to long wait time)", func() { + wantResourceIdentifiers := []placementv1beta1.ResourceIdentifier{ + { + Kind: "Namespace", + Name: workNamespaceName, + Version: "v1", + }, + { + Group: "batch", + Kind: "Job", + Name: jobName, + Version: "v1", + Namespace: workNamespaceName, + }, + } + crpStatusUpdatedActual := crpWithStuckRolloutDueToUntrackableResourcesStatusUpdatedActual(crpName, wantResourceIdentifiers, allMemberClusterNames, "1") + Consistently(crpStatusUpdatedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to update CRP status as expected") + }) + + It("should keep the state of objects", func() { + // Validate things on the clusters with old resources. + Consistently(func() error { + for idx := range allMemberClusters { + memberCluster := allMemberClusters[idx] + + job := &batchv1.Job{} + if err := memberCluster.KubeClient.Get(ctx, types.NamespacedName{Name: jobName, Namespace: workNamespaceName}, job); err != nil { + return fmt.Errorf("failed to retrieve job on cluster %s: %w", memberCluster.ClusterName, err) + } + + // For simplicity, we only check the BackoffLimit field. 
+ if !cmp.Equal(job.Spec.BackoffLimit, originalJob.Spec.BackoffLimit) { + return fmt.Errorf("job backoff limit mismatches on cluster %s", memberCluster.ClusterName) + } + } + return nil + }, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to leave objects alone on clusters") + }) +}) diff --git a/test/upgrade/after/setup_test.go b/test/upgrade/after/setup_test.go new file mode 100644 index 000000000..ec658948c --- /dev/null +++ b/test/upgrade/after/setup_test.go @@ -0,0 +1,220 @@ +/* +Copyright (c) Microsoft Corporation. +Licensed under the MIT license. +*/ + +package after + +import ( + "context" + "flag" + "log" + "os" + "sync" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + k8sscheme "k8s.io/client-go/kubernetes/scheme" + "k8s.io/klog/v2" + clusterinventory "sigs.k8s.io/cluster-inventory-api/apis/v1alpha1" + "sigs.k8s.io/controller-runtime/pkg/client" + ctrllog "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + + clusterv1beta1 "go.goms.io/fleet/apis/cluster/v1beta1" + placementv1alpha1 "go.goms.io/fleet/apis/placement/v1alpha1" + placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" + "go.goms.io/fleet/pkg/utils" + "go.goms.io/fleet/test/e2e/framework" +) + +const ( + // The names of the hub cluster + the member clusters set up in this E2E test environment. + // + // Note that these names must match with those in `setup.sh`, with a prefix `kind-`. 
+ hubClusterName = "kind-hub" + memberCluster1EastProdName = "kind-cluster-1" + memberCluster2EastCanaryName = "kind-cluster-2" + memberCluster3WestProdName = "kind-cluster-3" + + // The names of the service accounts used by specific member clusters. + // + // Note that these names must also match those in `setup.sh`. + memberCluster1EastProdSAName = "fleet-member-agent-cluster-1" + memberCluster2EastCanarySAName = "fleet-member-agent-cluster-2" + memberCluster3WestProdSAName = "fleet-member-agent-cluster-3" + + kubeConfigPathEnvVarName = "KUBECONFIG" +) + +const ( + // This is for eventually timeouts in the cluster setup steps. + longEventuallyDuration = time.Minute * 2 + eventuallyInterval = time.Millisecond * 250 + consistentlyDuration = time.Second * 5 + consistentlyInterval = time.Millisecond * 500 +) + +var ( + ctx = context.Background() + scheme = runtime.NewScheme() + once = sync.Once{} + + hubCluster *framework.Cluster + memberCluster1EastProd *framework.Cluster + memberCluster2EastCanary *framework.Cluster + memberCluster3WestProd *framework.Cluster + + hubClient client.Client + impersonateHubClient client.Client + memberCluster1EastProdClient client.Client + memberCluster2EastCanaryClient client.Client + memberCluster3WestProdClient client.Client + + allMemberClusters []*framework.Cluster + allMemberClusterNames = []string{} +) + +var ( + lessFuncConditionByType = func(a, b metav1.Condition) bool { + return a.Type < b.Type + } + lessFuncPlacementStatusByClusterName = func(a, b placementv1beta1.ResourcePlacementStatus) bool { + return a.ClusterName < b.ClusterName + } + lessFuncPlacementStatusByConditions = func(a, b placementv1beta1.ResourcePlacementStatus) bool { + return len(a.Conditions) < len(b.Conditions) + } + + ignoreObjectMetaAutoGeneratedFields = cmpopts.IgnoreFields(metav1.ObjectMeta{}, "UID", "CreationTimestamp", "ResourceVersion", "Generation", "ManagedFields", "OwnerReferences") + ignoreObjectMetaAnnotationField = 
cmpopts.IgnoreFields(metav1.ObjectMeta{}, "Annotations") + ignoreNamespaceStatusField = cmpopts.IgnoreFields(corev1.Namespace{}, "Status") + ignoreJobSpecSelectorField = cmpopts.IgnoreFields(batchv1.JobSpec{}, "Selector") + ignorePodTemplateSpecObjectMetaField = cmpopts.IgnoreFields(corev1.PodTemplateSpec{}, "ObjectMeta") + ignoreJobStatusField = cmpopts.IgnoreFields(batchv1.Job{}, "Status") + ignoreServiceStatusField = cmpopts.IgnoreFields(corev1.Service{}, "Status") + ignoreServiceSpecIPAndPolicyFields = cmpopts.IgnoreFields(corev1.ServiceSpec{}, "ClusterIP", "ClusterIPs", "ExternalIPs", "SessionAffinity", "IPFamilies", "IPFamilyPolicy", "InternalTrafficPolicy") + ignoreServicePortNodePortProtocolField = cmpopts.IgnoreFields(corev1.ServicePort{}, "NodePort", "Protocol") + ignoreRPSClusterNameField = cmpopts.IgnoreFields(placementv1beta1.ResourcePlacementStatus{}, "ClusterName") + + crpStatusCmpOptions = cmp.Options{ + cmpopts.SortSlices(lessFuncConditionByType), + cmpopts.SortSlices(lessFuncPlacementStatusByClusterName), + cmpopts.SortSlices(utils.LessFuncResourceIdentifier), + cmpopts.SortSlices(utils.LessFuncFailedResourcePlacements), + utils.IgnoreConditionLTTAndMessageFields, + cmpopts.EquateEmpty(), + } + crpWithStuckRolloutStatusCmpOptions = cmp.Options{ + cmpopts.SortSlices(lessFuncConditionByType), + cmpopts.SortSlices(lessFuncPlacementStatusByConditions), + cmpopts.SortSlices(utils.LessFuncResourceIdentifier), + cmpopts.SortSlices(utils.LessFuncFailedResourcePlacements), + utils.IgnoreConditionLTTAndMessageFields, + ignoreRPSClusterNameField, + cmpopts.EquateEmpty(), + } +) + +// TestMain sets up the E2E test environment. +func TestMain(m *testing.M) { + // Add custom APIs to the scheme. 
+ if err := clusterv1beta1.AddToScheme(scheme); err != nil { + log.Fatalf("failed to add custom APIs (cluster) to the runtime scheme: %v", err) + } + if err := placementv1alpha1.AddToScheme(scheme); err != nil { + log.Fatalf("failed to add custom APIs (placement v1alpha1) to the runtime scheme: %v", err) + } + if err := placementv1beta1.AddToScheme(scheme); err != nil { + log.Fatalf("failed to add custom APIs (placement) to the runtime scheme: %v", err) + } + + // Add built-in APIs and extensions to the scheme. + if err := k8sscheme.AddToScheme(scheme); err != nil { + log.Fatalf("failed to add built-in APIs to the runtime scheme: %v", err) + } + if err := apiextensionsv1.AddToScheme(scheme); err != nil { + log.Fatalf("failed to add API extensions to the runtime scheme: %v", err) + } + if err := clusterinventory.AddToScheme(scheme); err != nil { + log.Fatalf("failed to add cluster inventory APIs to the runtime scheme: %v", err) + } + + os.Exit(m.Run()) +} + +func TestE2E(t *testing.T) { + RegisterFailHandler(Fail) + + RunSpecs(t, "Fleet Agent Upgrade Test Suite") +} + +func beforeSuiteForAllProcesses() { + // Set up the logger. + logger := zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true)) + klog.SetLogger(logger) + ctrllog.SetLogger(logger) + By("Setup klog") + fs := flag.NewFlagSet("klog", flag.ContinueOnError) + klog.InitFlags(fs) + Expect(fs.Parse([]string{"--v", "5", "-add_dir_header", "true"})).Should(Succeed()) + + // Check if the required environment variable, which specifies the path to kubeconfig file, has been set. + Expect(os.Getenv(kubeConfigPathEnvVarName)).NotTo(BeEmpty(), "Required environment variable KUBECONFIG is not set") + + // Initialize the cluster objects and their clients. 
+ hubCluster = framework.NewCluster(hubClusterName, "", scheme, nil) + Expect(hubCluster).NotTo(BeNil(), "Failed to initialize cluster object") + framework.GetClusterClient(hubCluster) + hubClient = hubCluster.KubeClient + Expect(hubClient).NotTo(BeNil(), "Failed to initialize client for accessing Kubernetes cluster") + impersonateHubClient = hubCluster.ImpersonateKubeClient + Expect(impersonateHubClient).NotTo(BeNil(), "Failed to initialize impersonate client for accessing Kubernetes cluster") + + memberCluster1EastProd = framework.NewCluster(memberCluster1EastProdName, memberCluster1EastProdSAName, scheme, nil) + Expect(memberCluster1EastProd).NotTo(BeNil(), "Failed to initialize cluster object") + framework.GetClusterClient(memberCluster1EastProd) + memberCluster1EastProdClient = memberCluster1EastProd.KubeClient + Expect(memberCluster1EastProdClient).NotTo(BeNil(), "Failed to initialize client for accessing Kubernetes cluster") + + memberCluster2EastCanary = framework.NewCluster(memberCluster2EastCanaryName, memberCluster2EastCanarySAName, scheme, nil) + Expect(memberCluster2EastCanary).NotTo(BeNil(), "Failed to initialize cluster object") + framework.GetClusterClient(memberCluster2EastCanary) + memberCluster2EastCanaryClient = memberCluster2EastCanary.KubeClient + Expect(memberCluster2EastCanaryClient).NotTo(BeNil(), "Failed to initialize client for accessing Kubernetes cluster") + + memberCluster3WestProd = framework.NewCluster(memberCluster3WestProdName, memberCluster3WestProdSAName, scheme, nil) + Expect(memberCluster3WestProd).NotTo(BeNil(), "Failed to initialize cluster object") + framework.GetClusterClient(memberCluster3WestProd) + memberCluster3WestProdClient = memberCluster3WestProd.KubeClient + Expect(memberCluster3WestProdClient).NotTo(BeNil(), "Failed to initialize client for accessing kubernetes cluster") + + allMemberClusters = []*framework.Cluster{memberCluster1EastProd, memberCluster2EastCanary, memberCluster3WestProd} + once.Do(func() { + // 
Set these arrays only once; this is necessary as for the first spawned Ginkgo process, + // the `beforeSuiteForAllProcesses` function is called twice. + for i := range allMemberClusters { + allMemberClusterNames = append(allMemberClusterNames, allMemberClusters[i].ClusterName) + } + }) +} + +func beforeSuiteForProcess1() { + beforeSuiteForAllProcesses() + + // All member clusters have joined in the previous stage; no need to re-join them. +} + +var _ = SynchronizedBeforeSuite(beforeSuiteForProcess1, beforeSuiteForAllProcesses) + +// For upgrade tests in the after stage, there is no need to tear down the test environment +// (i.e., no AfterSuite node). diff --git a/test/upgrade/after/utils_test.go b/test/upgrade/after/utils_test.go new file mode 100644 index 000000000..92d45f5c0 --- /dev/null +++ b/test/upgrade/after/utils_test.go @@ -0,0 +1,29 @@ +/* +Copyright (c) Microsoft Corporation. +Licensed under the MIT license. +*/ + +package after + +import ( + . "github.com/onsi/gomega" + + "go.goms.io/fleet/test/e2e/framework" +) + +// checkIfPlacedWorkResourcesOnAllMemberClustersConsistently verifies if the work resources have been placed on +// all applicable member clusters. +func checkIfPlacedWorkResourcesOnAllMemberClustersConsistently(workNamespaceName, appConfigMapName string) { + checkIfPlacedWorkResourcesOnMemberClustersConsistently(allMemberClusters, workNamespaceName, appConfigMapName) +} + +// checkIfPlacedWorkResourcesOnMemberClustersConsistently verifies if the work resources have been placed on +// the specified set of member clusters. +func checkIfPlacedWorkResourcesOnMemberClustersConsistently(clusters []*framework.Cluster, workNamespaceName, appConfigMapName string) { + for idx := range clusters { + memberCluster := clusters[idx] + workResourcesPlacedActual := workNamespaceAndConfigMapPlacedOnClusterActual(memberCluster, workNamespaceName, appConfigMapName) + // Give the system a bit more breathing room when process resource placement. 
+ Consistently(workResourcesPlacedActual, consistentlyDuration, consistentlyInterval).Should(Succeed(), "Failed to place work resources on member cluster %s", memberCluster.ClusterName) + } +} diff --git a/test/upgrade/before/actuals_test.go b/test/upgrade/before/actuals_test.go new file mode 100644 index 000000000..f86500983 --- /dev/null +++ b/test/upgrade/before/actuals_test.go @@ -0,0 +1,924 @@ +package before + +import ( + "fmt" + + "github.com/google/go-cmp/cmp" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + + placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" + "go.goms.io/fleet/pkg/controllers/clusterresourceplacement" + "go.goms.io/fleet/pkg/controllers/work" + scheduler "go.goms.io/fleet/pkg/scheduler/framework" + "go.goms.io/fleet/pkg/utils/condition" + "go.goms.io/fleet/test/e2e/framework" +) + +func resourcePlacementRolloutCompletedConditions(generation int64, resourceIsTrackable bool, hasOverride bool) []metav1.Condition { + availableConditionReason := work.WorkNotTrackableReason + if resourceIsTrackable { + availableConditionReason = condition.AllWorkAvailableReason + } + overrideConditionReason := condition.OverrideNotSpecifiedReason + if hasOverride { + overrideConditionReason = condition.OverriddenSucceededReason + } + + return []metav1.Condition{ + { + Type: string(placementv1beta1.ResourceScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: condition.ScheduleSucceededReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourceRolloutStartedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.RolloutStartedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourceOverriddenConditionType), + Status: metav1.ConditionTrue, + Reason: overrideConditionReason, + ObservedGeneration: generation, + }, + { + Type: 
string(placementv1beta1.ResourceWorkSynchronizedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.AllWorkSyncedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourcesAppliedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.AllWorkAppliedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourcesAvailableConditionType), + Status: metav1.ConditionTrue, + Reason: availableConditionReason, + ObservedGeneration: generation, + }, + } +} + +func resourcePlacementScheduleFailedConditions(generation int64) []metav1.Condition { + return []metav1.Condition{ + { + Type: string(placementv1beta1.ResourceScheduledConditionType), + Status: metav1.ConditionFalse, + ObservedGeneration: generation, + Reason: clusterresourceplacement.ResourceScheduleFailedReason, + }, + } +} + +func resourcePlacementApplyFailedConditions(generation int64) []metav1.Condition { + return []metav1.Condition{ + { + Type: string(placementv1beta1.ResourceScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: condition.ScheduleSucceededReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourceRolloutStartedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.RolloutStartedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourceOverriddenConditionType), + Status: metav1.ConditionTrue, + Reason: condition.OverrideNotSpecifiedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourceWorkSynchronizedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.AllWorkSyncedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourcesAppliedConditionType), + Status: metav1.ConditionFalse, + Reason: condition.WorkNotAppliedReason, + ObservedGeneration: generation, + }, + } +} + +func resourcePlacementAvailabilityCheckFailedConditions(generation 
int64) []metav1.Condition { + return []metav1.Condition{ + { + Type: string(placementv1beta1.ResourceScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: condition.ScheduleSucceededReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourceRolloutStartedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.RolloutStartedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourceOverriddenConditionType), + Status: metav1.ConditionTrue, + Reason: condition.OverrideNotSpecifiedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourceWorkSynchronizedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.AllWorkSyncedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourcesAppliedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.AllWorkAppliedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ResourcesAvailableConditionType), + Status: metav1.ConditionFalse, + Reason: condition.WorkNotAvailableReason, + ObservedGeneration: generation, + }, + } +} + +func crpRolloutCompletedConditions(generation int64, hasOverride bool) []metav1.Condition { + overrideConditionReason := condition.OverrideNotSpecifiedReason + if hasOverride { + overrideConditionReason = condition.OverriddenSucceededReason + } + return []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterResourcePlacementScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: scheduler.FullyScheduledReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementRolloutStartedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.RolloutStartedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementOverriddenConditionType), + Status: metav1.ConditionTrue, + Reason: 
overrideConditionReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementWorkSynchronizedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.WorkSynchronizedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementAppliedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.ApplySucceededReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementAvailableConditionType), + Status: metav1.ConditionTrue, + Reason: condition.AvailableReason, + ObservedGeneration: generation, + }, + } +} + +func crpSchedulePartiallyFailedConditions(generation int64) []metav1.Condition { + return []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterResourcePlacementScheduledConditionType), + Status: metav1.ConditionFalse, + ObservedGeneration: generation, + Reason: scheduler.NotFullyScheduledReason, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementRolloutStartedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.RolloutStartedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementOverriddenConditionType), + Status: metav1.ConditionTrue, + Reason: condition.OverrideNotSpecifiedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementWorkSynchronizedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.WorkSynchronizedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementAppliedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.ApplySucceededReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementAvailableConditionType), + Status: metav1.ConditionTrue, + Reason: condition.AvailableReason, + ObservedGeneration: generation, + }, + } +} + 
+func crpScheduleFailedConditions(generation int64) []metav1.Condition { + return []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterResourcePlacementScheduledConditionType), + Status: metav1.ConditionFalse, + ObservedGeneration: generation, + Reason: scheduler.NotFullyScheduledReason, + }, + } +} + +func crpNotAvailableConditions(generation int64, hasOverride bool) []metav1.Condition { + overrideConditionReason := condition.OverrideNotSpecifiedReason + if hasOverride { + overrideConditionReason = condition.OverriddenSucceededReason + } + return []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterResourcePlacementScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: scheduler.FullyScheduledReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementRolloutStartedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.RolloutStartedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementOverriddenConditionType), + Status: metav1.ConditionTrue, + Reason: overrideConditionReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementWorkSynchronizedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.WorkSynchronizedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementAppliedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.ApplySucceededReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementAvailableConditionType), + Status: metav1.ConditionFalse, + Reason: condition.NotAvailableYetReason, + ObservedGeneration: generation, + }, + } +} + +func crpNotAppliedConditions(generation int64) []metav1.Condition { + return []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterResourcePlacementScheduledConditionType), + Status: 
metav1.ConditionTrue, + Reason: scheduler.FullyScheduledReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementRolloutStartedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.RolloutStartedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementOverriddenConditionType), + Status: metav1.ConditionTrue, + Reason: condition.OverrideNotSpecifiedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementWorkSynchronizedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.WorkSynchronizedReason, + ObservedGeneration: generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementAppliedConditionType), + Status: metav1.ConditionFalse, + Reason: condition.ApplyFailedReason, + ObservedGeneration: generation, + }, + } +} + +func crpStatusUpdatedActual(crpName string, wantSelectedResourceIdentifiers []placementv1beta1.ResourceIdentifier, wantSelectedClusters, wantUnselectedClusters []string, wantObservedResourceIndex string) func() error { + return customizedCRPStatusUpdatedActual(crpName, wantSelectedResourceIdentifiers, wantSelectedClusters, wantUnselectedClusters, wantObservedResourceIndex, true) +} + +func customizedCRPStatusUpdatedActual(crpName string, + wantSelectedResourceIdentifiers []placementv1beta1.ResourceIdentifier, + wantSelectedClusters, wantUnselectedClusters []string, + wantObservedResourceIndex string, + resourceIsTrackable bool) func() error { + return func() error { + crp := &placementv1beta1.ClusterResourcePlacement{} + if err := hubClient.Get(ctx, types.NamespacedName{Name: crpName}, crp); err != nil { + return err + } + + wantPlacementStatus := []placementv1beta1.ResourcePlacementStatus{} + for _, name := range wantSelectedClusters { + wantPlacementStatus = append(wantPlacementStatus, placementv1beta1.ResourcePlacementStatus{ + ClusterName: name, + 
Conditions: resourcePlacementRolloutCompletedConditions(crp.Generation, resourceIsTrackable, false), + }) + } + for i := 0; i < len(wantUnselectedClusters); i++ { + wantPlacementStatus = append(wantPlacementStatus, placementv1beta1.ResourcePlacementStatus{ + Conditions: resourcePlacementScheduleFailedConditions(crp.Generation), + }) + } + + var wantCRPConditions []metav1.Condition + if len(wantSelectedClusters) > 0 { + wantCRPConditions = crpRolloutCompletedConditions(crp.Generation, false) + } else { + wantCRPConditions = []metav1.Condition{ + // we don't set the remaining resource conditions. + { + Type: string(placementv1beta1.ClusterResourcePlacementScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: scheduler.FullyScheduledReason, + ObservedGeneration: crp.Generation, + }, + } + } + + if len(wantUnselectedClusters) > 0 { + if len(wantSelectedClusters) > 0 { + wantCRPConditions = crpSchedulePartiallyFailedConditions(crp.Generation) + } else { + // we don't set the remaining resource conditions if there are no clusters to select + wantCRPConditions = crpScheduleFailedConditions(crp.Generation) + } + } + + // Note that the CRP controller will only keep decisions regarding unselected clusters for a CRP if: + // + // * The CRP is of the PickN placement type and the required N count cannot be fulfilled; or + // * The CRP is of the PickFixed placement type and the list of target clusters specified cannot be fulfilled. 
+ wantStatus := placementv1beta1.ClusterResourcePlacementStatus{ + Conditions: wantCRPConditions, + PlacementStatuses: wantPlacementStatus, + SelectedResources: wantSelectedResourceIdentifiers, + ObservedResourceIndex: wantObservedResourceIndex, + } + if diff := cmp.Diff(crp.Status, wantStatus, crpStatusCmpOptions...); diff != "" { + return fmt.Errorf("CRP status diff (-got, +want): %s", diff) + } + return nil + } +} + +func crpWithOneFailedAvailabilityCheckStatusUpdatedActual( + crpName string, + wantSelectedResourceIdentifiers []placementv1beta1.ResourceIdentifier, + wantFailedClusters []string, + wantFailedWorkloadResourceIdentifier placementv1beta1.ResourceIdentifier, + wantFailedResourceObservedGeneration int64, + wantAvailableClusters []string, + wantObservedResourceIndex string, +) func() error { + return func() error { + crp := &placementv1beta1.ClusterResourcePlacement{} + if err := hubClient.Get(ctx, types.NamespacedName{Name: crpName}, crp); err != nil { + return err + } + + var wantPlacementStatus []placementv1beta1.ResourcePlacementStatus + + for _, name := range wantFailedClusters { + wantPlacementStatus = append(wantPlacementStatus, placementv1beta1.ResourcePlacementStatus{ + ClusterName: name, + Conditions: resourcePlacementAvailabilityCheckFailedConditions(crp.Generation), + FailedPlacements: []placementv1beta1.FailedResourcePlacement{ + { + ResourceIdentifier: wantFailedWorkloadResourceIdentifier, + Condition: metav1.Condition{ + Type: string(placementv1beta1.ResourcesAvailableConditionType), + Status: metav1.ConditionFalse, + // TO-DO (chenyu1): this reason string is subject to change. 
+ Reason: "ManifestNotAvailableYet", + ObservedGeneration: wantFailedResourceObservedGeneration, + }, + }, + }, + }) + } + + for _, name := range wantAvailableClusters { + wantPlacementStatus = append(wantPlacementStatus, placementv1beta1.ResourcePlacementStatus{ + ClusterName: name, + Conditions: resourcePlacementRolloutCompletedConditions(crp.Generation, true, false), + }) + } + + wantStatus := placementv1beta1.ClusterResourcePlacementStatus{ + Conditions: crpNotAvailableConditions(crp.Generation, false), + PlacementStatuses: wantPlacementStatus, + SelectedResources: wantSelectedResourceIdentifiers, + ObservedResourceIndex: wantObservedResourceIndex, + } + + if diff := cmp.Diff(crp.Status, wantStatus, crpStatusCmpOptions...); diff != "" { + return fmt.Errorf("CRP status diff (-got, +want): %s", diff) + } + return nil + } +} + +func crpWithOneFailedApplyOpStatusUpdatedActual( + crpName string, + wantSelectedResourceIdentifiers []placementv1beta1.ResourceIdentifier, + wantFailedClusters []string, + wantFailedWorkloadResourceIdentifier placementv1beta1.ResourceIdentifier, + wantFailedResourceObservedGeneration int64, + wantAvailableClusters []string, + wantObservedResourceIndex string, +) func() error { + return func() error { + crp := &placementv1beta1.ClusterResourcePlacement{} + if err := hubClient.Get(ctx, types.NamespacedName{Name: crpName}, crp); err != nil { + return err + } + + var wantPlacementStatus []placementv1beta1.ResourcePlacementStatus + + for _, name := range wantFailedClusters { + wantPlacementStatus = append(wantPlacementStatus, placementv1beta1.ResourcePlacementStatus{ + ClusterName: name, + Conditions: resourcePlacementApplyFailedConditions(crp.Generation), + FailedPlacements: []placementv1beta1.FailedResourcePlacement{ + { + ResourceIdentifier: wantFailedWorkloadResourceIdentifier, + Condition: metav1.Condition{ + Type: string(placementv1beta1.ResourcesAppliedConditionType), + Status: metav1.ConditionFalse, + // TO-DO (chenyu1): this reason 
string is subject to change. + Reason: "ManifestApplyFailed", + ObservedGeneration: wantFailedResourceObservedGeneration, + }, + }, + }, + }) + } + + for _, name := range wantAvailableClusters { + wantPlacementStatus = append(wantPlacementStatus, placementv1beta1.ResourcePlacementStatus{ + ClusterName: name, + Conditions: resourcePlacementRolloutCompletedConditions(crp.Generation, true, false), + }) + } + + wantStatus := placementv1beta1.ClusterResourcePlacementStatus{ + Conditions: crpNotAppliedConditions(crp.Generation), + PlacementStatuses: wantPlacementStatus, + SelectedResources: wantSelectedResourceIdentifiers, + ObservedResourceIndex: wantObservedResourceIndex, + } + + if diff := cmp.Diff(crp.Status, wantStatus, crpStatusCmpOptions...); diff != "" { + return fmt.Errorf("CRP status diff (-got, +want): %s", diff) + } + return nil + } +} + +func crpWithStuckRolloutDueToOneFailedAvailabilityCheckStatusUpdatedActual( + crpName string, + wantSelectedResourceIdentifiers []placementv1beta1.ResourceIdentifier, + failedWorkloadResourceIdentifier placementv1beta1.ResourceIdentifier, + failedResourceObservedGeneration int64, + wantSelectedClusters []string, + wantObservedResourceIndex string, +) func() error { + return func() error { + crp := &placementv1beta1.ClusterResourcePlacement{} + if err := hubClient.Get(ctx, types.NamespacedName{Name: crpName}, crp); err != nil { + return err + } + + var wantPlacementStatus []placementv1beta1.ResourcePlacementStatus + // We only expect the deployment to not be available on one cluster. 
+ unavailableResourcePlacementStatus := placementv1beta1.ResourcePlacementStatus{ + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ResourceScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: condition.ScheduleSucceededReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ResourceRolloutStartedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.RolloutStartedReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ResourceOverriddenConditionType), + Status: metav1.ConditionTrue, + Reason: condition.OverrideNotSpecifiedReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ResourceWorkSynchronizedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.AllWorkSyncedReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ResourcesAppliedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.AllWorkAppliedReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ResourcesAvailableConditionType), + Status: metav1.ConditionFalse, + Reason: condition.WorkNotAvailableReason, + ObservedGeneration: crp.Generation, + }, + }, + FailedPlacements: []placementv1beta1.FailedResourcePlacement{ + { + ResourceIdentifier: failedWorkloadResourceIdentifier, + Condition: metav1.Condition{ + Type: string(placementv1beta1.ResourcesAvailableConditionType), + Status: metav1.ConditionFalse, + // TO-DO (chenyu1): this reason string is subject to change. + Reason: "ManifestNotAvailableYet", + ObservedGeneration: failedResourceObservedGeneration, + }, + }, + }, + } + wantPlacementStatus = append(wantPlacementStatus, unavailableResourcePlacementStatus) + + // For all the other connected member clusters rollout will be blocked. 
+ rolloutBlockedPlacementStatus := placementv1beta1.ResourcePlacementStatus{ + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ResourceScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: condition.ScheduleSucceededReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ResourceRolloutStartedConditionType), + Status: metav1.ConditionFalse, + Reason: condition.RolloutNotStartedYetReason, + ObservedGeneration: crp.Generation, + }, + }, + } + + for i := 0; i < len(wantSelectedClusters)-1; i++ { + wantPlacementStatus = append(wantPlacementStatus, rolloutBlockedPlacementStatus) + } + + wantCRPConditions := []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterResourcePlacementScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: scheduler.FullyScheduledReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementRolloutStartedConditionType), + Status: metav1.ConditionFalse, + Reason: condition.RolloutNotStartedYetReason, + ObservedGeneration: crp.Generation, + }, + } + + wantStatus := placementv1beta1.ClusterResourcePlacementStatus{ + Conditions: wantCRPConditions, + PlacementStatuses: wantPlacementStatus, + SelectedResources: wantSelectedResourceIdentifiers, + ObservedResourceIndex: wantObservedResourceIndex, + } + + if diff := cmp.Diff(crp.Status, wantStatus, crpWithStuckRolloutStatusCmpOptions...); diff != "" { + return fmt.Errorf("CRP status diff (-got, +want): %s", diff) + } + return nil + } +} + +func crpWithStuckRolloutDueToOneFailedApplyOpStatusUpdatedActual( + crpName string, + wantSelectedResourceIdentifiers []placementv1beta1.ResourceIdentifier, + failedWorkloadResourceIdentifier placementv1beta1.ResourceIdentifier, + failedResourceObservedGeneration int64, + wantSelectedClusters []string, + wantObservedResourceIndex string, +) func() error { + return func() error { + crp := &placementv1beta1.ClusterResourcePlacement{} + if 
err := hubClient.Get(ctx, types.NamespacedName{Name: crpName}, crp); err != nil { + return err + } + + var wantPlacementStatus []placementv1beta1.ResourcePlacementStatus + // We only expect the deployment to not be available on one cluster. + unavailableResourcePlacementStatus := placementv1beta1.ResourcePlacementStatus{ + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ResourceScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: condition.ScheduleSucceededReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ResourceRolloutStartedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.RolloutStartedReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ResourceOverriddenConditionType), + Status: metav1.ConditionTrue, + Reason: condition.OverrideNotSpecifiedReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ResourceWorkSynchronizedConditionType), + Status: metav1.ConditionTrue, + Reason: condition.AllWorkSyncedReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ResourcesAppliedConditionType), + Status: metav1.ConditionFalse, + Reason: condition.WorkNotAppliedReason, + ObservedGeneration: crp.Generation, + }, + }, + FailedPlacements: []placementv1beta1.FailedResourcePlacement{ + { + ResourceIdentifier: failedWorkloadResourceIdentifier, + Condition: metav1.Condition{ + Type: string(placementv1beta1.ResourcesAppliedConditionType), + Status: metav1.ConditionFalse, + // TO-DO (chenyu1): this reason string is subject to change. + Reason: "ManifestApplyFailed", + ObservedGeneration: failedResourceObservedGeneration, + }, + }, + }, + } + wantPlacementStatus = append(wantPlacementStatus, unavailableResourcePlacementStatus) + + // For all the other connected member clusters rollout will be blocked. 
+ rolloutBlockedPlacementStatus := placementv1beta1.ResourcePlacementStatus{ + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ResourceScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: condition.ScheduleSucceededReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ResourceRolloutStartedConditionType), + Status: metav1.ConditionFalse, + Reason: condition.RolloutNotStartedYetReason, + ObservedGeneration: crp.Generation, + }, + }, + } + + for i := 0; i < len(wantSelectedClusters)-1; i++ { + wantPlacementStatus = append(wantPlacementStatus, rolloutBlockedPlacementStatus) + } + + wantCRPConditions := []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterResourcePlacementScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: scheduler.FullyScheduledReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementRolloutStartedConditionType), + Status: metav1.ConditionFalse, + Reason: condition.RolloutNotStartedYetReason, + ObservedGeneration: crp.Generation, + }, + } + + wantStatus := placementv1beta1.ClusterResourcePlacementStatus{ + Conditions: wantCRPConditions, + PlacementStatuses: wantPlacementStatus, + SelectedResources: wantSelectedResourceIdentifiers, + ObservedResourceIndex: wantObservedResourceIndex, + } + + if diff := cmp.Diff(crp.Status, wantStatus, crpWithStuckRolloutStatusCmpOptions...); diff != "" { + return fmt.Errorf("CRP status diff (-got, +want): %s", diff) + } + return nil + } +} + +func crpWithStuckRolloutDueToUntrackableResourcesStatusUpdatedActual( + crpName string, + wantSelectedResourceIdentifiers []placementv1beta1.ResourceIdentifier, + wantSelectedClusters []string, + wantObservedResourceIndex string, +) func() error { + return func() error { + crp := &placementv1beta1.ClusterResourcePlacement{} + if err := hubClient.Get(ctx, types.NamespacedName{Name: crpName}, crp); err != nil { + return err + } + + var 
wantPlacementStatus []placementv1beta1.ResourcePlacementStatus + // For all the other connected member clusters rollout will be blocked. + rolloutBlockedPlacementStatus := placementv1beta1.ResourcePlacementStatus{ + Conditions: []metav1.Condition{ + { + Type: string(placementv1beta1.ResourceScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: condition.ScheduleSucceededReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ResourceRolloutStartedConditionType), + Status: metav1.ConditionFalse, + Reason: condition.RolloutNotStartedYetReason, + ObservedGeneration: crp.Generation, + }, + }, + } + + for i := 0; i < len(wantSelectedClusters); i++ { + wantPlacementStatus = append(wantPlacementStatus, rolloutBlockedPlacementStatus) + } + + wantCRPConditions := []metav1.Condition{ + { + Type: string(placementv1beta1.ClusterResourcePlacementScheduledConditionType), + Status: metav1.ConditionTrue, + Reason: scheduler.FullyScheduledReason, + ObservedGeneration: crp.Generation, + }, + { + Type: string(placementv1beta1.ClusterResourcePlacementRolloutStartedConditionType), + Status: metav1.ConditionFalse, + Reason: condition.RolloutNotStartedYetReason, + ObservedGeneration: crp.Generation, + }, + } + + wantStatus := placementv1beta1.ClusterResourcePlacementStatus{ + Conditions: wantCRPConditions, + PlacementStatuses: wantPlacementStatus, + SelectedResources: wantSelectedResourceIdentifiers, + ObservedResourceIndex: wantObservedResourceIndex, + } + + if diff := cmp.Diff(crp.Status, wantStatus, crpWithStuckRolloutStatusCmpOptions...); diff != "" { + return fmt.Errorf("CRP status diff (-got, +want): %s", diff) + } + return nil + } +} + +func validateWorkNamespaceOnCluster(cluster *framework.Cluster, name types.NamespacedName) error { + ns := &corev1.Namespace{} + if err := cluster.KubeClient.Get(ctx, name, ns); err != nil { + return err + } + + // Use the object created in the hub cluster as reference; this helps to avoid the trouble + 
// of having to ignore default fields in the spec. + wantNS := &corev1.Namespace{} + if err := hubClient.Get(ctx, name, wantNS); err != nil { + return err + } + + if diff := cmp.Diff( + ns, wantNS, + ignoreNamespaceStatusField, + ignoreObjectMetaAutoGeneratedFields, + ignoreObjectMetaAnnotationField, + ); diff != "" { + return fmt.Errorf("work namespace diff (-got, +want): %s", diff) + } + return nil +} + +func validateConfigMapOnCluster(cluster *framework.Cluster, name types.NamespacedName) error { + configMap := &corev1.ConfigMap{} + if err := cluster.KubeClient.Get(ctx, name, configMap); err != nil { + return err + } + + // Use the object created in the hub cluster as reference. + wantConfigMap := &corev1.ConfigMap{} + if err := hubClient.Get(ctx, name, wantConfigMap); err != nil { + return err + } + + if diff := cmp.Diff( + configMap, wantConfigMap, + ignoreObjectMetaAutoGeneratedFields, + ignoreObjectMetaAnnotationField, + ); diff != "" { + return fmt.Errorf("app config map diff (-got, +want): %s", diff) + } + + return nil +} + +func validateJobOnCluster(cluster *framework.Cluster, name types.NamespacedName) error { + job := &batchv1.Job{} + if err := cluster.KubeClient.Get(ctx, name, job); err != nil { + return err + } + + // Use the object created in the hub cluster as reference. + wantJob := &batchv1.Job{} + if err := hubClient.Get(ctx, name, wantJob); err != nil { + return err + } + + if diff := cmp.Diff( + job, wantJob, + ignoreObjectMetaAutoGeneratedFields, + ignoreObjectMetaAnnotationField, + ignoreJobSpecSelectorField, + ignorePodTemplateSpecObjectMetaField, + ignoreJobStatusField, + ); diff != "" { + return fmt.Errorf("job diff (-got, +want): %s", diff) + } + + return nil +} + +func validateServiceOnCluster(cluster *framework.Cluster, name types.NamespacedName) error { + service := &corev1.Service{} + if err := cluster.KubeClient.Get(ctx, name, service); err != nil { + return err + } + + // Use the object created in the hub cluster as reference. 
+ wantService := &corev1.Service{} + if err := hubClient.Get(ctx, name, wantService); err != nil { + return err + } + + if diff := cmp.Diff( + service, wantService, + ignoreObjectMetaAutoGeneratedFields, + ignoreObjectMetaAnnotationField, + ignoreServiceStatusField, + ignoreServiceSpecIPAndPolicyFields, + ignoreServicePortNodePortProtocolField, + ); diff != "" { + return fmt.Errorf("service diff (-got, +want): %s", diff) + } + + return nil +} + +func workNamespaceAndConfigMapPlacedOnClusterActual(cluster *framework.Cluster, workNamespaceName, appConfigMapName string) func() error { + return func() error { + if err := validateWorkNamespaceOnCluster(cluster, types.NamespacedName{Name: workNamespaceName}); err != nil { + return err + } + + return validateConfigMapOnCluster(cluster, types.NamespacedName{Namespace: workNamespaceName, Name: appConfigMapName}) + } +} diff --git a/test/upgrade/before/resources_test.go b/test/upgrade/before/resources_test.go new file mode 100644 index 000000000..6b94ef14f --- /dev/null +++ b/test/upgrade/before/resources_test.go @@ -0,0 +1,67 @@ +/* +Copyright (c) Microsoft Corporation. +Licensed under the MIT license. 
+*/ + +package before + +import ( + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" +) + +const ( + workNamespaceLabelName = "target-test-spec" +) + +func workResourceSelector(workNamespaceName string) []placementv1beta1.ClusterResourceSelector { + return []placementv1beta1.ClusterResourceSelector{ + { + Group: "", + Kind: "Namespace", + Version: "v1", + Name: workNamespaceName, + }, + } +} + +func appNamespace(workNamespaceName string, crpName string) corev1.Namespace { + return corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: workNamespaceName, + Labels: map[string]string{ + workNamespaceLabelName: crpName, + }, + }, + } +} + +func appConfigMap(workNamespaceName, appConfigMapName string) corev1.ConfigMap { + return corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: appConfigMapName, + Namespace: workNamespaceName, + }, + Data: map[string]string{ + "data": "test", + }, + } +} + +func workResourceIdentifiers(workNamespaceName, appConfigMapName string) []placementv1beta1.ResourceIdentifier { + return []placementv1beta1.ResourceIdentifier{ + { + Kind: "Namespace", + Name: workNamespaceName, + Version: "v1", + }, + { + Kind: "ConfigMap", + Name: appConfigMapName, + Version: "v1", + Namespace: workNamespaceName, + }, + } +} diff --git a/test/upgrade/before/scenarios_test.go b/test/upgrade/before/scenarios_test.go new file mode 100644 index 000000000..9727b2b4d --- /dev/null +++ b/test/upgrade/before/scenarios_test.go @@ -0,0 +1,825 @@ +/* +Copyright (c) Microsoft Corporation. +Licensed under the MIT license. +*/ + +package before + +import ( + "fmt" + + "github.com/google/go-cmp/cmp" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + + placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" +) + +var _ = Describe("CRP with trackable resources, all available (before upgrade)", Ordered, func() { + // The names specified must match with those in the corresponding node from the after upgrade + // test stage. + crpName := "crp-trackable-available" + workNamespaceName := "work-trackable-available" + appConfigMapName := "app-configmap-trackable-available" + + BeforeAll(func() { + // Create the resources. + createWorkResources(workNamespaceName, appConfigMapName, crpName) + + // Create the CRP. + crp := &placementv1beta1.ClusterResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpName, + }, + Spec: placementv1beta1.ClusterResourcePlacementSpec{ + ResourceSelectors: workResourceSelector(workNamespaceName), + Strategy: placementv1beta1.RolloutStrategy{ + Type: placementv1beta1.RollingUpdateRolloutStrategyType, + RollingUpdate: &placementv1beta1.RollingUpdateConfig{ + UnavailablePeriodSeconds: ptr.To(2), + }, + }, + }, + } + Expect(hubClient.Create(ctx, crp)).To(Succeed(), "Failed to create CRP") + }) + + It("should update CRP status as expected", func() { + crpStatusUpdatedActual := crpStatusUpdatedActual(crpName, workResourceIdentifiers(workNamespaceName, appConfigMapName), allMemberClusterNames, nil, "0") + Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + }) + + It("should place the resources on all member clusters", func() { + checkIfPlacedWorkResourcesOnAllMemberClusters(workNamespaceName, appConfigMapName) + }) +}) + +var _ = Describe("CRP with non-trackable resources, all available 
(before upgrade)", Ordered, func() { + // The names specified must match with those in the corresponding node from the after upgrade + // test stage. + crpName := "crp-non-trackable-available" + workNamespaceName := "work-non-trackable-available" + jobName := "job-non-trackable-available" + + BeforeAll(func() { + // Create the resources. + ns := appNamespace(workNamespaceName, crpName) + Expect(hubClient.Create(ctx, &ns)).To(Succeed(), "Failed to create namespace %s", ns.Namespace) + + // Job is currently untrackable in Fleet. + job := batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: jobName, + Namespace: workNamespaceName, + }, + Spec: batchv1.JobSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Image: "busybox", + ImagePullPolicy: corev1.PullIfNotPresent, + Name: "busybox", + }, + }, + RestartPolicy: corev1.RestartPolicyNever, + }, + }, + }, + } + Expect(hubClient.Create(ctx, &job)).To(Succeed(), "Failed to create job %s", job.Name) + + // Create the CRP. 
+ crp := &placementv1beta1.ClusterResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpName, + }, + Spec: placementv1beta1.ClusterResourcePlacementSpec{ + ResourceSelectors: workResourceSelector(workNamespaceName), + Strategy: placementv1beta1.RolloutStrategy{ + Type: placementv1beta1.RollingUpdateRolloutStrategyType, + RollingUpdate: &placementv1beta1.RollingUpdateConfig{ + UnavailablePeriodSeconds: ptr.To(2), + }, + }, + }, + } + Expect(hubClient.Create(ctx, crp)).To(Succeed(), "Failed to create CRP") + }) + + It("should update CRP status as expected", func() { + wantResourceIdentifiers := []placementv1beta1.ResourceIdentifier{ + { + Kind: "Namespace", + Name: workNamespaceName, + Version: "v1", + }, + { + Group: "batch", + Kind: "Job", + Name: jobName, + Version: "v1", + Namespace: workNamespaceName, + }, + } + crpStatusUpdatedActual := customizedCRPStatusUpdatedActual(crpName, wantResourceIdentifiers, allMemberClusterNames, nil, "0", false) + Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + }) + + It("should place the resources on all member clusters", func() { + for idx := range allMemberClusters { + memberCluster := allMemberClusters[idx] + // Give the system a bit more breathing room when process resource placement. + Eventually(func() error { + if err := validateWorkNamespaceOnCluster(memberCluster, types.NamespacedName{Name: workNamespaceName}); err != nil { + return err + } + + return validateJobOnCluster(memberCluster, types.NamespacedName{Name: jobName, Namespace: workNamespaceName}) + }, eventuallyDuration*2, eventuallyInterval).Should(Succeed(), "Failed to place work resources on member cluster %s", memberCluster.ClusterName) + } + }) +}) + +var _ = Describe("CRP with availability failure (before upgrade)", Ordered, func() { + // The names specified must match with those in the corresponding node from the after upgrade + // test stage. 
+ crpName := "crp-availability-failure" + workNamespaceName := "work-availability-failure" + svcName := "svc-availability-failure" + + BeforeAll(func() { + // Create the resources. + ns := appNamespace(workNamespaceName, crpName) + Expect(hubClient.Create(ctx, &ns)).To(Succeed(), "Failed to create namespace %s", ns.Namespace) + + // Use a Service of the LoadBalancer type as by default KinD environment does not support + // this service type and such services will always be unavailable. + // + // Fleet supports a few other API types for availability checks (e.g., Deployment, DaemonSet, + // etc.); however, they cannot be used in this test spec as they might spawn derived + // resources (ReplicaSets, ClusterRevisions, etc.) and may cause ownership conflicts if + // placed. + svc := corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: svcName, + Namespace: workNamespaceName, + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + Selector: map[string]string{ + "app": "nginx", + }, + Ports: []corev1.ServicePort{ + { + Port: 80, + TargetPort: intstr.FromInt(80), + }, + }, + }, + } + Expect(hubClient.Create(ctx, &svc)).To(Succeed(), "Failed to create service") + + // Create the CRP. 
+ crp := &placementv1beta1.ClusterResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpName, + }, + Spec: placementv1beta1.ClusterResourcePlacementSpec{ + ResourceSelectors: workResourceSelector(workNamespaceName), + Policy: &placementv1beta1.PlacementPolicy{ + PlacementType: placementv1beta1.PickFixedPlacementType, + ClusterNames: []string{memberCluster1EastProdName}, + }, + Strategy: placementv1beta1.RolloutStrategy{ + Type: placementv1beta1.RollingUpdateRolloutStrategyType, + RollingUpdate: &placementv1beta1.RollingUpdateConfig{ + UnavailablePeriodSeconds: ptr.To(2), + }, + }, + }, + } + Expect(hubClient.Create(ctx, crp)).To(Succeed(), "Failed to create CRP") + }) + + It("should update CRP status as expected", func() { + wantResourceIdentifiers := []placementv1beta1.ResourceIdentifier{ + { + Kind: "Namespace", + Name: workNamespaceName, + Version: "v1", + }, + { + Kind: "Service", + Name: svcName, + Version: "v1", + Namespace: workNamespaceName, + }, + } + wantFailedWorkloadResourceIdentifier := placementv1beta1.ResourceIdentifier{ + Kind: "Service", + Name: svcName, + Version: "v1", + Namespace: workNamespaceName, + } + crpStatusUpdatedActual := crpWithOneFailedAvailabilityCheckStatusUpdatedActual( + crpName, + wantResourceIdentifiers, + []string{memberCluster1EastProdName}, + wantFailedWorkloadResourceIdentifier, 0, + []string{}, + "0", + ) + Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + }) + + It("should place the resources on the member cluster", func() { + // Give the system a bit more breathing room when process resource placement. 
+ Eventually(func() error { + if err := validateWorkNamespaceOnCluster(memberCluster1EastProd, types.NamespacedName{Name: workNamespaceName}); err != nil { + return err + } + + return validateServiceOnCluster(memberCluster1EastProd, types.NamespacedName{Name: svcName, Namespace: workNamespaceName}) + }, eventuallyDuration*2, eventuallyInterval).Should(Succeed(), "Failed to place work resources on member cluster %s", memberCluster1EastProd.ClusterName) + }) +}) + +var _ = Describe("CRP with apply op failure (before upgrade)", Ordered, func() { + // The names specified must match with those in the corresponding node from the after upgrade + // test stage. + crpName := "crp-apply-failure" + workNamespaceName := "work-apply-failure" + appConfigMapName := "app-configmap-apply-failure" + + BeforeAll(func() { + // Create the resources on the hub cluster. + createWorkResources(workNamespaceName, appConfigMapName, crpName) + + // Create the resources on the member cluster with a custom manager + ns := appNamespace(workNamespaceName, crpName) + Expect(memberCluster1EastProdClient.Create(ctx, &ns)).To(Succeed(), "Failed to create namespace %s", ns.Namespace) + + configMap := appConfigMap(workNamespaceName, appConfigMapName) + configMap.Data = map[string]string{ + "data": "foo", + } + Expect(memberCluster1EastProdClient.Create(ctx, &configMap, &client.CreateOptions{FieldManager: "custom"})).To(Succeed(), "Failed to create configMap") + + // Create the CRP. + // + // Apply would fail due to SSA owner (manager) conflicts. 
+ crp := &placementv1beta1.ClusterResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpName, + }, + Spec: placementv1beta1.ClusterResourcePlacementSpec{ + ResourceSelectors: workResourceSelector(workNamespaceName), + Policy: &placementv1beta1.PlacementPolicy{ + PlacementType: placementv1beta1.PickFixedPlacementType, + ClusterNames: []string{memberCluster1EastProdName}, + }, + Strategy: placementv1beta1.RolloutStrategy{ + Type: placementv1beta1.RollingUpdateRolloutStrategyType, + RollingUpdate: &placementv1beta1.RollingUpdateConfig{ + UnavailablePeriodSeconds: ptr.To(2), + }, + ApplyStrategy: &placementv1beta1.ApplyStrategy{ + Type: placementv1beta1.ApplyStrategyTypeServerSideApply, + }, + }, + }, + } + Expect(hubClient.Create(ctx, crp)).To(Succeed(), "Failed to create CRP") + }) + + It("should update CRP status as expected", func() { + wantFailedWorkloadResourceIdentifier := placementv1beta1.ResourceIdentifier{ + Kind: "ConfigMap", + Name: appConfigMapName, + Version: "v1", + Namespace: workNamespaceName, + } + crpStatusUpdatedActual := crpWithOneFailedApplyOpStatusUpdatedActual( + crpName, + workResourceIdentifiers(workNamespaceName, appConfigMapName), + []string{memberCluster1EastProdName}, + wantFailedWorkloadResourceIdentifier, 0, + []string{}, + "0", + ) + Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + }) + + It("should place some resources on the member cluster", func() { + // Give the system a bit more breathing room when process resource placement. 
+ Eventually(func() error { + return validateWorkNamespaceOnCluster(memberCluster1EastProd, types.NamespacedName{Name: workNamespaceName}) + }, eventuallyDuration*2, eventuallyInterval).Should(Succeed(), "Failed to place work resources on member cluster %s", memberCluster1EastProd.ClusterName) + }) + + It("should not place some resources on the member cluster", func() { + // Give the system a bit more breathing room when process resource placement. + Eventually(func() error { + configMap := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: appConfigMapName, + Namespace: workNamespaceName, + }, + Data: map[string]string{ + "data": "foo", + }, + } + + wantConfigMap := &corev1.ConfigMap{} + if err := memberCluster1EastProdClient.Get(ctx, types.NamespacedName{Name: appConfigMapName, Namespace: workNamespaceName}, wantConfigMap); err != nil { + return err + } + + if diff := cmp.Diff( + configMap, wantConfigMap, + ignoreObjectMetaAutoGeneratedFields, + ignoreObjectMetaAnnotationField, + ); diff != "" { + return fmt.Errorf("app config map diff (-got, +want): %s", diff) + } + + return nil + }, eventuallyDuration*2, eventuallyInterval).Should(Succeed(), "Failed to skip resource placement") + }) +}) + +var _ = Describe("CRP stuck in the rollout process (blocked by availability failure)", Ordered, func() { + // The names specified must match with those in the corresponding node from the after upgrade + // test stage. + crpName := "crp-availability-failure-stuck" + workNamespaceName := "work-availability-failure-stuck" + svcName := "svc-availability-failure-stuck" + + var originalSvc *corev1.Service + BeforeAll(func() { + // Create the resources. + ns := appNamespace(workNamespaceName, crpName) + Expect(hubClient.Create(ctx, &ns)).To(Succeed(), "Failed to create namespace %s", ns.Namespace) + + // Use a Service of the ClusterIP type. KinD supports it and it will become available + // once an IP has been assigned. 
+ // + // Fleet support a few other API types for availability checks (e.g., Deployment, DaemonSet. + // etc.); however, they cannot be used in this test spec as they might spawn derived + // resources (ReplicaSets, ClusterRevisions, etc.) and may cause ownership conflicts if + // placed. + originalSvc = &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: svcName, + Namespace: workNamespaceName, + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeClusterIP, + Selector: map[string]string{ + "app": "nginx", + }, + Ports: []corev1.ServicePort{ + { + Port: 80, + TargetPort: intstr.FromInt(80), + }, + }, + }, + } + Expect(hubClient.Create(ctx, originalSvc.DeepCopy())).To(Succeed(), "Failed to create daement set") + + // Create the CRP. + crp := &placementv1beta1.ClusterResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpName, + }, + Spec: placementv1beta1.ClusterResourcePlacementSpec{ + ResourceSelectors: workResourceSelector(workNamespaceName), + Policy: &placementv1beta1.PlacementPolicy{ + PlacementType: placementv1beta1.PickAllPlacementType, + }, + Strategy: placementv1beta1.RolloutStrategy{ + Type: placementv1beta1.RollingUpdateRolloutStrategyType, + RollingUpdate: &placementv1beta1.RollingUpdateConfig{ + UnavailablePeriodSeconds: ptr.To(2), + MaxUnavailable: ptr.To(intstr.FromInt(1)), + }, + }, + }, + } + Expect(hubClient.Create(ctx, crp)).To(Succeed(), "Failed to create CRP") + }) + + It("should update CRP status as expected", func() { + wantResourceIdentifiers := []placementv1beta1.ResourceIdentifier{ + { + Kind: "Namespace", + Name: workNamespaceName, + Version: "v1", + }, + { + Kind: "Service", + Name: svcName, + Version: "v1", + Namespace: workNamespaceName, + }, + } + crpStatusUpdatedActual := customizedCRPStatusUpdatedActual( + crpName, + wantResourceIdentifiers, + allMemberClusterNames, + []string{}, + "0", + true, + ) + Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP 
status as expected") + }) + + It("should place the resources on the member cluster", func() { + // Give the system a bit more breathing room when processing resource placement. + Eventually(func() error { + if err := validateWorkNamespaceOnCluster(memberCluster1EastProd, types.NamespacedName{Name: workNamespaceName}); err != nil { + return err + } + + return validateServiceOnCluster(memberCluster1EastProd, types.NamespacedName{Name: svcName, Namespace: workNamespaceName}) + }, eventuallyDuration*2, eventuallyInterval).Should(Succeed(), "Failed to place work resources on member cluster %s", memberCluster1EastProd.ClusterName) + }) + + It("can update the service objects", func() { + // Update the service object. + svc := &corev1.Service{} + Expect(hubClient.Get(ctx, types.NamespacedName{Name: svcName, Namespace: workNamespaceName}, svc)).To(Succeed(), "Failed to get service %s", svcName) + + // KinD does not support LoadBalancer typed Services; no LB can be provisioned and Fleet + // will consider the service to be in an unavailable state. 
+ svc.Spec.Type = corev1.ServiceTypeLoadBalancer + Expect(hubClient.Update(ctx, svc)).To(Succeed(), "Failed to update service %s", svcName) + }) + + It("should update CRP status as expected (rollout blocked due to unavailable objects)", func() { + wantSelectedResourceIdentifiers := []placementv1beta1.ResourceIdentifier{ + { + Kind: "Namespace", + Name: workNamespaceName, + Version: "v1", + }, + { + Kind: "Service", + Name: svcName, + Version: "v1", + Namespace: workNamespaceName, + }, + } + failedWorkloadResourceIdentifier := placementv1beta1.ResourceIdentifier{ + Kind: "Service", + Name: svcName, + Version: "v1", + Namespace: workNamespaceName, + } + crpStatusUpdatedActual := crpWithStuckRolloutDueToOneFailedAvailabilityCheckStatusUpdatedActual( + crpName, + wantSelectedResourceIdentifiers, + failedWorkloadResourceIdentifier, 0, + allMemberClusterNames, + "1", + ) + Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + }) + + It("should update objects and leave objects alone on respective clusters", func() { + // Retrieve the CRP for its status. + crp := &placementv1beta1.ClusterResourcePlacement{} + Expect(hubClient.Get(ctx, types.NamespacedName{Name: crpName}, crp)).To(Succeed(), "Failed to get CRP") + + // Find the clusters that have the updated Service object and those that do not. 
+ clustersWithUpdatedService := map[string]struct{}{} + clustersWithOldService := map[string]struct{}{} + for idx := range crp.Status.PlacementStatuses { + rps := crp.Status.PlacementStatuses[idx] + availableCond := meta.FindStatusCondition(rps.Conditions, string(placementv1beta1.ResourcesAvailableConditionType)) + switch { + case availableCond == nil: + clustersWithOldService[rps.ClusterName] = struct{}{} + case availableCond.Status == metav1.ConditionFalse: + clustersWithUpdatedService[rps.ClusterName] = struct{}{} + default: + Fail(fmt.Sprintf("Found an unexpected availability reporting \n(%v)", rps)) + } + } + + // Validate things on the clusters with updated resources. + Eventually(func() error { + for idx := range allMemberClusters { + memberCluster := allMemberClusters[idx] + if _, ok := clustersWithUpdatedService[memberCluster.ClusterName]; ok { + // No need to validate the NS as it is unchanged. + if err := validateServiceOnCluster(memberCluster, types.NamespacedName{Name: svcName, Namespace: workNamespaceName}); err != nil { + return err + } + } + } + return nil + }, eventuallyDuration*2, eventuallyInterval).Should(Succeed(), "Failed to update objects on clusters") + + // Validate things on the clusters with old resources. 
+ Eventually(func() error { + for idx := range allMemberClusters { + memberCluster := allMemberClusters[idx] + if _, ok := clustersWithOldService[memberCluster.ClusterName]; ok { + svc := &corev1.Service{} + if err := memberCluster.KubeClient.Get(ctx, types.NamespacedName{Name: svcName, Namespace: workNamespaceName}, svc); err != nil { + return fmt.Errorf("failed to retrieve svc on cluster %s: %w", memberCluster.ClusterName, err) + } + + if diff := cmp.Diff( + svc, originalSvc, + ignoreObjectMetaAutoGeneratedFields, + ignoreObjectMetaAnnotationField, + ignoreServiceStatusField, + ignoreServiceSpecIPAndPolicyFields, + ignoreServicePortNodePortProtocolField, + ); diff != "" { + return fmt.Errorf("service diff (-got, +want): %s", diff) + } + } + } + return nil + }, eventuallyDuration*2, eventuallyInterval).Should(Succeed(), "Failed to leave objects alone on clusters") + }) +}) + +var _ = Describe("CRP stuck in the rollout process (blocked by apply op failure)", Ordered, func() { + // The names specified must match with those in the corresponding node from the after upgrade + // test stage. + crpName := "crp-apply-failure-stuck" + workNamespaceName := "work-apply-failure-stuck" + appConfigMapName := "app-configmap-apply-failure-stuck" + + BeforeAll(func() { + // Create the resources. + createWorkResources(workNamespaceName, appConfigMapName, crpName) + + // Create the CRP. 
+ crp := &placementv1beta1.ClusterResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpName, + }, + Spec: placementv1beta1.ClusterResourcePlacementSpec{ + ResourceSelectors: workResourceSelector(workNamespaceName), + Strategy: placementv1beta1.RolloutStrategy{ + Type: placementv1beta1.RollingUpdateRolloutStrategyType, + RollingUpdate: &placementv1beta1.RollingUpdateConfig{ + UnavailablePeriodSeconds: ptr.To(2), + MaxUnavailable: ptr.To(intstr.FromInt(1)), + }, + ApplyStrategy: &placementv1beta1.ApplyStrategy{ + Type: placementv1beta1.ApplyStrategyTypeServerSideApply, + }, + }, + }, + } + Expect(hubClient.Create(ctx, crp)).To(Succeed(), "Failed to create CRP") + }) + + It("should update CRP status as expected", func() { + crpStatusUpdatedActual := crpStatusUpdatedActual(crpName, workResourceIdentifiers(workNamespaceName, appConfigMapName), allMemberClusterNames, nil, "0") + Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + }) + + It("should place the resources on all member clusters", func() { + checkIfPlacedWorkResourcesOnAllMemberClusters(workNamespaceName, appConfigMapName) + }) + + It("assume ownership", func() { + // Update the ConfigMap object on all member clusters. + for idx := range allMemberClusters { + memberCluster := allMemberClusters[idx] + + configMap := &corev1.ConfigMap{} + Expect(memberCluster.KubeClient.Get(ctx, types.NamespacedName{Name: appConfigMapName, Namespace: workNamespaceName}, configMap)).To(Succeed(), "Failed to get config map %s", appConfigMapName) + + configMap.TypeMeta = metav1.TypeMeta{ + APIVersion: "v1", + Kind: "ConfigMap", + } + configMap.Data["custom"] = "foo" + // Unset this field as required by the server. 
+ configMap.ObjectMeta.ManagedFields = nil + Expect(memberCluster.KubeClient.Patch(ctx, configMap, client.Apply, &client.PatchOptions{FieldManager: "handover", Force: ptr.To(true)})).To(Succeed(), "Failed to update config map %s", appConfigMapName) + } + }) + + It("can update the config map", func() { + // Update the ConfigMap object on the hub cluster. + configMap := &corev1.ConfigMap{} + Expect(hubClient.Get(ctx, types.NamespacedName{Name: appConfigMapName, Namespace: workNamespaceName}, configMap)).To(Succeed(), "Failed to get config map %s", appConfigMapName) + + // This change cannot be applied as the added field is not currently managed by Fleet. + configMap.Data["custom"] = "baz" + Expect(hubClient.Update(ctx, configMap)).To(Succeed(), "Failed to update config map %s", appConfigMapName) + }) + + It("should update CRP status as expected (rollout blocked due to apply op failures)", func() { + wantSelectedResourceIdentifiers := []placementv1beta1.ResourceIdentifier{ + { + Kind: "Namespace", + Name: workNamespaceName, + Version: "v1", + }, + { + Kind: "ConfigMap", + Name: appConfigMapName, + Version: "v1", + Namespace: workNamespaceName, + }, + } + failedWorkloadResourceIdentifier := placementv1beta1.ResourceIdentifier{ + Kind: "ConfigMap", + Name: appConfigMapName, + Version: "v1", + Namespace: workNamespaceName, + } + crpStatusUpdatedActual := crpWithStuckRolloutDueToOneFailedApplyOpStatusUpdatedActual( + crpName, + wantSelectedResourceIdentifiers, + failedWorkloadResourceIdentifier, 0, + allMemberClusterNames, + "1", + ) + Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + }) + + It("should leave objects alone on all clusters", func() { + // Validate things on the clusters with old resources. 
+ Eventually(func() error { + for idx := range allMemberClusters { + memberCluster := allMemberClusters[idx] + + configMap := &corev1.ConfigMap{} + if err := memberCluster.KubeClient.Get(ctx, types.NamespacedName{Name: appConfigMapName, Namespace: workNamespaceName}, configMap); err != nil { + return fmt.Errorf("failed to retrieve config map on cluster %s: %w", memberCluster.ClusterName, err) + } + + wantConfigMap := configMap.DeepCopy() + wantConfigMap.Data["custom"] = "foo" + if diff := cmp.Diff( + configMap, wantConfigMap, + ignoreObjectMetaAutoGeneratedFields, + ignoreObjectMetaAnnotationField, + ); diff != "" { + return fmt.Errorf("service diff (-got, +want): %s", diff) + } + } + + return nil + }, eventuallyDuration*2, eventuallyInterval).Should(Succeed(), "Failed to leave objects alone on clusters") + }) +}) + +var _ = Describe("CRP stuck in the rollout process (long wait time)", Ordered, func() { + // The names specified must match with those in the corresponding node from the after upgrade + // test stage. + crpName := "crp-non-trackable-stuck" + workNamespaceName := "work-non-trackable-stuck" + jobName := "job-non-trackable-stuck" + + var originalJob *batchv1.Job + BeforeAll(func() { + // Create the resources. + ns := appNamespace(workNamespaceName, crpName) + Expect(hubClient.Create(ctx, &ns)).To(Succeed(), "Failed to create namespace %s", ns.Namespace) + + // Job is currently untrackable in Fleet. + originalJob = &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: jobName, + Namespace: workNamespaceName, + }, + Spec: batchv1.JobSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Image: "busybox", + ImagePullPolicy: corev1.PullIfNotPresent, + Name: "busybox", + }, + }, + RestartPolicy: corev1.RestartPolicyNever, + }, + }, + BackoffLimit: ptr.To(int32(6)), + }, + } + Expect(hubClient.Create(ctx, originalJob.DeepCopy())).To(Succeed(), "Failed to create job") + + // Create the CRP. 
+ crp := &placementv1beta1.ClusterResourcePlacement{ + ObjectMeta: metav1.ObjectMeta{ + Name: crpName, + }, + Spec: placementv1beta1.ClusterResourcePlacementSpec{ + ResourceSelectors: workResourceSelector(workNamespaceName), + Strategy: placementv1beta1.RolloutStrategy{ + Type: placementv1beta1.RollingUpdateRolloutStrategyType, + RollingUpdate: &placementv1beta1.RollingUpdateConfig{ + // Block rollout for a long time if there are untrackable resources. + UnavailablePeriodSeconds: ptr.To(3600), + }, + }, + }, + } + Expect(hubClient.Create(ctx, crp)).To(Succeed(), "Failed to create CRP") + }) + + It("should update CRP status as expected", func() { + wantResourceIdentifiers := []placementv1beta1.ResourceIdentifier{ + { + Kind: "Namespace", + Name: workNamespaceName, + Version: "v1", + }, + { + Group: "batch", + Kind: "Job", + Name: jobName, + Version: "v1", + Namespace: workNamespaceName, + }, + } + crpStatusUpdatedActual := customizedCRPStatusUpdatedActual(crpName, wantResourceIdentifiers, allMemberClusterNames, nil, "0", false) + Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + }) + + It("should place the resources on all member clusters", func() { + for idx := range allMemberClusters { + memberCluster := allMemberClusters[idx] + // Give the system a bit more breathing room when process resource placement. + Eventually(func() error { + if err := validateWorkNamespaceOnCluster(memberCluster, types.NamespacedName{Name: workNamespaceName}); err != nil { + return err + } + + return validateJobOnCluster(memberCluster, types.NamespacedName{Name: jobName, Namespace: workNamespaceName}) + }, eventuallyDuration*2, eventuallyInterval).Should(Succeed(), "Failed to place work resources on member cluster %s", memberCluster.ClusterName) + } + }) + + It("can update the job object", func() { + // Update the service object. 
+ job := &batchv1.Job{} + Expect(hubClient.Get(ctx, types.NamespacedName{Name: jobName, Namespace: workNamespaceName}, job)).To(Succeed(), "Failed to get job %s", jobName) + + job.Spec.BackoffLimit = ptr.To(int32(10)) + Expect(hubClient.Update(ctx, job)).To(Succeed(), "Failed to update job %s", jobName) + }) + + It("should update CRP status as expected (rollout blocked due to long wait time)", func() { + wantResourceIdentifiers := []placementv1beta1.ResourceIdentifier{ + { + Kind: "Namespace", + Name: workNamespaceName, + Version: "v1", + }, + { + Group: "batch", + Kind: "Job", + Name: jobName, + Version: "v1", + Namespace: workNamespaceName, + }, + } + crpStatusUpdatedActual := crpWithStuckRolloutDueToUntrackableResourcesStatusUpdatedActual(crpName, wantResourceIdentifiers, allMemberClusterNames, "1") + Eventually(crpStatusUpdatedActual, eventuallyDuration, eventuallyInterval).Should(Succeed(), "Failed to update CRP status as expected") + }) + + It("should leave objects alone on all clusters", func() { + // Validate things on the clusters with old resources. + Eventually(func() error { + for idx := range allMemberClusters { + memberCluster := allMemberClusters[idx] + + job := &batchv1.Job{} + if err := memberCluster.KubeClient.Get(ctx, types.NamespacedName{Name: jobName, Namespace: workNamespaceName}, job); err != nil { + return fmt.Errorf("failed to retrieve job on cluster %s: %w", memberCluster.ClusterName, err) + } + + // For simplicity, we only check the BackoffLimit field. 
+ if !cmp.Equal(job.Spec.BackoffLimit, originalJob.Spec.BackoffLimit) { + return fmt.Errorf("job backoff limit mismatches on cluster %s", memberCluster.ClusterName) + } + } + return nil + }, eventuallyDuration*2, eventuallyInterval).Should(Succeed(), "Failed to leave objects alone on clusters") + }) +}) diff --git a/test/upgrade/before/setup_test.go b/test/upgrade/before/setup_test.go new file mode 100644 index 000000000..d7d339238 --- /dev/null +++ b/test/upgrade/before/setup_test.go @@ -0,0 +1,228 @@ +/* +Copyright (c) Microsoft Corporation. +Licensed under the MIT license. +*/ + +package before + +import ( + "context" + "flag" + "log" + "os" + "sync" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + k8sscheme "k8s.io/client-go/kubernetes/scheme" + "k8s.io/klog/v2" + clusterinventory "sigs.k8s.io/cluster-inventory-api/apis/v1alpha1" + "sigs.k8s.io/controller-runtime/pkg/client" + ctrllog "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + + clusterv1beta1 "go.goms.io/fleet/apis/cluster/v1beta1" + placementv1alpha1 "go.goms.io/fleet/apis/placement/v1alpha1" + placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1" + "go.goms.io/fleet/pkg/utils" + "go.goms.io/fleet/test/e2e/framework" +) + +const ( + // The names of the hub cluster + the member clusters set up in this E2E test environment. + // + // Note that these names must match with those in `setup.sh`, with a prefix `kind-`. 
+	// The names of the kind clusters used by this upgrade test suite.
+	//
+	// Note that these names must also match those in `setup.sh`.
+	hubClusterName               = "kind-hub"
+	memberCluster1EastProdName   = "kind-cluster-1"
+	memberCluster2EastCanaryName = "kind-cluster-2"
+	memberCluster3WestProdName   = "kind-cluster-3"
+
+	// The names of the service accounts used by specific member clusters.
+	//
+	// Note that these names must also match those in `setup.sh`.
+	memberCluster1EastProdSAName   = "fleet-member-agent-cluster-1"
+	memberCluster2EastCanarySAName = "fleet-member-agent-cluster-2"
+	memberCluster3WestProdSAName   = "fleet-member-agent-cluster-3"
+
+	hubClusterSAName = "fleet-hub-agent"
+	fleetSystemNS    = "fleet-system"
+
+	// The environment variable that points at the kubeconfig file shared by all clusters.
+	kubeConfigPathEnvVarName = "KUBECONFIG"
+)
+
+// Timeouts and polling intervals for the Eventually/Consistently assertions in this suite.
+const (
+	// Do not bump this value unless you have a good reason. This is to safeguard any performance related regressions.
+	eventuallyDuration = time.Second * 10
+	// This is for eventually timeouts in the cluster setup steps.
+	longEventuallyDuration = time.Minute * 2
+	eventuallyInterval     = time.Millisecond * 250
+	consistentlyDuration   = time.Second * 15
+	consistentlyInterval   = time.Millisecond * 500
+)
+
+// Suite-wide shared state, initialized by beforeSuiteForAllProcesses.
+var (
+	ctx    = context.Background()
+	scheme = runtime.NewScheme()
+	// Guards the one-time population of allMemberClusterNames (see beforeSuiteForAllProcesses).
+	once = sync.Once{}
+
+	hubCluster               *framework.Cluster
+	memberCluster1EastProd   *framework.Cluster
+	memberCluster2EastCanary *framework.Cluster
+	memberCluster3WestProd   *framework.Cluster
+
+	hubClient                      client.Client
+	impersonateHubClient           client.Client
+	memberCluster1EastProdClient   client.Client
+	memberCluster2EastCanaryClient client.Client
+	memberCluster3WestProdClient   client.Client
+
+	allMemberClusters     []*framework.Cluster
+	allMemberClusterNames = []string{}
+)
+
+// Comparison helpers (sort orders and ignored fields) used when diffing observed
+// object statuses against expected ones with go-cmp.
+var (
+	lessFuncConditionByType = func(a, b metav1.Condition) bool {
+		return a.Type < b.Type
+	}
+	lessFuncPlacementStatusByClusterName = func(a, b placementv1beta1.ResourcePlacementStatus) bool {
+		return a.ClusterName < b.ClusterName
+	}
+	lessFuncPlacementStatusByConditions = func(a, b placementv1beta1.ResourcePlacementStatus) bool {
+		return len(a.Conditions) < len(b.Conditions)
+	}
+
+	// Fields that are auto-generated or non-deterministic and hence excluded from comparisons.
+	ignoreObjectMetaAutoGeneratedFields    = cmpopts.IgnoreFields(metav1.ObjectMeta{}, "UID", "CreationTimestamp", "ResourceVersion", "Generation", "ManagedFields", "OwnerReferences")
+	ignoreObjectMetaAnnotationField        = cmpopts.IgnoreFields(metav1.ObjectMeta{}, "Annotations")
+	ignoreConditionObservedGenerationField = cmpopts.IgnoreFields(metav1.Condition{}, "ObservedGeneration")
+	ignoreAgentStatusHeartbeatField        = cmpopts.IgnoreFields(clusterv1beta1.AgentStatus{}, "LastReceivedHeartbeat")
+	ignoreNamespaceStatusField             = cmpopts.IgnoreFields(corev1.Namespace{}, "Status")
+	ignoreJobSpecSelectorField             = cmpopts.IgnoreFields(batchv1.JobSpec{}, "Selector")
+	ignorePodTemplateSpecObjectMetaField   = cmpopts.IgnoreFields(corev1.PodTemplateSpec{}, "ObjectMeta")
+	ignoreJobStatusField                   = cmpopts.IgnoreFields(batchv1.Job{}, "Status")
+	ignoreServiceStatusField               = cmpopts.IgnoreFields(corev1.Service{}, "Status")
+	ignoreServiceSpecIPAndPolicyFields     = cmpopts.IgnoreFields(corev1.ServiceSpec{}, "ClusterIP", "ClusterIPs", "ExternalIPs", "SessionAffinity", "IPFamilies", "IPFamilyPolicy", "InternalTrafficPolicy")
+	ignoreServicePortNodePortProtocolField = cmpopts.IgnoreFields(corev1.ServicePort{}, "NodePort", "Protocol")
+	ignoreRPSClusterNameField              = cmpopts.IgnoreFields(placementv1beta1.ResourcePlacementStatus{}, "ClusterName")
+
+	// Options for comparing CRP statuses in the common (fully rolled out) case.
+	crpStatusCmpOptions = cmp.Options{
+		cmpopts.SortSlices(lessFuncConditionByType),
+		cmpopts.SortSlices(lessFuncPlacementStatusByClusterName),
+		cmpopts.SortSlices(utils.LessFuncResourceIdentifier),
+		cmpopts.SortSlices(utils.LessFuncFailedResourcePlacements),
+		utils.IgnoreConditionLTTAndMessageFields,
+		cmpopts.EquateEmpty(),
+	}
+	// Options for comparing CRP statuses when a rollout is expected to be stuck;
+	// cluster names are ignored since the stuck cluster may vary between runs.
+	crpWithStuckRolloutStatusCmpOptions = cmp.Options{
+		cmpopts.SortSlices(lessFuncConditionByType),
+		cmpopts.SortSlices(lessFuncPlacementStatusByConditions),
+		cmpopts.SortSlices(utils.LessFuncResourceIdentifier),
+		cmpopts.SortSlices(utils.LessFuncFailedResourcePlacements),
+		utils.IgnoreConditionLTTAndMessageFields,
+		ignoreRPSClusterNameField,
+		cmpopts.EquateEmpty(),
+	}
+)
+
+// TestMain sets up the E2E test environment.
+func TestMain(m *testing.M) {
+	// Add custom APIs to the scheme.
+	if err := clusterv1beta1.AddToScheme(scheme); err != nil {
+		log.Fatalf("failed to add custom APIs (cluster) to the runtime scheme: %v", err)
+	}
+	if err := placementv1alpha1.AddToScheme(scheme); err != nil {
+		log.Fatalf("failed to add custom APIs (placement v1alpha1) to the runtime scheme: %v", err)
+	}
+	if err := placementv1beta1.AddToScheme(scheme); err != nil {
+		log.Fatalf("failed to add custom APIs (placement) to the runtime scheme: %v", err)
+	}
+
+	// Add built-in APIs and extensions to the scheme.
+	if err := k8sscheme.AddToScheme(scheme); err != nil {
+		log.Fatalf("failed to add built-in APIs to the runtime scheme: %v", err)
+	}
+	if err := apiextensionsv1.AddToScheme(scheme); err != nil {
+		log.Fatalf("failed to add API extensions to the runtime scheme: %v", err)
+	}
+	if err := clusterinventory.AddToScheme(scheme); err != nil {
+		log.Fatalf("failed to add cluster inventory APIs to the runtime scheme: %v", err)
+	}
+
+	os.Exit(m.Run())
+}
+
+// TestE2E is the Go test entry point that hands control to the Ginkgo runner.
+func TestE2E(t *testing.T) {
+	RegisterFailHandler(Fail)
+
+	RunSpecs(t, "Fleet Agent Upgrade Test Suite")
+}
+
+// beforeSuiteForAllProcesses runs in every Ginkgo process; it configures logging
+// and initializes the cluster objects and clients shared by all specs.
+func beforeSuiteForAllProcesses() {
+	// Set up the logger.
+	logger := zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))
+	klog.SetLogger(logger)
+	ctrllog.SetLogger(logger)
+	By("Setup klog")
+	fs := flag.NewFlagSet("klog", flag.ContinueOnError)
+	klog.InitFlags(fs)
+	Expect(fs.Parse([]string{"--v", "5", "-add_dir_header", "true"})).Should(Succeed())
+
+	// Check if the required environment variable, which specifies the path to kubeconfig file, has been set.
+	Expect(os.Getenv(kubeConfigPathEnvVarName)).NotTo(BeEmpty(), "Required environment variable KUBECONFIG is not set")
+
+	// Initialize the cluster objects and their clients.
+	// The hub cluster presents no service account of its own, hence the empty name.
+	hubCluster = framework.NewCluster(hubClusterName, "", scheme, nil)
+	Expect(hubCluster).NotTo(BeNil(), "Failed to initialize cluster object")
+	framework.GetClusterClient(hubCluster)
+	hubClient = hubCluster.KubeClient
+	Expect(hubClient).NotTo(BeNil(), "Failed to initialize client for accessing Kubernetes cluster")
+	impersonateHubClient = hubCluster.ImpersonateKubeClient
+	Expect(impersonateHubClient).NotTo(BeNil(), "Failed to initialize impersonate client for accessing Kubernetes cluster")
+
+	memberCluster1EastProd = framework.NewCluster(memberCluster1EastProdName, memberCluster1EastProdSAName, scheme, nil)
+	Expect(memberCluster1EastProd).NotTo(BeNil(), "Failed to initialize cluster object")
+	framework.GetClusterClient(memberCluster1EastProd)
+	memberCluster1EastProdClient = memberCluster1EastProd.KubeClient
+	Expect(memberCluster1EastProdClient).NotTo(BeNil(), "Failed to initialize client for accessing Kubernetes cluster")
+
+	memberCluster2EastCanary = framework.NewCluster(memberCluster2EastCanaryName, memberCluster2EastCanarySAName, scheme, nil)
+	Expect(memberCluster2EastCanary).NotTo(BeNil(), "Failed to initialize cluster object")
+	framework.GetClusterClient(memberCluster2EastCanary)
+	memberCluster2EastCanaryClient = memberCluster2EastCanary.KubeClient
+	Expect(memberCluster2EastCanaryClient).NotTo(BeNil(), "Failed to initialize client for accessing Kubernetes cluster")
+
+	memberCluster3WestProd = framework.NewCluster(memberCluster3WestProdName, memberCluster3WestProdSAName, scheme, nil)
+	Expect(memberCluster3WestProd).NotTo(BeNil(), "Failed to initialize cluster object")
+	framework.GetClusterClient(memberCluster3WestProd)
+	memberCluster3WestProdClient = memberCluster3WestProd.KubeClient
+	Expect(memberCluster3WestProdClient).NotTo(BeNil(), "Failed to initialize client for accessing kubernetes cluster")
+
+	allMemberClusters = []*framework.Cluster{memberCluster1EastProd, memberCluster2EastCanary, memberCluster3WestProd}
+	once.Do(func() {
+		// Set these arrays only once; this is necessary as for the first spawned Ginkgo process,
+		// the `beforeSuiteForAllProcesses` function is called twice.
+		for i := range allMemberClusters {
+			allMemberClusterNames = append(allMemberClusterNames, allMemberClusters[i].ClusterName)
+		}
+	})
+}
+
+// beforeSuiteForProcess1 runs only in Ginkgo process #1; in addition to the common
+// setup it registers the member clusters with the hub and waits for them to join.
+func beforeSuiteForProcess1() {
+	beforeSuiteForAllProcesses()
+
+	setAllMemberClustersToJoin()
+	checkIfAllMemberClustersHaveJoined()
+}
+
+var _ = SynchronizedBeforeSuite(beforeSuiteForProcess1, beforeSuiteForAllProcesses)
+
+// For upgrade tests in the before stage, there is no need to tear down the test environment
+// (i.e., no AfterSuite node).
diff --git a/test/upgrade/before/utils_test.go b/test/upgrade/before/utils_test.go
new file mode 100644
index 000000000..00f0ce0a1
--- /dev/null
+++ b/test/upgrade/before/utils_test.go
@@ -0,0 +1,128 @@
+/*
+Copyright (c) Microsoft Corporation.
+Licensed under the MIT license.
+*/
+
+package before
+
+import (
+	"fmt"
+
+	"github.com/google/go-cmp/cmp"
+	"github.com/google/go-cmp/cmp/cmpopts"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+	rbacv1 "k8s.io/api/rbac/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
+
+	clusterv1beta1 "go.goms.io/fleet/apis/cluster/v1beta1"
+	imcv1beta1 "go.goms.io/fleet/pkg/controllers/internalmembercluster/v1beta1"
+	"go.goms.io/fleet/pkg/utils"
+	"go.goms.io/fleet/test/e2e/framework"
+)
+
+// createMemberCluster creates a MemberCluster object.
+// name is the MemberCluster's name; svcAccountName identifies the member agent's
+// service account (in the fleet-system namespace) on the hub; labels/annotations
+// may be nil. Fails the spec if creation does not succeed.
+func createMemberCluster(name, svcAccountName string, labels, annotations map[string]string) {
+	mcObj := &clusterv1beta1.MemberCluster{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:        name,
+			Labels:      labels,
+			Annotations: annotations,
+		},
+		Spec: clusterv1beta1.MemberClusterSpec{
+			Identity: rbacv1.Subject{
+				Name:      svcAccountName,
+				Kind:      "ServiceAccount",
+				Namespace: fleetSystemNS,
+			},
+			// Heartbeat period in seconds for the member agent.
+			HeartbeatPeriodSeconds: 60,
+		},
+	}
+	Expect(hubClient.Create(ctx, mcObj)).To(Succeed(), "Failed to create member cluster object %s", name)
+}
+
+// setAllMemberClustersToJoin creates a MemberCluster object for each member cluster.
+func setAllMemberClustersToJoin() {
+	for idx := range allMemberClusters {
+		memberCluster := allMemberClusters[idx]
+		createMemberCluster(memberCluster.ClusterName, memberCluster.PresentingServiceAccountInHubClusterName, nil, nil)
+	}
+}
+
+// checkIfMemberClusterHasJoined verifies if the specified member cluster has connected to the hub
+// cluster, i.e., updated the MemberCluster object status as expected.
+func checkIfMemberClusterHasJoined(memberCluster *framework.Cluster) {
+	// The member agent is expected to report both Healthy and Joined conditions as True.
+	wantAgentStatus := []clusterv1beta1.AgentStatus{
+		{
+			Type: clusterv1beta1.MemberAgent,
+			Conditions: []metav1.Condition{
+				{
+					Status: metav1.ConditionTrue,
+					Type:   string(clusterv1beta1.AgentHealthy),
+					Reason: imcv1beta1.EventReasonInternalMemberClusterHealthy,
+				},
+				{
+					Status: metav1.ConditionTrue,
+					Type:   string(clusterv1beta1.AgentJoined),
+					Reason: imcv1beta1.EventReasonInternalMemberClusterJoined,
+				},
+			},
+		},
+	}
+
+	// Joining can take a while right after cluster setup, so the longer timeout is used here.
+	Eventually(func() error {
+		mcObj := &clusterv1beta1.MemberCluster{}
+		if err := hubClient.Get(ctx, types.NamespacedName{Name: memberCluster.ClusterName}, mcObj); err != nil {
+			By(fmt.Sprintf("Failed to get member cluster object %s", memberCluster.ClusterName))
+			return err
+		}
+
+		// Heartbeat timestamps and condition generations/timestamps are non-deterministic
+		// and hence excluded from the diff.
+		if diff := cmp.Diff(
+			mcObj.Status.AgentStatus,
+			wantAgentStatus,
+			cmpopts.SortSlices(lessFuncConditionByType),
+			ignoreConditionObservedGenerationField,
+			utils.IgnoreConditionLTTAndMessageFields,
+			ignoreAgentStatusHeartbeatField,
+		); diff != "" {
+			return fmt.Errorf("agent status diff (-got, +want): %s", diff)
+		}
+
+		return nil
+	}, longEventuallyDuration, eventuallyInterval).Should(Succeed(), "Member cluster has not joined yet")
+}
+
+// checkIfAllMemberClustersHaveJoined verifies if all member clusters have connected to the hub
+// cluster, i.e., updated the MemberCluster object status as expected.
+func checkIfAllMemberClustersHaveJoined() {
+	for idx := range allMemberClusters {
+		checkIfMemberClusterHasJoined(allMemberClusters[idx])
+	}
+}
+
+// createWorkResources creates some resources on the hub cluster for testing purposes.
+func createWorkResources(workNamespaceName, appConfigMapName, crpName string) { + ns := appNamespace(workNamespaceName, crpName) + Expect(hubClient.Create(ctx, &ns)).To(Succeed(), "Failed to create namespace %s", ns.Namespace) + + configMap := appConfigMap(workNamespaceName, appConfigMapName) + Expect(hubClient.Create(ctx, &configMap)).To(Succeed(), "Failed to create config map %s", configMap.Name) +} + +// checkIfPlacedWorkResourcesOnAllMemberClusters verifies if the work resources have been placed on +// all applicable member clusters. +func checkIfPlacedWorkResourcesOnAllMemberClusters(workNamespaceName, appConfigMapName string) { + checkIfPlacedWorkResourcesOnMemberClusters(allMemberClusters, workNamespaceName, appConfigMapName) +} + +// checkIfPlacedWorkResourcesOnMemberClusters verifies if the work resources have been placed on +// the specified set of member clusters. +func checkIfPlacedWorkResourcesOnMemberClusters(clusters []*framework.Cluster, workNamespaceName, appConfigMapName string) { + for idx := range clusters { + memberCluster := clusters[idx] + workResourcesPlacedActual := workNamespaceAndConfigMapPlacedOnClusterActual(memberCluster, workNamespaceName, appConfigMapName) + // Give the system a bit more breathing room when process resource placement. + Eventually(workResourcesPlacedActual, eventuallyDuration*2, eventuallyInterval).Should(Succeed(), "Failed to place work resources on member cluster %s", memberCluster.ClusterName) + } +} diff --git a/test/upgrade/setup.sh b/test/upgrade/setup.sh new file mode 100755 index 000000000..daf501e5b --- /dev/null +++ b/test/upgrade/setup.sh @@ -0,0 +1,156 @@ +#!/usr/bin/env bash + +set -o errexit +set -o nounset +set -o pipefail + +# Before updating the default kind image to use, verify that the version is supported +# by the current kind release. 
+KIND_IMAGE="${KIND_IMAGE:-kindest/node:v1.30.0}"
+KUBECONFIG="${KUBECONFIG:-$HOME/.kube/config}"
+# Fail fast with a usage hint when the member cluster count is not supplied.
+MEMBER_CLUSTER_COUNT="${1:?usage: setup.sh <member-cluster-count>}"
+
+HUB_CLUSTER="hub"
+declare -a MEMBER_CLUSTERS=()
+
+for (( i=1;i<=MEMBER_CLUSTER_COUNT;i++ ))
+do
+    MEMBER_CLUSTERS+=("cluster-$i")
+done
+
+export REGISTRY="${REGISTRY:-ghcr.io}"
+export IMAGE_TAG="${IMAGE_TAG:-before-upgrade}"
+export OUTPUT_TYPE="${OUTPUT_TYPE:-type=docker}"
+export HUB_AGENT_IMAGE="${HUB_AGENT_IMAGE:-hub-agent}"
+export MEMBER_AGENT_IMAGE="${MEMBER_AGENT_IMAGE:-member-agent}"
+export REFRESH_TOKEN_IMAGE="${REFRESH_TOKEN_IMAGE:-refresh-token}"
+export GIT_TAG="${GIT_TAG:-}"
+
+PREVIOUS_BRANCH=""
+PREVIOUS_COMMIT=""
+if [ -z "${GIT_TAG}" ]; then
+    echo "No tag is specified; use the latest tag."
+
+    PREVIOUS_BRANCH=$(git branch --show-current)
+    PREVIOUS_COMMIT=$(git rev-parse HEAD)
+    echo "Current at branch $PREVIOUS_BRANCH, commit $PREVIOUS_COMMIT."
+
+    echo "Fetch all tags..."
+    git fetch --all
+    # Resolve the most recent tag by commit date, then check it out.
+    GIT_TAG=$(git describe --tags "$(git rev-list --tags --max-count=1)")
+    git checkout "$GIT_TAG"
+    echo "Checked out source code at $GIT_TAG."
+
+    echo "Switch back to the root directory to avoid consistency issues."
+    cd ../..
+fi
+# NOTE(review): when GIT_TAG is preset via the environment, the script neither checks
+# out that tag nor cd's to the repo root before the make targets below — confirm that
+# callers in that mode invoke this script from the repo root with the tag checked out.
+
+# Build the Fleet agent images.
+echo "Building the Fleet agent images..."
+
+TAG="$IMAGE_TAG" make docker-build-hub-agent
+TAG="$IMAGE_TAG" make docker-build-member-agent
+TAG="$IMAGE_TAG" make docker-build-refresh-token
+
+# Restore to the previous branch. This must be done immediately after the image building to avoid
+# consistency issues.
+if [ -n "$PREVIOUS_COMMIT" ]; then
+    git checkout "$PREVIOUS_COMMIT"
+    echo "Checked out source code at $PREVIOUS_COMMIT."
+fi
+
+echo "Switch back to the test/upgrade directory to avoid consistency issues."
+cd test/upgrade
+
+# Create the kind clusters.
+echo "Creating the kind clusters..."
+
+# Create the hub cluster.
+kind create cluster --name "$HUB_CLUSTER" --image="$KIND_IMAGE" --kubeconfig="$KUBECONFIG"
+
+# Create the member clusters.
+for (( i=0; i<${MEMBER_CLUSTER_COUNT}; i++ )); +do + kind create cluster --name "${MEMBER_CLUSTERS[$i]}" --image=$KIND_IMAGE --kubeconfig=$KUBECONFIG +done + +# Load the Fleet agent images into the kind clusters. + +# Load the hub agent image into the hub cluster. +kind load docker-image --name $HUB_CLUSTER $REGISTRY/$HUB_AGENT_IMAGE:$IMAGE_TAG + +# Load the member agent image and the refresh token image into the member clusters. +for i in "${MEMBER_CLUSTERS[@]}" +do + kind load docker-image --name "$i" $REGISTRY/$MEMBER_AGENT_IMAGE:$IMAGE_TAG + kind load docker-image --name "$i" $REGISTRY/$REFRESH_TOKEN_IMAGE:$IMAGE_TAG +done + +# Install the helm charts. + +# Install the hub agent to the hub cluster. +kind export kubeconfig --name $HUB_CLUSTER +helm install hub-agent ../../charts/hub-agent/ \ + --set image.pullPolicy=Never \ + --set image.repository=$REGISTRY/$HUB_AGENT_IMAGE \ + --set image.tag=$IMAGE_TAG \ + --set namespace=fleet-system \ + --set logVerbosity=5 \ + --set enableWebhook=false \ + --set webhookClientConnectionType=service \ + --set forceDeleteWaitTime="1m0s" \ + --set clusterUnhealthyThreshold="3m0s" \ + --set logFileMaxSize=1000000 + +# Install the member agent and related components to the member clusters. + +# Set up a service account for each member in the hub cluster. +# +# Note that these service account has no permission set up at all; the authorization will be +# configured by the hub agent. +for i in "${MEMBER_CLUSTERS[@]}" +do + kubectl create serviceaccount fleet-member-agent-$i -n fleet-system + cat <