diff --git a/Makefile b/Makefile index 50add359..61a7362d 100644 --- a/Makefile +++ b/Makefile @@ -139,7 +139,7 @@ GOLANGCI_LINT = $(LOCALBIN)/golangci-lint-$(GOLANGCI_LINT_VERSION) ## Tool Versions CONTROLLER_TOOLS_VERSION ?= v0.17.2 ENVTEST_VERSION ?= release-0.20 -GOLANGCI_LINT_VERSION ?= v2.1.6 +GOLANGCI_LINT_VERSION ?= v2.7.2 .PHONY: controller-gen controller-gen: $(CONTROLLER_GEN) ## Download controller-gen locally if necessary. @@ -154,7 +154,7 @@ $(ENVTEST): $(LOCALBIN) .PHONY: golangci-lint golangci-lint: $(GOLANGCI_LINT) ## Download golangci-lint locally if necessary. $(GOLANGCI_LINT): $(LOCALBIN) - $(call go-install-tool,$(GOLANGCI_LINT),github.com/golangci/golangci-lint/cmd/golangci-lint,${GOLANGCI_LINT_VERSION}) + $(call go-install-tool,$(GOLANGCI_LINT),github.com/golangci/golangci-lint/v2/cmd/golangci-lint,${GOLANGCI_LINT_VERSION}) golangci-lint-docker: docker run --rm -v $(shell pwd):/app -w /app golangci/golangci-lint:${GOLANGCI_LINT_VERSION} golangci-lint run -v diff --git a/docs/async-eni-queue.md b/docs/async-eni-queue.md new file mode 100644 index 00000000..1c89f9de --- /dev/null +++ b/docs/async-eni-queue.md @@ -0,0 +1,67 @@ +# Async ENI Task Queue Design + +## Overview + +The Async ENI Task Queue is designed to decouple the blocking ENI attachment operations from the main Node reconciliation loop in the Terway controller. This improves the responsiveness of the controller and prevents blocking the reconciliation worker threads during slow Aliyun API calls. + +## Key Components + +### 1. ENITaskQueue (`pkg/controller/multi-ip/node/eni_task_queue.go`) + +The core component that manages the lifecycle of async ENI operations. + +- **In-Memory Queue**: Stores task state (`ENITaskRecord`) keyed by ENI ID. +- **Async Processing**: Uses goroutines to handle individual attach tasks. +- **Notification**: Signals the controller via a channel when a task completes. + +### 2. 
Executor (`pkg/eni/ops/executor.go`) + +Provides the low-level ENI operations. + +- **AttachAsync**: Initiates the attach operation via Aliyun API (non-blocking). +- **CheckStatus**: Checks the current status of an ENI. +- **Wait Logic**: Handles backoff and polling for status changes. + +## Workflow + +### 1. Submission + +When the controller determines a new ENI is needed: + +1. It creates the ENI via OpenAPI (blocking, as it's fast). +2. It calls `SubmitAttach` to queue the attach operation. +3. The task is added to the map with `Pending` status. +4. A background goroutine is started for the task. +5. The Node CR status is optimistically updated to `Attaching`. + +### 2. Processing (`processAttachTask`) + +The background goroutine performs the following steps: + +1. **Status Check**: Verifies the current ENI status. If already `InUse`, marks as `Completed` (handles controller restarts). +2. **Initiate Attach**: Calls `AttachAsync` if needed. +3. **Wait**: Sleeps for an initial delay (based on ENI type). +4. **Poll**: Polls the API until the status becomes `InUse` or timeout. +5. **Completion**: Updates the task status to `Completed` or `Failed` and notifies the controller. + +### 3. Reconciliation (`syncTaskQueueStatus`) + +In the main `Reconcile` loop: + +1. The controller calls `GetCompletedTasks`. +2. Completed tasks are **removed** from the queue. +3. The Node CR is updated with the result (e.g., `InUse` status, IP details). + +## State Machine + +- **Pending**: Task submitted, waiting to start. +- **Running**: Goroutine started, operation in progress. +- **Completed**: Operation successful. +- **Failed**: Operation failed (API error). +- **Timeout**: Operation timed out. + +## Reliability & Idempotency + +- **Duplicate Submission**: `SubmitAttach` ignores tasks that are already `Pending` or `Running`. +- **Controller Restarts**: The `processAttachTask` first checks the actual ENI status. 
If the ENI was attached during a previous run (but status wasn't updated), it detects this and completes immediately. +- **Reconciliation Loop**: The controller periodically reconciles and checks for completed tasks, ensuring the CR state eventually matches the actual state. diff --git a/pkg/controller/multi-ip/node/eni_task_queue.go b/pkg/controller/multi-ip/node/eni_task_queue.go new file mode 100644 index 00000000..9ad2dbe9 --- /dev/null +++ b/pkg/controller/multi-ip/node/eni_task_queue.go @@ -0,0 +1,463 @@ +/* +Copyright 2025 Terway Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package node + +import ( + "context" + "errors" + "fmt" + "strings" + "sync" + "time" + + "github.com/go-logr/logr" + logf "sigs.k8s.io/controller-runtime/pkg/log" + + aliyunClient "github.com/AliyunContainerService/terway/pkg/aliyun/client" + "github.com/AliyunContainerService/terway/pkg/eni/ops" +) + +// ENIOperation represents an ENI operation type +type ENIOperation string + +const ( + OpAttach ENIOperation = "Attach" + OpDetach ENIOperation = "Detach" + OpDelete ENIOperation = "Delete" +) + +// ENITaskStatus represents the status of an ENI task +type ENITaskStatus string + +const ( + TaskStatusPending ENITaskStatus = "Pending" + TaskStatusRunning ENITaskStatus = "Running" + TaskStatusCompleted ENITaskStatus = "Completed" + TaskStatusFailed ENITaskStatus = "Failed" + TaskStatusTimeout ENITaskStatus = "Timeout" +) + +// ENITaskRecord stores the state of an ENI operation task +type ENITaskRecord struct { + ENIID string + Operation ENIOperation + InstanceID string + TrunkENIID string + NodeName string + + // BackendAPI stores the backend type (ECS or EFLO) for this task. + // This is needed because the task runs asynchronously and needs to use + // the correct backend API for attach/query operations. 
+ BackendAPI aliyunClient.BackendAPI + + Status ENITaskStatus + CreatedAt time.Time + CompletedAt *time.Time + + // Record the number of IPs requested when creating ENI, used for quota calculation + RequestedIPv4Count int + RequestedIPv6Count int + + // Result after completion + ENIInfo *aliyunClient.NetworkInterface + Error error +} + +// ENITaskQueue manages async ENI operations +type ENITaskQueue struct { + ctx context.Context + mu sync.RWMutex + tasks map[string]*ENITaskRecord // key: ENIID + + executor *ops.Executor + notifyCh chan string // node name to notify + + log logr.Logger +} + +// NewENITaskQueue creates a new task queue +func NewENITaskQueue(ctx context.Context, executor *ops.Executor, notifyCh chan string) *ENITaskQueue { + return &ENITaskQueue{ + ctx: ctx, + tasks: make(map[string]*ENITaskRecord), + executor: executor, + notifyCh: notifyCh, + log: logf.Log.WithName("eni-task-queue"), + } +} + +// SubmitAttach submits an async attach task with requested IP counts +// This method never fails - it only adds a task to in-memory queue +func (q *ENITaskQueue) SubmitAttach(ctx context.Context, eniID, instanceID, trunkENIID, nodeName string, + requestedIPv4, requestedIPv6 int) { + q.mu.Lock() + defer q.mu.Unlock() + + // Check if task already exists + if existing, ok := q.tasks[eniID]; ok { + if existing.Status == TaskStatusPending || existing.Status == TaskStatusRunning { + q.log.Info("task already exists and in progress", "eni", eniID, "status", existing.Status) + return // Task already in progress + } + // Remove completed/failed task to allow re-submission + delete(q.tasks, eniID) + } + + // Capture the backend API from ctx for later use in async processing + backendAPI := aliyunClient.GetBackendAPI(ctx) + + task := &ENITaskRecord{ + ENIID: eniID, + Operation: OpAttach, + InstanceID: instanceID, + TrunkENIID: trunkENIID, + NodeName: nodeName, + BackendAPI: backendAPI, + Status: TaskStatusPending, + CreatedAt: time.Now(), + RequestedIPv4Count: 
requestedIPv4, + RequestedIPv6Count: requestedIPv6, + } + + q.tasks[eniID] = task + q.log.Info("submitted attach task", "eni", eniID, "node", nodeName, + "backendAPI", backendAPI, "requestedIPv4", requestedIPv4, "requestedIPv6", requestedIPv6) + + // Start processing in background + go q.processAttachTask(q.ctx, task) +} + +// processAttachTask handles an attach task +func (q *ENITaskQueue) processAttachTask(ctx context.Context, task *ENITaskRecord) { + log := q.log.WithValues("eni", task.ENIID, "node", task.NodeName, "backendAPI", task.BackendAPI) + log.Info("starting attach task") + + q.updateTaskStatus(task.ENIID, TaskStatusRunning, nil) + + // Set the backend API in context for API calls + // This ensures attach/query operations use the correct backend (ECS or EFLO) + ctx = aliyunClient.SetBackendAPI(ctx, task.BackendAPI) + + // Get timeout based on ENI type + timeout := q.executor.GetTimeout(task.ENIID) + taskCtx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + if !isEFLORes(task.ENIID) { + // ECS: Attach first (lazy check) + err := q.executor.AttachAsync(taskCtx, task.ENIID, task.InstanceID, task.TrunkENIID) + if err != nil { + log.Error(err, "failed to initiate attach") + q.completeTask(task.ENIID, TaskStatusFailed, nil, err) + q.notifyNode(task.NodeName) + return + } + } + + // Wait initial delay + initialDelay := q.executor.GetInitialDelay(task.ENIID) + select { + case <-taskCtx.Done(): + q.completeTask(task.ENIID, TaskStatusTimeout, nil, taskCtx.Err()) + q.notifyNode(task.NodeName) + return + case <-time.After(initialDelay): + } + + // Poll for completion using BackoffManager + result, err := q.executor.WaitForStatus(taskCtx, task.ENIID, aliyunClient.ENIStatusInUse) + now := time.Now() + + if err != nil { + if errors.Is(err, context.DeadlineExceeded) || errors.Is(taskCtx.Err(), context.DeadlineExceeded) { + log.Error(err, "attach task timeout") + q.completeTask(task.ENIID, TaskStatusTimeout, nil, fmt.Errorf("attach timeout after %v", 
timeout)) + } else { + log.Error(err, "attach task failed") + q.completeTask(task.ENIID, TaskStatusFailed, nil, err) + } + } else { + log.Info("attach task completed successfully", "duration", now.Sub(task.CreatedAt)) + q.completeTask(task.ENIID, TaskStatusCompleted, result, nil) + } + + q.notifyNode(task.NodeName) +} + +// completeTask marks a task as completed with result +func (q *ENITaskQueue) completeTask(eniID string, status ENITaskStatus, eniInfo *aliyunClient.NetworkInterface, err error) { + q.mu.Lock() + + task, ok := q.tasks[eniID] + if !ok { + q.mu.Unlock() + return + } + + now := time.Now() + task.Status = status + task.CompletedAt = &now + task.ENIInfo = eniInfo + task.Error = err + q.mu.Unlock() + + // Record metrics + q.recordAttachDuration(task) + q.updateQueueMetrics() +} + +// updateTaskStatus updates the status of a task +func (q *ENITaskQueue) updateTaskStatus(eniID string, status ENITaskStatus, err error) { + q.mu.Lock() + defer q.mu.Unlock() + + if task, ok := q.tasks[eniID]; ok { + task.Status = status + task.Error = err + } +} + +// GetTaskStatus returns the current status of a task +func (q *ENITaskQueue) GetTaskStatus(eniID string) (*ENITaskRecord, bool) { + q.mu.RLock() + defer q.mu.RUnlock() + + task, ok := q.tasks[eniID] + if !ok { + return nil, false + } + + // Return a copy to avoid race conditions + taskCopy := *task + return &taskCopy, true +} + +// PeekCompletedTasks returns all completed/failed tasks for a node without removing them +func (q *ENITaskQueue) PeekCompletedTasks(nodeName string) []*ENITaskRecord { + q.mu.RLock() + defer q.mu.RUnlock() + + var completedTasks []*ENITaskRecord + + for _, task := range q.tasks { + if task.NodeName != nodeName { + continue + } + + if task.Status == TaskStatusCompleted || + task.Status == TaskStatusFailed || + task.Status == TaskStatusTimeout { + // Return a copy + taskCopy := *task + completedTasks = append(completedTasks, &taskCopy) + } + } + + return completedTasks +} + +// DeleteTasks 
removes specific tasks from the queue +func (q *ENITaskQueue) DeleteTasks(eniIDs []string) { + q.mu.Lock() + defer q.mu.Unlock() + + for _, eniID := range eniIDs { + delete(q.tasks, eniID) + q.log.V(4).Info("removed task from queue", "eni", eniID) + } +} + +// GetPendingENIs returns ENI IDs that are still pending/running for a node +func (q *ENITaskQueue) GetPendingENIs(nodeName string) []string { + q.mu.RLock() + defer q.mu.RUnlock() + + var eniIDs []string + for eniID, task := range q.tasks { + if task.NodeName == nodeName && + (task.Status == TaskStatusPending || task.Status == TaskStatusRunning) { + eniIDs = append(eniIDs, eniID) + } + } + return eniIDs +} + +// GetAttachingCount returns the number of ENIs currently being attached for a node. +// This includes both in-queue tasks (Pending/Running) to help enforce concurrent attach limits. +func (q *ENITaskQueue) GetAttachingCount(nodeName string) int { + q.mu.RLock() + defer q.mu.RUnlock() + + count := 0 + for _, task := range q.tasks { + if task.NodeName == nodeName && + (task.Status == TaskStatusPending || task.Status == TaskStatusRunning) { + count++ + } + } + return count +} + +// HasPendingTasks checks if there are any pending tasks for a node +func (q *ENITaskQueue) HasPendingTasks(nodeName string) bool { + q.mu.RLock() + defer q.mu.RUnlock() + + for _, task := range q.tasks { + if task.NodeName == nodeName && + (task.Status == TaskStatusPending || task.Status == TaskStatusRunning) { + return true + } + } + return false +} + +// RemoveTask removes a task from the queue +func (q *ENITaskQueue) RemoveTask(eniID string) { + q.mu.Lock() + defer q.mu.Unlock() + + delete(q.tasks, eniID) + q.log.V(4).Info("removed task from queue", "eni", eniID) +} + +// RemoveTasks removes all tasks for a specific node +func (q *ENITaskQueue) RemoveTasks(nodeName string) { + q.mu.Lock() + defer q.mu.Unlock() + + var toRemove []string + for eniID, task := range q.tasks { + if task.NodeName == nodeName { + toRemove = 
append(toRemove, eniID) + } + } + + for _, eniID := range toRemove { + delete(q.tasks, eniID) + } + + if len(toRemove) > 0 { + q.log.Info("removed tasks for node", "node", nodeName, "count", len(toRemove)) + } +} + +// notifyNode sends a notification to reconcile a node +func (q *ENITaskQueue) notifyNode(nodeName string) { + select { + case q.notifyCh <- nodeName: + q.log.V(4).Info("notified node", "node", nodeName) + default: + // Channel full, node will be reconciled eventually + q.log.V(4).Info("notify channel full, skipping", "node", nodeName) + } +} + +// GetQueueStats returns queue statistics for metrics +func (q *ENITaskQueue) GetQueueStats() map[ENITaskStatus]int { + q.mu.RLock() + defer q.mu.RUnlock() + + stats := make(map[ENITaskStatus]int) + for _, task := range q.tasks { + stats[task.Status]++ + } + return stats +} + +// updateQueueMetrics updates Prometheus metrics for queue size +func (q *ENITaskQueue) updateQueueMetrics() { + stats := q.GetQueueStats() + for status, count := range stats { + ENITaskQueueSize.WithLabelValues(string(status)).Set(float64(count)) + } +} + +// recordAttachDuration records the duration of an attach operation +func (q *ENITaskQueue) recordAttachDuration(task *ENITaskRecord) { + if task.CompletedAt == nil { + return + } + + duration := task.CompletedAt.Sub(task.CreatedAt).Seconds() + result := "success" + + switch task.Status { + case TaskStatusFailed: + result = "failed" + case TaskStatusTimeout: + result = "timeout" + } + + eniType := "ecs" + if strings.HasPrefix(task.ENIID, "leni-") { + eniType = "eflo" + } else if strings.HasPrefix(task.ENIID, "hdeni-") { + eniType = "hdeni" + } + + ENIAttachDuration.WithLabelValues(result, eniType).Observe(duration) +} + +// CleanupStaleTasks removes tasks that are orphaned or stale +// - Tasks for ENIs not in validENIIDs are considered orphaned +// - Completed tasks older than staleThreshold are considered stale +func (q *ENITaskQueue) CleanupStaleTasks(nodeName string, validENIIDs 
map[string]struct{}, staleThreshold time.Duration) []string { + q.mu.Lock() + defer q.mu.Unlock() + + var removed []string + now := time.Now() + + for eniID, task := range q.tasks { + if task.NodeName != nodeName { + continue + } + + shouldRemove := false + + // Check if ENI no longer exists in CR (orphaned task) + if _, exists := validENIIDs[eniID]; !exists { + q.log.Info("removing orphaned task (ENI not in CR)", "eni", eniID, "status", task.Status) + shouldRemove = true + } + + // Check if task is completed but stale (not consumed for too long) + if !shouldRemove && task.CompletedAt != nil { + if task.Status == TaskStatusCompleted || + task.Status == TaskStatusFailed || + task.Status == TaskStatusTimeout { + if now.Sub(*task.CompletedAt) > staleThreshold { + q.log.Info("removing stale completed task", "eni", eniID, "status", task.Status, + "completedAt", task.CompletedAt, "staleDuration", now.Sub(*task.CompletedAt)) + shouldRemove = true + } + } + } + + if shouldRemove { + delete(q.tasks, eniID) + removed = append(removed, eniID) + } + } + + return removed +} + +func isEFLORes(in string) bool { + return strings.HasPrefix(in, "leni-") || strings.HasPrefix(in, "hdeni-") +} diff --git a/pkg/controller/multi-ip/node/eni_task_queue_test.go b/pkg/controller/multi-ip/node/eni_task_queue_test.go new file mode 100644 index 00000000..bb362535 --- /dev/null +++ b/pkg/controller/multi-ip/node/eni_task_queue_test.go @@ -0,0 +1,439 @@ +/* +Copyright 2025 Terway Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package node + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "go.opentelemetry.io/otel/trace/noop" + + aliyunClient "github.com/AliyunContainerService/terway/pkg/aliyun/client" + "github.com/AliyunContainerService/terway/pkg/aliyun/client/mocks" + "github.com/AliyunContainerService/terway/pkg/eni/ops" +) + +// createTestExecutor creates an executor with a mock aliyun client for testing +func createTestExecutor(t *testing.T) *ops.Executor { + mockAPI := mocks.NewOpenAPI(t) + tracer := noop.NewTracerProvider().Tracer("test") + return ops.NewExecutor(mockAPI, tracer) +} + +func TestENITaskQueue_RemoveTasks(t *testing.T) { + executor := createTestExecutor(t) + q := NewENITaskQueue(context.Background(), executor, nil) + + // Add tasks for node1 + q.tasks["eni-1"] = &ENITaskRecord{ENIID: "eni-1", NodeName: "node1", Status: TaskStatusPending} + q.tasks["eni-2"] = &ENITaskRecord{ENIID: "eni-2", NodeName: "node1", Status: TaskStatusPending} + + // Add tasks for node2 + q.tasks["eni-3"] = &ENITaskRecord{ENIID: "eni-3", NodeName: "node2", Status: TaskStatusPending} + + // Verify all tasks are present + assert.True(t, q.HasPendingTasks("node1")) + assert.True(t, q.HasPendingTasks("node2")) + + // Remove tasks for node1 + q.RemoveTasks("node1") + + // Verify node1 tasks are gone + assert.False(t, q.HasPendingTasks("node1")) + + // Verify node2 tasks are still there + assert.True(t, q.HasPendingTasks("node2")) + + // Verify specific ENIs + _, ok := q.GetTaskStatus("eni-1") + assert.False(t, ok) + _, ok = q.GetTaskStatus("eni-2") + assert.False(t, ok) + _, ok = q.GetTaskStatus("eni-3") + assert.True(t, ok) +} + +func TestENITaskQueue_PeekAndDelete(t *testing.T) { + if ENITaskQueueSize == nil { + t.Fatal("ENITaskQueueSize is nil") + } + if 
ENIAttachDuration == nil { + t.Fatal("ENIAttachDuration is nil") + } + executor := createTestExecutor(t) + q := NewENITaskQueue(context.Background(), executor, nil) + + // Add a task manually + q.tasks["eni-1"] = &ENITaskRecord{ + ENIID: "eni-1", + NodeName: "node1", + Status: TaskStatusRunning, + CreatedAt: time.Now(), + } + + // Manually complete it for testing + q.completeTask("eni-1", TaskStatusCompleted, &aliyunClient.NetworkInterface{NetworkInterfaceID: "eni-1"}, nil) + + // Peek + tasks := q.PeekCompletedTasks("node1") + assert.Len(t, tasks, 1) + assert.Equal(t, "eni-1", tasks[0].ENIID) + + // Verify it's still in the queue + assert.True(t, q.HasPendingTasks("node1") == false) // It's completed, not pending + // Check internal map directly or via GetTaskStatus + _, ok := q.GetTaskStatus("eni-1") + assert.True(t, ok) + + // Delete + q.DeleteTasks([]string{"eni-1"}) + + // Verify it's gone + _, ok = q.GetTaskStatus("eni-1") + assert.False(t, ok) +} + +func TestENITaskQueue_IPSync(t *testing.T) { + mockAPI := mocks.NewOpenAPI(t) + tracer := noop.NewTracerProvider().Tracer("test") + executor := ops.NewExecutor(mockAPI, tracer) + q := NewENITaskQueue(context.Background(), executor, nil) + + // Setup mock expectations for AttachAsync + mockAPI.On("AttachNetworkInterfaceV2", mock.Anything, mock.Anything).Return(nil).Maybe() + // Setup mock expectations for WaitForStatus (CheckStatus) + mockAPI.On("DescribeNetworkInterfaceV2", mock.Anything, mock.Anything).Return([]*aliyunClient.NetworkInterface{ + { + NetworkInterfaceID: "eni-recovery", + Status: aliyunClient.ENIStatusInUse, + }, + }, nil).Maybe() + + // 1. Submit a task with 0 IPs (simulating recovery) + q.SubmitAttach(context.Background(), "eni-recovery", "i-1", "", "node1", 0, 0) + + // Wait a bit for the async task to start + time.Sleep(100 * time.Millisecond) + + // 2. 
Verify initial state + task, ok := q.GetTaskStatus("eni-recovery") + assert.True(t, ok) + assert.Equal(t, 0, task.RequestedIPv4Count) +} + +func TestENITaskQueue_CleanupStaleTasks_OrphanedTask(t *testing.T) { + executor := createTestExecutor(t) + q := NewENITaskQueue(context.Background(), executor, nil) + + // Add tasks for node1 + q.tasks["eni-1"] = &ENITaskRecord{ENIID: "eni-1", NodeName: "node1", Status: TaskStatusRunning} + q.tasks["eni-2"] = &ENITaskRecord{ENIID: "eni-2", NodeName: "node1", Status: TaskStatusPending} + q.tasks["eni-3"] = &ENITaskRecord{ENIID: "eni-3", NodeName: "node1", Status: TaskStatusCompleted} + q.tasks["eni-4"] = &ENITaskRecord{ENIID: "eni-4", NodeName: "node2", Status: TaskStatusPending} + + // Only eni-1 exists in CR + validENIIDs := map[string]struct{}{ + "eni-1": {}, + } + + // Cleanup should remove eni-2 and eni-3 (not in CR), keep eni-1 and eni-4 (different node) + removed := q.CleanupStaleTasks("node1", validENIIDs, 30*time.Minute) + + assert.Len(t, removed, 2) + assert.Contains(t, removed, "eni-2") + assert.Contains(t, removed, "eni-3") + + // Verify eni-1 still exists + _, ok := q.GetTaskStatus("eni-1") + assert.True(t, ok) + + // Verify eni-4 still exists (different node) + _, ok = q.GetTaskStatus("eni-4") + assert.True(t, ok) + + // Verify eni-2 and eni-3 are gone + _, ok = q.GetTaskStatus("eni-2") + assert.False(t, ok) + _, ok = q.GetTaskStatus("eni-3") + assert.False(t, ok) +} + +func TestENITaskQueue_CleanupStaleTasks_StaleCompletedTask(t *testing.T) { + executor := createTestExecutor(t) + q := NewENITaskQueue(context.Background(), executor, nil) + + now := time.Now() + oldTime := now.Add(-35 * time.Minute) // 35 minutes ago + recentTime := now.Add(-5 * time.Minute) // 5 minutes ago + + // Add tasks with different completion times + q.tasks["eni-1"] = &ENITaskRecord{ + ENIID: "eni-1", + NodeName: "node1", + Status: TaskStatusCompleted, + CompletedAt: &oldTime, // Stale + } + q.tasks["eni-2"] = &ENITaskRecord{ + ENIID: 
"eni-2", + NodeName: "node1", + Status: TaskStatusCompleted, + CompletedAt: &recentTime, // Recent + } + q.tasks["eni-3"] = &ENITaskRecord{ + ENIID: "eni-3", + NodeName: "node1", + Status: TaskStatusFailed, + CompletedAt: &oldTime, // Stale + } + q.tasks["eni-4"] = &ENITaskRecord{ + ENIID: "eni-4", + NodeName: "node1", + Status: TaskStatusRunning, + CompletedAt: nil, // Still running + } + + // All ENIs exist in CR + validENIIDs := map[string]struct{}{ + "eni-1": {}, + "eni-2": {}, + "eni-3": {}, + "eni-4": {}, + } + + // Cleanup should only remove stale completed tasks (eni-1 and eni-3) + removed := q.CleanupStaleTasks("node1", validENIIDs, 30*time.Minute) + + assert.Len(t, removed, 2) + assert.Contains(t, removed, "eni-1") + assert.Contains(t, removed, "eni-3") + + // Verify eni-2 still exists (completed but recent) + _, ok := q.GetTaskStatus("eni-2") + assert.True(t, ok) + + // Verify eni-4 still exists (still running) + _, ok = q.GetTaskStatus("eni-4") + assert.True(t, ok) +} + +func TestENITaskQueue_CleanupStaleTasks_EmptyQueue(t *testing.T) { + executor := createTestExecutor(t) + q := NewENITaskQueue(context.Background(), executor, nil) + + validENIIDs := map[string]struct{}{ + "eni-1": {}, + } + + // Should not panic or error on empty queue + removed := q.CleanupStaleTasks("node1", validENIIDs, 30*time.Minute) + assert.Len(t, removed, 0) +} + +func TestENITaskQueue_CleanupStaleTasks_MixedScenario(t *testing.T) { + executor := createTestExecutor(t) + q := NewENITaskQueue(context.Background(), executor, nil) + + now := time.Now() + oldTime := now.Add(-45 * time.Minute) + recentTime := now.Add(-10 * time.Minute) + + // Mix of orphaned and stale tasks + q.tasks["eni-orphaned"] = &ENITaskRecord{ + ENIID: "eni-orphaned", + NodeName: "node1", + Status: TaskStatusPending, + } + q.tasks["eni-stale"] = &ENITaskRecord{ + ENIID: "eni-stale", + NodeName: "node1", + Status: TaskStatusTimeout, + CompletedAt: &oldTime, + } + q.tasks["eni-valid"] = &ENITaskRecord{ + ENIID: 
"eni-valid", + NodeName: "node1", + Status: TaskStatusCompleted, + CompletedAt: &recentTime, + } + q.tasks["eni-running"] = &ENITaskRecord{ + ENIID: "eni-running", + NodeName: "node1", + Status: TaskStatusRunning, + } + + // Only eni-stale, eni-valid, and eni-running exist in CR (eni-orphaned was deleted) + validENIIDs := map[string]struct{}{ + "eni-stale": {}, + "eni-valid": {}, + "eni-running": {}, + } + + removed := q.CleanupStaleTasks("node1", validENIIDs, 30*time.Minute) + + // Should remove orphaned + stale + assert.Len(t, removed, 2) + assert.Contains(t, removed, "eni-orphaned") + assert.Contains(t, removed, "eni-stale") + + // Valid and running should remain + _, ok := q.GetTaskStatus("eni-valid") + assert.True(t, ok) + _, ok = q.GetTaskStatus("eni-running") + assert.True(t, ok) +} + +func TestENITaskQueue_GetAttachingCount(t *testing.T) { + executor := createTestExecutor(t) + q := NewENITaskQueue(context.Background(), executor, nil) + + // Empty queue should return 0 + assert.Equal(t, 0, q.GetAttachingCount("node1")) + + // Add tasks with different statuses for node1 + q.tasks["eni-1"] = &ENITaskRecord{ENIID: "eni-1", NodeName: "node1", Status: TaskStatusPending} + q.tasks["eni-2"] = &ENITaskRecord{ENIID: "eni-2", NodeName: "node1", Status: TaskStatusRunning} + q.tasks["eni-3"] = &ENITaskRecord{ENIID: "eni-3", NodeName: "node1", Status: TaskStatusCompleted} + q.tasks["eni-4"] = &ENITaskRecord{ENIID: "eni-4", NodeName: "node1", Status: TaskStatusFailed} + q.tasks["eni-5"] = &ENITaskRecord{ENIID: "eni-5", NodeName: "node1", Status: TaskStatusTimeout} + + // Only Pending and Running should be counted + assert.Equal(t, 2, q.GetAttachingCount("node1")) + + // Add tasks for node2 + q.tasks["eni-6"] = &ENITaskRecord{ENIID: "eni-6", NodeName: "node2", Status: TaskStatusPending} + q.tasks["eni-7"] = &ENITaskRecord{ENIID: "eni-7", NodeName: "node2", Status: TaskStatusRunning} + q.tasks["eni-8"] = &ENITaskRecord{ENIID: "eni-8", NodeName: "node2", Status: 
TaskStatusRunning} + + // node1 count should still be 2 + assert.Equal(t, 2, q.GetAttachingCount("node1")) + + // node2 count should be 3 + assert.Equal(t, 3, q.GetAttachingCount("node2")) + + // Non-existent node should return 0 + assert.Equal(t, 0, q.GetAttachingCount("node-nonexistent")) +} + +func TestENITaskQueue_GetAttachingCount_ConcurrentLimit(t *testing.T) { + executor := createTestExecutor(t) + q := NewENITaskQueue(context.Background(), executor, nil) + + // Simulate ECS max concurrent attach limit (5) + ecsMaxConcurrent := 5 + efloMaxConcurrent := 2 + + // Add 4 running tasks for ECS node + for i := 0; i < 4; i++ { + q.tasks[fmt.Sprintf("eni-%d", i)] = &ENITaskRecord{ + ENIID: fmt.Sprintf("eni-%d", i), + NodeName: "ecs-node", + Status: TaskStatusRunning, + } + } + + // ECS node can still accept 1 more task + attachingCount := q.GetAttachingCount("ecs-node") + availableSlots := ecsMaxConcurrent - attachingCount + assert.Equal(t, 4, attachingCount) + assert.Equal(t, 1, availableSlots) + + // Add 1 more task to reach limit + q.tasks["eni-4"] = &ENITaskRecord{ + ENIID: "eni-4", + NodeName: "ecs-node", + Status: TaskStatusPending, + } + + // Now ECS node has no available slots + attachingCount = q.GetAttachingCount("ecs-node") + availableSlots = ecsMaxConcurrent - attachingCount + assert.Equal(t, 5, attachingCount) + assert.Equal(t, 0, availableSlots) + + // Add 1 running task for EFLO node + q.tasks["leni-1"] = &ENITaskRecord{ + ENIID: "leni-1", + NodeName: "eflo-node", + Status: TaskStatusRunning, + } + + // EFLO node can still accept 1 more task + attachingCount = q.GetAttachingCount("eflo-node") + availableSlots = efloMaxConcurrent - attachingCount + assert.Equal(t, 1, attachingCount) + assert.Equal(t, 1, availableSlots) + + // Add 1 more task to reach EFLO limit + q.tasks["leni-2"] = &ENITaskRecord{ + ENIID: "leni-2", + NodeName: "eflo-node", + Status: TaskStatusPending, + } + + // Now EFLO node has no available slots + attachingCount = 
q.GetAttachingCount("eflo-node") + availableSlots = efloMaxConcurrent - attachingCount + assert.Equal(t, 2, attachingCount) + assert.Equal(t, 0, availableSlots) +} + +func TestENITaskQueue_SubmitAttach_BackendAPI(t *testing.T) { + // Setup mock API with expectations for async operations + mockAPI := mocks.NewOpenAPI(t) + tracer := noop.NewTracerProvider().Tracer("test") + executor := ops.NewExecutor(mockAPI, tracer) + + // Setup mock expectations for AttachAsync and WaitForStatus (both ECS and EFLO ENIs) + mockAPI.On("AttachNetworkInterfaceV2", mock.Anything, mock.Anything).Return(nil).Maybe() + mockAPI.On("DescribeNetworkInterfaceV2", mock.Anything, mock.Anything).Return([]*aliyunClient.NetworkInterface{ + {NetworkInterfaceID: "eni-ecs-1", Status: aliyunClient.ENIStatusInUse}, + {NetworkInterfaceID: "leni-eflo-1", Status: aliyunClient.ENIStatusInUse}, + {NetworkInterfaceID: "eni-default-1", Status: aliyunClient.ENIStatusInUse}, + }, nil).Maybe() + + q := NewENITaskQueue(context.Background(), executor, nil) + + // Test ECS backend + ecsCtx := aliyunClient.SetBackendAPI(context.Background(), aliyunClient.BackendAPIECS) + q.SubmitAttach(ecsCtx, "eni-ecs-1", "i-ecs", "", "ecs-node", 5, 0) + + // Wait for task to be added + task, ok := q.GetTaskStatus("eni-ecs-1") + assert.True(t, ok) + assert.Equal(t, aliyunClient.BackendAPIECS, task.BackendAPI) + + // Test EFLO backend + efloCtx := aliyunClient.SetBackendAPI(context.Background(), aliyunClient.BackendAPIEFLO) + q.SubmitAttach(efloCtx, "leni-eflo-1", "i-eflo", "", "eflo-node", 2, 0) + + task, ok = q.GetTaskStatus("leni-eflo-1") + assert.True(t, ok) + assert.Equal(t, aliyunClient.BackendAPIEFLO, task.BackendAPI) + + // Test default (no backend set in context should default to ECS) + defaultCtx := context.Background() + q.SubmitAttach(defaultCtx, "eni-default-1", "i-default", "", "default-node", 3, 0) + + task, ok = q.GetTaskStatus("eni-default-1") + assert.True(t, ok) + // GetBackendAPI returns BackendAPIECS as default 
when not set + assert.Equal(t, aliyunClient.BackendAPIECS, task.BackendAPI) +} diff --git a/pkg/controller/multi-ip/node/metric.go b/pkg/controller/multi-ip/node/metric.go index 848283a5..2266a1a8 100644 --- a/pkg/controller/multi-ip/node/metric.go +++ b/pkg/controller/multi-ip/node/metric.go @@ -31,4 +31,23 @@ var ( }, []string{"method", "node"}, ) + + // ENITaskQueueSize shows the current size of ENI task queue by status + ENITaskQueueSize = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "terway_eni_task_queue_size", + Help: "Current size of ENI task queue", + }, + []string{"status"}, + ) + + // ENIAttachDuration records the duration of ENI attach operations + ENIAttachDuration = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "terway_eni_attach_duration_seconds", + Help: "Duration of ENI attach operations", + Buckets: []float64{1, 3, 5, 10, 20, 30, 60, 120, 180}, + }, + []string{"result", "eni_type"}, + ) ) diff --git a/pkg/controller/multi-ip/node/pool.go b/pkg/controller/multi-ip/node/pool.go index 7336bb98..d950b4e4 100644 --- a/pkg/controller/multi-ip/node/pool.go +++ b/pkg/controller/multi-ip/node/pool.go @@ -50,6 +50,7 @@ import ( networkv1beta1 "github.com/AliyunContainerService/terway/pkg/apis/network.alibabacloud.com/v1beta1" "github.com/AliyunContainerService/terway/pkg/backoff" register "github.com/AliyunContainerService/terway/pkg/controller" + "github.com/AliyunContainerService/terway/pkg/eni/ops" "github.com/AliyunContainerService/terway/pkg/utils" "github.com/AliyunContainerService/terway/pkg/vswitch" "github.com/AliyunContainerService/terway/types" @@ -64,6 +65,12 @@ const ( ecsBatchSize = 10 efloBatchSize = 1 + // Maximum concurrent ENI attach operations per instance + // ECS API limits concurrent attach operations to 5 + ecsMaxConcurrentAttach = 5 + // EFLO API limits concurrent attach operations to 2 + efloMaxConcurrentAttach = 2 + // Event reasons 
EventAllocIPFailed = "AllocIPFailed" EventSyncOpenAPISuccess = "SyncOpenAPISuccess" @@ -108,8 +115,28 @@ func init() { metrics.Registry.MustRegister(ResourcePoolTotal) metrics.Registry.MustRegister(SyncOpenAPITotal) metrics.Registry.MustRegister(ReconcileLatency) + metrics.Registry.MustRegister(ENITaskQueueSize) + metrics.Registry.MustRegister(ENIAttachDuration) tracer := ctrlCtx.TracerProvider.Tracer(ControllerName) + // Create ENI task queue for async attach operations + eniNotifyCh := make(chan string, ctrlCtx.Config.ENIMaxConcurrent) + executor := ops.NewExecutor(ctrlCtx.AliyunClient, tracer) + eniTaskQueue := NewENITaskQueue(ctrlCtx.Context, executor, eniNotifyCh) + + // Start a goroutine to forward eniNotifyCh to EventCh + // This ensures Reconcile is triggered when async ENI attach tasks complete + go func() { + for { + select { + case <-ctrlCtx.Done(): + return + case name := <-eniNotifyCh: + Notify(ctrlCtx, name) + } + } + }() + ctrl, err := controller.New(ControllerName, mgr, controller.Options{ MaxConcurrentReconciles: ctrlCtx.Config.MultiIPNodeMaxConcurrent, Reconciler: &ReconcileNode{ @@ -121,8 +148,9 @@ func init() { fullSyncNodePeriod: fullSyncPeriod, gcPeriod: gcPeriod, tracer: tracer, - eniBatchSize: 5, // operate eni on one reconcile + eniBatchSize: ecsMaxConcurrentAttach, // operate eni on one reconcile v: controlplane.GetViper(), + eniTaskQueue: eniTaskQueue, }, RateLimiter: workqueue.NewTypedMaxOfRateLimiter( workqueue.NewTypedItemExponentialFailureRateLimiter[reconcile.Request](minSyncPeriod, maxSyncPeriod), @@ -151,7 +179,8 @@ type NodeStatus struct { LastGCTime time.Time LastReconcileTime time.Time - Mutex sync.Mutex + ProcessedTaskIDs []string + Mutex sync.Mutex } type ReconcileNode struct { @@ -172,6 +201,9 @@ type ReconcileNode struct { eniBatchSize int v *viper.Viper + + // eniTaskQueue manages async ENI attach operations + eniTaskQueue *ENITaskQueue } type ctxMetaKey struct{} @@ -204,6 +236,14 @@ func (n *ReconcileNode) Reconcile(ctx 
context.Context, request reconcile.Request l.V(2).Info("reconcile node") + now := time.Now() + defer func() { + took := time.Since(now) + if took > 5*time.Second { + l.Info("slow reconcile", "took", took) + } + }() + node := &networkv1beta1.Node{} err := n.client.Get(ctx, client.ObjectKey{Name: request.Name}, node) @@ -233,6 +273,11 @@ func (n *ReconcileNode) Reconcile(ctx context.Context, request reconcile.Request SyncOpenAPITotal.Delete(prometheus.Labels{"node": request.Name}) ReconcileLatency.Delete(prometheus.Labels{"node": request.Name}) + // Clean up async ENI tasks + if n.eniTaskQueue != nil { + n.eniTaskQueue.RemoveTasks(request.Name) + } + if changed { err = n.client.Patch(ctx, node, patch) } @@ -279,6 +324,8 @@ func (n *ReconcileNode) Reconcile(ctx context.Context, request reconcile.Request } else { nodeStatus = prev.(*NodeStatus) } + // Reset processed tasks for this reconciliation loop + nodeStatus.ProcessedTaskIDs = nil if nodeStatus.LastReconcileTime.Add(1 * time.Second).After(time.Now()) { return reconcile.Result{RequeueAfter: 1 * time.Second}, nil @@ -323,6 +370,11 @@ func (n *ReconcileNode) Reconcile(ctx context.Context, request reconcile.Request // initialize warm-up if needed (for new nodes or existing nodes without warm-up status) n.initializeWarmUp(node) + // ensure async tasks are running for Attaching ENIs (recovery) + n.ensureAsyncTasks(ctx, node) + + // do not block ipam + // do not block ipam err = n.syncWithAPI(ctx, node) if err != nil { @@ -348,6 +400,10 @@ func (n *ReconcileNode) Reconcile(ctx context.Context, request reconcile.Request err = n.client.Status().Update(ctx, node) + if err == nil && n.eniTaskQueue != nil && len(nodeStatus.ProcessedTaskIDs) > 0 { + n.eniTaskQueue.DeleteTasks(nodeStatus.ProcessedTaskIDs) + } + if err != nil && nodeStatus.StatusChanged.CompareAndSwap(true, false) { nodeStatus.NeedSyncOpenAPI.Store(true) } @@ -416,12 +472,26 @@ func (n *ReconcileNode) syncWithAPI(ctx context.Context, node *networkv1beta1.No 
eniIDMap[item.NetworkInterfaceID] = struct{}{} remote := newENIFromAPI(item) - if item.Status == aliyunClient.LENIStatusExecuting || - item.Status == aliyunClient.LENIStatusDeleting || - item.Status == aliyunClient.ENIStatusAttaching || - item.Status == aliyunClient.ENIStatusDetaching { + // Handle different ENI statuses + switch item.Status { + case aliyunClient.LENIStatusExecuting, + aliyunClient.LENIStatusDeleting, + aliyunClient.ENIStatusAttaching, + aliyunClient.ENIStatusDetaching: + // Middle statuses that need faster re-sync + // Note: LENIStatusAttaching/LENIStatusDetaching have the same value as ENIStatusAttaching/ENIStatusDetaching + hasMiddleStatus = true + + case aliyunClient.LENIStatusCreateFailed, + aliyunClient.LENIStatusAttachFailed, + aliyunClient.LENIStatusDetachFailed, + aliyunClient.LENIStatusDeleteFailed: + // EFLO terminal failure statuses - mark for deletion + log.Info("EFLO ENI in failed state, marking for deletion") hasMiddleStatus = true + remote.Status = aliyunClient.ENIStatusDeleting } + crENI, ok := node.Status.NetworkInterfaces[item.NetworkInterfaceID] if !ok { log.Info("sync eni with remote, new eni added") @@ -446,7 +516,10 @@ func (n *ReconcileNode) syncWithAPI(ctx context.Context, node *networkv1beta1.No mergeIPMap(log, remote.IPv6, crENI.IPv6) // nb(l1b0k): use Deleting status in cr for eni we don't wanted - if crENI.Status != aliyunClient.ENIStatusDeleting { + // Note: Don't overwrite Attaching status if we're managing it via queue + // The queue will update the status when attach completes + if crENI.Status != aliyunClient.ENIStatusDeleting && + crENI.Status != aliyunClient.ENIStatusAttaching { crENI.Status = remote.Status } } @@ -807,6 +880,118 @@ func assignIPFromLocalPool(log logr.Logger, podsMapper map[string]*PodRequest, i // addIP is called when there is no enough ip for current pods // for cases, eni is attaching, we need to wait +// syncTaskQueueStatus syncs completed tasks from queue to Node CR +func (n *ReconcileNode) 
syncTaskQueueStatus(ctx context.Context, node *networkv1beta1.Node) { + if n.eniTaskQueue == nil { + return + } + + l := logf.FromContext(ctx).WithName("syncTaskQueueStatus") + + // Process completed tasks + completedTasks := n.eniTaskQueue.PeekCompletedTasks(node.Name) + if len(completedTasks) == 0 { + return + } + + l.Info("processing completed tasks", "count", len(completedTasks)) + + for _, task := range completedTasks { + nic, ok := node.Status.NetworkInterfaces[task.ENIID] + + if !ok { + l.Info("ENI not found in Node CR, skip", "eni", task.ENIID) + continue + } + + switch task.Status { + case TaskStatusCompleted: + // Update ENI info from task result + if task.ENIInfo != nil { + nic.Status = aliyunClient.ENIStatusInUse + nic.MacAddress = task.ENIInfo.MacAddress + nic.SecurityGroupIDs = task.ENIInfo.SecurityGroupIDs + nic.PrimaryIPAddress = task.ENIInfo.PrivateIPAddress + nic.NetworkInterfaceTrafficMode = networkv1beta1.NetworkInterfaceTrafficMode(task.ENIInfo.NetworkInterfaceTrafficMode) + + // Convert IP sets using existing helper function + nic.IPv4 = convertIPSet(task.ENIInfo.PrivateIPSets) + nic.IPv6 = convertIPSet(task.ENIInfo.IPv6Set) + + // Track OpenAPI allocations for warm-up + if !node.Status.WarmUpCompleted && node.Status.WarmUpTarget > 0 { + node.Status.WarmUpAllocatedCount += max(len(nic.IPv4), len(nic.IPv6)) + } + + l.Info("ENI attach completed", "eni", task.ENIID, + "ipv4Count", len(nic.IPv4), "ipv6Count", len(nic.IPv6)) + n.record.Event(node, corev1.EventTypeNormal, "ENIAttachSuccess", + fmt.Sprintf("ENI %s is now ready with %d IPv4 and %d IPv6", + task.ENIID, len(nic.IPv4), len(nic.IPv6))) + } else { + // should not happen + nic.Status = aliyunClient.ENIStatusDeleting + } + case TaskStatusFailed, TaskStatusTimeout: + // Mark for deletion + nic.Status = aliyunClient.ENIStatusDeleting + + errMsg := "unknown error" + if task.Error != nil { + errMsg = task.Error.Error() + } + + l.Error(task.Error, "ENI attach failed", "eni", task.ENIID, "status", 
task.Status) + n.record.Event(node, corev1.EventTypeWarning, "ENIAttachFailed", + fmt.Sprintf("ENI %s attach failed: %s", task.ENIID, errMsg)) + } + + MetaCtx(ctx).StatusChanged.Store(true) + MetaCtx(ctx).ProcessedTaskIDs = append(MetaCtx(ctx).ProcessedTaskIDs, task.ENIID) + } +} + +// staleTaskThreshold defines how long a completed task can stay in queue before being cleaned up +const staleTaskThreshold = 30 * time.Minute + +// ensureAsyncTasks checks for Attaching ENIs that are not in the queue and submits them (recovery) +// It also cleans up orphaned and stale tasks to prevent task leakage +func (n *ReconcileNode) ensureAsyncTasks(ctx context.Context, node *networkv1beta1.Node) { + if n.eniTaskQueue == nil { + return + } + + l := logf.FromContext(ctx) + + // Build set of valid ENI IDs from current CR status + validENIIDs := make(map[string]struct{}, len(node.Status.NetworkInterfaces)) + for eniID := range node.Status.NetworkInterfaces { + validENIIDs[eniID] = struct{}{} + } + + // Clean up orphaned and stale tasks + // - Orphaned: ENI no longer exists in CR (was deleted externally or by syncWithAPI) + // - Stale: Completed tasks sitting in queue for more than 30 minutes without being consumed + removedTasks := n.eniTaskQueue.CleanupStaleTasks(node.Name, validENIIDs, staleTaskThreshold) + if len(removedTasks) > 0 { + l.Info("cleaned up stale/orphaned tasks", "count", len(removedTasks), "enis", removedTasks) + } + + // Submit recovery tasks for Attaching ENIs that are not in the queue + for eniID, nic := range node.Status.NetworkInterfaces { + if nic.Status == aliyunClient.ENIStatusAttaching { + // Check if task exists + if _, ok := n.eniTaskQueue.GetTaskStatus(eniID); !ok { + // Task missing, submit recovery task + // We don't know the original requested IP count, so we start with 1 + // The task will update the count after checking the API + n.eniTaskQueue.SubmitAttach(ctx, eniID, node.Spec.NodeMetadata.InstanceID, "", node.Name, 1, 1) + l.Info("submitted 
recovery attach task", "eni", eniID) + } + } + } +} + func (n *ReconcileNode) addIP(ctx context.Context, unSucceedPods map[string]*PodRequest, node *networkv1beta1.Node) error { ctx, span := n.tracer.Start(ctx, "addIP") defer span.End() @@ -817,6 +1002,9 @@ func (n *ReconcileNode) addIP(ctx context.Context, unSucceedPods map[string]*Pod ReconcileLatency.WithLabelValues("addIP", node.Name).Observe(float64(time.Since(startTime).Milliseconds())) }() + // 1. Sync completed tasks from queue to Node CR + n.syncTaskQueueStatus(ctx, node) + if n.getDegradation() == controlplane.DegradationL0 { logr.FromContextOrDiscard(ctx).Info("degradation to L0, skip addIP") return nil @@ -842,10 +1030,10 @@ func (n *ReconcileNode) addIP(ctx context.Context, unSucceedPods map[string]*Pod } // handle trunk/secondary eni - assignEniWithOptions(ctx, node, totalDemand, options, func(option *eniOptions) bool { + assignEniWithOptions(ctx, node, totalDemand, options, n.eniTaskQueue, func(option *eniOptions) bool { return n.validateENI(ctx, option, []eniTypeKey{secondaryKey, trunkKey}) }) - assignEniWithOptions(ctx, node, len(rdmaPods), options, func(option *eniOptions) bool { + assignEniWithOptions(ctx, node, len(rdmaPods), options, n.eniTaskQueue, func(option *eniOptions) bool { return n.validateENI(ctx, option, []eniTypeKey{rdmaKey}) }) @@ -1010,7 +1198,9 @@ func (n *ReconcileNode) validateENI(ctx context.Context, option *eniOptions, eni } if option.eniRef != nil { - if option.eniRef.Status != aliyunClient.ENIStatusInUse { + switch option.eniRef.Status { + case aliyunClient.ENIStatusInUse, aliyunClient.ENIStatusAttaching: + default: return false } @@ -1033,7 +1223,8 @@ func (n *ReconcileNode) validateENI(ctx context.Context, option *eniOptions, eni // assignEniWithOptions determine how many ip should be added to this eni. // In dual stack, ip on eni is automatically balanced. 
-func assignEniWithOptions(ctx context.Context, node *networkv1beta1.Node, toAdd int, options []*eniOptions, filterFunc func(option *eniOptions) bool) { +func assignEniWithOptions(ctx context.Context, node *networkv1beta1.Node, toAdd int, options []*eniOptions, taskQueue *ENITaskQueue, filterFunc func(option *eniOptions) bool) { + l := logf.FromContext(ctx) eniSpec := node.Spec.ENISpec toAddIPv4, toAddIPv6 := 0, 0 @@ -1051,7 +1242,43 @@ func assignEniWithOptions(ctx context.Context, node *networkv1beta1.Node, toAdd } if option.eniRef != nil { - // exist eni + // For Attaching status ENI, check requested IP count from task queue + if option.eniRef.Status == aliyunClient.ENIStatusAttaching { + task, found := taskQueue.GetTaskStatus(option.eniRef.ID) + if found { + // Requested IP counts from the task + requestedIPv4 := task.RequestedIPv4Count + requestedIPv6 := task.RequestedIPv6Count + + l.V(4).Info("found Attaching ENI in task queue", + "eni", option.eniRef.ID, + "requestedIPv4", requestedIPv4, + "requestedIPv6", requestedIPv6, + "toAddIPv4", toAddIPv4, + "toAddIPv6", toAddIPv6) + + // If requested count >= needed count, no need to request more + if toAddIPv4 > 0 && requestedIPv4 >= toAddIPv4 { + toAddIPv4 = 0 + } else if toAddIPv4 > 0 { + // Partially satisfied, subtract already requested + toAddIPv4 -= requestedIPv4 + } + + if toAddIPv6 > 0 && requestedIPv6 >= toAddIPv6 { + toAddIPv6 = 0 + } else if toAddIPv6 > 0 { + toAddIPv6 -= requestedIPv6 + } + // for not found , this may happen when controller is restarted , + } + // Clear addIP request for Attaching ENI (can't add IPs to Attaching ENI) + option.addIPv4N = 0 + option.addIPv6N = 0 + continue // Skip Attaching status ENI + } + + // For InUse status ENI, normal processing (existing logic) if toAddIPv4 > 0 { toAddIPv4 -= len(getAllocatable(option.eniRef.IPv4)) @@ -1110,6 +1337,8 @@ func (n *ReconcileNode) allocateFromOptions(ctx context.Context, node *networkv1 ctx, span := n.tracer.Start(ctx, 
"allocateFromOptions") defer span.End() + l := logf.FromContext(ctx).WithName("allocateFromOptions") + wg := wait.Group{} lock := sync.Mutex{} var errs []error @@ -1118,8 +1347,26 @@ func (n *ReconcileNode) allocateFromOptions(ctx context.Context, node *networkv1 createBatchSize := min(n.eniBatchSize, 1) if isEFLO(ctx) { - createBatchSize = 2 + createBatchSize = efloMaxConcurrentAttach } + + // Get current attaching count from task queue + currentAttachingCount := 0 + if n.eniTaskQueue != nil { + currentAttachingCount = n.eniTaskQueue.GetAttachingCount(node.Name) + } + + // Calculate available slots for new ENI attach operations + availableSlots := createBatchSize - currentAttachingCount + if availableSlots <= 0 { + l.Info("max concurrent attach limit reached, skip creating new ENIs", + "maxConcurrent", createBatchSize, "currentAttaching", currentAttachingCount) + availableSlots = 0 + } + + // Track how many new ENI create operations we've submitted + newENICreateCount := 0 + for _, option := range options { if option.addIPv6N <= 0 && option.addIPv4N <= 0 { continue @@ -1128,6 +1375,17 @@ func (n *ReconcileNode) allocateFromOptions(ctx context.Context, node *networkv1 continue } + // For new ENI creation (async attach), check concurrent limit + if option.eniRef == nil { + if newENICreateCount >= availableSlots { + l.V(4).Info("skip creating new ENI due to concurrent attach limit", + "maxConcurrent", createBatchSize, "currentAttaching", currentAttachingCount, + "newENICreateCount", newENICreateCount) + continue + } + newENICreateCount++ + } + node.Status.LastModifiedTime = metav1.Now() inFlight++ @@ -1179,6 +1437,20 @@ func (n *ReconcileNode) handleStatus(ctx context.Context, node *networkv1beta1.N log := l.WithValues("eni", eni.ID, "status", eni.Status) switch eni.Status { + case aliyunClient.LENIStatusCreateFailed, + aliyunClient.LENIStatusAttachFailed, + aliyunClient.LENIStatusDetachFailed, + aliyunClient.LENIStatusDeleteFailed: + // EFLO failed statuses - 
directly delete + if !isEFLO(ctx) { + log.Info("non-EFLO ENI with LENI failed status, skipping") + continue + } + + log.Info("cleaning up EFLO ENI in failed state") + // mark as deleting + eni.Status = aliyunClient.ENIStatusDeleting + case aliyunClient.ENIStatusDeleting, aliyunClient.ENIStatusDetaching: if !isEFLO(ctx) { err := n.aliyun.GetECS().DetachNetworkInterface(ctx, eni.ID, node.Spec.NodeMetadata.InstanceID, "") @@ -1330,6 +1602,8 @@ func (n *ReconcileNode) createENI(ctx context.Context, node *networkv1beta1.Node ReconcileLatency.WithLabelValues("createENI", node.Name).Observe(float64(time.Since(startTime).Milliseconds())) }() + l := logf.FromContext(ctx) + // for new eni typeOption, ok := EniOptions[opt.eniTypeKey] if !ok { @@ -1370,6 +1644,7 @@ func (n *ReconcileNode) createENI(ctx context.Context, node *networkv1beta1.Node Backoff: &bo.Backoff, } + // 1. Create ENI via OpenAPI result, err := n.aliyun.CreateNetworkInterfaceV2(ctx, typeOption, createOpts) if err != nil { if apiErr.ErrorCodeIs(err, apiErr.InvalidVSwitchIDIPNotEnough, apiErr.QuotaExceededPrivateIPAddress) { @@ -1381,84 +1656,44 @@ func (n *ReconcileNode) createENI(ctx context.Context, node *networkv1beta1.Node return err } - MetaCtx(ctx).Mutex.Lock() - if node.Status.NetworkInterfaces == nil { - node.Status.NetworkInterfaces = make(map[string]*networkv1beta1.Nic) - } - MetaCtx(ctx).Mutex.Unlock() + l.Info("ENI created, submitting async attach task", "eni", result.NetworkInterfaceID, + "requestedIPv4", opt.addIPv4N, "requestedIPv6", opt.addIPv6N) - defer func() { - if err != nil { - rollbackCtx, rollbackCancel := context.WithTimeout(context.Background(), 60*time.Second) - defer rollbackCancel() - - if isEFLO(ctx) { - rollbackCtx = aliyunClient.SetBackendAPI(rollbackCtx, aliyunClient.BackendAPIEFLO) - } + // 2. 
Submit attach task to queue (async, non-blocking) + // This never fails - it only adds a task to in-memory queue + n.eniTaskQueue.SubmitAttach(ctx, + result.NetworkInterfaceID, + node.Spec.NodeMetadata.InstanceID, + "", // trunkENIID - not used for secondary ENI + node.Name, + opt.addIPv4N, + opt.addIPv6N) - innerErr := n.aliyun.DeleteNetworkInterfaceV2(rollbackCtx, result.NetworkInterfaceID) - if innerErr == nil { - return - } - logf.FromContext(ctx).Error(innerErr, "failed to delete eni, this may result leak", "eni", result.NetworkInterfaceID) - // if we failed to delete the eni , we need to store the eni - MetaCtx(ctx).Mutex.Lock() - node.Status.NetworkInterfaces[result.NetworkInterfaceID] = &networkv1beta1.Nic{ - ID: result.NetworkInterfaceID, - Status: aliyunClient.ENIStatusDeleting, - NetworkInterfaceType: networkv1beta1.ENIType(result.Type), - NetworkInterfaceTrafficMode: networkv1beta1.NetworkInterfaceTrafficMode(result.NetworkInterfaceTrafficMode), - } - MetaCtx(ctx).Mutex.Unlock() - - MetaCtx(ctx).StatusChanged.Store(true) - } - }() - - if !isEFLO(ctx) { - err = n.aliyun.GetECS().AttachNetworkInterface(ctx, &aliyunClient.AttachNetworkInterfaceOptions{ - NetworkInterfaceID: &result.NetworkInterfaceID, - InstanceID: &node.Spec.NodeMetadata.InstanceID, - TrunkNetworkInstanceID: nil, - NetworkCardIndex: nil, - Backoff: nil, - }) - if err != nil { - n.record.Event(node, corev1.EventTypeWarning, types.EventCreateENIFailed, - fmt.Sprintf("Failed to attach ENI %s: %v", result.NetworkInterfaceID, err)) - return err - } - - time.Sleep(3 * time.Second) - } - - eni, err := n.aliyun.WaitForNetworkInterfaceV2(ctx, result.NetworkInterfaceID, aliyunClient.ENIStatusInUse, backoff.Backoff(backoff.WaitENIStatus).Backoff, false) - if err != nil { - n.record.Event(node, corev1.EventTypeWarning, types.EventCreateENIFailed, - fmt.Sprintf("Failed to wait ENI %s ready: %v", result.NetworkInterfaceID, err)) - return err + // 3. 
Mark ENI as Attaching in Node CR + networkInterface := &networkv1beta1.Nic{ + ID: result.NetworkInterfaceID, + Status: aliyunClient.ENIStatusAttaching, + VSwitchID: vsw.ID, + IPv4CIDR: vsw.IPv4CIDR, + IPv6CIDR: vsw.IPv6CIDR, + NetworkInterfaceType: networkv1beta1.ENIType(result.Type), + NetworkInterfaceTrafficMode: networkv1beta1.NetworkInterfaceTrafficMode(result.NetworkInterfaceTrafficMode), } - networkInterface := newENIFromAPI(eni) - // update vsw - networkInterface.IPv4CIDR = vsw.IPv4CIDR - networkInterface.IPv6CIDR = vsw.IPv6CIDR - MetaCtx(ctx).Mutex.Lock() - node.Status.NetworkInterfaces[eni.NetworkInterfaceID] = networkInterface - // if changed , but we update failed , that case ,need to sync openAPI... - - // Track OpenAPI allocations for warm-up - if !node.Status.WarmUpCompleted && node.Status.WarmUpTarget > 0 { - node.Status.WarmUpAllocatedCount += max(len(networkInterface.IPv4), len(networkInterface.IPv6)) + if node.Status.NetworkInterfaces == nil { + node.Status.NetworkInterfaces = make(map[string]*networkv1beta1.Nic) } + node.Status.NetworkInterfaces[result.NetworkInterfaceID] = networkInterface MetaCtx(ctx).Mutex.Unlock() MetaCtx(ctx).StatusChanged.Store(true) n.record.Event(node, corev1.EventTypeNormal, types.EventCreateENISucceed, - fmt.Sprintf("Successfully created ENI %s type=%s with %d IPv4 and %d IPv6 addresses", - eni.NetworkInterfaceID, opt.eniTypeKey.ENIType, opt.addIPv4N, opt.addIPv6N)) + fmt.Sprintf("ENI %s created with %d IPv4, attach in progress", + result.NetworkInterfaceID, opt.addIPv4N)) + + // Return immediately, don't block waiting for attach return nil } diff --git a/pkg/controller/multi-ip/node/pool_test.go b/pkg/controller/multi-ip/node/pool_test.go index f85f1c26..e0ea8c61 100644 --- a/pkg/controller/multi-ip/node/pool_test.go +++ b/pkg/controller/multi-ip/node/pool_test.go @@ -22,7 +22,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" - 
"k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -31,6 +30,7 @@ import ( "github.com/AliyunContainerService/terway/pkg/aliyun/client/mocks" networkv1beta1 "github.com/AliyunContainerService/terway/pkg/apis/network.alibabacloud.com/v1beta1" "github.com/AliyunContainerService/terway/pkg/backoff" + "github.com/AliyunContainerService/terway/pkg/eni/ops" vswpool "github.com/AliyunContainerService/terway/pkg/vswitch" ) @@ -1215,13 +1215,161 @@ func Test_assignEniWithOptions(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - assignEniWithOptions(context.Background(), tt.args.node, tt.args.toAdd, tt.args.options, tt.args.filterFunc) + assignEniWithOptions(context.Background(), tt.args.node, tt.args.toAdd, tt.args.options, nil, tt.args.filterFunc) tt.checkResult(t, tt.args.options) }) } } +func Test_assignEniWithOptions_AttachingENI(t *testing.T) { + // Setup mock task queue + notifyCh := make(chan string, 10) + mockAPI := mocks.NewOpenAPI(t) + tracer := noop.NewTracerProvider().Tracer("test") + executor := ops.NewExecutor(mockAPI, tracer) + taskQueue := NewENITaskQueue(context.Background(), executor, notifyCh) + + // Add an Attaching ENI with 5 requested IPs to the task queue + taskQueue.tasks["eni-attaching"] = &ENITaskRecord{ + ENIID: "eni-attaching", + Status: TaskStatusRunning, + RequestedIPv4Count: 5, + RequestedIPv6Count: 0, + } + + t.Run("attaching ENI satisfies full demand", func(t *testing.T) { + node := &networkv1beta1.Node{ + Spec: networkv1beta1.NodeSpec{ + NodeCap: networkv1beta1.NodeCap{ + IPv4PerAdapter: 10, + IPv6PerAdapter: 10, + }, + ENISpec: &networkv1beta1.ENISpec{ + EnableIPv4: true, + EnableIPv6: false, + }, + }, + Status: networkv1beta1.NodeStatus{ + NetworkInterfaces: map[string]*networkv1beta1.Nic{ + "eni-attaching": { + ID: "eni-attaching", + Status: 
aliyunClient.ENIStatusAttaching, + }, + }, + }, + } + + options := []*eniOptions{ + { + eniTypeKey: secondaryKey, + eniRef: node.Status.NetworkInterfaces["eni-attaching"], + }, + { + eniTypeKey: secondaryKey, // New ENI slot + }, + } + + // Need 3 IPs, but Attaching ENI already requested 5 + assignEniWithOptions(context.Background(), node, 3, options, taskQueue, func(o *eniOptions) bool { return true }) + + // Attaching ENI should not request more IPs + assert.Equal(t, 0, options[0].addIPv4N) + // New ENI slot should also not request (demand is satisfied) + assert.Equal(t, 0, options[1].addIPv4N) + }) + + t.Run("attaching ENI partially satisfies demand", func(t *testing.T) { + // Add a task with 3 requested IPs + taskQueue.tasks["eni-partial"] = &ENITaskRecord{ + ENIID: "eni-partial", + Status: TaskStatusRunning, + RequestedIPv4Count: 3, + RequestedIPv6Count: 0, + } + + node := &networkv1beta1.Node{ + Spec: networkv1beta1.NodeSpec{ + NodeCap: networkv1beta1.NodeCap{ + IPv4PerAdapter: 10, + IPv6PerAdapter: 10, + }, + ENISpec: &networkv1beta1.ENISpec{ + EnableIPv4: true, + EnableIPv6: false, + }, + }, + Status: networkv1beta1.NodeStatus{ + NetworkInterfaces: map[string]*networkv1beta1.Nic{ + "eni-partial": { + ID: "eni-partial", + Status: aliyunClient.ENIStatusAttaching, + }, + }, + }, + } + + options := []*eniOptions{ + { + eniTypeKey: secondaryKey, + eniRef: node.Status.NetworkInterfaces["eni-partial"], + }, + { + eniTypeKey: secondaryKey, // New ENI slot + }, + } + + // Need 5 IPs, but Attaching ENI only requested 3, so need 2 more + assignEniWithOptions(context.Background(), node, 5, options, taskQueue, func(o *eniOptions) bool { return true }) + + // Attaching ENI should not request more IPs (can't add to Attaching) + assert.Equal(t, 0, options[0].addIPv4N) + // New ENI slot should request the remaining 2 IPs + assert.Equal(t, 2, options[1].addIPv4N) + }) + + t.Run("attaching ENI without task queue entry", func(t *testing.T) { + node := &networkv1beta1.Node{ + 
Spec: networkv1beta1.NodeSpec{ + NodeCap: networkv1beta1.NodeCap{ + IPv4PerAdapter: 10, + IPv6PerAdapter: 10, + }, + ENISpec: &networkv1beta1.ENISpec{ + EnableIPv4: true, + EnableIPv6: false, + }, + }, + Status: networkv1beta1.NodeStatus{ + NetworkInterfaces: map[string]*networkv1beta1.Nic{ + "eni-unknown": { + ID: "eni-unknown", + Status: aliyunClient.ENIStatusAttaching, + }, + }, + }, + } + + options := []*eniOptions{ + { + eniTypeKey: secondaryKey, + eniRef: node.Status.NetworkInterfaces["eni-unknown"], + }, + { + eniTypeKey: secondaryKey, // New ENI slot + }, + } + + // Need 3 IPs, Attaching ENI not in queue (e.g., controller restart) + assignEniWithOptions(context.Background(), node, 3, options, taskQueue, func(o *eniOptions) bool { return true }) + + // Attaching ENI should not request more IPs + assert.Equal(t, 0, options[0].addIPv4N) + // New ENI slot should request all 3 IPs + assert.Equal(t, 3, options[1].addIPv4N) + }) +} + func TestReconcileNode_adjustPool(t *testing.T) { type args struct { ctx context.Context @@ -2209,90 +2357,111 @@ var _ = Describe("Test ReconcileNode", func() { }).WithTimeout(10 * time.Second).Should(Succeed()) }) - Context("Test create err", func() { - It("Test create err", func() { + Context("Test attach failure - syncTaskQueueStatus with failed task", func() { + It("Should mark ENI as Deleting when task queue reports failure", func() { + ctx := context.TODO() + ctx = MetaIntoCtx(ctx) - instanceID := "instanceID" - openAPI.On("DescribeNetworkInterfaceV2", mock.Anything, &aliyunClient.DescribeNetworkInterfaceOptions{ - InstanceID: &instanceID, - }).Return([]*aliyunClient.NetworkInterface{}, nil).Maybe() - vpcClient.On("DescribeVSwitchByID", mock.Anything, "vsw-1").Return(&vpc.VSwitch{ - VSwitchId: "vsw-1", - ZoneId: "zone-1", - AvailableIpAddressCount: 10, - CidrBlock: "172.0.0.0/16", - Ipv6CidrBlock: "fd00::/64", - }, nil).Maybe() + // Setup mock API + mockHelper := NewMockAPIHelperWithT(GinkgoT()) + openAPI, _, _ = 
mockHelper.GetMocks() + openAPI.On("AttachNetworkInterfaceV2", mock.Anything, mock.Anything).Return(nil).Maybe() + openAPI.On("DescribeNetworkInterfaceV2", mock.Anything, mock.Anything).Return([]*aliyunClient.NetworkInterface{}, nil).Maybe() - openAPI.On("CreateNetworkInterfaceV2", mock.Anything, mock.Anything, mock.Anything).Return(&aliyunClient.NetworkInterface{ - NetworkInterfaceID: "eni-1", - Type: "Secondary", - NetworkInterfaceTrafficMode: "Standard", - }, nil).Once() - openAPI.On("CreateNetworkInterfaceV2", mock.Anything, mock.Anything, mock.Anything).Return(&aliyunClient.NetworkInterface{ - NetworkInterfaceID: "eni-2", - Type: "Secondary", - NetworkInterfaceTrafficMode: "Standard", - }, nil).Once() - openAPI.On("DeleteNetworkInterfaceV2", mock.Anything, "eni-1").Return(fmt.Errorf("faile to del eni")) - openAPI.On("WaitForNetworkInterfaceV2", mock.Anything, "eni-1", mock.Anything, mock.Anything, mock.Anything).Return(nil, fmt.Errorf("timeout")) - openAPI.On("WaitForNetworkInterfaceV2", mock.Anything, "eni-2", mock.Anything, mock.Anything, mock.Anything).Return(&aliyunClient.NetworkInterface{ - Status: "InUse", - MacAddress: "", - NetworkInterfaceID: "eni-2", - VSwitchID: "vsw-1", - PrivateIPAddress: "127.0.0.2", - PrivateIPSets: []aliyunClient.IPSet{ - { - IPAddress: "127.0.0.2", - Primary: true, - }, - }, - ZoneID: "zone-1", - SecurityGroupIDs: nil, - ResourceGroupID: "", - Type: "Secondary", - InstanceID: "", - TrunkNetworkInterfaceID: "", - NetworkInterfaceTrafficMode: "Standard", - DeviceIndex: 0, - CreationTime: "", - }, nil) + // Create reconciler with task queue + reconciler := NewReconcilerBuilder(). + WithAliyun(openAPI). + WithVSwitchPool(switchPool). + WithDefaults(). 
+ Build() - By("reconcile") - controllerReconciler := &ReconcileNode{ - client: k8sClient, - scheme: k8sClient.Scheme(), - aliyun: openAPI, - tracer: noop.NewTracerProvider().Tracer(""), - vswpool: switchPool, - record: record.NewFakeRecorder(100), - } + By("Setting up node with Attaching ENI") + eniAttaching := BuildENIWithCustomIPs("eni-1", aliyunClient.ENIStatusAttaching, nil, nil) + eniAttaching.NetworkInterfaceType = networkv1beta1.ENITypeSecondary - _, err := controllerReconciler.Reconcile(ctx, reconcile.Request{ - NamespacedName: typeNamespacedName, - }) - Expect(err).NotTo(HaveOccurred()) + node := NewNodeFactory("test-node"). + WithECS(). + WithInstanceID("i-test"). + WithExistingENIs(eniAttaching). + Build() - By("check this cr") - node := &networkv1beta1.Node{ - ObjectMeta: metav1.ObjectMeta{Name: name}, + By("Injecting failed task into queue") + now := time.Now() + reconciler.eniTaskQueue.tasks["eni-1"] = &ENITaskRecord{ + ENIID: "eni-1", + Operation: OpAttach, + InstanceID: "i-test", + NodeName: "test-node", + Status: TaskStatusFailed, + CreatedAt: now.Add(-1 * time.Minute), + CompletedAt: &now, + RequestedIPv4Count: 5, + RequestedIPv6Count: 0, + ENIInfo: nil, + Error: fmt.Errorf("attach timeout"), } - err = k8sClient.Get(ctx, typeNamespacedName, node) - Expect(err).NotTo(HaveOccurred()) - Expect(len(node.Status.NetworkInterfaces)).To(Equal(2)) - - By("delete failed eni should be kept") - Expect(node.Status.NetworkInterfaces["eni-1"].Status).To(Equal("Deleting")) - Expect(node.Status.NetworkInterfaces["eni-2"].Status).To(Equal("InUse")) + By("Calling syncTaskQueueStatus to process failed task") + reconciler.syncTaskQueueStatus(ctx, node) + By("Verifying ENI status changed to Deleting") + Expect(node.Status.NetworkInterfaces).To(HaveKey("eni-1")) + Expect(node.Status.NetworkInterfaces["eni-1"].Status).To(Equal(aliyunClient.ENIStatusDeleting)) Expect(node.Status.NetworkInterfaces["eni-1"].NetworkInterfaceType).To(Equal(networkv1beta1.ENITypeSecondary)) - 
Expect(node.Status.NetworkInterfaces["eni-2"].NetworkInterfaceType).To(Equal(networkv1beta1.ENITypeSecondary)) - By("check pods ip should allocated") - Expect(node.Status.NetworkInterfaces["eni-2"].IPv4["127.0.0.2"].Primary).To(Equal(true)) - Expect(node.Status.NetworkInterfaces["eni-2"].IPv4["127.0.0.2"].PodID).To(Not(BeEmpty())) + By("Verifying status changed flag is set") + Expect(MetaCtx(ctx).StatusChanged.Load()).To(BeTrue()) + }) + + It("Should mark ENI as Deleting when task queue reports timeout", func() { + ctx := context.TODO() + ctx = MetaIntoCtx(ctx) + + // Setup mock API + mockHelper := NewMockAPIHelperWithT(GinkgoT()) + openAPI, _, _ = mockHelper.GetMocks() + openAPI.On("AttachNetworkInterfaceV2", mock.Anything, mock.Anything).Return(nil).Maybe() + openAPI.On("DescribeNetworkInterfaceV2", mock.Anything, mock.Anything).Return([]*aliyunClient.NetworkInterface{}, nil).Maybe() + + // Create reconciler with task queue + reconciler := NewReconcilerBuilder(). + WithAliyun(openAPI). + WithVSwitchPool(switchPool). + WithDefaults(). + Build() + + By("Setting up node with Attaching ENI") + eniAttaching := BuildENIWithCustomIPs("eni-1", aliyunClient.ENIStatusAttaching, nil, nil) + eniAttaching.NetworkInterfaceType = networkv1beta1.ENITypeSecondary + + node := NewNodeFactory("test-node"). + WithECS(). + WithInstanceID("i-test"). + WithExistingENIs(eniAttaching). 
+ Build() + + By("Injecting timeout task into queue") + now := time.Now() + reconciler.eniTaskQueue.tasks["eni-1"] = &ENITaskRecord{ + ENIID: "eni-1", + Operation: OpAttach, + InstanceID: "i-test", + NodeName: "test-node", + Status: TaskStatusTimeout, + CreatedAt: now.Add(-5 * time.Minute), + CompletedAt: &now, + RequestedIPv4Count: 3, + RequestedIPv6Count: 0, + ENIInfo: nil, + Error: fmt.Errorf("attach timeout after 5m"), + } + + By("Calling syncTaskQueueStatus to process timeout task") + reconciler.syncTaskQueueStatus(ctx, node) + + By("Verifying ENI status changed to Deleting") + Expect(node.Status.NetworkInterfaces).To(HaveKey("eni-1")) + Expect(node.Status.NetworkInterfaces["eni-1"].Status).To(Equal(aliyunClient.ENIStatusDeleting)) }) }) @@ -2392,14 +2561,13 @@ var _ = Describe("Test ReconcileNode", func() { }, nil).Once() By("reconcile") - controllerReconciler := &ReconcileNode{ - client: k8sClient, - scheme: k8sClient.Scheme(), - aliyun: openAPI, - tracer: noop.NewTracerProvider().Tracer(""), - vswpool: switchPool, - record: record.NewFakeRecorder(100), - } + controllerReconciler := NewReconcilerBuilder(). + WithClient(k8sClient). + WithScheme(k8sClient.Scheme()). + WithAliyun(openAPI). + WithVSwitchPool(switchPool). + WithDefaults(). 
+ Build() _, err := controllerReconciler.Reconcile(ctx, reconcile.Request{ NamespacedName: typeNamespacedName, @@ -2647,8 +2815,8 @@ var _ = Describe("Test ReconcileNode", func() { }) }) - Context("Test createENI", func() { - It("Should create ENI successfully", func() { + Context("Test createENI with async queue", func() { + It("Should create ENI and submit async attach task", func() { ctx := context.TODO() ctx = MetaIntoCtx(ctx) @@ -2663,25 +2831,20 @@ var _ = Describe("Test ReconcileNode", func() { // Use MockAPIHelper to setup API expectations mockHelper := NewMockAPIHelperWithT(GinkgoT()) - openAPI, vpcClient, ecsClient = mockHelper.GetMocks() + openAPI, _, _ = mockHelper.GetMocks() - // Setup the ENI creation flow + // Setup the ENI creation flow - only CreateNetworkInterfaceV2 is called synchronously mockHelper.SetupCreateENI("eni-1", aliyunClient.ENITypeSecondary, WithMacAddress("00:00:00:00:00:01"), WithIPv4("192.168.0.1"), - WithIPv6("fd00::1"), ) - ecsClient.On("AttachNetworkInterface", mock.Anything, mock.Anything).Return(nil) - - // Build the complete mock ENI with all fields - completeENI := BuildMockENI("eni-1", aliyunClient.ENITypeSecondary, aliyunClient.ENIStatusInUse, - "vsw-1", "cn-hangzhou-k", - []string{"192.168.0.1"}, - []string{"fd00::1"}) - completeENI.MacAddress = "00:00:00:00:00:01" - - mockHelper.SetupWaitForENI("eni-1", aliyunClient.ENIStatusInUse, completeENI, nil) + // AttachNetworkInterfaceV2 and DescribeNetworkInterfaceV2 are used by async queue + openAPI.On("AttachNetworkInterfaceV2", mock.Anything, mock.Anything).Return(nil).Maybe() + openAPI.On("DescribeNetworkInterfaceV2", mock.Anything, mock.Anything).Return([]*aliyunClient.NetworkInterface{ + BuildMockENI("eni-1", aliyunClient.ENITypeSecondary, aliyunClient.ENIStatusInUse, + "vsw-1", "cn-hangzhou-k", []string{"192.168.0.1"}, nil), + }, nil).Maybe() // Setup vSwitch in pool switchPool.Add(&vswpool.Switch{ @@ -2703,23 +2866,35 @@ var _ = Describe("Test ReconcileNode", func() { 
By("Creating ENI") opt := &eniOptions{ eniTypeKey: secondaryKey, - addIPv4N: 0, + addIPv4N: 5, addIPv6N: 0, } err := reconciler.createENI(ctx, node, opt) Expect(err).NotTo(HaveOccurred()) - By("Verifying the ENI was created correctly") + By("Verifying ENI is in Attaching status in node CRD") Expect(node.Status.NetworkInterfaces).To(HaveKey("eni-1")) eni := node.Status.NetworkInterfaces["eni-1"] Expect(eni.ID).To(Equal("eni-1")) - Expect(eni.MacAddress).To(Equal("00:00:00:00:00:01")) - Expect(eni.IPv4).To(HaveKey("192.168.0.1")) - Expect(eni.IPv6).To(HaveKey("fd00::1")) + Expect(eni.Status).To(Equal(aliyunClient.ENIStatusAttaching)) + Expect(eni.VSwitchID).To(Equal("vsw-1")) + Expect(eni.NetworkInterfaceType).To(Equal(networkv1beta1.ENITypeSecondary)) + + By("Verifying task was submitted to queue") + task, ok := reconciler.eniTaskQueue.GetTaskStatus("eni-1") + Expect(ok).To(BeTrue(), "Task should be in queue") + Expect(task.ENIID).To(Equal("eni-1")) + Expect(task.InstanceID).To(Equal("i-1")) + Expect(task.NodeName).To(Equal("test-node")) + Expect(task.RequestedIPv4Count).To(Equal(5)) + Expect(task.RequestedIPv6Count).To(Equal(0)) + + By("Verifying status changed flag is set") + Expect(MetaCtx(ctx).StatusChanged.Load()).To(BeTrue()) }) - It("Should handle ENI creation failure", func() { + It("Should handle ENI creation API failure", func() { ctx := context.TODO() ctx = MetaIntoCtx(ctx) @@ -2735,7 +2910,7 @@ var _ = Describe("Test ReconcileNode", func() { // Setup mock to return error createErr := fmt.Errorf("create eni failed") mockHelper := NewMockAPIHelperWithT(GinkgoT()) - openAPI, vpcClient, ecsClient = mockHelper.GetMocks() + openAPI, _, _ = mockHelper.GetMocks() mockHelper.SetupCreateENIWithError(createErr) // Setup vSwitch in pool @@ -2764,10 +2939,14 @@ var _ = Describe("Test ReconcileNode", func() { err := reconciler.createENI(ctx, node, opt) Expect(err).To(HaveOccurred()) - Expect(err).To(Equal(createErr)) + Expect(err.Error()).To(ContainSubstring("create 
eni failed")) By("Verifying the ENI was not added to node status") Expect(node.Status.NetworkInterfaces).To(BeEmpty()) + + By("Verifying no task was submitted to queue") + _, ok := reconciler.eniTaskQueue.GetTaskStatus("eni-1") + Expect(ok).To(BeFalse(), "No task should be in queue after creation failure") }) It("Should handle VSwitchID not found error", func() { @@ -2786,7 +2965,7 @@ var _ = Describe("Test ReconcileNode", func() { // Use ReconcilerBuilder to create reconciler // Note: Not adding any vSwitch to switchPool, so GetOne will fail mockHelper := NewMockAPIHelperWithT(GinkgoT()) - openAPI, vpcClient, ecsClient = mockHelper.GetMocks() + openAPI, _, _ = mockHelper.GetMocks() reconciler := NewReconcilerBuilder(). WithAliyun(openAPI). @@ -2811,6 +2990,146 @@ var _ = Describe("Test ReconcileNode", func() { }) }) + Context("Test ensureAsyncTasks - Recovery on Restart", func() { + It("Should submit recovery task for Attaching ENI after restart", func() { + ctx := context.TODO() + ctx = MetaIntoCtx(ctx) + + // Simulate restart scenario: Node CR has Attaching ENI but task queue is empty + eniAttaching := BuildENIWithCustomIPs("eni-restart-1", aliyunClient.ENIStatusAttaching, nil, nil) + eniInUse := BuildENIWithCustomIPs("eni-inuse", aliyunClient.ENIStatusInUse, nil, nil) + + node := NewNodeFactory("test-node"). + WithECS(). + WithInstanceID("i-test"). + WithExistingENIs(eniAttaching, eniInUse). + Build() + + // Setup mock API - ECS attach is idempotent, won't fail on retry + mockHelper := NewMockAPIHelperWithT(GinkgoT()) + openAPI, _, _ = mockHelper.GetMocks() + openAPI.On("AttachNetworkInterfaceV2", mock.Anything, mock.Anything).Return(nil).Maybe() + openAPI.On("DescribeNetworkInterfaceV2", mock.Anything, mock.Anything).Return([]*aliyunClient.NetworkInterface{ + { + NetworkInterfaceID: "eni-restart-1", + Status: aliyunClient.ENIStatusInUse, + }, + }, nil).Maybe() + + // Create reconciler with task queue + reconciler := NewReconcilerBuilder(). 
+ WithAliyun(openAPI). + WithVSwitchPool(switchPool). + WithDefaults(). + Build() + + // Verify task queue is empty initially + Expect(reconciler.eniTaskQueue.HasPendingTasks("test-node")).To(BeFalse()) + + By("Calling ensureAsyncTasks to trigger recovery") + reconciler.ensureAsyncTasks(ctx, node) + + By("Verifying recovery task was submitted for Attaching ENI") + task, ok := reconciler.eniTaskQueue.GetTaskStatus("eni-restart-1") + Expect(ok).To(BeTrue(), "Recovery task should be submitted") + Expect(task.ENIID).To(Equal("eni-restart-1")) + Expect(task.InstanceID).To(Equal("i-test")) + + By("Verifying no task was submitted for InUse ENI") + _, ok = reconciler.eniTaskQueue.GetTaskStatus("eni-inuse") + Expect(ok).To(BeFalse(), "No task should be submitted for InUse ENI") + }) + + It("Should not submit duplicate task if task already exists", func() { + ctx := context.TODO() + ctx = MetaIntoCtx(ctx) + + // Setup mock API + mockHelper := NewMockAPIHelperWithT(GinkgoT()) + openAPI, _, _ = mockHelper.GetMocks() + openAPI.On("AttachNetworkInterfaceV2", mock.Anything, mock.Anything).Return(nil).Maybe() + openAPI.On("DescribeNetworkInterfaceV2", mock.Anything, mock.Anything).Return([]*aliyunClient.NetworkInterface{ + { + NetworkInterfaceID: "eni-existing", + Status: aliyunClient.ENIStatusInUse, + }, + }, nil).Maybe() + + // Create reconciler with task queue + reconciler := NewReconcilerBuilder(). + WithAliyun(openAPI). + WithVSwitchPool(switchPool). + WithDefaults(). + Build() + + // Pre-add a task to the queue + reconciler.eniTaskQueue.SubmitAttach(ctx, "eni-existing", "i-test", "", "test-node", 5, 0) + + // Wait for task to be added + task, ok := reconciler.eniTaskQueue.GetTaskStatus("eni-existing") + Expect(ok).To(BeTrue()) + originalIPv4Count := task.RequestedIPv4Count + + // Create node with Attaching ENI + eniAttaching := BuildENIWithCustomIPs("eni-existing", aliyunClient.ENIStatusAttaching, nil, nil) + node := NewNodeFactory("test-node"). + WithECS(). 
+ WithInstanceID("i-test"). + WithExistingENIs(eniAttaching). + Build() + + By("Calling ensureAsyncTasks - should not submit duplicate") + reconciler.ensureAsyncTasks(ctx, node) + + By("Verifying task was not overwritten") + task, ok = reconciler.eniTaskQueue.GetTaskStatus("eni-existing") + Expect(ok).To(BeTrue()) + // The original task should not be replaced - IP count should remain the same + Expect(task.RequestedIPv4Count).To(Equal(originalIPv4Count)) + }) + + It("Should handle multiple Attaching ENIs on restart", func() { + ctx := context.TODO() + ctx = MetaIntoCtx(ctx) + + // Create multiple Attaching ENIs + eni1 := BuildENIWithCustomIPs("eni-multi-1", aliyunClient.ENIStatusAttaching, nil, nil) + eni2 := BuildENIWithCustomIPs("eni-multi-2", aliyunClient.ENIStatusAttaching, nil, nil) + eni3 := BuildENIWithCustomIPs("eni-multi-3", aliyunClient.ENIStatusAttaching, nil, nil) + + node := NewNodeFactory("test-node"). + WithECS(). + WithInstanceID("i-test"). + WithExistingENIs(eni1, eni2, eni3). + Build() + + // Setup mock API + mockHelper := NewMockAPIHelperWithT(GinkgoT()) + openAPI, _, _ = mockHelper.GetMocks() + openAPI.On("AttachNetworkInterfaceV2", mock.Anything, mock.Anything).Return(nil).Maybe() + openAPI.On("DescribeNetworkInterfaceV2", mock.Anything, mock.Anything).Return([]*aliyunClient.NetworkInterface{ + {NetworkInterfaceID: "eni-multi-1", Status: aliyunClient.ENIStatusInUse}, + }, nil).Maybe() + + // Create reconciler + reconciler := NewReconcilerBuilder(). + WithAliyun(openAPI). + WithVSwitchPool(switchPool). + WithDefaults(). 
+ Build() + + By("Calling ensureAsyncTasks") + reconciler.ensureAsyncTasks(ctx, node) + + By("Verifying all Attaching ENIs got recovery tasks") + for _, eniID := range []string{"eni-multi-1", "eni-multi-2", "eni-multi-3"} { + task, ok := reconciler.eniTaskQueue.GetTaskStatus(eniID) + Expect(ok).To(BeTrue(), "Recovery task should be submitted for %s", eniID) + Expect(task.InstanceID).To(Equal("i-test")) + } + }) + }) + Context("Test handleStatus", func() { It("Should handle ENI status correctly", func() { ctx := context.TODO() diff --git a/pkg/controller/multi-ip/node/pool_test_factory_test.go b/pkg/controller/multi-ip/node/pool_test_factory_test.go index dc09f156..fc72c6fc 100644 --- a/pkg/controller/multi-ip/node/pool_test_factory_test.go +++ b/pkg/controller/multi-ip/node/pool_test_factory_test.go @@ -1,6 +1,7 @@ package node import ( + "context" "fmt" "time" @@ -13,6 +14,7 @@ import ( aliyunClient "github.com/AliyunContainerService/terway/pkg/aliyun/client" networkv1beta1 "github.com/AliyunContainerService/terway/pkg/apis/network.alibabacloud.com/v1beta1" + "github.com/AliyunContainerService/terway/pkg/eni/ops" vswpool "github.com/AliyunContainerService/terway/pkg/vswitch" "github.com/spf13/viper" ) @@ -517,6 +519,7 @@ type ReconcilerBuilder struct { gcPeriod time.Duration syncPeriod time.Duration v *viper.Viper + eniTaskQueue *ENITaskQueue } // NewReconcilerBuilder creates a new ReconcilerBuilder with minimal defaults. @@ -588,6 +591,12 @@ func (b *ReconcilerBuilder) WithViper(v *viper.Viper) *ReconcilerBuilder { return b } +// WithENITaskQueue sets the ENI task queue. +func (b *ReconcilerBuilder) WithENITaskQueue(queue *ENITaskQueue) *ReconcilerBuilder { + b.eniTaskQueue = queue + return b +} + // WithDefaults sets sensible defaults for testing (fake recorder, noop tracer, etc.). 
func (b *ReconcilerBuilder) WithDefaults() *ReconcilerBuilder { if b.record == nil { @@ -596,6 +605,12 @@ func (b *ReconcilerBuilder) WithDefaults() *ReconcilerBuilder { if b.tracer == nil { b.tracer = noop.NewTracerProvider().Tracer("") } + // Auto-create ENI task queue if aliyun client is provided but queue is not set + if b.eniTaskQueue == nil && b.aliyun != nil { + eniNotifyCh := make(chan string, 10) + executor := ops.NewExecutor(b.aliyun, b.tracer) + b.eniTaskQueue = NewENITaskQueue(context.Background(), executor, eniNotifyCh) + } return b } @@ -615,5 +630,6 @@ func (b *ReconcilerBuilder) Build() *ReconcileNode { gcPeriod: b.gcPeriod, fullSyncNodePeriod: b.syncPeriod, v: b.v, + eniTaskQueue: b.eniTaskQueue, } } diff --git a/pkg/eni/ops/executor.go b/pkg/eni/ops/executor.go new file mode 100644 index 00000000..0fee3b3c --- /dev/null +++ b/pkg/eni/ops/executor.go @@ -0,0 +1,274 @@ +/* +Copyright 2025 Terway Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package ops + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/go-logr/logr" + "go.opentelemetry.io/otel/trace" + "k8s.io/apimachinery/pkg/util/wait" + logf "sigs.k8s.io/controller-runtime/pkg/log" + + aliyunClient "github.com/AliyunContainerService/terway/pkg/aliyun/client" + apiErr "github.com/AliyunContainerService/terway/pkg/aliyun/client/errors" + "github.com/AliyunContainerService/terway/pkg/backoff" +) + +// Executor provides shared ENI operation primitives for both +// Pool controller (shared ENI) and ENI controller (exclusive ENI) +type Executor struct { + aliyun aliyunClient.OpenAPI + tracer trace.Tracer +} + +// NewExecutor creates a new ENI operation executor +func NewExecutor(aliyun aliyunClient.OpenAPI, tracer trace.Tracer) *Executor { + return &Executor{ + aliyun: aliyun, + tracer: tracer, + } +} + +// AttachAsync initiates attach and returns immediately (non-blocking) +// Used by Pool controller for async ENI attach +func (e *Executor) AttachAsync(ctx context.Context, eniID, instanceID, trunkENIID string) error { + l := logf.FromContext(ctx).WithValues("eni", eniID, "instance", instanceID) + + if e.tracer != nil { + var span trace.Span + ctx, span = e.tracer.Start(ctx, "AttachAsync") + defer span.End() + } + + l.Info("initiating async ENI attach") + + err := e.aliyun.AttachNetworkInterfaceV2(ctx, &aliyunClient.AttachNetworkInterfaceOptions{ + NetworkInterfaceID: toPtr(eniID), + InstanceID: toPtr(instanceID), + TrunkNetworkInstanceID: toPtr(trunkENIID), + }) + if err != nil { + l.Error(err, "failed to initiate ENI attach") + return fmt.Errorf("attach ENI failed: %w", err) + } + + l.Info("ENI attach initiated successfully") + return nil +} + +// AttachAndWait attaches ENI and waits for it to be ready (blocking) +// Used by ENI controller for synchronous ENI attach +func (e *Executor) AttachAndWait(ctx context.Context, eniID, instanceID, trunkENIID string) (*aliyunClient.NetworkInterface, error) 
{ + l := logf.FromContext(ctx).WithValues("eni", eniID, "instance", instanceID) + + if e.tracer != nil { + var span trace.Span + ctx, span = e.tracer.Start(ctx, "AttachAndWait") + defer span.End() + } + + // 1. Initiate attach + err := e.AttachAsync(ctx, eniID, instanceID, trunkENIID) + if err != nil { + return nil, err + } + + // 2. Wait for ready + bo := e.getBackoff(eniID) + time.Sleep(bo.InitialDelay) + + eni, err := e.waitForStatus(ctx, eniID, aliyunClient.ENIStatusInUse, bo.Backoff) + if err != nil { + return nil, fmt.Errorf("wait ENI ready failed: %w", err) + } + + l.Info("ENI attach completed", "status", eni.Status) + return eni, nil +} + +// CheckStatus checks the current status of an ENI +func (e *Executor) CheckStatus(ctx context.Context, eniID string) (*aliyunClient.NetworkInterface, error) { + if e.tracer != nil { + var span trace.Span + ctx, span = e.tracer.Start(ctx, "CheckStatus") + defer span.End() + } + + enis, err := e.aliyun.DescribeNetworkInterfaceV2(ctx, &aliyunClient.DescribeNetworkInterfaceOptions{ + NetworkInterfaceIDs: &[]string{eniID}, + }) + if err != nil { + return nil, fmt.Errorf("describe ENI failed: %w", err) + } + if len(enis) == 0 { + return nil, apiErr.ErrNotFound + } + return enis[0], nil +} + +// DetachAsync initiates detach and returns immediately (non-blocking) +func (e *Executor) DetachAsync(ctx context.Context, eniID, instanceID, trunkENIID string) error { + l := logf.FromContext(ctx).WithValues("eni", eniID, "instance", instanceID) + + if e.tracer != nil { + var span trace.Span + ctx, span = e.tracer.Start(ctx, "DetachAsync") + defer span.End() + } + + l.Info("initiating async ENI detach") + + err := e.aliyun.DetachNetworkInterfaceV2(ctx, &aliyunClient.DetachNetworkInterfaceOptions{ + NetworkInterfaceID: toPtr(eniID), + InstanceID: toPtr(instanceID), + TrunkID: toPtr(trunkENIID), + }) + if err != nil { + l.Error(err, "failed to initiate ENI detach") + return fmt.Errorf("detach ENI failed: %w", err) + } + + l.Info("ENI 
detach initiated successfully") + return nil +} + +// DetachAndWait detaches ENI and waits for it to be available (blocking) +func (e *Executor) DetachAndWait(ctx context.Context, eniID, instanceID, trunkENIID string) error { + l := logf.FromContext(ctx).WithValues("eni", eniID, "instance", instanceID) + + if e.tracer != nil { + var span trace.Span + ctx, span = e.tracer.Start(ctx, "DetachAndWait") + defer span.End() + } + + // 1. Initiate detach + err := e.DetachAsync(ctx, eniID, instanceID, trunkENIID) + if err != nil { + return err + } + + // 2. Wait for available + bo := e.getBackoff(eniID) + time.Sleep(bo.InitialDelay) + + _, err = e.waitForStatus(ctx, eniID, aliyunClient.ENIStatusAvailable, bo.Backoff) + if err != nil { + return fmt.Errorf("wait ENI available failed: %w", err) + } + + l.Info("ENI detach completed") + return nil +} + +// Delete deletes an ENI +func (e *Executor) Delete(ctx context.Context, eniID string) error { + l := logf.FromContext(ctx).WithValues("eni", eniID) + + if e.tracer != nil { + var span trace.Span + ctx, span = e.tracer.Start(ctx, "Delete") + defer span.End() + } + + l.Info("deleting ENI") + + err := e.aliyun.DeleteNetworkInterfaceV2(ctx, eniID) + if err != nil { + l.Error(err, "failed to delete ENI") + return fmt.Errorf("delete ENI failed: %w", err) + } + + l.Info("ENI deleted successfully") + return nil +} + +// WaitForStatus waits for ENI to reach the specified status +func (e *Executor) WaitForStatus(ctx context.Context, eniID, status string) (*aliyunClient.NetworkInterface, error) { + bo := e.getBackoff(eniID) + return e.waitForStatus(ctx, eniID, status, bo.Backoff) +} + +// waitForStatus internal helper to wait for ENI status with custom backoff +func (e *Executor) waitForStatus(ctx context.Context, eniID, status string, bo wait.Backoff) (*aliyunClient.NetworkInterface, error) { + l := logf.FromContext(ctx).WithValues("eni", eniID, "targetStatus", status) + + var result *aliyunClient.NetworkInterface + err := 
wait.ExponentialBackoffWithContext(ctx, bo, func(ctx context.Context) (bool, error) { + eni, err := e.CheckStatus(ctx, eniID) + if err != nil { + l.V(4).Info("check status failed, retrying", "error", err) + return false, nil // retry + } + + l.V(4).Info("current ENI status", "status", eni.Status) + + if eni.Status == status { + result = eni + return true, nil + } + return false, nil + }) + + if err != nil { + return nil, fmt.Errorf("timeout waiting for ENI %s to reach status %s: %w", eniID, status, err) + } + + return result, nil +} + +// getBackoff returns the appropriate backoff configuration based on ENI type +func (e *Executor) getBackoff(eniID string) backoff.ExtendedBackoff { + if strings.HasPrefix(eniID, "leni-") { + return backoff.Backoff(backoff.WaitLENIStatus) + } + if strings.HasPrefix(eniID, "hdeni-") { + return backoff.Backoff(backoff.WaitHDENIStatus) + } + return backoff.Backoff(backoff.WaitENIStatus) +} + +// GetTimeout returns the attach timeout based on ENI type +func (e *Executor) GetTimeout(eniID string) time.Duration { + if strings.HasPrefix(eniID, "leni-") || strings.HasPrefix(eniID, "hdeni-") { + return 5 * time.Minute // EFLO timeout + } + return 2 * time.Minute // ECS timeout +} + +// GetInitialDelay returns the initial delay before checking status based on ENI type +func (e *Executor) GetInitialDelay(eniID string) time.Duration { + bo := e.getBackoff(eniID) + return bo.InitialDelay +} + +// GetLogger returns a logger with ENI context +func GetLogger(ctx context.Context, eniID string) logr.Logger { + return logf.FromContext(ctx).WithValues("eni", eniID) +} + +// toPtr converts string to pointer, returns nil for empty string +func toPtr(s string) *string { + if s == "" { + return nil + } + return &s +} diff --git a/pkg/eni/ops/executor_test.go b/pkg/eni/ops/executor_test.go new file mode 100644 index 00000000..cd76fbd7 --- /dev/null +++ b/pkg/eni/ops/executor_test.go @@ -0,0 +1,64 @@ +/* +Copyright 2025 Terway Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package ops + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestExecutor_GetTimeout(t *testing.T) { + exec := NewExecutor(nil, nil) + + // ECS ENI + timeout := exec.GetTimeout("eni-12345") + assert.Equal(t, 2*time.Minute, timeout) + + // LENI (EFLO) + timeout = exec.GetTimeout("leni-12345") + assert.Equal(t, 5*time.Minute, timeout) + + // HDENI (EFLO) + timeout = exec.GetTimeout("hdeni-12345") + assert.Equal(t, 5*time.Minute, timeout) +} + +func TestExecutor_GetInitialDelay(t *testing.T) { + exec := NewExecutor(nil, nil) + + // ECS ENI should have smaller initial delay + ecsDelay := exec.GetInitialDelay("eni-12345") + + // LENI should have larger initial delay + leniDelay := exec.GetInitialDelay("leni-12345") + + // EFLO should have larger delay than ECS + assert.Less(t, ecsDelay, leniDelay) +} + +func Test_toPtr(t *testing.T) { + // Empty string should return nil + result := toPtr("") + assert.Nil(t, result) + + // Non-empty string should return pointer + result = toPtr("test") + assert.NotNil(t, result) + assert.Equal(t, "test", *result) +} diff --git a/tests/ip_allocation_perf_test.go b/tests/ip_allocation_perf_test.go new file mode 100644 index 00000000..496a3d1a --- /dev/null +++ b/tests/ip_allocation_perf_test.go @@ -0,0 +1,573 @@ +//go:build e2e + +package tests + +import ( + "context" + "fmt" + "strings" + "testing" + "time" + + 
"github.com/AliyunContainerService/terway/tests/utils" + "github.com/Jeffail/gabs/v2" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/e2e-framework/klient/k8s" + "sigs.k8s.io/e2e-framework/klient/wait" + "sigs.k8s.io/e2e-framework/klient/wait/conditions" + "sigs.k8s.io/e2e-framework/pkg/envconf" + "sigs.k8s.io/e2e-framework/pkg/features" +) + +const ( + // Test configuration + perfTestDeploymentCount = 5 + perfTestPodsPerDeploy = 6 + perfTestTotalPods = perfTestDeploymentCount * perfTestPodsPerDeploy + perfTestIterations = 1 // Number of test iterations for averaging +) + +// PoolConfig represents the IP pool configuration +type PoolConfig struct { + Name string + MinPoolSize int + MaxPoolSize int +} + +var ( + // Pool configurations to test + defaultPoolConfig = PoolConfig{Name: "default", MinPoolSize: 0, MaxPoolSize: 5} + warmUpPoolConfig = PoolConfig{Name: "warmup", MinPoolSize: 30, MaxPoolSize: 30} +) + +// IPAllocationPerfTestSuite runs the IP allocation performance test +type IPAllocationPerfTestSuite struct { + NodeType NodeType + PoolConfig PoolConfig + Deployments []*appsv1.Deployment + LatencyStats utils.LatencyStats + OriginalEnablePatchPodIPs *bool // nil means not set in original config + + // Per-iteration results for averaging + IterationStats []utils.LatencyStats + IterationFailedCount []int + AllocIPFailedCount int // Failed IP allocation count for current iteration +} + +// NewIPAllocationPerfTestSuite creates a new test suite +func NewIPAllocationPerfTestSuite(nodeType NodeType, poolConfig PoolConfig) *IPAllocationPerfTestSuite { + return &IPAllocationPerfTestSuite{ + NodeType: nodeType, + PoolConfig: poolConfig, + } +} + +// configurePoolSize configures the IP pool size and disable enable_patch_pod_ips in eni-config +func (s *IPAllocationPerfTestSuite) configurePoolSize(ctx context.Context, t *testing.T, config *envconf.Config) error { + t.Logf("Configuring pool size: min_pool_size=%d, max_pool_size=%d, 
enable_patch_pod_ips=false", + s.PoolConfig.MinPoolSize, s.PoolConfig.MaxPoolSize) + + cm := &corev1.ConfigMap{} + err := config.Client().Resources().Get(ctx, "eni-config", "kube-system", cm) + if err != nil { + return err + } + + eniJson, err := gabs.ParseJSON([]byte(cm.Data["eni_conf"])) + if err != nil { + return err + } + + // Save original enable_patch_pod_ips value + if eniJson.Exists("enable_patch_pod_ips") { + originalValue := eniJson.Path("enable_patch_pod_ips").Data().(bool) + s.OriginalEnablePatchPodIPs = &originalValue + t.Logf("Original enable_patch_pod_ips: %v", originalValue) + } + + _, err = eniJson.Set(s.PoolConfig.MaxPoolSize, "max_pool_size") + if err != nil { + return err + } + _, err = eniJson.Set(s.PoolConfig.MinPoolSize, "min_pool_size") + if err != nil { + return err + } + // Disable enable_patch_pod_ips for performance test + _, err = eniJson.Set(false, "enable_patch_pod_ips") + if err != nil { + return err + } + + cm.Data["eni_conf"] = eniJson.String() + err = config.Client().Resources().Update(ctx, cm) + if err != nil { + return err + } + + // Restart terway to apply new config + return restartTerway(ctx, config) +} + +// Setup performs initial checks and configures the pool size +func (s *IPAllocationPerfTestSuite) Setup(ctx context.Context, t *testing.T, config *envconf.Config) context.Context { + // Check if the required node type is available + nodeInfo, err := DiscoverNodeTypes(ctx, config.Client()) + if err != nil { + t.Fatalf("failed to discover node types: %v", err) + } + + nodes := nodeInfo.GetNodesByType(s.NodeType) + if len(nodes) == 0 { + t.Skipf("No nodes of type %s found, skipping test", s.NodeType) + return ctx + } + t.Logf("Found %d nodes of type %s", len(nodes), s.NodeType) + + // Configure pool size + err = s.configurePoolSize(ctx, t, config) + if err != nil { + t.Fatalf("failed to configure pool size: %v", err) + } + + // Wait for terway to be ready after config change + time.Sleep(10 * time.Second) + + return ctx +} + 
+// WaitForPodsReady waits for all pods to be ready +func (s *IPAllocationPerfTestSuite) WaitForPodsReady(ctx context.Context, t *testing.T, config *envconf.Config) context.Context { + t.Logf("Waiting for %d pods to be ready...", perfTestTotalPods) + + for _, deploy := range s.Deployments { + err := wait.For(conditions.New(config.Client().Resources()).DeploymentConditionMatch( + deploy, appsv1.DeploymentAvailable, corev1.ConditionTrue), + wait.WithTimeout(10*time.Minute), + wait.WithInterval(5*time.Second)) + if err != nil { + t.Fatalf("failed waiting for deployment %s to be ready: %v", deploy.Name, err) + } + t.Logf("Deployment %s is ready", deploy.Name) + } + + return ctx +} + +// CollectLatencies collects IP allocation latencies from pod events +func (s *IPAllocationPerfTestSuite) CollectLatencies(ctx context.Context, t *testing.T, config *envconf.Config) context.Context { + t.Log("Collecting AllocIPSucceed and AllocIPFailed events...") + + // First, collect all current pod UIDs from our deployments + // This ensures we only count events from the current test run's pods + podUIDs := make(map[string]bool) + for _, deploy := range s.Deployments { + pods := &corev1.PodList{} + err := config.Client().Resources(config.Namespace()).List(ctx, pods) + if err != nil { + t.Fatalf("failed to list pods: %v", err) + } + for _, pod := range pods.Items { + if strings.HasPrefix(pod.Name, deploy.Name) { + podUIDs[string(pod.UID)] = true + } + } + } + t.Logf("Found %d pods from current deployments", len(podUIDs)) + + // Get all events in the namespace + events := &corev1.EventList{} + err := config.Client().Resources(config.Namespace()).List(ctx, events) + if err != nil { + t.Fatalf("failed to list events: %v", err) + } + + var latencies []time.Duration + s.AllocIPFailedCount = 0 + + for _, event := range events.Items { + // Check if this event is for one of our current pods by UID + if !podUIDs[string(event.InvolvedObject.UID)] { + continue + } + + switch event.Reason { + case 
"AllocIPSucceed": + latency, err := utils.ParseAllocIPSucceedLatency(event.Message) + if err != nil { + t.Logf("Warning: failed to parse latency from event: %v", err) + continue + } + latencies = append(latencies, latency) + case "AllocIPFailed": + s.AllocIPFailedCount++ + t.Logf("AllocIPFailed event: pod=%s, message=%s", event.InvolvedObject.Name, event.Message) + } + } + + if len(latencies) == 0 { + t.Error("No AllocIPSucceed events found") + return ctx + } + + s.LatencyStats = utils.CalculateLatencyStats(latencies) + t.Logf("IP Allocation Latency Stats for %s/%s: %s (AllocIPFailed: %d)", + s.NodeType, s.PoolConfig.Name, s.LatencyStats.String(), s.AllocIPFailedCount) + + return ctx +} + +// ScaleDown scales down all deployments to 0 +func (s *IPAllocationPerfTestSuite) ScaleDown(ctx context.Context, t *testing.T, config *envconf.Config) context.Context { + t.Log("Scaling down deployments...") + + for _, deploy := range s.Deployments { + // Refresh the deployment + err := config.Client().Resources().Get(ctx, deploy.Name, deploy.Namespace, deploy) + if err != nil { + t.Logf("Warning: failed to get deployment %s: %v", deploy.Name, err) + continue + } + + zero := int32(0) + deploy.Spec.Replicas = &zero + err = config.Client().Resources().Update(ctx, deploy) + if err != nil { + t.Logf("Warning: failed to scale down deployment %s: %v", deploy.Name, err) + } + } + + // Wait for all pods to be deleted + for _, deploy := range s.Deployments { + err := wait.For(conditions.New(config.Client().Resources()).ResourceScaled( + deploy, func(object k8s.Object) int32 { + return object.(*appsv1.Deployment).Status.Replicas + }, 0), + wait.WithTimeout(5*time.Minute), + wait.WithInterval(5*time.Second)) + if err != nil { + t.Logf("Warning: failed waiting for deployment %s to scale down: %v", deploy.Name, err) + } + } + + t.Log("All deployments scaled down") + return ctx +} + +// PrintResults prints the final test results +func (s *IPAllocationPerfTestSuite) PrintResults(ctx 
context.Context, t *testing.T, config *envconf.Config) context.Context { + t.Log("========================================") + t.Logf("IP Allocation Performance Test Results") + t.Logf("Node Type: %s", s.NodeType) + t.Logf("Pool Config: %s (min=%d, max=%d)", s.PoolConfig.Name, s.PoolConfig.MinPoolSize, s.PoolConfig.MaxPoolSize) + t.Logf("Deployments: %d x %d pods = %d total pods", perfTestDeploymentCount, perfTestPodsPerDeploy, perfTestTotalPods) + t.Log("----------------------------------------") + t.Logf("Samples: %d", s.LatencyStats.N) + t.Logf("P99: %v", s.LatencyStats.P99) + t.Logf("P90: %v", s.LatencyStats.P90) + t.Logf("Max: %v", s.LatencyStats.Max) + t.Logf("Min: %v", s.LatencyStats.Min) + t.Logf("Avg: %v", s.LatencyStats.Avg) + t.Logf("AllocIPFailed: %d", s.AllocIPFailedCount) + t.Log("========================================") + + return MarkTestSuccess(ctx) +} + +// RunIteration runs a single iteration of create -> wait -> collect -> scale down +func (s *IPAllocationPerfTestSuite) RunIteration(ctx context.Context, t *testing.T, config *envconf.Config, iteration int) context.Context { + t.Logf("=== Starting iteration %d/%d ===", iteration+1, perfTestIterations) + + // Get node affinity labels based on node type + affinityLabels := GetNodeAffinityForType(s.NodeType) + excludeLabels := GetNodeAffinityExcludeForType(s.NodeType) + + // Define the anchor deployment name and label for pod affinity + anchorDeployName := fmt.Sprintf("perf-test-%s-%s-0", s.NodeType, s.PoolConfig.Name) + anchorPodLabel := map[string]string{"app": anchorDeployName} + + // Create deployments + s.Deployments = make([]*appsv1.Deployment, perfTestDeploymentCount) + for i := 0; i < perfTestDeploymentCount; i++ { + name := fmt.Sprintf("perf-test-%s-%s-%d", s.NodeType, s.PoolConfig.Name, i) + deploy := utils.NewDeployment(name, config.Namespace(), int32(perfTestPodsPerDeploy)). + WithNodeAffinity(affinityLabels). + WithNodeAffinityExclude(excludeLabels). 
+ WithPodAffinity(anchorPodLabel) + + if utils.IsLingjunNodeType(string(s.NodeType)) { + deploy = deploy.WithLingjunToleration() + } + + err := config.Client().Resources().Create(ctx, deploy.Deployment) + if err != nil { + t.Fatalf("failed to create deployment %s: %v", name, err) + } + s.Deployments[i] = deploy.Deployment + } + t.Logf("Created %d deployments for iteration %d", perfTestDeploymentCount, iteration+1) + + // Wait for pods ready + ctx = s.WaitForPodsReady(ctx, t, config) + + // Collect latencies + ctx = s.CollectLatencies(ctx, t, config) + + // Store iteration results + s.IterationStats = append(s.IterationStats, s.LatencyStats) + s.IterationFailedCount = append(s.IterationFailedCount, s.AllocIPFailedCount) + + // Scale down and delete deployments + ctx = s.ScaleDown(ctx, t, config) + + // Delete deployments to clean up for next iteration + for _, deploy := range s.Deployments { + err := config.Client().Resources().Delete(ctx, deploy) + if err != nil { + t.Logf("Warning: failed to delete deployment %s: %v", deploy.Name, err) + } + } + + // Wait a bit for cleanup + time.Sleep(5 * time.Second) + + t.Logf("=== Completed iteration %d/%d ===", iteration+1, perfTestIterations) + return ctx +} + +// RunAllIterations runs all test iterations and calculates averaged results +func (s *IPAllocationPerfTestSuite) RunAllIterations(ctx context.Context, t *testing.T, config *envconf.Config) context.Context { + s.IterationStats = make([]utils.LatencyStats, 0, perfTestIterations) + s.IterationFailedCount = make([]int, 0, perfTestIterations) + + for i := 0; i < perfTestIterations; i++ { + ctx = s.RunIteration(ctx, t, config, i) + } + + // Calculate averaged results + s.calculateAveragedResults(t) + + return ctx +} + +// calculateAveragedResults calculates averaged statistics from all iterations +func (s *IPAllocationPerfTestSuite) calculateAveragedResults(t *testing.T) { + if len(s.IterationStats) == 0 { + return + } + + var totalP99, totalP90, totalMax, totalMin, 
totalAvg time.Duration + var totalN, totalFailed int + + for i, stats := range s.IterationStats { + totalP99 += stats.P99 + totalP90 += stats.P90 + totalMax += stats.Max + totalMin += stats.Min + totalAvg += stats.Avg + totalN += stats.N + totalFailed += s.IterationFailedCount[i] + } + + n := len(s.IterationStats) + s.LatencyStats = utils.LatencyStats{ + N: totalN / n, + P99: totalP99 / time.Duration(n), + P90: totalP90 / time.Duration(n), + Max: totalMax / time.Duration(n), + Min: totalMin / time.Duration(n), + Avg: totalAvg / time.Duration(n), + } + s.AllocIPFailedCount = totalFailed + + t.Log("----------------------------------------") + t.Logf("Averaged results from %d iterations:", n) + for i, stats := range s.IterationStats { + t.Logf(" Iteration %d: P99=%v, P90=%v, Avg=%v, Failed=%d", + i+1, stats.P99, stats.P90, stats.Avg, s.IterationFailedCount[i]) + } +} + +// PrintAveragedResults prints the averaged results from all iterations +func (s *IPAllocationPerfTestSuite) PrintAveragedResults(ctx context.Context, t *testing.T, config *envconf.Config) context.Context { + t.Log("========================================") + t.Logf("IP Allocation Performance Test Results (Averaged over %d iterations)", perfTestIterations) + t.Logf("Node Type: %s", s.NodeType) + t.Logf("Pool Config: %s (min=%d, max=%d)", s.PoolConfig.Name, s.PoolConfig.MinPoolSize, s.PoolConfig.MaxPoolSize) + t.Logf("Deployments per iteration: %d x %d pods = %d total pods", perfTestDeploymentCount, perfTestPodsPerDeploy, perfTestTotalPods) + t.Log("----------------------------------------") + t.Logf("Avg Samples: %d", s.LatencyStats.N) + t.Logf("Avg P99: %v", s.LatencyStats.P99) + t.Logf("Avg P90: %v", s.LatencyStats.P90) + t.Logf("Avg Max: %v", s.LatencyStats.Max) + t.Logf("Avg Min: %v", s.LatencyStats.Min) + t.Logf("Avg Latency: %v", s.LatencyStats.Avg) + t.Logf("Total AllocIPFailed: %d", s.AllocIPFailedCount) + t.Log("========================================") + + return MarkTestSuccess(ctx) +} + 
+// RestoreConfig restores the default pool configuration and enable_patch_pod_ips +func (s *IPAllocationPerfTestSuite) RestoreConfig(ctx context.Context, t *testing.T, config *envconf.Config) context.Context { + t.Log("Restoring default pool configuration and enable_patch_pod_ips...") + + cm := &corev1.ConfigMap{} + err := config.Client().Resources().Get(ctx, "eni-config", "kube-system", cm) + if err != nil { + t.Logf("Warning: failed to get eni-config: %v", err) + return ctx + } + + eniJson, err := gabs.ParseJSON([]byte(cm.Data["eni_conf"])) + if err != nil { + t.Logf("Warning: failed to parse eni_conf: %v", err) + return ctx + } + + // Restore default pool values: min_pool_size=0, max_pool_size=5 + _, _ = eniJson.Set(5, "max_pool_size") + _, _ = eniJson.Set(0, "min_pool_size") + + // Restore enable_patch_pod_ips to original value + if s.OriginalEnablePatchPodIPs != nil { + _, _ = eniJson.Set(*s.OriginalEnablePatchPodIPs, "enable_patch_pod_ips") + t.Logf("Restored enable_patch_pod_ips to: %v", *s.OriginalEnablePatchPodIPs) + } else { + // If it wasn't set originally, delete it + _ = eniJson.Delete("enable_patch_pod_ips") + t.Log("Removed enable_patch_pod_ips (was not set originally)") + } + + cm.Data["eni_conf"] = eniJson.String() + err = config.Client().Resources().Update(ctx, cm) + if err != nil { + t.Logf("Warning: failed to update eni-config: %v", err) + } + + return ctx +} + +// TestIPAllocationPerf tests IP allocation performance +// This test creates 5 deployments with 30 pods each, measures IP allocation latency, +// and reports P99, P90, Max, Min statistics. 
+// Tests are run on both ECS and Lingjun shared nodes with two pool configurations: +// - Default: min_pool_size=0, max_pool_size=5 +// - Warm-up: min_pool_size=30, max_pool_size=30 +func TestIPAllocationPerf(t *testing.T) { + // Pre-check: only test centralized IPAM mode + if eniConfig == nil || eniConfig.IPAMType != "crd" { + ipamType := "" + if eniConfig != nil { + ipamType = eniConfig.IPAMType + } + t.Skipf("skip: ipam type is not crd, current type: %s", ipamType) + return + } + + // Pre-check: terway daemonset name must be terway-eniip + if GetCachedTerwayDaemonSetName() != "terway-eniip" { + t.Skipf("TestIPAllocationPerf requires terway-eniip daemonset, current: %s", GetCachedTerwayDaemonSetName()) + return + } + + // Run ECS tests + t.Run("ECS", func(t *testing.T) { + // Test with default pool config + t.Run("DefaultPool", func(t *testing.T) { + runIPAllocationPerfTest(t, NodeTypeECSSharedENI, defaultPoolConfig) + }) + + // Test with warm-up pool config + t.Run("WarmUpPool", func(t *testing.T) { + runIPAllocationPerfTest(t, NodeTypeECSSharedENI, warmUpPoolConfig) + }) + }) + + // Run Lingjun tests + t.Run("Lingjun", func(t *testing.T) { + // Test with default pool config + t.Run("DefaultPool", func(t *testing.T) { + runIPAllocationPerfTest(t, NodeTypeLingjunSharedENI, defaultPoolConfig) + }) + + // Test with warm-up pool config + t.Run("WarmUpPool", func(t *testing.T) { + runIPAllocationPerfTest(t, NodeTypeLingjunSharedENI, warmUpPoolConfig) + }) + }) +} + +// TestIPAllocationPerfECS tests IP allocation performance specifically for ECS shared ENI nodes +func TestIPAllocationPerfECS(t *testing.T) { + // Pre-check: only test centralized IPAM mode + if eniConfig == nil || eniConfig.IPAMType != "crd" { + ipamType := "" + if eniConfig != nil { + ipamType = eniConfig.IPAMType + } + t.Skipf("skip: ipam type is not crd, current type: %s", ipamType) + return + } + + // Pre-check: terway daemonset name must be terway-eniip + if GetCachedTerwayDaemonSetName() != 
"terway-eniip" { + t.Skipf("TestIPAllocationPerfECS requires terway-eniip daemonset, current: %s", GetCachedTerwayDaemonSetName()) + return + } + + // Test with default pool config + t.Run("DefaultPool", func(t *testing.T) { + runIPAllocationPerfTest(t, NodeTypeECSSharedENI, defaultPoolConfig) + }) + + // Test with warm-up pool config + t.Run("WarmUpPool", func(t *testing.T) { + runIPAllocationPerfTest(t, NodeTypeECSSharedENI, warmUpPoolConfig) + }) +} + +// TestIPAllocationPerfLingjun tests IP allocation performance specifically for Lingjun shared ENI nodes +func TestIPAllocationPerfLingjun(t *testing.T) { + // Pre-check: only test centralized IPAM mode + if eniConfig == nil || eniConfig.IPAMType != "crd" { + ipamType := "" + if eniConfig != nil { + ipamType = eniConfig.IPAMType + } + t.Skipf("skip: ipam type is not crd, current type: %s", ipamType) + return + } + + // Pre-check: terway daemonset name must be terway-eniip + if GetCachedTerwayDaemonSetName() != "terway-eniip" { + t.Skipf("TestIPAllocationPerfLingjun requires terway-eniip daemonset, current: %s", GetCachedTerwayDaemonSetName()) + return + } + + // Test with default pool config + t.Run("DefaultPool", func(t *testing.T) { + runIPAllocationPerfTest(t, NodeTypeLingjunSharedENI, defaultPoolConfig) + }) + + // Test with warm-up pool config + t.Run("WarmUpPool", func(t *testing.T) { + runIPAllocationPerfTest(t, NodeTypeLingjunSharedENI, warmUpPoolConfig) + }) +} + +// runIPAllocationPerfTest runs IP allocation performance test with multiple iterations +func runIPAllocationPerfTest(t *testing.T, nodeType NodeType, poolConfig PoolConfig) { + suite := NewIPAllocationPerfTestSuite(nodeType, poolConfig) + + feature := features.New(fmt.Sprintf("IPAllocationPerf/%s-%s", nodeType, poolConfig.Name)). + WithLabel("env", "performance"). + Setup(suite.Setup). + Assess("run all iterations", suite.RunAllIterations). + Assess("print averaged results", suite.PrintAveragedResults). + Teardown(suite.RestoreConfig). 
+ Feature() + + testenv.Test(t, feature) +} diff --git a/tests/main_test.go b/tests/main_test.go index b991e9d6..ff764f4e 100644 --- a/tests/main_test.go +++ b/tests/main_test.go @@ -106,6 +106,7 @@ func TestMain(m *testing.M) { envfuncs.CreateNamespace(envCfg.Namespace()), patchNamespace, checkENIConfig, + configureKubeClientQPS, printClusterEnvironment, ) testenv.AfterEachFeature(func(ctx context.Context, config *envconf.Config, t *testing.T, feature features.Feature) (context.Context, error) { @@ -315,6 +316,47 @@ type Config struct { IPAMType string `json:"ipam_type"` } +// configureKubeClientQPS configures kube_client_qps and kube_client_burst for all tests +func configureKubeClientQPS(ctx context.Context, config *envconf.Config) (context.Context, error) { + cm := &corev1.ConfigMap{} + err := config.Client().Resources().Get(ctx, "eni-config", "kube-system", cm) + if err != nil { + if errors.IsNotFound(err) { + return ctx, nil + } + return ctx, err + } + + eniConf := cm.Data["eni_conf"] + if eniConf == "" { + return ctx, nil + } + + // Parse and update eni_conf + var eniConfMap map[string]interface{} + if err := json.Unmarshal([]byte(eniConf), &eniConfMap); err != nil { + return ctx, fmt.Errorf("failed to parse eni_conf: %v", err) + } + + // Set kube_client_qps and kube_client_burst + eniConfMap["kube_client_qps"] = 50 + eniConfMap["kube_client_burst"] = 100 + + updatedConf, err := json.Marshal(eniConfMap) + if err != nil { + return ctx, fmt.Errorf("failed to marshal eni_conf: %v", err) + } + + cm.Data["eni_conf"] = string(updatedConf) + err = config.Client().Resources().Update(ctx, cm) + if err != nil { + return ctx, fmt.Errorf("failed to update eni-config: %v", err) + } + + fmt.Printf("Configured kube_client_qps=50, kube_client_burst=100\n") + return ctx, nil +} + // printClusterEnvironment prints the cluster environment information including node capacities func printClusterEnvironment(ctx context.Context, config *envconf.Config) (context.Context, error) { 
fmt.Println("=== Cluster Environment Information ===") diff --git a/tests/stress/network_interface_stress_test.go b/tests/stress/network_interface_stress_test.go new file mode 100644 index 00000000..1c24fdb2 --- /dev/null +++ b/tests/stress/network_interface_stress_test.go @@ -0,0 +1,266 @@ +//go:build e2e + +package stress + +import ( + "context" + "fmt" + "math/rand" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/AliyunContainerService/terway/pkg/apis/network.alibabacloud.com/v1beta1" + "golang.org/x/sync/errgroup" + "golang.org/x/time/rate" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/e2e-framework/pkg/envconf" + "sigs.k8s.io/e2e-framework/pkg/features" +) + +func TestNetworkInterfaceStress(t *testing.T) { + const ( + totalCRs = 150000 + updateQPS = 1000 + updateDuration = 10 * time.Minute + ) + + f := features.New("NetworkInterfaceStress"). + Setup(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context { + restConfig := cfg.Client().RESTConfig() + restConfig.QPS = 3000 + restConfig.Burst = 5000 + c, err := client.New(restConfig, client.Options{ + Scheme: cfg.Client().Resources().GetScheme(), + }) + if err != nil { + t.Fatal(err) + } + + t.Logf("Creating %d NetworkInterface CRs...", totalCRs) + + g, _ := errgroup.WithContext(ctx) + g.SetLimit(100) + + start := time.Now() + for i := 0; i < totalCRs; i++ { + idx := i + g.Go(func() error { + name := fmt.Sprintf("stress-eni-%d", idx) + trunk := rand.Intn(2) == 0 + ni := &v1beta1.NetworkInterface{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + Spec: v1beta1.NetworkInterfaceSpec{ + ENI: v1beta1.ENI{ + ID: fmt.Sprintf("eni-%d", idx), + VPCID: fmt.Sprintf("vpc-%d", rand.Intn(100)), + MAC: randomMAC(), + Zone: fmt.Sprintf("zone-%d", rand.Intn(10)), + VSwitchID: fmt.Sprintf("vsw-%d", rand.Intn(100)), + ResourceGroupID: fmt.Sprintf("rg-%d", rand.Intn(100)), + 
SecurityGroupIDs: []string{ + fmt.Sprintf("sg-%d", rand.Intn(100)), + fmt.Sprintf("sg-%d", rand.Intn(100)), + }, + AttachmentOptions: v1beta1.AttachmentOptions{ + Trunk: &trunk, + }, + }, + }, + } + if err := c.Create(ctx, ni); err != nil { + // Ignore already exists + if client.IgnoreAlreadyExists(err) != nil { + return fmt.Errorf("failed to create ni %s: %v", name, err) + } + } + return nil + }) + } + if err := g.Wait(); err != nil { + t.Fatal(err) + } + t.Logf("Created %d CRs in %v", totalCRs, time.Since(start)) + + return ctx + }). + Assess("List Performance (No Cache)", func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context { + // Create a non-cached client + uncachedClient, err := client.New(cfg.Client().RESTConfig(), client.Options{ + Scheme: cfg.Client().Resources().GetScheme(), + }) + if err != nil { + t.Fatal(err) + } + + var totalDuration time.Duration + for i := 0; i < 5; i++ { + start := time.Now() + var list v1beta1.NetworkInterfaceList + if err := uncachedClient.List(ctx, &list); err != nil { + t.Fatalf("failed to list: %v", err) + } + duration := time.Since(start) + t.Logf("List %d took %v, items: %d", i+1, duration, len(list.Items)) + totalDuration += duration + } + t.Logf("Average List Latency: %v", totalDuration/5) + return ctx + }). 
+ Assess("Update Performance", func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context { + restConfig := cfg.Client().RESTConfig() + restConfig.QPS = 3000 + restConfig.Burst = 5000 + c, err := client.New(restConfig, client.Options{ + Scheme: cfg.Client().Resources().GetScheme(), + }) + if err != nil { + t.Fatal(err) + } + + limiter := rate.NewLimiter(rate.Limit(updateQPS), updateQPS) + + var ( + successCount int64 + failCount int64 + totalLatency int64 // microseconds + ) + + ctx, cancel := context.WithTimeout(ctx, updateDuration) + defer cancel() + + var wg sync.WaitGroup + t.Logf("Starting update stress test for %v at %d QPS...", updateDuration, updateQPS) + + startTest := time.Now() + + // Use a pool of workers to execute updates + workCh := make(chan int, 1000) + for i := 0; i < 100; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for idx := range workCh { + name := fmt.Sprintf("stress-eni-%d", idx) + + start := time.Now() + + patch := []byte(fmt.Sprintf(`{"status":{"phase":"%s","eniInfo":{"id":"%s"}}}`, "Bind", fmt.Sprintf("eni-%d", idx))) + if rand.Intn(2) == 0 { + patch = []byte(fmt.Sprintf(`{"status":{"phase":"%s","eniInfo":{"id":"%s"}}}`, "Unbind", fmt.Sprintf("eni-%d", idx))) + } + + // Using MergePatch + ni := &v1beta1.NetworkInterface{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + } + err := c.Status().Patch(context.Background(), ni, client.RawPatch(types.MergePatchType, patch)) + + latency := time.Since(start).Microseconds() + atomic.AddInt64(&totalLatency, latency) + + if err != nil { + atomic.AddInt64(&failCount, 1) + if atomic.LoadInt64(&failCount) <= 10 { + t.Logf("Update failed for %s: %v", name, err) + } + } else { + atomic.AddInt64(&successCount, 1) + } + } + }() + } + + go func() { + for { + select { + case <-ctx.Done(): + close(workCh) + return + default: + if err := limiter.Wait(ctx); err != nil { + // Context canceled + close(workCh) + return + } + // Pick random ID + idx := rand.Intn(totalCRs) + select { + 
case workCh <- idx: + case <-ctx.Done(): + close(workCh) + return + } + } + } + }() + + wg.Wait() + + duration := time.Since(startTest) + totalOps := successCount + failCount + avgLatency := time.Duration(0) + if totalOps > 0 { + avgLatency = time.Duration(totalLatency/totalOps) * time.Microsecond + } + + t.Logf("Update Stress Test Finished:") + t.Logf("Duration: %v", duration) + t.Logf("Total Ops: %d", totalOps) + t.Logf("Success: %d", successCount) + t.Logf("Failure: %d", failCount) + t.Logf("Average Latency: %v", avgLatency) + t.Logf("Actual QPS: %.2f", float64(totalOps)/duration.Seconds()) + + return ctx + }). + Teardown(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context { + // Cleanup + restConfig := cfg.Client().RESTConfig() + restConfig.QPS = 3000 + restConfig.Burst = 5000 + c, err := client.New(restConfig, client.Options{ + Scheme: cfg.Client().Resources().GetScheme(), + }) + if err != nil { + t.Fatal(err) + } + t.Logf("Cleaning up %d NetworkInterface CRs...", totalCRs) + + g, _ := errgroup.WithContext(ctx) + g.SetLimit(100) + + for i := 0; i < totalCRs; i++ { + idx := i + g.Go(func() error { + name := fmt.Sprintf("stress-eni-%d", idx) + ni := &v1beta1.NetworkInterface{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + } + _ = c.Delete(ctx, ni) + return nil + }) + } + _ = g.Wait() + t.Log("Cleanup finished") + return ctx + }).Feature() + + testenv.Test(t, f) +} + +func randomMAC() string { + buf := make([]byte, 6) + _, _ = rand.Read(buf) + buf[0] |= 2 // locally administered + return fmt.Sprintf("%02x:%02x:%02x:%02x:%02x:%02x", buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]) +} diff --git a/tests/stress/setup_test.go b/tests/stress/setup_test.go index 90cafd64..8ba81f98 100644 --- a/tests/stress/setup_test.go +++ b/tests/stress/setup_test.go @@ -3,24 +3,24 @@ package stress import ( - "context" "os" "path/filepath" "testing" "k8s.io/client-go/kubernetes/scheme" clientgoscheme "k8s.io/client-go/kubernetes/scheme" + 
"sigs.k8s.io/e2e-framework/klient" "sigs.k8s.io/e2e-framework/pkg/env" "sigs.k8s.io/e2e-framework/pkg/envconf" "sigs.k8s.io/e2e-framework/pkg/envfuncs" - "github.com/AliyunContainerService/terway/tests/utils" + "k8s.io/client-go/tools/clientcmd" + + "github.com/AliyunContainerService/terway/pkg/apis/network.alibabacloud.com/v1beta1" ) var ( testenv env.Environment - - containerNetworkPods int ) func TestMain(m *testing.M) { @@ -29,19 +29,26 @@ func TestMain(m *testing.M) { panic("error get home path") } - envCfg := envconf.NewWithKubeConfig(filepath.Join(home, ".kube", "config")). - WithRandomNamespace() + restConfig, err := clientcmd.BuildConfigFromFlags("", filepath.Join(home, ".kube", "config")) + if err != nil { + panic(err) + } + restConfig.QPS = 3000 + restConfig.Burst = 5000 + + client, err := klient.New(restConfig) + if err != nil { + panic(err) + } + envCfg := envconf.New().WithRandomNamespace().WithClient(client) testenv = env.NewWithConfig(envCfg) _ = clientgoscheme.AddToScheme(scheme.Scheme) + _ = v1beta1.AddToScheme(scheme.Scheme) testenv.Setup( envfuncs.CreateNamespace(envCfg.Namespace()), - func(ctx context.Context, cfg *envconf.Config) (context.Context, error) { - containerNetworkPods, err = utils.GetAvailableContainerNetworkPods(ctx, cfg.Client()) - return ctx, err - }, ) testenv.Finish( diff --git a/tests/stress/stress_test.go b/tests/stress/stress_test.go deleted file mode 100644 index 7b699807..00000000 --- a/tests/stress/stress_test.go +++ /dev/null @@ -1,110 +0,0 @@ -//go:build e2e - -package stress - -import ( - "context" - "testing" - "time" - - "github.com/AliyunContainerService/terway/tests/utils" - - appsv1 "k8s.io/api/apps/v1" - "sigs.k8s.io/e2e-framework/klient/k8s" - "sigs.k8s.io/e2e-framework/klient/wait" - "sigs.k8s.io/e2e-framework/klient/wait/conditions" - "sigs.k8s.io/e2e-framework/pkg/envconf" - "sigs.k8s.io/e2e-framework/pkg/features" -) - -func TestStress(t *testing.T) { - stress1 := 
features.New("stress1"). - Setup(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context { - var err error - count := int32(containerNetworkPods / 2) - deploy := utils.DeploymentPause("stress1", cfg.Namespace(), count) - err = cfg.Client().Resources().Create(ctx, deploy) - if err != nil { - t.Fatal(err) - } - ctx = context.WithValue(ctx, "COUNT", count) - ctx = context.WithValue(ctx, "DEPLOYMENT", deploy) - return ctx - }). - Assess("scale 20 times", func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context { - count := ctx.Value("COUNT").(int32) - deployment := ctx.Value("DEPLOYMENT").(*appsv1.Deployment) - - current := count - var zero int32 = 0 - for i := 0; i < 20; i++ { - err := wait.For(conditions.New(cfg.Client().Resources()).ResourceScaled(deployment, func(object k8s.Object) int32 { - return object.(*appsv1.Deployment).Status.ReadyReplicas - }, current), wait.WithTimeout(2*time.Minute)) - if err != nil { - t.Error("failed waiting for deployment to be scaled up") - } - err = cfg.Client().Resources().Get(ctx, deployment.Name, deployment.Namespace, deployment) - if err != nil { - t.Error("failed get deployment") - } - if current > 0 { - deployment.Spec.Replicas = &zero - current = zero - } else { - deployment.Spec.Replicas = &count - current = count - } - err = cfg.Client().Resources().Update(ctx, deployment) - if err != nil { - t.Error("failed scale deployment") - } - } - return ctx - }).Feature() - stress2 := features.New("stress2"). - Setup(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context { - var err error - count := int32(containerNetworkPods - containerNetworkPods/2) - deploy := utils.DeploymentPause("stress2", cfg.Namespace(), count) - err = cfg.Client().Resources().Create(ctx, deploy) - if err != nil { - t.Fatal(err) - } - ctx = context.WithValue(ctx, "COUNT", count) - ctx = context.WithValue(ctx, "DEPLOYMENT", deploy) - return ctx - }). 
- Assess("scale 20 times", func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context { - count := ctx.Value("COUNT").(int32) - deployment := ctx.Value("DEPLOYMENT").(*appsv1.Deployment) - - current := count - var zero int32 = 0 - for i := 0; i < 20; i++ { - err := wait.For(conditions.New(cfg.Client().Resources()).ResourceScaled(deployment, func(object k8s.Object) int32 { - return object.(*appsv1.Deployment).Status.ReadyReplicas - }, current), wait.WithTimeout(2*time.Minute)) - if err != nil { - t.Error("failed waiting for deployment to be scaled up") - } - err = cfg.Client().Resources().Get(ctx, deployment.Name, deployment.Namespace, deployment) - if err != nil { - t.Error("failed get deployment") - } - if current > 0 { - deployment.Spec.Replicas = &zero - current = zero - } else { - deployment.Spec.Replicas = &count - current = count - } - err = cfg.Client().Resources().Update(ctx, deployment) - if err != nil { - t.Error("failed scale deployment") - } - } - return ctx - }).Feature() - testenv.TestInParallel(t, stress1, stress2) -} diff --git a/tests/utils/deployment.go b/tests/utils/deployment.go new file mode 100644 index 00000000..263a52a9 --- /dev/null +++ b/tests/utils/deployment.go @@ -0,0 +1,191 @@ +package utils + +import ( + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// Deployment is a builder for creating deployment configurations +type Deployment struct { + *appsv1.Deployment +} + +// NewDeployment creates a new deployment builder +func NewDeployment(name, namespace string, replicas int32) *Deployment { + return &Deployment{ + Deployment: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: &replicas, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": name, + }, + }, + Strategy: appsv1.DeploymentStrategy{ + Type: appsv1.RollingUpdateDeploymentStrategyType, 
+ }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app": name, + }, + }, + Spec: corev1.PodSpec{ + TerminationGracePeriodSeconds: func() *int64 { + i := int64(0) + return &i + }(), + Containers: []corev1.Container{ + { + Name: "pause", + Image: "registry.cn-hangzhou.aliyuncs.com/acs/pause:3.2", + Command: []string{"/pause"}, + ImagePullPolicy: corev1.PullIfNotPresent, + }, + }, + }, + }, + }, + }, + } +} + +// WithNodeAffinity adds node affinity to the deployment +func (d *Deployment) WithNodeAffinity(labels map[string]string) *Deployment { + var nodeSelectorTerms []corev1.NodeSelectorRequirement + for k, v := range labels { + nodeSelectorTerms = append(nodeSelectorTerms, corev1.NodeSelectorRequirement{ + Key: k, + Operator: corev1.NodeSelectorOpIn, + Values: []string{v}, + }) + } + + if len(nodeSelectorTerms) == 0 { + return d + } + + if d.Spec.Template.Spec.Affinity == nil { + d.Spec.Template.Spec.Affinity = &corev1.Affinity{} + } + if d.Spec.Template.Spec.Affinity.NodeAffinity == nil { + d.Spec.Template.Spec.Affinity.NodeAffinity = &corev1.NodeAffinity{} + } + if d.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil { + d.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution = &corev1.NodeSelector{} + } + if len(d.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms) == 0 { + d.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms = []corev1.NodeSelectorTerm{ + {MatchExpressions: nodeSelectorTerms}, + } + } else { + d.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[0].MatchExpressions = append( + d.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[0].MatchExpressions, + nodeSelectorTerms..., + ) + } + return d +} + 
+// WithNodeAffinityExclude adds node affinity exclusion to the deployment +func (d *Deployment) WithNodeAffinityExclude(excludeLabels map[string]string) *Deployment { + var nodeSelectorTerms []corev1.NodeSelectorRequirement + for k, v := range excludeLabels { + nodeSelectorTerms = append(nodeSelectorTerms, corev1.NodeSelectorRequirement{ + Key: k, + Operator: corev1.NodeSelectorOpNotIn, + Values: []string{v}, + }) + } + + // Always exclude virtual-kubelet nodes + nodeSelectorTerms = append(nodeSelectorTerms, corev1.NodeSelectorRequirement{ + Key: "type", + Operator: corev1.NodeSelectorOpNotIn, + Values: []string{"virtual-kubelet"}, + }) + + if d.Spec.Template.Spec.Affinity == nil { + d.Spec.Template.Spec.Affinity = &corev1.Affinity{} + } + if d.Spec.Template.Spec.Affinity.NodeAffinity == nil { + d.Spec.Template.Spec.Affinity.NodeAffinity = &corev1.NodeAffinity{} + } + if d.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil { + d.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution = &corev1.NodeSelector{} + } + if len(d.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms) == 0 { + d.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms = []corev1.NodeSelectorTerm{ + {MatchExpressions: nodeSelectorTerms}, + } + } else { + d.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[0].MatchExpressions = append( + d.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[0].MatchExpressions, + nodeSelectorTerms..., + ) + } + return d +} + +// WithTolerations adds tolerations to the deployment +func (d *Deployment) WithTolerations(tolerations []corev1.Toleration) *Deployment { + d.Spec.Template.Spec.Tolerations = append(d.Spec.Template.Spec.Tolerations, tolerations...) 
+ return d +} + +// WithLingjunToleration adds toleration for Lingjun nodes +func (d *Deployment) WithLingjunToleration() *Deployment { + return d.WithTolerations(LingjunTolerations()) +} + +// WithLabels adds labels to the pod template +func (d *Deployment) WithLabels(labels map[string]string) *Deployment { + if d.Spec.Template.Labels == nil { + d.Spec.Template.Labels = make(map[string]string) + } + for k, v := range labels { + d.Spec.Template.Labels[k] = v + } + return d +} + +// WithAnnotations adds annotations to the pod template +func (d *Deployment) WithAnnotations(annotations map[string]string) *Deployment { + if d.Spec.Template.Annotations == nil { + d.Spec.Template.Annotations = make(map[string]string) + } + for k, v := range annotations { + d.Spec.Template.Annotations[k] = v + } + return d +} + +// WithPodAffinity adds pod affinity to schedule pods to the same node as pods with specified labels +func (d *Deployment) WithPodAffinity(labels map[string]string) *Deployment { + if len(labels) == 0 { + return d + } + + if d.Spec.Template.Spec.Affinity == nil { + d.Spec.Template.Spec.Affinity = &corev1.Affinity{} + } + if d.Spec.Template.Spec.Affinity.PodAffinity == nil { + d.Spec.Template.Spec.Affinity.PodAffinity = &corev1.PodAffinity{} + } + + d.Spec.Template.Spec.Affinity.PodAffinity.RequiredDuringSchedulingIgnoredDuringExecution = append( + d.Spec.Template.Spec.Affinity.PodAffinity.RequiredDuringSchedulingIgnoredDuringExecution, + corev1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: labels, + }, + TopologyKey: "kubernetes.io/hostname", + }, + ) + return d +} diff --git a/tests/utils/deployment_test.go b/tests/utils/deployment_test.go new file mode 100644 index 00000000..53c1fa17 --- /dev/null +++ b/tests/utils/deployment_test.go @@ -0,0 +1,100 @@ +package utils + +import ( + "testing" +) + +func TestNewDeployment(t *testing.T) { + deploy := NewDeployment("test-deploy", "test-ns", 10) + + if deploy.Deployment == nil { + 
t.Fatal("Deployment is nil") + } + if deploy.Name != "test-deploy" { + t.Errorf("Name = %s, want test-deploy", deploy.Name) + } + if deploy.Namespace != "test-ns" { + t.Errorf("Namespace = %s, want test-ns", deploy.Namespace) + } + if *deploy.Spec.Replicas != 10 { + t.Errorf("Replicas = %d, want 10", *deploy.Spec.Replicas) + } +} + +func TestDeploymentWithNodeAffinity(t *testing.T) { + deploy := NewDeployment("test-deploy", "test-ns", 1). + WithNodeAffinity(map[string]string{ + "node-type": "test", + }) + + affinity := deploy.Spec.Template.Spec.Affinity + if affinity == nil { + t.Fatal("Affinity is nil") + } + if affinity.NodeAffinity == nil { + t.Fatal("NodeAffinity is nil") + } + + terms := affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms + if len(terms) == 0 { + t.Fatal("NodeSelectorTerms is empty") + } + + found := false + for _, expr := range terms[0].MatchExpressions { + if expr.Key == "node-type" && len(expr.Values) > 0 && expr.Values[0] == "test" { + found = true + break + } + } + if !found { + t.Error("Expected node affinity not found") + } +} + +func TestDeploymentWithNodeAffinityExclude(t *testing.T) { + deploy := NewDeployment("test-deploy", "test-ns", 1). + WithNodeAffinityExclude(map[string]string{ + "exclude-label": "value", + }) + + affinity := deploy.Spec.Template.Spec.Affinity + if affinity == nil { + t.Fatal("Affinity is nil") + } + if affinity.NodeAffinity == nil { + t.Fatal("NodeAffinity is nil") + } + + terms := affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms + if len(terms) == 0 { + t.Fatal("NodeSelectorTerms is empty") + } + + // Should have at least 2 expressions: the exclude label and virtual-kubelet exclusion + if len(terms[0].MatchExpressions) < 2 { + t.Error("Expected at least 2 match expressions") + } +} + +func TestDeploymentWithLabels(t *testing.T) { + deploy := NewDeployment("test-deploy", "test-ns", 1). 
+ WithLabels(map[string]string{ + "custom-label": "value", + }) + + if deploy.Spec.Template.Labels["custom-label"] != "value" { + t.Errorf("Expected label custom-label=value, got %s", deploy.Spec.Template.Labels["custom-label"]) + } +} + +func TestDeploymentWithAnnotations(t *testing.T) { + deploy := NewDeployment("test-deploy", "test-ns", 1). + WithAnnotations(map[string]string{ + "custom-annotation": "value", + }) + + if deploy.Spec.Template.Annotations["custom-annotation"] != "value" { + t.Errorf("Expected annotation custom-annotation=value, got %s", deploy.Spec.Template.Annotations["custom-annotation"]) + } +} diff --git a/tests/utils/k8s.go b/tests/utils/k8s.go deleted file mode 100644 index da1c42f8..00000000 --- a/tests/utils/k8s.go +++ /dev/null @@ -1,305 +0,0 @@ -package utils - -import ( - "context" - - appsv1 "k8s.io/api/apps/v1" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "sigs.k8s.io/e2e-framework/klient" -) - -// GetAvailableContainerNetworkPods return number of container pods can create in this cluster -// not very accurate but is enough for tests -func GetAvailableContainerNetworkPods(ctx context.Context, client klient.Client) (int, error) { - var nodes corev1.NodeList - err := client.Resources().List(ctx, &nodes) - if err != nil { - return 0, err - } - count := 0 - for _, node := range nodes.Items { - count += int(node.Status.Allocatable.Pods().Value()) - } - var pods corev1.PodList - err = client.Resources().List(ctx, &pods) - if err != nil { - return 0, err - } - for _, pod := range pods.Items { - if pod.Status.Phase == corev1.PodRunning { - count-- - } - } - return count, nil -} - -func DeploymentPause(name, namespace string, count int32) *appsv1.Deployment { - return &appsv1.Deployment{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: namespace, - }, - Spec: appsv1.DeploymentSpec{ - Strategy: appsv1.DeploymentStrategy{ - Type: appsv1.RecreateDeploymentStrategyType, - }, - Selector: 
&metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app": name, - }, - }, - Replicas: func(a int32) *int32 { return &a }(count), - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: map[string]string{ - "app": name, - }, - }, - Spec: corev1.PodSpec{ - Tolerations: []corev1.Toleration{ - { - Key: "node-role.kubernetes.io/master", - Operator: corev1.TolerationOpExists, - Effect: corev1.TaintEffectNoSchedule, - }, - }, - TerminationGracePeriodSeconds: func(a int64) *int64 { return &a }(0), - Containers: []corev1.Container{ - { - Name: "pause", - Image: "registry.cn-hangzhou.aliyuncs.com/acs/pause:3.2", - Command: []string{"/pause"}, - ImagePullPolicy: corev1.PullIfNotPresent, - }, - }, - Affinity: &corev1.Affinity{ - NodeAffinity: &corev1.NodeAffinity{ - RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{ - NodeSelectorTerms: []corev1.NodeSelectorTerm{ - { - MatchExpressions: []corev1.NodeSelectorRequirement{ - { - Key: "type", - Operator: corev1.NodeSelectorOpNotIn, - Values: []string{ - "virtual-kubelet", - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - }, - } -} - -type Pod struct { - Pod *corev1.Pod -} - -// NewPod create pod with custom group id, pod is anti-affinity with in same group -func NewPod(name, namespace, group, image string) *Pod { - var zero int64 - return &Pod{Pod: &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: namespace, - Labels: map[string]string{ - "app": name, - "group": group, - }, - Annotations: map[string]string{}, - }, - Spec: corev1.PodSpec{ - Affinity: &corev1.Affinity{ - NodeAffinity: &corev1.NodeAffinity{ - RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{ - NodeSelectorTerms: []corev1.NodeSelectorTerm{ - { - MatchExpressions: []corev1.NodeSelectorRequirement{ - { - Key: "type", - Operator: corev1.NodeSelectorOpNotIn, - Values: []string{"virtual-kubelet"}, - }, { - Key: "kubernetes.io/arch", - Operator: corev1.NodeSelectorOpIn, - Values: 
[]string{"amd64", "arm64"}, - }, { - Key: "kubernetes.io/os", - Operator: corev1.NodeSelectorOpIn, - Values: []string{"linux"}, - }, - }, - }, - }, - }, - }, - PodAntiAffinity: &corev1.PodAntiAffinity{RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{ - { - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "group": group, - }, - }, - TopologyKey: "kubernetes.io/hostname", - }, - }}, - }, - Containers: []corev1.Container{ - { - Name: name, - Image: image, - ImagePullPolicy: corev1.PullIfNotPresent, - }, - }, - TerminationGracePeriodSeconds: &zero, - }, - }} -} - -func (p *Pod) Expose(svcType string) *corev1.Service { - policy := corev1.IPFamilyPolicyPreferDualStack - svc := &corev1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: p.Pod.Name, - Namespace: p.Pod.Namespace, - Labels: p.Pod.Labels, - Annotations: map[string]string{}, - }, - Spec: corev1.ServiceSpec{ - Ports: nil, - Selector: p.Pod.Labels, - IPFamilyPolicy: &policy, - }, - } - switch svcType { - case "headless": - svc.Spec.ClusterIP = corev1.ClusterIPNone - case "nodePort": - svc.Spec.Type = corev1.ServiceTypeNodePort - case "loadBalancer": - svc.Spec.Type = corev1.ServiceTypeLoadBalancer - default: - // default clusterIP - } - return svc -} - -type Sts struct { - Sts *appsv1.StatefulSet -} - -func NewSts(name, namespace, group, image string, replicas int) *Sts { - var zero int64 - return &Sts{Sts: &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: namespace, - Labels: map[string]string{ - "app": name, - "group": group, - }, - Annotations: map[string]string{}, - }, - Spec: appsv1.StatefulSetSpec{ - Replicas: func(r int) *int32 { - a := int32(r) - return &a - }(replicas), - Selector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app": name, - "group": group, - }}, - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{ - "app": name, - "group": group, - }}, - Spec: 
corev1.PodSpec{ - Affinity: &corev1.Affinity{ - NodeAffinity: &corev1.NodeAffinity{ - RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{ - NodeSelectorTerms: []corev1.NodeSelectorTerm{ - { - MatchExpressions: []corev1.NodeSelectorRequirement{ - { - Key: "type", - Operator: corev1.NodeSelectorOpNotIn, - Values: []string{"virtual-kubelet"}, - }, { - Key: "kubernetes.io/arch", - Operator: corev1.NodeSelectorOpIn, - Values: []string{"amd64", "arm64"}, - }, { - Key: "kubernetes.io/os", - Operator: corev1.NodeSelectorOpIn, - Values: []string{"linux"}, - }, - }, - }, - }, - }, - }, - PodAntiAffinity: &corev1.PodAntiAffinity{RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{ - { - LabelSelector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "group": group, - }, - }, - TopologyKey: "kubernetes.io/hostname", - }, - }}, - }, - Containers: []corev1.Container{ - { - Name: name, - Image: image, - ImagePullPolicy: corev1.PullIfNotPresent, - }, - }, - TerminationGracePeriodSeconds: &zero, - }, - }, - ServiceName: name, - UpdateStrategy: appsv1.StatefulSetUpdateStrategy{ - Type: appsv1.OnDeleteStatefulSetStrategyType, - }, - }, - }} -} - -func (p *Sts) Expose(svcType string) *corev1.Service { - policy := corev1.IPFamilyPolicyPreferDualStack - svc := &corev1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: p.Sts.Name, - Namespace: p.Sts.Namespace, - Labels: p.Sts.Labels, - Annotations: map[string]string{}, - }, - Spec: corev1.ServiceSpec{ - Ports: nil, - Selector: p.Sts.Labels, - IPFamilyPolicy: &policy, - }, - } - switch svcType { - case "headless": - svc.Spec.ClusterIP = corev1.ClusterIPNone - case "nodePort": - svc.Spec.Type = corev1.ServiceTypeNodePort - case "loadBalancer": - svc.Spec.Type = corev1.ServiceTypeLoadBalancer - default: - // default clusterIP - } - return svc -} diff --git a/tests/utils/latency.go b/tests/utils/latency.go new file mode 100644 index 00000000..24baf640 --- /dev/null +++ b/tests/utils/latency.go 
@@ -0,0 +1,77 @@
+package utils
+
+import (
+	"fmt"
+	"regexp"
+	"sort"
+	"time"
+)
+
+// LatencyStats contains latency statistics
+type LatencyStats struct {
+	P99 time.Duration
+	P90 time.Duration
+	Max time.Duration
+	Min time.Duration
+	Avg time.Duration
+	N   int
+}
+
+// String returns a formatted string of latency stats
+func (s LatencyStats) String() string {
+	return fmt.Sprintf("N=%d, P99=%v, P90=%v, Max=%v, Min=%v, Avg=%v",
+		s.N, s.P99, s.P90, s.Max, s.Min, s.Avg)
+}
+
+// ParseAllocIPSucceedLatency parses the latency from AllocIPSucceed event message
+// Example message: "Alloc IP 10.186.243.34/16-2001:db8:0000:0000:40ce:fc69:8c7:210a/64 took 37.358159ms"
+func ParseAllocIPSucceedLatency(message string) (time.Duration, error) {
+	// Match Go duration strings such as "37.358159ms", "1.5s" and compound
+	// forms like "1m30.5s". The repeated (number+unit) group is required so
+	// slow allocations ("took 1m12.5s") are not silently truncated to "1m".
+	re := regexp.MustCompile(`took\s+((?:[0-9.]+(?:ns|us|µs|ms|s|m|h))+)`)
+	matches := re.FindStringSubmatch(message)
+	if len(matches) < 2 {
+		return 0, fmt.Errorf("failed to parse latency from message: %s", message)
+	}
+	return time.ParseDuration(matches[1])
+}
+
+// CalculateLatencyStats calculates latency statistics from a slice of durations
+func CalculateLatencyStats(latencies []time.Duration) LatencyStats {
+	if len(latencies) == 0 {
+		return LatencyStats{}
+	}
+
+	// Sort latencies for percentile calculation
+	sorted := make([]time.Duration, len(latencies))
+	copy(sorted, latencies)
+	sort.Slice(sorted, func(i, j int) bool {
+		return sorted[i] < sorted[j]
+	})
+
+	n := len(sorted)
+	var total time.Duration
+	for _, d := range sorted {
+		total += d
+	}
+
+	// Calculate percentiles
+	p99Index := int(float64(n) * 0.99)
+	p90Index := int(float64(n) * 0.90)
+	if p99Index >= n {
+		p99Index = n - 1
+	}
+	if p90Index >= n {
+		p90Index = n - 1
+	}
+
+	return LatencyStats{
+		P99: sorted[p99Index],
+		P90: sorted[p90Index],
+		Max: sorted[n-1],
+		Min: sorted[0],
+		Avg: total / time.Duration(n),
+		N:   n,
+	}
+}
diff --git a/tests/utils/latency_test.go
b/tests/utils/latency_test.go new file mode 100644 index 00000000..12c90af4 --- /dev/null +++ b/tests/utils/latency_test.go @@ -0,0 +1,191 @@ +package utils + +import ( + "strings" + "testing" + "time" +) + +func TestParseAllocIPSucceedLatency(t *testing.T) { + tests := []struct { + name string + message string + expected time.Duration + wantErr bool + }{ + { + name: "milliseconds", + message: "Alloc IP 10.186.243.34/16-2001:db8:0000:0000:40ce:fc69:8c7:210a/64 took 37.358159ms", + expected: 37358159 * time.Nanosecond, // 37.358159ms + wantErr: false, + }, + { + name: "seconds", + message: "Alloc IP 10.186.243.124/16 took 1.5s", + expected: 1500 * time.Millisecond, + wantErr: false, + }, + { + name: "microseconds", + message: "Alloc IP 10.0.0.1/24 took 500us", + expected: 500 * time.Microsecond, + wantErr: false, + }, + { + name: "nanoseconds", + message: "Alloc IP 10.0.0.1/24 took 1000000ns", + expected: 1 * time.Millisecond, + wantErr: false, + }, + { + name: "integer milliseconds", + message: "Alloc IP 10.186.243.124/16 took 724ms", + expected: 724 * time.Millisecond, + wantErr: false, + }, + { + name: "complex message with long duration", + message: "Alloc IP 10.186.243.124/16-2001:db8:0000:0000:40ce:fc69:8c7:2109/64 took 724.537079ms", + expected: 724537079 * time.Nanosecond, + wantErr: false, + }, + { + name: "no latency info", + message: "Alloc IP 10.186.243.34/16 success", + wantErr: true, + }, + { + name: "empty message", + message: "", + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := ParseAllocIPSucceedLatency(tt.message) + if (err != nil) != tt.wantErr { + t.Errorf("ParseAllocIPSucceedLatency() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !tt.wantErr && got != tt.expected { + t.Errorf("ParseAllocIPSucceedLatency() = %v, want %v", got, tt.expected) + } + }) + } +} + +func TestCalculateLatencyStats(t *testing.T) { + tests := []struct { + name string + latencies []time.Duration + 
expected LatencyStats + }{ + { + name: "empty slice", + latencies: []time.Duration{}, + expected: LatencyStats{}, + }, + { + name: "single value", + latencies: []time.Duration{100 * time.Millisecond}, + expected: LatencyStats{ + P99: 100 * time.Millisecond, + P90: 100 * time.Millisecond, + Max: 100 * time.Millisecond, + Min: 100 * time.Millisecond, + Avg: 100 * time.Millisecond, + N: 1, + }, + }, + { + name: "multiple values", + latencies: []time.Duration{ + 10 * time.Millisecond, + 20 * time.Millisecond, + 30 * time.Millisecond, + 40 * time.Millisecond, + 50 * time.Millisecond, + 60 * time.Millisecond, + 70 * time.Millisecond, + 80 * time.Millisecond, + 90 * time.Millisecond, + 100 * time.Millisecond, + }, + expected: LatencyStats{ + P99: 100 * time.Millisecond, + P90: 100 * time.Millisecond, // index = int(10 * 0.90) = 9, which is 100ms + Max: 100 * time.Millisecond, + Min: 10 * time.Millisecond, + Avg: 55 * time.Millisecond, + N: 10, + }, + }, + { + name: "unsorted input", + latencies: []time.Duration{ + 50 * time.Millisecond, + 10 * time.Millisecond, + 90 * time.Millisecond, + 30 * time.Millisecond, + 70 * time.Millisecond, + }, + expected: LatencyStats{ + P99: 90 * time.Millisecond, + P90: 90 * time.Millisecond, + Max: 90 * time.Millisecond, + Min: 10 * time.Millisecond, + Avg: 50 * time.Millisecond, + N: 5, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := CalculateLatencyStats(tt.latencies) + if got.N != tt.expected.N { + t.Errorf("N = %d, want %d", got.N, tt.expected.N) + } + if got.Min != tt.expected.Min { + t.Errorf("Min = %v, want %v", got.Min, tt.expected.Min) + } + if got.Max != tt.expected.Max { + t.Errorf("Max = %v, want %v", got.Max, tt.expected.Max) + } + if got.Avg != tt.expected.Avg { + t.Errorf("Avg = %v, want %v", got.Avg, tt.expected.Avg) + } + if got.P90 != tt.expected.P90 { + t.Errorf("P90 = %v, want %v", got.P90, tt.expected.P90) + } + if got.P99 != tt.expected.P99 { + t.Errorf("P99 = %v, want %v", 
got.P99, tt.expected.P99) + } + }) + } +} + +func TestLatencyStatsString(t *testing.T) { + stats := LatencyStats{ + P99: 100 * time.Millisecond, + P90: 80 * time.Millisecond, + Max: 120 * time.Millisecond, + Min: 10 * time.Millisecond, + Avg: 50 * time.Millisecond, + N: 100, + } + + result := stats.String() + if result == "" { + t.Error("String() returned empty string") + } + + // Check that it contains all the expected values + expectedParts := []string{"N=100", "P99=100ms", "P90=80ms", "Max=120ms", "Min=10ms", "Avg=50ms"} + for _, part := range expectedParts { + if !strings.Contains(result, part) { + t.Errorf("String() = %s, expected to contain %s", result, part) + } + } +} diff --git a/tests/utils/node.go b/tests/utils/node.go new file mode 100644 index 00000000..9aaa9755 --- /dev/null +++ b/tests/utils/node.go @@ -0,0 +1,37 @@ +package utils + +import ( + corev1 "k8s.io/api/core/v1" +) + +const ( + // LingjunTaintKey is the taint key for Lingjun nodes + LingjunTaintKey = "node-role.alibabacloud.com/lingjun" + + // LingjunWorkerLabelKey is the label key for Lingjun worker nodes + LingjunWorkerLabelKey = "alibabacloud.com/lingjun-worker" + + // ExclusiveENILabelKey is the label key for exclusive ENI mode nodes + ExclusiveENILabelKey = "k8s.aliyun.com/exclusive-mode-eni-type" + + // ExclusiveENILabelValue is the label value for exclusive ENI mode nodes + ExclusiveENILabelValue = "eniOnly" +) + +// LingjunToleration returns the toleration for Lingjun nodes +func LingjunToleration() corev1.Toleration { + return corev1.Toleration{ + Key: LingjunTaintKey, + Operator: corev1.TolerationOpExists, + } +} + +// LingjunTolerations returns a slice containing the Lingjun toleration +func LingjunTolerations() []corev1.Toleration { + return []corev1.Toleration{LingjunToleration()} +} + +// IsLingjunNodeType checks if the given node type string represents a Lingjun node +func IsLingjunNodeType(nodeType string) bool { + return nodeType == "lingjun-shared-eni" || nodeType == 
"lingjun-exclusive-eni" +} diff --git a/tests/utils/node_test.go b/tests/utils/node_test.go new file mode 100644 index 00000000..2ef7aa6f --- /dev/null +++ b/tests/utils/node_test.go @@ -0,0 +1,74 @@ +package utils + +import ( + "testing" + + corev1 "k8s.io/api/core/v1" +) + +func TestLingjunToleration(t *testing.T) { + toleration := LingjunToleration() + + if toleration.Key != LingjunTaintKey { + t.Errorf("Expected key %s, got %s", LingjunTaintKey, toleration.Key) + } + if toleration.Operator != corev1.TolerationOpExists { + t.Errorf("Expected operator %s, got %s", corev1.TolerationOpExists, toleration.Operator) + } +} + +func TestLingjunTolerations(t *testing.T) { + tolerations := LingjunTolerations() + + if len(tolerations) != 1 { + t.Fatalf("Expected 1 toleration, got %d", len(tolerations)) + } + if tolerations[0].Key != LingjunTaintKey { + t.Errorf("Expected key %s, got %s", LingjunTaintKey, tolerations[0].Key) + } +} + +func TestIsLingjunNodeType(t *testing.T) { + tests := []struct { + nodeType string + expected bool + }{ + {"lingjun-shared-eni", true}, + {"lingjun-exclusive-eni", true}, + {"ecs-shared-eni", false}, + {"ecs-exclusive-eni", false}, + {"", false}, + {"lingjun", false}, + } + + for _, tt := range tests { + t.Run(tt.nodeType, func(t *testing.T) { + result := IsLingjunNodeType(tt.nodeType) + if result != tt.expected { + t.Errorf("IsLingjunNodeType(%s) = %v, want %v", tt.nodeType, result, tt.expected) + } + }) + } +} + +func TestDeploymentWithLingjunToleration(t *testing.T) { + deploy := NewDeployment("test", "default", 1). 
+ WithLingjunToleration() + + tolerations := deploy.Spec.Template.Spec.Tolerations + if len(tolerations) != 1 { + t.Fatalf("Expected 1 toleration, got %d", len(tolerations)) + } + + found := false + for _, tol := range tolerations { + if tol.Key == LingjunTaintKey && tol.Operator == corev1.TolerationOpExists { + found = true + break + } + } + if !found { + t.Error("Lingjun toleration not found in deployment") + } +} + diff --git a/tests/utils_test.go b/tests/utils_test.go index 89709394..7184dde2 100644 --- a/tests/utils_test.go +++ b/tests/utils_test.go @@ -305,6 +305,16 @@ func (p *Pod) WithTolerations(tolerations []corev1.Toleration) *Pod { return p } +// WithLingjunToleration adds toleration for Lingjun nodes +func (p *Pod) WithLingjunToleration() *Pod { + return p.WithTolerations([]corev1.Toleration{ + { + Key: "node-role.alibabacloud.com/lingjun", + Operator: corev1.TolerationOpExists, + }, + }) +} + type Service struct { *corev1.Service } @@ -1260,3 +1270,129 @@ func IsDatapathV2Enabled(ctx context.Context, config *envconf.Config) (bool, err return false, nil } + +// Deployment is a builder for creating deployment configurations +type Deployment struct { + *appsv1.Deployment +} + +// NewDeployment creates a new deployment builder +func NewDeployment(name, namespace string, replicas int32) *Deployment { + return &Deployment{ + Deployment: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: &replicas, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": name, + }, + }, + Strategy: appsv1.DeploymentStrategy{ + Type: appsv1.RollingUpdateDeploymentStrategyType, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app": name, + }, + }, + Spec: corev1.PodSpec{ + TerminationGracePeriodSeconds: func() *int64 { + i := int64(0) + return &i + }(), + Containers: []corev1.Container{ + { + Name: "pause", + 
Image: "registry-cn-hangzhou.ack.aliyuncs.com/acs/pause:3.5", + Command: []string{"/pause"}, + ImagePullPolicy: corev1.PullIfNotPresent, + }, + }, + }, + }, + }, + }, + } +} + +// WithNodeAffinity adds node affinity to the deployment +func (d *Deployment) WithNodeAffinity(labels map[string]string) *Deployment { + var nodeSelectorTerms []corev1.NodeSelectorRequirement + for k, v := range labels { + nodeSelectorTerms = append(nodeSelectorTerms, corev1.NodeSelectorRequirement{ + Key: k, + Operator: corev1.NodeSelectorOpIn, + Values: []string{v}, + }) + } + + if len(nodeSelectorTerms) == 0 { + return d + } + + if d.Spec.Template.Spec.Affinity == nil { + d.Spec.Template.Spec.Affinity = &corev1.Affinity{} + } + if d.Spec.Template.Spec.Affinity.NodeAffinity == nil { + d.Spec.Template.Spec.Affinity.NodeAffinity = &corev1.NodeAffinity{} + } + if d.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil { + d.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution = &corev1.NodeSelector{} + } + if len(d.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms) == 0 { + d.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms = []corev1.NodeSelectorTerm{ + {MatchExpressions: nodeSelectorTerms}, + } + } else { + d.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[0].MatchExpressions = append( + d.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[0].MatchExpressions, + nodeSelectorTerms..., + ) + } + return d +} + +// WithNodeAffinityExclude adds node affinity exclusion to the deployment +func (d *Deployment) WithNodeAffinityExclude(excludeLabels map[string]string) *Deployment { + var nodeSelectorTerms []corev1.NodeSelectorRequirement + for k, v := range excludeLabels { + nodeSelectorTerms = 
append(nodeSelectorTerms, corev1.NodeSelectorRequirement{ + Key: k, + Operator: corev1.NodeSelectorOpNotIn, + Values: []string{v}, + }) + } + + // Always exclude virtual-kubelet nodes + nodeSelectorTerms = append(nodeSelectorTerms, corev1.NodeSelectorRequirement{ + Key: "type", + Operator: corev1.NodeSelectorOpNotIn, + Values: []string{"virtual-kubelet"}, + }) + + if d.Spec.Template.Spec.Affinity == nil { + d.Spec.Template.Spec.Affinity = &corev1.Affinity{} + } + if d.Spec.Template.Spec.Affinity.NodeAffinity == nil { + d.Spec.Template.Spec.Affinity.NodeAffinity = &corev1.NodeAffinity{} + } + if d.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil { + d.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution = &corev1.NodeSelector{} + } + if len(d.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms) == 0 { + d.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms = []corev1.NodeSelectorTerm{ + {MatchExpressions: nodeSelectorTerms}, + } + } else { + d.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[0].MatchExpressions = append( + d.Spec.Template.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[0].MatchExpressions, + nodeSelectorTerms..., + ) + } + return d +}