allow parallel test execution via skip_queue flag

pk910 · pk910 · commit f8b2eabe8ed7 · 2025-04-17T18:52:56.000+02:00
diff --git a/pkg/coordinator/coordinator.go b/pkg/coordinator/coordinator.go
@@ -191,7 +191,10 @@ func (c *Coordinator) Run(ctx context.Context) error {
 	// start per epoch GC routine
 	go c.runEpochGC(ctx)
 
-	// run tests
+	// start off queue test execution loop
+	go c.runner.RunOffQueueTestExecutionLoop(ctx)
+
+	// run test execution loop for queued tests
 	c.runner.RunTestExecutionLoop(ctx, c.Config.Coordinator.MaxConcurrentTests)
 
 	return nil
@@ -289,8 +292,8 @@ func (c *Coordinator) DeleteTestRun(runID uint64) error {
 	return err
 }
 
-func (c *Coordinator) ScheduleTest(descriptor types.TestDescriptor, configOverrides map[string]any, allowDuplicate bool) (types.TestRunner, error) {
-	return c.runner.ScheduleTest(descriptor, configOverrides, allowDuplicate)
+func (c *Coordinator) ScheduleTest(descriptor types.TestDescriptor, configOverrides map[string]any, allowDuplicate bool, skipQueue bool) (types.TestRunner, error) {
+	return c.runner.ScheduleTest(descriptor, configOverrides, allowDuplicate, skipQueue)
 }
 
 func (c *Coordinator) startMetrics() error {
diff --git a/pkg/coordinator/testrunner.go b/pkg/coordinator/testrunner.go
@@ -16,24 +16,24 @@ type TestRunner struct {
 	coordinator types.Coordinator
 
 	runIDCounter       uint64
-	lastExecutedRunID  uint64
 	testSchedulerMutex sync.Mutex
 
-	testRunMap           map[uint64]types.Test
-	testQueue            []types.TestRunner
-	testRegistryMutex    sync.RWMutex
-	testNotificationChan chan bool
+	testRunMap               map[uint64]types.Test
+	testQueue                []types.TestRunner
+	testRegistryMutex        sync.RWMutex
+	queueNotificationChan    chan bool
+	offQueueNotificationChan chan types.TestRunner
 }
 
 func NewTestRunner(coordinator types.Coordinator, lastRunID uint64) *TestRunner {
 	return &TestRunner{
-		coordinator:       coordinator,
-		runIDCounter:      lastRunID,
-		lastExecutedRunID: lastRunID,
+		coordinator:  coordinator,
+		runIDCounter: lastRunID,
 
-		testRunMap:           map[uint64]types.Test{},
-		testQueue:            []types.TestRunner{},
-		testNotificationChan: make(chan bool, 1),
+		testRunMap:               map[uint64]types.Test{},
+		testQueue:                []types.TestRunner{},
+		queueNotificationChan:    make(chan bool, 1),
+		offQueueNotificationChan: make(chan types.TestRunner, 10),
 	}
 }
 
@@ -76,25 +76,29 @@ func (c *TestRunner) RemoveTestFromQueue(runID uint64) bool {
 	return false
 }
 
-func (c *TestRunner) ScheduleTest(descriptor types.TestDescriptor, configOverrides map[string]any, allowDuplicate bool) (types.TestRunner, error) {
+func (c *TestRunner) ScheduleTest(descriptor types.TestDescriptor, configOverrides map[string]any, allowDuplicate bool, skipQueue bool) (types.TestRunner, error) {
 	if descriptor.Err() != nil {
 		return nil, fmt.Errorf("cannot create test from failed test descriptor: %w", descriptor.Err())
 	}
 
-	testRef, err := c.createTestRun(descriptor, configOverrides, allowDuplicate)
+	testRef, err := c.createTestRun(descriptor, configOverrides, allowDuplicate, skipQueue)
 	if err != nil {
 		return nil, err
 	}
 
-	select {
-	case c.testNotificationChan <- true:
-	default:
+	if skipQueue {
+		c.offQueueNotificationChan <- testRef
+	} else {
+		select {
+		case c.queueNotificationChan <- true:
+		default:
+		}
 	}
 
 	return testRef, nil
 }
 
-func (c *TestRunner) createTestRun(descriptor types.TestDescriptor, configOverrides map[string]any, allowDuplicate bool) (types.TestRunner, error) {
+func (c *TestRunner) createTestRun(descriptor types.TestDescriptor, configOverrides map[string]any, allowDuplicate bool, skipQueue bool) (types.TestRunner, error) {
 	c.testSchedulerMutex.Lock()
 	defer c.testSchedulerMutex.Unlock()
 
@@ -115,7 +119,9 @@ func (c *TestRunner) createTestRun(descriptor types.TestDescriptor, configOverri
 	}
 
 	c.testRegistryMutex.Lock()
-	c.testQueue = append(c.testQueue, testRef)
+	if !skipQueue {
+		c.testQueue = append(c.testQueue, testRef)
+	}
 	c.testRunMap[runID] = testRef
 	c.testRegistryMutex.Unlock()
 
@@ -164,7 +170,7 @@ runLoop:
 			select {
 			case <-ctx.Done():
 				break runLoop
-			case <-c.testNotificationChan:
+			case <-c.queueNotificationChan:
 			case <-time.After(60 * time.Second):
 			}
 		}
@@ -173,9 +179,18 @@ runLoop:
 	waitGroup.Wait()
 }
 
-func (c *TestRunner) runTest(ctx context.Context, testRef types.TestRunner) {
-	c.lastExecutedRunID = testRef.RunID()
+func (c *TestRunner) RunOffQueueTestExecutionLoop(ctx context.Context) {
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case testRef := <-c.offQueueNotificationChan:
+			go c.runTest(ctx, testRef)
+		}
+	}
+}
 
+func (c *TestRunner) runTest(ctx context.Context, testRef types.TestRunner) {
 	if err := testRef.Validate(); err != nil {
 		testRef.Logger().Errorf("test validation failed: %v", err)
 		return
@@ -195,9 +210,10 @@ func (c *TestRunner) RunTestScheduler(ctx context.Context) {
 
 	// startup scheduler
 	for _, testDescr := range c.getStartupTests() {
-		_, err := c.ScheduleTest(testDescr, nil, false)
+		testConfig := testDescr.Config()
+		_, err := c.ScheduleTest(testDescr, nil, false, testConfig.Schedule.SkipQueue)
 		if err != nil {
-			c.coordinator.Logger().Errorf("could not schedule startup test execution for %v (%v): %v", testDescr.ID(), testDescr.Config().Name, err)
+			c.coordinator.Logger().Errorf("could not schedule startup test execution for %v (%v): %v", testDescr.ID(), testConfig.Name, err)
 		}
 	}
 
@@ -217,9 +233,10 @@ func (c *TestRunner) RunTestScheduler(ctx context.Context) {
 		}
 
 		for _, testDescr := range c.getCronTests(cronTime) {
-			_, err := c.ScheduleTest(testDescr, nil, false)
+			testConfig := testDescr.Config()
+			_, err := c.ScheduleTest(testDescr, nil, false, testConfig.Schedule.SkipQueue)
 			if err != nil {
-				c.coordinator.Logger().Errorf("could not schedule cron test execution for %v (%v): %v", testDescr.ID(), testDescr.Config().Name, err)
+				c.coordinator.Logger().Errorf("could not schedule cron test execution for %v (%v): %v", testDescr.ID(), testConfig.Name, err)
 			}
 		}
 	}
diff --git a/pkg/coordinator/types/coordinator.go b/pkg/coordinator/types/coordinator.go
@@ -24,7 +24,7 @@ type Coordinator interface {
 	GetTestByRunID(runID uint64) Test
 	GetTestQueue() []Test
 	GetTestHistory(testID string, firstRunID uint64, offset uint64, limit uint64) ([]Test, int)
-	ScheduleTest(descriptor TestDescriptor, configOverrides map[string]any, allowDuplicate bool) (TestRunner, error)
+	ScheduleTest(descriptor TestDescriptor, configOverrides map[string]any, allowDuplicate bool, skipQueue bool) (TestRunner, error)
 	DeleteTestRun(runID uint64) error
 }
 
diff --git a/pkg/coordinator/types/test.go b/pkg/coordinator/types/test.go
@@ -61,8 +61,9 @@ type ExternalTestConfig struct {
 }
 
 type TestSchedule struct {
-	Startup bool     `yaml:"startup" json:"startup"`
-	Cron    []string `yaml:"cron" json:"cron"`
+	Startup   bool     `yaml:"startup" json:"startup"`
+	Cron      []string `yaml:"cron" json:"cron"`
+	SkipQueue bool     `yaml:"skipQueue" json:"skipQueue"`
 }
 
 type TestDescriptor interface {
diff --git a/pkg/coordinator/web/api/docs/docs.go b/pkg/coordinator/web/api/docs/docs.go
@@ -1104,6 +1104,9 @@ const docTemplate = `{
                     "type": "object",
                     "additionalProperties": {}
                 },
+                "skip_queue": {
+                    "type": "boolean"
+                },
                 "test_id": {
                     "type": "string"
                 }
@@ -1272,6 +1275,9 @@ const docTemplate = `{
                         "type": "string"
                     }
                 },
+                "skipQueue": {
+                    "type": "boolean"
+                },
                 "startup": {
                     "type": "boolean"
                 }
diff --git a/pkg/coordinator/web/api/docs/swagger.json b/pkg/coordinator/web/api/docs/swagger.json
@@ -1096,6 +1096,9 @@
                     "type": "object",
                     "additionalProperties": {}
                 },
+                "skip_queue": {
+                    "type": "boolean"
+                },
                 "test_id": {
                     "type": "string"
                 }
@@ -1264,6 +1267,9 @@
                         "type": "string"
                     }
                 },
+                "skipQueue": {
+                    "type": "boolean"
+                },
                 "startup": {
                     "type": "boolean"
                 }
diff --git a/pkg/coordinator/web/api/docs/swagger.yaml b/pkg/coordinator/web/api/docs/swagger.yaml
@@ -222,6 +222,8 @@ definitions:
       config:
         additionalProperties: {}
         type: object
+      skip_queue:
+        type: boolean
       test_id:
         type: string
     type: object
@@ -333,6 +335,8 @@ definitions:
         items:
           type: string
         type: array
+      skipQueue:
+        type: boolean
       startup:
         type: boolean
     type: object
diff --git a/pkg/coordinator/web/api/post_test_run_api.go b/pkg/coordinator/web/api/post_test_run_api.go
@@ -13,6 +13,7 @@ type PostTestRunsScheduleRequest struct {
 	TestID         string         `json:"test_id"`
 	Config         map[string]any `json:"config"`
 	AllowDuplicate bool           `json:"allow_duplicate"`
+	SkipQueue      bool           `json:"skip_queue"`
 }
 
 type PostTestRunsScheduleResponse struct {
@@ -77,7 +78,7 @@ func (ah *APIHandler) PostTestRunsSchedule(w http.ResponseWriter, r *http.Reques
 	}
 
 	// create test run
-	testInstance, err := ah.coordinator.ScheduleTest(testDescriptor, req.Config, req.AllowDuplicate)
+	testInstance, err := ah.coordinator.ScheduleTest(testDescriptor, req.Config, req.AllowDuplicate, req.SkipQueue)
 	if err != nil {
 		ah.sendErrorResponse(w, r.URL.String(), fmt.Sprintf("failed creating test: %v", err), http.StatusInternalServerError)
 		return

Original file line number	Diff line number	Diff line change
`@@ -24,7 +24,7 @@ type Coordinator interface {`
`24`	`24`	`GetTestByRunID(runID uint64) Test`
`25`	`25`	`GetTestQueue() []Test`
`26`	`26`	`GetTestHistory(testID string, firstRunID uint64, offset uint64, limit uint64) ([]Test, int)`
`27`		`- ScheduleTest(descriptor TestDescriptor, configOverrides map[string]any, allowDuplicate bool) (TestRunner, error)`
	`27`	`+ ScheduleTest(descriptor TestDescriptor, configOverrides map[string]any, allowDuplicate bool, skipQueue bool) (TestRunner, error)`
`28`	`28`	`DeleteTestRun(runID uint64) error`
`29`	`29`	`}`
`30`	`30`
Original file line number	Diff line number	Diff line change
`@@ -61,8 +61,9 @@ type ExternalTestConfig struct {`
`61`	`61`	`}`
`62`	`62`
`63`	`63`	`type TestSchedule struct {`
`64`		- Startup bool `yaml:"startup" json:"startup"`
`65`		- Cron []string `yaml:"cron" json:"cron"`
	`64`	+ Startup bool `yaml:"startup" json:"startup"`
	`65`	+ Cron []string `yaml:"cron" json:"cron"`
	`66`	+ SkipQueue bool `yaml:"skipQueue" json:"skipQueue"`
`66`	`67`	`}`
`67`	`68`
`68`	`69`	`type TestDescriptor interface {`
Original file line number	Diff line number	Diff line change
@@ -1104,6 +1104,9 @@ const docTemplate = `{
`1104`	`1104`	`"type": "object",`
`1105`	`1105`	`"additionalProperties": {}`
`1106`	`1106`	`},`
	`1107`	`+ "skip_queue": {`
	`1108`	`+ "type": "boolean"`
	`1109`	`+ },`
`1107`	`1110`	`"test_id": {`
`1108`	`1111`	`"type": "string"`
`1109`	`1112`	`}`
@@ -1272,6 +1275,9 @@ const docTemplate = `{
`1272`	`1275`	`"type": "string"`
`1273`	`1276`	`}`
`1274`	`1277`	`},`
	`1278`	`+ "skipQueue": {`
	`1279`	`+ "type": "boolean"`
	`1280`	`+ },`
`1275`	`1281`	`"startup": {`
`1276`	`1282`	`"type": "boolean"`
`1277`	`1283`	`}`
Original file line number	Diff line number	Diff line change
`@@ -1096,6 +1096,9 @@`
`1096`	`1096`	`"type": "object",`
`1097`	`1097`	`"additionalProperties": {}`
`1098`	`1098`	`},`
	`1099`	`+ "skip_queue": {`
	`1100`	`+ "type": "boolean"`
	`1101`	`+ },`
`1099`	`1102`	`"test_id": {`
`1100`	`1103`	`"type": "string"`
`1101`	`1104`	`}`
`@@ -1264,6 +1267,9 @@`
`1264`	`1267`	`"type": "string"`
`1265`	`1268`	`}`
`1266`	`1269`	`},`
	`1270`	`+ "skipQueue": {`
	`1271`	`+ "type": "boolean"`
	`1272`	`+ },`
`1267`	`1273`	`"startup": {`
`1268`	`1274`	`"type": "boolean"`
`1269`	`1275`	`}`
Original file line number	Diff line number	Diff line change
`@@ -13,6 +13,7 @@ type PostTestRunsScheduleRequest struct {`
`13`	`13`	TestID string `json:"test_id"`
`14`	`14`	Config map[string]any `json:"config"`
`15`	`15`	AllowDuplicate bool `json:"allow_duplicate"`
	`16`	+ SkipQueue bool `json:"skip_queue"`
`16`	`17`	`}`
`17`	`18`
`18`	`19`	`type PostTestRunsScheduleResponse struct {`
`@@ -77,7 +78,7 @@ func (ah *APIHandler) PostTestRunsSchedule(w http.ResponseWriter, r *http.Reques`
`77`	`78`	`}`
`78`	`79`
`79`	`80`	`// create test run`
`80`		`- testInstance, err := ah.coordinator.ScheduleTest(testDescriptor, req.Config, req.AllowDuplicate)`
	`81`	`+ testInstance, err := ah.coordinator.ScheduleTest(testDescriptor, req.Config, req.AllowDuplicate, req.SkipQueue)`
`81`	`82`	`if err != nil {`
`82`	`83`	`ah.sendErrorResponse(w, r.URL.String(), fmt.Sprintf("failed creating test: %v", err), http.StatusInternalServerError)`
`83`	`84`	`return`