Skip to content

Commit 0356a3a

Browse files
wilfred-s authored and craigcondit committed
[YUNIKORN-2974] Expose preemption and priority settings via REST (apache#995)
Add preemption and priority details to the queue REST information. Currently the configuration is exposed via the properties. The properties contain the raw configuration values, which need to be interpreted. Invalid configuration values are ignored and not set on the queue. This could cause an incorrect view of what is or should be active on the queue if based on the properties. Use the active values from the object to show the real state. Expose the partition-level preemption flag as part of this change to complement the queue details. Closes: apache#995 Signed-off-by: Craig Condit <[email protected]>
1 parent 7c99e6b commit 0356a3a

File tree

8 files changed

+123
-82
lines changed

8 files changed

+123
-82
lines changed

pkg/scheduler/objects/queue.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -695,8 +695,14 @@ func (sq *Queue) GetPartitionQueueDAOInfo(include bool) dao.PartitionQueueDAOInf
695695
queueInfo.IsManaged = sq.isManaged
696696
queueInfo.CurrentPriority = sq.getCurrentPriority()
697697
queueInfo.TemplateInfo = sq.template.GetTemplateInfo()
698-
queueInfo.AbsUsedCapacity = resources.CalculateAbsUsedCapacity(
699-
sq.maxResource, sq.allocatedResource).DAOMap()
698+
queueInfo.AbsUsedCapacity = resources.CalculateAbsUsedCapacity(sq.maxResource, sq.allocatedResource).DAOMap()
699+
queueInfo.SortingPolicy = sq.sortType.String()
700+
queueInfo.PrioritySorting = sq.prioritySortEnabled
701+
queueInfo.PreemptionEnabled = sq.preemptionPolicy != policies.DisabledPreemptionPolicy
702+
queueInfo.IsPreemptionFence = sq.preemptionPolicy == policies.FencePreemptionPolicy
703+
queueInfo.PreemptionDelay = sq.preemptionDelay.String()
704+
queueInfo.IsPriorityFence = sq.priorityPolicy == policies.FencePriorityPolicy
705+
queueInfo.PriorityOffset = sq.priorityOffset
700706
queueInfo.Properties = make(map[string]string)
701707
for k, v := range sq.properties {
702708
queueInfo.Properties[k] = v

pkg/scheduler/objects/queue_test.go

Lines changed: 53 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1657,37 +1657,71 @@ func TestGetPartitionQueueDAOInfo(t *testing.T) {
16571657
},
16581658
})
16591659
assert.NilError(t, err)
1660-
assert.Equal(t, root.template.GetMaxApplications(), root.GetPartitionQueueDAOInfo(true).TemplateInfo.MaxApplications)
1661-
assert.DeepEqual(t, root.template.GetProperties(), root.GetPartitionQueueDAOInfo(true).TemplateInfo.Properties)
1662-
assert.DeepEqual(t, root.template.GetMaxResource().DAOMap(), root.GetPartitionQueueDAOInfo(true).TemplateInfo.MaxResource)
1663-
assert.DeepEqual(t, root.template.GetGuaranteedResource().DAOMap(), root.GetPartitionQueueDAOInfo(true).TemplateInfo.GuaranteedResource)
1660+
rootDAO := root.GetPartitionQueueDAOInfo(true)
1661+
assert.Equal(t, root.template.GetMaxApplications(), rootDAO.TemplateInfo.MaxApplications)
1662+
assert.DeepEqual(t, root.template.GetProperties(), rootDAO.TemplateInfo.Properties)
1663+
assert.DeepEqual(t, root.template.GetMaxResource().DAOMap(), rootDAO.TemplateInfo.MaxResource)
1664+
assert.DeepEqual(t, root.template.GetGuaranteedResource().DAOMap(), rootDAO.TemplateInfo.GuaranteedResource)
16641665

16651666
// test resources
16661667
root.maxResource = getResource(t)
16671668
root.guaranteedResource = getResource(t)
1668-
assert.DeepEqual(t, root.GetMaxResource().DAOMap(), root.GetPartitionQueueDAOInfo(true).MaxResource)
1669-
assert.DeepEqual(t, root.GetGuaranteedResource().DAOMap(), root.GetPartitionQueueDAOInfo(true).GuaranteedResource)
1670-
assert.DeepEqual(t, root.getHeadRoom().DAOMap(), root.GetPartitionQueueDAOInfo(true).HeadRoom)
1669+
rootDAO = root.GetPartitionQueueDAOInfo(true)
1670+
assert.DeepEqual(t, root.GetMaxResource().DAOMap(), rootDAO.MaxResource)
1671+
assert.DeepEqual(t, root.GetGuaranteedResource().DAOMap(), rootDAO.GuaranteedResource)
1672+
assert.DeepEqual(t, root.getHeadRoom().DAOMap(), rootDAO.HeadRoom)
16711673

16721674
// test allocatingAcceptedApps
16731675
root.allocatingAcceptedApps = getAllocatingAcceptedApps()
1676+
rootDAO = root.GetPartitionQueueDAOInfo(true)
16741677
assert.Equal(t, len(root.allocatingAcceptedApps), 2, "allocatingAcceptedApps size")
1675-
assert.Equal(t, len(root.GetPartitionQueueDAOInfo(true).AllocatingAcceptedApps), 1, "AllocatingAcceptedApps size")
1676-
assert.Equal(t, root.GetPartitionQueueDAOInfo(true).AllocatingAcceptedApps[0], appID1)
1678+
assert.Equal(t, len(rootDAO.AllocatingAcceptedApps), 1, "AllocatingAcceptedApps size")
1679+
assert.Equal(t, rootDAO.AllocatingAcceptedApps[0], appID1)
16771680

16781681
// Test specific queue
1679-
_, err = createManagedQueue(root, "leaf-queue", false, nil)
1682+
var leaf *Queue
1683+
leaf, err = createManagedQueue(root, "leaf-queue", false, nil)
16801684
assert.NilError(t, err, "failed to create managed queue")
1681-
assert.Equal(t, root.GetPartitionQueueDAOInfo(false).QueueName, "root")
1682-
assert.Equal(t, len(root.GetPartitionQueueDAOInfo(false).Children), 0)
1683-
assert.Equal(t, len(root.GetPartitionQueueDAOInfo(false).ChildNames), 1)
1684-
assert.Equal(t, root.GetPartitionQueueDAOInfo(false).ChildNames[0], "root.leaf-queue")
1685+
rootDAO = root.GetPartitionQueueDAOInfo(false)
1686+
assert.Equal(t, rootDAO.QueueName, "root")
1687+
assert.Equal(t, len(rootDAO.Children), 0)
1688+
assert.Equal(t, len(rootDAO.ChildNames), 1)
1689+
assert.Equal(t, rootDAO.ChildNames[0], "root.leaf-queue")
16851690
// Test hierarchy queue
1686-
assert.Equal(t, root.GetPartitionQueueDAOInfo(true).QueueName, "root")
1687-
assert.Equal(t, len(root.GetPartitionQueueDAOInfo(true).Children), 1)
1688-
assert.Equal(t, len(root.GetPartitionQueueDAOInfo(true).ChildNames), 1)
1689-
assert.Equal(t, root.GetPartitionQueueDAOInfo(true).Children[0].QueueName, "root.leaf-queue")
1690-
assert.Equal(t, root.GetPartitionQueueDAOInfo(true).ChildNames[0], "root.leaf-queue")
1691+
rootDAO = root.GetPartitionQueueDAOInfo(true)
1692+
assert.Equal(t, rootDAO.QueueName, "root")
1693+
assert.Equal(t, len(rootDAO.Children), 1)
1694+
assert.Equal(t, len(rootDAO.ChildNames), 1)
1695+
assert.Equal(t, rootDAO.Children[0].QueueName, "root.leaf-queue")
1696+
assert.Equal(t, rootDAO.ChildNames[0], "root.leaf-queue")
1697+
// special prop checks
1698+
leaf.properties = map[string]string{
1699+
configs.ApplicationSortPolicy: policies.FairSortPolicy.String(),
1700+
configs.PreemptionDelay: "3600s",
1701+
configs.PreemptionPolicy: policies.FencePreemptionPolicy.String(),
1702+
}
1703+
leaf.UpdateQueueProperties()
1704+
leafDAO := leaf.GetPartitionQueueDAOInfo(false)
1705+
assert.Equal(t, leafDAO.QueueName, "root.leaf-queue")
1706+
assert.Equal(t, len(leafDAO.Children), 0, "leaf has no children")
1707+
assert.Equal(t, len(leafDAO.ChildNames), 0, "leaf has no children (names)")
1708+
assert.Equal(t, leafDAO.PreemptionEnabled, true, "preemption should be enabled")
1709+
assert.Equal(t, leafDAO.IsPreemptionFence, true, "fence should have been set")
1710+
assert.Equal(t, leafDAO.PreemptionDelay, "1h0m0s", "incorrect delay returned")
1711+
assert.Equal(t, leafDAO.SortingPolicy, "fair", "incorrect policy returned")
1712+
1713+
// special prop checks
1714+
leaf.properties = map[string]string{
1715+
configs.ApplicationSortPolicy: policies.FifoSortPolicy.String(),
1716+
configs.PreemptionDelay: "10s",
1717+
configs.PreemptionPolicy: policies.DisabledPreemptionPolicy.String(),
1718+
}
1719+
leaf.UpdateQueueProperties()
1720+
leafDAO = leaf.GetPartitionQueueDAOInfo(false)
1721+
assert.Equal(t, leafDAO.PreemptionEnabled, false, "preemption should not be enabled")
1722+
assert.Equal(t, leafDAO.IsPreemptionFence, false, "queue should not be a fence")
1723+
assert.Equal(t, leafDAO.PreemptionDelay, "10s", "incorrect delay returned")
1724+
assert.Equal(t, leafDAO.SortingPolicy, "fifo", "incorrect policy returned")
16911725
}
16921726

16931727
func getAllocatingAcceptedApps() map[string]bool {

pkg/scheduler/partition.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -807,7 +807,7 @@ func (pc *PartitionContext) tryAllocate() *objects.AllocationResult {
807807
return nil
808808
}
809809
// try allocating from the root down
810-
result := pc.root.TryAllocate(pc.GetNodeIterator, pc.GetFullNodeIterator, pc.GetNode, pc.isPreemptionEnabled())
810+
result := pc.root.TryAllocate(pc.GetNodeIterator, pc.GetFullNodeIterator, pc.GetNode, pc.IsPreemptionEnabled())
811811
if result != nil {
812812
return pc.allocate(result)
813813
}
@@ -1609,7 +1609,7 @@ func (pc *PartitionContext) GetNodeSortingResourceWeights() map[string]float64 {
16091609
return policy.ResourceWeights()
16101610
}
16111611

1612-
func (pc *PartitionContext) isPreemptionEnabled() bool {
1612+
func (pc *PartitionContext) IsPreemptionEnabled() bool {
16131613
pc.RLock()
16141614
defer pc.RUnlock()
16151615
return pc.preemptionEnabled

pkg/scheduler/partition_test.go

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3778,29 +3778,27 @@ func TestUpdatePreemption(t *testing.T) {
37783778

37793779
partition, err := newBasePartition()
37803780
assert.NilError(t, err, "Partition creation failed")
3781-
assert.Assert(t, partition.isPreemptionEnabled(), "preeemption should be enabled by default")
3781+
assert.Assert(t, partition.IsPreemptionEnabled(), "preeemption should be enabled by default")
37823782

37833783
partition.updatePreemption(configs.PartitionConfig{})
3784-
assert.Assert(t, partition.isPreemptionEnabled(), "preeemption should be enabled by empty config")
3784+
assert.Assert(t, partition.IsPreemptionEnabled(), "preeemption should be enabled by empty config")
37853785

37863786
partition.updatePreemption(configs.PartitionConfig{Preemption: configs.PartitionPreemptionConfig{}})
3787-
assert.Assert(t, partition.isPreemptionEnabled(), "preeemption should be enabled by empty preemption section")
3787+
assert.Assert(t, partition.IsPreemptionEnabled(), "preeemption should be enabled by empty preemption section")
37883788

37893789
partition.updatePreemption(configs.PartitionConfig{Preemption: configs.PartitionPreemptionConfig{Enabled: nil}})
3790-
assert.Assert(t, partition.isPreemptionEnabled(), "preeemption should be enabled by explicit nil")
3790+
assert.Assert(t, partition.IsPreemptionEnabled(), "preeemption should be enabled by explicit nil")
37913791

37923792
partition.updatePreemption(configs.PartitionConfig{Preemption: configs.PartitionPreemptionConfig{Enabled: &True}})
3793-
assert.Assert(t, partition.isPreemptionEnabled(), "preeemption should be enabled by explicit true")
3793+
assert.Assert(t, partition.IsPreemptionEnabled(), "preeemption should be enabled by explicit true")
37943794

37953795
partition.updatePreemption(configs.PartitionConfig{Preemption: configs.PartitionPreemptionConfig{Enabled: &False}})
3796-
assert.Assert(t, !partition.isPreemptionEnabled(), "preeemption should be disabled by explicit false")
3796+
assert.Assert(t, !partition.IsPreemptionEnabled(), "preeemption should be disabled by explicit false")
37973797
}
37983798

37993799
func TestUpdateNodeSortingPolicy(t *testing.T) {
38003800
partition, err := newBasePartition()
3801-
if err != nil {
3802-
t.Errorf("Partition creation failed: %s", err.Error())
3803-
}
3801+
assert.NilError(t, err, "Partition creation failed unexpectedly")
38043802

38053803
if partition.nodes.GetNodeSortingPolicy().PolicyType().String() != policies.FairnessPolicy.String() {
38063804
t.Error("Node policy is not set with the default policy which is fair policy.")

pkg/webservice/dao/partition_info.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ type PartitionInfo struct {
2323
Name string `json:"name"` // no omitempty, name should not be empty
2424
Capacity PartitionCapacity `json:"capacity"` // no omitempty, omitempty doesn't work on a structure value
2525
NodeSortingPolicy NodeSortingPolicy `json:"nodeSortingPolicy"` // no omitempty, omitempty doesn't work on a structure value
26+
PreemptionEnabled bool `json:"preemptionEnabled"` // no omitempty, false shows preemption status better
2627
TotalNodes int `json:"totalNodes,omitempty"`
2728
Applications map[string]int `json:"applications,omitempty"`
2829
TotalContainers int `json:"totalContainers,omitempty"`

pkg/webservice/dao/queue_info.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1515
See the License for the specific language governing permissions and
1616
limitations under the License.
1717
*/
18+
1819
package dao
1920

2021
type TemplateInfo struct {
@@ -27,7 +28,7 @@ type TemplateInfo struct {
2728
type PartitionQueueDAOInfo struct {
2829
QueueName string `json:"queuename"` // no omitempty, queue name should not be empty
2930
Status string `json:"status,omitempty"`
30-
Partition string `json:"partition"` // no omitempty, queue name should not be empty
31+
Partition string `json:"partition"` // no omitempty, partition name should not be empty
3132
PendingResource map[string]int64 `json:"pendingResource,omitempty"`
3233
MaxResource map[string]int64 `json:"maxResource,omitempty"`
3334
GuaranteedResource map[string]int64 `json:"guaranteedResource,omitempty"`
@@ -46,4 +47,11 @@ type PartitionQueueDAOInfo struct {
4647
RunningApps uint64 `json:"runningApps,omitempty"`
4748
CurrentPriority int32 `json:"currentPriority"` // no omitempty, as the current priority value may be 0, which is a valid priority level
4849
AllocatingAcceptedApps []string `json:"allocatingAcceptedApps,omitempty"`
50+
SortingPolicy string `json:"sortingPolicy,omitempty"`
51+
PrioritySorting bool `json:"prioritySorting"` // no omitempty, false shows priority sorting status better
52+
PreemptionEnabled bool `json:"preemptionEnabled"` // no omitempty, false shows preemption status better
53+
IsPreemptionFence bool `json:"isPreemptionFence"` // no omitempty, a false value gives a quick way to understand whether it's fenced.
54+
PreemptionDelay string `json:"preemptionDelay,omitempty"`
55+
IsPriorityFence bool `json:"isPriorityFence"` // no omitempty, a false value gives a quick way to understand whether it's fenced.
56+
PriorityOffset int32 `json:"priorityOffset,omitempty"`
4957
}

pkg/webservice/handlers.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1010,6 +1010,7 @@ func getPartitionInfoDAO(lists map[string]*scheduler.PartitionContext) []*dao.Pa
10101010
partitionInfo.Name = common.GetPartitionNameWithoutClusterID(partitionContext.Name)
10111011
partitionInfo.State = partitionContext.GetCurrentState()
10121012
partitionInfo.LastStateTransitionTime = partitionContext.GetStateTime().UnixNano()
1013+
partitionInfo.PreemptionEnabled = partitionContext.IsPreemptionEnabled()
10131014

10141015
capacityInfo := dao.PartitionCapacity{}
10151016
capacity := partitionContext.GetTotalPartitionResource()

pkg/webservice/handlers_test.go

Lines changed: 42 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -116,71 +116,62 @@ partitions:
116116

117117
const configMultiPartitions = `
118118
partitions:
119-
-
120-
name: gpu
119+
- name: gpu
120+
preemption:
121+
enabled: false
121122
queues:
122-
-
123-
name: root
124-
-
125-
name: default
123+
- name: root
124+
- name: default
126125
nodesortpolicy:
127126
type: fair
128127
queues:
129-
-
130-
name: root
131-
queues:
132-
-
133-
name: default
134-
submitacl: "*"
128+
- name: root
129+
queues:
130+
- name: default
131+
submitacl: "*"
135132
`
136133

137134
const configTwoLevelQueues = `
138135
partitions:
139-
-
140-
name: gpu
136+
- name: gpu
141137
queues:
142-
-
143-
name: root
144-
-
145-
name: default
138+
- name: root
139+
- name: default
146140
nodesortpolicy:
147141
type: binpacking
148142
queues:
149-
-
150-
name: root
143+
- name: root
144+
properties:
145+
application.sort.policy: fifo
146+
childtemplate:
147+
maxapplications: 10
151148
properties:
152149
application.sort.policy: fifo
153-
childtemplate:
154-
maxapplications: 10
150+
resources:
151+
guaranteed:
152+
memory: 400000
153+
max:
154+
memory: 600000
155+
queues:
156+
- name: a
157+
queues:
158+
- name: a1
155159
properties:
156160
application.sort.policy: fifo
157-
resources:
158-
guaranteed:
159-
memory: 400000
160-
max:
161-
memory: 600000
162-
queues:
163-
-
164-
name: a
165-
queues:
166-
-
167-
name: a1
168-
properties:
169-
application.sort.policy: fifo
170-
resources:
171-
guaranteed:
172-
memory: 500000
173-
vcore: 50000
174-
max:
175-
memory: 800000
176-
vcore: 80000
177-
resources:
178-
guaranteed:
179-
memory: 500000
180-
vcore: 50000
181-
max:
182-
memory: 800000
183-
vcore: 80000
161+
resources:
162+
guaranteed:
163+
memory: 500000
164+
vcore: 50000
165+
max:
166+
memory: 800000
167+
vcore: 80000
168+
resources:
169+
guaranteed:
170+
memory: 500000
171+
vcore: 50000
172+
max:
173+
memory: 800000
174+
vcore: 80000
184175
`
185176

186177
const userGroupLimitsConfig = `
@@ -1092,6 +1083,7 @@ func TestPartitions(t *testing.T) { //nolint:funlen
10921083
assert.DeepEqual(t, cs["default"].Capacity.UsedCapacity, map[string]int64{"memory": 300, "vcore": 700})
10931084
assert.DeepEqual(t, cs["default"].Capacity.Utilization, map[string]int64{"memory": 30, "vcore": 70})
10941085
assert.Equal(t, cs["default"].State, "Active")
1086+
assert.Assert(t, cs["default"].PreemptionEnabled, "preemption should be enabled on default")
10951087

10961088
assert.Assert(t, cs["gpu"] != nil)
10971089
assert.Equal(t, cs["gpu"].ClusterID, "rm-123")
@@ -1100,6 +1092,7 @@ func TestPartitions(t *testing.T) { //nolint:funlen
11001092
assert.Equal(t, cs["default"].NodeSortingPolicy.ResourceWeights["vcore"], 1.0)
11011093
assert.Equal(t, cs["default"].NodeSortingPolicy.ResourceWeights["memory"], 1.0)
11021094
assert.Equal(t, cs["gpu"].Applications["total"], 0)
1095+
assert.Assert(t, !cs["gpu"].PreemptionEnabled, "preemption should be disabled on gpu")
11031096
}
11041097

11051098
func TestMetricsNotEmpty(t *testing.T) {

0 commit comments

Comments
 (0)