From 8814d07b7b8d5ac33c031c4e5f983434671ba5f8 Mon Sep 17 00:00:00 2001
From: ruimingxie
Date: Tue, 19 Dec 2023 15:06:36 +0800
Subject: [PATCH] should filter nodes that do not have enough cpu when
 crossing numa nodes

---
 pkg/plugins/noderesourcetopology/filter.go    |  5 ++++-
 .../noderesourcetopology/filter_test.go       | 21 +++++++++++++++++--
 pkg/plugins/noderesourcetopology/helper.go    | 12 ++++++++---
 3 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/pkg/plugins/noderesourcetopology/filter.go b/pkg/plugins/noderesourcetopology/filter.go
index 38cab25..4a4bf89 100644
--- a/pkg/plugins/noderesourcetopology/filter.go
+++ b/pkg/plugins/noderesourcetopology/filter.go
@@ -76,7 +76,10 @@ func (tm *TopologyMatch) Filter(
 			return status
 		}
 	}
-	assignTopologyResult(nw, s.targetContainerResource.Clone())
+
+	if status := assignTopologyResult(nw, s.targetContainerResource.Clone()); status != nil {
+		return status
+	}
 
 	s.Lock()
 	defer s.Unlock()
diff --git a/pkg/plugins/noderesourcetopology/filter_test.go b/pkg/plugins/noderesourcetopology/filter_test.go
index 38118a1..8facca8 100644
--- a/pkg/plugins/noderesourcetopology/filter_test.go
+++ b/pkg/plugins/noderesourcetopology/filter_test.go
@@ -294,7 +294,7 @@ func TestTopologyMatch_Filter(t *testing.T) {
 			want: framework.NewStatus(framework.Unschedulable, ErrReasonNUMAResourceNotEnough),
 		},
 		{
-			name: "enough cpu resource in node with default none topology manager policy",
+			name: "not enough cpu resource in node with default none topology manager policy",
 			args: args{
 				pod: newResourcePod(false, nil, framework.Resource{MilliCPU: 2 * CPUTestUnit, Memory: MemTestUnit}),
 				nodeInfo: framework.NewNodeInfo(
@@ -310,7 +310,7 @@ func TestTopologyMatch_Filter(t *testing.T) {
 				}(),
 				topologyAwareResources: sets.NewString(string(corev1.ResourceCPU)),
 			},
-			want: nil,
+			want: framework.NewStatus(framework.Unschedulable, ErrReasonNUMAResourceNotEnough),
 		},
 		{
 			name: "no enough cpu resource in one NUMA node with default single numa topology manager policy",
@@ -357,6 +357,23 @@ func TestTopologyMatch_Filter(t *testing.T) {
 			},
 			want: framework.NewStatus(framework.Unschedulable, ErrReasonNUMAResourceNotEnough),
 		},
+		{
+			name: "not enough cpu resource in one NUMA node, but enough with cross numa pods",
+			args: args{
+				pod: newResourcePod(false, nil, framework.Resource{MilliCPU: 4 * CPUTestUnit}),
+				nodeInfo: framework.NewNodeInfo(
+					newResourcePod(true, newZoneList([]zone{{name: "node1", cpu: 1 * CPUTestUnit}}),
+						framework.Resource{MilliCPU: 1 * CPUTestUnit, Memory: 2 * MemTestUnit}),
+				),
+				nrt: func() *topologyv1alpha1.NodeResourceTopology {
+					nrtCopy := nrt.DeepCopy()
+					nrtCopy.CraneManagerPolicy.TopologyManagerPolicy = topologyv1alpha1.TopologyManagerPolicyNone
+					return nrtCopy
+				}(),
+				topologyAwareResources: sets.NewString(string(corev1.ResourceCPU)),
+			},
+			want: nil,
+		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
diff --git a/pkg/plugins/noderesourcetopology/helper.go b/pkg/plugins/noderesourcetopology/helper.go
index 9c94cf4..5418b09 100644
--- a/pkg/plugins/noderesourcetopology/helper.go
+++ b/pkg/plugins/noderesourcetopology/helper.go
@@ -170,7 +170,7 @@ func (nw *nodeWrapper) addNUMAResources(numaNodeResult topologyv1alpha1.ZoneList
 	}
 }
 
-func assignTopologyResult(nw *nodeWrapper, request *framework.Resource) {
+func assignTopologyResult(nw *nodeWrapper, request *framework.Resource) *framework.Status {
 	// sort by free CPU resource
 	sort.Slice(nw.numaNodes, func(i, j int) bool {
 		nodeI, nodeJ := nw.numaNodes[i], nw.numaNodes[j]
@@ -187,10 +187,10 @@ func assignTopologyResult(nw *nodeWrapper, request *framework.Resource) {
 				},
 			},
 		}
-		return
+		return nil
 	}
 
-	for _, node := range nw.numaNodes {
+	for i, node := range nw.numaNodes {
 		node.allocatable.MilliCPU = node.allocatable.MilliCPU / 1000 * 1000
 		res, finished := assignRequestForNUMANode(request, node)
 		if capacity := ResourceListIgnoreZeroResources(res); len(capacity) != 0 {
@@ -205,10 +205,16 @@ func assignTopologyResult(nw *nodeWrapper, request *framework.Resource) {
 		if finished {
 			break
 		}
+		// If we have reached the last NUMA node and the request is still
+		// not satisfied, the node cannot fit the pod even across NUMA nodes.
+		if i == len(nw.numaNodes)-1 {
+			return framework.NewStatus(framework.Unschedulable, ErrReasonNUMAResourceNotEnough)
+		}
 	}
 	sort.Slice(nw.result, func(i, j int) bool {
 		return nw.result[i].Name < nw.result[j].Name
 	})
+	return nil
 }
 
 func computeContainerSpecifiedResourceRequest(pod *corev1.Pod, indices []int, names sets.String) *framework.Resource {
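
Reviewer note (a sketch, not part of the patch): the heart of this change is
that assignTopologyResult now returns an Unschedulable status when the request
is still unsatisfied after walking every NUMA node, instead of returning
silently and letting the node pass the filter. Below is a minimal,
self-contained illustration of that loop; numaNode, assign, and the plain
millicpu values are invented stand-ins for the plugin's real types, and
fmt.Errorf stands in for framework.NewStatus(framework.Unschedulable,
ErrReasonNUMAResourceNotEnough).

package main

import (
	"fmt"
	"sort"
)

// numaNode is a simplified stand-in for the plugin's per-NUMA-node wrapper.
type numaNode struct {
	name         string
	freeMilliCPU int64
}

// assign walks NUMA nodes from most to least free CPU, taking whole cores
// from each (mirroring the patch's MilliCPU / 1000 * 1000 rounding). It
// fails when the request is still unmet after the last NUMA node, which is
// the case the patch now reports as Unschedulable.
func assign(nodes []numaNode, requestMilliCPU int64) ([]string, error) {
	if len(nodes) == 0 {
		return nil, fmt.Errorf("no NUMA topology reported")
	}
	sort.Slice(nodes, func(i, j int) bool {
		return nodes[i].freeMilliCPU > nodes[j].freeMilliCPU
	})
	var placed []string
	remaining := requestMilliCPU
	for i, n := range nodes {
		avail := n.freeMilliCPU / 1000 * 1000 // round down to whole cores
		if avail > 0 {
			take := avail
			if take > remaining {
				take = remaining
			}
			remaining -= take
			placed = append(placed, n.name)
		}
		if remaining == 0 {
			break
		}
		// Last NUMA node reached with the request still unmet: the node
		// as a whole cannot fit the pod.
		if i == len(nodes)-1 {
			return nil, fmt.Errorf("not enough NUMA resources")
		}
	}
	return placed, nil
}

func main() {
	nodes := []numaNode{{"node0", 3000}, {"node1", 2000}}
	fmt.Println(assign(nodes, 4000)) // spans both NUMA nodes: succeeds
	fmt.Println(assign(nodes, 6000)) // exceeds the node total: rejected
}

With two NUMA nodes holding 3000m and 2000m of free CPU, a 4000m request is
satisfied by spanning both nodes, while a 6000m request is rejected; the new
check in filter.go propagates that rejection as an Unschedulable status,
which is what the updated "none policy" test cases assert.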