From 5fe06a8ea0f3abb61133661af3f884706577c06f Mon Sep 17 00:00:00 2001
From: Jorge
Date: Fri, 24 Jan 2025 13:56:22 +0100
Subject: [PATCH] fix(provisioner): avoid overprovisioning capacity for workloads on expired nodes

---
 pkg/controllers/provisioning/provisioner.go | 33 ++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/pkg/controllers/provisioning/provisioner.go b/pkg/controllers/provisioning/provisioner.go
index 83e59b18cb..02a8e511ec 100644
--- a/pkg/controllers/provisioning/provisioner.go
+++ b/pkg/controllers/provisioning/provisioner.go
@@ -51,9 +51,14 @@ import (
 	"sigs.k8s.io/karpenter/pkg/scheduling"
 	nodeutils "sigs.k8s.io/karpenter/pkg/utils/node"
 	nodepoolutils "sigs.k8s.io/karpenter/pkg/utils/nodepool"
+	podutil "sigs.k8s.io/karpenter/pkg/utils/pod"
 	"sigs.k8s.io/karpenter/pkg/utils/pretty"
 )
 
+const (
+	avgProvisionTimeSeconds = 90
+)
+
 // LaunchOptions are the set of options that can be used to trigger certain
 // actions and configuration during scheduling
 type LaunchOptions struct {
@@ -319,11 +324,37 @@ func (p *Provisioner) Schedule(ctx context.Context) (scheduler.Results, error) {
 		return scheduler.Results{}, err
 	}
 
+	nodesMarkedForDeletion := nodes.Deleting()
+
+	nodesMarkedForDeletion = lo.Filter(nodesMarkedForDeletion, func(n *state.StateNode, _ int) bool {
+		expirationTimeString, exists := n.NodeClaim.ObjectMeta.Annotations[v1.NodeClaimTerminationTimestampAnnotationKey]
+		if !exists {
+			pods, err := n.ReschedulablePods(ctx, p.kubeClient)
+			if err != nil {
+				return false
+			}
+
+			evictablePods := lo.Filter(pods, func(pod *corev1.Pod, _ int) bool { return podutil.IsEvictable(pod) })
+			if len(evictablePods) != len(pods) {
+				return false
+			}
+
+			return true
+		}
+
+		expirationTime, err := time.Parse(time.RFC3339, expirationTimeString)
+		if err != nil {
+			return false
+		}
+
+		return time.Until(expirationTime) <= avgProvisionTimeSeconds*time.Second
+	})
+
 	// Get pods from nodes that are preparing for deletion
 	// We do this after getting the pending pods so that we undershoot if pods are
 	// actively migrating from a node that is being deleted
 	// NOTE: The assumption is that these nodes are cordoned and no additional pods will schedule to them
-	deletingNodePods, err := nodes.Deleting().ReschedulablePods(ctx, p.kubeClient)
+	deletingNodePods, err := nodesMarkedForDeletion.ReschedulablePods(ctx, p.kubeClient)
 	if err != nil {
 		return scheduler.Results{}, err
 	}
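
Reviewer note, not part of the patch: below is a minimal standalone sketch of the
expiration-window check the filter performs, assuming (as the patched code does) that
the v1.NodeClaimTerminationTimestampAnnotationKey annotation carries an RFC3339
timestamp. The helper name isExpiringSoon, the constant avgProvisionTime, and the
sample inputs are hypothetical and for illustration only.

package main

import (
	"fmt"
	"time"
)

// Mirrors the patch's 90-second window: if a node drains later than roughly one
// average provisioning cycle from now, its pods are assumed to still be running
// by the time replacement capacity would become ready, so provisioning for them
// now would overprovision. (Hypothetical constant, echoing avgProvisionTimeSeconds.)
const avgProvisionTime = 90 * time.Second

// isExpiringSoon reports whether an RFC3339 termination timestamp falls within the
// provisioning window. Unparsable values count as "not expiring soon", matching the
// patch's conservative `return false` branch on parse errors.
func isExpiringSoon(terminationAnnotation string, now time.Time) bool {
	expirationTime, err := time.Parse(time.RFC3339, terminationAnnotation)
	if err != nil {
		return false
	}
	return expirationTime.Sub(now) <= avgProvisionTime
}

func main() {
	now := time.Now()
	// Node draining in 30s: inside the window, so provision for its pods now.
	fmt.Println(isExpiringSoon(now.Add(30*time.Second).Format(time.RFC3339), now)) // true
	// Node draining in 10m: its pods keep running well past one provisioning
	// cycle, so counting them now would overprovision.
	fmt.Println(isExpiringSoon(now.Add(10*time.Minute).Format(time.RFC3339), now)) // false
	// Garbled annotation: treated conservatively as not expiring soon.
	fmt.Println(isExpiringSoon("not-a-timestamp", now)) // false
}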