Skip to content

Commit

Permalink
fix(provisioner): avoid overprovisioning capacity for workloads on ex…
Browse files Browse the repository at this point in the history
…pired nodes
  • Loading branch information
Jorge authored and jorgeperezc committed Jan 24, 2025
1 parent af00eb4 commit 5fe06a8
Showing 1 changed file with 32 additions and 1 deletion.
33 changes: 32 additions & 1 deletion pkg/controllers/provisioning/provisioner.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,14 @@ import (
"sigs.k8s.io/karpenter/pkg/scheduling"
nodeutils "sigs.k8s.io/karpenter/pkg/utils/node"
nodepoolutils "sigs.k8s.io/karpenter/pkg/utils/nodepool"
podutil "sigs.k8s.io/karpenter/pkg/utils/pod"
"sigs.k8s.io/karpenter/pkg/utils/pretty"
)

const (
// avgProvisionTimeSeconds is a look-ahead window, in seconds, applied to
// nodes that are marked for deletion. In Schedule, a deleting node that
// carries a termination-timestamp annotation only has its reschedulable pods
// considered once the time remaining until that timestamp is at or below
// this many seconds.
// NOTE(review): the name suggests this approximates how long a replacement
// node takes to provision; the value is hard-coded rather than measured —
// confirm it is appropriate for the target cloud provider.
avgProvisionTimeSeconds = 90
)

// LaunchOptions are the set of options that can be used to trigger certain
// actions and configuration during scheduling
type LaunchOptions struct {
Expand Down Expand Up @@ -319,11 +324,37 @@ func (p *Provisioner) Schedule(ctx context.Context) (scheduler.Results, error) {
return scheduler.Results{}, err
}

nodesMarkedForDeletion := nodes.Deleting()

nodesMarkedForDeletion = lo.Filter(nodesMarkedForDeletion, func(n *state.StateNode, _ int) bool {
expirationTimeString, exists := n.NodeClaim.ObjectMeta.Annotations[v1.NodeClaimTerminationTimestampAnnotationKey]
if !exists {
pods, err := n.ReschedulablePods(ctx, p.kubeClient)
if err != nil {
return false
}

evictablePods := lo.Filter(pods, func(p *corev1.Pod, _ int) bool { return podutil.IsEvictable(p) })
if len(evictablePods) != len(pods) {

Check failure on line 338 in pkg/controllers/provisioning/provisioner.go

View workflow job for this annotation

GitHub Actions / presubmit (1.25.x)

S1008: should use 'return len(evictablePods) == len(pods)' instead of 'if len(evictablePods) != len(pods) { return false }; return true' (gosimple)

Check failure on line 338 in pkg/controllers/provisioning/provisioner.go

View workflow job for this annotation

GitHub Actions / presubmit (1.26.x)

S1008: should use 'return len(evictablePods) == len(pods)' instead of 'if len(evictablePods) != len(pods) { return false }; return true' (gosimple)

Check failure on line 338 in pkg/controllers/provisioning/provisioner.go

View workflow job for this annotation

GitHub Actions / presubmit (1.27.x)

S1008: should use 'return len(evictablePods) == len(pods)' instead of 'if len(evictablePods) != len(pods) { return false }; return true' (gosimple)

Check failure on line 338 in pkg/controllers/provisioning/provisioner.go

View workflow job for this annotation

GitHub Actions / presubmit (1.28.x)

S1008: should use 'return len(evictablePods) == len(pods)' instead of 'if len(evictablePods) != len(pods) { return false }; return true' (gosimple)

Check failure on line 338 in pkg/controllers/provisioning/provisioner.go

View workflow job for this annotation

GitHub Actions / presubmit (1.29.x)

S1008: should use 'return len(evictablePods) == len(pods)' instead of 'if len(evictablePods) != len(pods) { return false }; return true' (gosimple)

Check failure on line 338 in pkg/controllers/provisioning/provisioner.go

View workflow job for this annotation

GitHub Actions / presubmit (1.30.x)

S1008: should use 'return len(evictablePods) == len(pods)' instead of 'if len(evictablePods) != len(pods) { return false }; return true' (gosimple)

Check failure on line 338 in pkg/controllers/provisioning/provisioner.go

View workflow job for this annotation

GitHub Actions / presubmit (1.31.x)

S1008: should use 'return len(evictablePods) == len(pods)' instead of 'if len(evictablePods) != len(pods) { return false }; return true' (gosimple)
return false
}

return true
}

expirationTime, err := time.Parse(time.RFC3339, expirationTimeString)
if err != nil {
return false
}

return time.Until(expirationTime) <= avgProvisionTimeSeconds*time.Second
})

// Get pods from nodes that are preparing for deletion
// We do this after getting the pending pods so that we undershoot if pods are
// actively migrating from a node that is being deleted
// NOTE: The assumption is that these nodes are cordoned and no additional pods will schedule to them
deletingNodePods, err := nodes.Deleting().ReschedulablePods(ctx, p.kubeClient)
deletingNodePods, err := nodesMarkedForDeletion.ReschedulablePods(ctx, p.kubeClient)
if err != nil {
return scheduler.Results{}, err
}
Expand Down

0 comments on commit 5fe06a8

Please sign in to comment.