Skip to content

Commit

Permalink
Allow karpenter to set arbitrary k8s labels on NodeClaim/Nodes
Browse files Browse the repository at this point in the history
nodeClaim Labels are the source for core karpenter to sync Node Labels in a centralized fashion.
Some bootstrap userdata provider implementations also consume these labels and pass them through to the kubelet so that it sets them on its own Node (self-setting).
That results in a coupling between a centralized and a kubelet self setting approach.
This coupling results in conflicting criteria for validations and degraded UX.
This PR removes the coupling.
As a consequence, Node Labels are not unnecessarily restricted for the centralized sync anymore.
This better empowers administrators, reducing the reliance on kubelet self-setting and minimizing the risk of a Node steering privileged workloads to itself.

A subset of labels, excluding those disallowed by the node restriction admission, is now stored as JSON in the "karpenter.sh/node-restricted-labels" NodeClaim annotation.
This enables bootstrap userdata providers to continue utilizing the labels if needed.
  • Loading branch information
enxebre committed Jan 14, 2025
1 parent 380bcc9 commit 0d848f7
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 28 deletions.
75 changes: 52 additions & 23 deletions pkg/apis/v1/labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,22 +58,17 @@ const (
)

var (
// RestrictedLabelDomains are either prohibited by the kubelet or reserved by karpenter
// RestrictedLabelDomains are reserved by karpenter
RestrictedLabelDomains = sets.New(
"kubernetes.io",
"k8s.io",
apis.Group,
)

// LabelDomainExceptions are sub-domains of the RestrictedLabelDomains but allowed because
// they are not used in a context where they may be passed as argument to kubelet.
LabelDomainExceptions = sets.New(
"kops.k8s.io",
v1.LabelNamespaceSuffixNode,
v1.LabelNamespaceNodeRestriction,
K8sLabelDomains = sets.New(
"kubernetes.io",
"k8s.io",
)

// WellKnownLabels are labels that belong to the RestrictedLabelDomains but allowed.
// WellKnownLabels are labels that belong to the RestrictedLabelDomains or K8sLabelDomains but allowed.
// Karpenter is aware of these labels, and they can be used to further narrow down
// the range of the corresponding values by either nodepool or pods.
WellKnownLabels = sets.New(
Expand Down Expand Up @@ -104,38 +99,72 @@ var (
}
)

// IsRestrictedLabel returns an error if the label is restricted.
// IsRestrictedLabel is used for runtime validation of requirements.
// Returns an error if the label is restricted. E.g. using .karpenter.sh suffix.
func IsRestrictedLabel(key string) error {
if WellKnownLabels.Has(key) {
return nil
}
if IsRestrictedNodeLabel(key) {

labelDomain := GetLabelDomain(key)
for restrictedLabelDomain := range RestrictedLabelDomains {
if labelDomain == restrictedLabelDomain || strings.HasSuffix(labelDomain, "."+restrictedLabelDomain) {
return fmt.Errorf("label %s is restricted; specify a well known label: %v, or a custom label that does not use a restricted domain: %v", key, sets.List(WellKnownLabels), sets.List(RestrictedLabelDomains))
}
}

if RestrictedLabels.Has(key) {
return fmt.Errorf("label %s is restricted; specify a well known label: %v, or a custom label that does not use a restricted domain: %v", key, sets.List(WellKnownLabels), sets.List(RestrictedLabelDomains))
}

return nil
}

// IsRestrictedNodeLabel returns true if a node label should not be injected by Karpenter.
// They are either known labels that will be injected by cloud providers,
// or label domain managed by other software (e.g., kops.k8s.io managed by kOps).
func IsRestrictedNodeLabel(key string) bool {
// IsValidToSyncCentrallyLabel reports whether Karpenter is allowed to sync the
// given label key to the Node object centrally. Every key is accepted except
// those contained in WellKnownLabels.
func IsValidToSyncCentrallyLabel(key string) bool {
	// TODO(enxebre): consider this to be configurable with runtime flag.
	isWellKnown := WellKnownLabels.Has(key)
	return !isWellKnown
}

// IsKubeletLabel returns true if the label key is one that kubelets are allowed to set on their own Node object.
// This function is similar to the one used by the node restriction admission https://github.com/kubernetes/kubernetes/blob/e319c541f144e9bee6160f1dd8671638a9029f4c/staging/src/k8s.io/kubelet/pkg/apis/well_known_labels.go#L67
// but karpenter also restricts the known labels to be passed to kubelet. Only the kubeletLabelNamespaces are allowed.
func IsKubeletLabel(key string) bool {
if WellKnownLabels.Has(key) {
return false
}

if !isKubernetesLabel(key) {
return true
}
labelDomain := GetLabelDomain(key)
for exceptionLabelDomain := range LabelDomainExceptions {
if strings.HasSuffix(labelDomain, exceptionLabelDomain) {
return false

namespace := GetLabelDomain(key)
for allowedNamespace := range kubeletLabelNamespaces {
if namespace == allowedNamespace || strings.HasSuffix(namespace, "."+allowedNamespace) {
return true
}
}
for restrictedLabelDomain := range RestrictedLabelDomains {
if strings.HasSuffix(labelDomain, restrictedLabelDomain) {

return false
}

// isKubernetesLabel reports whether the label key lives in one of the
// K8sLabelDomains, i.e. its domain (the part before the first "/") is equal to
// one of those domains or is a sub-domain of it.
//
// Keys without a "/" have no domain and are never Kubernetes labels.
func isKubernetesLabel(key string) bool {
	// Match on the domain prefix only. Matching on the whole key would miss
	// every prefixed label ("node.kubernetes.io/foo" does not end in
	// ".kubernetes.io") and would wrongly accept custom-domain labels whose
	// name part happens to end in a Kubernetes domain.
	domain, _, hasDomain := strings.Cut(key, "/")
	if !hasDomain {
		return false
	}

	for k8sDomain := range K8sLabelDomains {
		if domain == k8sDomain || strings.HasSuffix(domain, "."+k8sDomain) {
			return true
		}
	}

	return false
}

// kubeletLabelNamespaces holds the Kubernetes label domains that IsKubeletLabel
// accepts: a label whose domain equals one of these, or is a sub-domain of one,
// is treated as a label the kubelet may set on its own Node.
var kubeletLabelNamespaces = sets.New(
	v1.LabelNamespaceSuffixKubelet,
	v1.LabelNamespaceSuffixNode,
)

func GetLabelDomain(key string) string {
if parts := strings.SplitN(key, "/", 2); len(parts) == 2 {
return parts[0]
Expand Down
11 changes: 10 additions & 1 deletion pkg/controllers/nodeclaim/lifecycle/launch.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package lifecycle

import (
"context"
"encoding/json"
"errors"
"fmt"

Expand All @@ -28,6 +29,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/reconcile"

"sigs.k8s.io/karpenter/pkg/apis"
v1 "sigs.k8s.io/karpenter/pkg/apis/v1"
"sigs.k8s.io/karpenter/pkg/cloudprovider"
"sigs.k8s.io/karpenter/pkg/events"
Expand Down Expand Up @@ -127,7 +129,14 @@ func PopulateNodeClaimDetails(nodeClaim, retrieved *v1.NodeClaim) *v1.NodeClaim
scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...).Labels(), // Single-value requirement resolved labels
nodeClaim.Labels, // User-defined labels
)
nodeClaim.Annotations = lo.Assign(nodeClaim.Annotations, retrieved.Annotations)

kubeletLabels := scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...).KubeletLabels()
json, err := json.Marshal(kubeletLabels)
if err != nil {
panic(err)
}
kubeletLabelsAnnotation := map[string]string{apis.Group + "/node-restricted-labels": string(json)}
nodeClaim.Annotations = lo.Assign(nodeClaim.Annotations, retrieved.Annotations, kubeletLabelsAnnotation)
nodeClaim.Status.ProviderID = retrieved.Status.ProviderID
nodeClaim.Status.ImageID = retrieved.Status.ImageID
nodeClaim.Status.Allocatable = retrieved.Status.Allocatable
Expand Down
24 changes: 20 additions & 4 deletions pkg/scheduling/requirements.go
Original file line number Diff line number Diff line change
Expand Up @@ -303,13 +303,29 @@ func (r Requirements) Intersects(requirements Requirements) (errs error) {
return errs
}

// Labels filters out labels from the requirements that are not allowed to be synced centrally by Karpenter to the Node.
func (r Requirements) Labels() map[string]string {
labels := map[string]string{}
for key, requirement := range r {
if !v1.IsRestrictedNodeLabel(key) {
if value := requirement.Any(); value != "" {
labels[key] = value
}
if !v1.IsValidToSyncCentrallyLabel(key) {
continue
}
if value := requirement.Any(); value != "" {
labels[key] = value
}
}
return labels
}

// KubeletLabels filters out labels from the requirements that will be rejected by the node restriction admission.
func (r Requirements) KubeletLabels() map[string]string {
labels := map[string]string{}
for key, requirement := range r {
if !v1.IsKubeletLabel(key) {
continue
}
if value := requirement.Any(); value != "" {
labels[key] = value
}
}
return labels
Expand Down

0 comments on commit 0d848f7

Please sign in to comment.