Skip to content

Commit

Permalink
Merge pull request #13 from smartnews/v1.0.1-patch
Browse files Browse the repository at this point in the history
chore: upgrade karpenter to v1.0.1
  • Loading branch information
Luke-Smartnews authored Sep 24, 2024
2 parents 174aa72 + 169388b commit 992886d
Show file tree
Hide file tree
Showing 10 changed files with 60 additions and 5 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ coverage.html
*.test
*.cpuprofile
*.heapprofile
*.swp
go.work
go.work.sum

Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ licenses: download ## Verifies dependency licenses
! go-licenses csv ./... | grep -v -e 'MIT' -e 'Apache-2.0' -e 'BSD-3-Clause' -e 'BSD-2-Clause' -e 'ISC' -e 'MPL-2.0' -e 'github.com/awslabs/amazon-eks-ami/nodeadm'

image: ## Build the Karpenter controller images using ko build
$(eval CONTROLLER_IMG=$(shell $(WITH_GOFLAGS) KOCACHE=$(KOCACHE) KO_DOCKER_REPO="$(KO_DOCKER_REPO)" ko build --bare github.com/aws/karpenter-provider-aws/cmd/controller))
$(eval CONTROLLER_IMG=$(shell $(WITH_GOFLAGS) KOCACHE=$(KOCACHE) KO_DOCKER_REPO="$(KO_DOCKER_REPO)" ko build --platform linux/amd64 --bare github.com/aws/karpenter-provider-aws/cmd/controller))
$(eval IMG_REPOSITORY=$(shell echo $(CONTROLLER_IMG) | cut -d "@" -f 1 | cut -d ":" -f 1))
$(eval IMG_TAG=$(shell echo $(CONTROLLER_IMG) | cut -d "@" -f 1 | cut -d ":" -f 2 -s))
$(eval IMG_DIGEST=$(shell echo $(CONTROLLER_IMG) | cut -d "@" -f 2))
Expand Down
7 changes: 7 additions & 0 deletions charts/karpenter-crd/templates/karpenter.sh_nodepools.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,13 @@ spec:
- WhenEmpty
- WhenEmptyOrUnderutilized
type: string
utilizationThreshold:
description: |-
UtilizationThreshold is defined as sum of requested resources divided by capacity
below which a node can be considered for disruption.
maximum: 100
minimum: 1
type: integer
required:
- consolidateAfter
type: object
Expand Down
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,5 @@ require (
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
)

replace sigs.k8s.io/karpenter v1.0.1-0.20240912184512-932d95819986 => github.com/smartnews/karpenter v1.0.1-sn-1
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,8 @@ github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/samber/lo v1.47.0 h1:z7RynLwP5nbyRscyvcD043DWYoOcYRv3mV8lBeqOCLc=
github.com/samber/lo v1.47.0/go.mod h1:RmDH9Ct32Qy3gduHQuKJ3gW1fMHAnE/fAzQuf6He5cU=
github.com/smartnews/karpenter v1.0.1-sn-1 h1:1uJi8d7dxvNrLjLkpVn5CovpGLV/3AwHqH6NzzyRFK0=
github.com/smartnews/karpenter v1.0.1-sn-1/go.mod h1:6ZDf3x14sIbiMPe4KgeI7a39DJKg3c+ZHuE/TF4MqGs=
github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=
github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
Expand Down Expand Up @@ -276,8 +278,6 @@ sigs.k8s.io/controller-runtime v0.19.0 h1:nWVM7aq+Il2ABxwiCizrVDSlmDcshi9llbaFbC
sigs.k8s.io/controller-runtime v0.19.0/go.mod h1:iRmWllt8IlaLjvTTDLhRBXIEtkCK6hwVBJJsYS9Ajf4=
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo=
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=
sigs.k8s.io/karpenter v1.0.1-0.20240912184512-932d95819986 h1:i9QLisyAbANPGq07i5HaJ70cjBtZUoK9YGBDaPXCEMA=
sigs.k8s.io/karpenter v1.0.1-0.20240912184512-932d95819986/go.mod h1:bYlfeGdZBtTNmz0qlyi3VJ2n6/10HVv8dSmJ90zvyjs=
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4=
sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08=
sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
Expand Down
7 changes: 7 additions & 0 deletions pkg/apis/crds/karpenter.sh_nodepools.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,13 @@ spec:
- WhenEmpty
- WhenEmptyOrUnderutilized
type: string
utilizationThreshold:
description: |-
UtilizationThreshold is defined as sum of requested resources divided by capacity
below which a node can be considered for disruption.
maximum: 100
minimum: 1
type: integer
required:
- consolidateAfter
type: object
Expand Down
2 changes: 1 addition & 1 deletion pkg/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ const (
// resources. Cache hits enable faster provisioning and reduced API load on
// AWS APIs, which can have a serious impact on performance and scalability.
// DO NOT CHANGE THIS VALUE WITHOUT DUE CONSIDERATION
DefaultTTL = time.Minute
DefaultTTL = 5 * time.Minute
// UnavailableOfferingsTTL is the time before offerings that were marked as unavailable
// are removed from the cache and are available for launch again
UnavailableOfferingsTTL = 3 * time.Minute
Expand Down
24 changes: 24 additions & 0 deletions pkg/controllers/interruption/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/awslabs/operatorpkg/singleton"
"go.uber.org/multierr"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/util/workqueue"
"k8s.io/klog/v2"
"k8s.io/utils/clock"
Expand Down Expand Up @@ -198,13 +199,36 @@ func (c *Controller) handleNodeClaim(ctx context.Context, msg messages.Message,
if zone != "" && instanceType != "" {
c.unavailableOfferingsCache.MarkUnavailable(ctx, string(msg.Kind()), instanceType, zone, karpv1.CapacityTypeSpot)
}
spotIntTotal.WithLabelValues(instanceType, zone, nodeClaim.Status.NodeName, nodeClaim.Labels["karpenter.sh/nodepool"]).Inc()
// try to create a new nodeclaim immediately but ignore error if it fails
if err := c.createNodeClaim(ctx, nodeClaim); err != nil {
log.FromContext(ctx).Error(err, "[interruption handling]failed to create a new nodeclaim")
} else {
log.FromContext(ctx).Info("Created new nodeclaim due to spot interruption")
// wait for the node provisioning before draining
time.Sleep(60 * time.Second)
}
}
if action != NoAction {
return c.deleteNodeClaim(ctx, msg, nodeClaim, node)
}
return nil
}

// createNodeClaim submits a replacement NodeClaim for a spot-interrupted one.
//
// The replacement carries over the interrupted claim's GenerateName,
// annotations, labels, owner references and spec. Its name is left empty so
// that the API server derives one from GenerateName, and its status is left
// at the zero value.
//
// The error from kubeClient.Create is returned unchanged; the caller decides
// whether a failed replacement is fatal.
//
// NOTE(review): labels and annotations are copied wholesale, so any values
// that were written onto the old claim after it launched are inherited by
// the replacement — confirm this is intended.
func (c *Controller) createNodeClaim(ctx context.Context, oldNodeClaim *karpv1.NodeClaim) error {
	// Work from a deep copy: the maps, slices and pointer fields would
	// otherwise be shared with oldNodeClaim, and Create decodes the server
	// response back into the object it is given, so writes to the new claim
	// could leak into the old one that the caller goes on to delete.
	src := oldNodeClaim.DeepCopy()
	newNodeClaim := &karpv1.NodeClaim{
		ObjectMeta: metav1.ObjectMeta{
			GenerateName:    src.ObjectMeta.GenerateName,
			Annotations:     src.ObjectMeta.Annotations,
			Labels:          src.ObjectMeta.Labels,
			OwnerReferences: src.ObjectMeta.OwnerReferences,
		},
		Spec: src.Spec,
	}
	return c.kubeClient.Create(ctx, newNodeClaim)
}

// deleteNodeClaim removes the NodeClaim from the api-server
func (c *Controller) deleteNodeClaim(ctx context.Context, msg messages.Message, nodeClaim *karpv1.NodeClaim, node *corev1.Node) error {
if !nodeClaim.DeletionTimestamp.IsZero() {
Expand Down
15 changes: 14 additions & 1 deletion pkg/controllers/interruption/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ import (
const (
interruptionSubsystem = "interruption"
messageTypeLabel = "message_type"
instanceTypeLabel = "instance_type"
zoneLabel = "zone"
hostLabel = "node_name"
poolLabel = "node_pool"
)

var (
Expand Down Expand Up @@ -53,8 +57,17 @@ var (
Buckets: metrics.DurationBuckets(),
},
)
spotIntTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: metrics.Namespace,
Subsystem: interruptionSubsystem,
Name: "spot_int_total",
Help: "Number of the spot interruption. Labeled by AZ, instance type",
},
[]string{instanceTypeLabel, zoneLabel, hostLabel, poolLabel},
)
)

func init() {
crmetrics.Registry.MustRegister(receivedMessages, deletedMessages, messageLatency)
crmetrics.Registry.MustRegister(receivedMessages, deletedMessages, messageLatency, spotIntTotal)
}
1 change: 1 addition & 0 deletions pkg/providers/instance/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ func getTags(ctx context.Context, nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.
karpv1.NodePoolLabelKey: nodeClaim.Labels[karpv1.NodePoolLabelKey],
v1.EKSClusterNameTagKey: options.FromContext(ctx).ClusterName,
v1.LabelNodeClass: nodeClass.Name,
"Component": nodeClaim.Labels[karpv1.NodePoolLabelKey], // used for aws explore
}
return lo.Assign(nodeClass.Spec.Tags, staticTags)
}
Expand Down

0 comments on commit 992886d

Please sign in to comment.