Skip to content

Commit

Permalink
feat: Add Support for Node Monitoring Agent (#7545)
Browse files Browse the repository at this point in the history
  • Loading branch information
engedaam authored Jan 10, 2025
1 parent b631d9e commit 10202fe
Show file tree
Hide file tree
Showing 6 changed files with 66 additions and 4 deletions.
4 changes: 4 additions & 0 deletions charts/karpenter-crd/templates/karpenter.sh_nodeclaims.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ spec:
- jsonPath: .metadata.creationTimestamp
name: Age
type: date
- jsonPath: .status.imageID
name: ImageID
priority: 1
type: string
- jsonPath: .status.providerID
name: ID
priority: 1
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ require (
k8s.io/klog/v2 v2.130.1
k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738
sigs.k8s.io/controller-runtime v0.19.3
sigs.k8s.io/karpenter v1.1.1
sigs.k8s.io/karpenter v1.1.2-0.20241220005608-b3fa6ebffc19
sigs.k8s.io/yaml v1.4.0
)

Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -339,8 +339,8 @@ sigs.k8s.io/controller-runtime v0.19.3 h1:XO2GvC9OPftRst6xWCpTgBZO04S2cbp0Qqkj8b
sigs.k8s.io/controller-runtime v0.19.3/go.mod h1:j4j87DqtsThvwTv5/Tc5NFRyyF/RF0ip4+62tbTSIUM=
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8=
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo=
sigs.k8s.io/karpenter v1.1.1 h1:QPpVC8DsaLgJ/YWcFpZKE4m3jD+Qp88/GtSPvMfffck=
sigs.k8s.io/karpenter v1.1.1/go.mod h1:NQouOJNK6s1d4EIKa5cY7nAV3IG74qZ6gPzHBeCZNPw=
sigs.k8s.io/karpenter v1.1.2-0.20241220005608-b3fa6ebffc19 h1:nCaZE6O7772FEEPGgTef05IanE8AWMKf7DBh1LiU1ik=
sigs.k8s.io/karpenter v1.1.2-0.20241220005608-b3fa6ebffc19/go.mod h1:E1mtCutIoQJA05ClYYQo9y+5ujk6U2FxByauGSUXXZs=
sigs.k8s.io/structured-merge-diff/v4 v4.4.2 h1:MdmvkGuXi/8io6ixD5wud3vOLwc1rj0aNqRlpuvjmwA=
sigs.k8s.io/structured-merge-diff/v4 v4.4.2/go.mod h1:N8f93tFZh9U6vpxwRArLiikrE5/2tiu1w1AGfACIGE4=
sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
Expand Down
4 changes: 4 additions & 0 deletions pkg/apis/crds/karpenter.sh_nodeclaims.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ spec:
- jsonPath: .metadata.creationTimestamp
name: Age
type: date
- jsonPath: .status.imageID
name: ImageID
priority: 1
type: string
- jsonPath: .status.providerID
name: ID
priority: 1
Expand Down
29 changes: 28 additions & 1 deletion pkg/cloudprovider/cloudprovider.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ func getTags(ctx context.Context, nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.

func (c *CloudProvider) RepairPolicies() []cloudprovider.RepairPolicy {
return []cloudprovider.RepairPolicy{
// Supported Kubelet fields
// Supported Kubelet Node Conditions
{
ConditionType: corev1.NodeReady,
ConditionStatus: corev1.ConditionFalse,
Expand All @@ -270,6 +270,33 @@ func (c *CloudProvider) RepairPolicies() []cloudprovider.RepairPolicy {
ConditionStatus: corev1.ConditionUnknown,
TolerationDuration: 30 * time.Minute,
},
// Supported Node Monitoring Agent Conditions
{
ConditionType: "AcceleratedHardwareReady",
ConditionStatus: corev1.ConditionFalse,
TolerationDuration: 10 * time.Minute,
},
{
ConditionType: "StorageReady",
ConditionStatus: corev1.ConditionFalse,
TolerationDuration: 30 * time.Minute,
},
{
ConditionType: "NetworkingReady",
ConditionStatus: corev1.ConditionFalse,
TolerationDuration: 30 * time.Minute,
},
{
ConditionType: "KernelReady",
ConditionStatus: corev1.ConditionFalse,
TolerationDuration: 30 * time.Minute,
},
{
ConditionType: "ContainerRuntimeReady",
ConditionStatus: corev1.ConditionFalse,
TolerationDuration: 30 * time.Minute,
},
}
}

Expand Down
27 changes: 27 additions & 0 deletions test/suites/integration/repair_policy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ var _ = Describe("Repair Policy", func() {
env.EventuallyExpectNotFound(pod, node)
env.EventuallyExpectHealthyPodCount(selector, numPods)
},
// Kubelet Supported Conditions
Entry("Node Ready False", corev1.NodeCondition{
Type: corev1.NodeReady,
Status: corev1.ConditionFalse,
Expand All @@ -83,6 +84,32 @@ var _ = Describe("Repair Policy", func() {
Status: corev1.ConditionUnknown,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-31 * time.Minute)},
}),
// Node Monitoring Agent Supported Conditions
Entry("Node AcceleratedHardwareReady False", corev1.NodeCondition{
Type: "AcceleratedHardwareReady",
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-11 * time.Minute)},
}),
Entry("Node StorageReady False", corev1.NodeCondition{
Type: "StorageReady",
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-31 * time.Minute)},
}),
Entry("Node NetworkingReady False", corev1.NodeCondition{
Type: "NetworkingReady",
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-31 * time.Minute)},
}),
Entry("Node KernelReady False", corev1.NodeCondition{
Type: "KernelReady",
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-31 * time.Minute)},
}),
Entry("Node ContainerRuntimeReady False", corev1.NodeCondition{
Type: "ContainerRuntimeReady",
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-31 * time.Minute)},
}),
)
It("should ignore disruption budgets", func() {
nodePool.Spec.Disruption.Budgets = []karpenterv1.Budget{
Expand Down

0 comments on commit 10202fe

Please sign in to comment.