Skip to content

Commit

Permalink
Add Support for Node Monitoring Agent
Browse files Browse the repository at this point in the history
  • Loading branch information
engedaam committed Dec 20, 2024
1 parent c023ab7 commit cb4b833
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 1 deletion.
29 changes: 28 additions & 1 deletion pkg/cloudprovider/cloudprovider.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ func getTags(ctx context.Context, nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.

func (c *CloudProvider) RepairPolicies() []cloudprovider.RepairPolicy {
return []cloudprovider.RepairPolicy{
// Supported Kubelet fields
// Supported Kubelet Node Conditions
{
ConditionType: corev1.NodeReady,
ConditionStatus: corev1.ConditionFalse,
Expand All @@ -270,6 +270,33 @@ func (c *CloudProvider) RepairPolicies() []cloudprovider.RepairPolicy {
ConditionStatus: corev1.ConditionUnknown,
TolerationDuration: 30 * time.Minute,
},
// Supported Node Monitoring Agent Conditions
//
{
ConditionType: "AcceleratedHardwareReady",
ConditionStatus: corev1.ConditionFalse,
TolerationDuration: 10 * time.Minute,
},
{
ConditionType: "StorageReady",
ConditionStatus: corev1.ConditionFalse,
TolerationDuration: 30 * time.Minute,
},
{
ConditionType: "NetworkingReady",
ConditionStatus: corev1.ConditionFalse,
TolerationDuration: 30 * time.Minute,
},
{
ConditionType: "KernelReady",
ConditionStatus: corev1.ConditionFalse,
TolerationDuration: 30 * time.Minute,
},
{
ConditionType: "ContainerRuntimeReady",
ConditionStatus: corev1.ConditionFalse,
TolerationDuration: 30 * time.Minute,
},
}
}

Expand Down
27 changes: 27 additions & 0 deletions test/suites/integration/repair_policy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ var _ = Describe("Repair Policy", func() {
env.EventuallyExpectNotFound(pod, node)
env.EventuallyExpectHealthyPodCount(selector, numPods)
},
// Kubelet Supported Conditions
Entry("Node Ready False", corev1.NodeCondition{
Type: corev1.NodeReady,
Status: corev1.ConditionFalse,
Expand All @@ -83,6 +84,32 @@ var _ = Describe("Repair Policy", func() {
Status: corev1.ConditionUnknown,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-31 * time.Minute)},
}),
// Node Monitoring Agent Supported Conditions
Entry("Node AcceleratedHardwareReady False", corev1.NodeCondition{
Type: "AcceleratedHardwareReady",
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-11 * time.Minute)},
}),
Entry("Node StorageReady False", corev1.NodeCondition{
Type: "StorageReady",
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-31 * time.Minute)},
}),
Entry("Node NetworkingReady False", corev1.NodeCondition{
Type: "NetworkingReady",
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-31 * time.Minute)},
}),
Entry("Node KernelReady False", corev1.NodeCondition{
Type: "KernelReady",
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-31 * time.Minute)},
}),
Entry("Node ContainerRuntimeReady False", corev1.NodeCondition{
Type: "ContainerRuntimeReady",
Status: corev1.ConditionFalse,
LastTransitionTime: metav1.Time{Time: time.Now().Add(-31 * time.Minute)},
}),
)
It("should ignore disruption budgets", func() {
nodePool.Spec.Disruption.Budgets = []karpenterv1.Budget{
Expand Down

0 comments on commit cb4b833

Please sign in to comment.