diff --git a/pkg/controllers/disruption/consolidation_test.go b/pkg/controllers/disruption/consolidation_test.go index d69554f1db..9506873470 100644 --- a/pkg/controllers/disruption/consolidation_test.go +++ b/pkg/controllers/disruption/consolidation_test.go @@ -112,7 +112,7 @@ var _ = Describe("Consolidation", func() { ExpectSingletonReconciled(ctx, disruptionController) wg.Wait() - Expect(recorder.Calls("Unconsolidatable")).To(Equal(0)) + Expect(recorder.Calls(events.Unconsolidatable)).To(Equal(0)) }) It("should fire an event for ConsolidationDisabled when the NodePool has consolidation set to WhenEmpty", func() { pod := test.Pod() @@ -123,7 +123,7 @@ var _ = Describe("Consolidation", func() { ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, []*corev1.Node{node}, []*v1.NodeClaim{nodeClaim}) ExpectSingletonReconciled(ctx, disruptionController) - Expect(recorder.Calls("Unconsolidatable")).To(Equal(4)) + Expect(recorder.Calls(events.Unconsolidatable)).To(Equal(4)) }) It("should fire an event for ConsolidationDisabled when the NodePool has consolidateAfter set to 'Never'", func() { pod := test.Pod() @@ -135,7 +135,7 @@ var _ = Describe("Consolidation", func() { ExpectSingletonReconciled(ctx, disruptionController) // We get six calls here because we have Nodes and NodeClaims that fired for this event // and each of the consolidation mechanisms specifies that this event should be fired - Expect(recorder.Calls("Unconsolidatable")).To(Equal(6)) + Expect(recorder.Calls(events.Unconsolidatable)).To(Equal(6)) }) It("should fire an event when a candidate does not have a resolvable instance type", func() { pod := test.Pod() @@ -148,7 +148,7 @@ var _ = Describe("Consolidation", func() { ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, []*corev1.Node{node}, []*v1.NodeClaim{nodeClaim}) ExpectSingletonReconciled(ctx, disruptionController) // 
We get four calls since we only care about this since we don't emit for empty node consolidation - Expect(recorder.Calls("Unconsolidatable")).To(Equal(4)) + Expect(recorder.Calls(events.Unconsolidatable)).To(Equal(4)) }) It("should fire an event when a candidate does not have the capacity type label", func() { pod := test.Pod() @@ -161,7 +161,7 @@ var _ = Describe("Consolidation", func() { ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, []*corev1.Node{node}, []*v1.NodeClaim{nodeClaim}) ExpectSingletonReconciled(ctx, disruptionController) // We get four calls since we only care about this since we don't emit for empty node consolidation - Expect(recorder.Calls("Unconsolidatable")).To(Equal(4)) + Expect(recorder.Calls(events.Unconsolidatable)).To(Equal(4)) }) It("should fire an event when a candidate does not have the zone label", func() { pod := test.Pod() @@ -174,7 +174,7 @@ var _ = Describe("Consolidation", func() { ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, []*corev1.Node{node}, []*v1.NodeClaim{nodeClaim}) ExpectSingletonReconciled(ctx, disruptionController) // We get four calls since we only care about this since we don't emit for empty node consolidation - Expect(recorder.Calls("Unconsolidatable")).To(Equal(4)) + Expect(recorder.Calls(events.Unconsolidatable)).To(Equal(4)) }) }) Context("Metrics", func() { diff --git a/pkg/controllers/disruption/events/events.go b/pkg/controllers/disruption/events/events.go index e40eec8b4a..263d4c326c 100644 --- a/pkg/controllers/disruption/events/events.go +++ b/pkg/controllers/disruption/events/events.go @@ -32,7 +32,7 @@ func Launching(nodeClaim *v1.NodeClaim, reason string) events.Event { return events.Event{ InvolvedObject: nodeClaim, Type: corev1.EventTypeNormal, - Reason: "DisruptionLaunching", + Reason: events.DisruptionLaunching, Message: fmt.Sprintf("Launching NodeClaim: 
%s", cases.Title(language.Und, cases.NoLower).String(reason)), DedupeValues: []string{string(nodeClaim.UID), reason}, } @@ -42,7 +42,7 @@ func WaitingOnReadiness(nodeClaim *v1.NodeClaim) events.Event { return events.Event{ InvolvedObject: nodeClaim, Type: corev1.EventTypeNormal, - Reason: "DisruptionWaitingReadiness", + Reason: events.DisruptionWaitingReadiness, Message: "Waiting on readiness to continue disruption", DedupeValues: []string{string(nodeClaim.UID)}, } @@ -53,14 +53,14 @@ func Terminating(node *corev1.Node, nodeClaim *v1.NodeClaim, reason string) []ev { InvolvedObject: node, Type: corev1.EventTypeNormal, - Reason: "DisruptionTerminating", + Reason: events.DisruptionTerminating, Message: fmt.Sprintf("Disrupting Node: %s", cases.Title(language.Und, cases.NoLower).String(reason)), DedupeValues: []string{string(node.UID), reason}, }, { InvolvedObject: nodeClaim, Type: corev1.EventTypeNormal, - Reason: "DisruptionTerminating", + Reason: events.DisruptionTerminating, Message: fmt.Sprintf("Disrupting NodeClaim: %s", cases.Title(language.Und, cases.NoLower).String(reason)), DedupeValues: []string{string(nodeClaim.UID), reason}, }, @@ -74,7 +74,7 @@ func Unconsolidatable(node *corev1.Node, nodeClaim *v1.NodeClaim, msg string) [] { InvolvedObject: node, Type: corev1.EventTypeNormal, - Reason: "Unconsolidatable", + Reason: events.Unconsolidatable, Message: msg, DedupeValues: []string{string(node.UID)}, DedupeTimeout: time.Minute * 15, @@ -82,7 +82,7 @@ func Unconsolidatable(node *corev1.Node, nodeClaim *v1.NodeClaim, msg string) [] { InvolvedObject: nodeClaim, Type: corev1.EventTypeNormal, - Reason: "Unconsolidatable", + Reason: events.Unconsolidatable, Message: msg, DedupeValues: []string{string(nodeClaim.UID)}, DedupeTimeout: time.Minute * 15, @@ -97,7 +97,7 @@ func Blocked(node *corev1.Node, nodeClaim *v1.NodeClaim, msg string) (evs []even evs = append(evs, events.Event{ InvolvedObject: node, Type: corev1.EventTypeNormal, - Reason: "DisruptionBlocked", + 
Reason: events.DisruptionBlocked, Message: msg, DedupeValues: []string{string(node.UID)}, }) @@ -106,7 +106,7 @@ func Blocked(node *corev1.Node, nodeClaim *v1.NodeClaim, msg string) (evs []even evs = append(evs, events.Event{ InvolvedObject: nodeClaim, Type: corev1.EventTypeNormal, - Reason: "DisruptionBlocked", + Reason: events.DisruptionBlocked, Message: msg, DedupeValues: []string{string(nodeClaim.UID)}, }) @@ -118,7 +118,7 @@ func NodePoolBlockedForDisruptionReason(nodePool *v1.NodePool, reason v1.Disrupt return events.Event{ InvolvedObject: nodePool, Type: corev1.EventTypeNormal, - Reason: "DisruptionBlocked", + Reason: events.DisruptionBlocked, Message: fmt.Sprintf("No allowed disruptions for disruption reason %s due to blocking budget", reason), DedupeValues: []string{string(nodePool.UID), string(reason)}, DedupeTimeout: 1 * time.Minute, @@ -129,7 +129,7 @@ func NodePoolBlocked(nodePool *v1.NodePool) events.Event { return events.Event{ InvolvedObject: nodePool, Type: corev1.EventTypeNormal, - Reason: "DisruptionBlocked", + Reason: events.DisruptionBlocked, Message: "No allowed disruptions due to blocking budget", DedupeValues: []string{string(nodePool.UID)}, // Set a small timeout as a NodePool's disruption budget can change every minute. diff --git a/pkg/controllers/node/health/events.go b/pkg/controllers/node/health/events.go index 8dfddaf7ca..2e45a40475 100644 --- a/pkg/controllers/node/health/events.go +++ b/pkg/controllers/node/health/events.go @@ -30,7 +30,7 @@ func NodeRepairBlocked(node *corev1.Node, nodeClaim *v1.NodeClaim, nodePool *v1. { InvolvedObject: node, Type: corev1.EventTypeWarning, - Reason: "NodeRepairBlocked", + Reason: events.NodeRepairBlocked, Message: reason, DedupeValues: []string{string(node.UID)}, DedupeTimeout: time.Minute * 15, @@ -38,7 +38,7 @@ func NodeRepairBlocked(node *corev1.Node, nodeClaim *v1.NodeClaim, nodePool *v1. 
{ InvolvedObject: node, Type: corev1.EventTypeWarning, - Reason: "NodeRepairBlocked", + Reason: events.NodeRepairBlocked, Message: reason, DedupeValues: []string{string(nodeClaim.UID)}, DedupeTimeout: time.Minute * 15, @@ -46,7 +46,7 @@ func NodeRepairBlocked(node *corev1.Node, nodeClaim *v1.NodeClaim, nodePool *v1. { InvolvedObject: node, Type: corev1.EventTypeWarning, - Reason: "NodeRepairBlocked", + Reason: events.NodeRepairBlocked, Message: reason, DedupeValues: []string{string(nodePool.UID)}, DedupeTimeout: time.Minute * 15, @@ -59,7 +59,7 @@ func NodeRepairBlockedUnmanagedNodeClaim(node *corev1.Node, nodeClaim *v1.NodeCl { InvolvedObject: node, Type: corev1.EventTypeWarning, - Reason: "NodeRepairBlocked", + Reason: events.NodeRepairBlocked, Message: reason, DedupeValues: []string{string(node.UID)}, DedupeTimeout: time.Minute * 15, @@ -67,7 +67,7 @@ func NodeRepairBlockedUnmanagedNodeClaim(node *corev1.Node, nodeClaim *v1.NodeCl { InvolvedObject: node, Type: corev1.EventTypeWarning, - Reason: "NodeRepairBlocked", + Reason: events.NodeRepairBlocked, Message: reason, DedupeValues: []string{string(nodeClaim.UID)}, DedupeTimeout: time.Minute * 15, diff --git a/pkg/controllers/node/termination/terminator/events/events.go b/pkg/controllers/node/termination/terminator/events/events.go index 0e790978f3..8e816d06a8 100644 --- a/pkg/controllers/node/termination/terminator/events/events.go +++ b/pkg/controllers/node/termination/terminator/events/events.go @@ -30,7 +30,7 @@ func EvictPod(pod *corev1.Pod, message string) events.Event { return events.Event{ InvolvedObject: pod, Type: corev1.EventTypeNormal, - Reason: "Evicted", + Reason: events.Evicted, Message: "Evicted pod: " + message, DedupeValues: []string{pod.Name}, } @@ -40,7 +40,7 @@ func DisruptPodDelete(pod *corev1.Pod, gracePeriodSeconds *int64, nodeGracePerio return events.Event{ InvolvedObject: pod, Type: corev1.EventTypeNormal, - Reason: "Disrupted", + Reason: events.Disrupted, Message: fmt.Sprintf("Deleting 
the pod to accommodate the terminationTime %v of the node. The pod was granted %v seconds of grace-period of its %v terminationGracePeriodSeconds. This bypasses the PDB of the pod and the do-not-disrupt annotation.", *nodeGracePeriodTerminationTime, *gracePeriodSeconds, pod.Spec.TerminationGracePeriodSeconds), DedupeValues: []string{pod.Name}, } @@ -50,7 +50,7 @@ func NodeFailedToDrain(node *corev1.Node, err error) events.Event { return events.Event{ InvolvedObject: node, Type: corev1.EventTypeWarning, - Reason: "FailedDraining", + Reason: events.FailedDraining, Message: fmt.Sprintf("Failed to drain node, %s", err), DedupeValues: []string{node.Name}, } @@ -60,7 +60,7 @@ func NodeTerminationGracePeriodExpiring(node *corev1.Node, terminationTime strin return events.Event{ InvolvedObject: node, Type: corev1.EventTypeWarning, - Reason: "TerminationGracePeriodExpiring", + Reason: events.TerminationGracePeriodExpiring, Message: fmt.Sprintf("All pods will be deleted by %s", terminationTime), DedupeValues: []string{node.Name}, } @@ -70,7 +70,7 @@ func NodeClaimTerminationGracePeriodExpiring(nodeClaim *v1.NodeClaim, terminatio return events.Event{ InvolvedObject: nodeClaim, Type: corev1.EventTypeWarning, - Reason: "TerminationGracePeriodExpiring", + Reason: events.TerminationGracePeriodExpiring, Message: fmt.Sprintf("All pods will be deleted by %s", terminationTime), DedupeValues: []string{nodeClaim.Name}, } diff --git a/pkg/controllers/node/termination/terminator/suite_test.go b/pkg/controllers/node/termination/terminator/suite_test.go index 39bd3ce16b..86dd901a37 100644 --- a/pkg/controllers/node/termination/terminator/suite_test.go +++ b/pkg/controllers/node/termination/terminator/suite_test.go @@ -35,6 +35,7 @@ import ( "sigs.k8s.io/karpenter/pkg/apis" "sigs.k8s.io/karpenter/pkg/controllers/node/termination/terminator" + "sigs.k8s.io/karpenter/pkg/events" "sigs.k8s.io/karpenter/pkg/operator/options" "sigs.k8s.io/karpenter/pkg/test" . 
"sigs.k8s.io/karpenter/pkg/test/expectations" @@ -112,7 +113,7 @@ var _ = Describe("Eviction/Queue", func() { ExpectApplied(ctx, env.Client, pod) Expect(queue.Evict(ctx, terminator.NewQueueKey(pod, node.Spec.ProviderID))).To(BeTrue()) ExpectMetricCounterValue(terminator.NodesEvictionRequestsTotal, 1, map[string]string{terminator.CodeLabel: "200"}) - Expect(recorder.Calls("Evicted")).To(Equal(1)) + Expect(recorder.Calls(events.Evicted)).To(Equal(1)) }) It("should succeed with no event when there are PDBs that allow an eviction", func() { pdb = test.PodDisruptionBudget(test.PDBOptions{ @@ -121,12 +122,12 @@ var _ = Describe("Eviction/Queue", func() { }) ExpectApplied(ctx, env.Client, pod) Expect(queue.Evict(ctx, terminator.NewQueueKey(pod, node.Spec.ProviderID))).To(BeTrue()) - Expect(recorder.Calls("Evicted")).To(Equal(1)) + Expect(recorder.Calls(events.Evicted)).To(Equal(1)) }) It("should return a NodeDrainError event when a PDB is blocking", func() { ExpectApplied(ctx, env.Client, pdb, pod) Expect(queue.Evict(ctx, terminator.NewQueueKey(pod, node.Spec.ProviderID))).To(BeFalse()) - Expect(recorder.Calls("FailedDraining")).To(Equal(1)) + Expect(recorder.Calls(events.FailedDraining)).To(Equal(1)) }) It("should fail when two PDBs refer to the same pod", func() { pdb2 := test.PodDisruptionBudget(test.PDBOptions{ @@ -171,7 +172,7 @@ var _ = Describe("Eviction/Queue", func() { Expect(terminatorInstance.DeleteExpiringPods(ctx, []*corev1.Pod{pod}, nil)).To(Succeed()) ExpectExists(ctx, env.Client, pod) - Expect(recorder.Calls("Disrupted")).To(Equal(0)) + Expect(recorder.Calls(events.Disrupted)).To(Equal(0)) }) It("should not delete a pod with terminationGracePeriodSeconds still remaining before nodeTerminationTime", func() { pod.Spec.TerminationGracePeriodSeconds = lo.ToPtr[int64](60) @@ -180,7 +181,7 @@ var _ = Describe("Eviction/Queue", func() { nodeTerminationTime := time.Now().Add(time.Minute * 5) Expect(terminatorInstance.DeleteExpiringPods(ctx, []*corev1.Pod{pod}, 
&nodeTerminationTime)).To(Succeed()) ExpectExists(ctx, env.Client, pod) - Expect(recorder.Calls("Disrupted")).To(Equal(0)) + Expect(recorder.Calls(events.Disrupted)).To(Equal(0)) }) It("should delete a pod with less than terminationGracePeriodSeconds remaining before nodeTerminationTime", func() { pod.Spec.TerminationGracePeriodSeconds = lo.ToPtr[int64](120) @@ -189,7 +190,7 @@ var _ = Describe("Eviction/Queue", func() { nodeTerminationTime := time.Now().Add(time.Minute * 1) Expect(terminatorInstance.DeleteExpiringPods(ctx, []*corev1.Pod{pod}, &nodeTerminationTime)).To(Succeed()) ExpectNotFound(ctx, env.Client, pod) - Expect(recorder.Calls("Disrupted")).To(Equal(1)) + Expect(recorder.Calls(events.Disrupted)).To(Equal(1)) }) }) }) diff --git a/pkg/controllers/nodeclaim/consistency/events.go b/pkg/controllers/nodeclaim/consistency/events.go index 948d7bdfb1..d6ba447842 100644 --- a/pkg/controllers/nodeclaim/consistency/events.go +++ b/pkg/controllers/nodeclaim/consistency/events.go @@ -27,7 +27,7 @@ func FailedConsistencyCheckEvent(nodeClaim *v1.NodeClaim, message string) events return events.Event{ InvolvedObject: nodeClaim, Type: corev1.EventTypeWarning, - Reason: "FailedConsistencyCheck", + Reason: events.FailedConsistencyCheck, Message: message, DedupeValues: []string{string(nodeClaim.UID), message}, } diff --git a/pkg/controllers/nodeclaim/lifecycle/events.go b/pkg/controllers/nodeclaim/lifecycle/events.go index 2697b1cdc0..040cb03df9 100644 --- a/pkg/controllers/nodeclaim/lifecycle/events.go +++ b/pkg/controllers/nodeclaim/lifecycle/events.go @@ -29,7 +29,7 @@ func InsufficientCapacityErrorEvent(nodeClaim *v1.NodeClaim, err error) events.E return events.Event{ InvolvedObject: nodeClaim, Type: corev1.EventTypeWarning, - Reason: "InsufficientCapacityError", + Reason: events.InsufficientCapacityError, Message: fmt.Sprintf("NodeClaim %s event: %s", nodeClaim.Name, truncateMessage(err.Error())), DedupeValues: []string{string(nodeClaim.UID)}, } @@ -39,7 +39,7 @@ func 
NodeClassNotReadyEvent(nodeClaim *v1.NodeClaim, err error) events.Event { return events.Event{ InvolvedObject: nodeClaim, Type: corev1.EventTypeWarning, - Reason: "NodeClassNotReady", + Reason: events.NodeClassNotReady, Message: fmt.Sprintf("NodeClaim %s event: %s", nodeClaim.Name, truncateMessage(err.Error())), DedupeValues: []string{string(nodeClaim.UID)}, } diff --git a/pkg/controllers/provisioning/scheduling/events.go b/pkg/controllers/provisioning/scheduling/events.go index aa092475bc..0eff0a197b 100644 --- a/pkg/controllers/provisioning/scheduling/events.go +++ b/pkg/controllers/provisioning/scheduling/events.go @@ -42,7 +42,7 @@ func NominatePodEvent(pod *corev1.Pod, node *corev1.Node, nodeClaim *v1.NodeClai return events.Event{ InvolvedObject: pod, Type: corev1.EventTypeNormal, - Reason: "Nominated", + Reason: events.Nominated, Message: fmt.Sprintf("Pod should schedule on: %s", strings.Join(info, ", ")), DedupeValues: []string{string(pod.UID)}, RateLimiter: PodNominationRateLimiter, @@ -53,7 +53,7 @@ func NoCompatibleInstanceTypes(np *v1.NodePool) events.Event { return events.Event{ InvolvedObject: np, Type: corev1.EventTypeWarning, - Reason: "NoCompatibleInstanceTypes", + Reason: events.NoCompatibleInstanceTypes, Message: "NodePool requirements filtered out all compatible available instance types", DedupeValues: []string{string(np.UID)}, DedupeTimeout: 1 * time.Minute, @@ -64,7 +64,7 @@ func PodFailedToScheduleEvent(pod *corev1.Pod, err error) events.Event { return events.Event{ InvolvedObject: pod, Type: corev1.EventTypeWarning, - Reason: "FailedScheduling", + Reason: events.FailedScheduling, Message: fmt.Sprintf("Failed to schedule pod, %s", err), DedupeValues: []string{string(pod.UID)}, DedupeTimeout: 5 * time.Minute, diff --git a/pkg/events/reason.go b/pkg/events/reason.go new file mode 100644 index 0000000000..7d8ebc1fd0 --- /dev/null +++ b/pkg/events/reason.go @@ -0,0 +1,48 @@ +/* +Copyright The Kubernetes Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package events + +// Reasons for the events that controllers emit, grouped by the controller package that emits them. +const ( + // disruption + DisruptionBlocked = "DisruptionBlocked" + DisruptionLaunching = "DisruptionLaunching" + DisruptionTerminating = "DisruptionTerminating" + DisruptionWaitingReadiness = "DisruptionWaitingReadiness" + Unconsolidatable = "Unconsolidatable" + + // provisioning/scheduling + FailedScheduling = "FailedScheduling" + NoCompatibleInstanceTypes = "NoCompatibleInstanceTypes" + Nominated = "Nominated" + + // node/health + NodeRepairBlocked = "NodeRepairBlocked" + + // node/termination/terminator + Disrupted = "Disrupted" + Evicted = "Evicted" + FailedDraining = "FailedDraining" + TerminationGracePeriodExpiring = "TerminationGracePeriodExpiring" + + // nodeclaim/consistency + FailedConsistencyCheck = "FailedConsistencyCheck" + + // nodeclaim/lifecycle + InsufficientCapacityError = "InsufficientCapacityError" + NodeClassNotReady = "NodeClassNotReady" +)