From ce20dd7fc9a1c263d224cd22e23fc4bc3db37c62 Mon Sep 17 00:00:00 2001 From: Carlos Eduardo Arango Gutierrez Date: Wed, 4 Oct 2023 15:51:18 +0200 Subject: [PATCH] Add nfd-gc Deployment Signed-off-by: Carlos Eduardo Arango Gutierrez --- build/assets/gc/clusterrole.yaml | 32 +++++++++++++++++ build/assets/gc/clusterrolebinding.yaml | 12 +++++++ build/assets/gc/deployment.yaml | 23 ++++++++++++ build/assets/gc/serviceaccount.yaml | 4 +++ .../nodefeaturediscovery_controller.go | 34 ++++++++++++++++-- controllers/nodefeaturediscovery_state.go | 1 + controllers/nodefeaturediscovery_status.go | 36 ++++++++++++++++--- 7 files changed, 136 insertions(+), 6 deletions(-) create mode 100644 build/assets/gc/clusterrole.yaml create mode 100644 build/assets/gc/clusterrolebinding.yaml create mode 100644 build/assets/gc/deployment.yaml create mode 100644 build/assets/gc/serviceaccount.yaml diff --git a/build/assets/gc/clusterrole.yaml b/build/assets/gc/clusterrole.yaml new file mode 100644 index 00000000..d4e776c2 --- /dev/null +++ b/build/assets/gc/clusterrole.yaml @@ -0,0 +1,32 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: nfd-gc +rules: +- apiGroups: + - "" + resources: + - nodes + verbs: + - list + - watch +- apiGroups: + - "" + resources: + - nodes/proxy + verbs: + - get +- apiGroups: + - topology.node.k8s.io + resources: + - noderesourcetopologies + verbs: + - delete + - list +- apiGroups: + - nfd.k8s-sigs.io + resources: + - nodefeatures + verbs: + - delete + - list diff --git a/build/assets/gc/clusterrolebinding.yaml b/build/assets/gc/clusterrolebinding.yaml new file mode 100644 index 00000000..9f2ddef9 --- /dev/null +++ b/build/assets/gc/clusterrolebinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: nfd-gc +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: nfd-gc +subjects: +- kind: ServiceAccount + name: nfd-gc + namespace: default diff --git 
a/build/assets/gc/deployment.yaml b/build/assets/gc/deployment.yaml new file mode 100644 index 00000000..3a64947a --- /dev/null +++ b/build/assets/gc/deployment.yaml @@ -0,0 +1,23 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: nfd + name: nfd-gc +spec: + selector: + matchLabels: + app: nfd-gc + template: + metadata: + labels: + app: nfd-gc + spec: + dnsPolicy: ClusterFirstWithHostNet + serviceAccount: nfd-gc + containers: + - name: nfd-gc + image: $(NODE_FEATURE_DISCOVERY_IMAGE) + imagePullPolicy: Always + command: + - "nfd-gc" diff --git a/build/assets/gc/serviceaccount.yaml b/build/assets/gc/serviceaccount.yaml new file mode 100644 index 00000000..ec9501a8 --- /dev/null +++ b/build/assets/gc/serviceaccount.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: nfd-gc diff --git a/controllers/nodefeaturediscovery_controller.go b/controllers/nodefeaturediscovery_controller.go index 6a4dcdf8..a5684706 100644 --- a/controllers/nodefeaturediscovery_controller.go +++ b/controllers/nodefeaturediscovery_controller.go @@ -171,6 +171,13 @@ func (r *NodeFeatureDiscoveryReconciler) Reconcile(ctx context.Context, req ctrl return r.updateDegradedCondition(instance, conditionNFDWorkerServiceAccountDegraded, "nfd-worker service account has been degraded") } + // Check the status of the NFD Operator Garbage Collector ServiceAccount + if rstatus, err := r.getGarbageCollectorServiceAccountConditions(ctx, instance); err != nil { + return r.updateDegradedCondition(instance, conditionFailedGettingNFDGcServiceAccount, err.Error()) + } else if rstatus.isDegraded { + return r.updateDegradedCondition(instance, conditionNFDGcServiceAccountDegraded, "nfd-gc service account has been degraded") + } + // Check the status of the NFD Operator Master ServiceAccount if rstatus, err := r.getMasterServiceAccountConditions(ctx, instance); err != nil { return r.updateDegradedCondition(instance, conditionFailedGettingNFDMasterServiceAccount, err.Error()) @@ 
-185,18 +192,32 @@ func (r *NodeFeatureDiscoveryReconciler) Reconcile(ctx context.Context, req ctrl return r.updateDegradedCondition(instance, conditionNFDRoleDegraded, "nfd-worker role has been degraded") } + // Check the status of the NFD Operator cluster role for Garbage Collector + if rstatus, err := r.getGarbageCollectorClusterRoleConditions(ctx, instance); err != nil { + return r.updateDegradedCondition(instance, conditionNFDClusterRoleDegraded, err.Error()) + } else if rstatus.isDegraded { + return r.updateDegradedCondition(instance, conditionNFDClusterRoleDegraded, "nfd-gc ClusterRole has been degraded") + } + // Check the status of the NFD Operator cluster role if rstatus, err := r.getMasterClusterRoleConditions(ctx, instance); err != nil { return r.updateDegradedCondition(instance, conditionNFDClusterRoleDegraded, err.Error()) } else if rstatus.isDegraded { - return r.updateDegradedCondition(instance, conditionNFDClusterRoleDegraded, "nfd ClusterRole has been degraded") + return r.updateDegradedCondition(instance, conditionNFDClusterRoleDegraded, "nfd-master ClusterRole has been degraded") + } + + // Check the status of the NFD Operator cluster role binding for Garbage Collector + if rstatus, err := r.getGarbageCollectorClusterRoleBindingConditions(ctx, instance); err != nil { + return r.updateDegradedCondition(instance, conditionFailedGettingNFDClusterRoleBinding, err.Error()) + } else if rstatus.isDegraded { + return r.updateDegradedCondition(instance, conditionNFDClusterRoleBindingDegraded, "nfd-gc ClusterRoleBinding has been degraded") } // Check the status of the NFD Operator cluster role binding if rstatus, err := r.getMasterClusterRoleBindingConditions(ctx, instance); err != nil { return r.updateDegradedCondition(instance, conditionFailedGettingNFDClusterRoleBinding, err.Error()) } else if rstatus.isDegraded { - return r.updateDegradedCondition(instance, conditionNFDClusterRoleBindingDegraded, "nfd ClusterRoleBinding has been degraded") + return 
r.updateDegradedCondition(instance, conditionNFDClusterRoleBindingDegraded, "nfd-master ClusterRoleBinding has been degraded") } // Check the status of the NFD Operator role binding @@ -229,6 +250,15 @@ func (r *NodeFeatureDiscoveryReconciler) Reconcile(ctx context.Context, req ctrl return r.updateDegradedCondition(instance, err.Error(), "nfd-worker Daemonset has been degraded") } + // Check the status of the NFD Operator Garbage Collector Deployment; err is nil once the first branch is skipped, so the later branches use fixed reason strings + if rstatus, err := r.getGarbageCollectorDeploymentConditions(ctx, instance); err != nil { + return r.updateDegradedCondition(instance, conditionFailedGettingNFDGcDeployment, err.Error()) + } else if rstatus.isProgressing { + return r.updateProgressingCondition(instance, "NFDGcDeploymentProgressing", "nfd-gc Deployment is progressing") + } else if rstatus.isDegraded { + return r.updateDegradedCondition(instance, "NFDGcDeploymentDegraded", "nfd-gc Deployment has been degraded") + } + // Check the status of the NFD Operator Master Deployment if rstatus, err := r.getMasterDeploymentConditions(ctx, instance); err != nil { return r.updateDegradedCondition(instance, conditionFailedGettingNFDMasterDeployment, err.Error()) diff --git a/controllers/nodefeaturediscovery_state.go b/controllers/nodefeaturediscovery_state.go index 251611bc..0a34fdf5 100644 --- a/controllers/nodefeaturediscovery_state.go +++ b/controllers/nodefeaturediscovery_state.go @@ -60,6 +60,7 @@ func (n *NFD) init( n.ins = i n.idx = 0 if len(n.controls) == 0 { + n.addState("/opt/nfd/gc") n.addState("/opt/nfd/master") n.addState("/opt/nfd/worker") n.addState("/opt/nfd/topologyupdater") diff --git a/controllers/nodefeaturediscovery_status.go b/controllers/nodefeaturediscovery_status.go index 8f52188d..db2276f6 100644 --- a/controllers/nodefeaturediscovery_status.go +++ b/controllers/nodefeaturediscovery_status.go @@ -30,20 +30,23 @@ import ( ) const ( - nfdWorkerApp string = "nfd-worker" - nfdMasterApp string = "nfd-master" - nfdTopologyUpdaterApp string = "nfd-topology-updater" -
nfdPruneApp string = "nfd-prune" + nfdWorkerApp string = "nfd-worker" + nfdMasterApp string = "nfd-master" + nfdGarbageCollectorApp string = "nfd-gc" + nfdTopologyUpdaterApp string = "nfd-topology-updater" + nfdPruneApp string = "nfd-prune" ) const ( // Resource is missing conditionFailedGettingNFDWorkerConfig = "FailedGettingNFDWorkerConfig" conditionFailedGettingNFDWorkerServiceAccount = "FailedGettingNFDWorkerServiceAccount" + conditionFailedGettingNFDGcServiceAccount = "FailedGettingNFDGcServiceAccount" conditionFailedGettingNFDTopologyUpdaterServiceAccount = "FailedGettingNFDTopoloGyUpdaterServiceAccount" conditionFailedGettingNFDMasterServiceAccount = "FailedGettingNFDMasterServiceAccount" conditionFailedGettingNFDService = "FailedGettingNFDService" conditionFailedGettingNFDWorkerDaemonSet = "FailedGettingNFDWorkerDaemonSet" + conditionFailedGettingNFDGcDeployment = "FailedGettingNFDGcDeployment" conditionFailedGettingNFDMasterDeployment = "FailedGettingNFDMasterDeployment" conditionFailedGettingNFDRoleBinding = "FailedGettingNFDRoleBinding" conditionFailedGettingNFDClusterRoleBinding = "FailedGettingNFDClusterRole" @@ -51,6 +54,7 @@ const ( // Resource degraded conditionNFDWorkerConfigDegraded = "NFDWorkerConfigResourceDegraded" conditionNFDWorkerServiceAccountDegraded = "NFDWorkerServiceAccountDegraded" + conditionNFDGcServiceAccountDegraded = "NFDGcServiceAccountDegraded" conditionNFDTopologyUpdaterServiceAccountDegraded = "NFDTopologyUpdaterServiceAccountDegraded" conditionNFDMasterServiceAccountDegraded = "NFDMasterServiceAccountDegraded" conditionNFDServiceDegraded = "NFDServiceDegraded" @@ -278,6 +282,12 @@ func (r *NodeFeatureDiscoveryReconciler) getTopologyUpdaterDaemonSetConditions(c return r.getDaemonSetConditions(ctx, instance, nfdTopologyUpdaterApp) } +// getGarbageCollectorDeploymentConditions is a wrapper around "getDeploymentConditions" for +// garbage collector Deployment +func (r *NodeFeatureDiscoveryReconciler) 
getGarbageCollectorDeploymentConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) { + return r.getDeploymentConditions(ctx, instance, nfdGarbageCollectorApp) +} + // getMasterDeploymentConditions is a wrapper around "getDeploymentConditions" for // master Deployment func (r *NodeFeatureDiscoveryReconciler) getMasterDeploymentConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) { @@ -461,6 +471,12 @@ func (r *NodeFeatureDiscoveryReconciler) getRoleBindingConditions(ctx context.Co return status, nil } +// getGarbageCollectorClusterRoleConditions is a wrapper around "getClusterRoleConditions" for +// garbage collector cluster role. +func (r *NodeFeatureDiscoveryReconciler) getGarbageCollectorClusterRoleConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) { + return r.getClusterRoleConditions(ctx, instance, nfdGarbageCollectorApp) +} + // getMasterClusterRoleConditions is a wrapper around "getClusterRoleConditions" for // worker service account. func (r *NodeFeatureDiscoveryReconciler) getMasterClusterRoleConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) { @@ -494,6 +510,12 @@ func (r *NodeFeatureDiscoveryReconciler) getClusterRoleConditions(ctx context.Co return status, nil } +// getGarbageCollectorClusterRoleBindingConditions is a wrapper around "getClusterRoleBindingConditions" for +// garbage collector cluster role binding. +func (r *NodeFeatureDiscoveryReconciler) getGarbageCollectorClusterRoleBindingConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) { + return r.getClusterRoleBindingConditions(ctx, instance, nfdGarbageCollectorApp) +} + // getMasterClusterRoleBindingConditions is a wrapper around "getServiceAccountConditions" for // worker service account. 
func (r *NodeFeatureDiscoveryReconciler) getMasterClusterRoleBindingConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) { @@ -540,6 +562,12 @@ func (r *NodeFeatureDiscoveryReconciler) getTopologyUpdaterServiceAccountConditi return r.getServiceAccountConditions(ctx, instance, nfdTopologyUpdaterApp) } +// getGarbageCollectorServiceAccountConditions is a wrapper around "getServiceAccountConditions" for +// garbage collector service account. +func (r *NodeFeatureDiscoveryReconciler) getGarbageCollectorServiceAccountConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) { + return r.getServiceAccountConditions(ctx, instance, nfdGarbageCollectorApp) +} + // getMasterServiceAccountConditions is a wrapper around "getServiceAccountConditions" for // master service account. func (r *NodeFeatureDiscoveryReconciler) getMasterServiceAccountConditions(ctx context.Context, instance *nfdv1.NodeFeatureDiscovery) (Status, error) {