From 01946bdb0002ed3115649165588fda56490af891 Mon Sep 17 00:00:00 2001
From: Zhiwei Yin
Date: Fri, 14 Feb 2025 00:03:23 +0800
Subject: [PATCH] refactor gc and move rbac deletion to cluster controller

Signed-off-by: Zhiwei Yin
---
 pkg/common/helpers/clusters.go | 5 +
 .../helpers/testing/testinghelpers.go | 10 +
 pkg/registration/hub/gc/controller.go | 97 +++-----
 pkg/registration/hub/gc/controller_test.go | 147 ++++++++-----
 pkg/registration/hub/gc/doc.go | 6 +-
 pkg/registration/hub/gc/gc_cluster_rbac.go | 137 ------------
 .../hub/gc/gc_cluster_rbac_test.go | 207 ------------------
 pkg/registration/hub/gc/gc_resources.go | 27 +--
 pkg/registration/hub/gc/gc_resources_test.go | 23 +-
 .../hub/managedcluster/controller.go | 133 +++++++----
 .../hub/managedcluster/controller_test.go | 84 ++++++-
 pkg/registration/hub/manager.go | 11 +-
 .../registration/addon_lease_test.go | 6 +-
 .../registration/addon_registration_test.go | 6 +-
 .../managedcluster_deletion_test.go | 17 ++
 .../spokeagent_rebootstrap_test.go | 6 +-
 .../spokeagent_switch_hub_test.go | 7 +-
 .../registration/spokecluster_claim_test.go | 6 +-
 .../registration/spokecluster_joining_test.go | 6 +-
 .../registration/spokecluster_status_test.go | 9 +-
 20 files changed, 366 insertions(+), 584 deletions(-)
 delete mode 100644 pkg/registration/hub/gc/gc_cluster_rbac.go
 delete mode 100644 pkg/registration/hub/gc/gc_cluster_rbac_test.go

diff --git a/pkg/common/helpers/clusters.go b/pkg/common/helpers/clusters.go
index 4a1c3a37c..132dd766a 100644
--- a/pkg/common/helpers/clusters.go
+++ b/pkg/common/helpers/clusters.go
@@ -9,6 +9,11 @@ import (
 	clustersdkv1beta1 "open-cluster-management.io/sdk-go/pkg/apis/cluster/v1beta1"
 )
 
+const (
+	// GcFinalizer is added to the managedCluster for resource cleanup, which is maintained by the gc controller.
+ GcFinalizer = "cluster.open-cluster-management.io/resource-cleanup" +) + type PlacementDecisionGetter struct { Client clusterlister.PlacementDecisionLister } diff --git a/pkg/registration/helpers/testing/testinghelpers.go b/pkg/registration/helpers/testing/testinghelpers.go index d8522dffc..28b663fe2 100644 --- a/pkg/registration/helpers/testing/testinghelpers.go +++ b/pkg/registration/helpers/testing/testinghelpers.go @@ -153,6 +153,16 @@ func NewDeletingManagedCluster() *clusterv1.ManagedCluster { }, } } +func NewDeletingManagedClusterWithFinalizers(finalizers []string) *clusterv1.ManagedCluster { + now := metav1.Now() + return &clusterv1.ManagedCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: TestManagedClusterName, + DeletionTimestamp: &now, + Finalizers: finalizers, + }, + } +} func NewManagedClusterCondition(name, status, reason, message string, lastTransition *metav1.Time) metav1.Condition { ret := metav1.Condition{ diff --git a/pkg/registration/hub/gc/controller.go b/pkg/registration/hub/gc/controller.go index 53c9bca9f..9e5b3279e 100644 --- a/pkg/registration/hub/gc/controller.go +++ b/pkg/registration/hub/gc/controller.go @@ -3,37 +3,25 @@ package gc import ( "context" "strings" - "time" "github.com/openshift/library-go/pkg/controller/factory" "github.com/openshift/library-go/pkg/operator/events" + "github.com/pkg/errors" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" - utilerrors "k8s.io/apimachinery/pkg/util/errors" - "k8s.io/client-go/kubernetes" - rbacv1listers "k8s.io/client-go/listers/rbac/v1" "k8s.io/client-go/metadata" "k8s.io/klog/v2" clientset "open-cluster-management.io/api/client/cluster/clientset/versioned" informerv1 "open-cluster-management.io/api/client/cluster/informers/externalversions/cluster/v1" clusterv1listers "open-cluster-management.io/api/client/cluster/listers/cluster/v1" - worklister "open-cluster-management.io/api/client/work/listers/work/v1" clusterv1 "open-cluster-management.io/api/cluster/v1" "open-cluster-management.io/sdk-go/pkg/patcher" + commonhelper "open-cluster-management.io/ocm/pkg/common/helpers" "open-cluster-management.io/ocm/pkg/common/queue" - "open-cluster-management.io/ocm/pkg/registration/register" -) - -type gcReconcileOp int - -const ( - gcReconcileRequeue gcReconcileOp = iota - gcReconcileStop - gcReconcileContinue ) var ( @@ -43,31 +31,20 @@ var ( Version: "v1", Resource: "manifestworks"} ) -// gcReconciler is an interface for reconcile cleanup logic after cluster is deleted. -// clusterName is from the queueKey, cluster may be nil. 
-type gcReconciler interface { - reconcile(ctx context.Context, cluster *clusterv1.ManagedCluster, clusterNamespace string) (gcReconcileOp, error) -} - type GCController struct { - clusterLister clusterv1listers.ManagedClusterLister - clusterPatcher patcher.Patcher[*clusterv1.ManagedCluster, clusterv1.ManagedClusterSpec, clusterv1.ManagedClusterStatus] - gcReconcilers []gcReconciler + clusterLister clusterv1listers.ManagedClusterLister + clusterPatcher patcher.Patcher[*clusterv1.ManagedCluster, clusterv1.ManagedClusterSpec, clusterv1.ManagedClusterStatus] + gcResourcesController *gcResourcesController + eventRecorder events.Recorder } // NewGCController ensures the related resources are cleaned up after cluster is deleted func NewGCController( - clusterRoleLister rbacv1listers.ClusterRoleLister, - roleBindingLister rbacv1listers.RoleBindingLister, clusterInformer informerv1.ManagedClusterInformer, - manifestWorkLister worklister.ManifestWorkLister, clusterClient clientset.Interface, - kubeClient kubernetes.Interface, metadataClient metadata.Interface, - approver register.Approver, eventRecorder events.Recorder, gcResourceList []string, - resourceCleanupFeatureGateEnable bool, ) factory.Controller { clusterPatcher := patcher.NewPatcher[ *clusterv1.ManagedCluster, clusterv1.ManagedClusterSpec, clusterv1.ManagedClusterStatus]( @@ -76,11 +53,9 @@ func NewGCController( controller := &GCController{ clusterLister: clusterInformer.Lister(), clusterPatcher: clusterPatcher, - gcReconcilers: []gcReconciler{}, + eventRecorder: eventRecorder.WithComponentSuffix("gc-resources"), } - - // do not clean resources if featureGate is disabled or no gc resource list for backwards compatible. - if resourceCleanupFeatureGateEnable && len(gcResourceList) != 0 { + if len(gcResourceList) != 0 { gcResources := []schema.GroupVersionResource{} for _, gcResource := range gcResourceList { subStrings := strings.Split(gcResource, "/") @@ -91,14 +66,9 @@ func NewGCController( gcResources = append(gcResources, schema.GroupVersionResource{ Group: subStrings[0], Version: subStrings[1], Resource: subStrings[2]}) } - controller.gcReconcilers = append(controller.gcReconcilers, - newGCResourcesController(metadataClient, gcResources, eventRecorder)) + controller.gcResourcesController = newGCResourcesController(metadataClient, gcResources) } - controller.gcReconcilers = append(controller.gcReconcilers, - newGCClusterRbacController(kubeClient, clusterPatcher, clusterRoleLister, roleBindingLister, - manifestWorkLister, approver, eventRecorder, resourceCleanupFeatureGateEnable)) - return factory.New(). WithInformersQueueKeysFunc(queue.QueueKeyByMetaName, clusterInformer.Informer()). WithSync(controller.sync).ToController("GCController", eventRecorder) @@ -106,70 +76,53 @@ func NewGCController( // gc controller is watching cluster and to do these jobs: // 1. add a cleanup finalizer to managedCluster if the cluster is not deleting. -// 2. clean up all rolebinding and resources in the cluster ns after the cluster is deleted. +// 2. clean up the resources in the cluster ns after the cluster is deleted. func (r *GCController) sync(ctx context.Context, controllerContext factory.SyncContext) error { clusterName := controllerContext.QueueKey() if clusterName == "" || clusterName == factory.DefaultQueueKey { return nil } - // cluster could be nil, that means the cluster is gone but the gc is not finished. 
cluster, err := r.clusterLister.Get(clusterName) if err != nil && !apierrors.IsNotFound(err) { return err } - var copyCluster *clusterv1.ManagedCluster if cluster != nil { if cluster.DeletionTimestamp.IsZero() { - _, err = r.clusterPatcher.AddFinalizer(ctx, cluster, clusterv1.ManagedClusterFinalizer) + _, err = r.clusterPatcher.AddFinalizer(ctx, cluster, commonhelper.GcFinalizer) return err } - copyCluster = cluster.DeepCopy() } - var errs []error - var requeue bool - for _, reconciler := range r.gcReconcilers { - op, err := reconciler.reconcile(ctx, copyCluster, clusterName) - if err != nil { - errs = append(errs, err) - } - if op == gcReconcileRequeue { - requeue = true - } - if op == gcReconcileStop { - break - } - } - - if requeue { - controllerContext.Queue().AddAfter(clusterName, 5*time.Second) - } - + gcErr := r.gcResourcesController.reconcile(ctx, copyCluster, clusterName) if cluster == nil { - return utilerrors.NewAggregate(errs) + return gcErr } - // update cluster condition - if len(errs) != 0 { - applyErrors := utilerrors.NewAggregate(errs) + if gcErr != nil && !errors.Is(gcErr, requeueError) { + r.eventRecorder.Eventf("ResourceCleanupFail", + "failed to cleanup resources in cluster %s:%v", cluster.Name, gcErr) + meta.SetStatusCondition(©Cluster.Status.Conditions, metav1.Condition{ Type: clusterv1.ManagedClusterConditionDeleting, Status: metav1.ConditionFalse, Reason: clusterv1.ConditionDeletingReasonResourceError, - Message: applyErrors.Error(), + Message: gcErr.Error(), }) } if _, err = r.clusterPatcher.PatchStatus(ctx, cluster, copyCluster.Status, cluster.Status); err != nil { - errs = append(errs, err) + return err } - if len(errs) != 0 || requeue { - return utilerrors.NewAggregate(errs) + if gcErr != nil { + return gcErr } - return r.clusterPatcher.RemoveFinalizer(ctx, cluster, clusterv1.ManagedClusterFinalizer) + r.eventRecorder.Eventf("ResourceCleanupCompleted", + "resources in cluster %s are cleaned up", cluster.Name) + + return r.clusterPatcher.RemoveFinalizer(ctx, cluster, commonhelper.GcFinalizer) } diff --git a/pkg/registration/hub/gc/controller_test.go b/pkg/registration/hub/gc/controller_test.go index 75276f74f..8c760f16c 100644 --- a/pkg/registration/hub/gc/controller_test.go +++ b/pkg/registration/hub/gc/controller_test.go @@ -2,29 +2,29 @@ package gc import ( "context" + "encoding/json" "testing" "time" "github.com/openshift/library-go/pkg/controller/factory" "github.com/openshift/library-go/pkg/operator/events" - corev1 "k8s.io/api/core/v1" + "github.com/pkg/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" - kubeinformers "k8s.io/client-go/informers" - fakeclient "k8s.io/client-go/kubernetes/fake" - "k8s.io/client-go/kubernetes/scheme" fakemetadataclient "k8s.io/client-go/metadata/fake" clienttesting "k8s.io/client-go/testing" + addonv1alpha1 "open-cluster-management.io/api/addon/v1alpha1" fakeclusterclient "open-cluster-management.io/api/client/cluster/clientset/versioned/fake" clusterinformers "open-cluster-management.io/api/client/cluster/informers/externalversions" - fakeworkclient "open-cluster-management.io/api/client/work/clientset/versioned/fake" - workinformers "open-cluster-management.io/api/client/work/informers/externalversions" clusterv1 "open-cluster-management.io/api/cluster/v1" + workv1 "open-cluster-management.io/api/work/v1" "open-cluster-management.io/sdk-go/pkg/patcher" + commonhelpers "open-cluster-management.io/ocm/pkg/common/helpers" testingcommon 
"open-cluster-management.io/ocm/pkg/common/testing" testinghelpers "open-cluster-management.io/ocm/pkg/registration/helpers/testing" - "open-cluster-management.io/ocm/pkg/registration/register" ) func TestGController(t *testing.T) { @@ -32,43 +32,105 @@ func TestGController(t *testing.T) { name string key string cluster *clusterv1.ManagedCluster - namespace *corev1.Namespace - expectedErr string + objs []runtime.Object validateActions func(t *testing.T, clusterActions []clienttesting.Action) }{ { - name: "invalid key", - key: factory.DefaultQueueKey, - cluster: testinghelpers.NewDeletingManagedCluster(), - expectedErr: "", + name: "invalid key", + key: factory.DefaultQueueKey, + cluster: testinghelpers.NewDeletingManagedCluster(), + validateActions: func(t *testing.T, clusterActions []clienttesting.Action) { testingcommon.AssertNoActions(t, clusterActions) }, }, { - name: "valid key with cluster", - key: testinghelpers.TestManagedClusterName, - cluster: testinghelpers.NewDeletingManagedCluster(), - expectedErr: "", + name: " cluster is not deleting", + key: testinghelpers.TestManagedClusterName, + cluster: testinghelpers.NewManagedCluster(), + validateActions: func(t *testing.T, clusterActions []clienttesting.Action) { + testingcommon.AssertActions(t, clusterActions, "patch") + patch := clusterActions[0].(clienttesting.PatchAction).GetPatch() + managedCluster := &clusterv1.ManagedCluster{} + err := json.Unmarshal(patch, managedCluster) + if err != nil { + t.Fatal(err) + } + if !commonhelpers.HasFinalizer(managedCluster.Finalizers, commonhelpers.GcFinalizer) { + t.Errorf("expected gc finalizer") + } + }, + }, + { + name: "cluster is deleting with no resources", + key: testinghelpers.TestManagedClusterName, + cluster: testinghelpers.NewDeletingManagedClusterWithFinalizers([]string{commonhelpers.GcFinalizer}), validateActions: func(t *testing.T, clusterActions []clienttesting.Action) { testingcommon.AssertActions(t, clusterActions, "patch", "patch") + expectedCondition := metav1.Condition{ + Type: clusterv1.ManagedClusterConditionDeleting, + Status: metav1.ConditionTrue, + Reason: clusterv1.ConditionDeletingReasonNoResource, + Message: "No cleaned resource in cluster ns."} + + patch := clusterActions[0].(clienttesting.PatchAction).GetPatch() + managedCluster := &clusterv1.ManagedCluster{} + if err := json.Unmarshal(patch, managedCluster); err != nil { + t.Fatal(err) + } + testingcommon.AssertCondition(t, managedCluster.Status.Conditions, expectedCondition) + + patch = clusterActions[1].(clienttesting.PatchAction).GetPatch() + if err := json.Unmarshal(patch, managedCluster); err != nil { + t.Fatal(err) + } + if len(managedCluster.Finalizers) != 0 { + t.Errorf("expected no finalizer") + } }, }, { - name: "valid key with no cluster ", - key: "cluster1", - cluster: testinghelpers.NewDeletingManagedCluster(), - expectedErr: "", + name: "cluster is deleting with resources", + key: testinghelpers.TestManagedClusterName, + cluster: testinghelpers.NewDeletingManagedClusterWithFinalizers([]string{commonhelpers.GcFinalizer}), + objs: []runtime.Object{newAddonMetadata(testinghelpers.TestManagedClusterName, "test", nil)}, + validateActions: func(t *testing.T, clusterActions []clienttesting.Action) { + testingcommon.AssertActions(t, clusterActions, "patch") + expectedCondition := metav1.Condition{ + Type: clusterv1.ManagedClusterConditionDeleting, + Status: metav1.ConditionFalse, + Reason: clusterv1.ConditionDeletingReasonResourceRemaining, + Message: "The resource managedclusteraddons is remaning, the 
remaining count is 1, " + + "the finalizer pending count is 0", + } + + patch := clusterActions[0].(clienttesting.PatchAction).GetPatch() + managedCluster := &clusterv1.ManagedCluster{} + if err := json.Unmarshal(patch, managedCluster); err != nil { + t.Fatal(err) + } + testingcommon.AssertCondition(t, managedCluster.Status.Conditions, expectedCondition) + + }, + }, + { + name: "cluster is gone with resources", + key: "test", + cluster: testinghelpers.NewDeletingManagedClusterWithFinalizers([]string{commonhelpers.GcFinalizer}), + objs: []runtime.Object{newAddonMetadata(testinghelpers.TestManagedClusterName, "test", nil)}, validateActions: func(t *testing.T, clusterActions []clienttesting.Action) { testingcommon.AssertNoActions(t, clusterActions) }, }, } + for _, c := range cases { t.Run(c.name, func(t *testing.T) { - kubeClient := fakeclient.NewSimpleClientset() - kubeInformerFactory := kubeinformers.NewSharedInformerFactory(kubeClient, time.Minute*10) - metadataClient := fakemetadataclient.NewSimpleMetadataClient(scheme.Scheme) + scheme := fakemetadataclient.NewTestScheme() + _ = addonv1alpha1.Install(scheme) + _ = workv1.Install(scheme) + _ = metav1.AddMetaToScheme(scheme) + metadataClient := fakemetadataclient.NewSimpleMetadataClient(scheme, c.objs...) clusterClient := fakeclusterclient.NewSimpleClientset(c.cluster) clusterInformerFactory := clusterinformers.NewSharedInformerFactory(clusterClient, time.Minute*10) @@ -79,52 +141,31 @@ func TestGController(t *testing.T) { } } - workClient := fakeworkclient.NewSimpleClientset() - workInformerFactory := workinformers.NewSharedInformerFactory(workClient, 5*time.Minute) - _ = NewGCController( - kubeInformerFactory.Rbac().V1().ClusterRoles().Lister(), - kubeInformerFactory.Rbac().V1().RoleBindings().Lister(), clusterInformerFactory.Cluster().V1().ManagedClusters(), - workInformerFactory.Work().V1().ManifestWorks().Lister(), clusterClient, - kubeClient, metadataClient, - register.NewNoopApprover(), events.NewInMemoryRecorder(""), []string{"addon.open-cluster-management.io/v1alpha1/managedclusteraddons", "work.open-cluster-management.io/v1/manifestworks"}, - true, ) - namespaceStore := kubeInformerFactory.Core().V1().Namespaces().Informer().GetStore() - if c.namespace != nil { - if err := namespaceStore.Add(c.namespace); err != nil { - t.Fatal(err) - } - } + clusterPatcher := patcher.NewPatcher[ *clusterv1.ManagedCluster, clusterv1.ManagedClusterSpec, clusterv1.ManagedClusterStatus]( clusterClient.ClusterV1().ManagedClusters()) ctrl := &GCController{ - clusterLister: clusterInformerFactory.Cluster().V1().ManagedClusters().Lister(), - clusterPatcher: clusterPatcher, - gcReconcilers: []gcReconciler{ - newGCResourcesController(metadataClient, []schema.GroupVersionResource{addonGvr, workGvr}, - events.NewInMemoryRecorder("")), - newGCClusterRbacController(kubeClient, clusterPatcher, - kubeInformerFactory.Rbac().V1().ClusterRoles().Lister(), - kubeInformerFactory.Rbac().V1().RoleBindings().Lister(), - workInformerFactory.Work().V1().ManifestWorks().Lister(), - register.NewNoopApprover(), - events.NewInMemoryRecorder(""), - true), - }, + clusterLister: clusterInformerFactory.Cluster().V1().ManagedClusters().Lister(), + clusterPatcher: clusterPatcher, + gcResourcesController: newGCResourcesController(metadataClient, []schema.GroupVersionResource{addonGvr, workGvr}), + eventRecorder: events.NewInMemoryRecorder(""), } controllerContext := testingcommon.NewFakeSyncContext(t, c.key) err := ctrl.sync(context.TODO(), controllerContext) - 
testingcommon.AssertError(t, err, c.expectedErr) + if err != nil && !errors.Is(err, requeueError) { + t.Error(err) + } c.validateActions(t, clusterClient.Actions()) }) } diff --git a/pkg/registration/hub/gc/doc.go b/pkg/registration/hub/gc/doc.go index e60be2d13..521e41374 100644 --- a/pkg/registration/hub/gc/doc.go +++ b/pkg/registration/hub/gc/doc.go @@ -1,5 +1,3 @@ -// package gc contains the hub-side reconciler to cleanup finalizer on -// role/rolebinding in cluster namespace when ManagedCluster is being deleted. -// and delete the cluterRoles for the registration and work agent after therer is no -// cluster and manifestwork. +// Package gc contains the hub-side reconciler to clean up the resources in the cluster ns +// after the cluster is deleted. package gc diff --git a/pkg/registration/hub/gc/gc_cluster_rbac.go b/pkg/registration/hub/gc/gc_cluster_rbac.go deleted file mode 100644 index b470f9ae6..000000000 --- a/pkg/registration/hub/gc/gc_cluster_rbac.go +++ /dev/null @@ -1,137 +0,0 @@ -package gc - -import ( - "context" - "fmt" - - "github.com/openshift/library-go/pkg/operator/events" - "github.com/openshift/library-go/pkg/operator/resource/resourceapply" - operatorhelpers "github.com/openshift/library-go/pkg/operator/v1helpers" - v1 "k8s.io/api/rbac/v1" - "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/client-go/kubernetes" - rbacv1listers "k8s.io/client-go/listers/rbac/v1" - - worklister "open-cluster-management.io/api/client/work/listers/work/v1" - clusterv1 "open-cluster-management.io/api/cluster/v1" - "open-cluster-management.io/sdk-go/pkg/patcher" - - "open-cluster-management.io/ocm/pkg/registration/helpers" - "open-cluster-management.io/ocm/pkg/registration/hub/manifests" - "open-cluster-management.io/ocm/pkg/registration/register" -) - -const ( - manifestWorkFinalizer = "cluster.open-cluster-management.io/manifest-work-cleanup" -) - -var ( - mclClusterRoleName = func(clusterName string) string { - return fmt.Sprintf("open-cluster-management:managedcluster:%s", clusterName) - } - mclClusterRoleBindingName = func(clusterName string) string { - return fmt.Sprintf("open-cluster-management:managedcluster:%s", clusterName) - } - registrationRoleBindingName = func(clusterName string) string { - return fmt.Sprintf("open-cluster-management:managedcluster:%s:registration", clusterName) - } - workRoleBindingName = func(clusterName string) string { - return fmt.Sprintf("open-cluster-management:managedcluster:%s:work", clusterName) - } -) - -type gcClusterRbacController struct { - kubeClient kubernetes.Interface - clusterRoleLister rbacv1listers.ClusterRoleLister - roleBindingLister rbacv1listers.RoleBindingLister - manifestWorkLister worklister.ManifestWorkLister - clusterPatcher patcher.Patcher[*clusterv1.ManagedCluster, clusterv1.ManagedClusterSpec, clusterv1.ManagedClusterStatus] - approver register.Approver - eventRecorder events.Recorder - resourceCleanupFeatureGateEnable bool -} - -// newGCClusterRoleController ensures the rbac of agent are deleted after cluster is deleted. 
-func newGCClusterRbacController( - kubeClient kubernetes.Interface, - clusterPatcher patcher.Patcher[*clusterv1.ManagedCluster, clusterv1.ManagedClusterSpec, clusterv1.ManagedClusterStatus], - clusterRoleLister rbacv1listers.ClusterRoleLister, - roleBindingLister rbacv1listers.RoleBindingLister, - manifestWorkLister worklister.ManifestWorkLister, - approver register.Approver, - eventRecorder events.Recorder, - resourceCleanupFeatureGateEnable bool, -) gcReconciler { - return &gcClusterRbacController{ - kubeClient: kubeClient, - clusterRoleLister: clusterRoleLister, - roleBindingLister: roleBindingLister, - manifestWorkLister: manifestWorkLister, - clusterPatcher: clusterPatcher, - approver: approver, - eventRecorder: eventRecorder.WithComponentSuffix("gc-cluster-rbac"), - resourceCleanupFeatureGateEnable: resourceCleanupFeatureGateEnable, - } -} - -func (r *gcClusterRbacController) reconcile(ctx context.Context, - cluster *clusterv1.ManagedCluster, clusterNamespace string) (gcReconcileOp, error) { - if cluster != nil { - if err := r.removeClusterRbac(ctx, cluster.Name, cluster.Spec.HubAcceptsClient); err != nil { - return gcReconcileContinue, err - } - - if err := r.approver.Cleanup(ctx, cluster); err != nil { - return gcReconcileContinue, err - } - - // if GC feature gate is disable, the finalizer is removed from the cluster after the related rbac is deleted. - // there is no need to wait other resources are cleaned up before remove the finalizer. - if !r.resourceCleanupFeatureGateEnable { - if err := r.clusterPatcher.RemoveFinalizer(ctx, cluster, clusterv1.ManagedClusterFinalizer); err != nil { - return gcReconcileStop, err - } - } - } - - works, err := r.manifestWorkLister.ManifestWorks(clusterNamespace).List(labels.Everything()) - if err != nil && !errors.IsNotFound(err) { - return gcReconcileStop, err - } - if len(works) != 0 { - return gcReconcileRequeue, nil - } - - return gcReconcileStop, r.removeFinalizerFromWorkRoleBinding(ctx, clusterNamespace, manifestWorkFinalizer) -} - -func (r *gcClusterRbacController) removeClusterRbac(ctx context.Context, clusterName string, accepted bool) error { - var errs []error - // Clean up managed cluster manifests - assetFn := helpers.ManagedClusterAssetFnWithAccepted(manifests.RBACManifests, clusterName, accepted) - resourceResults := resourceapply.DeleteAll(ctx, resourceapply.NewKubeClientHolder(r.kubeClient), - r.eventRecorder, assetFn, append(manifests.ClusterSpecificRBACFiles, manifests.ClusterSpecificRoleBindings...)...) 
- for _, result := range resourceResults { - if result.Error != nil { - errs = append(errs, fmt.Errorf("%q (%T): %v", result.File, result.Type, result.Error)) - } - } - return operatorhelpers.NewMultiLineAggregate(errs) -} - -func (r *gcClusterRbacController) removeFinalizerFromWorkRoleBinding(ctx context.Context, - clusterName, finalizer string) error { - workRoleBinding, err := r.roleBindingLister.RoleBindings(clusterName).Get(workRoleBindingName(clusterName)) - switch { - case errors.IsNotFound(err): - return nil - case err != nil: - return err - } - - roleBindingFinalizerPatcher := patcher.NewPatcher[*v1.RoleBinding, v1.RoleBinding, - v1.RoleBinding](r.kubeClient.RbacV1().RoleBindings(clusterName)) - return roleBindingFinalizerPatcher.RemoveFinalizer(ctx, workRoleBinding, finalizer) - -} diff --git a/pkg/registration/hub/gc/gc_cluster_rbac_test.go b/pkg/registration/hub/gc/gc_cluster_rbac_test.go deleted file mode 100644 index eba0d8fdc..000000000 --- a/pkg/registration/hub/gc/gc_cluster_rbac_test.go +++ /dev/null @@ -1,207 +0,0 @@ -package gc - -import ( - "context" - "testing" - "time" - - "github.com/openshift/library-go/pkg/operator/events" - "github.com/stretchr/testify/assert" - "k8s.io/apimachinery/pkg/runtime" - kubeinformers "k8s.io/client-go/informers" - fakeclient "k8s.io/client-go/kubernetes/fake" - clienttesting "k8s.io/client-go/testing" - - fakeclusterclient "open-cluster-management.io/api/client/cluster/clientset/versioned/fake" - clusterinformers "open-cluster-management.io/api/client/cluster/informers/externalversions" - fakeworkclient "open-cluster-management.io/api/client/work/clientset/versioned/fake" - workinformers "open-cluster-management.io/api/client/work/informers/externalversions" - clusterv1 "open-cluster-management.io/api/cluster/v1" - workv1 "open-cluster-management.io/api/work/v1" - "open-cluster-management.io/sdk-go/pkg/patcher" - - testingcommon "open-cluster-management.io/ocm/pkg/common/testing" - testinghelpers "open-cluster-management.io/ocm/pkg/registration/helpers/testing" - "open-cluster-management.io/ocm/pkg/registration/register" -) - -func TestGCClusterRbacController(t *testing.T) { - cases := []struct { - name string - clusterName string - cluster *clusterv1.ManagedCluster - works []*workv1.ManifestWork - workRoleBinding runtime.Object - resourceCleanupFeatureGateEnable bool - expectedOp gcReconcileOp - validateActions func(t *testing.T, kubeActions, clusterActions []clienttesting.Action) - }{ - { - name: "no cluster and work, do nothing", - expectedOp: gcReconcileStop, - validateActions: func(t *testing.T, kubeActions, clusterActions []clienttesting.Action) { - testingcommon.AssertNoActions(t, kubeActions) - }, - }, - { - name: "cluster is nil and have works, Requeue", - clusterName: "cluster1", - works: []*workv1.ManifestWork{ - testinghelpers.NewManifestWork("cluster1", "test", nil, nil, nil, nil), - }, - expectedOp: gcReconcileRequeue, - validateActions: func(t *testing.T, kubeActions, clusterActions []clienttesting.Action) { - testingcommon.AssertNoActions(t, kubeActions) - }, - }, - { - name: "cluster is nil and no works, remove finalizer from work rolebinding", - clusterName: "cluster1", - expectedOp: gcReconcileStop, - workRoleBinding: testinghelpers.NewRoleBinding("cluster1", - workRoleBindingName("cluster1"), []string{manifestWorkFinalizer}, - map[string]string{clusterv1.ClusterNameLabelKey: "cluster1"}, true), - validateActions: func(t *testing.T, kubeActions, clusterActions []clienttesting.Action) { - 
testingcommon.AssertActions(t, kubeActions, "patch") - }, - }, - { - name: "gc is disable, delete rbac and remove finalizer from cluster", - clusterName: testinghelpers.TestManagedClusterName, - cluster: testinghelpers.NewDeletingManagedCluster(), - resourceCleanupFeatureGateEnable: false, - expectedOp: gcReconcileStop, - validateActions: func(t *testing.T, kubeActions, clusterActions []clienttesting.Action) { - testingcommon.AssertActions(t, kubeActions, "delete", "delete", "delete", "delete") - testingcommon.AssertActions(t, clusterActions, "patch") - }, - }, - { - name: "gc is enable with no work, delete rbac ", - clusterName: testinghelpers.TestManagedClusterName, - cluster: testinghelpers.NewDeletingManagedCluster(), - resourceCleanupFeatureGateEnable: true, - expectedOp: gcReconcileStop, - validateActions: func(t *testing.T, kubeActions, clusterActions []clienttesting.Action) { - testingcommon.AssertActions(t, kubeActions, "delete", "delete", "delete", "delete") - }, - }, - { - name: "gc is disable with works", - clusterName: testinghelpers.TestManagedClusterName, - cluster: testinghelpers.NewDeletingManagedCluster(), - resourceCleanupFeatureGateEnable: false, - works: []*workv1.ManifestWork{ - testinghelpers.NewManifestWork(testinghelpers.TestManagedClusterName, "test", nil, nil, nil, nil), - }, - workRoleBinding: testinghelpers.NewRoleBinding(testinghelpers.TestManagedClusterName, - workRoleBindingName(testinghelpers.TestManagedClusterName), []string{manifestWorkFinalizer}, - map[string]string{clusterv1.ClusterNameLabelKey: testinghelpers.TestManagedClusterName}, true), - - expectedOp: gcReconcileRequeue, - validateActions: func(t *testing.T, kubeActions, clusterActions []clienttesting.Action) { - testingcommon.AssertActions(t, kubeActions, "delete", "delete", "delete", "delete") - testingcommon.AssertActions(t, clusterActions, "patch") - }, - }, - { - name: "gc is disable with no works", - clusterName: testinghelpers.TestManagedClusterName, - cluster: testinghelpers.NewDeletingManagedCluster(), - resourceCleanupFeatureGateEnable: false, - workRoleBinding: testinghelpers.NewRoleBinding(testinghelpers.TestManagedClusterName, - workRoleBindingName(testinghelpers.TestManagedClusterName), []string{manifestWorkFinalizer}, - map[string]string{clusterv1.ClusterNameLabelKey: testinghelpers.TestManagedClusterName}, true), - - expectedOp: gcReconcileStop, - validateActions: func(t *testing.T, kubeActions, clusterActions []clienttesting.Action) { - testingcommon.AssertActions(t, kubeActions, "delete", "delete", "delete", "delete", "patch") - testingcommon.AssertActions(t, clusterActions, "patch") - }, - }, - } - for _, c := range cases { - t.Run(c.name, func(t *testing.T) { - objs := []runtime.Object{} - - mclClusterRole := testinghelpers.NewClusterRole( - mclClusterRoleName(c.clusterName), - []string{}, map[string]string{clusterv1.ClusterNameLabelKey: ""}, false) - mclClusterRoleBinding := testinghelpers.NewClusterRoleBinding( - mclClusterRoleBindingName(c.clusterName), - []string{}, map[string]string{clusterv1.ClusterNameLabelKey: ""}, false) - - registrationRoleBinding := testinghelpers.NewRoleBinding(c.clusterName, - registrationRoleBindingName(c.clusterName), - []string{}, map[string]string{clusterv1.ClusterNameLabelKey: ""}, false) - objs = append(objs, mclClusterRole, mclClusterRoleBinding, registrationRoleBinding) - - if c.workRoleBinding != nil { - objs = append(objs, c.workRoleBinding) - } - kubeClient := fakeclient.NewSimpleClientset(objs...) 
- kubeInformerFactory := kubeinformers.NewSharedInformerFactory(kubeClient, time.Minute*10) - - roleBindingStore := kubeInformerFactory.Rbac().V1().RoleBindings().Informer().GetStore() - if c.workRoleBinding != nil { - if err := roleBindingStore.Add(c.workRoleBinding); err != nil { - t.Fatal(err) - } - } - - var clusterPatcher patcher.Patcher[*clusterv1.ManagedCluster, clusterv1.ManagedClusterSpec, clusterv1.ManagedClusterStatus] - var clusterClient *fakeclusterclient.Clientset - if c.cluster != nil { - clusterClient = fakeclusterclient.NewSimpleClientset(c.cluster) - clusterInformerFactory := clusterinformers.NewSharedInformerFactory(clusterClient, time.Minute*10) - clusterStore := clusterInformerFactory.Cluster().V1().ManagedClusters().Informer().GetStore() - if err := clusterStore.Add(c.cluster); err != nil { - t.Fatal(err) - } - - clusterPatcher = patcher.NewPatcher[ - *clusterv1.ManagedCluster, clusterv1.ManagedClusterSpec, clusterv1.ManagedClusterStatus]( - clusterClient.ClusterV1().ManagedClusters()) - } - - workClient := fakeworkclient.NewSimpleClientset() - workInformerFactory := workinformers.NewSharedInformerFactory(workClient, 5*time.Minute) - workStore := workInformerFactory.Work().V1().ManifestWorks().Informer().GetStore() - for _, work := range c.works { - if err := workStore.Add(work); err != nil { - t.Fatal(err) - } - } - - _ = newGCClusterRbacController( - kubeClient, - clusterPatcher, - kubeInformerFactory.Rbac().V1().ClusterRoles().Lister(), - kubeInformerFactory.Rbac().V1().RoleBindings().Lister(), - workInformerFactory.Work().V1().ManifestWorks().Lister(), - register.NewNoopApprover(), - events.NewInMemoryRecorder(""), - c.resourceCleanupFeatureGateEnable, - ) - - ctrl := &gcClusterRbacController{ - kubeClient: kubeClient, - clusterRoleLister: kubeInformerFactory.Rbac().V1().ClusterRoles().Lister(), - roleBindingLister: kubeInformerFactory.Rbac().V1().RoleBindings().Lister(), - manifestWorkLister: workInformerFactory.Work().V1().ManifestWorks().Lister(), - clusterPatcher: clusterPatcher, - approver: register.NewNoopApprover(), - eventRecorder: events.NewInMemoryRecorder(""), - resourceCleanupFeatureGateEnable: c.resourceCleanupFeatureGateEnable, - } - op, err := ctrl.reconcile(context.TODO(), c.cluster, c.clusterName) - testingcommon.AssertError(t, err, "") - assert.Equal(t, op, c.expectedOp) - if clusterClient != nil { - c.validateActions(t, kubeClient.Actions(), clusterClient.Actions()) - } else { - c.validateActions(t, kubeClient.Actions(), nil) - } - }) - } -} diff --git a/pkg/registration/hub/gc/gc_resources.go b/pkg/registration/hub/gc/gc_resources.go index c89d99299..d230eef78 100644 --- a/pkg/registration/hub/gc/gc_resources.go +++ b/pkg/registration/hub/gc/gc_resources.go @@ -4,8 +4,8 @@ import ( "context" "fmt" "strconv" + "time" - "github.com/openshift/library-go/pkg/operator/events" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -15,12 +15,13 @@ import ( "k8s.io/klog/v2" clusterv1 "open-cluster-management.io/api/cluster/v1" + + "open-cluster-management.io/ocm/pkg/common/helpers" ) type gcResourcesController struct { metadataClient metadata.Interface resourceGVRList []schema.GroupVersionResource - eventRecorder events.Recorder } // the range of cleanupPriority is [0,100]. 
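Note: the gc_resources.go hunk below replaces the old three-valued gcReconcileOp (requeue/stop/continue) with a single sentinel error built by helpers.NewRequeueError, which callers test with errors.Is. A minimal, self-contained sketch of how such a requeue sentinel can work — the type name, fields, and queue wiring here are illustrative assumptions, not the actual pkg/common/helpers implementation:

```go
package main

import (
	"errors"
	"fmt"
	"time"
)

// RequeueError signals "retry after RequeueTime" rather than a hard failure.
// (Hypothetical shape; the real helpers type may differ.)
type RequeueError struct {
	msg         string
	RequeueTime time.Duration
}

func (e *RequeueError) Error() string {
	return fmt.Sprintf("%s, requeue after %s", e.msg, e.RequeueTime)
}

// NewRequeueError wraps a message and a retry delay into a RequeueError.
func NewRequeueError(msg string, requeueTime time.Duration) *RequeueError {
	return &RequeueError{msg: msg, RequeueTime: requeueTime}
}

// Package-level sentinel, mirroring the patch's requeueError var; errors.Is
// matches it by identity because reconcile returns this very value.
var requeueError = NewRequeueError("gc requeue", 5*time.Second)

// reconcile returns the sentinel while resources are still draining.
func reconcile(remaining int) error {
	if remaining > 0 {
		return requeueError
	}
	return nil
}

func main() {
	if err := reconcile(1); errors.Is(err, requeueError) {
		var rq *RequeueError
		errors.As(err, &rq)
		// a factory controller would call controllerContext.Queue().AddAfter(key, rq.RequeueTime)
		fmt.Println("requeue after", rq.RequeueTime)
	}
}
```

This collapses requeue/stop/continue into ordinary error flow: errors.Is(err, requeueError) means "try again later", anything else is a real failure, which is how the reworked sync loops in this patch read their single error return.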
@@ -34,20 +35,18 @@ const ( maxCleanupPriority cleanupPriority = 100 ) -func newGCResourcesController( - metadataClient metadata.Interface, - resourceList []schema.GroupVersionResource, - eventRecorder events.Recorder, -) gcReconciler { +var requeueError = helpers.NewRequeueError("gc requeue", 5*time.Second) + +func newGCResourcesController(metadataClient metadata.Interface, + resourceList []schema.GroupVersionResource) *gcResourcesController { return &gcResourcesController{ metadataClient: metadataClient, resourceGVRList: resourceList, - eventRecorder: eventRecorder.WithComponentSuffix("gc-resources"), } } func (r *gcResourcesController) reconcile(ctx context.Context, - cluster *clusterv1.ManagedCluster, clusterNamespace string) (gcReconcileOp, error) { + cluster *clusterv1.ManagedCluster, clusterNamespace string) error { var errs []error // delete the resources in order. to delete the next resource after all resource instances are deleted. for _, resourceGVR := range r.resourceGVRList { @@ -57,8 +56,7 @@ func (r *gcResourcesController) reconcile(ctx context.Context, continue } if err != nil { - return gcReconcileContinue, fmt.Errorf("failed to list resource %v. err:%v", - resourceGVR.Resource, err) + return fmt.Errorf("failed to list resource %v. err:%v", resourceGVR.Resource, err) } if len(resourceList.Items) == 0 { continue @@ -87,10 +85,9 @@ func (r *gcResourcesController) reconcile(ctx context.Context, } } if len(errs) != 0 { - return gcReconcileContinue, fmt.Errorf("failed to clean up %v. err:%v", - resourceGVR.Resource, utilerrors.NewAggregate(errs)) + return fmt.Errorf("failed to clean up %v. err:%v", resourceGVR.Resource, utilerrors.NewAggregate(errs)) } - return gcReconcileRequeue, nil + return requeueError } if cluster != nil { @@ -101,7 +98,7 @@ func (r *gcResourcesController) reconcile(ctx context.Context, Message: "No cleaned resource in cluster ns.", }) } - return gcReconcileContinue, nil + return nil } func mapPriorityResource(resourceList *metav1.PartialObjectMetadataList) map[cleanupPriority][]string { diff --git a/pkg/registration/hub/gc/gc_resources_test.go b/pkg/registration/hub/gc/gc_resources_test.go index 5c1bccffa..0d6b020aa 100644 --- a/pkg/registration/hub/gc/gc_resources_test.go +++ b/pkg/registration/hub/gc/gc_resources_test.go @@ -4,7 +4,6 @@ import ( "context" "testing" - "github.com/openshift/library-go/pkg/operator/events" "github.com/stretchr/testify/assert" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -23,11 +22,10 @@ import ( func TestGCResourcesController(t *testing.T) { cases := []struct { name string - clusterNamespace string cluster *clusterv1.ManagedCluster + clusterNamespace string objs []runtime.Object - expectedOp gcReconcileOp - expectedErr error + expectedError error validateActions func(t *testing.T, kubeActions []clienttesting.Action) }{ { @@ -35,19 +33,17 @@ func TestGCResourcesController(t *testing.T) { cluster: testinghelpers.NewDeletingManagedCluster(), clusterNamespace: testinghelpers.TestManagedClusterName, objs: []runtime.Object{newAddonMetadata(testinghelpers.TestManagedClusterName, "test", nil)}, - expectedOp: gcReconcileRequeue, - expectedErr: nil, + expectedError: requeueError, validateActions: func(t *testing.T, kubeActions []clienttesting.Action) { testingcommon.AssertActions(t, kubeActions, "list", "delete") }, }, { name: "delete work", - clusterNamespace: testinghelpers.TestManagedClusterName, cluster: testinghelpers.NewDeletingManagedCluster(), + clusterNamespace: 
testinghelpers.TestManagedClusterName, objs: []runtime.Object{newWorkMetadata(testinghelpers.TestManagedClusterName, "test", nil)}, - expectedOp: gcReconcileRequeue, - expectedErr: nil, + expectedError: requeueError, validateActions: func(t *testing.T, kubeActions []clienttesting.Action) { testingcommon.AssertActions(t, kubeActions, "list", "list", "delete") }, @@ -60,18 +56,15 @@ func TestGCResourcesController(t *testing.T) { _ = workv1.Install(scheme) _ = metav1.AddMetaToScheme(scheme) metadataClient := fakemetadataclient.NewSimpleMetadataClient(scheme, c.objs...) - _ = newGCResourcesController(metadataClient, []schema.GroupVersionResource{addonGvr, workGvr}, - events.NewInMemoryRecorder("")) + _ = newGCResourcesController(metadataClient, []schema.GroupVersionResource{addonGvr, workGvr}) ctrl := &gcResourcesController{ metadataClient: metadataClient, resourceGVRList: []schema.GroupVersionResource{addonGvr, workGvr}, - eventRecorder: events.NewInMemoryRecorder(""), } - op, err := ctrl.reconcile(context.TODO(), c.cluster, c.clusterNamespace) - assert.Equal(t, c.expectedErr, err) - assert.Equal(t, c.expectedOp, op) + err := ctrl.reconcile(context.TODO(), c.cluster, c.clusterNamespace) + assert.Equal(t, c.expectedError, err) c.validateActions(t, metadataClient.Actions()) }) } diff --git a/pkg/registration/hub/managedcluster/controller.go b/pkg/registration/hub/managedcluster/controller.go index c56fabddd..679a5f22a 100644 --- a/pkg/registration/hub/managedcluster/controller.go +++ b/pkg/registration/hub/managedcluster/controller.go @@ -10,22 +10,29 @@ import ( "github.com/openshift/library-go/pkg/operator/resource/resourceapply" operatorhelpers "github.com/openshift/library-go/pkg/operator/v1helpers" corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" rbacv1informers "k8s.io/client-go/informers/rbac/v1" "k8s.io/client-go/kubernetes" + rbacv1listers "k8s.io/client-go/listers/rbac/v1" "k8s.io/klog/v2" clientset "open-cluster-management.io/api/client/cluster/clientset/versioned" informerv1 "open-cluster-management.io/api/client/cluster/informers/externalversions/cluster/v1" listerv1 "open-cluster-management.io/api/client/cluster/listers/cluster/v1" + workinformers "open-cluster-management.io/api/client/work/informers/externalversions/work/v1" + worklister "open-cluster-management.io/api/client/work/listers/work/v1" v1 "open-cluster-management.io/api/cluster/v1" ocmfeature "open-cluster-management.io/api/feature" + workv1 "open-cluster-management.io/api/work/v1" "open-cluster-management.io/sdk-go/pkg/patcher" "open-cluster-management.io/ocm/pkg/common/apply" + commonhelper "open-cluster-management.io/ocm/pkg/common/helpers" "open-cluster-management.io/ocm/pkg/common/queue" "open-cluster-management.io/ocm/pkg/features" "open-cluster-management.io/ocm/pkg/registration/helpers" @@ -37,16 +44,25 @@ import ( // expected to be changed or removed outside. const clusterAcceptedAnnotationKey = "open-cluster-management.io/automatically-accepted-on" +var ( + workRoleBindingName = func(clusterName string) string { + return fmt.Sprintf("open-cluster-management:managedcluster:%s:work", clusterName) + } + requeueError = commonhelper.NewRequeueError("cluster rbac cleanup requeue", 5*time.Second) +) + // managedClusterController reconciles instances of ManagedCluster on the hub. 
type managedClusterController struct { - kubeClient kubernetes.Interface - clusterClient clientset.Interface - clusterLister listerv1.ManagedClusterLister - applier *apply.PermissionApplier - patcher patcher.Patcher[*v1.ManagedCluster, v1.ManagedClusterSpec, v1.ManagedClusterStatus] - approver register.Approver - hubDriver register.HubDriver - eventRecorder events.Recorder + kubeClient kubernetes.Interface + clusterClient clientset.Interface + roleBindingLister rbacv1listers.RoleBindingLister + manifestWorkLister worklister.ManifestWorkLister + clusterLister listerv1.ManagedClusterLister + applier *apply.PermissionApplier + patcher patcher.Patcher[*v1.ManagedCluster, v1.ManagedClusterSpec, v1.ManagedClusterStatus] + approver register.Approver + hubDriver register.HubDriver + eventRecorder events.Recorder } // NewManagedClusterController creates a new managed cluster controller @@ -58,16 +74,19 @@ func NewManagedClusterController( clusterRoleInformer rbacv1informers.ClusterRoleInformer, rolebindingInformer rbacv1informers.RoleBindingInformer, clusterRoleBindingInformer rbacv1informers.ClusterRoleBindingInformer, + manifestWorkInformer workinformers.ManifestWorkInformer, approver register.Approver, hubDriver register.HubDriver, recorder events.Recorder) factory.Controller { c := &managedClusterController{ - kubeClient: kubeClient, - clusterClient: clusterClient, - clusterLister: clusterInformer.Lister(), - approver: approver, - hubDriver: hubDriver, + kubeClient: kubeClient, + clusterClient: clusterClient, + roleBindingLister: rolebindingInformer.Lister(), + manifestWorkLister: manifestWorkInformer.Lister(), + clusterLister: clusterInformer.Lister(), + hubDriver: hubDriver, + approver: approver, applier: apply.NewPermissionApplier( kubeClient, roleInformer.Lister(), @@ -99,8 +118,7 @@ func (c *managedClusterController) sync(ctx context.Context, syncCtx factory.Syn logger.V(4).Info("Reconciling ManagedCluster", "managedClusterName", managedClusterName) managedCluster, err := c.clusterLister.Get(managedClusterName) if errors.IsNotFound(err) { - // Spoke cluster not found, could have been deleted, do nothing. - return nil + return c.removeClusterRbac(ctx, managedClusterName, true) } if err != nil { return err @@ -109,8 +127,16 @@ func (c *managedClusterController) sync(ctx context.Context, syncCtx factory.Syn newManagedCluster := managedCluster.DeepCopy() if !managedCluster.DeletionTimestamp.IsZero() { - // the cleanup job is moved to gc controller - return nil + if err = c.approver.Cleanup(ctx, managedCluster); err != nil { + return err + } + + err = c.removeClusterRbac(ctx, managedClusterName, true) + if err != nil { + return err + } + + return c.patcher.RemoveFinalizer(ctx, newManagedCluster, v1.ManagedClusterFinalizer) } if features.HubMutableFeatureGate.Enabled(ocmfeature.ManagedClusterAutoApproval) { @@ -144,25 +170,6 @@ func (c *managedClusterController) sync(ctx context.Context, syncCtx factory.Syn return aggErr } - // Remove the cluster role binding files for registration-agent and work-agent. - removeResults := resourceapply.DeleteAll(ctx, - resourceapply.NewKubeClientHolder(c.kubeClient), - c.eventRecorder, - helpers.ManagedClusterAssetFn(manifests.RBACManifests, managedClusterName), - manifests.ClusterSpecificRoleBindings...) 
-	for _, result := range removeResults {
-		if result.Error != nil {
-			errs = append(errs, fmt.Errorf("%q (%T): %v", result.File, result.Type, result.Error))
-		}
-	}
-	if aggErr := operatorhelpers.NewMultiLineAggregate(errs); aggErr != nil {
-		return aggErr
-	}
-
-	if err = c.approver.Cleanup(ctx, managedCluster); err != nil {
-		return err
-	}
-
 	meta.SetStatusCondition(&newManagedCluster.Status.Conditions, metav1.Condition{
 		Type:   v1.ManagedClusterConditionHubAccepted,
 		Status: metav1.ConditionFalse,
@@ -173,7 +180,9 @@
 	if _, err := c.patcher.PatchStatus(ctx, newManagedCluster, newManagedCluster.Status, managedCluster.Status); err != nil {
 		return err
 	}
-	return nil
+
+	// Remove the cluster role binding files for registration-agent and work-agent.
+	return c.removeClusterRbac(ctx, managedClusterName, managedCluster.Spec.HubAcceptsClient)
 }
 
 // TODO consider to add the managedcluster-namespace.yaml back to staticFiles,
@@ -198,6 +207,11 @@
 		errs = append(errs, err)
 	}
 
+	_, err = c.patcher.AddFinalizer(ctx, newManagedCluster, v1.ManagedClusterFinalizer)
+	if err != nil {
+		return err
+	}
+
 	resourceResults := c.applier.Apply(ctx, syncCtx.Recorder(),
 		helpers.ManagedClusterAssetFnWithAccepted(manifests.RBACManifests, managedClusterName,
 			managedCluster.Spec.HubAcceptsClient),
 		append(manifests.ClusterSpecificRBACFiles, manifests.ClusterSpecificRoleBindings...)...)
@@ -256,3 +270,48 @@
 		types.MergePatchType, []byte(patch), metav1.PatchOptions{})
 	return err
 }
+
+// remove the cluster rbac resources first.
+// the work roleBinding with a finalizer remains because it is used by the work agent to operate on the works.
+// the finalizer on the work roleBinding will be removed after there are no works in the ns.
+func (c *managedClusterController) removeClusterRbac(ctx context.Context, clusterName string, accepted bool) error {
+	var errs []error
+	assetFn := helpers.ManagedClusterAssetFnWithAccepted(manifests.RBACManifests, clusterName, accepted)
+	files := manifests.ClusterSpecificRoleBindings
+	if accepted {
+		files = append(files, manifests.ClusterSpecificRBACFiles...)
+	}
+
+	resourceResults := resourceapply.DeleteAll(ctx, resourceapply.NewKubeClientHolder(c.kubeClient),
+		c.eventRecorder, assetFn, files...)
+ for _, result := range resourceResults { + if result.Error != nil { + errs = append(errs, fmt.Errorf("%q (%T): %v", result.File, result.Type, result.Error)) + } + } + + works, err := c.manifestWorkLister.ManifestWorks(clusterName).List(labels.Everything()) + if err != nil && !errors.IsNotFound(err) { + errs = append(errs, err) + return operatorhelpers.NewMultiLineAggregate(errs) + } + if len(works) != 0 { + return requeueError + } + + return c.removeFinalizerFromWorkRoleBinding(ctx, clusterName) +} + +func (c *managedClusterController) removeFinalizerFromWorkRoleBinding(ctx context.Context, clusterName string) error { + workRoleBinding, err := c.roleBindingLister.RoleBindings(clusterName).Get(workRoleBindingName(clusterName)) + switch { + case errors.IsNotFound(err): + return nil + case err != nil: + return err + } + + roleBindingFinalizerPatcher := patcher.NewPatcher[*rbacv1.RoleBinding, rbacv1.RoleBinding, + rbacv1.RoleBinding](c.kubeClient.RbacV1().RoleBindings(clusterName)) + return roleBindingFinalizerPatcher.RemoveFinalizer(ctx, workRoleBinding, workv1.ManifestWorkFinalizer) +} diff --git a/pkg/registration/hub/managedcluster/controller_test.go b/pkg/registration/hub/managedcluster/controller_test.go index cddf84d85..9ad89146f 100644 --- a/pkg/registration/hub/managedcluster/controller_test.go +++ b/pkg/registration/hub/managedcluster/controller_test.go @@ -8,6 +8,8 @@ import ( "time" "github.com/openshift/library-go/pkg/operator/events/eventstesting" + "github.com/pkg/errors" + rbacv1 "k8s.io/api/rbac/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" kubeinformers "k8s.io/client-go/informers" @@ -16,8 +18,11 @@ import ( clusterfake "open-cluster-management.io/api/client/cluster/clientset/versioned/fake" clusterinformers "open-cluster-management.io/api/client/cluster/informers/externalversions" + fakeworkclient "open-cluster-management.io/api/client/work/clientset/versioned/fake" + workinformers "open-cluster-management.io/api/client/work/informers/externalversions" v1 "open-cluster-management.io/api/cluster/v1" ocmfeature "open-cluster-management.io/api/feature" + workv1 "open-cluster-management.io/api/work/v1" "open-cluster-management.io/sdk-go/pkg/patcher" "open-cluster-management.io/ocm/pkg/common/apply" @@ -32,6 +37,8 @@ func TestSyncManagedCluster(t *testing.T) { cases := []struct { name string autoApprovalEnabled bool + roleBindings []runtime.Object + manifestWorks []runtime.Object startingObjects []runtime.Object validateClusterActions func(t *testing.T, actions []clienttesting.Action) validateKubeActions func(t *testing.T, actions []clienttesting.Action) @@ -43,7 +50,11 @@ func TestSyncManagedCluster(t *testing.T) { testingcommon.AssertNoActions(t, actions) }, validateKubeActions: func(t *testing.T, actions []clienttesting.Action) { - testingcommon.AssertNoActions(t, actions) + testingcommon.AssertActions(t, actions, + "delete", // clusterrole + "delete", // clusterrolebinding + "delete", // registration rolebinding + "delete") // work rolebinding }, }, { @@ -127,14 +138,56 @@ func TestSyncManagedCluster(t *testing.T) { }, }, { - name: "delete a spoke cluster", + name: "delete a spoke cluster without manifestworks", + roleBindings: []runtime.Object{testinghelpers.NewRoleBinding(testinghelpers.TestManagedClusterName, + workRoleBindingName(testinghelpers.TestManagedClusterName), []string{workv1.ManifestWorkFinalizer}, + nil, false)}, startingObjects: []runtime.Object{testinghelpers.NewDeletingManagedCluster()}, validateClusterActions: func(t 
*testing.T, actions []clienttesting.Action) { - testingcommon.AssertNoActions(t, actions) + testingcommon.AssertActions(t, actions, "patch") + patch := actions[0].(clienttesting.PatchAction).GetPatch() + managedCluster := &v1.ManagedCluster{} + err := json.Unmarshal(patch, managedCluster) + if err != nil { + t.Fatal(err) + } + if len(managedCluster.Finalizers) != 0 { + t.Errorf("expected no finalizer") + } }, validateKubeActions: func(t *testing.T, actions []clienttesting.Action) { + testingcommon.AssertActions(t, actions, + "delete", // clusterrole + "delete", // clusterrolebinding + "delete", // registration rolebinding + "delete", // work rolebinding + "patch") // work rolebinding + patch := actions[4].(clienttesting.PatchAction).GetPatch() + roleBinding := &rbacv1.RoleBinding{} + err := json.Unmarshal(patch, roleBinding) + if err != nil { + t.Fatal(err) + } + if len(roleBinding.Finalizers) != 0 { + t.Errorf("expected no finalizer") + } + }, + }, + { + name: "delete a spoke cluster with manifestworks", + startingObjects: []runtime.Object{testinghelpers.NewDeletingManagedCluster()}, + manifestWorks: []runtime.Object{testinghelpers.NewManifestWork(testinghelpers.TestManagedClusterName, + "test", nil, nil, nil, nil)}, + validateClusterActions: func(t *testing.T, actions []clienttesting.Action) { testingcommon.AssertNoActions(t, actions) }, + validateKubeActions: func(t *testing.T, actions []clienttesting.Action) { + testingcommon.AssertActions(t, actions, + "delete", // clusterrole + "delete", // clusterrolebinding + "delete", // registration rolebinding + "delete") // work rolebinding + }, }, { name: "should accept the clusters when auto approval is enabled", @@ -168,9 +221,17 @@ func TestSyncManagedCluster(t *testing.T) { for _, c := range cases { t.Run(c.name, func(t *testing.T) { clusterClient := clusterfake.NewSimpleClientset(c.startingObjects...) - kubeClient := kubefake.NewSimpleClientset() - clusterInformerFactory := clusterinformers.NewSharedInformerFactory(clusterClient, time.Minute*10) + kubeClient := kubefake.NewSimpleClientset(c.roleBindings...) + kubeInformer := kubeinformers.NewSharedInformerFactoryWithOptions(kubeClient, time.Minute*10) + roleBindingStore := kubeInformer.Rbac().V1().RoleBindings().Informer().GetStore() + for _, roleBinding := range c.roleBindings { + if err := roleBindingStore.Add(roleBinding); err != nil { + t.Fatal(err) + } + } + + clusterInformerFactory := clusterinformers.NewSharedInformerFactory(clusterClient, time.Minute*10) clusterStore := clusterInformerFactory.Cluster().V1().ManagedClusters().Informer().GetStore() for _, cluster := range c.startingObjects { if err := clusterStore.Add(cluster); err != nil { @@ -178,10 +239,21 @@ func TestSyncManagedCluster(t *testing.T) { } } + workClient := fakeworkclient.NewSimpleClientset(c.manifestWorks...) 
+			workInformerFactory := workinformers.NewSharedInformerFactory(workClient, time.Minute*10)
+			workStore := workInformerFactory.Work().V1().ManifestWorks().Informer().GetStore()
+			for _, work := range c.manifestWorks {
+				if err := workStore.Add(work); err != nil {
+					t.Fatal(err)
+				}
+			}
+
 			features.HubMutableFeatureGate.Set(fmt.Sprintf("%s=%v", ocmfeature.ManagedClusterAutoApproval, c.autoApprovalEnabled))
 			ctrl := managedClusterController{
 				kubeClient,
 				clusterClient,
+				kubeInformer.Rbac().V1().RoleBindings().Lister(),
+				workInformerFactory.Work().V1().ManifestWorks().Lister(),
 				clusterInformerFactory.Cluster().V1().ManagedClusters().Lister(),
 				apply.NewPermissionApplier(
 					kubeClient,
@@ -195,7 +267,7 @@
 				csr.NewCSRHubDriver(),
 				eventstesting.NewTestingEventRecorder(t)}
 			syncErr := ctrl.sync(context.TODO(), testingcommon.NewFakeSyncContext(t, testinghelpers.TestManagedClusterName))
-			if syncErr != nil {
+			if syncErr != nil && !errors.Is(syncErr, requeueError) {
 				t.Errorf("unexpected err: %v", syncErr)
 			}
 
diff --git a/pkg/registration/hub/manager.go b/pkg/registration/hub/manager.go
index bccef88df..f819d1bbd 100644
--- a/pkg/registration/hub/manager.go
+++ b/pkg/registration/hub/manager.go
@@ -186,6 +186,7 @@ func (m *HubManagerOptions) RunControllerManagerWithInformers(
 		kubeInformers.Rbac().V1().ClusterRoles(),
 		kubeInformers.Rbac().V1().RoleBindings(),
 		kubeInformers.Rbac().V1().ClusterRoleBindings(),
+		workInformers.Work().V1().ManifestWorks(),
 		approver,
 		hubDriver,
 		controllerContext.EventRecorder,
@@ -296,17 +297,11 @@ func (m *HubManagerOptions) RunControllerManagerWithInformers(
 	}
 
 	gcController := gc.NewGCController(
-		kubeInformers.Rbac().V1().ClusterRoles().Lister(),
-		kubeInformers.Rbac().V1().RoleBindings().Lister(),
 		clusterInformers.Cluster().V1().ManagedClusters(),
-		workInformers.Work().V1().ManifestWorks().Lister(),
 		clusterClient,
-		kubeClient,
 		metadataClient,
-		approver,
 		controllerContext.EventRecorder,
 		m.GCResourceList,
-		features.HubMutableFeatureGate.Enabled(ocmfeature.ResourceCleanup),
 	)
 
 	go clusterInformers.Start(ctx.Done())
@@ -341,7 +336,9 @@ func (m *HubManagerOptions) RunControllerManagerWithInformers(
 		go clusterImporter.Run(ctx, 1)
 	}
 
-	go gcController.Run(ctx, 1)
+	if features.HubMutableFeatureGate.Enabled(ocmfeature.ResourceCleanup) {
+		go gcController.Run(ctx, 1)
+	}
 
 	<-ctx.Done()
 	return nil
diff --git a/test/integration/registration/addon_lease_test.go b/test/integration/registration/addon_lease_test.go
index 08eb415c4..936b84ad9 100644
--- a/test/integration/registration/addon_lease_test.go
+++ b/test/integration/registration/addon_lease_test.go
@@ -17,6 +17,7 @@ import (
 	addonv1alpha1 "open-cluster-management.io/api/addon/v1alpha1"
 	clusterv1 "open-cluster-management.io/api/cluster/v1"
 
+	commonhelpers "open-cluster-management.io/ocm/pkg/common/helpers"
 	commonoptions "open-cluster-management.io/ocm/pkg/common/options"
 	"open-cluster-management.io/ocm/pkg/registration/spoke"
 	"open-cluster-management.io/ocm/test/integration/util"
@@ -50,11 +51,8 @@ var _ = ginkgo.Describe("Addon Lease Resync", func() {
 			if err != nil {
 				return false
 			}
-			if len(spokeCluster.Finalizers) != 1 {
-				return false
-			}
 
-			if spokeCluster.Finalizers[0] != clusterv1.ManagedClusterFinalizer {
+			if !commonhelpers.HasFinalizer(spokeCluster.Finalizers, clusterv1.ManagedClusterFinalizer) {
 				return false
 			}
 
diff --git a/test/integration/registration/addon_registration_test.go b/test/integration/registration/addon_registration_test.go
index 756c4f04d..7c064ba59 100644
--- a/test/integration/registration/addon_registration_test.go
+++ b/test/integration/registration/addon_registration_test.go
@@ -20,6 +20,7 @@ import (
 	addonv1alpha1 "open-cluster-management.io/api/addon/v1alpha1"
 	clusterv1 "open-cluster-management.io/api/cluster/v1"
 
+	commonhelpers "open-cluster-management.io/ocm/pkg/common/helpers"
 	commonoptions "open-cluster-management.io/ocm/pkg/common/options"
 	"open-cluster-management.io/ocm/pkg/registration/register"
 	"open-cluster-management.io/ocm/pkg/registration/register/csr"
@@ -83,11 +84,8 @@ var _ = ginkgo.Describe("Addon Registration", func() {
 			if err != nil {
 				return false
 			}
-			if len(spokeCluster.Finalizers) != 1 {
-				return false
-			}
 
-			if spokeCluster.Finalizers[0] != clusterv1.ManagedClusterFinalizer {
+			if !commonhelpers.HasFinalizer(spokeCluster.Finalizers, clusterv1.ManagedClusterFinalizer) {
 				return false
 			}
 
diff --git a/test/integration/registration/managedcluster_deletion_test.go b/test/integration/registration/managedcluster_deletion_test.go
index ddd623f44..543932d61 100644
--- a/test/integration/registration/managedcluster_deletion_test.go
+++ b/test/integration/registration/managedcluster_deletion_test.go
@@ -13,6 +13,7 @@ import (
 
 	clusterv1 "open-cluster-management.io/api/cluster/v1"
 
+	commonhelpers "open-cluster-management.io/ocm/pkg/common/helpers"
 	testinghelpers "open-cluster-management.io/ocm/pkg/registration/helpers/testing"
 )
 
@@ -104,6 +105,22 @@ var _ = ginkgo.Describe("Cluster deleting", func() {
 			return nil
 		}, eventuallyTimeout, eventuallyInterval).Should(gomega.Succeed())
 
+		// check finalizers are added
+		gomega.Eventually(func() error {
+			cluster, err := clusterClient.ClusterV1().ManagedClusters().Get(context.Background(),
+				managedCluster.Name, metav1.GetOptions{})
+			if err != nil {
+				return err
+			}
+			if !commonhelpers.HasFinalizer(cluster.Finalizers, clusterv1.ManagedClusterFinalizer) {
+				return fmt.Errorf("managedCluster.Finalizers does not contain api-resource-cleanup")
+			}
+			if !commonhelpers.HasFinalizer(cluster.Finalizers, commonhelpers.GcFinalizer) {
+				return fmt.Errorf("managedCluster.Finalizers does not contain resource-cleanup")
+			}
+			return nil
+		}, eventuallyTimeout, eventuallyInterval).Should(gomega.Succeed())
+
 		// delete cluster
 		err = clusterClient.ClusterV1().ManagedClusters().Delete(context.Background(), managedCluster.Name, metav1.DeleteOptions{})
 		gomega.Expect(err).NotTo(gomega.HaveOccurred())
diff --git a/test/integration/registration/spokeagent_rebootstrap_test.go b/test/integration/registration/spokeagent_rebootstrap_test.go
index 27a1f4143..7c14aafb4 100644
--- a/test/integration/registration/spokeagent_rebootstrap_test.go
+++ b/test/integration/registration/spokeagent_rebootstrap_test.go
@@ -28,6 +28,7 @@ import (
 	clusterclientset "open-cluster-management.io/api/client/cluster/clientset/versioned"
 	clusterv1 "open-cluster-management.io/api/cluster/v1"
 
+	commonhelpers "open-cluster-management.io/ocm/pkg/common/helpers"
 	commonoptions "open-cluster-management.io/ocm/pkg/common/options"
 	"open-cluster-management.io/ocm/pkg/registration/hub"
 	"open-cluster-management.io/ocm/pkg/registration/register"
@@ -128,11 +129,8 @@ var _ = ginkgo.Describe("Rebootstrap", func() {
 			if err != nil {
 				return false
 			}
-			if len(spokeCluster.Finalizers) != 1 {
-				return false
-			}
 
-			if spokeCluster.Finalizers[0] != clusterv1.ManagedClusterFinalizer {
+			if !commonhelpers.HasFinalizer(spokeCluster.Finalizers, clusterv1.ManagedClusterFinalizer) {
 				return false
 			}
 
diff --git a/test/integration/registration/spokeagent_switch_hub_test.go b/test/integration/registration/spokeagent_switch_hub_test.go
index 4765a7668..5d415eaa2 100644
--- a/test/integration/registration/spokeagent_switch_hub_test.go
+++ b/test/integration/registration/spokeagent_switch_hub_test.go
@@ -25,6 +25,7 @@ import (
 	clusterv1 "open-cluster-management.io/api/cluster/v1"
 	ocmfeature "open-cluster-management.io/api/feature"
 
+	commonhelpers "open-cluster-management.io/ocm/pkg/common/helpers"
 	commonoptions "open-cluster-management.io/ocm/pkg/common/options"
 	"open-cluster-management.io/ocm/pkg/features"
 	"open-cluster-management.io/ocm/pkg/registration/hub"
@@ -255,11 +256,7 @@ func approveAndAcceptManagedCluster(managedClusterName string,
 			if err != nil {
 				return false
 			}
-			if len(spokeCluster.Finalizers) != 1 {
-				return false
-			}
-
-			if spokeCluster.Finalizers[0] != clusterv1.ManagedClusterFinalizer {
+			if !commonhelpers.HasFinalizer(spokeCluster.Finalizers, clusterv1.ManagedClusterFinalizer) {
 				return false
 			}
 
diff --git a/test/integration/registration/spokecluster_claim_test.go b/test/integration/registration/spokecluster_claim_test.go
index ae90156b4..30c169c86 100644
--- a/test/integration/registration/spokecluster_claim_test.go
+++ b/test/integration/registration/spokecluster_claim_test.go
@@ -16,6 +16,7 @@ import (
 	clusterv1 "open-cluster-management.io/api/cluster/v1"
 	clusterv1alpha1 "open-cluster-management.io/api/cluster/v1alpha1"
 
+	commonhelpers "open-cluster-management.io/ocm/pkg/common/helpers"
 	commonoptions "open-cluster-management.io/ocm/pkg/common/options"
 	"open-cluster-management.io/ocm/pkg/registration/spoke"
 	"open-cluster-management.io/ocm/test/integration/util"
@@ -89,11 +90,8 @@ var _ = ginkgo.Describe("Cluster Claim", func() {
 			if err != nil {
 				return false
 			}
-			if len(spokeCluster.Finalizers) != 1 {
-				return false
-			}
 
-			if spokeCluster.Finalizers[0] != clusterv1.ManagedClusterFinalizer {
+			if !commonhelpers.HasFinalizer(spokeCluster.Finalizers, clusterv1.ManagedClusterFinalizer) {
 				return false
 			}
 
diff --git a/test/integration/registration/spokecluster_joining_test.go b/test/integration/registration/spokecluster_joining_test.go
index 1758d3724..b5686acce 100644
--- a/test/integration/registration/spokecluster_joining_test.go
+++ b/test/integration/registration/spokecluster_joining_test.go
@@ -13,6 +13,7 @@ import (
 
 	clusterv1 "open-cluster-management.io/api/cluster/v1"
 
+	commonhelpers "open-cluster-management.io/ocm/pkg/common/helpers"
 	commonoptions "open-cluster-management.io/ocm/pkg/common/options"
 	"open-cluster-management.io/ocm/pkg/registration/spoke"
 	"open-cluster-management.io/ocm/test/integration/util"
@@ -70,11 +71,8 @@ var _ = ginkgo.Describe("Joining Process", func() {
 			if err != nil {
 				return err
 			}
-			if len(spokeCluster.Finalizers) != 1 {
-				return fmt.Errorf("cluster should have finalizer")
-			}
 
-			if spokeCluster.Finalizers[0] != clusterv1.ManagedClusterFinalizer {
+			if !commonhelpers.HasFinalizer(spokeCluster.Finalizers, clusterv1.ManagedClusterFinalizer) {
 				return fmt.Errorf("finalizer is not correct")
 			}
 
diff --git a/test/integration/registration/spokecluster_status_test.go b/test/integration/registration/spokecluster_status_test.go
index 66b6aa827..e7b9394df 100644
--- a/test/integration/registration/spokecluster_status_test.go
+++ b/test/integration/registration/spokecluster_status_test.go
@@ -11,6 +11,7 @@ import (
 
 	clusterv1 "open-cluster-management.io/api/cluster/v1"
 
+	commonhelpers "open-cluster-management.io/ocm/pkg/common/helpers"
 	commonoptions "open-cluster-management.io/ocm/pkg/common/options"
 	"open-cluster-management.io/ocm/pkg/registration/spoke"
 	"open-cluster-management.io/ocm/test/integration/util"
@@ -27,7 +28,7 @@ var _ = ginkgo.Describe("Collecting Node Resource", func() {
 		gomega.Expect(err).NotTo(gomega.HaveOccurred())
 
 		managedClusterName := "resorucetest-managedcluster"
-		//#nosec G101
+		// #nosec G101
 		hubKubeconfigSecret := "resorucetest-hub-kubeconfig-secret"
 		hubKubeconfigDir := path.Join(util.TestDir, "resorucetest", "hub-kubeconfig")
 
@@ -65,11 +66,7 @@ var _ = ginkgo.Describe("Collecting Node Resource", func() {
 			if err != nil {
 				return false
 			}
-			if len(spokeCluster.Finalizers) != 1 {
-				return false
-			}
-
-			if spokeCluster.Finalizers[0] != clusterv1.ManagedClusterFinalizer {
+			if !commonhelpers.HasFinalizer(spokeCluster.Finalizers, clusterv1.ManagedClusterFinalizer) {
 				return false
 			}