[Fix]: redis cluster status after scale up #682

Closed · wants to merge 13 commits
1 change: 1 addition & 0 deletions .github/workflows/e2e.yaml
@@ -17,6 +17,7 @@ jobs:
testpath:
- ./tests/e2e/v1beta2/setup
- ./tests/e2e/v1beta2/ignore-annots
- ./tests/e2e/v1beta2/scaling

steps:
- name: Checkout code
10 changes: 10 additions & 0 deletions api/status/redis-cluster_status.go
@@ -7,12 +7,22 @@ const (
InitializingClusterLeaderReason string = "RedisCluster is initializing leaders"
InitializingClusterFollowerReason string = "RedisCluster is initializing followers"
BootstrapClusterReason string = "RedisCluster is bootstrapping"
ScalingUpLeaderClusterReason string = "RedisCluster is scaling up leaders"
ScalingUpFollowerClusterReason string = "RedisCluster is scaling up followers"
ScalingDownLeaderClusterReason string = "RedisCluster is scaling down leaders"
ScalingDownFollowerClusterReason string = "RedisCluster is scaling down followers"
ReshardingClusterReason string = "RedisCluster is resharding"
RebalanceClusterReason string = "RedisCluster is rebalancing"
)

// Status Field of the Redis Cluster
const (
RedisClusterReady RedisClusterState = "Ready"
RedisClusterInitializing RedisClusterState = "Initializing"
RedisClusterBootstrap RedisClusterState = "Bootstrap"
RedisClusterScalingUp RedisClusterState = "ScalingUp"
RedisClusterScalingDown RedisClusterState = "ScalingDown"
RedisClusterResharding RedisClusterState = "Resharding"
RedisClusterRebalancing RedisClusterState = "Rebalancing"
// RedisClusterFailed RedisClusterState = "Failed"
)
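
For orientation, the controller changes below drive these states through k8sutils.UpdateRedisClusterStatus(instance, state, reason, leaderCount, followerCount). The following is a minimal, self-contained sketch of that call pattern only, not the operator's actual implementation: the local types and the State/Reason field names are assumptions, while ReadyLeaderReplicas and ReadyFollowerReplicas match the fields asserted by the new e2e tests.

package main

import "fmt"

// Illustrative-only types; the real definitions live in api/status and the v1beta2 API types.
type RedisClusterState string

type RedisClusterStatus struct {
	State                 RedisClusterState // assumed field name
	Reason                string            // assumed field name
	ReadyLeaderReplicas   int32
	ReadyFollowerReplicas int32
}

// updateStatus mirrors the argument shape of the k8sutils.UpdateRedisClusterStatus calls in
// the controller diff below: (state, reason, ready leader count, ready follower count).
// Persisting the change through the status subresource client is deliberately elided.
func updateStatus(s *RedisClusterStatus, state RedisClusterState, reason string, leaders, followers int32) {
	s.State, s.Reason = state, reason
	s.ReadyLeaderReplicas, s.ReadyFollowerReplicas = leaders, followers
}

func main() {
	st := &RedisClusterStatus{}
	// Example scale-down sequence from the controller below, starting from 4 leaders / 4 followers.
	updateStatus(st, "Resharding", "RedisCluster is resharding", 4, 4)
	updateStatus(st, "ScalingDown", "RedisCluster is scaling down followers", 4, 3)
	updateStatus(st, "ScalingDown", "RedisCluster is scaling down leaders", 3, 3)
	updateStatus(st, "Rebalancing", "RedisCluster is rebalancing", 3, 3)
	fmt.Printf("final status: %+v\n", *st)
}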
2 changes: 1 addition & 1 deletion config/manager/manager.yaml
@@ -33,7 +33,7 @@ spec:
value: "false"
args:
- --leader-elect
- -zap-log-level=info
- -zap-log-level=debug
- -enable-webhooks=false
image: controller
imagePullPolicy: Never
66 changes: 45 additions & 21 deletions controllers/rediscluster_controller.go
@@ -71,7 +71,8 @@ func (r *RedisClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request

// Check if the cluster is downscaled
if leaderReplicas < instance.Status.ReadyLeaderReplicas {

leaderStatusCount := instance.Status.ReadyLeaderReplicas
followerStatusCount := instance.Status.ReadyFollowerReplicas
// Important: if the pod at the last index of the leader StatefulSet is not a leader, promote it:
// check whether that Redis node is a leader, and
// if not, make it the leader pod
@@ -84,12 +85,20 @@ func (r *RedisClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request
}

// Step 1: Reshard the cluster
// Change the cluster status to Resharding; don't update the leader and follower counts yet
k8sutils.UpdateRedisClusterStatus(instance, status.RedisClusterResharding, status.ReshardingClusterReason, leaderStatusCount, followerStatusCount)
k8sutils.ReshardRedisCluster(instance)
// Step 2: Remove the follower node
// Change the cluster status to ScalingDown (followers) and decrement the follower count
k8sutils.UpdateRedisClusterStatus(instance, status.RedisClusterScalingDown, status.ScalingDownFollowerClusterReason, leaderStatusCount, followerStatusCount-1)
k8sutils.RemoveRedisFollowerNodesFromCluster(ctx, instance)
// Step 3: Remove the leader node
// Change the cluster status to ScalingDown (leaders) and decrement the leader count
k8sutils.UpdateRedisClusterStatus(instance, status.RedisClusterScalingDown, status.ScalingDownLeaderClusterReason, leaderStatusCount-1, followerStatusCount-1)
k8sutils.RemoveRedisNodeFromCluster(ctx, instance)
// Step 4: Rebalance the cluster
// Change the cluster status to Rebalancing
k8sutils.UpdateRedisClusterStatus(instance, status.RedisClusterRebalancing, status.RebalanceClusterReason, leaderStatusCount-1, followerStatusCount-1)
k8sutils.RebalanceRedisCluster(instance)
return ctrl.Result{RequeueAfter: time.Second * 100}, nil
}
@@ -120,7 +129,10 @@ func (r *RedisClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request

redisLeaderInfo, err := k8sutils.GetStatefulSet(instance.Namespace, instance.ObjectMeta.Name+"-leader")
if err != nil {
return ctrl.Result{RequeueAfter: time.Second * 60}, err
if errors.IsNotFound(err) {
return ctrl.Result{RequeueAfter: time.Second * 60}, nil
}
return ctrl.Result{}, err
}

if int32(redisLeaderInfo.Status.ReadyReplicas) == leaderReplicas {
@@ -151,7 +163,10 @@ func (r *RedisClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request
}
redisFollowerInfo, err := k8sutils.GetStatefulSet(instance.Namespace, instance.ObjectMeta.Name+"-follower")
if err != nil {
return ctrl.Result{RequeueAfter: time.Second * 60}, err
if errors.IsNotFound(err) {
return ctrl.Result{RequeueAfter: time.Second * 60}, nil
}
return ctrl.Result{}, err
}

if leaderReplicas == 0 {
@@ -165,12 +180,10 @@ func (r *RedisClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request
}

// Mark the cluster status as bootstrapping if all the leader and follower nodes are ready
if int32(redisLeaderInfo.Status.ReadyReplicas) == leaderReplicas && int32(redisFollowerInfo.Status.ReadyReplicas) == followerReplicas {
if instance.Status.ReadyLeaderReplicas == leaderReplicas && instance.Status.ReadyFollowerReplicas == 0 {
err = k8sutils.UpdateRedisClusterStatus(instance, status.RedisClusterBootstrap, status.BootstrapClusterReason, leaderReplicas, followerReplicas)
if err != nil {
return ctrl.Result{RequeueAfter: time.Second * 10}, err
}
if instance.Status.ReadyLeaderReplicas != leaderReplicas || instance.Status.ReadyFollowerReplicas != followerReplicas {
err = k8sutils.UpdateRedisClusterStatus(instance, status.RedisClusterBootstrap, status.BootstrapClusterReason, leaderReplicas, followerReplicas)
if err != nil {
return ctrl.Result{RequeueAfter: time.Second * 10}, err
}
}

@@ -183,33 +196,44 @@ func (r *RedisClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request
k8sutils.ExecuteRedisClusterCommand(ctx, instance)
} else {
if leaderCount < leaderReplicas {
leaderStatusCount := instance.Status.ReadyLeaderReplicas
followerStatusCount := instance.Status.ReadyFollowerReplicas
// Scale up the cluster
// Step 2 : Add Redis Node
// Step 1 : Add Redis Leader Node; Change the status of the cluster to scaling up leader; don't update the leader and follower count
k8sutils.UpdateRedisClusterStatus(instance, status.RedisClusterScalingUp, status.ScalingUpLeaderClusterReason, leaderStatusCount, followerStatusCount)
k8sutils.AddRedisNodeToCluster(ctx, instance)
// Step 3 Rebalance the cluster using the empty masters
// Step 2 : Rebalance the cluster using the empty masters; Change the status of the cluster to rebalancing; update the leader count only
k8sutils.UpdateRedisClusterStatus(instance, status.RedisClusterRebalancing, status.RebalanceClusterReason, leaderStatusCount+1, followerStatusCount)
k8sutils.RebalanceRedisClusterEmptyMasters(instance)
}
}
} else {
if followerReplicas > 0 && redisFollowerInfo.Status.ReadyReplicas == followerReplicas {
// Add the follower nodes to the cluster
// Step 1 : Add Redis Follower Nodes; Change the status of the cluster to scaling up follower; don't update the leader and follower count
// Step 2 : Update the follower count after adding the follower nodes
leaderStatusCount := instance.Status.ReadyLeaderReplicas
followerStatusCount := instance.Status.ReadyFollowerReplicas
reqLogger.Info("All leader are part of the cluster, adding follower/replicas", "Leaders.Count", leaderCount, "Instance.Size", leaderReplicas, "Follower.Replicas", followerReplicas)
k8sutils.UpdateRedisClusterStatus(instance, status.RedisClusterScalingUp, status.ScalingUpFollowerClusterReason, leaderStatusCount, followerStatusCount)
k8sutils.ExecuteRedisReplicationCommand(ctx, instance)
k8sutils.UpdateRedisClusterStatus(instance, status.RedisClusterRebalancing, status.BootstrapClusterReason, leaderStatusCount, followerReplicas)
} else {
reqLogger.Info("no follower/replicas configured, skipping replication configuration", "Leaders.Count", leaderCount, "Leader.Size", leaderReplicas, "Follower.Replicas", followerReplicas)
}
}
} else {
reqLogger.Info("Redis leader count is desired")
if int(totalReplicas) > 1 && k8sutils.CheckRedisClusterState(ctx, instance) >= int(totalReplicas)-1 {
reqLogger.Info("Redis leader is not desired, executing failover operation")
err = k8sutils.ExecuteFailoverOperation(ctx, instance)
if err != nil {
return ctrl.Result{RequeueAfter: time.Second * 10}, err
}
}
return ctrl.Result{RequeueAfter: time.Second * 120}, nil
reqLogger.Info("Cluster is bootstrapping, will reconcile redis cluster operator in again 10 seconds")
return ctrl.Result{RequeueAfter: time.Second * 10}, nil
}

reqLogger.Info("The total replica count of the Redis cluster is desired", "Total.Replicas", totalReplicas)
if int(totalReplicas) > 1 && k8sutils.CheckRedisClusterState(ctx, instance) >= int(totalReplicas)-1 {
reqLogger.Info("Redis leader is not desired, executing failover operation")
err = k8sutils.ExecuteFailoverOperation(ctx, instance)
if err != nil {
return ctrl.Result{RequeueAfter: time.Second * 10}, err
}
}
// Check If there is No Empty Master Node
if k8sutils.CheckRedisNodeCount(ctx, instance, "") == totalReplicas {
k8sutils.CheckIfEmptyMasters(ctx, instance)
2 changes: 1 addition & 1 deletion k8sutils/cluster-scaling.go
@@ -108,7 +108,7 @@ func getRedisClusterSlots(ctx context.Context, cr *redisv1beta2.RedisCluster, no
}
}

logger.V(1).Info("Total cluster slots to be transferred from", "node", nodeID, "is", totalSlots)
logger.V(1).Info("Got total slots", "totalSlots", totalSlots, "node", nodeID)

return strconv.Itoa(totalSlots)
}
2 changes: 1 addition & 1 deletion k8sutils/poddisruption.go
@@ -218,7 +218,7 @@ func GetPodDisruptionBudget(namespace string, pdb string) (*policyv1.PodDisrupti
}
pdbInfo, err := generateK8sClient().PolicyV1().PodDisruptionBudgets(namespace).Get(context.TODO(), pdb, getOpts)
if err != nil {
logger.Info("Redis PodDisruptionBudget get action failed")
logger.V(1).Info("Redis PodDisruptionBudget get action failed")
return nil, err
}
logger.V(1).Info("Redis PodDisruptionBudget get action was successful")
1 change: 0 additions & 1 deletion k8sutils/statefulset.go
@@ -313,7 +313,6 @@ func getExternalConfig(configMapName string) []corev1.Volume {
// createPVCTemplate will create the persistent volume claim template
func createPVCTemplate(volumeName string, stsMeta metav1.ObjectMeta, storageSpec corev1.PersistentVolumeClaim) corev1.PersistentVolumeClaim {
pvcTemplate := storageSpec
pvcTemplate.CreationTimestamp = metav1.Time{}
pvcTemplate.Name = volumeName
pvcTemplate.Labels = stsMeta.GetLabels()
// We want the same annotations as the StatefulSet here
1 change: 1 addition & 0 deletions tests/_config/kuttl-test.yaml
@@ -7,5 +7,6 @@ timeout: 300
testDirs:
- tests/e2e/v1beta2/setup
- tests/e2e/v1beta2/ignore-annots
- tests/e2e/v1beta2/scaling
suppress :
- events
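
With the scaling directory registered here (and in the e2e workflow matrix above), kuttl picks up the new 00-install → 01-scale-up → 02-scale-down steps. A rough local invocation, assuming the kuttl kubectl plugin is installed and the operator is already running in the target cluster, would be:

kubectl kuttl test ./tests/e2e/v1beta2/scaling --config tests/_config/kuttl-test.yaml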
7 changes: 7 additions & 0 deletions tests/e2e/v1beta2/scaling/redis-cluster/00-install.yaml
@@ -0,0 +1,7 @@
apiVersion: kuttl.dev/v1beta1
kind: TestStep
apply:
  - cluster.yaml
assert:
  - ready-cluster.yaml
  - ready-sts.yaml
7 changes: 7 additions & 0 deletions tests/e2e/v1beta2/scaling/redis-cluster/01-scale-up.yaml
@@ -0,0 +1,7 @@
apiVersion: kuttl.dev/v1beta1
kind: TestStep
apply:
  - cluster-scale-up.yaml
assert:
  - ready-cluster-scale-up.yaml
  - ready-sts-scale-up.yaml
7 changes: 7 additions & 0 deletions tests/e2e/v1beta2/scaling/redis-cluster/02-scale-down.yaml
@@ -0,0 +1,7 @@
apiVersion: kuttl.dev/v1beta1
kind: TestStep
apply:
  - cluster.yaml
assert:
  - ready-cluster.yaml
  - ready-sts.yaml
51 changes: 51 additions & 0 deletions tests/e2e/v1beta2/scaling/redis-cluster/cluster-scale-up.yaml
@@ -0,0 +1,51 @@
apiVersion: redis.redis.opstreelabs.in/v1beta2
kind: RedisCluster
metadata:
  name: redis-cluster-v1beta2
spec:
  clusterSize: 3
  redisLeader:
    replicas: 6
  redisFollower:
    replicas: 6
  clusterVersion: v7
  persistenceEnabled: true
  podSecurityContext:
    runAsUser: 1000
    fsGroup: 1000
  kubernetesConfig:
    image: quay.io/opstree/redis:latest
    imagePullPolicy: Always
    resources:
      requests:
        cpu: 101m
        memory: 128Mi
      limits:
        cpu: 101m
        memory: 128Mi
  redisExporter:
    enabled: true
    image: quay.io/opstree/redis-exporter:v1.44.0
    imagePullPolicy: Always
    resources:
      requests:
        cpu: 100m
        memory: 128Mi
      limits:
        cpu: 100m
        memory: 128Mi
  storage:
    volumeClaimTemplate:
      spec:
        # storageClassName: standard
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 1Gi
    nodeConfVolume: true
    nodeConfVolumeClaimTemplate:
      spec:
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 1Gi
47 changes: 47 additions & 0 deletions tests/e2e/v1beta2/scaling/redis-cluster/cluster.yaml
@@ -0,0 +1,47 @@
apiVersion: redis.redis.opstreelabs.in/v1beta2
kind: RedisCluster
metadata:
  name: redis-cluster-v1beta2
spec:
  clusterSize: 3
  clusterVersion: v7
  persistenceEnabled: true
  podSecurityContext:
    runAsUser: 1000
    fsGroup: 1000
  kubernetesConfig:
    image: quay.io/opstree/redis:latest
    imagePullPolicy: Always
    resources:
      requests:
        cpu: 101m
        memory: 128Mi
      limits:
        cpu: 101m
        memory: 128Mi
  redisExporter:
    enabled: true
    image: quay.io/opstree/redis-exporter:v1.44.0
    imagePullPolicy: Always
    resources:
      requests:
        cpu: 100m
        memory: 128Mi
      limits:
        cpu: 100m
        memory: 128Mi
  storage:
    volumeClaimTemplate:
      spec:
        # storageClassName: standard
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 1Gi
    nodeConfVolume: true
    nodeConfVolumeClaimTemplate:
      spec:
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 1Gi
7 changes: 7 additions & 0 deletions tests/e2e/v1beta2/scaling/redis-cluster/ready-cluster-scale-up.yaml
@@ -0,0 +1,7 @@
apiVersion: redis.redis.opstreelabs.in/v1beta2
kind: RedisCluster
metadata:
  name: redis-cluster-v1beta2
status:
  readyLeaderReplicas: 6
  readyFollowerReplicas: 6
7 changes: 7 additions & 0 deletions tests/e2e/v1beta2/scaling/redis-cluster/ready-cluster.yaml
@@ -0,0 +1,7 @@
apiVersion: redis.redis.opstreelabs.in/v1beta2
kind: RedisCluster
metadata:
  name: redis-cluster-v1beta2
status:
  readyLeaderReplicas: 3
  readyFollowerReplicas: 3
23 changes: 23 additions & 0 deletions tests/e2e/v1beta2/scaling/redis-cluster/ready-sts-scale-up.yaml
@@ -0,0 +1,23 @@
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: redis-cluster-v1beta2-leader
  labels:
    app: redis-cluster-v1beta2-leader
    redis_setup_type: cluster
    role: leader
status:
  replicas: 6
  readyReplicas: 6
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: redis-cluster-v1beta2-follower
  labels:
    app: redis-cluster-v1beta2-follower
    redis_setup_type: cluster
    role: follower
status:
  replicas: 6
  readyReplicas: 6
25 changes: 25 additions & 0 deletions tests/e2e/v1beta2/scaling/redis-cluster/ready-sts.yaml
@@ -0,0 +1,25 @@
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: redis-cluster-v1beta2-leader
  labels:
    app: redis-cluster-v1beta2-leader
    redis_setup_type: cluster
    role: leader
status:
  replicas: 3
  readyReplicas: 3

---

apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: redis-cluster-v1beta2-follower
  labels:
    app: redis-cluster-v1beta2-follower
    redis_setup_type: cluster
    role: follower
status:
  replicas: 3
  readyReplicas: 3
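
To spot-check the same conditions that the kuttl asserts above encode, roughly equivalent kubectl queries (namespace flags omitted; adjust for your setup) are:

kubectl get rediscluster redis-cluster-v1beta2 -o jsonpath='{.status.readyLeaderReplicas} {.status.readyFollowerReplicas}'
kubectl get statefulset redis-cluster-v1beta2-leader -o jsonpath='{.status.readyReplicas}'
kubectl get statefulset redis-cluster-v1beta2-follower -o jsonpath='{.status.readyReplicas}'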