diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml index 51cc31b9..eab0ff19 100644 --- a/.github/workflows/e2e-tests.yml +++ b/.github/workflows/e2e-tests.yml @@ -79,7 +79,7 @@ jobs: strategy: max-parallel: 2 matrix: - test-name: [operations, sidecars, scaling, multi-dcs, backup-restore, nodetool] + test-name: [operations, sidecars, scaling, multi-dcs, backup-restore, nodetool, storage-upsize] steps: - id: lower-repo shell: pwsh @@ -105,13 +105,65 @@ jobs: ref: ${{ steps.get-branch.outputs.branch }} - uses: rinx/setup-k3d@v0.0.4 + if: matrix.test-name != 'storage-upsize' with: version: v5.4.0 options: --image rancher/k3s:v1.24.13-k3s1 - - uses: azure/setup-helm@v1 + - name: Create kind config with LVM mounts + if: matrix.test-name == 'storage-upsize' + run: | + mkdir -p kindAndLvm + cat < kindAndLvm/kind-lvm-config.yaml + kind: Cluster + apiVersion: kind.x-k8s.io/v1alpha4 + nodes: + - role: control-plane + - role: worker + extraMounts: + - hostPath: /tmp/kind-worker1-lvm + containerPath: /mnt/disks + - role: worker + extraMounts: + - hostPath: /tmp/kind-worker2-lvm + containerPath: /mnt/disks + EOF + + mkdir -p /tmp/kind-worker1-lvm /tmp/kind-worker2-lvm + + - name: Setup kind cluster + if: matrix.test-name == 'storage-upsize' + uses: helm/kind-action@v1.8.0 with: - version: v3.8.1 + cluster_name: lvm-test + config: kindAndLvm/kind-lvm-config.yaml + wait: 300s + + - name: Setup LVM on worker nodes + if: matrix.test-name == 'storage-upsize' + run: | + WORKER_NODES=$(kubectl get nodes -o name | grep worker | sed 's|node/||') + + for NODE in $WORKER_NODES; do + echo "Setting up LVM on $NODE..." 
+ docker exec $NODE bash -c ' + apt-get update -qq && apt-get install -y -qq lvm2 thin-provisioning-tools > /dev/null 2>&1 + mkdir -p /mnt/disks + truncate -s 10G /mnt/disks/disk.img + LOOP_DEVICE=$(losetup -f) + losetup $LOOP_DEVICE /mnt/disks/disk.img + pvcreate $LOOP_DEVICE + vgcreate lvmvg $LOOP_DEVICE + vgs + ' + done + + - name: Install OpenEBS LVM LocalPV + if: matrix.test-name == 'storage-upsize' + run: | + kubectl apply -f https://openebs.github.io/charts/lvm-operator.yaml + kubectl wait --for=condition=ready pod -l app=openebs-lvm-controller -n kube-system --timeout=300s + kubectl rollout status daemonset/openebs-lvm-node -n kube-system --timeout=300s - name: Restore Kuttl id: cache-kuttl diff --git a/api/v2/cassandracluster_types.go b/api/v2/cassandracluster_types.go index 44b68fda..2d0286c3 100644 --- a/api/v2/cassandracluster_types.go +++ b/api/v2/cassandracluster_types.go @@ -70,6 +70,8 @@ var ( ActionCorrectCRDConfig = ClusterStateInfo{11, "CorrectCRDConfig"} //The Operator has correct a bad CRD configuration + ActionStorageUpsize = ClusterStateInfo{12, "StorageUpsize"} + regexDCRackName = regexp.MustCompile("^[a-z]([-a-z0-9]*[a-z0-9])?$") ) @@ -232,6 +234,10 @@ func (cc *CassandraCluster) GetDCName(dc int) string { return cc.Spec.Topology.DC[dc].Name } +func (cc *CassandraCluster) GetDCNameStrongType(dc int) DcName { + return DcName(cc.GetDCName(dc)) +} + func (cc *CassandraCluster) getDCNodesPerRacksFromIndex(dc int) int32 { if dc >= cc.GetDCSize() { return cc.Spec.NodesPerRacks @@ -462,6 +468,11 @@ func (cc *CassandraCluster) GetDataCapacityForDC(dcName string) string { return cc.GetDataCapacityFromDCName(dcName) } +// GetDataCapacityForDC sends back the data capacity of cassandra nodes for the given strongly-typed dcName +func (cc *CassandraCluster) GetDataCapacityForDCName(dcName DcName) string { + return cc.GetDataCapacityFromDCName(dcName.String()) +} + // GetDataCapacityFromDCName send DataCapacity used for the given dcName func (cc 
*CassandraCluster) GetDataCapacityFromDCName(dcName string) string { dcIndex := cc.GetDCIndexFromDCName(dcName) @@ -480,6 +491,11 @@ func (cc *CassandraCluster) GetDataStorageClassForDC(dcName string) string { return cc.GetDataStorageClassFromDCName(dcName) } +// GetDataStorageClassForDCName send DataStorageClass used for the given strongly-typed dcName +func (cc *CassandraCluster) GetDataStorageClassForDCName(dcName DcName) string { + return cc.GetDataStorageClassFromDCName(dcName.String()) +} + // GetDataCapacityFromDCName send DataStorageClass used for the given dcName func (cc *CassandraCluster) GetDataStorageClassFromDCName(dcName string) string { dcIndex := cc.GetDCIndexFromDCName(dcName) @@ -539,6 +555,11 @@ func (cc *CassandraCluster) GetNodesPerRacks(dcRackName string) int32 { return nodesPerRacks } +// GetNodesPerRacks sends back the number of cassandra nodes to uses for this strongly-typed dc-rack +func (cc *CassandraCluster) GetNodesPerRacksStrongType(dcRackName DcRackName) int32 { + return cc.GetNodesPerRacks(dcRackName.String()) +} + // GetDCNodesPerRacksFromDCRackName send NodesPerRack used for the given dcRackName func (cc *CassandraCluster) GetDCRackNames() []string { dcsize := cc.GetDCSize() @@ -931,6 +952,11 @@ type BackRestSidecar struct { VolumeMounts []v1.VolumeMount `json:"volumeMount,omitempty"` } +// GetCassandraRackStatus returns CassandraRackStatus for a given strongly-typed dcRack +func (in *CassandraClusterStatus) GetCassandraRackStatus(dcRackName DcRackName) *CassandraRackStatus { + return in.CassandraRackStatus[dcRackName.String()] +} + // CassandraRackStatus defines states of Cassandra for 1 rack (1 statefulset) type CassandraRackStatus struct { // Phase indicates the state this Cassandra cluster jumps in. @@ -943,6 +969,10 @@ type CassandraRackStatus struct { // PodLastOperation manage status for Pod Operation (nodetool cleanup, upgradesstables..) 
PodLastOperation PodLastOperation `json:"podLastOperation,omitempty"` + + // StatefulSetSnapshotBeforeStorageResize is the StatefulSet snapshot taken before storage resize + // The purpose is to isolate the storage resize operation from other operations + StatefulSetSnapshotBeforeStorageResize string `json:"statefulSetSnapshotBeforeStorageResize,omitempty"` } // CassandraClusterStatus defines Global state of CassandraCluster diff --git a/api/v2/names.go b/api/v2/names.go new file mode 100644 index 00000000..3b54755e --- /dev/null +++ b/api/v2/names.go @@ -0,0 +1,25 @@ +package v2 + +type DcName string + +func (n DcName) String() string { + return string(n) +} + +type RackName string + +func (n RackName) String() string { + return string(n) +} + +type DcRackName string + +func (n DcRackName) String() string { + return string(n) +} + +type CompleteRackName struct { + DcName DcName + RackName RackName + DcRackName DcRackName +} diff --git a/api/v2/zz_generated.deepcopy.go b/api/v2/zz_generated.deepcopy.go index d115b9fa..65e6c4a5 100644 --- a/api/v2/zz_generated.deepcopy.go +++ b/api/v2/zz_generated.deepcopy.go @@ -537,6 +537,21 @@ func (in *ClusterStateInfo) DeepCopy() *ClusterStateInfo { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *CompleteRackName) DeepCopyInto(out *CompleteRackName) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CompleteRackName. +func (in *CompleteRackName) DeepCopy() *CompleteRackName { + if in == nil { + return nil + } + out := new(CompleteRackName) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *DC) DeepCopyInto(out *DC) { *out = *in diff --git a/charts/casskop/crds/db.orange.com_cassandraclusters.yaml b/charts/casskop/crds/db.orange.com_cassandraclusters.yaml index feb039a2..491a37d2 100644 --- a/charts/casskop/crds/db.orange.com_cassandraclusters.yaml +++ b/charts/casskop/crds/db.orange.com_cassandraclusters.yaml @@ -2308,6 +2308,11 @@ spec: format: date-time status: type: string + statefulSetSnapshotBeforeStorageResize: + description: |- + StatefulSetSnapshotBeforeStorageResize is the StatefulSet snapshot taken before storage resize + The purpose is to isolate the storage resize operation from other operations + type: string lastClusterAction: description: Store last action at cluster level type: string diff --git a/charts/multi-casskop/crds/db.orange.com_cassandraclusters.yaml b/charts/multi-casskop/crds/db.orange.com_cassandraclusters.yaml index feb039a2..491a37d2 100644 --- a/charts/multi-casskop/crds/db.orange.com_cassandraclusters.yaml +++ b/charts/multi-casskop/crds/db.orange.com_cassandraclusters.yaml @@ -2308,6 +2308,11 @@ spec: format: date-time status: type: string + statefulSetSnapshotBeforeStorageResize: + description: |- + StatefulSetSnapshotBeforeStorageResize is the StatefulSet snapshot taken before storage resize + The purpose is to isolate the storage resize operation from other operations + type: string lastClusterAction: description: Store last action at cluster level type: string diff --git a/config/crd/bases/db.orange.com_cassandraclusters.yaml b/config/crd/bases/db.orange.com_cassandraclusters.yaml index feb039a2..491a37d2 100644 --- a/config/crd/bases/db.orange.com_cassandraclusters.yaml +++ b/config/crd/bases/db.orange.com_cassandraclusters.yaml @@ -2308,6 +2308,11 @@ spec: format: date-time status: type: string + statefulSetSnapshotBeforeStorageResize: + description: |- + StatefulSetSnapshotBeforeStorageResize is the StatefulSet snapshot taken before storage resize + The purpose is to isolate the storage resize 
operation from other operations + type: string lastClusterAction: description: Store last action at cluster level type: string diff --git a/controllers/cassandracluster/cassandra_status.go b/controllers/cassandracluster/cassandra_status.go index 01a9b240..0bcb6bd7 100644 --- a/controllers/cassandracluster/cassandra_status.go +++ b/controllers/cassandracluster/cassandra_status.go @@ -17,13 +17,16 @@ package cassandracluster import ( "context" "fmt" - "github.com/r3labs/diff" "reflect" "strconv" "time" api "github.com/cscetbon/casskop/api/v2" + "github.com/cscetbon/casskop/controllers/cassandracluster/cassandrapod" + "github.com/cscetbon/casskop/controllers/cassandracluster/consts" + "github.com/cscetbon/casskop/controllers/cassandracluster/sts" "github.com/cscetbon/casskop/pkg/k8s" + "github.com/r3labs/diff" "github.com/sirupsen/logrus" appsv1 "k8s.io/api/apps/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -69,36 +72,35 @@ func (rcc *CassandraClusterReconciler) updateCassandraStatus(ctx context.Context // getNextCassandraClusterStatus goal is to detect some changes in the status between cassandracluster and its statefulset // We follow only one change at a Time : so this function will return on the first change found func (rcc *CassandraClusterReconciler) getNextCassandraClusterStatus(ctx context.Context, cc *api.CassandraCluster, dc, rack int, - dcName, rackName string, storedStatefulSet *appsv1.StatefulSet, status *api.CassandraClusterStatus) error { + completeDcRackName api.CompleteRackName, storedStatefulSet *appsv1.StatefulSet, status *api.CassandraClusterStatus) error { //UpdateStatusIfUpdateResources(cc, dcRackName, storedStatefulSet, status) - dcRackName := cc.GetDCRackName(dcName, rackName) - if needToWaitDelayBeforeCheck(cc, dcRackName, storedStatefulSet, status) { + if needToWaitDelayBeforeCheck(cc, completeDcRackName.DcRackName, status) { return nil } - if rcc.UpdateStatusIfActionEnded(ctx, cc, dcName, rackName, storedStatefulSet, status) { + if 
rcc.UpdateStatusIfActionEnded(ctx, cc, completeDcRackName, storedStatefulSet, status) { return nil } //If we set up UnlockNextOperation in CRD we allow to see mode change even last operation didn't ended correctly unlockNextOperation := false if cc.Spec.UnlockNextOperation && - rcc.hasUnschedulablePod(ctx, cc.Namespace, dcName, rackName) { + rcc.hasUnschedulablePod(ctx, completeDcRackName) { unlockNextOperation = true } //Do nothing in Initial phase except if we force it - if status.CassandraRackStatus[dcRackName].Phase == api.ClusterPhaseInitial.Name { + if status.GetCassandraRackStatus(completeDcRackName.DcRackName).Phase == api.ClusterPhaseInitial.Name { if !unlockNextOperation { ClusterPhaseMetric.set(api.ClusterPhaseInitial, cc.Name) return nil } - status.CassandraRackStatus[dcRackName].Phase = api.ClusterPhasePending.Name + status.GetCassandraRackStatus(completeDcRackName.DcRackName).Phase = api.ClusterPhasePending.Name ClusterPhaseMetric.set(api.ClusterPhasePending, cc.Name) } - lastAction := &status.CassandraRackStatus[dcRackName].CassandraLastAction + lastAction := &status.GetCassandraRackStatus(completeDcRackName.DcRackName).CassandraLastAction // Do not check for new action if there is one ongoing or planed // Check to discover new changes are not done if action.status is Ongoing or ToDo/Finalizing @@ -115,35 +117,40 @@ func (rcc *CassandraClusterReconciler) getNextCassandraClusterStatus(ctx context lastAction.Status != api.StatusFinalizing) { // Update Status if ConfigMap Has Changed - if UpdateStatusIfconfigMapHasChanged(cc, dcRackName, storedStatefulSet, status) { + if UpdateStatusIfconfigMapHasChanged(cc, completeDcRackName.DcRackName, storedStatefulSet, status) { return nil } // Update Status if ConfigMap Has Changed - if UpdateStatusIfDockerImageHasChanged(cc, dcRackName, storedStatefulSet, status) { + if UpdateStatusIfDockerImageHasChanged(cc, completeDcRackName.DcRackName, storedStatefulSet, status) { + return nil + } + + rcc.storedStatefulSet = 
storedStatefulSet + if rcc.UpdateStatusIfStorageUpsize(completeDcRackName, status) { return nil } // Update Status if There is a ScaleUp or ScaleDown - if UpdateStatusIfScaling(cc, dcRackName, storedStatefulSet, status) { + if UpdateStatusIfScaling(cc, completeDcRackName.DcRackName, storedStatefulSet, status) { return nil } // Update Status if Topology for SeedList has changed - if UpdateStatusIfSeedListHasChanged(cc, dcRackName, storedStatefulSet, status) { + if UpdateStatusIfSeedListHasChanged(cc, completeDcRackName.DcRackName, storedStatefulSet, status) { return nil } - if UpdateStatusIfRollingRestart(cc, dc, rack, dcRackName, status) { + if UpdateStatusIfRollingRestart(cc, dc, rack, completeDcRackName.DcRackName, status) { return nil } - if UpdateStatusIfStatefulSetChanged(dcRackName, storedStatefulSet, status) { + if UpdateStatusIfStatefulSetChanged(completeDcRackName.DcRackName, storedStatefulSet, status) { return nil } } else { logrus.WithFields(logrus.Fields{"cluster": cc.Name, - "dc-rack": dcRackName}).Info("We don't check for new action before the cluster become stable again") + "dc-rack": completeDcRackName.DcRackName}).Info("We don't check for new action before the cluster become stable again") } if lastAction.Status == api.StatusToDo && lastAction.Name == api.ActionUpdateResources.Name { @@ -159,9 +166,8 @@ func (rcc *CassandraClusterReconciler) getNextCassandraClusterStatus(ctx context // that means the last operation was started only a few seconds ago and checking now would not make any sense // this is mostly to give cassandra and the operator enough time to correctly stage the action // DefaultDelayWait is of 2 minutes -func needToWaitDelayBeforeCheck(cc *api.CassandraCluster, dcRackName string, storedStatefulSet *appsv1.StatefulSet, - status *api.CassandraClusterStatus) bool { - lastAction := &status.CassandraRackStatus[dcRackName].CassandraLastAction +func needToWaitDelayBeforeCheck(cc *api.CassandraCluster, dcRackName api.DcRackName, status 
*api.CassandraClusterStatus) bool { + lastAction := &status.GetCassandraRackStatus(dcRackName).CassandraLastAction if lastAction.StartTime != nil { t := *lastAction.StartTime @@ -189,7 +195,7 @@ func defaultDelayWait() time.Duration { // UpdateStatusIfconfigMapHasChanged updates CassandraCluster Action Status if it detect a changes : // - a new configmapName in the CRD // - or the add or remoove of the configmap in the CRD -func UpdateStatusIfconfigMapHasChanged(cc *api.CassandraCluster, dcRackName string, storedStatefulSet *appsv1.StatefulSet, status *api.CassandraClusterStatus) bool { +func UpdateStatusIfconfigMapHasChanged(cc *api.CassandraCluster, dcRackName api.DcRackName, storedStatefulSet *appsv1.StatefulSet, status *api.CassandraClusterStatus) bool { var updateConfigMap bool = false @@ -220,7 +226,7 @@ func UpdateStatusIfconfigMapHasChanged(cc *api.CassandraCluster, dcRackName stri } if updateConfigMap { - lastAction := &status.CassandraRackStatus[dcRackName].CassandraLastAction + lastAction := &status.GetCassandraRackStatus(dcRackName).CassandraLastAction lastAction.Status = api.StatusToDo lastAction.Name = api.ActionUpdateConfigMap.Name ClusterActionMetric.set(api.ActionUpdateConfigMap, cc.Name) @@ -232,17 +238,17 @@ func UpdateStatusIfconfigMapHasChanged(cc *api.CassandraCluster, dcRackName stri } // UpdateStatusIfDockerImageHasChanged updates CassandraCluster Action Status if it detect a changes in the DockerImage: -func UpdateStatusIfDockerImageHasChanged(cc *api.CassandraCluster, dcRackName string, storedStatefulSet *appsv1.StatefulSet, status *api.CassandraClusterStatus) bool { +func UpdateStatusIfDockerImageHasChanged(cc *api.CassandraCluster, dcRackName api.DcRackName, storedStatefulSet *appsv1.StatefulSet, status *api.CassandraClusterStatus) bool { desiredDockerImage := cc.Spec.CassandraImage //This needs to be refactor if we load more than 1 container if storedStatefulSet.Spec.Template.Spec.Containers != nil { for _, container := range 
storedStatefulSet.Spec.Template.Spec.Containers { - if container.Name == cassandraContainerName && desiredDockerImage != container.Image { + if container.Name == consts.CassandraContainerName && desiredDockerImage != container.Image { { logrus.Infof("[%s][%s]: We ask to change DockerImage CRD:%s -> StatefulSet:%s", cc.Name, dcRackName, desiredDockerImage, storedStatefulSet.Spec.Template.Spec.Containers[0].Image) - lastAction := &status.CassandraRackStatus[dcRackName].CassandraLastAction + lastAction := &status.GetCassandraRackStatus(dcRackName).CassandraLastAction lastAction.Status = api.StatusToDo lastAction.Name = api.ActionUpdateDockerImage.Name ClusterActionMetric.set(api.ActionUpdateDockerImage, cc.Name) @@ -257,12 +263,12 @@ func UpdateStatusIfDockerImageHasChanged(cc *api.CassandraCluster, dcRackName st } func UpdateStatusIfRollingRestart(cc *api.CassandraCluster, dc, - rack int, dcRackName string, status *api.CassandraClusterStatus) bool { + rack int, dcRackName api.DcRackName, status *api.CassandraClusterStatus) bool { if cc.Spec.Topology.DC[dc].Rack[rack].RollingRestart { logrus.WithFields(logrus.Fields{"cluster": cc.Name, "dc-rack": dcRackName}).Info("Scoping RollingRestart of the Rack") - lastAction := &status.CassandraRackStatus[dcRackName].CassandraLastAction + lastAction := &status.GetCassandraRackStatus(dcRackName).CassandraLastAction lastAction.Status = api.StatusToDo lastAction.Name = api.ActionRollingRestart.Name ClusterActionMetric.set(api.ActionRollingRestart, cc.Name) @@ -275,7 +281,7 @@ func UpdateStatusIfRollingRestart(cc *api.CassandraCluster, dc, } // UpdateStatusIfSeedListHasChanged updates CassandraCluster Action Status if it detects a change -func UpdateStatusIfSeedListHasChanged(cc *api.CassandraCluster, dcRackName string, +func UpdateStatusIfSeedListHasChanged(cc *api.CassandraCluster, dcRackName api.DcRackName, storedStatefulSet *appsv1.StatefulSet, status *api.CassandraClusterStatus) bool { storedSeedList := 
getStoredSeedList(storedStatefulSet) @@ -297,7 +303,7 @@ func UpdateStatusIfSeedListHasChanged(cc *api.CassandraCluster, dcRackName strin // This is to ensure that we won't do 2 different kind of operations in different racks at the same time (ex:scaling + updateseedlist) if !reflect.DeepEqual(status.SeedList, storedSeedList) { logrus.Infof("[%s][%s]: We ask to Change the Cassandra SeedList", cc.Name, dcRackName) - lastAction := &status.CassandraRackStatus[dcRackName].CassandraLastAction + lastAction := &status.GetCassandraRackStatus(dcRackName).CassandraLastAction lastAction.Status = api.StatusConfiguring lastAction.Name = api.ActionUpdateSeedList.Name ClusterActionMetric.set(api.ActionUpdateSeedList, cc.Name) @@ -312,11 +318,11 @@ func UpdateStatusIfSeedListHasChanged(cc *api.CassandraCluster, dcRackName strin // UpdateStatusIfScaling will detect any change of replicas // To Scale Down the operator will need to first decommission the last node from Cassandra before removing it from kubernetes. // To Scale Up some PodOperations may be scheduled if Auto-pilot is activeted. 
-func UpdateStatusIfScaling(cc *api.CassandraCluster, dcRackName string, storedStatefulSet *appsv1.StatefulSet, +func UpdateStatusIfScaling(cc *api.CassandraCluster, dcRackName api.DcRackName, storedStatefulSet *appsv1.StatefulSet, status *api.CassandraClusterStatus) bool { - nodesPerRacks := cc.GetNodesPerRacks(dcRackName) + nodesPerRacks := cc.GetNodesPerRacksStrongType(dcRackName) if nodesPerRacks != *storedStatefulSet.Spec.Replicas { - lastAction := &status.CassandraRackStatus[dcRackName].CassandraLastAction + lastAction := &status.GetCassandraRackStatus(dcRackName).CassandraLastAction lastAction.Status = api.StatusToDo if nodesPerRacks > *storedStatefulSet.Spec.Replicas { lastAction.Name = api.ActionScaleUp.Name @@ -339,10 +345,10 @@ func UpdateStatusIfScaling(cc *api.CassandraCluster, dcRackName string, storedSt // If we detect a Statefulset change with this method, then the operator won't catch it before the statefulset tells // the operator that a change is ongoing. That means that all statefulsets may do their rolling upgrade in parallel, so // there will be node down in // in the cluster. 
-func UpdateStatusIfStatefulSetChanged(dcRackName string, storedStatefulSet *appsv1.StatefulSet, +func UpdateStatusIfStatefulSetChanged(dcRackName api.DcRackName, storedStatefulSet *appsv1.StatefulSet, status *api.CassandraClusterStatus) bool { // We have not detected any change with out specific tests - lastAction := &status.CassandraRackStatus[dcRackName].CassandraLastAction + lastAction := &status.GetCassandraRackStatus(dcRackName).CassandraLastAction if storedStatefulSet.Status.CurrentRevision != storedStatefulSet.Status.UpdateRevision { lastAction.Name = api.ActionUpdateStatefulSet.Name lastAction.Status = api.StatusOngoing @@ -355,16 +361,16 @@ func UpdateStatusIfStatefulSetChanged(dcRackName string, storedStatefulSet *apps } // UpdateStatusIfActionEnded Implement Tests to detect End of Ongoing Actions -func (rcc *CassandraClusterReconciler) UpdateStatusIfActionEnded(ctx context.Context, cc *api.CassandraCluster, dcName string, - rackName string, storedStatefulSet *appsv1.StatefulSet, status *api.CassandraClusterStatus) bool { - dcRackName := cc.GetDCRackName(dcName, rackName) - rackLastAction := &status.CassandraRackStatus[dcRackName].CassandraLastAction +func (rcc *CassandraClusterReconciler) UpdateStatusIfActionEnded(ctx context.Context, cc *api.CassandraCluster, + completeDcRackName api.CompleteRackName, storedStatefulSet *appsv1.StatefulSet, status *api.CassandraClusterStatus) bool { + + rackLastAction := &status.GetCassandraRackStatus(completeDcRackName.DcRackName).CassandraLastAction now := metav1.Now() if rackLastAction.Status == api.StatusOngoing || rackLastAction.Status == api.StatusContinue { - nodesPerRacks := cc.GetNodesPerRacks(dcRackName) + nodesPerRacks := cc.GetNodesPerRacksStrongType(completeDcRackName.DcRackName) switch rackLastAction.Name { case api.ActionScaleUp.Name: @@ -372,29 +378,31 @@ func (rcc *CassandraClusterReconciler) UpdateStatusIfActionEnded(ctx context.Con //Does the Scaling ended ? 
if nodesPerRacks == storedStatefulSet.Status.Replicas { - podsList, err := rcc.ListPodsOrderByNameAscending(ctx, cc.Namespace, k8s.LabelsForCassandraDCRack(cc, dcName, rackName)) + labelsForList := k8s.LabelsForCassandraDCRackStrongTypes(cc, completeDcRackName.DcName, completeDcRackName.RackName) + podsList, err := rcc.ListPodsOrderByNameAscending(ctx, cc.Namespace, labelsForList) nb := len(podsList.Items) if err != nil || nb < 1 { return false } if nb < int(nodesPerRacks) { - logrus.WithFields(logrus.Fields{"cluster": cc.Name, "rack": dcRackName}).Warn(fmt.Sprintf( + logrus.WithFields(logrus.Fields{"cluster": cc.Name, "rack": completeDcRackName.DcRackName}).Warn(fmt.Sprintf( "Although statefulSet has %d replicas, only %d matching pods found", nodesPerRacks, nb)) return false } pod := podsList.Items[nodesPerRacks-1] //We need lastPod to be running to consider ScaleUp ended - if cassandraPodIsReady(&pod) { + if cassandrapod.IsReady(&pod) { if hasJoiningNodes, err := rcc.hasJoiningNodes(ctx, cc); err != nil { return false } else if hasJoiningNodes { - logrus.WithFields(logrus.Fields{"cluster": cc.Name, "dc": dcName, "rack": rackName, - "err": err}).Info("Cluster has joining nodes, ScaleUp not yet completed") + logrus.WithFields(logrus.Fields{"cluster": cc.Name, "dc": completeDcRackName.DcName, + "rack": completeDcRackName.RackName, "err": err}). 
+ Info("Cluster has joining nodes, ScaleUp not yet completed") return false } - logrus.WithFields(logrus.Fields{"cluster": cc.Name, "rack": dcRackName}).Info("ScaleUp is Done") + logrus.WithFields(logrus.Fields{"cluster": cc.Name, "rack": completeDcRackName.DcRackName}).Info("ScaleUp is Done") rackLastAction.Status = api.StatusDone rackLastAction.EndTime = &now @@ -404,7 +412,7 @@ func (rcc *CassandraClusterReconciler) UpdateStatusIfActionEnded(ctx context.Con } else { labels["operation-status"] = api.StatusManual } - rcc.addPodOperationLabels(ctx, cc, dcName, rackName, labels) + rcc.addPodOperationLabels(ctx, cc, completeDcRackName, labels) return true } @@ -414,14 +422,14 @@ func (rcc *CassandraClusterReconciler) UpdateStatusIfActionEnded(ctx context.Con case api.ActionScaleDown.Name: if nodesPerRacks == storedStatefulSet.Status.Replicas { - if cc.Status.CassandraRackStatus[dcRackName].PodLastOperation.Name == api.OperationDecommission && - cc.Status.CassandraRackStatus[dcRackName].PodLastOperation.Status == api.StatusDone { - logrus.WithFields(logrus.Fields{"cluster": cc.Name, "rack": dcRackName}).Info("ScaleDown is Done") + if cc.Status.GetCassandraRackStatus(completeDcRackName.DcRackName).PodLastOperation.Name == api.OperationDecommission && + cc.Status.GetCassandraRackStatus(completeDcRackName.DcRackName).PodLastOperation.Status == api.StatusDone { + logrus.WithFields(logrus.Fields{"cluster": cc.Name, "rack": completeDcRackName.DcRackName}).Info("ScaleDown is Done") rackLastAction.Status = api.StatusDone rackLastAction.EndTime = &now return true } - logrus.WithFields(logrus.Fields{"cluster": cc.Name, "rack": dcRackName}).Info("ScaleDown not yet Completed: Waiting for Pod operation to be Done") + logrus.WithFields(logrus.Fields{"cluster": cc.Name, "rack": completeDcRackName.DcRackName}).Info("ScaleDown not yet Completed: Waiting for Pod operation to be Done") } case api.ClusterPhaseInitial.Name: @@ -429,10 +437,14 @@ func (rcc *CassandraClusterReconciler) 
UpdateStatusIfActionEnded(ctx context.Con //nothing particular here return false + case api.ActionStorageUpsize.Name: + //nothing particular here + return false + default: // Do the update has finished on all pods ? if storedStatefulSet.Status.CurrentRevision == storedStatefulSet.Status.UpdateRevision { - logrus.Infof("[%s][%s]: Update %s is Done", cc.Name, dcRackName, rackLastAction.Name) + logrus.Infof("[%s][%s]: Update %s is Done", cc.Name, completeDcRackName.DcRackName, rackLastAction.Name) rackLastAction.Status = api.StatusDone now := metav1.Now() rackLastAction.EndTime = &now @@ -448,16 +460,16 @@ func (rcc *CassandraClusterReconciler) UpdateStatusIfActionEnded(ctx context.Con // UpdateCassandraRackStatusPhase goal is to calculate the Cluster Phase according to StatefulSet Status. // The Phase is: Initializing -> Running <--> Pending // The Phase is a very high level view of the cluster, for a better view we need to see Actions and Pod Operations -func (rcc *CassandraClusterReconciler) UpdateCassandraRackStatusPhase(ctx context.Context, cc *api.CassandraCluster, dcName string, - rackName string, storedStatefulSet *appsv1.StatefulSet, status *api.CassandraClusterStatus) { - dcRackName := cc.GetDCRackName(dcName, rackName) - lastAction := &status.CassandraRackStatus[dcRackName].CassandraLastAction +func (rcc *CassandraClusterReconciler) UpdateCassandraRackStatusPhase(ctx context.Context, cc *api.CassandraCluster, + completeDcRackName api.CompleteRackName, storedStatefulSet *appsv1.StatefulSet, status *api.CassandraClusterStatus) { + dcRackName := completeDcRackName.DcRackName + lastAction := &status.GetCassandraRackStatus(dcRackName).CassandraLastAction logrusFields := logrus.Fields{"cluster": cc.Name, "rack": dcRackName, "ReadyReplicas": storedStatefulSet.Status.ReadyReplicas, "RequestedReplicas": *storedStatefulSet.Spec.Replicas} - if status.CassandraRackStatus[dcRackName].Phase == api.ClusterPhaseInitial.Name { - nodesPerRacks := 
cc.GetNodesPerRacks(dcRackName) + if status.GetCassandraRackStatus(dcRackName).Phase == api.ClusterPhaseInitial.Name { + nodesPerRacks := cc.GetNodesPerRacksStrongType(dcRackName) //If we are stuck in initializing state, we can rollback the add of dc which implies decommissioning nodes if nodesPerRacks <= 0 { logrus.WithFields(logrus.Fields{"cluster": cc.Name, @@ -469,12 +481,13 @@ func (rcc *CassandraClusterReconciler) UpdateCassandraRackStatusPhase(ctx contex ClusterPhaseMetric.set(api.ClusterPhaseInitial, cc.Name) - if isStatefulSetNotReady(storedStatefulSet) { + if sts.IsStatefulSetNotReady(storedStatefulSet) { logrus.WithFields(logrusFields).Infof("Initializing StatefulSet: Replicas count is not okay") return } //If yes, just check that lastPod is running - podsList, err := rcc.ListPodsOrderByNameAscending(ctx, cc.Namespace, k8s.LabelsForCassandraDCRack(cc, dcName, rackName)) + labelsForList := k8s.LabelsForCassandraDCRackStrongTypes(cc, completeDcRackName.DcName, completeDcRackName.RackName) + podsList, err := rcc.ListPodsOrderByNameAscending(ctx, cc.Namespace, labelsForList) if err != nil || len(podsList.Items) < 1 { return } @@ -482,8 +495,8 @@ func (rcc *CassandraClusterReconciler) UpdateCassandraRackStatusPhase(ctx contex logrus.WithFields(logrusFields).Infof("StatefulSet is scaling up") } pod := podsList.Items[nodesPerRacks-1] - if cassandraPodIsReady(&pod) { - status.CassandraRackStatus[dcRackName].Phase = api.ClusterPhaseRunning.Name + if cassandrapod.IsReady(&pod) { + status.GetCassandraRackStatus(dcRackName).Phase = api.ClusterPhaseRunning.Name ClusterPhaseMetric.set(api.ClusterPhaseRunning, cc.Name) now := metav1.Now() lastAction.EndTime = &now @@ -493,29 +506,30 @@ func (rcc *CassandraClusterReconciler) UpdateCassandraRackStatusPhase(ctx contex } //No more in Initializing state - if isStatefulSetNotReady(storedStatefulSet) { + if sts.IsStatefulSetNotReady(storedStatefulSet) { logrus.WithFields(logrusFields).Infof("StatefulSet: Replicas count is not 
okay") - status.CassandraRackStatus[dcRackName].Phase = api.ClusterPhasePending.Name + status.GetCassandraRackStatus(dcRackName).Phase = api.ClusterPhasePending.Name ClusterPhaseMetric.set(api.ClusterPhasePending, cc.Name) - } else if status.CassandraRackStatus[dcRackName].Phase != api.ClusterPhaseRunning.Name { + } else if status.GetCassandraRackStatus(dcRackName).Phase != api.ClusterPhaseRunning.Name { logrus.WithFields(logrusFields).Infof("StatefulSet: Rack Phase is not %s", api.ClusterPhaseRunning.Name) - status.CassandraRackStatus[dcRackName].Phase = api.ClusterPhaseRunning.Name + status.GetCassandraRackStatus(dcRackName).Phase = api.ClusterPhaseRunning.Name ClusterPhaseMetric.set(api.ClusterPhaseRunning, cc.Name) } } -func setDecommissionStatus(status *api.CassandraClusterStatus, dcRackName string) { - status.CassandraRackStatus[dcRackName].Phase = api.ClusterPhasePending.Name +func setDecommissionStatus(status *api.CassandraClusterStatus, dcRackName api.DcRackName) { + rackStatus := status.GetCassandraRackStatus(dcRackName) + rackStatus.Phase = api.ClusterPhasePending.Name now := metav1.Now() - lastAction := &status.CassandraRackStatus[dcRackName].CassandraLastAction + lastAction := &rackStatus.CassandraLastAction lastAction.StartTime = &now lastAction.Status = api.StatusToDo lastAction.Name = api.ActionScaleDown.Name - status.CassandraRackStatus[dcRackName].PodLastOperation.Status = api.StatusToDo - status.CassandraRackStatus[dcRackName].PodLastOperation.Name = api.OperationDecommission - status.CassandraRackStatus[dcRackName].PodLastOperation.StartTime = &now - status.CassandraRackStatus[dcRackName].PodLastOperation.EndTime = nil - status.CassandraRackStatus[dcRackName].PodLastOperation.Pods = []string{} - status.CassandraRackStatus[dcRackName].PodLastOperation.PodsOK = []string{} - status.CassandraRackStatus[dcRackName].PodLastOperation.PodsKO = []string{} + rackStatus.PodLastOperation.Status = api.StatusToDo + rackStatus.PodLastOperation.Name = 
api.OperationDecommission + rackStatus.PodLastOperation.StartTime = &now + rackStatus.PodLastOperation.EndTime = nil + rackStatus.PodLastOperation.Pods = []string{} + rackStatus.PodLastOperation.PodsOK = []string{} + rackStatus.PodLastOperation.PodsKO = []string{} } diff --git a/controllers/cassandracluster/cassandra_status_test.go b/controllers/cassandracluster/cassandra_status_test.go index 52d03a78..4531ada0 100644 --- a/controllers/cassandracluster/cassandra_status_test.go +++ b/controllers/cassandracluster/cassandra_status_test.go @@ -19,6 +19,8 @@ import ( "fmt" "time" + "github.com/cscetbon/casskop/controllers/cassandracluster/storagestateclient" + "github.com/cscetbon/casskop/controllers/cassandracluster/sts" "github.com/cscetbon/casskop/controllers/common" "github.com/jarcoal/httpmock" "k8s.io/apimachinery/pkg/runtime" @@ -111,7 +113,12 @@ func HelperInitCluster(t *testing.T, name string) (*CassandraClusterReconciler, fakeClientScheme.AddKnownTypes(api.GroupVersion, &ccList) cl := fake.NewClientBuilder().WithScheme(fakeClientScheme).WithRuntimeObjects(objs...).WithStatusSubresource(&cc).Build() // Create a CassandraClusterReconciler object with the scheme and fake client. 
- rcc := CassandraClusterReconciler{Client: cl, Scheme: fakeClientScheme} + rcc := CassandraClusterReconciler{ + Client: cl, + StorageStateClient: storagestateclient.New(cl), + StsClient: sts.NewClient(cl), + Scheme: fakeClientScheme, + } cc.InitCassandraRackList() cl.Status().Update(context.TODO(), &cc) @@ -327,7 +334,7 @@ func TestUpdateStatusIfconfigMapHasChangedWithNoConfigMap(t *testing.T) { if err != nil { t.Fatalf("get statefulset: (%v)", err) } - assert.Equal(t, false, UpdateStatusIfconfigMapHasChanged(rcc.cc, dcRackName, sts, &rcc.cc.Status)) + assert.Equal(t, false, UpdateStatusIfconfigMapHasChanged(rcc.cc, api.DcRackName(dcRackName), sts, &rcc.cc.Status)) } } @@ -345,7 +352,7 @@ func TestUpdateStatusIfconfigMapHasChangedWithNoConfigMap(t *testing.T) { if err != nil { t.Fatalf("get statefulset: (%v)", err) } - assert.Equal(t, true, UpdateStatusIfconfigMapHasChanged(rcc.cc, dcRackName, sts, &rcc.cc.Status)) + assert.Equal(t, true, UpdateStatusIfconfigMapHasChanged(rcc.cc, api.DcRackName(dcRackName), sts, &rcc.cc.Status)) } } @@ -384,7 +391,7 @@ func TestUpdateStatusIfconfigMapHasChangedWithConfigMap(t *testing.T) { if err != nil { t.Fatalf("get statefulset: (%v)", err) } - assert.Equal(t, false, UpdateStatusIfconfigMapHasChanged(rcc.cc, dcRackName, sts, &rcc.cc.Status)) + assert.Equal(t, false, UpdateStatusIfconfigMapHasChanged(rcc.cc, api.DcRackName(dcRackName), sts, &rcc.cc.Status)) } } @@ -402,7 +409,7 @@ func TestUpdateStatusIfconfigMapHasChangedWithConfigMap(t *testing.T) { if err != nil { t.Fatalf("get statefulset: (%v)", err) } - assert.Equal(t, true, UpdateStatusIfconfigMapHasChanged(rcc.cc, dcRackName, sts, &rcc.cc.Status)) + assert.Equal(t, true, UpdateStatusIfconfigMapHasChanged(rcc.cc, api.DcRackName(dcRackName), sts, &rcc.cc.Status)) } } @@ -420,7 +427,7 @@ func TestUpdateStatusIfconfigMapHasChangedWithConfigMap(t *testing.T) { if err != nil { t.Fatalf("get statefulset: (%v)", err) } - assert.Equal(t, true, 
UpdateStatusIfconfigMapHasChanged(rcc.cc, dcRackName, sts, &rcc.cc.Status)) + assert.Equal(t, true, UpdateStatusIfconfigMapHasChanged(rcc.cc, api.DcRackName(dcRackName), sts, &rcc.cc.Status)) } } @@ -459,7 +466,7 @@ func TestUpdateStatusIfDockerImageHasChanged(t *testing.T) { if err != nil { t.Fatalf("get statefulset: (%v)", err) } - assert.Equal(t, false, UpdateStatusIfDockerImageHasChanged(rcc.cc, dcRackName, sts, &rcc.cc.Status)) + assert.Equal(t, false, UpdateStatusIfDockerImageHasChanged(rcc.cc, api.DcRackName(dcRackName), sts, &rcc.cc.Status)) } } @@ -477,14 +484,14 @@ func TestUpdateStatusIfDockerImageHasChanged(t *testing.T) { if err != nil { t.Fatalf("get statefulset: (%v)", err) } - assert.Equal(t, true, UpdateStatusIfDockerImageHasChanged(rcc.cc, dcRackName, sts, &rcc.cc.Status)) + assert.Equal(t, true, UpdateStatusIfDockerImageHasChanged(rcc.cc, api.DcRackName(dcRackName), sts, &rcc.cc.Status)) } } } func assertRackStatusPhase(assert *assert.Assertions, rcc *CassandraClusterReconciler, dcRackName string, expectedPhase api.ClusterStateInfo) { - assert.Equal(expectedPhase.Name, rcc.cc.Status.CassandraRackStatus[dcRackName].Phase, dcRackName + " phase") + assert.Equal(expectedPhase.Name, rcc.cc.Status.CassandraRackStatus[dcRackName].Phase, dcRackName+" phase") } func assertClusterStatusPhase(assert *assert.Assertions, rcc *CassandraClusterReconciler, expectedPhase api.ClusterStateInfo) { diff --git a/controllers/cassandracluster/cassandra_util.go b/controllers/cassandracluster/cassandra_util.go index ed81a134..a0f9a3f7 100644 --- a/controllers/cassandracluster/cassandra_util.go +++ b/controllers/cassandracluster/cassandra_util.go @@ -16,19 +16,18 @@ package cassandracluster import ( "context" - api "github.com/cscetbon/casskop/api/v2" - v1 "k8s.io/api/core/v1" + api "github.com/cscetbon/casskop/api/v2" "github.com/cscetbon/casskop/pkg/k8s" "github.com/sirupsen/logrus" ) -//hasNoPodDisruption return true if there is no Disruption in the Pods of the 
cassandra Cluster +// hasNoPodDisruption return true if there is no Disruption in the Pods of the cassandra Cluster func (rcc *CassandraClusterReconciler) hasNoPodDisruption() bool { return rcc.storedPdb.Status.DisruptionsAllowed > 0 || rcc.storedPdb.Status.ExpectedPods == 0 } -//weAreScalingDown return true if we are Scaling Down the provided dc-rack +// weAreScalingDown return true if we are Scaling Down the provided dc-rack func (rcc *CassandraClusterReconciler) weAreScalingDown(dcRackStatus *api.CassandraRackStatus) bool { if dcRackStatus.CassandraLastAction.Name == api.ActionScaleDown.Name && (dcRackStatus.CassandraLastAction.Status == api.StatusToDo || @@ -39,35 +38,6 @@ func (rcc *CassandraClusterReconciler) weAreScalingDown(dcRackStatus *api.Cassan return false } -func cassandraPodIsReady(pod *v1.Pod) bool { - cassandraContainerStatus := getCassandraContainerStatus(pod) - - if cassandraContainerStatus != nil && cassandraContainerStatus.Name == cassandraContainerName && - pod.Status.Phase == v1.PodRunning && cassandraContainerStatus.Ready { - return true - } - return false -} - -func getCassandraContainerStatus(pod *v1.Pod) *v1.ContainerStatus { - - for i := range pod.Status.ContainerStatuses { - if pod.Status.ContainerStatuses[i].Name == cassandraContainerName { - return &pod.Status.ContainerStatuses[i] - } - } - return nil -} - -func cassandraPodRestartCount(pod *v1.Pod) int32 { - for idx := range pod.Status.ContainerStatuses { - if pod.Status.ContainerStatuses[idx].Name == cassandraContainerName { - return pod.Status.ContainerStatuses[idx].RestartCount - } - } - return 0 -} - // DeletePVC deletes persistentvolumes of nodes in a rack func (rcc *CassandraClusterReconciler) DeletePVCs(ctx context.Context, cc *api.CassandraCluster, dcName string, rackName string) { lpvc, err := rcc.ListPVC(ctx, cc.Namespace, k8s.LabelsForCassandraDCRack(cc, dcName, rackName)) diff --git a/controllers/cassandracluster/cassandracluster_controller.go 
b/controllers/cassandracluster/cassandracluster_controller.go index 3007ba33..7de0ad4d 100644 --- a/controllers/cassandracluster/cassandracluster_controller.go +++ b/controllers/cassandracluster/cassandracluster_controller.go @@ -16,10 +16,13 @@ package cassandracluster import ( "context" + "time" + + "github.com/cscetbon/casskop/controllers/cassandracluster/storagestateclient" + "github.com/cscetbon/casskop/controllers/cassandracluster/sts" "github.com/go-logr/logr" ctrl "sigs.k8s.io/controller-runtime" logf "sigs.k8s.io/controller-runtime/pkg/log" - "time" "github.com/sirupsen/logrus" @@ -53,6 +56,9 @@ type CassandraClusterReconciler struct { storedPdb *policyv1.PodDisruptionBudget storedStatefulSet *appsv1.StatefulSet + + storagestateclient.StorageStateClient + sts.StsClient } // +kubebuilder:rbac:groups=db.orange.com,resources=cassandraclusters,verbs=get;list;watch;create;update;patch;delete diff --git a/controllers/cassandracluster/cassandrapod/cassandra_pod.go b/controllers/cassandracluster/cassandrapod/cassandra_pod.go new file mode 100644 index 00000000..d69ba89b --- /dev/null +++ b/controllers/cassandracluster/cassandrapod/cassandra_pod.go @@ -0,0 +1,32 @@ +package cassandrapod + +import ( + "github.com/cscetbon/casskop/controllers/cassandracluster/consts" + v1 "k8s.io/api/core/v1" +) + +func IsReady(pod *v1.Pod) bool { + cassandraContainerStatus := getContainerStatus(pod) + if cassandraContainerStatus != nil && pod.Status.Phase == v1.PodRunning && cassandraContainerStatus.Ready { + return true + } + return false +} + +func getContainerStatus(pod *v1.Pod) *v1.ContainerStatus { + for i := range pod.Status.ContainerStatuses { + if pod.Status.ContainerStatuses[i].Name == consts.CassandraContainerName { + return &pod.Status.ContainerStatuses[i] + } + } + return nil +} + +func RestartCount(pod *v1.Pod) int32 { + for idx := range pod.Status.ContainerStatuses { + if pod.Status.ContainerStatuses[idx].Name == consts.CassandraContainerName { + return 
pod.Status.ContainerStatuses[idx].RestartCount + } + } + return 0 +} diff --git a/controllers/cassandracluster/consts/consts.go b/controllers/cassandracluster/consts/consts.go new file mode 100644 index 00000000..dd1a199c --- /dev/null +++ b/controllers/cassandracluster/consts/consts.go @@ -0,0 +1,6 @@ +package consts + +const ( + DataPVCName = "data" + CassandraContainerName = "cassandra" +) diff --git a/controllers/cassandracluster/deploy_cassandra.go b/controllers/cassandracluster/deploy_cassandra.go index 81c99220..e24a32e5 100644 --- a/controllers/cassandracluster/deploy_cassandra.go +++ b/controllers/cassandracluster/deploy_cassandra.go @@ -17,6 +17,7 @@ package cassandracluster import ( "context" "fmt" + api "github.com/cscetbon/casskop/api/v2" policyv1 "k8s.io/api/policy/v1" @@ -89,17 +90,18 @@ func (rcc *CassandraClusterReconciler) podDisruptionBudgetEnvelope(cc *api.Cassa // take dcRackName to accordingly named the statefulset // take dc and rack index of dc and rack in conf to retrieve according nodeselectors labels func (rcc *CassandraClusterReconciler) ensureCassandraStatefulSet(ctx context.Context, cc *api.CassandraCluster, - status *api.CassandraClusterStatus, dcName string, dcRackName string, dc int, rack int) (bool, error) { + status *api.CassandraClusterStatus, completeDcRackName api.CompleteRackName, dc int, rack int) (bool, error) { labels, nodeSelector := k8s.DCRackLabelsAndNodeSelectorForStatefulSet(cc, dc, rack) - ss, err := generateCassandraStatefulSet(cc, status, dcName, dcRackName, labels, nodeSelector, nil) + ss, err := generateCassandraStatefulSet(cc, status, completeDcRackName.DcName.String(), + completeDcRackName.DcRackName.String(), labels, nodeSelector, nil) if err != nil { return true, err } k8s.AddOwnerRefToObject(ss, k8s.AsOwner(cc)) - breakResyncloop, err := rcc.CreateOrUpdateStatefulSet(ctx, ss, status, dcRackName) + breakResyncloop, err := rcc.CreateOrUpdateStatefulSet(ctx, ss, status, completeDcRackName) if err != nil && 
!apierrors.IsAlreadyExists(err) { return breakResyncloop, fmt.Errorf("failed to create cassandra statefulset: %v", err) } diff --git a/controllers/cassandracluster/generator.go b/controllers/cassandracluster/generator.go index 5078ffe6..b5babad5 100644 --- a/controllers/cassandracluster/generator.go +++ b/controllers/cassandracluster/generator.go @@ -17,8 +17,10 @@ package cassandracluster import ( "encoding/json" "fmt" + "github.com/Jeffail/gabs" "github.com/banzaicloud/k8s-objectmatcher/patch" + "github.com/cscetbon/casskop/controllers/cassandracluster/consts" "github.com/sirupsen/logrus" appsv1 "k8s.io/api/apps/v1" @@ -45,7 +47,6 @@ type JvmMemory struct { /*Bunch of different constants*/ const ( - cassandraContainerName = "cassandra" bootstrapContainerName = "bootstrap" cassConfigBuilderName = "config-builder" cassBaseConfigBuilderName = "base-config-builder" @@ -272,7 +273,7 @@ func generateVolumeClaimTemplate(cc *api.CassandraCluster, labels map[string]str pvc = []v1.PersistentVolumeClaim{ { ObjectMeta: metav1.ObjectMeta{ - Name: "data", + Name: consts.DataPVCName, Labels: labels, }, Spec: v1.PersistentVolumeClaimSpec{ @@ -423,7 +424,7 @@ func generateCassandraStatefulSet(cc *api.CassandraCluster, status *api.Cassandr addBootstrapContainerEnvVarsToSidecars(bootstrapContainer, ss) - if err := patch.DefaultAnnotator.SetLastAppliedAnnotation(ss); err != nil { + if err = patch.DefaultAnnotator.SetLastAppliedAnnotation(ss); err != nil { logrus.Warnf("[%s]: error while applying LastApplied Annotation on Statefulset", cc.Name) } return ss, nil @@ -467,7 +468,7 @@ func removeDuplicateVolumeMountsFromContainers(containers []v1.Container, if value == vm.MountPath { found = true volumeToRemove[vm.Name] = true - if container.Name != cassandraContainerName { + if container.Name != consts.CassandraContainerName { vm.Name = key newVolumesMounts = append(newVolumesMounts, vm) } @@ -489,7 +490,7 @@ func volumeClaimMapNameMountPath(containers []v1.Container, 
volumeClaimTemplate []v1.PersistentVolumeClaim) map[string]string { var cassandraContainerVolumeMounts []v1.VolumeMount for _, container := range containers { - if container.Name == cassandraContainerName { + if container.Name == consts.CassandraContainerName { cassandraContainerVolumeMounts = container.VolumeMounts } } @@ -507,7 +508,7 @@ func volumeClaimMapNameMountPath(containers []v1.Container, func addBootstrapContainerEnvVarsToSidecars(bootstrapContainer v1.Container, ss *appsv1.StatefulSet) { for idx, container := range ss.Spec.Template.Spec.Containers { - if container.Name != cassandraContainerName { + if container.Name != consts.CassandraContainerName { ss.Spec.Template.Spec.Containers[idx].Env = append(container.Env, bootstrapContainer.Env...) } } @@ -971,7 +972,7 @@ func createCassandraContainer(cc *api.CassandraCluster, status *api.CassandraClu } } cassandraContainer := v1.Container{ - Name: cassandraContainerName, + Name: consts.CassandraContainerName, Image: cc.Spec.CassandraImage, ImagePullPolicy: cc.Spec.ImagePullPolicy, Command: command, diff --git a/controllers/cassandracluster/generator_test.go b/controllers/cassandracluster/generator_test.go index 2d9bb751..33c45bab 100644 --- a/controllers/cassandracluster/generator_test.go +++ b/controllers/cassandracluster/generator_test.go @@ -21,6 +21,9 @@ import ( "testing" "github.com/Jeffail/gabs" + "github.com/cscetbon/casskop/controllers/cassandracluster/consts" + "github.com/cscetbon/casskop/controllers/cassandracluster/storagestateclient" + "github.com/cscetbon/casskop/controllers/cassandracluster/sts" "github.com/cscetbon/casskop/controllers/common" "github.com/ghodss/yaml" "k8s.io/apimachinery/pkg/runtime" @@ -55,7 +58,12 @@ func helperInitCluster(t *testing.T, name string) (*CassandraClusterReconciler, fakeClientScheme.AddKnownTypes(api.GroupVersion, &ccList) cl := fake.NewClientBuilder().WithScheme(fakeClientScheme).WithRuntimeObjects(objs...).WithStatusSubresource(&cc).Build() // Create a 
CassandraClusterReconciler object with the scheme and fake client. - rcc := CassandraClusterReconciler{Client: cl, Scheme: fakeClientScheme} + rcc := CassandraClusterReconciler{ + Client: cl, + StorageStateClient: storagestateclient.New(cl), + StsClient: sts.NewClient(cl), + Scheme: fakeClientScheme, + } cc.InitCassandraRackList() return &rcc, &cc @@ -469,7 +477,7 @@ func TestCassandraStatefulSetHasNoDuplicateVolumes(t *testing.T) { assert := assert.New(t) cassandraContainer := sts.Spec.Template.Spec.Containers[2] - assert.Equal(cassandraContainer.Name, cassandraContainerName) + assert.Equal(cassandraContainer.Name, consts.CassandraContainerName) cassandraLogVolumeMounts := 0 for _, vol := range cassandraContainer.VolumeMounts { if vol.MountPath == "/var/log/cassandra" { @@ -531,7 +539,7 @@ func checkLiveAndReadiNessProbe(t *testing.T, containers []v1.Container, livenessFailureThreshold, livenessSuccessThreshold int32) { for _, c := range containers { - if c.Name == cassandraContainerName { + if c.Name == consts.CassandraContainerName { // Readiness Config check assert.Equal(t, readinessInitialDelaySecond, c.ReadinessProbe.InitialDelaySeconds) assert.Equal(t, readinessTimeoutSeconds, c.ReadinessProbe.TimeoutSeconds) @@ -736,7 +744,7 @@ func checkVarEnv(t *testing.T, containers []v1.Container, cc *api.CassandraClust } for _, container := range containers { - if container.Name != cassandraContainerName { + if container.Name != consts.CassandraContainerName { for _, env := range container.Env { if env.Name == "POD_IP" { continue diff --git a/controllers/cassandracluster/pod.go b/controllers/cassandracluster/pod.go index ed2c6516..9b89be3b 100644 --- a/controllers/cassandracluster/pod.go +++ b/controllers/cassandracluster/pod.go @@ -21,6 +21,8 @@ import ( "sort" "strconv" + api "github.com/cscetbon/casskop/api/v2" + "github.com/cscetbon/casskop/controllers/cassandracluster/cassandrapod" "github.com/cscetbon/casskop/pkg/k8s" v1 "k8s.io/api/core/v1" @@ -71,7 +73,7 @@ 
func GetLastOrFirstPod(podsList *v1.PodList, last bool) (*v1.Pod, error) { func GetLastOrFirstPodReady(podsList []v1.Pod, last bool) (*v1.Pod, error) { var readyPods []v1.Pod for _, pod := range podsList { - if cassandraPodIsReady(&pod) { + if cassandrapod.IsReady(&pod) { readyPods = append(readyPods, pod) } } @@ -159,8 +161,9 @@ func (rcc *CassandraClusterReconciler) UpdatePodLabel(ctx context.Context, pod * // - for lake of resources cpu/memory // - with bad docker image (imagepullbackoff) // - or else to add -func (rcc *CassandraClusterReconciler) hasUnschedulablePod(ctx context.Context, namespace string, dcName, rackName string) bool { - podsList, err := rcc.ListPods(ctx, rcc.cc.Namespace, k8s.LabelsForCassandraDCRack(rcc.cc, dcName, rackName)) +func (rcc *CassandraClusterReconciler) hasUnschedulablePod(ctx context.Context, completeDcRackName api.CompleteRackName) bool { + labelsForList := k8s.LabelsForCassandraDCRackStrongTypes(rcc.cc, completeDcRackName.DcName, completeDcRackName.RackName) + podsList, err := rcc.ListPods(ctx, rcc.cc.Namespace, labelsForList) if err != nil || len(podsList.Items) < 1 { return false } diff --git a/controllers/cassandracluster/pod_operation.go b/controllers/cassandracluster/pod_operation.go index 1e70b31b..62e89fa5 100644 --- a/controllers/cassandracluster/pod_operation.go +++ b/controllers/cassandracluster/pod_operation.go @@ -158,11 +158,11 @@ func (rcc *CassandraClusterReconciler) hasJoiningNodes(ctx context.Context, cc * } // addPodOperationLabels will add Pod Labels labels on all Pod in the Current dcRackName -func (rcc *CassandraClusterReconciler) addPodOperationLabels(ctx context.Context, cc *api.CassandraCluster, dcName string, - rackName string, labels map[string]string) { - dcRackName := cc.GetDCRackName(dcName, rackName) +func (rcc *CassandraClusterReconciler) addPodOperationLabels(ctx context.Context, cc *api.CassandraCluster, + completeDcRackName api.CompleteRackName, labels map[string]string) { + //Select all Pods 
in the Rack - selector := k8s.MergeLabels(k8s.LabelsForCassandraDCRack(cc, dcName, rackName)) + selector := k8s.MergeLabels(k8s.LabelsForCassandraDCRackStrongTypes(cc, completeDcRackName.DcName, completeDcRackName.RackName)) podsList, err := rcc.ListPods(ctx, cc.Namespace, selector) @@ -180,10 +180,10 @@ func (rcc *CassandraClusterReconciler) addPodOperationLabels(ctx context.Context pod.SetLabels(newlabels) err = rcc.UpdatePod(ctx, &pod) if err != nil { - logrus.Errorf("[%s][%s]:[%s] UpdatePod Error: %v", cc.Name, dcRackName, pod.Name, err) + logrus.Errorf("[%s][%s]:[%s] UpdatePod Error: %v", cc.Name, completeDcRackName.DcRackName, pod.Name, err) } - logrus.Infof("[%s][%s]:[%s] UpdatePod Labels: %v", cc.Name, dcRackName, pod.Name, labels) + logrus.Infof("[%s][%s]:[%s] UpdatePod Labels: %v", cc.Name, completeDcRackName.DcRackName, pod.Name, labels) } } diff --git a/controllers/cassandracluster/pods/pods_client.go b/controllers/cassandracluster/pods/pods_client.go new file mode 100644 index 00000000..a12b700f --- /dev/null +++ b/controllers/cassandracluster/pods/pods_client.go @@ -0,0 +1,11 @@ +package pods + +import ( + "context" + + v1 "k8s.io/api/core/v1" +) + +type PodsClient interface { + ListPods(ctx context.Context, namespace string, selector map[string]string) (*v1.PodList, error) +} diff --git a/controllers/cassandracluster/pvc.go b/controllers/cassandracluster/pvc.go index d864e32d..188686f0 100644 --- a/controllers/cassandracluster/pvc.go +++ b/controllers/cassandracluster/pvc.go @@ -18,39 +18,8 @@ import ( "context" v1 "k8s.io/api/core/v1" - "sigs.k8s.io/controller-runtime/pkg/client" - - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/types" ) -func (rcc *CassandraClusterReconciler) GetPVC(ctx context.Context, namespace, name string) (*v1.PersistentVolumeClaim, error) { - - o := &v1.PersistentVolumeClaim{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: namespace, - }, - } - return 
o, rcc.Client.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, o) -} - -func (rcc *CassandraClusterReconciler) ListPVC(ctx context.Context, namespace string, - selector map[string]string) (*v1.PersistentVolumeClaimList, error) { - - clientOpt := &client.ListOptions{Namespace: namespace, LabelSelector: labels.SelectorFromSet(selector)} - opt := []client.ListOption{ - clientOpt, - } - - o := &v1.PersistentVolumeClaimList{} - - return o, rcc.Client.List(ctx, o, opt...) -} - func (rcc *CassandraClusterReconciler) deletePVC(ctx context.Context, pvc *v1.PersistentVolumeClaim) error { - return rcc.Client.Delete(ctx, pvc) - } diff --git a/controllers/cassandracluster/reconcile.go b/controllers/cassandracluster/reconcile.go index 595c2322..ae7bfb33 100644 --- a/controllers/cassandracluster/reconcile.go +++ b/controllers/cassandracluster/reconcile.go @@ -24,6 +24,9 @@ import ( "time" api "github.com/cscetbon/casskop/api/v2" + "github.com/cscetbon/casskop/controllers/cassandracluster/cassandrapod" + "github.com/cscetbon/casskop/controllers/cassandracluster/storageupsize" + "github.com/cscetbon/casskop/controllers/cassandracluster/sts" "github.com/cscetbon/casskop/pkg/k8s" "github.com/prometheus/client_golang/prometheus" "github.com/r3labs/diff" @@ -142,21 +145,36 @@ func (rcc *CassandraClusterReconciler) CheckNonAllowedChanges(ctx context.Contex } for dc := 0; dc < cc.GetDCSize(); dc++ { - dcName := cc.GetDCName(dc) - //DataCapacity change is forbidden - if cc.GetDataCapacityForDC(dcName) != oldCRD.GetDataCapacityForDC(dcName) { + dcName := cc.GetDCNameStrongType(dc) + + oldCapacity := oldCRD.GetDataCapacityForDCName(dcName) + requestedCapacity := cc.GetDataCapacityForDCName(dcName) + dataCapacityChange := storageupsize.AnalyzeDataCapacityChange(oldCapacity, requestedCapacity) + switch dataCapacityChange { + case storageupsize.CapacityUpsize: + logrus.WithFields(logrus.Fields{"cluster": cc.Name, "dcName": dcName}). 
+ Infof("The Operator has accepted the DataCapacity upsize from [%s] to NewValue[%s]", + oldCapacity, requestedCapacity) + case storageupsize.CapacityDownsize: logrus.WithFields(logrus.Fields{"cluster": cc.Name, "dcName": dcName}). Warningf("The Operator has refused the change on DataCapacity from [%s] to NewValue[%s]", - oldCRD.GetDataCapacityForDC(dcName), cc.GetDataCapacityForDC(dcName)) + oldCapacity, requestedCapacity) cc.Spec.DataCapacity = oldCRD.Spec.DataCapacity cc.Spec.Topology.DC[dc].DataCapacity = oldCRD.Spec.Topology.DC[dc].DataCapacity needUpdate = true + case storageupsize.CapacitySyntacticChange: + logrus.WithFields(logrus.Fields{"cluster": cc.Name, "dcName": dcName}). + Debugf("The Operator has ignored the change on DataCapacity from [%s] to NewValue[%s] "+ + "as semantically nothing changes", oldCapacity, requestedCapacity) + default: + // nothing to do } + //DataStorage - if cc.GetDataStorageClassForDC(dcName) != oldCRD.GetDataStorageClassForDC(dcName) { + if cc.GetDataStorageClassForDCName(dcName) != oldCRD.GetDataStorageClassForDCName(dcName) { logrus.WithFields(logrus.Fields{"cluster": cc.Name, "dcName": dcName}). 
Warningf("The Operator has refused the change on DataStorageClass from [%s] to NewValue[%s]", - oldCRD.GetDataStorageClassForDC(dcName), cc.GetDataStorageClassForDC(dcName)) + oldCRD.GetDataStorageClassForDCName(dcName), cc.GetDataStorageClassForDCName(dcName)) cc.Spec.DataStorageClass = oldCRD.Spec.DataStorageClass cc.Spec.Topology.DC[dc].DataStorageClass = oldCRD.Spec.Topology.DC[dc].DataStorageClass needUpdate = true @@ -450,6 +468,11 @@ func (rcc *CassandraClusterReconciler) ReconcileRack(ctx context.Context, cc *ap if dcRackName == "" { return fmt.Errorf("name used for DC and/or Rack are not good") } + completeDcRackName := api.CompleteRackName{ + DcName: api.DcName(dcName), + RackName: api.RackName(rackName), + DcRackName: api.DcRackName(dcRackName), + } //If we have added a dc/rack to the CRD, we add it to the Status if _, exists := status.CassandraRackStatus[dcRackName]; !exists { @@ -468,23 +491,23 @@ func (rcc *CassandraClusterReconciler) ReconcileRack(ctx context.Context, cc *ap continue } Name := cc.Name + "-" + dcRackName - storedStatefulSet, err := rcc.GetStatefulSet(ctx, cc.Namespace, Name) - if err != nil { + storedStatefulSet, getStsErr := rcc.GetStatefulSet(ctx, cc.Namespace, Name) + if getStsErr != nil { logrus.WithFields(logrus.Fields{"cluster": cc.Name, - "dc-rack": dcRackName}).Infof("failed to get cassandra's statefulset (%s) %v", Name, err) + "dc-rack": dcRackName}).Infof("failed to get cassandra's statefulset (%s) %v", Name, getStsErr) } else { //Update CassandraClusterPhase - rcc.UpdateCassandraRackStatusPhase(ctx, cc, dcName, rackName, storedStatefulSet, status) + rcc.UpdateCassandraRackStatusPhase(ctx, cc, completeDcRackName, storedStatefulSet, status) //Find if there is an Action to execute/end - rcc.getNextCassandraClusterStatus(ctx, cc, dc, rack, dcName, rackName, storedStatefulSet, status) + rcc.getNextCassandraClusterStatus(ctx, cc, dc, rack, completeDcRackName, storedStatefulSet, status) //If not Initializing cluster execute pod 
operations queued if dcRackStatus.Phase != api.ClusterPhaseInitial.Name { // Check if there are joining nodes and break the loop if there are breakResyncloop, err := rcc.handlePodOperation(ctx, cc, dcName, rackName, status, - !isStatefulSetNotReady(storedStatefulSet)) + sts.IsStatefulSetReady(storedStatefulSet)) if err != nil { logrus.WithFields(logrus.Fields{"cluster": cc.Name, "dc-rack": dcRackName, "err": err}).Error("Executing pod operation failed") @@ -524,7 +547,22 @@ func (rcc *CassandraClusterReconciler) ReconcileRack(ctx context.Context, cc *ap "dc-rack": dcRackName}).Errorf("ensureCassandraServiceMonitoring Error: %v", err) } - breakLoop, err := rcc.ensureCassandraStatefulSet(ctx, cc, status, dcName, dcRackName, dc, rack) + if rcc.IsStorageUpsizeStarted(dcRackStatus) { + if getStsErr != nil && !apierrors.IsNotFound(getStsErr) { + logrus.WithFields(logrus.Fields{"cluster": cc.Name, "dc-rack": dcRackName}). + Infof("cannot continue storage upsize because: failed to get cassandra's statefulset (%s) %v", + Name, getStsErr) + return nil + } + if getStsErr != nil && apierrors.IsNotFound(getStsErr) { + rcc.storedStatefulSet = nil + } else { + rcc.storedStatefulSet = storedStatefulSet + } + return rcc.ReconcileStorageUpsize(ctx, cc, status, completeDcRackName) + } + + breakLoop, err := rcc.ensureCassandraStatefulSet(ctx, cc, status, completeDcRackName, dc, rack) if err != nil { logrus.WithFields(logrus.Fields{"cluster": cc.Name, "dc-rack": dcRackName}).Errorf("ensureCassandraStatefulSet Error: %v", err) @@ -786,7 +824,7 @@ func processingPods(hostIDMap map[string]string, restartCountBeforePodDeletion i // For each pod for _, pod := range podsList { - cassandraPodRestartCount := cassandraPodRestartCount(&pod) + cassandraPodRestartCount := cassandrapod.RestartCount(&pod) // If the cassandra container has performed more restart than allowed if restartCountBeforePodDeletion > 0 && cassandraPodRestartCount > restartCountBeforePodDeletion { 
logrus.WithFields(logrus.Fields{"pod": pod.Name}).Debug(fmt.Sprintf("Pod found in restart status with %d restart", restartCountBeforePodDeletion)) @@ -802,7 +840,7 @@ func updateCassandraNodesStatusForPod(hostIDMap map[string]string, pod *v1.Pod, // Update Pod, HostId, Ip couple cached into status hostId, keyFound := hostIDMap[pod.Status.PodIP] - if keyFound && cassandraPodIsReady(pod) { + if keyFound && cassandrapod.IsReady(pod) { status.CassandraNodesStatus[pod.Name] = api.CassandraNodeStatus{HostId: hostId, NodeIp: pod.Status.PodIP} } } diff --git a/controllers/cassandracluster/reconcile_test.go b/controllers/cassandracluster/reconcile_test.go index b5fd29ce..f0209c9b 100644 --- a/controllers/cassandracluster/reconcile_test.go +++ b/controllers/cassandracluster/reconcile_test.go @@ -21,6 +21,7 @@ import ( "reflect" "testing" + "github.com/cscetbon/casskop/controllers/cassandracluster/consts" "github.com/cscetbon/casskop/controllers/common" "k8s.io/apimachinery/pkg/api/resource" @@ -359,7 +360,7 @@ func TestCheckNonAllowedChangesMix1(t *testing.T) { //Forbidden Changes //Global ScaleDown to 0 must be ignored cc.Spec.NodesPerRacks = 0 //instead of 1 - cc.Spec.DataCapacity = "4Gi" //instead of "3Gi" + cc.Spec.DataCapacity = "2Gi" //instead of "3Gi" cc.Spec.DataStorageClass = "fast" //instead of "local-storage" //Allow Changed cc.Spec.AutoPilot = false //instead of true @@ -678,11 +679,11 @@ func TestUpdateCassandraNodesStatusForPod(t *testing.T) { Phase: v1.PodRunning, ContainerStatuses: []v1.ContainerStatus{ { - Name: cassandraContainerName, + Name: consts.CassandraContainerName, Ready: ccReady, }, { - Name: cassandraContainerName + "B", + Name: consts.CassandraContainerName + "B", Ready: !ccReady, }, }, @@ -743,11 +744,11 @@ func TestCheckPodCrossIpUseCaseForPodKey(t *testing.T) { Phase: v1.PodRunning, ContainerStatuses: []v1.ContainerStatus{ { - Name: cassandraContainerName, + Name: consts.CassandraContainerName, Ready: ccReady, }, { - Name: 
cassandraContainerName + "B", + Name: consts.CassandraContainerName + "B", Ready: !ccReady, }, }, @@ -806,12 +807,12 @@ func TestProcessingPods(t *testing.T) { Phase: v1.PodRunning, ContainerStatuses: []v1.ContainerStatus{ { - Name: cassandraContainerName, + Name: consts.CassandraContainerName, Ready: true, RestartCount: restartCount, }, { - Name: cassandraContainerName + "B", + Name: consts.CassandraContainerName + "B", Ready: true, RestartCount: 10000, }, diff --git a/controllers/cassandracluster/statefulset.go b/controllers/cassandracluster/statefulset.go index 2c9f6fe3..d59b07c4 100644 --- a/controllers/cassandracluster/statefulset.go +++ b/controllers/cassandracluster/statefulset.go @@ -24,10 +24,8 @@ import ( "strings" "time" - "github.com/banzaicloud/k8s-objectmatcher/patch" - "k8s.io/apimachinery/pkg/util/wait" - "github.com/allamand/godebug/pretty" + "github.com/banzaicloud/k8s-objectmatcher/patch" api "github.com/cscetbon/casskop/api/v2" "github.com/cscetbon/casskop/pkg/k8s" "github.com/sirupsen/logrus" @@ -36,6 +34,7 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" ) var ( @@ -72,19 +71,6 @@ func (rcc *CassandraClusterReconciler) DeleteStatefulSet(ctx context.Context, na return rcc.Client.Delete(ctx, ss) } -// CreateStatefulSet create a new statefulset ss -func (rcc *CassandraClusterReconciler) CreateStatefulSet(ctx context.Context, statefulSet *appsv1.StatefulSet) error { - err := rcc.Client.Create(ctx, statefulSet) - if err != nil { - if !apierrors.IsAlreadyExists(err) { - return fmt.Errorf("statefulset already exists: %cc", err) - } - return fmt.Errorf("failed to create cassandra statefulset: %cc", err) - //return err - } - return nil -} - // UpdateStatefulSet updates an existing statefulset ss func (rcc *CassandraClusterReconciler) UpdateStatefulSet(ctx context.Context, statefulSet *appsv1.StatefulSet) error { revision := 
statefulSet.ResourceVersion @@ -204,7 +190,10 @@ func tryJsonPrettyPrint(patchResult []byte) string { // CreateOrUpdateStatefulSet Create statefulset if not found, or update it func (rcc *CassandraClusterReconciler) CreateOrUpdateStatefulSet(ctx context.Context, statefulSet *appsv1.StatefulSet, - status *api.CassandraClusterStatus, dcRackName string) (bool, error) { + status *api.CassandraClusterStatus, completeDcRackName api.CompleteRackName) (bool, error) { + + dcRackName := completeDcRackName.DcRackName + // if there is an existing pod disruptions // Or if we are not scaling Down the current statefulset if !rcc.hasNoPodDisruption() { @@ -218,7 +207,7 @@ func (rcc *CassandraClusterReconciler) CreateOrUpdateStatefulSet(ctx context.Con } } - dcRackStatus := status.CassandraRackStatus[dcRackName] + dcRackStatus := status.GetCassandraRackStatus(dcRackName) var err error now := metav1.Now() @@ -278,6 +267,8 @@ func (rcc *CassandraClusterReconciler) CreateOrUpdateStatefulSet(ctx context.Con *statefulSet.Spec.Replicas = *rcc.storedStatefulSet.Spec.Replicas + incrementValue } + rcc.RevertAnyStorageUpsizeBeyondUpsizeAction(completeDcRackName, dcRackStatus, statefulSet) + if dcRackStatus.CassandraLastAction.Name == api.ActionRollingRestart.Name && dcRackStatus.CassandraLastAction.Status == api.StatusToDo { statefulSet.Spec.Template.SetLabels(k8s.MergeLabels(statefulSet.Spec.Template.GetLabels(), map[string]string{ @@ -334,7 +325,3 @@ func getStoredSeedList(storedStatefulSet *appsv1.StatefulSet) []string { } return []string{} } - -func isStatefulSetNotReady(storedStatefulSet *appsv1.StatefulSet) bool { - return storedStatefulSet.Status.ReadyReplicas != *storedStatefulSet.Spec.Replicas -} diff --git a/controllers/cassandracluster/storage_upsize_adapter.go b/controllers/cassandracluster/storage_upsize_adapter.go new file mode 100644 index 00000000..3126c9d0 --- /dev/null +++ b/controllers/cassandracluster/storage_upsize_adapter.go @@ -0,0 +1,116 @@ +package cassandracluster + 
+import ( + "context" + + api "github.com/cscetbon/casskop/api/v2" + "github.com/cscetbon/casskop/controllers/cassandracluster/pods" + "github.com/cscetbon/casskop/controllers/cassandracluster/storagestateclient" + "github.com/cscetbon/casskop/controllers/cassandracluster/storageupsize" + "github.com/cscetbon/casskop/controllers/cassandracluster/sts" + "github.com/cscetbon/casskop/controllers/cassandracluster/view" + "github.com/cscetbon/casskop/pkg/k8s" + "github.com/sirupsen/logrus" + appsv1 "k8s.io/api/apps/v1" +) + +func (rcc *CassandraClusterReconciler) RevertAnyStorageUpsizeBeyondUpsizeAction(completeDcRackName api.CompleteRackName, + dcRackStatus *api.CassandraRackStatus, statefulSet *appsv1.StatefulSet) { + + storageupsize.RevertAnyStorageUpsizeBeyondUpsizeAction(rcc.newRackView(completeDcRackName, dcRackStatus), statefulSet) +} + +func (rcc *CassandraClusterReconciler) UpdateStatusIfStorageUpsize(completeDcRackName api.CompleteRackName, + status *api.CassandraClusterStatus) bool { + + rackView := rcc.newRackView(completeDcRackName, status.GetCassandraRackStatus(completeDcRackName.DcRackName)) + if rcc.shouldStorageUpsizeBeStarted(rackView) { + rcc.startStorageUpsize(rackView) + return true + } + return false +} + +func (rcc *CassandraClusterReconciler) shouldStorageUpsizeBeStarted(rackView view.RackView) bool { + return storageupsize.ShouldBeStarted(rackView, rcc.cc.GetDataCapacityForDCName(rackView.DcName())) +} + +func (rcc *CassandraClusterReconciler) startStorageUpsize(rackView view.RackView) { + storageupsize.Start(rackView) + ClusterPhaseMetric.set(api.ClusterPhasePending, rcc.cc.Name) + ClusterActionMetric.set(api.ActionStorageUpsize, rcc.cc.Name) +} + +func (rcc *CassandraClusterReconciler) IsStorageUpsizeStarted(dcRackStatus *api.CassandraRackStatus) bool { + return storageupsize.IsStarted(dcRackStatus) +} + +func (rcc *CassandraClusterReconciler) ReconcileStorageUpsize(ctx context.Context, cc *api.CassandraCluster, + status 
*api.CassandraClusterStatus, completeDcRackName api.CompleteRackName) error { + + status.GetCassandraRackStatus(completeDcRackName.DcRackName).Phase = api.ClusterPhasePending.Name + ClusterPhaseMetric.set(api.ClusterPhasePending, cc.Name) + + newDataCapacity := generateResourceQuantity(cc.GetDataCapacityForDCName(completeDcRackName.DcName)) + + rackView := rcc.newRackView(completeDcRackName, status.GetCassandraRackStatus(completeDcRackName.DcRackName)) + var storageStateClient storagestateclient.StorageStateClient = rcc + var stsClient sts.StsClient = rcc + var podsClient pods.PodsClient = rcc + + return storageupsize.Reconcile(ctx, cc, rackView, newDataCapacity, storageStateClient, stsClient, podsClient) +} + +func (rcc *CassandraClusterReconciler) newRackView(completeRackName api.CompleteRackName, + dcRackStatus *api.CassandraRackStatus) view.RackView { + + return &rccRackView{ + rcc: rcc, + completeRackName: completeRackName, + dcRackStatus: dcRackStatus, + } +} + +type rccRackView struct { + rcc *CassandraClusterReconciler + completeRackName api.CompleteRackName + dcRackStatus *api.CassandraRackStatus +} + +var _ view.RackView = (*rccRackView)(nil) + +func (v *rccRackView) ClusterName() string { + return v.rcc.cc.Name +} + +func (v *rccRackView) DcName() api.DcName { + return v.completeRackName.DcName +} + +func (v *rccRackView) RackName() api.RackName { + return v.completeRackName.RackName +} + +func (v *rccRackView) DcRackName() api.DcRackName { + return v.completeRackName.DcRackName +} + +func (v *rccRackView) RackStatus() *api.CassandraRackStatus { + return v.dcRackStatus +} + +func (v *rccRackView) LivingStatefulSet() *appsv1.StatefulSet { + return v.rcc.storedStatefulSet +} + +func (v *rccRackView) IsStatefulSetAliveNow() bool { + return v.rcc.storedStatefulSet != nil +} + +func (v *rccRackView) GetLabelsForCassandraDCRack(cc *api.CassandraCluster) map[string]string { + return k8s.LabelsForCassandraDCRackStrongTypes(cc, v.DcName(), v.RackName()) +} + 
+func (v *rccRackView) Log() *logrus.Entry { + return logrus.WithFields(logrus.Fields{"cluster": v.ClusterName(), "dc-rack": v.DcRackName()}) +} diff --git a/controllers/cassandracluster/storagestateclient/mock/mock_storage_state_client.go b/controllers/cassandracluster/storagestateclient/mock/mock_storage_state_client.go new file mode 100644 index 00000000..1e07d12e --- /dev/null +++ b/controllers/cassandracluster/storagestateclient/mock/mock_storage_state_client.go @@ -0,0 +1,27 @@ +package mock + +import ( + "context" + + "github.com/stretchr/testify/mock" + corev1 "k8s.io/api/core/v1" +) + +type StorageStateClient struct { + mock.Mock +} + +func (c *StorageStateClient) GetPVC(ctx context.Context, namespace, name string) (*corev1.PersistentVolumeClaim, error) { + args := c.Called(ctx, namespace, name) + return args.Get(0).(*corev1.PersistentVolumeClaim), args.Error(1) +} + +func (c *StorageStateClient) ListPVC(ctx context.Context, namespace string, selector map[string]string) (*corev1.PersistentVolumeClaimList, error) { + args := c.Called(ctx, namespace, selector) + return args.Get(0).(*corev1.PersistentVolumeClaimList), args.Error(1) +} + +func (c *StorageStateClient) UpdatePVC(ctx context.Context, pvc *corev1.PersistentVolumeClaim) error { + args := c.Called(ctx, pvc) + return args.Error(0) +} diff --git a/controllers/cassandracluster/storagestateclient/storage_state_client.go b/controllers/cassandracluster/storagestateclient/storage_state_client.go new file mode 100644 index 00000000..e3347c2a --- /dev/null +++ b/controllers/cassandracluster/storagestateclient/storage_state_client.go @@ -0,0 +1,56 @@ +package storagestateclient + +import ( + "context" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +func New(client client.Client) StorageStateClient { + return &storageStateClient{ + k8sClient: client, + } +} + 
+type StorageStateClient interface { + GetPVC(ctx context.Context, namespace, name string) (*v1.PersistentVolumeClaim, error) + ListPVC(ctx context.Context, namespace string, selector map[string]string) (*v1.PersistentVolumeClaimList, error) + UpdatePVC(ctx context.Context, pvc *v1.PersistentVolumeClaim) error +} + +var _ StorageStateClient = (*storageStateClient)(nil) + +type storageStateClient struct { + k8sClient client.Client +} + +func (c *storageStateClient) GetPVC(ctx context.Context, namespace, name string) (*v1.PersistentVolumeClaim, error) { + o := &v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + } + return o, c.k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, o) +} + +func (c *storageStateClient) ListPVC(ctx context.Context, namespace string, + selector map[string]string) (*v1.PersistentVolumeClaimList, error) { + + clientOpt := &client.ListOptions{Namespace: namespace, LabelSelector: labels.SelectorFromSet(selector)} + opt := []client.ListOption{ + clientOpt, + } + + o := &v1.PersistentVolumeClaimList{} + + return o, c.k8sClient.List(ctx, o, opt...) 
+} + +func (c *storageStateClient) UpdatePVC(ctx context.Context, pvc *v1.PersistentVolumeClaim) error { + return c.k8sClient.Update(ctx, pvc) +} diff --git a/controllers/cassandracluster/storageupsize/action_api.go b/controllers/cassandracluster/storageupsize/action_api.go new file mode 100644 index 00000000..8f29a43f --- /dev/null +++ b/controllers/cassandracluster/storageupsize/action_api.go @@ -0,0 +1,84 @@ +package storageupsize + +import ( + "context" + + api "github.com/cscetbon/casskop/api/v2" + "github.com/cscetbon/casskop/controllers/cassandracluster/pods" + "github.com/cscetbon/casskop/controllers/cassandracluster/storagestateclient" + as "github.com/cscetbon/casskop/controllers/cassandracluster/storageupsize/actionstep" + "github.com/cscetbon/casskop/controllers/cassandracluster/sts" + "github.com/cscetbon/casskop/controllers/cassandracluster/view" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" +) + +func ShouldBeStarted(rack view.RackView, requestedCapacity string) bool { + requested := silentParseResourceQuantity(requestedCapacity) + _, current := findDataCapacity(rack.LivingStatefulSet().Spec.VolumeClaimTemplates) + if !requested.Equal(current) { + rack.Log().Infof("Storage upsize should be started: ask %v and have %v", requested, current) + return true + } + return false +} + +func Start(rack view.RackView) { + startUpsizeAction(rack) +} + +func IsStarted(dcRackStatus *api.CassandraRackStatus) bool { + return dcRackStatus.CassandraLastAction.Name == api.ActionStorageUpsize.Name && + dcRackStatus.CassandraLastAction.Status != api.StatusDone +} + +// Reconcile performs the storage upsize action steps +// Each step may +// - return an error +// - execute an action and break the loop (if action was not finished before or even not started yet) +// - do nothing and continue to the next step pass (if action was finished before) +// Usually step do its job once and break the loop, then in the next 
reconcile loop this step "pass" and the next step is executed +func Reconcile(ctx context.Context, cc *api.CassandraCluster, rack view.RackView, newDataCapacity resource.Quantity, + storageStateClient storagestateclient.StorageStateClient, stsClient sts.StsClient, podsClient pods.PodsClient) error { + + dataPVCs := make([]corev1.PersistentVolumeClaim, 0) + + steps := []func() as.StepResult{ + func() as.StepResult { return makeOldStatefulSetSnapshot(rack) }, + func() as.StepResult { return removeStatefulSetOrphan(ctx, cc, rack, stsClient) }, + func() as.StepResult { return recreateStatefulSetWithNewCapacity(ctx, rack, newDataCapacity, stsClient) }, + func() as.StepResult { return fetchDataPvcs(ctx, cc, rack, storageStateClient, &dataPVCs) }, + func() as.StepResult { return ensureAllPVCsHaveNewCapacity(ctx, cc, dataPVCs, rack, storageStateClient) }, + func() as.StepResult { return waitTillAllFilesystemsHaveNewCapacity(cc, dataPVCs, rack) }, + func() as.StepResult { return waitTillStatefulSetAndAllPodsAreReady(ctx, cc, rack, podsClient) }, + } + for _, executeStep := range steps { + if stepResult := executeStep(); stepResult.HasError() { + return stepResult.Error() + } else if stepResult.ShouldBreakReconcileLoop() { + return nil + } + } + + return nil +} + +// RevertAnyStorageUpsizeBeyondUpsizeAction reverts any storage capacity changes if upsize action IS NOT started +// current action should finish, then upsize action should be started and then these changes should be applied +func RevertAnyStorageUpsizeBeyondUpsizeAction(rack view.RackView, newStatefulSet *appsv1.StatefulSet) { + if !IsStarted(rack.RackStatus()) { + _, current := findDataCapacity(rack.LivingStatefulSet().Spec.VolumeClaimTemplates) + index, requested := findDataCapacity(newStatefulSet.Spec.VolumeClaimTemplates) + if !requested.Equal(current) { + dataPvcResources := &newStatefulSet.Spec.VolumeClaimTemplates[index].Spec.Resources + if dataPvcResources.Requests == nil { + dataPvcResources.Requests = 
corev1.ResourceList{} + } + dataPvcResources.Requests[corev1.ResourceStorage] = current + rack.Log(). + Infof("Storage Resize request detected, postponing resize from %s to %s until other actions are done", + requested.String(), current.String()) + } + } +} diff --git a/controllers/cassandracluster/storageupsize/action_api_test.go b/controllers/cassandracluster/storageupsize/action_api_test.go new file mode 100644 index 00000000..4bea23b3 --- /dev/null +++ b/controllers/cassandracluster/storageupsize/action_api_test.go @@ -0,0 +1,160 @@ +package storageupsize + +import ( + "testing" + + v2 "github.com/cscetbon/casskop/api/v2" + "github.com/cscetbon/casskop/controllers/cassandracluster/consts" + "github.com/cscetbon/casskop/controllers/cassandracluster/view/stub" + "github.com/stretchr/testify/assert" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" +) + +func TestRevertAnyStorageUpsizeBeyondUpsizeAction(t *testing.T) { + + const InitialCapacity = "10Gi" + const CapacityAfterUpsize = "999Gi" + + t.Run("upsize action in-progress, do not revert upsize", func(t *testing.T) { + currentSts := &appsv1.StatefulSet{Spec: appsv1.StatefulSetSpec{VolumeClaimTemplates: []corev1.PersistentVolumeClaim{ + pvc(consts.DataPVCName, InitialCapacity), + }}} + newSts := &appsv1.StatefulSet{Spec: appsv1.StatefulSetSpec{VolumeClaimTemplates: []corev1.PersistentVolumeClaim{ + pvc(consts.DataPVCName, CapacityAfterUpsize), + }}} + rack := stub.RackView{ + LivingStatefulSetStub: currentSts.DeepCopy(), + RackStatusStub: &v2.CassandraRackStatus{ + CassandraLastAction: v2.CassandraLastAction{ + Name: v2.ActionStorageUpsize.Name, + Status: v2.StatusOngoing, + }, + }, + } + + newStsWithRevertApplied := newSts.DeepCopy() + RevertAnyStorageUpsizeBeyondUpsizeAction(rack, newStsWithRevertApplied) + + assert.Equal(t, resource.MustParse(CapacityAfterUpsize), + 
newStsWithRevertApplied.Spec.VolumeClaimTemplates[0].Spec.Resources.Requests[corev1.ResourceStorage]) + }) + + t.Run("upsize action in-progress, sts resized already, nothing to do", func(t *testing.T) { + currentSts := &appsv1.StatefulSet{Spec: appsv1.StatefulSetSpec{VolumeClaimTemplates: []corev1.PersistentVolumeClaim{ + pvc(consts.DataPVCName, CapacityAfterUpsize), + }}} + newSts := &appsv1.StatefulSet{Spec: appsv1.StatefulSetSpec{VolumeClaimTemplates: []corev1.PersistentVolumeClaim{ + pvc(consts.DataPVCName, CapacityAfterUpsize), + }}} + rack := stub.RackView{ + LivingStatefulSetStub: currentSts.DeepCopy(), + RackStatusStub: &v2.CassandraRackStatus{ + CassandraLastAction: v2.CassandraLastAction{ + Name: v2.ActionStorageUpsize.Name, + Status: v2.StatusOngoing, + }, + }, + } + + newStsWithRevertApplied := newSts.DeepCopy() + RevertAnyStorageUpsizeBeyondUpsizeAction(rack, newStsWithRevertApplied) + + assert.Equal(t, resource.MustParse(CapacityAfterUpsize), + newStsWithRevertApplied.Spec.VolumeClaimTemplates[0].Spec.Resources.Requests[corev1.ResourceStorage]) + }) + + t.Run("no upsize requested, nothing changes", func(t *testing.T) { + currentSts := &appsv1.StatefulSet{Spec: appsv1.StatefulSetSpec{VolumeClaimTemplates: []corev1.PersistentVolumeClaim{ + pvc(consts.DataPVCName, InitialCapacity), + }}} + newSts := &appsv1.StatefulSet{Spec: appsv1.StatefulSetSpec{VolumeClaimTemplates: []corev1.PersistentVolumeClaim{ + pvc(consts.DataPVCName, InitialCapacity), + }}} + rack := stub.RackView{ + LivingStatefulSetStub: currentSts.DeepCopy(), + RackStatusStub: &v2.CassandraRackStatus{ + Phase: v2.ClusterPhaseRunning.Name, + }, + } + + newStsWithRevertApplied := newSts.DeepCopy() + RevertAnyStorageUpsizeBeyondUpsizeAction(rack, newStsWithRevertApplied) + + assert.Equal(t, resource.MustParse(InitialCapacity), + newStsWithRevertApplied.Spec.VolumeClaimTemplates[0].Spec.Resources.Requests[corev1.ResourceStorage]) + }) + + t.Run("upsize requested but action not started yet, revert 
change", func(t *testing.T) { + currentSts := &appsv1.StatefulSet{Spec: appsv1.StatefulSetSpec{VolumeClaimTemplates: []corev1.PersistentVolumeClaim{ + pvc(consts.DataPVCName, InitialCapacity), + }}} + newSts := &appsv1.StatefulSet{Spec: appsv1.StatefulSetSpec{VolumeClaimTemplates: []corev1.PersistentVolumeClaim{ + pvc(consts.DataPVCName, CapacityAfterUpsize), + }}} + rack := stub.RackView{ + LivingStatefulSetStub: currentSts.DeepCopy(), + RackStatusStub: &v2.CassandraRackStatus{ + CassandraLastAction: v2.CassandraLastAction{ + Name: v2.ActionScaleUp.Name, + Status: v2.StatusOngoing, + }, + }, + } + + newStsWithRevertApplied := newSts.DeepCopy() + RevertAnyStorageUpsizeBeyondUpsizeAction(rack, newStsWithRevertApplied) + + assert.Equal(t, resource.MustParse(InitialCapacity), + newStsWithRevertApplied.Spec.VolumeClaimTemplates[0].Spec.Resources.Requests[corev1.ResourceStorage]) + }) + + t.Run("upsize requested but action not started yet (previous upsize done), revert change", func(t *testing.T) { + currentSts := &appsv1.StatefulSet{Spec: appsv1.StatefulSetSpec{VolumeClaimTemplates: []corev1.PersistentVolumeClaim{ + pvc(consts.DataPVCName, InitialCapacity), + }}} + newSts := &appsv1.StatefulSet{Spec: appsv1.StatefulSetSpec{VolumeClaimTemplates: []corev1.PersistentVolumeClaim{ + pvc(consts.DataPVCName, CapacityAfterUpsize), + }}} + rack := stub.RackView{ + LivingStatefulSetStub: currentSts.DeepCopy(), + RackStatusStub: &v2.CassandraRackStatus{ + CassandraLastAction: v2.CassandraLastAction{ + Name: v2.ActionStorageUpsize.Name, + Status: v2.StatusDone, + }, + }, + } + + newStsWithRevertApplied := newSts.DeepCopy() + RevertAnyStorageUpsizeBeyondUpsizeAction(rack, newStsWithRevertApplied) + + assert.Equal(t, resource.MustParse(InitialCapacity), + newStsWithRevertApplied.Spec.VolumeClaimTemplates[0].Spec.Resources.Requests[corev1.ResourceStorage]) + }) + + t.Run("handle unspecified resources", func(t *testing.T) { + currentSts := &appsv1.StatefulSet{Spec: 
appsv1.StatefulSetSpec{VolumeClaimTemplates: []corev1.PersistentVolumeClaim{ + pvc(consts.DataPVCName, InitialCapacity), + }}} + newSts := &appsv1.StatefulSet{Spec: appsv1.StatefulSetSpec{VolumeClaimTemplates: []corev1.PersistentVolumeClaim{ + pvcWithoutSpecifiedResources(consts.DataPVCName), + }}} + rack := stub.RackView{ + LivingStatefulSetStub: currentSts.DeepCopy(), + RackStatusStub: &v2.CassandraRackStatus{ + CassandraLastAction: v2.CassandraLastAction{ + Name: v2.ActionStorageUpsize.Name, + Status: v2.StatusDone, + }, + }, + } + + newStsWithRevertApplied := newSts.DeepCopy() + RevertAnyStorageUpsizeBeyondUpsizeAction(rack, newStsWithRevertApplied) + + assert.Equal(t, resource.MustParse(InitialCapacity), + newStsWithRevertApplied.Spec.VolumeClaimTemplates[0].Spec.Resources.Requests[corev1.ResourceStorage]) + }) +} diff --git a/controllers/cassandracluster/storageupsize/actionstep/action_step_result.go b/controllers/cassandracluster/storageupsize/actionstep/action_step_result.go new file mode 100644 index 00000000..0da009f8 --- /dev/null +++ b/controllers/cassandracluster/storageupsize/actionstep/action_step_result.go @@ -0,0 +1,69 @@ +package actionstep + +type StepResult interface { + HasError() bool + Error() error + ShouldBreakReconcileLoop() bool +} + +func Error(err error) StepResult { + return errorResult{err: err} +} + +func Break() StepResult { + return successfulBreakSingleton +} + +func Pass() StepResult { + return passSingleton +} + +type errorResult struct { + err error +} + +var _ StepResult = errorResult{} + +func (e errorResult) HasError() bool { + return e.err != nil +} + +func (e errorResult) Error() error { + return e.err +} + +func (e errorResult) ShouldBreakReconcileLoop() bool { + return true +} + +type successfulBreak struct{} + +var successfulBreakSingleton StepResult = successfulBreak{} + +func (b successfulBreak) HasError() bool { + return false +} + +func (b successfulBreak) Error() error { + return nil +} + +func (b successfulBreak) 
ShouldBreakReconcileLoop() bool { + return true +} + +type pass struct{} + +var passSingleton StepResult = pass{} + +func (b pass) HasError() bool { + return false +} + +func (b pass) Error() error { + return nil +} + +func (b pass) ShouldBreakReconcileLoop() bool { + return false +} diff --git a/controllers/cassandracluster/storageupsize/cc.go b/controllers/cassandracluster/storageupsize/cc.go new file mode 100644 index 00000000..f966f2b4 --- /dev/null +++ b/controllers/cassandracluster/storageupsize/cc.go @@ -0,0 +1,32 @@ +package storageupsize + +func AnalyzeDataCapacityChange(oldCapacity, newCapacity string) CapacityChange { + oldParsed := silentParseResourceQuantity(oldCapacity) + newParsed := silentParseResourceQuantity(newCapacity) + + if oldCapacity == newCapacity { + return CapacityNoChange + } + + cmp := newParsed.Cmp(oldParsed) + if cmp == 0 { + // Same numeric value, only syntactic change (e.g. 1024Mi -> 1Gi) + return CapacitySyntacticChange + } + + if cmp > 0 { + return CapacityUpsize + } + + return CapacityDownsize + +} + +type CapacityChange int + +const ( + CapacityNoChange CapacityChange = iota + CapacitySyntacticChange + CapacityUpsize + CapacityDownsize +) diff --git a/controllers/cassandracluster/storageupsize/cc_test.go b/controllers/cassandracluster/storageupsize/cc_test.go new file mode 100644 index 00000000..10b9a509 --- /dev/null +++ b/controllers/cassandracluster/storageupsize/cc_test.go @@ -0,0 +1,29 @@ +package storageupsize + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestAnalyzeDataCapacityChange(t *testing.T) { + tests := []struct { + name string + oldCapacity string + newCapacity string + expectedDiff CapacityChange + }{ + {name: "valid increase", oldCapacity: "10Gi", newCapacity: "20Gi", expectedDiff: CapacityUpsize}, + {name: "no change", oldCapacity: "10Gi", newCapacity: "10Gi", expectedDiff: CapacityNoChange}, + {name: "syntactic change", oldCapacity: "1Gi", newCapacity: "1024Mi", expectedDiff: 
CapacitySyntacticChange}, + {name: "syntactic change", oldCapacity: "1T", newCapacity: "1000G", expectedDiff: CapacitySyntacticChange}, + {name: "capacity decrease", oldCapacity: "20Gi", newCapacity: "10Gi", expectedDiff: CapacityDownsize}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + diff := AnalyzeDataCapacityChange(tt.oldCapacity, tt.newCapacity) + assert.Equal(t, tt.expectedDiff, diff) + }) + } +} diff --git a/controllers/cassandracluster/storageupsize/lastapplied/last_applied.go b/controllers/cassandracluster/storageupsize/lastapplied/last_applied.go new file mode 100644 index 00000000..128147d5 --- /dev/null +++ b/controllers/cassandracluster/storageupsize/lastapplied/last_applied.go @@ -0,0 +1,56 @@ +package lastapplied + +import ( + "archive/zip" + "bytes" + "encoding/base64" + + "github.com/banzaicloud/k8s-objectmatcher/patch" + json "github.com/json-iterator/go" + "github.com/sirupsen/logrus" + appsv1 "k8s.io/api/apps/v1" +) + +func GetOriginalSts(sts *appsv1.StatefulSet) (appsv1.StatefulSet, error) { + stsOrig, err := patch.DefaultAnnotator.GetOriginalConfiguration(sts) + stsOrigObj := appsv1.StatefulSet{} + err = json.ConfigCompatibleWithStandardLibrary.Unmarshal(stsOrig, &stsOrigObj) + if err != nil { + logrus.Debug("cannot deserialize stsOrig") + } + return stsOrigObj, err +} + +func EncodeLastAppliedConfigAnnotation(originalStatefulSet appsv1.StatefulSet) (string, error) { + marshal, err := json.ConfigCompatibleWithStandardLibrary.Marshal(originalStatefulSet) + if err != nil { + return "", err + } + marshalWithoutNulls, _, err := patch.DeleteNullInJson(marshal) + if err != nil { + return "", err + } + zipped, err := zipAndBase64EncodeAnnotation(marshalWithoutNulls) + if err != nil { + return "", err + } + return zipped, nil +} + +func zipAndBase64EncodeAnnotation(original []byte) (string, error) { + buf := new(bytes.Buffer) + zipWriter := zip.NewWriter(buf) + writer, err := zipWriter.Create("original") + if err != nil { 
+ return "", err + } + _, err = writer.Write(original) + if err != nil { + return "", err + } + err = zipWriter.Close() + if err != nil { + return "", err + } + return base64.StdEncoding.EncodeToString(buf.Bytes()), nil +} diff --git a/controllers/cassandracluster/storageupsize/pods.go b/controllers/cassandracluster/storageupsize/pods.go new file mode 100644 index 00000000..351c8db9 --- /dev/null +++ b/controllers/cassandracluster/storageupsize/pods.go @@ -0,0 +1,15 @@ +package storageupsize + +import ( + "github.com/cscetbon/casskop/controllers/cassandracluster/cassandrapod" + v1 "k8s.io/api/core/v1" +) + +func allPodsReady(podList *v1.PodList) bool { + for _, pod := range podList.Items { + if !cassandrapod.IsReady(&pod) { + return false + } + } + return true +} diff --git a/controllers/cassandracluster/storageupsize/pvc.go b/controllers/cassandracluster/storageupsize/pvc.go new file mode 100644 index 00000000..021ab67d --- /dev/null +++ b/controllers/cassandracluster/storageupsize/pvc.go @@ -0,0 +1,139 @@ +package storageupsize + +import ( + "context" + "errors" + "fmt" + "strings" + + api "github.com/cscetbon/casskop/api/v2" + "github.com/cscetbon/casskop/controllers/cassandracluster/consts" + "github.com/cscetbon/casskop/controllers/cassandracluster/storagestateclient" + "github.com/cscetbon/casskop/controllers/cassandracluster/storageupsize/actionstep" + "github.com/cscetbon/casskop/controllers/cassandracluster/view" + "github.com/sirupsen/logrus" + "go.uber.org/multierr" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" +) + +func findDataCapacity(pvc []corev1.PersistentVolumeClaim) (int, resource.Quantity) { + for i, template := range pvc { + if template.Name == consts.DataPVCName { + if template.Spec.Resources.Requests == nil { + return i, resource.Quantity{} + } + return i, template.Spec.Resources.Requests[corev1.ResourceStorage] + } + } + return -1, resource.Quantity{} +} + +func silentParseResourceQuantity(qs string) 
resource.Quantity { + q, _ := resource.ParseQuantity(qs) + return q +} + +func fetchDataPvcs(ctx context.Context, cc *api.CassandraCluster, rack view.RackView, + storageStateClient storagestateclient.StorageStateClient, outputPVCs *[]corev1.PersistentVolumeClaim) actionstep.StepResult { + + dataPVCs, err := getAllDataPvcs(ctx, cc, rack, storageStateClient) + if err != nil { + return actionstep.Error(err) + } + *outputPVCs = dataPVCs + return actionstep.Pass() +} + +func getAllDataPvcs(ctx context.Context, cc *api.CassandraCluster, rack view.RackView, + storageStateClient storagestateclient.StorageStateClient) ([]corev1.PersistentVolumeClaim, error) { + + if rack.LivingStatefulSet() == nil { + return nil, errors.New(fmt.Sprintf("[%s]: cannot fetch PVC list for storage upsize because"+ + "livingStatefulSet is nil for DC-Rack %s "+ + "(should not see this message, PVC should not be listed before statefulSet is recreated with new capacity)", + cc.Name, rack.DcRackName())) + } + statefulSetName := rack.LivingStatefulSet().Name + + pvcs, err := storageStateClient.ListPVC(ctx, cc.Namespace, rack.GetLabelsForCassandraDCRack(cc)) + if err != nil { + return nil, err + } + + dataPVCs := make([]corev1.PersistentVolumeClaim, 0) + for _, pvc := range pvcs.Items { + if strings.HasPrefix(pvc.Name, consts.DataPVCName+"-"+statefulSetName) { + dataPVCs = append(dataPVCs, pvc) + } + } + + expectedNodesPerRacks := *rack.LivingStatefulSet().Spec.Replicas + + if len(dataPVCs) != int(expectedNodesPerRacks) { + errMsg := fmt.Sprintf("[%s]: Number of Data PVCs (%d) different than expected Replicas (%d) for DC-Rack %s", + cc.Name, len(dataPVCs), expectedNodesPerRacks, rack.DcRackName()) + logrus.Warn(errMsg) + return nil, errors.New(errMsg) + } + + return dataPVCs, nil +} + +func ensureAllPVCsHaveNewCapacity(ctx context.Context, cc *api.CassandraCluster, dataPVCs []corev1.PersistentVolumeClaim, + rack view.RackView, storageStateClient storagestateclient.StorageStateClient) 
actionstep.StepResult { + + requestedCapacity := silentParseResourceQuantity(cc.GetDataCapacityForDCName(rack.DcName())) + + anythingChanged := false + var multiError error + for _, pvc := range dataPVCs { + if pvc.Spec.Resources.Requests["storage"] != requestedCapacity { + anythingChanged = true + if pvc.Spec.Resources.Requests == nil { + pvc.Spec.Resources.Requests = corev1.ResourceList{} + } + pvc.Spec.Resources.Requests["storage"] = requestedCapacity + err := storageStateClient.UpdatePVC(ctx, &pvc) + if err != nil { + rack.Log().Errorf("Error updating PVC[%s] capacity to %v", pvc.Name, requestedCapacity) + multiError = multierr.Append(multiError, err) + } + rack.Log().Infof("Update PVC[%s] capacity to %v successful", pvc.Name, requestedCapacity) + } + } + + if multiError != nil { + return actionstep.Error(multiError) + } + + if anythingChanged { + return actionstep.Break() + } + return actionstep.Pass() +} + +func waitTillAllFilesystemsHaveNewCapacity(cc *api.CassandraCluster, dataPVCs []corev1.PersistentVolumeClaim, + rack view.RackView) actionstep.StepResult { + + requestedCapacity := silentParseResourceQuantity(cc.GetDataCapacityForDCName(rack.DcName())) + + var resized, notResizedYet []string + + for _, pvc := range dataPVCs { + if pvc.Status.Capacity["storage"].Equal(requestedCapacity) { + resized = append(resized, pvc.Name) + } else { + notResizedYet = append(notResizedYet, pvc.Name) + } + } + + if len(notResizedYet) == 0 { + rack.Log().Infof("All PVs for DC-Rack %s resized to %s", rack.DcRackName(), requestedCapacity.String()) + return actionstep.Pass() + } + + rack.Log().Infof("Still waiting for PVs to be resized for DC-Rack %s. 
Resized: [%v], Not resized yet: [%v]", + rack.DcRackName(), resized, notResizedYet) + return actionstep.Break() +} diff --git a/controllers/cassandracluster/storageupsize/pvc_test.go b/controllers/cassandracluster/storageupsize/pvc_test.go new file mode 100644 index 00000000..a92a7904 --- /dev/null +++ b/controllers/cassandracluster/storageupsize/pvc_test.go @@ -0,0 +1,135 @@ +package storageupsize + +import ( + "context" + "errors" + "testing" + + api "github.com/cscetbon/casskop/api/v2" + "github.com/cscetbon/casskop/controllers/cassandracluster/storagestateclient" + storagestateclientmock "github.com/cscetbon/casskop/controllers/cassandracluster/storagestateclient/mock" + rackviewstub "github.com/cscetbon/casskop/controllers/cassandracluster/view/stub" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +var testCtx = context.Background() + +func Test_ensureAllPVCsHaveNewCapacity_happyPath(t *testing.T) { + const expectedCapacity = "25Gi" + + dataPvc0 := pvc("data-dc-rack1-0", "10Gi") + dataPvc1 := pvc("data-dc-rack1-1", "25Gi") + dataPvc2 := pvc("data-dc-rack1-2", "") + pvcs := []corev1.PersistentVolumeClaim{dataPvc0, dataPvc1, dataPvc2} + cc := &api.CassandraCluster{ + Spec: api.CassandraClusterSpec{ + DataCapacity: expectedCapacity, + }, + } + rackView := rackviewstub.RackView{CompleteRackNameStub: api.CompleteRackName{DcName: "dc"}} + fakeClientScheme := scheme.Scheme + fakeClientScheme.AddKnownTypes(api.GroupVersion, cc) + cl := fake.NewClientBuilder(). + WithScheme(fakeClientScheme). + WithRuntimeObjects(&dataPvc0, &dataPvc1, &dataPvc2). 
+ WithStatusSubresource(cc).Build() + storageStateClient := storagestateclient.New(cl) + + t.Run("should update all PVCs to new capacity", func(t *testing.T) { + res := ensureAllPVCsHaveNewCapacity(testCtx, cc, pvcs, rackView, storageStateClient) + + assert.False(t, res.HasError()) + assert.NoError(t, res.Error()) + assertPvcCapacity(t, cl, "data-dc-rack1-0", expectedCapacity) + assertPvcCapacity(t, cl, "data-dc-rack1-1", expectedCapacity) + assertPvcCapacity(t, cl, "data-dc-rack1-2", expectedCapacity) + }) + + t.Run("should handle PVC already at expected capacity", func(t *testing.T) { + dataPvc0 = pvc("data-dc-rack1-0", expectedCapacity) + dataPvc1 = pvc("data-dc-rack1-1", expectedCapacity) + dataPvc2 = pvc("data-dc-rack1-2", expectedCapacity) + assert.NoError(t, cl.Update(testCtx, &dataPvc0)) + assert.NoError(t, cl.Update(testCtx, &dataPvc1)) + assert.NoError(t, cl.Update(testCtx, &dataPvc2)) + pvcs = []corev1.PersistentVolumeClaim{dataPvc0, dataPvc1, dataPvc2} + + res := ensureAllPVCsHaveNewCapacity(testCtx, cc, pvcs, rackView, storageStateClient) + + assert.False(t, res.HasError()) + assert.NoError(t, res.Error()) + assertPvcCapacity(t, cl, "data-dc-rack1-0", expectedCapacity) + assertPvcCapacity(t, cl, "data-dc-rack1-1", expectedCapacity) + assertPvcCapacity(t, cl, "data-dc-rack1-2", expectedCapacity) + }) +} + +func Test_ensureAllPVCsHaveNewCapacity_errors(t *testing.T) { + const expectedCapacity = "25Gi" + + dataPvc0 := pvc("data-dc-rack1-0", "10Gi") + dataPvc1 := pvc("data-dc-rack1-1", "25Gi") + dataPvc2 := pvc("data-dc-rack1-2", "") + pvcs := []corev1.PersistentVolumeClaim{dataPvc0, dataPvc1, dataPvc2} + cc := &api.CassandraCluster{ + Spec: api.CassandraClusterSpec{ + DataCapacity: expectedCapacity, + }, + } + rackView := rackviewstub.RackView{CompleteRackNameStub: api.CompleteRackName{DcName: "dc"}} + + storageStateClient := &storagestateclientmock.StorageStateClient{} + storageStateClient.On("UpdatePVC", mock.Anything, mock.Anything). 
+ Return(errors.New("update failed")).Times(3) + + res := ensureAllPVCsHaveNewCapacity(testCtx, cc, pvcs, rackView, storageStateClient) + + assert.True(t, res.HasError()) + assert.EqualError(t, res.Error(), "update failed; update failed") +} + +func pvc(name, capacity string) corev1.PersistentVolumeClaim { + var resources corev1.ResourceList + if capacity != "" { + resources = corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse(capacity), + } + } + return corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: "default", + }, + Spec: corev1.PersistentVolumeClaimSpec{ + Resources: corev1.VolumeResourceRequirements{ + Requests: resources, + }, + }, + } +} + +func pvcWithoutSpecifiedResources(name string) corev1.PersistentVolumeClaim { + return corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: "default", + }, + Spec: corev1.PersistentVolumeClaimSpec{}, + } +} + +func assertPvcCapacity(t *testing.T, cl client.WithWatch, pvcName, expectedCapacity string) { + updatedPvc := corev1.PersistentVolumeClaim{} + err := cl.Get(testCtx, types.NamespacedName{Namespace: "default", Name: pvcName}, &updatedPvc) + assert.NoError(t, err) + finalDataCapacity := updatedPvc.Spec.Resources.Requests[corev1.ResourceStorage] + assert.Equal(t, expectedCapacity, finalDataCapacity.String()) +} diff --git a/controllers/cassandracluster/storageupsize/rackstatus.go b/controllers/cassandracluster/storageupsize/rackstatus.go new file mode 100644 index 00000000..b4b200d5 --- /dev/null +++ b/controllers/cassandracluster/storageupsize/rackstatus.go @@ -0,0 +1,61 @@ +package storageupsize + +import ( + "errors" + + api "github.com/cscetbon/casskop/api/v2" + "github.com/cscetbon/casskop/controllers/cassandracluster/storageupsize/actionstep" + "github.com/cscetbon/casskop/controllers/cassandracluster/view" + json "github.com/json-iterator/go" + appsv1 "k8s.io/api/apps/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 
+ "k8s.io/utils/ptr" +) + +func isStatefulSetSnapshottedAlready(dcRackStatus *api.CassandraRackStatus) bool { + return dcRackStatus.StatefulSetSnapshotBeforeStorageResize != "" +} + +func makeOldStatefulSetSnapshot(rack view.RackView) actionstep.StepResult { + if isStatefulSetSnapshottedAlready(rack.RackStatus()) { + return actionstep.Pass() + } + + livingStatefulSet := rack.LivingStatefulSet() + if livingStatefulSet == nil { + return actionstep.Error(errors.New("StatefulSet snapshot not exist and StatefulSet itself is not found, cannot proceed with storage upsize")) + } + statefulSetSnapshotJson, err := prepareStatefulSetSnapshot(livingStatefulSet) + if err != nil { + return actionstep.Error(err) + } + rack.RackStatus().StatefulSetSnapshotBeforeStorageResize = statefulSetSnapshotJson + return actionstep.Break() +} + +func unmarshallSnapshottedStatefulSet(rack view.RackView) (*appsv1.StatefulSet, error) { + marshalledStatefulSet := rack.RackStatus().StatefulSetSnapshotBeforeStorageResize + newStatefulSet := &appsv1.StatefulSet{} + err := json.Unmarshal([]byte(marshalledStatefulSet), newStatefulSet) + if err != nil { + return nil, errors.New("cannot unmarshall snapshotted statefulSet for storage upsize: " + err.Error()) + } + return newStatefulSet, nil +} + +func startUpsizeAction(rack view.RackView) { + lastAction := &rack.RackStatus().CassandraLastAction + rack.RackStatus().Phase = api.ClusterPhasePending.Name + lastAction.Status = api.StatusOngoing + lastAction.Name = api.ActionStorageUpsize.Name + lastAction.StartTime = ptr.To(metav1.Now()) + lastAction.EndTime = nil +} + +func finalizeUpsizeAction(rackStatus *api.CassandraRackStatus) { + lastAction := &rackStatus.CassandraLastAction + lastAction.Status = api.StatusDone + lastAction.Name = api.ActionStorageUpsize.Name + lastAction.EndTime = ptr.To(metav1.Now()) + rackStatus.StatefulSetSnapshotBeforeStorageResize = "" +} diff --git a/controllers/cassandracluster/storageupsize/sts.go 
b/controllers/cassandracluster/storageupsize/sts.go new file mode 100644 index 00000000..6cc3f297 --- /dev/null +++ b/controllers/cassandracluster/storageupsize/sts.go @@ -0,0 +1,193 @@ +package storageupsize + +import ( + "context" + "errors" + "fmt" + + "github.com/banzaicloud/k8s-objectmatcher/patch" + api "github.com/cscetbon/casskop/api/v2" + "github.com/cscetbon/casskop/controllers/cassandracluster/consts" + "github.com/cscetbon/casskop/controllers/cassandracluster/pods" + "github.com/cscetbon/casskop/controllers/cassandracluster/storageupsize/actionstep" + "github.com/cscetbon/casskop/controllers/cassandracluster/storageupsize/lastapplied" + "github.com/cscetbon/casskop/controllers/cassandracluster/sts" + "github.com/cscetbon/casskop/controllers/cassandracluster/view" + json "github.com/json-iterator/go" + appsv1 "k8s.io/api/apps/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func prepareStatefulSetSnapshot(livingStatefulSet *appsv1.StatefulSet) (string, error) { + statefulSetSnapshot := livingStatefulSet.DeepCopy() + + statefulSetSnapshot.GenerateName = "" + statefulSetSnapshot.SelfLink = "" + statefulSetSnapshot.UID = "" + statefulSetSnapshot.ResourceVersion = "" + statefulSetSnapshot.Generation = 0 + statefulSetSnapshot.CreationTimestamp = metav1.Time{} + statefulSetSnapshot.DeletionTimestamp = nil + statefulSetSnapshot.DeletionGracePeriodSeconds = nil + statefulSetSnapshot.ManagedFields = nil + + statefulSetSnapshot.TypeMeta = metav1.TypeMeta{} + statefulSetSnapshot.Status = appsv1.StatefulSetStatus{} + + statefulSetSnapshotJson, err := json.ConfigCompatibleWithStandardLibrary.Marshal(statefulSetSnapshot) + if err != nil { + return "", err + } + return string(statefulSetSnapshotJson), nil +} + +func applyPVCModification(newStatefulSet *appsv1.StatefulSet, newDataCapacity resource.Quantity) error { + err := setNewDataCapacity(newStatefulSet, newDataCapacity) + if err != nil { + return err + } + return 
enrichWithCleanLastAppliedAnnotation(newStatefulSet, newDataCapacity) +} + +// enrichWithCleanLastAppliedAnnotation +// +// 1. Why not create statefulSet directly from the CR? +// - because other changes may have been introduced (e.g. scaling or other changes in the pod template) +// we don't want other actions to interfere with the resize process +// +// 2. Why not create statefulSet from the old CR (last-applied-configuration)? +// - because other changes may also have been introduced +// -- someone starts a scale-in +// -- then changes dataCapacity +// -- the first rack will still be done correctly, +// but in the second, at the end of the resize, +// a StatefulSet with a smaller number of replicas will be applied immediately, without calling decommission +// +// 3. Why do we need to manually handle last-applied annotation on the newStatefulSet? +// - Banzai stores the original object in annotations and performs a 3-way merge on update +// - livingStatefulSet is an object fetched from k8s API, so it contains Kubernetes defaults (added by the k8s API server) +// - newStatefulSet is livingStatefulSet after marshal+unmarshal +// - if we simply did +// `patch.DefaultAnnotator.SetLastAppliedAnnotation(newStatefulSet)` +// we would put into the annotations an object with Kubernetes defaults (added by the k8s API server) +// - that would force an update during the 3-way merge after the resize +// (StatefulSet generated from the CR would be clean and would not match the polluted last-applied in the stored StatefulSet) +func enrichWithCleanLastAppliedAnnotation(newStatefulSet *appsv1.StatefulSet, newDataCapacity resource.Quantity) error { + + // best effort = swallow all errors: + // if we cannot get/edit/encode original sts -> we skip setting last-applied annotation, would lead to extra update after resize (no pod restart) + + originalStatefulSet, err := lastapplied.GetOriginalSts(newStatefulSet) + if err != nil { + return nil + } + + err = 
setNewDataCapacity(&originalStatefulSet, newDataCapacity) + if err != nil { + return nil + } + + lastApplied, err := lastapplied.EncodeLastAppliedConfigAnnotation(originalStatefulSet) + if err != nil { + return nil + } + newStatefulSet.Annotations[patch.LastAppliedConfig] = lastApplied + + return nil +} + +func removeStatefulSetOrphan(ctx context.Context, cc *api.CassandraCluster, rack view.RackView, stsClient sts.StsClient) actionstep.StepResult { + if !rack.IsStatefulSetAliveNow() { + return actionstep.Pass() + } + + if doesStatefulSetHaveNewCapacity(cc, rack.LivingStatefulSet()) { + return actionstep.Pass() + } + + rack.Log().Info("Deleting StatefulSet with orphan option") + err := stsClient.DeleteStatefulSetWithOrphanOption(ctx, cc.Namespace, rack.LivingStatefulSet().Name) + if err != nil { + return actionstep.Error(err) + } + + return actionstep.Break() +} + +func doesStatefulSetHaveNewCapacity(cc *api.CassandraCluster, livingStatefulSet *appsv1.StatefulSet) bool { + requested := silentParseResourceQuantity(cc.Spec.DataCapacity) + _, current := findDataCapacity(livingStatefulSet.Spec.VolumeClaimTemplates) + return requested.Equal(current) +} + +func recreateStatefulSetWithNewCapacity(ctx context.Context, rack view.RackView, newDataCapacity resource.Quantity, + stsClient sts.StsClient) actionstep.StepResult { + + if rack.IsStatefulSetAliveNow() { + return actionstep.Pass() + } + + rack.Log().Info("Creating StatefulSet with new capacity") + + newStatefulSet, err := unmarshallSnapshottedStatefulSet(rack) + if err != nil { + return actionstep.Error(err) + } + + err = applyPVCModification(newStatefulSet, newDataCapacity) + if err != nil { + return actionstep.Error(err) + } + + err = stsClient.CreateStatefulSet(ctx, newStatefulSet) + if err != nil { + return actionstep.Error(err) + } + + return actionstep.Break() +} + +func waitTillStatefulSetAndAllPodsAreReady(ctx context.Context, cc *api.CassandraCluster, rack view.RackView, + podsClient pods.PodsClient) 
actionstep.StepResult { + + if !doesStatefulSetHaveNewCapacity(cc, rack.LivingStatefulSet()) { + rack.Log().Infof("Resize action is in progress, statefulset need to be re-created with new capacity") + return actionstep.Break() + } + + if sts.IsStatefulSetReady(rack.LivingStatefulSet()) { + podList, err := podsClient.ListPods(ctx, cc.Namespace, rack.GetLabelsForCassandraDCRack(cc)) + if err != nil { + return actionstep.Error(err) + } + expectedNodesPerRacks := *rack.LivingStatefulSet().Spec.Replicas + if len(podList.Items) != int(expectedNodesPerRacks) { + errMsg := fmt.Sprintf("Number of pods (%d) different than expected Replicas (%d) for DC-Rack %s", + len(podList.Items), expectedNodesPerRacks, rack.DcRackName()) + rack.Log().Warn(errMsg) + return actionstep.Error(errors.New(errMsg)) + } + if allPodsReady(podList) { + rack.Log().Info("Resize action finalization, " + + "all pods are ready with new DataCapacity, we can finalize the action") + finalizeUpsizeAction(rack.RackStatus()) + return actionstep.Pass() + } + } + + rack.Log().Info("Resize action is in progress, " + + "we wait for all pods to be ready with new DataCapacity before finalizing the action") + return actionstep.Break() +} + +func setNewDataCapacity(statefulSet *appsv1.StatefulSet, dataCapacity resource.Quantity) error { + for i, template := range statefulSet.Spec.VolumeClaimTemplates { + if template.Name == consts.DataPVCName { + template.Spec.Resources.Requests["storage"] = dataCapacity + statefulSet.Spec.VolumeClaimTemplates[i] = template + return nil + } + } + return errors.New(fmt.Sprintf("no %s pvc found in statefulSet %s", consts.DataPVCName, statefulSet.Name)) +} diff --git a/controllers/cassandracluster/storageupsize/sts_test.go b/controllers/cassandracluster/storageupsize/sts_test.go new file mode 100644 index 00000000..58a4c5ff --- /dev/null +++ b/controllers/cassandracluster/storageupsize/sts_test.go @@ -0,0 +1,264 @@ +package storageupsize + +import ( + "archive/zip" + "bytes" + "context" 
+ "encoding/base64" + "errors" + "testing" + + "github.com/banzaicloud/k8s-objectmatcher/patch" + v2 "github.com/cscetbon/casskop/api/v2" + "github.com/cscetbon/casskop/controllers/cassandracluster/consts" + "github.com/cscetbon/casskop/controllers/cassandracluster/sts" + "github.com/cscetbon/casskop/controllers/cassandracluster/view/stub" + json "github.com/json-iterator/go" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + "sigs.k8s.io/controller-runtime/pkg/client/interceptor" +) + +func Test_applyPVCModification(t *testing.T) { + + const OldStsKey = "banzaicloud.com/last-applied" + const InitialCapacity = "5Gi" + const CapacityAfterUpsize = "10Gi" + getStsBeforeChange := func() *appsv1.StatefulSet { + return &appsv1.StatefulSet{Spec: appsv1.StatefulSetSpec{VolumeClaimTemplates: []corev1.PersistentVolumeClaim{ + pvc(consts.DataPVCName, InitialCapacity), + }}} + } + + t.Run("no data pvc - error expected", func(t *testing.T) { + statefulSet := &appsv1.StatefulSet{ObjectMeta: metav1.ObjectMeta{Name: "dc1-rack1"}} + + err := applyPVCModification(statefulSet, resource.MustParse(CapacityAfterUpsize)) + + assert.EqualError(t, err, "no data pvc found in statefulSet dc1-rack1") + }) + + t.Run("no old sts - should just apply new capacity", func(t *testing.T) { + statefulSet := getStsBeforeChange() + + err := applyPVCModification(statefulSet, resource.MustParse(CapacityAfterUpsize)) + + assert.NoError(t, err) + assert.Equal(t, resource.MustParse(CapacityAfterUpsize), + statefulSet.Spec.VolumeClaimTemplates[0].Spec.Resources.Requests[corev1.ResourceStorage]) + assert.Empty(t, statefulSet.Annotations[OldStsKey]) + }) + + 
t.Run("old sts malformed - should just apply new capacity", func(t *testing.T) { + statefulSet := getStsBeforeChange() + statefulSet.Annotations = map[string]string{ + OldStsKey: "malformed-annotation", + } + + err := applyPVCModification(statefulSet, resource.MustParse(CapacityAfterUpsize)) + + assert.NoError(t, err) + assert.Equal(t, resource.MustParse(CapacityAfterUpsize), + statefulSet.Spec.VolumeClaimTemplates[0].Spec.Resources.Requests[corev1.ResourceStorage]) + assert.Equal(t, "malformed-annotation", statefulSet.Annotations[OldStsKey]) + }) + + t.Run("old sts without data pvc - should just apply new capacity", func(t *testing.T) { + oldStsWithoutDataPvc := string(toJson(t, &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{Name: "dc1-rack1"}, + Spec: appsv1.StatefulSetSpec{ + VolumeClaimTemplates: []corev1.PersistentVolumeClaim{ + pvc("some-other-pvc1", "1Gi"), + pvc("some-other-pvc2", "1Gi"), + }, + }, + })) + statefulSet := getStsBeforeChange() + statefulSet.Annotations = map[string]string{ + OldStsKey: oldStsWithoutDataPvc, + } + + err := applyPVCModification(statefulSet, resource.MustParse(CapacityAfterUpsize)) + + assert.NoError(t, err) + assert.Equal(t, resource.MustParse(CapacityAfterUpsize), + statefulSet.Spec.VolumeClaimTemplates[0].Spec.Resources.Requests[corev1.ResourceStorage]) + assert.Equal(t, oldStsWithoutDataPvc, unzip(statefulSet.Annotations[OldStsKey])) + }) + + t.Run("old sts exists - should apply new capacity to current AND old spec", func(t *testing.T) { + statefulSet := getStsBeforeChange() + statefulSet.Annotations = map[string]string{ + OldStsKey: string(toJson(t, statefulSet)), + } + + err := applyPVCModification(statefulSet, resource.MustParse(CapacityAfterUpsize)) + + assert.NoError(t, err) + assert.Equal(t, resource.MustParse(CapacityAfterUpsize), + statefulSet.Spec.VolumeClaimTemplates[0].Spec.Resources.Requests[corev1.ResourceStorage]) + assert.Equal(t, string(toJson(t, removeAnnotations(statefulSet.DeepCopy()))), 
unzip(statefulSet.Annotations[OldStsKey])) + }) +} + +func Test_recreateStatefulSetWithNewCapacity(t *testing.T) { + + t.Run("statefulSet already exists - no op", func(t *testing.T) { + rack := stub.RackView{ + LivingStatefulSetStub: &appsv1.StatefulSet{}, + } + + result := recreateStatefulSetWithNewCapacity(testCtx, rack, resource.MustParse("15Gi"), nil) + + assert.False(t, result.HasError()) + assert.NoError(t, result.Error()) + assert.False(t, result.ShouldBreakReconcileLoop()) + }) + + t.Run("statefulSet snapshot not exist - error", func(t *testing.T) { + rack := stub.RackView{ + RackStatusStub: &v2.CassandraRackStatus{ + StatefulSetSnapshotBeforeStorageResize: "", + }, + } + + result := recreateStatefulSetWithNewCapacity(testCtx, rack, resource.MustParse("15Gi"), nil) + + assert.True(t, result.HasError()) + assert.Contains(t, result.Error().Error(), "cannot unmarshall snapshotted statefulSet for storage upsize") + assert.True(t, result.ShouldBreakReconcileLoop()) + }) + + t.Run("cannot find data PVC - error", func(t *testing.T) { + stsSnapshot := string(toJson(t, &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{Name: "dc1-rack1", Namespace: "default"}, + Spec: appsv1.StatefulSetSpec{ + VolumeClaimTemplates: []corev1.PersistentVolumeClaim{ + pvc("some other pvc", "9Gi"), + }, + }, + })) + rack := stub.RackView{ + RackStatusStub: &v2.CassandraRackStatus{ + StatefulSetSnapshotBeforeStorageResize: stsSnapshot, + }, + } + + result := recreateStatefulSetWithNewCapacity(testCtx, rack, resource.MustParse("15Gi"), nil) + + assert.True(t, result.HasError()) + assert.Contains(t, result.Error().Error(), "no data pvc found in statefulSet dc1-rack1") + assert.True(t, result.ShouldBreakReconcileLoop()) + }) + + t.Run("statefulSet created successfully - break loop, sts should be created with proper capacity", func(t *testing.T) { + stsSnapshot := string(toJson(t, &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{Name: "dc1-rack1", Namespace: "default"}, + Spec: 
appsv1.StatefulSetSpec{ + VolumeClaimTemplates: []corev1.PersistentVolumeClaim{ + pvc(consts.DataPVCName, "9Gi"), + }, + }, + })) + rack := stub.RackView{ + RackStatusStub: &v2.CassandraRackStatus{ + StatefulSetSnapshotBeforeStorageResize: stsSnapshot, + }, + } + cl := fake.NewClientBuilder().Build() + + result := recreateStatefulSetWithNewCapacity(testCtx, rack, resource.MustParse("15Gi"), sts.NewClient(cl)) + + assert.False(t, result.HasError()) + assert.NoError(t, result.Error()) + assert.True(t, result.ShouldBreakReconcileLoop()) + createdSts := &appsv1.StatefulSet{} + assert.NoError(t, cl.Get(testCtx, types.NamespacedName{Namespace: "default", Name: "dc1-rack1"}, createdSts)) + assert.Equal(t, resource.MustParse("15Gi"), + createdSts.Spec.VolumeClaimTemplates[0].Spec.Resources.Requests[corev1.ResourceStorage]) + }) + + t.Run("statefulSet creation fails - error", func(t *testing.T) { + stsSnapshot := string(toJson(t, &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{Name: "dc1-rack1", Namespace: "default"}, + Spec: appsv1.StatefulSetSpec{ + VolumeClaimTemplates: []corev1.PersistentVolumeClaim{ + pvc(consts.DataPVCName, "9Gi"), + }, + }, + })) + rack := stub.RackView{ + RackStatusStub: &v2.CassandraRackStatus{ + StatefulSetSnapshotBeforeStorageResize: stsSnapshot, + }, + } + cl := interceptor.NewClient(fake.NewClientBuilder().Build(), interceptor.Funcs{ + Create: func(ctx context.Context, client client.WithWatch, obj client.Object, opts ...client.CreateOption) error { + return errors.New("creation failed") + }, + }) + + result := recreateStatefulSetWithNewCapacity(testCtx, rack, resource.MustParse("15Gi"), sts.NewClient(cl)) + + assert.True(t, result.HasError()) + assert.Contains(t, result.Error().Error(), "creation failed") + assert.True(t, result.ShouldBreakReconcileLoop()) + assert.True(t, apierrors.IsNotFound(cl.Get( + testCtx, + types.NamespacedName{Namespace: "default", Name: "dc1-rack1"}, + &appsv1.StatefulSet{}, + ))) + }) +} + +func 
removeAnnotations(sts *appsv1.StatefulSet) *appsv1.StatefulSet { + sts.Annotations = map[string]string{} + return sts +} + +func toJson(t *testing.T, sts *appsv1.StatefulSet) []byte { + marshalled, err := json.ConfigCompatibleWithStandardLibrary.Marshal(sts) + require.NoError(t, err) + out, _, err := patch.DeleteNullInJson(marshalled) + require.NoError(t, err) + return out +} + +func unzip(in string) string { + decoded, err := base64.StdEncoding.DecodeString(in) + if err != nil { + return in + } + + reader, err := zip.NewReader(bytes.NewReader(decoded), int64(len(decoded))) + if err != nil { + return in + } + + if len(reader.File) == 0 { + return in + } + + file := reader.File[0] + rc, err := file.Open() + if err != nil { + return in + } + defer rc.Close() + + buf := new(bytes.Buffer) + _, err = buf.ReadFrom(rc) + if err != nil { + return in + } + + return buf.String() +} diff --git a/controllers/cassandracluster/storageupsize_test.go b/controllers/cassandracluster/storageupsize_test.go new file mode 100644 index 00000000..7c95c652 --- /dev/null +++ b/controllers/cassandracluster/storageupsize_test.go @@ -0,0 +1,291 @@ +package cassandracluster + +import ( + "context" + "fmt" + "testing" + + api "github.com/cscetbon/casskop/api/v2" + "github.com/cscetbon/casskop/pkg/k8s" + "github.com/jarcoal/httpmock" + "github.com/stretchr/testify/assert" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" +) + +func TestStorageUpsize(t *testing.T) { + overrideDelayWaitWithNoDelay() + defer restoreDefaultDelayWait() + + const InitialCapacity = "3Gi" + const NewCapacity = "10Gi" + + httpmock.Activate() + defer httpmock.DeactivateAndReset() + assert := assert.New(t) + + // setup cluster + rcc, req := createCassandraClusterWithNoDisruption(t, "cassandracluster-2racks-with-storage.yaml") + 
assert.Equal(int32(3), rcc.cc.Spec.NodesPerRacks) + + cassandraCluster := rcc.cc.DeepCopy() + datacenters := cassandraCluster.Spec.Topology.DC + assert.Equal(1, len(datacenters)) + assert.Equal(2, len(datacenters[0].Rack)) + assertClusterInitialized(assert, rcc) + + // check initial sts capacity + for _, dc := range datacenters { + for _, rack := range dc.Rack { + assertStsCapacity(assert, rcc, dc, rack.Name, InitialCapacity) + } + } + + // simulate all PVCs ready + simulatePVCsReadyForWholeCluster(assert, rcc, datacenters, InitialCapacity) + + // mock no joining nodes + dc := datacenters[0] + sts1Name := cassandraCluster.Name + fmt.Sprintf("-%s-%s", dc.Name, dc.Rack[0].Name) + firstPod := podHost(sts1Name, 0, rcc) + registerJolokiaOperationJoiningNodes(firstPod, 0) + + // request storage upsize + cassandraCluster.Spec.DataCapacity = NewCapacity + assert.NoError(rcc.Client.Update(context.TODO(), cassandraCluster)) + + // reconcile storage upsize rack by rack + for _, currentRack := range cassandraCluster.Spec.Topology.DC[0].Rack { + currentDcRackName := cassandraCluster.GetDCRackName(dc.Name, currentRack.Name) + + // should start on current rack + reconcileValidation(t, rcc, *req) + assert.Equal(NewCapacity, cassandraCluster.Spec.DataCapacity) + assertUpsizeInProgress(assert, rcc, currentDcRackName) + + // should remove current rack orphan option + reconcileValidation(t, rcc, *req) + assertStsNotFound(assert, rcc, dc, currentRack.Name) + + // should recreate current rack with new capacity + reconcileValidation(t, rcc, *req) + assertStsCapacity(assert, rcc, dc, currentRack.Name, NewCapacity) + + // simulate current rack sts is ready + simulateStsIsReady(assert, rcc, dc, currentRack.Name) + + // should update current rack pvcs to new capacity + reconcileValidation(t, rcc, *req) + assertRackPVCs(assert, rcc, dc, currentRack.Name, NewCapacity) + + // should do nothing till current rack pvcs are not resized + reconcileValidation(t, rcc, *req) + 
assertUpsizeInProgress(assert, rcc, currentDcRackName) + + simulateCurrentRackPvcsAreUpsizedByProvisioner(assert, rcc, dc, currentRack.Name) + + // should finalize storage upsize on current rack + reconcileValidation(t, rcc, *req) + assertUpsizeDoneOnRack(assert, rcc, currentDcRackName) + } + + // should finalize storage upsize globally + reconcileValidation(t, rcc, *req) + assertUpsizeDoneGlobally(assert, rcc) +} + +func simulatePVCsReadyForWholeCluster(assert *assert.Assertions, rcc *CassandraClusterReconciler, datacenters api.DCSlice, expectedCapacity string) { + for _, dc := range datacenters { + for _, rack := range dc.Rack { + stfsName := rcc.cc.Name + fmt.Sprintf("-%s-%s", dc.Name, rack.Name) + for i := int32(0); i < rcc.cc.Spec.NodesPerRacks; i++ { + pvcName := fmt.Sprintf("%s-%s-%d", "data", stfsName, i) + pvc := corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, + Namespace: rcc.cc.Namespace, + Labels: k8s.LabelsForCassandraDCRack(rcc.cc, dc.Name, rack.Name), + }, + Spec: corev1.PersistentVolumeClaimSpec{ + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + "storage": resource.MustParse(expectedCapacity), + }, + }, + }, + } + assert.NoError(rcc.Client.Create(context.TODO(), &pvc)) + } + } + } +} + +func assertStsNotFound(assert *assert.Assertions, rcc *CassandraClusterReconciler, dc api.DC, rackName string) { + _, err := getSts(rcc, dc, rackName) + assert.True(apierrors.IsNotFound(err)) +} + +func assertStsCapacity(assert *assert.Assertions, rcc *CassandraClusterReconciler, dc api.DC, rackName string, expectedCapacity string) { + currentRackSts, err := getSts(rcc, dc, rackName) + assert.NoError(err) + assert.Equal(resource.MustParse(expectedCapacity), currentRackSts.Spec.VolumeClaimTemplates[0].Spec.Resources.Requests["storage"]) +} + +func simulateStsIsReady(assert *assert.Assertions, rcc *CassandraClusterReconciler, dc api.DC, rackName string) { + currentRackSts, err := getSts(rcc, dc, rackName) 
+ assert.NoError(err) + currentRackSts.Status.Replicas = *currentRackSts.Spec.Replicas + currentRackSts.Status.ReadyReplicas = *currentRackSts.Spec.Replicas + assert.NoError(rcc.Client.Status().Update(ctx, currentRackSts)) +} + +func getSts(rcc *CassandraClusterReconciler, dc api.DC, rackName string) (*appsv1.StatefulSet, error) { + var currentRackSts = &appsv1.StatefulSet{} + currentRackStsName := rcc.cc.Name + fmt.Sprintf("-%s-%s", dc.Name, rackName) + currentRackStsNamespacedName := types.NamespacedName{Namespace: rcc.cc.Namespace, Name: currentRackStsName} + err := rcc.Client.Get(context.TODO(), currentRackStsNamespacedName, currentRackSts) + return currentRackSts, err +} + +func assertRackPVCs(assert *assert.Assertions, rcc *CassandraClusterReconciler, dc api.DC, rackName string, expectedCapacity string) { + currentRackPvcs, err := rcc.ListPVC(ctx, rcc.cc.Namespace, k8s.LabelsForCassandraDCRack(rcc.cc, dc.Name, rackName)) + assert.NoError(err) + assert.Equal(3, len(currentRackPvcs.Items)) + for _, pvc := range currentRackPvcs.Items { + assert.Equal(resource.MustParse(expectedCapacity), pvc.Spec.Resources.Requests["storage"]) + } +} + +func simulateCurrentRackPvcsAreUpsizedByProvisioner(assert *assert.Assertions, rcc *CassandraClusterReconciler, dc api.DC, rackName string) { + currentRackPvcs, err := rcc.ListPVC(ctx, rcc.cc.Namespace, k8s.LabelsForCassandraDCRack(rcc.cc, dc.Name, rackName)) + assert.NoError(err) + for _, pvc := range currentRackPvcs.Items { + if pvc.Status.Capacity == nil { + pvc.Status.Capacity = corev1.ResourceList{} + } + pvc.Status.Capacity["storage"] = pvc.Spec.Resources.Requests["storage"] + assert.NoError(rcc.Client.Status().Update(context.TODO(), &pvc)) + } +} + +func assertClusterInitialized(assert *assert.Assertions, rcc *CassandraClusterReconciler) { + assertClusterStatusPhase(assert, rcc, api.ClusterPhaseRunning) + assertClusterStatusLastAction(assert, rcc, api.ClusterPhaseInitial, api.StatusDone) + for dcRackName := range 
rcc.cc.Status.CassandraRackStatus { + assertRackStatusPhase(assert, rcc, dcRackName, api.ClusterPhaseRunning) + assertRackStatusLastAction(assert, rcc, dcRackName, api.ClusterPhaseInitial, api.StatusDone) + } +} + +func assertUpsizeInProgress(assert *assert.Assertions, rcc *CassandraClusterReconciler, dcRackName string) { + assert.NotEmpty(rcc.cc.Status.CassandraRackStatus[dcRackName].StatefulSetSnapshotBeforeStorageResize) + + assertClusterStatusPhase(assert, rcc, api.ClusterPhasePending) + assertClusterStatusLastAction(assert, rcc, api.ActionStorageUpsize, api.StatusOngoing) + assertRackStatusPhase(assert, rcc, dcRackName, api.ClusterPhasePending) + assertRackStatusLastAction(assert, rcc, dcRackName, api.ActionStorageUpsize, api.StatusOngoing) +} + +func assertUpsizeDoneOnRack(assert *assert.Assertions, rcc *CassandraClusterReconciler, dcRackName string) { + assertClusterStatusPhase(assert, rcc, api.ClusterPhasePending) + assertClusterStatusLastAction(assert, rcc, api.ActionStorageUpsize, api.StatusOngoing) + assertRackStatusPhase(assert, rcc, dcRackName, api.ClusterPhasePending) + assertRackStatusLastAction(assert, rcc, dcRackName, api.ActionStorageUpsize, api.StatusDone) +} + +func assertUpsizeDoneGlobally(assert *assert.Assertions, rcc *CassandraClusterReconciler) { + assertClusterStatusPhase(assert, rcc, api.ClusterPhaseRunning) + assertClusterStatusLastAction(assert, rcc, api.ActionStorageUpsize, api.StatusDone) + for dcRackName := range rcc.cc.Status.CassandraRackStatus { + assertRackStatusPhase(assert, rcc, dcRackName, api.ClusterPhaseRunning) + assertRackStatusLastAction(assert, rcc, dcRackName, api.ActionStorageUpsize, api.StatusDone) + } +} + +func TestStorageUpsizeDoesNotStartWhenOtherOperationInProgress(t *testing.T) { + + overrideDelayWaitWithNoDelay() + defer restoreDefaultDelayWait() + + const InitialCapacity = "3Gi" + const NewCapacity = "10Gi" + + httpmock.Activate() + defer httpmock.DeactivateAndReset() + assert := assert.New(t) + + // setup 
cluster + rcc, req := createCassandraClusterWithNoDisruption(t, "cassandracluster-1DC.yaml") + assert.Equal(int32(3), rcc.cc.Spec.NodesPerRacks) + + cassandraCluster := rcc.cc.DeepCopy() + datacenters := cassandraCluster.Spec.Topology.DC + assert.Equal(1, len(datacenters)) + assert.Equal(1, len(datacenters[0].Rack)) + assertClusterInitialized(assert, rcc) + + // check initial sts capacity + dc := datacenters[0] + rack := dc.Rack[0] + assertStsCapacity(assert, rcc, dc, rack.Name, InitialCapacity) + + // mock no joining nodes + sts1Name := cassandraCluster.Name + fmt.Sprintf("-%s-%s", dc.Name, rack.Name) + firstPod := podHost(sts1Name, 0, rcc) + registerJolokiaOperationJoiningNodes(firstPod, 0) + + // request scale out - assert operation started + cassandraCluster.Spec.NodesPerRacks = 4 + rcc.Client.Update(context.TODO(), cassandraCluster) + + reconcileValidation(t, rcc, *req) + assert.GreaterOrEqual(jolokiaCallsCount(firstPod), 0) + assertStatefulsetReplicas(ctx, t, rcc, 4, cassandraCluster.Namespace, sts1Name) + assertClusterStatusLastAction(assert, rcc, api.ActionScaleUp, api.StatusOngoing) + assertRackStatusLastAction(assert, rcc, "dc1-rack1", api.ActionScaleUp, api.StatusOngoing) + + // request storage upsize while scale-out ongoing - assert new capacity accepted but upsize is not started and sts capacity untouched + cassandraCluster = rcc.cc.DeepCopy() + cassandraCluster.Spec.DataCapacity = NewCapacity + assert.NoError(rcc.Client.Update(context.TODO(), cassandraCluster)) + + reconcileValidation(t, rcc, *req) + + cassandraCluster = rcc.cc.DeepCopy() + assert.Equal(NewCapacity, cassandraCluster.Spec.DataCapacity) + assertStsCapacity(assert, rcc, dc, rack.Name, InitialCapacity) + + assertStatefulsetReplicas(ctx, t, rcc, 4, cassandraCluster.Namespace, sts1Name) + assertClusterStatusLastAction(assert, rcc, api.ActionScaleUp, api.StatusOngoing) + assertRackStatusLastAction(assert, rcc, "dc1-rack1", api.ActionScaleUp, api.StatusOngoing) + + // scale-out finishes - 
capacity still unchanged + simulateNewPodsReady(t, rcc, sts1Name, dc, 3, 4) + registerJolokiaOperationJoiningNodes(firstPod, 0) + + reconcileValidation(t, rcc, *req) + + assert.GreaterOrEqual(jolokiaCallsCount(firstPod), 1) + assertClusterStatusPhase(assert, rcc, api.ClusterPhaseRunning) + assertRackStatusPhase(assert, rcc, "dc1-rack1", api.ClusterPhaseRunning) + assertClusterStatusLastAction(assert, rcc, api.ActionScaleUp, api.StatusDone) + assertRackStatusLastAction(assert, rcc, "dc1-rack1", api.ActionScaleUp, api.StatusDone) + assertStsCapacity(assert, rcc, dc, rack.Name, InitialCapacity) + + // next reconcile should initialize storage upsize action + simulatePVCsReadyForWholeCluster(assert, rcc, datacenters, InitialCapacity) + + reconcileValidation(t, rcc, *req) + + assertUpsizeInProgress(assert, rcc, cassandraCluster.GetDCRackName(dc.Name, rack.Name)) + assertStsCapacity(assert, rcc, dc, rack.Name, InitialCapacity) + + // next two reconciles do: 1. sts removal,2. sts re-creation with new capacity + reconcileValidation(t, rcc, *req) + reconcileValidation(t, rcc, *req) + assertStsCapacity(assert, rcc, dc, rack.Name, NewCapacity) +} diff --git a/controllers/cassandracluster/sts/sts_client.go b/controllers/cassandracluster/sts/sts_client.go new file mode 100644 index 00000000..d138319b --- /dev/null +++ b/controllers/cassandracluster/sts/sts_client.go @@ -0,0 +1,47 @@ +package sts + +import ( + "context" + "fmt" + + appsv1 "k8s.io/api/apps/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +func NewClient(client client.Client) StsClient { + return &stsClient{client: client} +} + +type StsClient interface { + CreateStatefulSet(ctx context.Context, statefulSet *appsv1.StatefulSet) error + DeleteStatefulSetWithOrphanOption(ctx context.Context, namespace, name string) error +} + +var _ StsClient = (*stsClient)(nil) + +type stsClient struct { + client client.Client +} + 
+func (c *stsClient) CreateStatefulSet(ctx context.Context, statefulSet *appsv1.StatefulSet) error { + err := c.client.Create(ctx, statefulSet) + if err != nil { + if apierrors.IsAlreadyExists(err) { + return fmt.Errorf("statefulset already exists: %w", err) + } + return fmt.Errorf("creating cassandra statefulset: %w", err) + } + return nil +} + +func (c *stsClient) DeleteStatefulSetWithOrphanOption(ctx context.Context, namespace, name string) error { + ss := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + } + return c.client.Delete(ctx, ss, client.PropagationPolicy(metav1.DeletePropagationOrphan)) +} diff --git a/controllers/cassandracluster/sts/sts_utils.go b/controllers/cassandracluster/sts/sts_utils.go new file mode 100644 index 00000000..0fbcb21d --- /dev/null +++ b/controllers/cassandracluster/sts/sts_utils.go @@ -0,0 +1,11 @@ +package sts + +import appsv1 "k8s.io/api/apps/v1" + +func IsStatefulSetNotReady(statefulSet *appsv1.StatefulSet) bool { + return !IsStatefulSetReady(statefulSet) +} + +func IsStatefulSetReady(statefulSet *appsv1.StatefulSet) bool { + return statefulSet.Spec.Replicas != nil && statefulSet.Status.ReadyReplicas == *statefulSet.Spec.Replicas +} diff --git a/controllers/cassandracluster/testdata/cassandracluster-2racks-with-storage.yaml b/controllers/cassandracluster/testdata/cassandracluster-2racks-with-storage.yaml new file mode 100644 index 00000000..5d425816 --- /dev/null +++ b/controllers/cassandracluster/testdata/cassandracluster-2racks-with-storage.yaml @@ -0,0 +1,23 @@ +apiVersion: db.orange.com/v2 +kind: CassandraCluster +metadata: + name: cassandra-demo + labels: + cluster: k8s.pic + namespace: ns +spec: + dataCapacity: 3Gi + nodesPerRacks: 3 + deletePVC: true + autoPilot: true + resources: + limits: &limits + cpu: 1 + memory: 2Gi + requests: *limits + topology: + dc: + - name: dc1 + rack: + - name: rack1 + - name: rack2 diff --git a/controllers/cassandracluster/view/rack_view.go 
b/controllers/cassandracluster/view/rack_view.go new file mode 100644 index 00000000..bef287f4 --- /dev/null +++ b/controllers/cassandracluster/view/rack_view.go @@ -0,0 +1,19 @@ +package view + +import ( + api "github.com/cscetbon/casskop/api/v2" + "github.com/sirupsen/logrus" + appsv1 "k8s.io/api/apps/v1" +) + +type RackView interface { + ClusterName() string + DcName() api.DcName + RackName() api.RackName + DcRackName() api.DcRackName + RackStatus() *api.CassandraRackStatus + LivingStatefulSet() *appsv1.StatefulSet + IsStatefulSetAliveNow() bool + GetLabelsForCassandraDCRack(cc *api.CassandraCluster) map[string]string + Log() *logrus.Entry +} diff --git a/controllers/cassandracluster/view/stub/rack_view_stub.go b/controllers/cassandracluster/view/stub/rack_view_stub.go new file mode 100644 index 00000000..020db268 --- /dev/null +++ b/controllers/cassandracluster/view/stub/rack_view_stub.go @@ -0,0 +1,54 @@ +package stub + +import ( + api "github.com/cscetbon/casskop/api/v2" + "github.com/cscetbon/casskop/controllers/cassandracluster/view" + "github.com/cscetbon/casskop/pkg/k8s" + "github.com/sirupsen/logrus" + appsv1 "k8s.io/api/apps/v1" +) + +type RackView struct { + ClusterNameStub string + CompleteRackNameStub api.CompleteRackName + RackStatusStub *api.CassandraRackStatus + LivingStatefulSetStub *appsv1.StatefulSet +} + +var _ view.RackView = RackView{} + +func (v RackView) ClusterName() string { + return v.ClusterNameStub +} + +func (v RackView) DcName() api.DcName { + return v.CompleteRackNameStub.DcName +} + +func (v RackView) RackName() api.RackName { + return v.CompleteRackNameStub.RackName +} + +func (v RackView) DcRackName() api.DcRackName { + return v.CompleteRackNameStub.DcRackName +} + +func (v RackView) RackStatus() *api.CassandraRackStatus { + return v.RackStatusStub +} + +func (v RackView) LivingStatefulSet() *appsv1.StatefulSet { + return v.LivingStatefulSetStub +} + +func (v RackView) IsStatefulSetAliveNow() bool { + return 
v.LivingStatefulSetStub != nil +} + +func (v RackView) GetLabelsForCassandraDCRack(cc *api.CassandraCluster) map[string]string { + return k8s.LabelsForCassandraDCRack(cc, v.DcName().String(), v.RackName().String()) +} + +func (v RackView) Log() *logrus.Entry { + return logrus.WithFields(logrus.Fields{}) +} diff --git a/go.mod b/go.mod index a33dc2aa..a6db927f 100644 --- a/go.mod +++ b/go.mod @@ -19,7 +19,7 @@ require ( github.com/r3labs/diff v0.0.0-20190801153147-a71de73c46ad github.com/robfig/cron/v3 v3.0.1 github.com/sirupsen/logrus v1.9.3 - github.com/stretchr/testify v1.9.0 + github.com/stretchr/testify v1.11.1 github.com/swarvanusg/go_jolokia v0.0.0-20190213021437-3cd2b3fc4f36 github.com/thoas/go-funk v0.4.0 github.com/zput/zxcTool v1.3.6 @@ -69,6 +69,7 @@ require ( github.com/prometheus/common v0.55.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect github.com/spf13/pflag v1.0.5 // indirect + github.com/stretchr/objx v0.5.2 // indirect github.com/x448/float16 v0.8.4 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect diff --git a/go.sum b/go.sum index 31ece8ac..b1753c1b 100644 --- a/go.sum +++ b/go.sum @@ -232,12 +232,16 @@ github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An github.com/ssdb/gossdb v0.0.0-20180723034631-88f6b59b84ec/go.mod h1:QBvMkMya+gXctz3kmljlUCu/yB3GZ6oee+dUozsezQE= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod 
h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/swarvanusg/go_jolokia v0.0.0-20190213021437-3cd2b3fc4f36 h1:ePA8jiIU9AWpv9jQFjA3iokytYoqLs/QPgKXHl2CGAI= github.com/swarvanusg/go_jolokia v0.0.0-20190213021437-3cd2b3fc4f36/go.mod h1:L3LcER9PmnZhT1zT0ZMiJP7S8SKOKa5HefyyOj/5byg= github.com/syndtr/goleveldb v0.0.0-20181127023241-353a9fca669c/go.mod h1:Z4AUp2Km+PwemOoO/VB5AOx9XSsIItzFjoJlOSiYmn0= diff --git a/main.go b/main.go index 26b8b54c..5f07fd18 100644 --- a/main.go +++ b/main.go @@ -43,6 +43,8 @@ import ( api "github.com/cscetbon/casskop/api/v2" "github.com/cscetbon/casskop/controllers/cassandrabackup" "github.com/cscetbon/casskop/controllers/cassandracluster" + "github.com/cscetbon/casskop/controllers/cassandracluster/storagestateclient" + "github.com/cscetbon/casskop/controllers/cassandracluster/sts" "github.com/cscetbon/casskop/controllers/cassandrarestore" "github.com/operator-framework/operator-lib/leader" "github.com/sirupsen/logrus" @@ -197,9 +199,11 @@ func main() { os.Exit(1) } if err = (&cassandracluster.CassandraClusterReconciler{ - Client: mgr.GetClient(), - Log: ctrl.Log.WithName("controllers").WithName("CassandraCluster"), - Scheme: mgr.GetScheme(), + Client: mgr.GetClient(), + StorageStateClient: storagestateclient.New(mgr.GetClient()), + StsClient: sts.NewClient(mgr.GetClient()), + Log: ctrl.Log.WithName("controllers").WithName("CassandraCluster"), + Scheme: mgr.GetScheme(), }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "CassandraCluster") 
os.Exit(1) diff --git a/pkg/k8s/util.go b/pkg/k8s/util.go index d7a16723..3188e1e9 100644 --- a/pkg/k8s/util.go +++ b/pkg/k8s/util.go @@ -35,6 +35,10 @@ func AddOwnerRefToObject(o metav1.Object, r metav1.OwnerReference) { o.SetOwnerReferences(append(o.GetOwnerReferences(), r)) } +func LabelsForCassandraDCRackStrongTypes(cc *api.CassandraCluster, dcName api.DcName, rackName api.RackName) map[string]string { + return LabelsForCassandraDCRack(cc, dcName.String(), rackName.String()) +} + // labelsForCassandra returns the labels for selecting the resources // belonging to the given name. func LabelsForCassandraDCRack(cc *api.CassandraCluster, dcName string, rackName string) map[string]string { diff --git a/test/kuttl/storage-upsize/00-assert.yaml b/test/kuttl/storage-upsize/00-assert.yaml new file mode 100644 index 00000000..0cd35503 --- /dev/null +++ b/test/kuttl/storage-upsize/00-assert.yaml @@ -0,0 +1,37 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: cassandra-e2e-dc1-rack1 +status: + currentReplicas: 1 + replicas: 1 +--- +apiVersion: db.orange.com/v2 +kind: CassandraCluster +metadata: + name: cassandra-e2e +status: + cassandraRackStatus: + dc1-rack1: + cassandraLastAction: + name: Initializing + status: Done + phase: Running + lastClusterAction: Initializing + lastClusterActionStatus: Done + phase: Running +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: data-cassandra-e2e-dc1-rack1-0 +spec: + resources: + requests: + storage: 1Gi + storageClassName: openebs-lvm-sc + volumeMode: Filesystem +status: + capacity: + storage: 1Gi + phase: Bound diff --git a/test/kuttl/storage-upsize/00-createCluster.yaml b/test/kuttl/storage-upsize/00-createCluster.yaml new file mode 100644 index 00000000..62ce8239 --- /dev/null +++ b/test/kuttl/storage-upsize/00-createCluster.yaml @@ -0,0 +1,44 @@ +# +# requirements: +# - Cluster nodes should support LVM +# - OpenEBS LVM Operator should be installed and configured in the cluster. 
+# kubectl apply -f https://openebs.github.io/charts/lvm-operator.yaml +# +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: openebs-lvm-sc +allowVolumeExpansion: true +parameters: + storage: lvm + volgroup: lvmvg +provisioner: local.csi.openebs.io +reclaimPolicy: Delete +volumeBindingMode: WaitForFirstConsumer +--- +apiVersion: db.orange.com/v2 +kind: CassandraCluster +metadata: + name: cassandra-e2e +spec: + dataCapacity: 1Gi + dataStorageClass: openebs-lvm-sc + nodesPerRacks: 1 + cassandraImage: cassandra:4.0.2 + serverVersion: 4.0.0 + deletePVC: true + autoPilot: true + backRestSidecar: + resources: + limits: + memory: 128Mi + cpu: 100m + resources: + limits: + cpu: 200m + memory: 512Mi + topology: + dc: + - name: dc1 + rack: + - name: rack1 \ No newline at end of file diff --git a/test/kuttl/storage-upsize/01-assert.yaml b/test/kuttl/storage-upsize/01-assert.yaml new file mode 100644 index 00000000..21dc7cfa --- /dev/null +++ b/test/kuttl/storage-upsize/01-assert.yaml @@ -0,0 +1,35 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: cassandra-e2e-dc1-rack1 +status: + currentReplicas: 1 + replicas: 1 +--- +apiVersion: db.orange.com/v2 +kind: CassandraCluster +metadata: + name: cassandra-e2e +status: + cassandraRackStatus: + dc1-rack1: + cassandraLastAction: + name: StorageUpsize + status: Done + phase: Running + lastClusterAction: StorageUpsize + lastClusterActionStatus: Done + phase: Running +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: data-cassandra-e2e-dc1-rack1-0 +spec: + resources: + requests: + storage: 2Gi +status: + capacity: + storage: 2Gi + phase: Bound diff --git a/test/kuttl/storage-upsize/01-storageUpsize.yaml b/test/kuttl/storage-upsize/01-storageUpsize.yaml new file mode 100644 index 00000000..1e4d2c13 --- /dev/null +++ b/test/kuttl/storage-upsize/01-storageUpsize.yaml @@ -0,0 +1,6 @@ +apiVersion: db.orange.com/v2 +kind: CassandraCluster +metadata: + name: cassandra-e2e +spec: + dataCapacity: 
2Gi diff --git a/test/kuttl/storage-upsize/openebs-lvm-on-kind.md b/test/kuttl/storage-upsize/openebs-lvm-on-kind.md new file mode 100644 index 00000000..8087ac45 --- /dev/null +++ b/test/kuttl/storage-upsize/openebs-lvm-on-kind.md @@ -0,0 +1,391 @@ +# Setting up OpenEBS LVM LocalPV on kind for Online PVC Expansion + +This guide explains how to set up LVM (Logical Volume Manager) inside kind (Kubernetes in Docker) containers to enable online PVC expansion testing with OpenEBS LVM LocalPV. + +## Overview + +Online PVC expansion allows you to resize persistent volume claims without restarting pods. This is useful for testing applications that need to handle storage expansion dynamically. + +In the CassKop domain, it is used to test the storage upsize feature. +This feature requires online PVC expansion capability from the underlying storage provider, which is available on all major cloud providers. + +## Prerequisites + +- Docker installed and running +- kind installed (`go install sigs.k8s.io/kind@latest` or via package manager) +- kubectl installed and configured +- sudo access (for creating directories) + +## Step 1: Create a kind Cluster with Extra Mounts + +First, create a kind configuration file that mounts extra storage paths: + +```yaml +# kind-lvm-config.yaml +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: +- role: control-plane +- role: worker + extraMounts: + - hostPath: /tmp/kind-worker1-lvm + containerPath: /mnt/disks +- role: worker + extraMounts: + - hostPath: /tmp/kind-worker2-lvm + containerPath: /mnt/disks +``` + +Create the necessary directories and the cluster: + +```bash +sudo mkdir -p /tmp/kind-worker1-lvm /tmp/kind-worker2-lvm +kind create cluster --config kind-lvm-config.yaml --name lvm-test +``` + +## Step 2: Set up LVM Inside Each kind Worker Node + +For each worker node, you need to install LVM tools and create a volume group. 
+ +### Manual Setup + +```bash +# Get the worker node names +kubectl get nodes + +# For the first worker node +docker exec -it kind-lvm-test-worker bash + +# Inside the container, run: +apt-get update +apt-get install -y lvm2 + +# Create a loop device (simulating a physical disk) +truncate -s 10G /mnt/disks/disk.img +losetup -f /mnt/disks/disk.img +LOOP_DEVICE=$(losetup -j /mnt/disks/disk.img | cut -d: -f1) +echo "Loop device: $LOOP_DEVICE" + +# Create LVM physical volume +pvcreate $LOOP_DEVICE + +# Create LVM volume group (name must match StorageClass configuration) +vgcreate lvmvg $LOOP_DEVICE + +# Verify the setup +vgs +pvs +lvs + +# Exit the container +exit +``` + +Repeat the same process for the second worker node: + +```bash +docker exec -it kind-lvm-test-worker2 bash +# ... repeat the same commands above ... +exit +``` + +### Automated Setup Script + +Alternatively, use this script to automate the LVM setup: + +```bash +#!/bin/bash +# setup-lvm-in-kind.sh + +CLUSTER_NAME=${1:-lvm-test} +WORKER_NODES=$(kind get nodes --name $CLUSTER_NAME | grep worker) + +for NODE in $WORKER_NODES; do + echo "Setting up LVM on $NODE..." + + docker exec $NODE bash -c ' + apt-get update -qq && apt-get install -y -qq lvm2 > /dev/null 2>&1 + mkdir -p /mnt/disks + truncate -s 10G /mnt/disks/disk.img + LOOP_DEVICE=$(losetup -f) + losetup $LOOP_DEVICE /mnt/disks/disk.img + pvcreate $LOOP_DEVICE + vgcreate lvmvg $LOOP_DEVICE + echo "LVM setup complete on $(hostname)" + vgs + ' +done + +echo "LVM setup complete on all worker nodes!" 
+``` + +Make it executable and run: + +```bash +chmod +x setup-lvm-in-kind.sh +./setup-lvm-in-kind.sh lvm-test +``` + +## Step 3: Install OpenEBS LVM LocalPV + +Install the OpenEBS LVM operator: + +```bash +kubectl apply -f https://openebs.github.io/charts/lvm-operator.yaml +``` + +Wait for all OpenEBS pods to be ready: + +```bash +kubectl get pods -n openebs -w +``` + +You should see pods like: +- `openebs-lvm-localpv-controller` +- `openebs-lvm-localpv-node` (one per worker node) + +## Step 4: Create a StorageClass with Volume Expansion Enabled + +Create a StorageClass that uses the LVM volume group: + +```yaml +# openebs-lvm-sc.yaml +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: openebs-lvm-sc +provisioner: local.csi.openebs.io +parameters: + storage: "lvm" + volgroup: "lvmvg" +allowVolumeExpansion: true +volumeBindingMode: WaitForFirstConsumer +``` + +Apply the StorageClass: + +```bash +kubectl apply -f openebs-lvm-sc.yaml +``` + +**Important:** The `volgroup` parameter must match the volume group name you created in Step 2 (`lvmvg`). 
+ +## Step 5: Create a Test PVC + +Create a PersistentVolumeClaim using the new StorageClass: + +```yaml +# test-lvm-pvc.yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: lvm-pvc +spec: + storageClassName: openebs-lvm-sc + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi +``` + +Apply the PVC: + +```bash +kubectl apply -f test-lvm-pvc.yaml +``` + +## Step 6: Create a Test Application + +Create a deployment that uses the PVC: + +```yaml +# test-lvm-pod.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: test-lvm-app +spec: + replicas: 1 + selector: + matchLabels: + app: test-lvm + template: + metadata: + labels: + app: test-lvm + spec: + containers: + - name: test-container + image: nginx + volumeMounts: + - name: data + mountPath: /data + command: ["/bin/sh"] + args: ["-c", "while true; do df -h /data; sleep 30; done"] + volumes: + - name: data + persistentVolumeClaim: + claimName: lvm-pvc +``` + +Apply the deployment: + +```bash +kubectl apply -f test-lvm-pod.yaml +``` + +Wait for the pod to be running: + +```bash +kubectl get pods -l app=test-lvm -w +``` + +## Step 7: Test Online PVC Expansion + +Now you can test the online expansion feature: + +### Check Current Size + +```bash +# Check PVC size +kubectl get pvc lvm-pvc + +# Get the pod name +POD_NAME=$(kubectl get pod -l app=test-lvm -o jsonpath='{.items[0].metadata.name}') + +# Check filesystem size inside the pod +kubectl exec $POD_NAME -- df -h /data +``` + +### Expand the PVC + +Expand the PVC while the pod is still running: + +```bash +kubectl patch pvc lvm-pvc -p '{"spec":{"resources":{"requests":{"storage":"3Gi"}}}}' +``` + +### Monitor the Expansion + +Watch the PVC status change: + +```bash +kubectl get pvc lvm-pvc -w +``` + +You should see the status transition through: +1. `Resizing` - Expansion in progress +2. `FileSystemResizePending` - Volume expanded, filesystem resize pending +3. 
`Bound` - Expansion complete + +### Verify the New Size + +Check that the filesystem has been resized without pod restart: + +```bash +kubectl exec $POD_NAME -- df -h /data +``` + +You should see the increased storage size reflected in the output. + +### Verify Pod Was Not Restarted + +```bash +kubectl get pod $POD_NAME -o jsonpath='{.status.containerStatuses[0].restartCount}' +``` + +This should return `0`, confirming no restart occurred. + +## Verification Commands + +Here's a quick reference of useful verification commands: + +```bash +# Check LVM setup on a worker node +docker exec kind-lvm-test-worker vgs +docker exec kind-lvm-test-worker pvs +docker exec kind-lvm-test-worker lvs + +# Check OpenEBS components +kubectl get pods -n openebs +kubectl get sc +kubectl get pvc +kubectl get pv + +# Check PVC events +kubectl describe pvc lvm-pvc + +# Watch pod logs +kubectl logs -l app=test-lvm -f +``` + +## Troubleshooting + +### PVC Stuck in Pending + +**Problem:** PVC remains in `Pending` state. + +**Solutions:** +- Verify LVM is properly set up on worker nodes: `docker exec kind-lvm-test-worker vgs` +- Check OpenEBS LVM controller logs: `kubectl logs -n openebs -l app=openebs-lvm-controller` +- Ensure volume group name in StorageClass matches: `volgroup: "lvmvg"` + +### Expansion Not Working + +**Problem:** PVC expansion is stuck or fails. + +**Solutions:** +- Check if `allowVolumeExpansion: true` is set in StorageClass +- Verify there's enough space in the volume group: `docker exec kind-lvm-test-worker vgs` +- Check OpenEBS node pod logs: `kubectl logs -n openebs -l app=openebs-lvm-node` + +### Loop Device Not Found After Restart + +**Problem:** Loop devices disappear after kind container restart. + +**Solution:** Loop devices are ephemeral in containers. After restarting kind, you'll need to re-run the LVM setup commands or script. + +## Important Notes + +1. **Ephemeral Setup:** Loop devices created in kind containers don't persist across container restarts. 
For permanent testing environments, consider using a VM with actual block devices. + +2. **Volume Group Naming:** The volume group name (`lvmvg`) must match exactly between: + - The LVM setup commands (`vgcreate lvmvg`) + - The StorageClass parameters (`volgroup: "lvmvg"`) + +3. **Storage Limits:** The loop device size (10G in this example) limits the total storage available for all PVCs on that node. + +4. **Production Use:** This setup is intended for development and testing only. For production, use actual block devices or cloud provider storage solutions. + +5. **Multiple Workers:** Each worker node needs its own LVM volume group setup. The automated script handles this automatically. + +## Cleanup + +To clean up the environment: + +```bash +# Delete the test resources +kubectl delete deployment test-lvm-app +kubectl delete pvc lvm-pvc +kubectl delete sc openebs-lvm-sc + +# Delete OpenEBS +kubectl delete -f https://openebs.github.io/charts/lvm-operator.yaml + +# Delete the kind cluster +kind delete cluster --name lvm-test + +# Remove temporary directories +sudo rm -rf /tmp/kind-worker1-lvm /tmp/kind-worker2-lvm +``` + +## Additional Resources + +- [OpenEBS LVM LocalPV Documentation](https://github.com/openebs/lvm-localpv) +- [Kubernetes Volume Expansion Documentation](https://kubernetes.io/docs/concepts/storage/persistent-volumes/#expanding-persistent-volumes-claims) +- [kind Documentation](https://kind.sigs.k8s.io/) + +## License + +This documentation is provided as-is for educational and testing purposes. 
\ No newline at end of file diff --git a/website/docs/5_operations/1_cluster_operations.md b/website/docs/5_operations/1_cluster_operations.md index 4232ebb5..c96a0d33 100644 --- a/website/docs/5_operations/1_cluster_operations.md +++ b/website/docs/5_operations/1_cluster_operations.md @@ -11,7 +11,7 @@ Cluster Operations must only be triggered by a change made on the `CassandraClus Some updates in the `CassandraCluster` CRD object are forbidden and will be gently dismissed by CassKop: -- `spec.dataCapacity` +- `spec.dataCapacity` (volume shrinking is forbidden, but online volume expansion is supported; see the StorageUpsize section) - `spec.dataStorage` Some Updates in the `CassandraCluster` CRD object will trigger a rolling update of the whole cluster such as : @@ -878,6 +878,26 @@ The UpdateSeedList is done automatically by CassKop when the parameter See [ScaleUp](#scaleup) and [ScaleDown](#updatescaledown). +### StorageUpsize + +- Scope: Only the `data` PersistentVolumeClaim can be resized +- Direction: Storage upsize only; downsize operations are not supported +- Storage Class: The storage class cannot be changed +- Prerequisites: The underlying storage class must support volume expansion (allowVolumeExpansion: true) +- Operation Mode: Live resizing without pod restarts (requires CSI driver support for online expansion) +- Tested Platforms: Validated on Azure Kubernetes Service (AKS) and Google Kubernetes Engine (GKE) + +Execution Strategy: +- Storage resize is performed rack by rack to maintain cluster stability +- All pods within a single rack are resized concurrently +- The operation proceeds to the next rack only after the current rack completes successfully + +Operation Isolation: +- Storage upsize operations are executed independently from other cluster operations +- During an active storage resize, no other configuration changes are applied +- However, such changes are not reverted from the CR and will be processed after the resize completes +- Conversely, when 
other operations are in progress, storage resize requests are queued until completion + ### CorrectCRDConfig The CRD `CassandraCluster` is used to define your cluster configuration. Some fields can't be updated in a kubernetes