# w1ndy/k8s-iscsi-xfs-lvm-recover.sh

## k8s-iscsi-xfs-lvm-recover.sh
# Manual recovery runbook for XFS filesystems on LVM volumes that are exported
# over iSCSI to a Kubernetes cluster. Run the steps by hand, in order; each
# heading names the node(s) a step applies to. "sdxx" and "xxx" are
# placeholders that must be replaced with real device / volume / pod names.

# Shutdown the Kubernetes cluster first (on every node)
systemctl stop kubelet

# Stop all docker containers (on every node).
# xargs -r skips the call when the list is empty; a bare `docker stop` with
# no container IDs exits with a usage error.
docker ps -aq | xargs -r docker stop

# Unmount all ISCSI disks (on every node)
mount | grep iqn                # show the mounts whose entry contains "iqn"
umount --all-targets /dev/sdxx  # replace sdxx with each disk

# Stop the ISCSI server (on the storage node)
systemctl stop iscsid iscsid.socket
systemctl stop targetd

# Temporarily clear target configurations (on the storage node)
targetctl clear
targetcli ls                    # make sure everything is empty

# List all logical volumes in lvm (on the storage node)
lvs

# Repair each logical volume /dev/xxx/pvc-xxx (on the storage node)
mount /dev/xxx/pvc-xxx /mnt     # mount first to recover metadata logs
umount /mnt
# Only fall back to -L when the mount above fails: it zeroes the log, so any
# metadata updates still sitting in the log are lost.
xfs_repair /dev/xxx/pvc-xxx     # use -L if necessary (can be destructive!)

# Restore target configurations (on the storage node)
targetctl restore
targetcli ls                    # make sure everything is back

# Restart the ISCSI services that were stopped above (on the storage node).
# Skip any unit that was not running before the recovery started.
systemctl start iscsid.socket iscsid
systemctl start targetd

# Kickoff the Kubernetes cluster (on every node)
systemctl start kubelet

# Monitor and restart pods if necessary
kubectl get pods
kubectl delete pod xxx          # replace xxx with the name of a stuck pod
	# Manual recovery runbook for XFS filesystems on LVM volumes that are
	# exported over iSCSI to a Kubernetes cluster. Run the steps by hand, in
	# order; each heading names the node(s) a step applies to. "sdxx" and "xxx"
	# are placeholders that must be replaced with real names.

	# Shutdown the Kubernetes cluster first (on every node)
	systemctl stop kubelet

	# Stop all docker containers (on every node).
	# xargs -r skips the call when the list is empty; a bare `docker stop` with
	# no container IDs exits with a usage error.
	docker ps -aq | xargs -r docker stop

	# Unmount all ISCSI disks (on every node)
	mount | grep iqn # show the mounts whose entry contains "iqn"
	umount --all-targets /dev/sdxx # replace sdxx with each disk

	# Stop the ISCSI server (on the storage node)
	systemctl stop iscsid iscsid.socket
	systemctl stop targetd

	# Temporarily clear target configurations (on the storage node)
	targetctl clear
	targetcli ls # make sure everything is empty

	# List all logical volumes in lvm (on the storage node)
	lvs

	# Repair each logical volume /dev/xxx/pvc-xxx (on the storage node)
	mount /dev/xxx/pvc-xxx /mnt # mount first to recover metadata logs
	umount /mnt
	# Only fall back to -L when the mount above fails: it zeroes the log, so
	# any metadata updates still sitting in the log are lost.
	xfs_repair /dev/xxx/pvc-xxx # use -L if necessary (can be destructive!)

	# Restore target configurations (on the storage node)
	targetctl restore
	targetcli ls # make sure everything is back

	# Restart the ISCSI services that were stopped above (on the storage node).
	# Skip any unit that was not running before the recovery started.
	systemctl start iscsid.socket iscsid
	systemctl start targetd

	# Kickoff the Kubernetes cluster (on every node)
	systemctl start kubelet

	# Monitor and restart pods if necessary
	kubectl get pods
	kubectl delete pod xxx # replace xxx with the name of a stuck pod