Skip to content

Instantly share code, notes, and snippets.

@djmcaleese
Created June 10, 2021 15:16
Show Gist options
  • Save djmcaleese/683bf482df38070a5f99f94c6c19de9c to your computer and use it in GitHub Desktop.
Save djmcaleese/683bf482df38070a5f99f94c6c19de9c to your computer and use it in GitHub Desktop.
Requires access to kubectl with the current context set to a functioning Kubernetes cluster. Output will be stored into directory "/tmp/kollector/". A tar file will be generated in the format of kollector-<cluster-name>-<date-and-time>.tgz
#!/bin/bash
##################################
# Tanzu Migrator Kollector
# This tool collects information from a Kubernetes cluster in preparation for a migration to a Tanzu Kubernetes Grid cluster.
##################################
# Copyright 2021 VMware - Technical Preview License
# https://flings.vmware.com/sample-data-platform-on-vmware-cloud-foundation-with-vmware-tanzu-for-kubernetes-provisioning/license
##################################
# Warranty Disclaimer:
# It is understood that the technology preview software, open source software,
# documentation, and any updates may contain errors and are provided for limited evaluation only.
##################################
# Limitation of Liability:
# It is understood that the technology preview software is provided without charge for limited evaluation purposes.
# According, there is no liability of VMware and its licensors arising out of or related to this agreement.
##################################
# Security Disclaimer:
# This tool does NOT explicitly collect any data that is sensitive or it might be considered sensitive,
# such as kubeconfig, Kubernetes secrets, SA tokens, and configmaps.
# It is understood that certain K8s objects might embed sensitive information inside them without the knowledge of this tool.
##################################
#set -e
# debug only
#set -x
# Script identity, reported in the first and last log lines.
VERSION="Tanzu Migrator Kollector v1.0.0"
# Directory the script was started from.
CURDIR=$(pwd)
# Parent directory under which the collection tree is created.
MYDIR=/tmp
# NOW is refreshed later as the run progresses; STARTTIME keeps the initial value.
NOW=$(date +%Y-%m-%d-T%H%M%S)
TODAY=$(date +%Y-%m-%d)
STARTTIME=${NOW}
printf "INFO: $VERSION script starting to collect data at $NOW...\n"
# Setup and error checking.
# kubectl is mandatory. The lookup uses the shell builtin `command -v` rather
# than `which`: `which` is an external program that is not installed on every
# minimal image, and its absence would be misreported as "kubectl not found".
if ! command -v kubectl &>/dev/null; then
  printf "ERROR: kubectl binary not found. Script exited!\n"
  exit 1
fi
# Check connectivity with the cluster API: a non-zero status from
# `kubectl cluster-info` means the cluster is unreachable or the user is not
# logged in, and nothing further can be collected.
if ! kubectl cluster-info &>/dev/null; then
  printf "ERROR: Make sure your cluster is up or you are logged in. Script exited!\n"
  exit 1
else
  printf "INFO: K8s API is accessible ...\n"
fi
# API URL of the cluster behind the current context. Read from the kubeconfig
# instead of scraping `kubectl cluster-info`, whose wording differs between
# kubectl releases ("Kubernetes master" vs "Kubernetes control plane") and
# therefore left this value empty on newer clients.
MYHOST=$(kubectl config view --minify -o jsonpath='{.clusters[0].cluster.server}' 2>/dev/null | tr -d '[:space:]')
KCONTEXT=$(kubectl config current-context 2>/dev/null | awk '{print $1}')
# Cluster referenced by the current context. `--minify` restricts the view to
# the current context, so no grep over the context table is needed (a grep on
# the context name also matches other contexts that merely contain it).
CLUSTERNAME=$(kubectl config view --minify -o jsonpath='{.contexts[0].context.cluster}' 2>/dev/null)
# OpenShift cluster names carry a ":<port>" suffix; keep only the part before
# the first ":".
CLUSTERNAME=${CLUSTERNAME%%:*}
# sometimes context is different from cluster name - example is OpenShift
printf "INFO: Current cluster name is: ${CLUSTERNAME}\n"
printf "INFO: Current kubectl context is: ${KCONTEXT}\n"
# -z on a quoted expansion: the unquoted `[ ${CLUSTERNAME} = "" ]` form collapses
# to `[ = "" ]` when the name is empty and errors out instead of matching.
if [[ -z "${CLUSTERNAME}" ]]; then
  printf "ERROR: Cannot parse the cluster name from kubectl context. Exiting.\n"
  exit 1
fi
# Per-run working directory: <MYDIR>/kollector/<cluster>/<start timestamp>.
KOLLECTORDIR=${MYDIR}/kollector/${CLUSTERNAME}/${STARTTIME}
printf "INFO: Creating temporary directory at ${KOLLECTORDIR}\n"
# Paths are quoted so a cluster name containing whitespace or glob characters
# cannot split into several arguments.
mkdir -p "${KOLLECTORDIR}" 2>/dev/null
if [[ ! -d "${KOLLECTORDIR}" ]]; then
  printf "ERROR: Directory $KOLLECTORDIR cannot be created. Script exited!\n"
  exit 1
fi
# The directory can exist and still be inaccessible; stop instead of carrying
# on from the wrong working directory.
if ! cd "${KOLLECTORDIR}"; then
  printf "ERROR: Cannot change into directory $KOLLECTORDIR. Script exited!\n"
  exit 1
fi
# Summary log that the rest of the script appends to.
LOGSTATIC=${KOLLECTORDIR}/${CLUSTERNAME}"-cluster-summary".txt
ERRORS=${KOLLECTORDIR}/${CLUSTERNAME}"-errors".txt
printf "INFO: Start collecting cluster summary information\n"
printf "INFO: Start collecting cluster summary information\n" >> "${LOGSTATIC}"
# Prints how many rows `kubectl get <args...>` returns, not counting the
# header line. Errors (unknown resource type, missing permission) are
# discarded and yield 0.
_kollector_count() {
  kubectl get "$@" 2>/dev/null | grep -v NAME | wc -l | tr -d '[:space:]'
}

runningPODs=$(_kollector_count pods -A --field-selector=status.phase=Running)
numNameSpaces=$(_kollector_count namespaces -A)
numPV=$(_kollector_count pv)
numPVC=$(_kollector_count pvc -A)
numSC=$(_kollector_count sc -A)
numCRDs=$(_kollector_count crds -A)
numNodes=$(_kollector_count nodes)
# Control-plane nodes carry the legacy "master" role label, the newer
# "control-plane" label, or both, depending on the Kubernetes release. Query
# both and de-duplicate so each node is counted once.
numMasters=$(
  {
    kubectl get nodes -o name --selector='node-role.kubernetes.io/master' 2>/dev/null
    kubectl get nodes -o name --selector='node-role.kubernetes.io/control-plane' 2>/dev/null
  } | sort -u | wc -l | tr -d '[:space:]'
)
# Workers are the nodes that carry neither of the two role labels.
numWorkers=$(kubectl get nodes -o name --selector='!node-role.kubernetes.io/master,!node-role.kubernetes.io/control-plane' 2>/dev/null | wc -l | tr -d '[:space:]')
# NOTE(review): columns 8-10 of the wide listing are taken as the OS image and
# all whitespace is stripped, so several distinct values run together.
osImage=$(kubectl get nodes -o wide 2>/dev/null | grep -v NAME | awk '{print $8, $9, $10}' | sort -u | tr -d '[:space:]')
k8sVer=$(kubectl get nodes -o wide 2>/dev/null | grep -v NAME | awk '{print $5}' | sort -u | tr -d '[:space:]')
declare storageProvisoner
# Comma-separated provisioner names (a trailing comma is left in place).
storageProvisoner=$(kubectl describe sc -A 2>/dev/null | grep Provisioner | awk '{print $2}' | tr '\n' ',')
numSVCs=$(_kollector_count services -A)
# The original redirected descriptor 3 here (typo for 2), which let kubectl
# errors leak to the console; the helper discards stderr like the other counters.
numDeploy=$(_kollector_count deployments -A)
numRoles=$(_kollector_count clusterroles -A)
numPSPs=$(_kollector_count psp -A)
# OpenShift-only resource types; these count 0 on clusters that lack them.
numDCs=$(_kollector_count deploymentconfigs -A)
numSCCs=$(_kollector_count scc -A)
numRoutes=$(_kollector_count routes -A)
numImageStreams=$(_kollector_count imagestreams -A)
numImages=$(_kollector_count images -A)
numImageTags=$(_kollector_count imagetags -A)
numCSIDrivers=$(_kollector_count csidrivers -A)
# Prints the already-expanded printf format in $1 on the console and appends
# the same text to the summary log.
_kollector_summary() {
  printf "$1"
  printf "$1" >> ${LOGSTATIC}
}

_kollector_summary "\n"
# The header differs between console and log: only the log carries the API URL.
printf "\tCluster Name\t:\t${CLUSTERNAME}\n\tK8s Version\t:\t${k8sVer}\n\tOS-Image\t:\t${osImage}\n"
printf "\tCluster Name\t:\t${CLUSTERNAME}\n\tK8s API\t\t:\t${MYHOST}\n\tK8s Version\t:\t${k8sVer}\n\tOS-Image:\t${osImage}\n" >> ${LOGSTATIC}
_kollector_summary "\tNode Info\t:\tCluster Nodes ${numNodes}; MasterNodes ${numMasters}; WorkerNodes ${numWorkers};\n"
_kollector_summary "\tWorkload\t:\tNamespaces ${numNameSpaces}; Pods(Running) ${runningPODs};\n"
_kollector_summary "\tStorage\t\t:\tPersistentVolumes ${numPV}; PersistentVolumeClaims ${numPVC}; StorageClasses ${numSC};\n"
_kollector_summary "\tStorage\t\t:\tStorage Provisioners: $storageProvisoner\n"
_kollector_summary "\tStorage\t\t:\tCSI Drivers: $numCSIDrivers\n"
_kollector_summary "\tMiscInfo\t:\tCRDs ${numCRDs}; Services ${numSVCs}; Deployments ${numDeploy}; ClusterRoles ${numRoles}; PSPs ${numPSPs};\n"
_kollector_summary "\tOcpInfo\t\t:\tDeploymentConfigs ${numDCs}; Routes ${numRoutes}; SCCs ${numSCCs}; ImageStreams ${numImageStreams}; Images ${numImages}; Imagetags ${numImageTags}\n\n"
# The closing line is worded differently for the log and for the console.
printf "INFO: end collecting cluster summary information\n" >> ${LOGSTATIC}
printf "INFO: end collecting cluster summary information, starting the detailed kollection...\n"
# Optional pause for debugging
#read -p "INFO: Press [Enter] key to start collection..."
# API discovery: store the raw /apis document as JSON.
mkdir -p ${KOLLECTORDIR}/cluster-apis/
printf "INFO: start running command kubectl get --raw /apis for cluster $CLUSTERNAME at $NOW ...\n"
kubectl get --raw /apis -A \
  >> ${KOLLECTORDIR}/cluster-apis/${CLUSTERNAME}-cluster-apis.json
NOW=$(date +%Y-%m-%d-T%H%M%S)

# Full cluster dump, written next to the summary log.
printf "INFO: start running command kubectl cluster-info dump for cluster $CLUSTERNAME at $NOW ...\n"
kubectl cluster-info dump \
  >> ${KOLLECTORDIR}/${CLUSTERNAME}-cluster-dump.json
NOW=$(date +%Y-%m-%d-T%H%M%S)
# Human-readable (non-JSON) listings of the main cluster objects, appended to
# the summary log. Each section announces itself on the console and in the
# log, runs one kubectl listing into the log, refreshes NOW, and then writes an
# "end" marker to the log only.

# Prints the already-expanded printf format in $1 on the console and appends
# the same text to the summary log.
_kollector_announce() {
  printf "$1"
  printf "$1" >> ${LOGSTATIC}
}

# Refreshes NOW with the current timestamp.
_kollector_stamp() {
  NOW=$(date +%Y-%m-%d-T%H%M%S)
}

# Nodes
_kollector_announce "INFO: start running command kubectl get nodes -o wide for cluster $CLUSTERNAME at $NOW to file ...\n"
kubectl get nodes -o wide -A >> ${LOGSTATIC}
_kollector_stamp
printf "INFO: end running command kubectl get nodes -o wide for cluster $CLUSTERNAME at $NOW to file ...\n" >> ${LOGSTATIC}

# Namespaces
_kollector_announce "INFO: start running command kubectl get namespaces for cluster $CLUSTERNAME at $NOW to file ...\n"
kubectl get namespaces -A >> ${LOGSTATIC}
_kollector_stamp
printf "INFO: end running command kubectl get namespaces for cluster $CLUSTERNAME at $NOW to file ...\n" >> ${LOGSTATIC}

# Running pods
_kollector_announce "INFO: start running command kubectl get pods for cluster $CLUSTERNAME on $TODAY at $NOW to file ...\n"
kubectl get pods -A --field-selector=status.phase=Running 2>/dev/null >> ${LOGSTATIC}
_kollector_stamp
printf "INFO: end running command kubectl get pods for cluster $CLUSTERNAME at $NOW to file ...\n" >> ${LOGSTATIC}

# Custom resource definitions
_kollector_announce "INFO: start running command kubectl get crds for cluster $CLUSTERNAME at $NOW to file ...\n"
kubectl get crds -A 2>/dev/null >> ${LOGSTATIC}
_kollector_stamp
printf "INFO: end running command kubectl get crds for cluster $CLUSTERNAME at $NOW to file ...\n" >> ${LOGSTATIC}

# Persistent volumes, ordered by capacity
_kollector_announce "INFO: start running command kubectl get pv --sort-by=.spec.capacity.storage - for cluster $CLUSTERNAME at $NOW to file ...\n"
kubectl get pv -A --sort-by=.spec.capacity.storage 2>/dev/null >> ${LOGSTATIC}
_kollector_stamp
printf "INFO: end running command kubectl get persistent volumes for cluster $CLUSTERNAME at $NOW to file ...\n" >> ${LOGSTATIC}

# Storage classes
_kollector_announce "INFO: start running command kubectl get sc for cluster $CLUSTERNAME at $NOW to file ...\n"
kubectl get sc 2>/dev/null >> ${LOGSTATIC}
_kollector_stamp
printf "INFO: end running command kubectl get sc for cluster $CLUSTERNAME at $NOW to file ...\n" >> ${LOGSTATIC}

# CSI drivers
_kollector_announce "INFO: start running command kubectl get csidrivers for cluster $CLUSTERNAME at $NOW to file ...\n"
kubectl get csidrivers 2>/dev/null >> ${LOGSTATIC}
_kollector_stamp
printf "INFO: end running command kubectl get csidrivers for cluster $CLUSTERNAME at $NOW to file ...\n" >> ${LOGSTATIC}

# Pod security policies
_kollector_announce "INFO: start running command kubectl get psp for cluster $CLUSTERNAME at $NOW to file ...\n"
kubectl get psp 2>/dev/null >> ${LOGSTATIC}
_kollector_stamp
printf "INFO: end running command kubectl get psp for cluster $CLUSTERNAME at $NOW to file ...\n" >> ${LOGSTATIC}
# Check whether Velero is present on the cluster by looking for its resource
# types in the API resource list.
printf "INFO: Checking is Velero exists on cluster $CLUSTERNAME...\n"
printf "INFO: Checking is Velero exists on cluster $CLUSTERNAME...\n" >> ${LOGSTATIC}
# Uses kubectl like the rest of the script. The previous `oc` call depended on
# the OpenShift client, which is never checked for; where it is missing the
# pipeline failed and Velero was always reported as absent.
# grep -q prints nothing and only sets the exit status.
if kubectl api-resources | grep -q velero; then
  printf "INFO: Velero exists on cluster $CLUSTERNAME...\n"
  printf "INFO: Velero exists on cluster $CLUSTERNAME...\n" >> ${LOGSTATIC}
else
  printf "INFO: Velero does not exist on cluster $CLUSTERNAME...\n"
  printf "INFO: Velero does not exist on cluster $CLUSTERNAME...\n" >> ${LOGSTATIC}
fi
# Export every object of every listable resource type to its own JSON file:
#   ${KOLLECTORDIR}/resources/<type>/namespaces/<namespace>/<name>.json
#   ${KOLLECTORDIR}/resources/<type>/cluster/<name>.json
printf "Getting namespaced api resources...\n"
namespaced_resources_array=( $(kubectl api-resources --namespaced=true -oname) )
printf "Getting cluster api resources...\n"
cluster_resources_array=( $(kubectl api-resources --namespaced=false -oname) )
# Resource types that are never exported.
skip_resources=(configmaps secrets events events.events.k8s.io)

# Succeeds when $1 is exactly one of the entries of skip_resources.
# An exact comparison is required: matching the type as a regex against the
# joined list also skips any type whose name merely occurs inside an entry.
_kollector_is_skipped() {
  local candidate=$1 skipped
  for skipped in "${skip_resources[@]}"; do
    if [[ "${candidate}" == "${skipped}" ]]; then
      return 0
    fi
  done
  return 1
}

# Writes each object of resource type $1 to a pretty-printed JSON file.
# $2 selects the layout: "namespaced" lists across all namespaces and files
# objects under namespaces/<namespace>/; any other value lists without -A and
# files objects under cluster/. Types that cannot be listed produce no files.
_kollector_export() {
  local resource=$1 scope=$2
  local listing item pretty namespace resource_name target_dir
  if [[ "${scope}" == "namespaced" ]]; then
    listing=$(kubectl get "${resource}" -A -ojson 2>/dev/null)
  else
    # The cluster directory is created even when the type has no objects.
    target_dir=${KOLLECTORDIR}/resources/${resource}/cluster
    mkdir -p "${target_dir}"
    listing=$(kubectl get "${resource}" -ojson 2>/dev/null)
  fi
  # One compact JSON document per line; `?` tolerates output without .items.
  while IFS= read -r item; do
    resource_name=$(jq -r '.metadata.name' <<< "${item}")
    if [[ "${scope}" == "namespaced" ]]; then
      namespace=$(jq -r '.metadata.namespace' <<< "${item}")
      target_dir=${KOLLECTORDIR}/resources/${resource}/namespaces/${namespace}
      mkdir -p "${target_dir}"
    fi
    # Prettify before writing.
    pretty=$(jq '.' <<< "${item}")
    printf "%s" "${pretty}" > "${target_dir}/${resource_name}.json"
  done < <(jq -c '.items[]?' <<< "${listing}")
}

printf "INFO: Collecting namespaced resources\n"
for resource in "${namespaced_resources_array[@]}"; do
  if _kollector_is_skipped "${resource}"; then
    printf "INFO: Resource: $resource in skip list, skipping...\n"
    continue
  fi
  printf "INFO: Current resource: $resource \n"
  _kollector_export "${resource}" namespaced
done
printf "INFO: Finished collecting namespaced resources\n"
printf "INFO: Collecting cluster scoped resources\n"
for resource in "${cluster_resources_array[@]}"; do
  if _kollector_is_skipped "${resource}"; then
    printf "INFO: Resource: $resource in skip list, skipping...\n"
    continue
  fi
  printf "INFO: Current resource: $resource \n"
  _kollector_export "${resource}" cluster
done
printf "INFO: Finished collecting cluster scoped resources\n"
# Bundle the collected data into one archive in the start directory and tell
# the operator what to do next.
# Guard: with an empty KOLLECTORDIR the tar source below would be "/".
: "${KOLLECTORDIR:?KOLLECTORDIR is not set; refusing to create the archive}"
kollector_archive=${CURDIR}/kollector-${CLUSTERNAME}-${STARTTIME}.tgz
# The announced command names the archive that is actually written (the file
# name carries the "kollector-" prefix).
printf "INFO: start command tar czf ${kollector_archive} ${KOLLECTORDIR}/\n"
if tar czf "${kollector_archive}" "${KOLLECTORDIR}/" &>/dev/null; then
  kollector_archive_ok=1
else
  kollector_archive_ok=0
  printf "ERROR: Failed tar ${KOLLECTORDIR}\n"
fi
NOW=$(date +%Y-%m-%d-T%H%M%S)
printf "INFO: $VERSION script ending data collection at $NOW...\n"
printf "#################################\n"
if (( kollector_archive_ok )); then
  printf "Please:\n\t1. Send tar file for review: ${kollector_archive}\n"
  printf "\t2. Remove manually the directory ${KOLLECTORDIR} if you wish.\n"
else
  # Do not point the operator at an archive that was never written.
  printf "No tar file was created. The collected data remains in ${KOLLECTORDIR}\n"
fi
printf "#################################\n"
# Report the archive failure through the exit status as well.
if (( ! kollector_archive_ok )); then
  exit 1
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment