Skip to content

Instantly share code, notes, and snippets.

@olafbuitelaar
Last active January 2, 2019 20:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save olafbuitelaar/ff6fe9d4ab39696d9ad6ca689cc89986 to your computer and use it in GitHub Desktop.
Save olafbuitelaar/ff6fe9d4ab39696d9ad6ca689cc89986 to your computer and use it in GitHub Desktop.
Script to clean up stale file handles within a Gluster volume
#!/bin/bash
# Usage example:
#   ./clean-stale-gluster-fh.sh --host="192.168.0.42" --volume=test --backup="/backup/gfs/x"

# Strict mode: stop on failing commands and failing pipeline stages, refuse to
# overwrite existing files through ">" and treat unset variables as errors.
set -o errexit
set -o pipefail
set -o noclobber
set -o nounset

# the gluster volume to fix
glusterVolume=""
# the node name as found in "gluster v info [vol]"
glusterNode=""
# optional: a single shard file to fix, e.g. 94915a76-2dfb-439e-9863-a3c4020676c9.3859;
# when empty the whole .shard directory is checked
shardFile=""
# file that receives the run log
logFile="fix-gluster-fd.log"
# location to store the stale file handles
backupDir=""
# directory where the brick files are stored on the local machine
brickDirectory=""
# accepted on the command line, currently without effect
debug="n"
# accepted on the command line, currently without effect
verbose="n"
# y = only report what would happen, don't actually do anything
dryrun="n"
# y = don't ask for confirmation
force="n"
# Abort with exit status 1 unless `getopt --test` exits with status 4, which is
# how the enhanced getopt identifies itself. The option parsing further down
# relies on that enhanced version.
function requireEnhancedGetopt(){
  local getoptStatus=0
  # "|| ...=$?" captures the expected non-zero status without tripping errexit
  getopt --test > /dev/null || getoptStatus=$?
  if [[ $getoptStatus -ne 4 ]]; then
    # single quotes keep the backticks literal; inside double quotes they would
    # run `getopt --test` again as a command substitution
    echo 'I’m sorry, `getopt --test` failed in this environment.'
    exit 1
  fi
}
requireEnhancedGetopt
# Long options understood by the script. A trailing ":" marks an option that
# takes a value; --log needs it because its case arm below consumes "$2".
LONGOPTS=volume:,host:,backup:,brick:,shard:,log:,verbose,debug,dryrun,force
# Short flags; these match the -d / -v / -f arms of the case statement below.
SHORTOPTS=dvf

# Parse the command line and store the results in the global option variables
# (glusterVolume, glusterNode, shardFile, backupDir, brickDirectory, logFile,
# debug, verbose, force, dryrun).
# Arguments: the script's command line ("$@")
# Exits:     2 when getopt rejects the arguments,
#            3 when getopt emits an option that has no case arm
function parseArguments(){
  # running getopt as an "if" condition keeps errexit from firing on failure;
  # getopt prints its own diagnostic before we print ours
  if ! PARSED=$(getopt --options="$SHORTOPTS" --longoptions="$LONGOPTS" --name "$0" -- "$@"); then
    echo "error in arguments"
    exit 2
  fi
  # getopt emits shell-quoted words; eval + set restores them as positional
  # parameters with the quoting handled correctly
  eval set -- "$PARSED"
  # consume the normalised options in order until the "--" terminator
  while true; do
    case "$1" in
      -d|--debug)
        debug=y
        shift
        ;;
      -v|--verbose)
        verbose=y
        shift
        ;;
      -f|--force)
        force=y
        shift
        ;;
      --dryrun)
        dryrun=y
        shift
        ;;
      --volume)
        glusterVolume="$2"
        shift 2
        ;;
      --host)
        glusterNode="$2"
        shift 2
        ;;
      --shard)
        shardFile="$2"
        shift 2
        ;;
      --backup)
        backupDir="$2"
        shift 2
        ;;
      --brick)
        brickDirectory="$2"
        shift 2
        ;;
      --log)
        logFile="$2"
        shift 2
        ;;
      --)
        shift
        break
        ;;
      *)
        echo "Programming error"
        exit 3
        ;;
    esac
  done
}
parseArguments "$@"
echo "$glusterVolume , $glusterNode , $backupDir, $brickDirectory"
# A backup location is always required. Volume and host are only needed when
# no brick directory was passed, because they are used solely to look the
# brick directory up.
if [[ -z "$backupDir" || ( -z "$brickDirectory" && ( -z "$glusterVolume" || -z "$glusterNode" ) ) ]]; then
  echo "please provide required arguments"
  exit 1
fi
# Print the brick path that `gluster v info <volume>` lists for <node>.
# Arguments: $1 - gluster volume name
#            $2 - node name exactly as it appears in the "BrickN: node:/path" lines
# Outputs:   one path per matching brick line; nothing when no line matches
# Only the first whitespace-delimited word after the "BrickN:" label is used,
# so brick paths containing spaces are not supported.
function findBrickDirectory(){
  # index() compares the node name literally, so dots in an IP address are not
  # treated as wildcards; [0-9]+ also accepts brick numbers of 10 and above
  gluster v info "$1" | awk -v prefix="$2:" '
    $1 ~ /^Brick[0-9]+:$/ && index($2, prefix) == 1 {
      print substr($2, length(prefix) + 1)
    }'
}
if [[ -z "$brickDirectory" ]]; then
  brickDirectory=$(findBrickDirectory "$glusterVolume" "$glusterNode")
fi
#echo "exit"
#exit 0
# run statistics, printed in the closing summary
processedFiles=0
staleFiles=0
falsePositive=0
# the script only operates on a brick that contains a .shard directory
[[ -d "$brickDirectory/.shard" ]] || {
  echo "brick directory not found"
  exit 1
}
# Preparation for a real (non dry-run) run: ask for confirmation unless forced,
# create the backup directories and start the log with a timestamp.
if [[ $dryrun != "y" ]]; then
  if [[ $force != "y" ]]; then
    read -r -p "Are you sure? [y/N] " response
    case "$response" in
      [yY][eE][sS]|[yY])
        ;;
      *)
        # anything but y/yes cancels the run without an error status
        exit 0
        ;;
    esac
  fi
  # mkdir -p is a no-op for directories that already exist
  mkdir -p "$backupDir/.shard" "$backupDir/.glusterfs"
  # ">>" creates the log file when missing (its directory must already exist);
  # appending is not blocked by noclobber
  date >> "$logFile"
fi
# Decide whether a shard file is a stale file handle. It is stale when it:
#   - is a regular file of size 0 with the sticky bit set,
#   - has permissions ---------T (sticky bit only),
#   - carries the trusted.glusterfs.dht.linkto attribute,
#   - has a matching .glusterfs/<aa>/<bb>/<gfid-uuid> file that is the same
#     file (hard link).
# Globals:   brickDirectory (read), falsePositive (incremented; the change is
#            only visible to the caller when the function is not run inside a
#            command substitution)
# Arguments: $1 - path of the shard file
# Outputs:   "isStale .glusterfs/<aa>/<bb>/<uuid>" when all checks pass,
#            otherwise "<path> not stale ..." with the reason
# Returns:   0 in every case; callers inspect the printed verdict
function checkIfStaleFile(){
  local shardPath=$1
  local gfidRe='trusted\.gfid=0x([0-9a-f]{32})'
  local linktoRe='trusted\.glusterfs\.dht\.linkto=0x[0-9a-f]+'
  local fileAttrs gfidHex gfid gfid0 gfid1 gfuuid gfidPath
  local linkto="" lsAttrPerm lsAttrInod ls2AttrInod

  # must exist, be a regular file of 0 bytes and have the sticky bit set
  if [[ ! -f "$shardPath" || -s "$shardPath" || ! -k "$shardPath" ]]; then
    echo "$shardPath not stale"
    return 0
  fi

  # a getfattr failure just leaves fileAttrs empty; that is reported below
  fileAttrs=$(getfattr -d -m . -e hex -- "$shardPath" 2>/dev/null) || true
  if [[ ! $fileAttrs =~ $gfidRe ]]; then
    echo "$shardPath not stale since trusted.gfid attribute could not be read"
    return 0
  fi
  gfidHex=${BASH_REMATCH[1]}          # e86ca9bf514244c7b72e8bf671ce27bd
  gfid="trusted.gfid=0x$gfidHex"
  gfid0=${gfidHex:0:2}
  gfid1=${gfidHex:2:2}
  # 9b5ce642-2863-43a9-a39d-bdbde421ebfb
  gfuuid="${gfidHex:0:8}-${gfidHex:8:4}-${gfidHex:12:4}-${gfidHex:16:4}-${gfidHex:20:12}"
  gfidPath=".glusterfs/$gfid0/$gfid1/$gfuuid"

  if [[ ! -e "$brickDirectory/$gfidPath" ]]; then
    echo "$shardPath not stale since $gfidPath doesn't exist"
    return 0
  fi

  if [[ $fileAttrs =~ $linktoRe ]]; then
    linkto=${BASH_REMATCH[0]}
  fi
  # ask stat for exactly the fields needed instead of parsing its default output
  lsAttrPerm=$(stat -c '%A' -- "$shardPath")
  lsAttrInod=$(stat -c '%i' -- "$shardPath")
  ls2AttrInod=$(stat -c '%i' -- "$brickDirectory/$gfidPath")

  # final validation for stale file
  if [[ -n "$linkto"
    && "$lsAttrPerm" == "---------T"
    && "$lsAttrInod" == "$ls2AttrInod"
    && "$shardPath" -ef "$brickDirectory/$gfidPath" ]]; then
    echo "isStale $gfidPath"
  else
    falsePositive=$((falsePositive + 1))
    echo "$shardPath not stale since properties don't match: $gfid $gfid0 $gfid1 $gfuuid => $linkto => $lsAttrPerm $lsAttrInod $ls2AttrInod"
  fi
}
# Move a stale shard file and its .glusterfs hard link into the backup directory.
# Both paths are re-validated first: regular files, the same file (hard link),
# the shard empty and with the sticky bit set. Nothing is moved otherwise.
# Globals:   brickDirectory, backupDir, logFile, dryrun (read);
#            staleFiles (incremented for every file that passes validation)
# Arguments: $1 - path of the shard file
#            $2 - path of the gfid file relative to the brick directory
# Outputs:   progress messages on stdout; mv output is appended to the log file
function moveStaleFile(){
  local shardPath=$1
  local gfidPath=$2
  local linkPath="$brickDirectory/$gfidPath"

  echo "fix stale: $shardPath $gfidPath"
  if [[ -f "$shardPath" && -f "$linkPath" && "$shardPath" -ef "$linkPath" \
    && ! -s "$shardPath" && -k "$shardPath" ]]; then
    staleFiles=$((staleFiles + 1))
    if [[ $dryrun == "y" ]]; then
      echo "would move $shardPath and $gfidPath"
    else
      echo "exec"
      # "--" protects against file names starting with a dash
      mv -- "$shardPath" "$backupDir/.shard/" 2>&1 | tee -a "$logFile"
      mv -- "$linkPath" "$backupDir/.glusterfs/" 2>&1 | tee -a "$logFile"
    fi
  fi
}
# Check one shard file and move it to the backup location when it is stale.
# Globals:   processedFiles (incremented for every call),
#            falsePositive (incremented when the check reports mismatching
#            properties)
# Arguments: $1 - path of the shard file
# Outputs:   the verdict printed by checkIfStaleFile, followed by the output of
#            moveStaleFile for stale files
function handleShardFile(){
  local shardPath=$1
  local staleCheck

  processedFiles=$((processedFiles + 1))
  if [[ ! -e "$shardPath" ]]; then
    echo "$shardPath doesn't exist"
    return 0
  fi

  staleCheck=$(checkIfStaleFile "$shardPath")
  echo "$staleCheck"
  case "$staleCheck" in
    "isStale "*)
      # the verdict line is "isStale <path relative to the brick directory>"
      echo "remove"
      moveStaleFile "$shardPath" "${staleCheck#isStale }"
      ;;
    *"not stale since properties don't match"*)
      # checkIfStaleFile ran in a command-substitution subshell, so a counter
      # incremented there never reaches this shell; count the false positive here
      falsePositive=$((falsePositive + 1))
      ;;
  esac
}
#main implementation
# Without --shard every empty entry with the sticky bit set below .shard is
# checked; with --shard only that one file is checked.
if [[ -z "$shardFile" ]]; then
  echo "loop"
  # NUL-delimited names keep unusual file names intact; process substitution
  # keeps the loop in this shell so the counters survive it
  while IFS= read -r -d '' file; do
    handleShardFile "$file"
  done < <(find "$brickDirectory/.shard" -perm /1000 -empty -print0)
elif [[ -f "$brickDirectory/.shard/$shardFile" && ! -s "$brickDirectory/.shard/$shardFile" ]]; then
  handleShardFile "$brickDirectory/.shard/$shardFile"
else
  echo "shard file $shardFile not stale"
fi
closingLog="files: $processedFiles stales: $staleFiles falsePositive: $falsePositive"
if [[ $dryrun != "y" ]]; then
  # a real run also records the summary in the log file
  echo "$(date) $closingLog" 2>&1 | tee -a "$logFile"
else
  echo "$closingLog"
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment