Skip to content

Instantly share code, notes, and snippets.

@sambrightman
Last active October 6, 2016 14:33
Show Gist options
  • Save sambrightman/2f5e79fe5374cadaaa3fc8245ba14b61 to your computer and use it in GitHub Desktop.
Save sambrightman/2f5e79fe5374cadaaa3fc8245ba14b61 to your computer and use it in GitHub Desktop.
Interactive recursive diff with extra hacks for some pipeline specifics
#!/usr/bin/env bash
# works much better if runs have the same name on the same machine (i.e. rename first directory after run)
# much easier to view diffs with colordiff installed
# some files will differ only by dates/times/run times/references to unique job names, just gotta check 'em
# we ignore whitespace in contents
# we try to match filenames that have SGE pid/job names (but not in contents)
# some files will get processed out-of-order and fail this matching
# maybe do something with find before diff to get a defined name-based ordering?
# Directory that holds every run; the run names given on the command line
# are resolved relative to it.
RUN_ROOT="/data2/processed"
# Diff lines the user chose to (a)dd during review; printed once the
# comparison finishes.
declare -a saved=()
#######################################
# Entry point: validate the two run names and compare their directories.
# Globals:   RUN_ROOT (read)
# Arguments: $1 - name of the first run directory under RUN_ROOT
#            $2 - name of the second run directory under RUN_ROOT
# Outputs:   an error message on stderr when the arguments are unusable
# Returns:   exits 1 on invalid arguments, otherwise the status of compare
#######################################
function main() {
  local runa=${1-}
  local runb=${2-}

  # An empty name must be rejected explicitly: "${RUN_ROOT}/" is itself a
  # directory, so the -d tests alone would accept a missing argument.
  if [[ -z "$runa" || -z "$runb" \
        || ! -d "${RUN_ROOT}/$runa" || ! -d "${RUN_ROOT}/$runb" ]]; then
    echo "supply two valid run names" >&2
    exit 1
  fi

  compare "${RUN_ROOT}/$runa" "${RUN_ROOT}/$runb"
}
# Helper: turn an "Only in DIR: NAME" line from `diff --brief` into DIR/NAME.
function _only_line_path() {
  local rest=${1#Only in }
  printf '%s/%s\n' "${rest%%: *}" "${rest#*: }"
}

#######################################
# Walk the output of `diff -r --brief` for two run directories and let the
# user review each difference interactively.
#
# "Files A and B differ" lines are reviewed directly. Two consecutive
# "Only in" lines are offered to pair(); when they pair, the two files are
# diffed against each other and only shown if their contents differ. An
# "Only in" line that does not pair is reviewed on its own.
#
# Globals:   saved (read; printed at the end)
# Arguments: $1 - first directory
#            $2 - second directory
# Outputs:   review prompts (via check_line), then the saved lines
# Returns:   0
#
# NOTE(review): after a pair with identical contents the "previous line" is
# reset to the line before the pair; if that was an "Only" line that had
# already been reviewed it can be presented again. Behaviour kept as-is.
#######################################
function compare() {
  local dira=$1
  local dirb=$2
  # Scoped to this function and its callees, so diff lines are never split
  # on spaces, and restored for the caller when we return.
  local IFS=$'\n'
  local report line item filea fileb
  local prev_line='' prev_prev_line=''
  local pending=0   # 1 while prev_line is an "Only" line not yet reviewed
  local quit=0      # 1 once the user asked to quit

  report=$(diff -r --brief "$dira" "$dirb")

  # The report is read on fd 3 so that stdin stays attached to the keyboard
  # for the prompts issued by check_line.
  while IFS= read -r line <&3; do
    [[ -n "$line" ]] || continue

    if [[ ! "$line" =~ ^Only ]]; then
      if [[ "$prev_line" =~ ^Only ]]; then
        # previous line was an unpaired Only, check it first
        check_line "$dira" "$dirb" "$prev_line" "$prev_prev_line" \
          || { quit=1; break; }
      fi
      check_line "$dira" "$dirb" "$line" "$prev_line" || { quit=1; break; }
      pending=0
    elif [[ -n "$prev_line" && "$prev_line" =~ ^Only ]]; then
      filea=$(_only_line_path "$prev_line")
      fileb=$(_only_line_path "$line")
      if pair "$dira" "$dirb" "$filea" "$fileb"; then
        # Both Only lines are consumed by the pair; replace them with the
        # result of diffing the two files directly.
        line=$(diff --brief "$filea" "$fileb")
        prev_line=$prev_prev_line
        pending=0
        if [[ -n "$line" ]]; then
          check_line "$dira" "$dirb" "$line" "$prev_line" || { quit=1; break; }
        else
          # paired and identical: nothing to review, history unchanged
          continue
        fi
      else
        # Two Only lines that do not match: review the older one now, the
        # current one stays pending.
        check_line "$dira" "$dirb" "$prev_line" "$prev_prev_line" \
          || { quit=1; break; }
        pending=1
      fi
    else
      # First Only line of a potential pair: wait for the next line.
      pending=1
    fi

    prev_prev_line=$prev_line
    prev_line=$line
  done 3<<<"$report"

  # An Only line at the very end of the report has no successor to trigger
  # its review inside the loop, so flush it here.
  if (( pending && !quit )); then
    check_line "$dira" "$dirb" "$prev_line" "$prev_prev_line"
  fi

  echo saved:
  echo
  for item in "${saved[@]}"; do
    printf '%s\n' "$item"
  done
}
#######################################
# Decide whether two files that each exist in only one run are really the
# "same" file under a different job-specific name.
#
# Both paths are first made relative with pretty(). They pair when they sit
# in the same directory, agree on everything before the last underscore,
# and either have the same extension or (inside a directory named "logs")
# have extensions that both start with the same one of o, e, po, pe.
#
# Arguments: $1 - first run directory
#            $2 - second run directory
#            $3 - path of the first file
#            $4 - path of the second file
# Returns:   0 if the files pair, 1 otherwise
#######################################
function pair() {
  local dira=$1
  local dirb=$2
  local filea=$3
  local fileb=$4
  local exta extb kind

  filea=$(pretty "$dira" "$dirb" "$filea")
  fileb=$(pretty "$dira" "$dirb" "$fileb")

  # Same relative directory (the original compared filea with itself here).
  [[ "$(dirname "$filea")" == "$(dirname "$fileb")" ]] || return 1
  # Same name up to the last underscore, i.e. ignoring the job suffix.
  [[ "${filea%_*}" == "${fileb%_*}" ]] || return 1

  exta=${filea##*.}
  extb=${fileb##*.}
  [[ "$exta" == "$extb" ]] && return 0

  # Only in logs/ may the extensions differ, as long as they share one of
  # these prefixes (the remainder of the extension is ignored).
  [[ "$(basename "$(dirname "$filea")")" == "logs" ]] || return 1
  for kind in o e po pe; do
    [[ "$exta" == "$kind"* && "$extb" == "$kind"* ]] && return 0
  done
  return 1
}
#######################################
# Shorten a line for display by removing every occurrence of "<dira>/" and
# "<dirb>/" from it.
# Arguments: $1 - first run directory
#            $2 - second run directory
#            $3 - text to shorten
# Outputs:   the shortened text on stdout
#######################################
function pretty() {
  local dira=$1
  local dirb=$2
  local line=$3
  local shortened=$line

  # The directory is quoted inside the pattern so characters such as [ * ?
  # in a path are matched literally rather than as a glob. An empty
  # directory is skipped; otherwise the pattern would be a bare "/" and
  # every slash in the line would be removed.
  [[ -n "$dira" ]] && shortened=${shortened//"$dira"\//}
  [[ -n "$dirb" ]] && shortened=${shortened//"$dirb"\//}

  printf '%s\n' "$shortened"
}
#######################################
# Show one diff line and ask the user what to do with it.
#
# Actions: (d)iff the two files (only for "Files A and B differ" lines),
# (a)dd the line to the saved list, (i)gnore it, go (b)ack to the previous
# line (only when one was supplied), or (q)uit.
#
# Globals:   saved (appended to on "a")
# Arguments: $1 - first run directory
#            $2 - second run directory
#            $3 - the diff line under review
#            $4 - the previous diff line, may be empty or omitted
# Outputs:   the shortened line, prompts and messages on stdout
# Returns:   0 when the line was handled, 1 when the user quit or stdin
#            reached end-of-file
#######################################
function check_line() {
  local dira=$1
  local dirb=$2
  local line=$3
  local prev_line=${4-}
  local files_re='^Files (.*) and (.*) differ$'
  local filea='' fileb='' initial='' action='' formatter

  # Only a genuine "Files A and B differ" line can be diffed. Capture the
  # two paths now, before any recursive call can overwrite BASH_REMATCH.
  if [[ "$line" =~ $files_re ]]; then
    filea=${BASH_REMATCH[1]}
    fileb=${BASH_REMATCH[2]}
    initial="(d)iff/"
  fi

  pretty "$dira" "$dirb" "$line"
  echo
  while true; do
    # read fails at end-of-file; treat that like quit instead of looping
    # forever on "Not a valid action".
    if ! read -r -n1 -p "${initial}(a)dd/(i)gnore/${prev_line:+(b)ack/}(q)uit? " action; then
      echo
      return 1
    fi
    case "$action" in
      d)
        if [[ -z "$initial" ]]; then
          echo
          echo "Cannot diff one file"
        else
          formatter=cat
          command -v colordiff >/dev/null 2>&1 && formatter=colordiff
          diff -wu "$filea" "$fileb" | "$formatter" | less -FR
        fi
        ;;
      a)
        # Quoted so the whole line is stored as a single element whatever
        # IFS happens to be.
        saved+=("$line")
        break
        ;;
      i)
        break
        ;;
      b)
        if [[ -z "$prev_line" ]]; then
          echo
          echo "Cannot go back further"
        else
          echo
          # Review the previous line again, then come back to this one.
          check_line "$dira" "$dirb" "$prev_line" || return 1
          check_line "$dira" "$dirb" "$line" "$prev_line" || return 1
          break
        fi
        ;;
      q)
        echo
        return 1
        ;;
      *)
        echo
        echo "Not a valid action"
        echo
        echo LINE IS "$line"
        echo
        ;;
    esac
  done
  echo
  return 0
}
# Script entry point: forward the command-line arguments to main unchanged.
main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment