Last active
October 6, 2016 14:33
-
-
Save sambrightman/2f5e79fe5374cadaaa3fc8245ba14b61 to your computer and use it in GitHub Desktop.
Interactive recursive diff with extra hacks for some pipeline specifics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# works much better if runs have the same name on the same machine (i.e. rename first directory after run)
# much easier to view diffs with colordiff installed
# some files will differ only by dates/times/run times/references to unique job names; these just have to be checked by hand
# we ignore whitespace in contents
# we try to match filenames that have SGE pid/job names (but not in contents)
# some files will get processed out-of-order and fail this matching
# maybe do something with find before diff to get a defined name-based ordering?
# Directory that contains one sub-directory per run. Can be overridden from
# the environment; defaults to the original hard-coded location.
RUN_ROOT=${RUN_ROOT:-/data2/processed}
# Diff lines the user chose to (a)dd during the review; printed at the end.
saved=()
#######################################
# Entry point: validate two run names and compare their directories.
# Globals:   RUN_ROOT (read) - directory containing the run directories
# Arguments: $1 - name of the first run (sub-directory of RUN_ROOT)
#            $2 - name of the second run (sub-directory of RUN_ROOT)
# Outputs:   an error message on stderr when a run name is missing or is
#            not a directory under RUN_ROOT
# Returns:   exits the script with status 1 on invalid arguments
#######################################
function main() {
  local runa=${1-}
  local runb=${2-}

  # Empty names must be rejected explicitly: "${RUN_ROOT}/" is itself a
  # directory, so the -d tests alone would accept a missing argument.
  if [[ -z "$runa" || -z "$runb" \
    || ! -d "${RUN_ROOT}/$runa" || ! -d "${RUN_ROOT}/$runb" ]]; then
    echo supply two valid run names >&2
    exit 1
  fi

  compare "${RUN_ROOT}/$runa" "${RUN_ROOT}/$runb"
}
# Print DIR/NAME for a diff line of the form "Only in DIR: NAME".
# Returns non-zero (printing nothing) when the line has a different shape.
function _only_line_path() {
  local only_re='^Only in (.*): (.*)$'
  [[ "$1" =~ $only_re ]] || return 1
  printf '%s/%s\n' "${BASH_REMATCH[1]}" "${BASH_REMATCH[2]}"
}

#######################################
# Walk the output of `diff -r --brief` for two directories and let the user
# review every line through check_line.
#
# Two consecutive "Only in" lines are treated as a candidate pair (the same
# file under a different name in each run). If pair accepts them, the two
# files are diffed directly: identical files are skipped silently, differing
# files are reviewed as a normal "Files ... differ" line.
#
# Globals:   saved (read) - printed after the review
# Arguments: $1 - first directory, $2 - second directory
# Outputs:   the review prompts, then "saved:" followed by the saved items
#######################################
function compare() {
  local dira=$1
  local dirb=$2

  # Newline-only splitting stays in effect for this function and everything
  # it calls, but is scoped so that it no longer leaks into the global shell.
  local IFS=$'\n'

  local diff_output
  diff_output=$(diff -r --brief "$dira" "$dirb")

  # Collect the non-empty lines up front. An array avoids pathname expansion
  # of the diff output and keeps stdin free for the interactive prompts.
  local -a lines=()
  local line
  while IFS= read -r line; do
    if [[ -n "$line" ]]; then
      lines+=("$line")
    fi
  done <<<"$diff_output"

  local pending=""  # "Only in" line seen but not reviewed yet (may pair with the next line)
  local last=""     # most recently reviewed line; offered as the (b)ack target
  local aborted=0   # set once the user quits
  local filea fileb differ

  for line in "${lines[@]}"; do
    if [[ ! "$line" =~ ^Only ]]; then
      if [[ -n "$pending" ]]; then
        # previous line was an unpaired Only, check it first
        check_line "$dira" "$dirb" "$pending" "$last" || { aborted=1; break; }
        last=$pending
        pending=""
      fi
      check_line "$dira" "$dirb" "$line" "$last" || { aborted=1; break; }
      last=$line
    elif [[ -n "$pending" ]]; then
      filea=$(_only_line_path "$pending")
      fileb=$(_only_line_path "$line")
      if pair "$dira" "$dirb" "$filea" "$fileb"; then
        # Both Only lines are consumed by the pair.
        pending=""
        differ=$(diff --brief "$filea" "$fileb")
        if [[ -n "$differ" ]]; then
          check_line "$dira" "$dirb" "$differ" "$last" || { aborted=1; break; }
          last=$differ
        fi
      else
        # Two Only lines that do not match: review the older one now and
        # keep the newer one pending, it may still pair with the next line.
        check_line "$dira" "$dirb" "$pending" "$last" || { aborted=1; break; }
        last=$pending
        pending=$line
      fi
    else
      # First Only line of a potential pair: wait for the next line.
      pending=$line
    fi
  done

  # An Only line at the very end of the diff has nothing left to pair with
  # and would otherwise never be shown.
  if (( ! aborted )) && [[ -n "$pending" ]]; then
    check_line "$dira" "$dirb" "$pending" "$last"
  fi

  echo saved:
  echo
  local item
  for item in "${saved[@]}"; do
    echo "$item"
  done
}
#######################################
# Decide whether two files, one from each run, are the same logical file
# under different names (e.g. names embedding a job id).
#
# Both paths are first made relative to the run directories with pretty.
# They pair when they live in the same relative directory, share everything
# before the last "_", and either have the same extension or, inside a
# "logs" directory, have extensions starting with the same o/e/po/pe prefix.
#
# Arguments: $1 - first run directory, $2 - second run directory
#            $3 - path of the file in the first run
#            $4 - path of the file in the second run
# Returns:   0 when the files pair, 1 otherwise
#######################################
function pair() {
  local dira=$1
  local dirb=$2
  local filea=$3
  local fileb=$4
  local exta extb prefix

  filea=$(pretty "$dira" "$dirb" "$filea")
  fileb=$(pretty "$dira" "$dirb" "$fileb")

  # Compare the directory of A with the directory of B (the original compared
  # A with itself, which is always true).
  [[ "$(dirname "$filea")" == "$(dirname "$fileb")" ]] || return 1
  [[ "${filea%_*}" == "${fileb%_*}" ]] || return 1

  exta=${filea##*.}
  extb=${fileb##*.}
  [[ "$exta" == "$extb" ]] && return 0

  if [[ "$(basename "$(dirname "$filea")")" == "logs" ]]; then
    # Log extensions differ after the prefix; only the prefix has to agree.
    for prefix in o e po pe; do
      [[ "$exta" == "$prefix"* && "$extb" == "$prefix"* ]] && return 0
    done
  fi

  return 1
}
#######################################
# Shorten a line for display by removing every occurrence of the two run
# directory prefixes ("<dir>/") from it.
# Arguments: $1 - first run directory, $2 - second run directory
#            $3 - the text to shorten
# Outputs:   the shortened text on stdout
#######################################
function pretty() {
  local dira=$1
  local dirb=$2
  local line=$3
  local stripped

  # The directory names are quoted inside the pattern so that characters such
  # as [ ] * ? in a path are matched literally instead of as glob operators.
  stripped=${line//"$dira"\//}
  stripped=${stripped//"$dirb"\//}
  printf '%s\n' "$stripped"
}
#######################################
# Show one diff line and ask the user what to do with it.
#
# Actions: (d)iff  - only for "Files A and B differ" lines: show a unified,
#                    whitespace-insensitive diff of A and B in a pager
#          (a)dd   - remember the line in the global saved array
#          (i)gnore
#          (b)ack  - only when a previous line is known: review the previous
#                    line again, then this line again
#          (q)uit
#
# Globals:   saved (appended to)
# Arguments: $1 - first run directory, $2 - second run directory
#            $3 - the diff line to review
#            $4 - the previously reviewed line (optional)
# Returns:   0 to continue with the next line, 1 when the user quits or
#            input ends
#######################################
function check_line() {
  local dira=$1
  local dirb=$2
  local line=$3
  local prev_line=${4-}
  local action formatter
  local initial="" filea="" fileb=""
  # Anchored so that only real "Files A and B differ" lines offer (d)iff, and
  # so that the two paths are captured whole even when they contain spaces.
  local differ_re='^Files (.*) and (.*) differ$'

  if [[ "$line" =~ $differ_re ]]; then
    initial="(d)iff/"
    filea=${BASH_REMATCH[1]}
    fileb=${BASH_REMATCH[2]}
  fi

  pretty "$dira" "$dirb" "$line"
  echo

  while true; do
    # A failing read means end of input; treat it as quit instead of looping
    # forever on an empty action.
    if ! read -r -n1 -p "${initial}(a)dd/(i)gnore/${prev_line:+(b)ack/}(q)uit? " action; then
      echo
      return 1
    fi

    case $action in
      d)
        if [[ -z "$initial" ]]; then
          echo
          echo "Cannot diff one file"
        else
          formatter=cat
          command -v colordiff >/dev/null 2>&1 && formatter=colordiff
          diff -wu "$filea" "$fileb" | "$formatter" | less -FR
        fi
        ;;
      a)
        # Quoted: the line is stored as exactly one element regardless of IFS
        # and without pathname expansion.
        saved+=("$line")
        break
        ;;
      i)
        break
        ;;
      b)
        if [[ -z "$prev_line" ]]; then
          echo
          echo "Cannot go back further"
        else
          echo
          check_line "$dira" "$dirb" "$prev_line" || return 1
          check_line "$dira" "$dirb" "$line" "$prev_line" || return 1
          break
        fi
        ;;
      q)
        echo
        return 1
        ;;
      *)
        echo
        echo "Not a valid action"
        echo
        echo LINE IS "$line"
        echo
        ;;
    esac
  done

  echo
  return 0
}
# Run the comparison with the script's command-line arguments.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment