Skip to content

Instantly share code, notes, and snippets.

@helix84
Last active August 17, 2018 14:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save helix84/65291d4df1b144acb2ccc3d67b6a2a24 to your computer and use it in GitHub Desktop.
Save helix84/65291d4df1b144acb2ccc3d67b6a2a24 to your computer and use it in GitHub Desktop.
#!/bin/sh
#
# colorize DSpace CSV import output (diff) using ANSI sequences for viewing in the terminal
# colorize added and removed values (green and grey)
# mark erroneous or suspicious values or characters (red background)
#
# Usage:
# /dspace/bin/dspace metadata-import -e dspaceadmin@example.com -f import.csv | tee ~/changes/2016-09-06.txt | ~/bin/colorize-import.sh | less -R
# or
# ~/bin/colorize-import.sh ~/changes/2016-09-06.txt
# Development note:
# sed '' | sed '' | sed ''
# is (about 5x) faster than
# sed -e '' -e '' -e ''
# ---------------------------------------------------------------------------
# Terminal control sequences.  Looked up once here instead of forking tput
# dozens of times for every input line.
# ---------------------------------------------------------------------------
MARK_ON=$(tput setab 1)   # red background: start of a flagged span
# End of a flagged span: reset only the background, so that the surrounding
# foreground colour (green for added values) survives.  The literal SGR 49
# sequence is used because "tput setab 9" means "default background" only on
# 8-colour terminfo entries; on 16/256-colour entries (e.g. xterm-256color)
# colour 9 is bright red.  Left empty when the terminal has no setab at all.
MARK_OFF=${MARK_ON:+$(printf '\033[49m')}
ADD_ON=$(tput setaf 2)    # green foreground for added values
DEL_ON=$(tput dim)        # dim ("grey") for removed values
ALL_OFF=$(tput sgr0)      # reset every attribute
ARROW="${MARK_ON}-->${MARK_OFF} "

# Invisible / private-use characters are built from their UTF-8 bytes so that
# they cannot be silently lost or normalised by an editor or a copy & paste.
NBSP=$(printf '\302\240')              # U+00A0 no-break space
COMBINING_ACUTE=$(printf ' \314\201')  # space followed by U+0301
COMBINING_DIAER=$(printf ' \314\210')  # space followed by U+0308
PUA_FT=$(printf '\356\202\235')        # U+E09D, seen in place of the ft ligature
PUA_FL=$(printf '\356\204\204')        # U+E104, seen in place of the fl ligature
PUA_FI=$(printf '\356\204\203')        # U+E103, seen in place of the fi ligature

#######################################
# Flag one span inside a "+ Add" line with a red background.
# Filter: reads stdin, writes stdout.
# Arguments:
#   $1 - BRE that must precede the span (may be empty)
#   $2 - BRE of the span to flag
#   $3 - optional BRE that must follow the span (e.g. '$')
#######################################
mark_in_add() {
  sed -e "s,^\(+ Add.*${1}\)\(${2}\)${3-},\1${MARK_ON}\2${MARK_OFF},"
}

#######################################
# Flag a suspicious character anywhere in a line with a red background and
# prefix the line with a red "-->" so it is easy to spot.  Only the last
# occurrence in a line is flagged (the leading .* is greedy).
# Filter: reads stdin, writes stdout.
# Arguments:
#   $1 - BRE of the character (sequence) to flag
#######################################
mark_char() {
  sed -e "s,^\(.*\)\(${1}\)\(.*\)\$,${ARROW}\1${MARK_ON}\2${MARK_OFF}\3,"
}

#######################################
# Colorize a DSpace metadata-import diff.
# Filter: reads the diff on stdin, writes the colorized diff to stdout.
# The whole stream goes through one sed pipeline (no per-line shell loop),
# so trailing blanks, backslashes and an unterminated last line all survive.
#######################################
colorize() {
  # The former "while read line" loop stripped leading blanks as a side
  # effect, and every "^+ Add" / "^- Remove" anchor below relies on the sign
  # being in column 0, so keep stripping them explicitly.
  sed -e 's,^[[:blank:]]*,,' |
    # pipe before value
    mark_in_add '' '|' |
    # space before value
    mark_in_add ': ' ' ' |
    # no-break space before value
    mark_in_add ': ' "$NBSP" |
    # space after value
    mark_in_add '' ' ' '$' |
    # no-break space after value
    mark_in_add '' "$NBSP" '$' |
    # double space in value
    mark_in_add ': .*' '[ ][ ]' |
    # brackets in title (usually the title in another language)
    mark_in_add 'dc\.title\[.*' '\[.*\]' |
    # WoS identifiers nowadays start with 000
    mark_in_add 'utb\.identifier\.wok): ' '[^0]' |
    # Whole-line colours come after the "+ Add" rules above: once a line
    # starts with an escape sequence, "^+ Add" no longer matches it.
    # added values: green
    sed -e "s,^+ Add.*,${ADD_ON}&${ALL_OFF}," |
    # removed values: grey
    sed -e "s,^- Remove.*,${DEL_ON}&${ALL_OFF}," |
    # standalone U+02C7 caron
    mark_char 'ˇ' |
    # standalone U+02DD double acute accent
    mark_char '˝' |
    # standalone U+02DA ring above
    mark_char '˚' |
    # U+0131 Latin small letter dotless i in place of í
    mark_char 'ı' |
    # standalone U+0301 combining acute accent
    mark_char "$COMBINING_ACUTE" |
    # standalone U+00B4 acute accent
    mark_char '´' |
    # standalone U+02D8 breve
    mark_char '˘' |
    # standalone U+0308 combining diaeresis
    mark_char "$COMBINING_DIAER" |
    # standalone U+00A8 diaeresis
    mark_char '¨' |
    # standalone U+02C6 circumflex accent
    mark_char 'ˆ' |
    # U+E09D in place of the ft ligature
    mark_char "$PUA_FT" |
    # U+E104 in place of the fl ligature
    mark_char "$PUA_FL" |
    # U+E103 in place of the fi ligature
    mark_char "$PUA_FI" |
    # U+0100 in place of U+201C left double quotation mark
    mark_char 'Ā' |
    # U+011C in place of ř
    mark_char 'Ĝ' |
    # U+0124 in place of ů
    mark_char 'Ĥ' |
    # U+0BC5 in place of U+2013 en dash
    mark_char '௅ '
}

# Read the file named by the first argument, or stdin when none is given.
colorize < "${1:-/dev/stdin}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment