Last active
August 17, 2018 14:44
-
-
Save helix84/65291d4df1b144acb2ccc3d67b6a2a24 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
#
# colorize DSpace CSV import output (diff) using ANSI sequences for viewing in the terminal
# colorize added and removed values (green and grey)
# mark erroneous or suspicious values or characters (red background)
#
# Usage:
# /dspace/bin/dspace metadata-import -e dspaceadmin@example.com -f import.csv | tee ~/changes/2016-09-06.txt | ~/bin/colorize-import.sh | less -R
# or
# ~/bin/colorize-import.sh ~/changes/2016-09-06.txt
# Development note:
# sed '' | sed '' | sed ''
# is (about 5x) faster than
# sed -e '' -e '' -e ''
# Colorize one DSpace metadata-import diff.
#
# Arguments: $1 - file to read; standard input when omitted or empty
# Outputs:   the annotated diff on stdout
#
# The body runs in a subshell, so the helper and the variables defined below
# do not leak into the caller.
colorize_import() (
  input=${1:-/dev/stdin}

  # Look the terminal sequences up once instead of once per rule per line.
  # A failing tput (unset/unknown TERM, missing capability) is deliberately
  # silenced so that the output degrades to plain, uncoloured text.
  hl_on=$(tput setab 1 2>/dev/null) || hl_on=    # red background
  # NOTE(review): "setab 9" is used to switch the highlight off again; whether
  # colour 9 means "default background" depends on the terminfo entry --
  # confirm on 256-colour terminals.
  hl_off=$(tput setab 9 2>/dev/null) || hl_off=
  added=$(tput setaf 2 2>/dev/null) || added=    # green foreground
  removed=$(tput dim 2>/dev/null) || removed=    # dim, shown as grey
  reset=$(tput sgr0 2>/dev/null) || reset=
  arrow="${hl_on}-->${hl_off} "                  # prefix for lines with a suspicious character

  # Invisible or ambiguous characters are spelled as UTF-8 octal bytes so the
  # patterns survive editors and copy/paste unchanged.
  nbsp=$(printf '\302\240')                      # U+00A0 no-break space

  # This uses one sed for the whole stream rather than the chained seds the
  # header note recommends: the chain had to be restarted for every input
  # line, and that process start-up is what this avoids. sed applies the
  # rules to each line in the order written, exactly like the chain did.
  rules="
# mark pipe before value
s,^\(+ Add.*\)\(|\),\1${hl_on}\2${hl_off},
# mark space before value
s,^\(+ Add.*\)\(: \)\( \),\1\2${hl_on}\3${hl_off},
# mark no-break space before value
s,^\(+ Add.*\)\(: \)\(${nbsp}\),\1\2${hl_on}\3${hl_off},
# mark space after value
s,^\(+ Add.*\)\( \)\$,\1${hl_on}\2${hl_off},
# mark no-break space after value
s,^\(+ Add.*\)\(${nbsp}\)\$,\1${hl_on}\2${hl_off},
# mark double space in value
s,^\(+ Add.*\)\(: .*\)\(  \),\1\2${hl_on}\3${hl_off},
# mark brackets in title (usually title in another language)
s,^\(+ Add.*\)\(dc\.title\[.*\)\(\[.*\]\),\1\2${hl_on}\3${hl_off},
# WoS identifiers nowadays start with 000
s,^\(+ Add.*\)\(utb\.identifier\.wok): \)\([^0]\),\1\2${hl_on}\3${hl_off},
# color added values green
s,^+ Add.*,${added}&${reset},
# color removed values grey
s,^- Remove.*,${removed}&${reset},
"

  # Append a rule that highlights the last occurrence of a suspicious
  # character on a line and prefixes that line with the arrow.
  # $1 - the character, as a printf format made of octal escapes
  flag() {
    # shellcheck disable=SC2059  # $1 is a fixed escape list below, never data
    suspect=$(printf "$1")
    rules="${rules}s,^\(.*\)\(${suspect}\)\(.*\)\$,${arrow}\1${hl_on}\2${hl_off}\3,
"
  }

  flag '\313\207'      # U+02C7 caron (was listed twice; one rule is enough)
  flag '\313\235'      # U+02DD double acute accent
  flag '\313\232'      # U+02DA ring above
  flag '\304\261'      # U+0131 dotless i in place of i-acute
  flag ' \314\201'     # space + U+0301: standalone combining acute
  flag '\302\264'      # U+00B4 acute accent
  flag '\313\230'      # U+02D8 breve
  flag ' \314\210'     # space + U+0308: standalone combining diaeresis
  flag '\302\250'      # U+00A8 diaeresis
  flag '\313\206'      # U+02C6 circumflex accent
  flag '\356\202\235'  # U+E09D in place of the ft ligature
  flag '\356\204\204'  # U+E104 in place of the fl ligature
  flag '\356\204\203'  # U+E103 in place of the fi ligature
  flag '\304\200'      # U+0100 in place of U+201C left double quotation mark
  flag '\304\234'      # U+011C in place of r-caron
  flag '\304\244'      # U+0124 in place of u-ring
  flag '\340\257\205'  # U+0BC5 in place of U+2013 en dash

  # Feeding sed directly keeps every line byte-for-byte: leading/trailing
  # blanks, backslashes and lines such as "-n" reach the rules untouched,
  # which a "read line" / "echo" loop does not guarantee.
  sed -e "$rules" < "$input"
)

colorize_import "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment