Skip to content

Instantly share code, notes, and snippets.

@jn0
Created February 20, 2017 15:41
Show Gist options
  • Save jn0/e17b4aa728532976e9dd9cc4aacf8acf to your computer and use it in GitHub Desktop.
Save jn0/e17b4aa728532976e9dd9cc4aacf8acf to your computer and use it in GitHub Desktop.
#!/bin/bash
# A tool to extract the parts of an MS Windows "web archive" (a MIME multipart/related message) that arrived as a ".DOC" file.
# Run it as `splitmime.sh < /tmp/where-it-is.doc`, then look in the "./C_/" directory for the extracted files.
# ---- Parser state -----------------------------------------------------------
# Counters carry the integer attribute so arithmetic assignments evaluate.
declare -i nl=0   # number of input lines read so far
declare -i ol=0   # number of body lines collected for the current part

# Flags are the strings 'yes' / 'no'.
file_header='yes' # 'yes' until the first boundary line has been seen
header='no'       # 'yes' while reading the header lines of a part

div=''            # boundary line ("--" + boundary parameter); empty until found
file=''           # output path of the part being collected; empty if none
cte=''            # Content-Transfer-Encoding value read from a part header
# Quoted-printable filter: reads encoded text on stdin and writes the decoded
# text to stdout, handling one input line at a time.
qpd() {
  perl -MMIME::QuotedPrint \
    -pe '$_ = decode_qp($_)'
}
#######################################
# Decode the collected body of one MIME part.
# Reads "<name>.xxx", writes the decoded data to "<name>" and removes
# "<name>.xxx" once decoding has succeeded.
# Arguments:
#   $1 - Content-Transfer-Encoding value (matched case-insensitively, as
#        RFC 2045 requires); empty, 7bit, 8bit and binary mean "no encoding"
#   $2 - target path, without the ".xxx" suffix
# Outputs:
#   A diagnostic on stderr for an unsupported encoding.
# Returns:
#   0 on success; non-zero if the decoder fails (the ".xxx" file is kept)
#   or the encoding is not supported.
#######################################
decode() {
  local cte="$1"
  local name="$2"
  local enc
  # Normalise case only for matching; the message below shows the raw value.
  enc=$(printf '%s' "$cte" | tr '[:upper:]' '[:lower:]')
  case "$enc" in
    base64)
      base64 -d < "${name}.xxx" > "${name}" && rm -- "${name}.xxx"
      ;;
    quoted-printable)
      qpd < "${name}.xxx" > "${name}" && rm -- "${name}.xxx"
      ;;
    '' | 7bit | 8bit | binary)
      # Identity encodings (a missing header defaults to 7bit): keep as is.
      mv -- "${name}.xxx" "${name}"
      ;;
    *)
      # stderr rather than /dev/tty, so this works without a terminal.
      echo "Unsupported '$cte' for '$name'." >&2
      return 1
      ;;
  esac
}
#######################################
# Split a MIME "multipart/related" web archive read from stdin into files.
#
# The first line must be a "MIME-Version:" header and the first
# "Content-Type:" header must carry a quoted boundary parameter. Every part
# whose "Content-Location:" is a "file:///" URL is collected into
# "./<path with ':' replaced by '_'>.xxx" and handed to decode() when the
# next boundary line is reached.
#
# Differences from a naive line loop, all deliberate:
#   * lines are read raw (IFS= read -r) and never expanded unquoted, so
#     backslashes, leading/internal whitespace and glob characters in part
#     bodies survive; only CRs and trailing whitespace are dropped;
#   * the transfer encoding is reset at every boundary, so a part without a
#     Content-Transfer-Encoding header never inherits the previous one;
#   * locations containing ".." components are refused (the archive is
#     untrusted input and must not write outside the current directory);
#   * the scratch ".xxx" file is truncated, not appended to, when a part starts;
#   * all diagnostics go to stderr instead of stdout or /dev/tty.
#
# Globals:   none (all state is local); calls decode()
# Arguments: none
# Outputs:   progress and error messages on stderr; extracted files on disk
# Returns:   exits the script with status 1 on a malformed archive or when a
#            directory / scratch file cannot be created
#######################################
split_mime() {
  local div='' file='' cte=''
  local file_header='yes' header='no'
  local line body text rel dirn flnm
  local -i nl=0 ol=0
  local -r boundary_re='^Content-Type:[[:space:]]*multipart/related;[[:space:]]*boundary="([^"]+)"'

  # "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    (( nl += 1 ))
    line=${line//$'\r'/}
    # body: trailing whitespace removed (what gets written to the part file).
    body=${line%"${line##*[![:space:]]}"}
    # text: leading whitespace removed as well (used for structural matching).
    text=${body#"${body%%[![:space:]]*}"}

    if (( nl == 1 )); then
      [[ "$text" == MIME-Version:* ]] || { echo "Not a MIME file." >&2; exit 1; }
      echo "$nl: $text" >&2
      continue
    fi

    # Until the boundary is known, only look for the top-level Content-Type,
    # e.g.  Content-Type: multipart/related; boundary="----=_NextPart_01D28B89"
    if [[ -z "$div" ]]; then
      if [[ "$text" == Content-Type:* ]]; then
        [[ "$text" =~ $boundary_re ]] || { echo "No boundary." >&2; exit 1; }
        div="--${BASH_REMATCH[1]}"
        echo "$nl: Divider=[$div]" >&2
      fi
      continue
    fi

    if [[ "$text" == "${div}--" ]]; then
      echo "$nl: Last divider ($file:$ol)." >&2
      file_header='no'
      if [[ -n "$file" && -f "${file}.xxx" ]]; then
        decode "$cte" "$file"
      fi
      file=''
      ol=0
      break
    elif [[ "$text" == "$div" ]]; then
      echo "$nl: Divider ($file:$ol)." >&2
      file_header='no'
      if [[ -n "$file" && -f "${file}.xxx" ]]; then
        decode "$cte" "$file"
      fi
      header='yes'
      # A new part starts: forget the previous part's name and encoding.
      file=''
      cte=''
      ol=0
      continue
    fi

    # Everything before the first boundary line is skipped.
    [[ "$file_header" == 'yes' ]] && continue

    if [[ "$header" == 'yes' ]]; then
      if [[ -z "$text" ]]; then
        # An empty line ends the part header.
        header='no'
      elif [[ "$text" == Content-Location:* ]]; then
        file=${text#Content-Location:}
        file=${file//[[:space:]]/}
        rel=${file#file:///}
        if [[ "$file" == file:///* && -n "$rel" && "/${rel}/" != */../* ]]; then
          # "C:/dir/name" becomes "./C_/dir/name".
          file="./${rel//:/_}"
          dirn=$(dirname -- "$file")
          flnm=$(basename -- "$file")
          mkdir -p -- "$dirn" || { echo "Cannot mkdir '$dirn'." >&2; exit 1; }
          echo "$nl: Will save '$flnm' to '$dirn/'" >&2
          # Create or truncate the scratch file so a stale one is never reused.
          printf '' > "${file}.xxx" \
            || { echo "Cannot touch '${file}.xxx'." >&2; exit 1; }
        else
          # Not a file:/// URL, empty, or trying to climb out with "..":
          # stop processing, as before.
          echo "$nl: Cannot save to '$file'" >&2
          file=''
          ol=0
          break
        fi
      elif [[ "$text" == Content-Transfer-Encoding:* ]]; then
        cte=${text#Content-Transfer-Encoding:}
        cte=${cte//[[:space:]]/}
        echo "$nl: Content-Transfer-Encoding: [$cte]" >&2
      fi
      continue
    fi

    if [[ -n "$file" ]]; then
      # printf, not echo: a body line such as "-n" must be written verbatim.
      printf '%s\n' "$body" >> "${file}.xxx"
      (( ol += 1 ))
    fi
  done

  if [[ -n "$file" ]]; then
    echo "$nl: Input ended before the last divider; '${file}.xxx' left undecoded." >&2
  fi
}

split_mime
# EOF #
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment