Skip to content

Instantly share code, notes, and snippets.

@dberkholz
Last active January 20, 2016 07:42
Show Gist options
  • Save dberkholz/44d42e65f7947162f894 to your computer and use it in GitHub Desktop.
Save dberkholz/44d42e65f7947162f894 to your computer and use it in GitHub Desktop.
Takes an Excel spreadsheet, fetches URLs, annotates them with spreadsheet data and combines into a single PDF
#!/bin/bash
VERSION=0.3
help() {
echo "${0##*/} v${VERSION}"
echo
echo "${0##*/} makes expenses easier by handling receipt->PDF generation."
echo
echo "It creates an annotated PDF based on an Excel spreadsheet containing"
echo "arbitrary data in one column (such as receipt numbers), URLs to PDF/JPG"
echo "files in another column, and can handle any other data in other"
echo "columns, as long as it doesn't contain commas."
echo
echo "This spreadsheet could be created through an Expensify export or"
echo "other methods."
echo
echo "Depends on ImageMagick and csvkit"
echo
echo "Usage: ${0##*/} -i INFILE -o OUTFILE -s SHEET -d DATA_COLUMN -u URL_COLUMN [-t] [-V]"
echo
echo "-i INFILE The input Excel file (.xls, .xlsx)"
echo "-o OUTFILE The output PDF file"
echo "-s SHEET The Excel sheet name (quote if contains spaces)"
echo "-d DATA_COLUMN The column number containing annotation data"
echo "-u URL_COLUMN The column number containing the URLs to fetch (jpg, pdf)"
echo "-t Save the temporary working directory"
echo "-V Show version and exit"
echo
exit 1
}
fetch_url() {
local URL=${1}
local FILE=${1##*/}
[[ -f ${FILE} ]] || curl -s -O ${URL}
echo ${FILE}
}
convert_pdf_to_jpg() {
local INFILE=${1}
local OUTFILES=( ${INFILE%.*}.jpg )
if [[ ${INFILE##*.} == pdf ]]; then
convert -density 150 ${INFILE} ${OUTFILES}
# If there's not a single one, then it's multiple pages
if [[ -f ${OUTFILES[0]} ]]; then
echo "Single-page PDF" >/dev/stderr
echo ${OUTFILES[@]}
continue
fi
echo "Multipage PDF" >/dev/stderr
OUTFILES=
local FILE
for FILE in ${INFILE%.*}-[0-9].jpg ${INFILE%.*}-[0-9][0-9].jpg; do
[[ -f ${FILE} ]] && OUTFILES+=( ${FILE} )
done
else
echo "JPG" >/dev/stderr
fi
echo ${OUTFILES[@]}
}
annotate_jpgs() {
local NUM=${1}; shift
declare -a FILES=( ${@} )
local FILE=${FILES[0]}
local ANN=${FILE%.*}.ann.jpg
# Annotate the first file only
convert -background '#ffffff' -gravity center -fill black -size 90x90 \
caption:"${NUM}" \
${FILE} +swap -gravity northeast -composite ${ANN}
FILES[0]=${ANN}
echo ${FILES[@]}
}
combine_files() {
local -a OUT=${1}; shift
convert ${@} ${OUT}
}
main() {
local URL INFILE XLS OUTPUT_NAME SAVE_TEMP NDATA=1 NURL=9
local SHEET='Expensify Report Export'
declare -a JPGS ANNS OUTFILES
[[ ${#} -eq 0 ]] && help
while getopts ti:o:s:d:u:V OPTS; do
case ${OPTS} in
i)
XLS=${OPTARG}
;;
o)
OUTPUT_NAME=${OPTARG}
;;
s)
SHEET=${OPTARG}
;;
d)
NDATA=${OPTARG}
;;
u)
NURL=${OPTARG}
;;
t)
SAVE_TEMP="true"
;;
V)
echo "${0##*/} v${VERSION}"
return 1
;;
\?)
echo "Invalid option" >/dev/stderr
return 1
;;
esac
done
TEMPDIR="tmp-${RANDOM}"
[[ ! -d ${TEMPDIR} ]] && mkdir ${TEMPDIR}
pushd ${TEMPDIR} >/dev/null
declare -a LINE
while IFS=, read -a LINE; do
NUM=${LINE[NDATA-1]}
URL=${LINE[NURL-1]}
[[ ${URL:0:4} == http ]] || continue
echo -n "Processing line ${NUM}: " >/dev/stderr
INFILE=$(fetch_url ${URL})
JPGS=( $(convert_pdf_to_jpg ${INFILE}) )
ANNS=( $(annotate_jpgs ${NUM} ${JPGS[@]}) )
OUTFILES+=( ${ANNS[@]} )
done \
< <(in2csv --sheet "${SHEET}" ../${XLS})
# Combine files into final PDF
echo "Creating final PDF ${OUTPUT_NAME}" >/dev/stderr
combine_files ../${OUTPUT_NAME} ${OUTFILES[@]}
popd >/dev/null
[[ ${SAVE_TEMP} == "true" ]] || rm -rf ${TEMPDIR}
}
main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment