Created
April 19, 2016 04:02
-
-
Save DocumentAlchemy/9b249ba745fc168c0e82d7045d7a83fa to your computer and use it in GitHub Desktop.
Shell script that extracts all images from PDF, PPT/X, DOC/X and XLS/X documents
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Extracts images from Microsoft Office or PDF files using the
# DocumentAlchemy API.
#
# USAGE: extract-images.sh <FILES>
#
# EXAMPLE: extract-images.sh MyDeck.pptx *.doc
#
# A ZIP archive containing the extracted images (if any) will be
# created for each document submitted. The archive is written next to
# the document and is named `images-from-<document file name>.zip`.
#
# ENVIRONMENT:
#   DA_API_KEY  Your DocumentAlchemy API key. When unset or empty the
#               key embedded below is used.
#               (See <https://documentalchemy.com/> to get one of your own.)
#   QUIET       Set to `true` (also accepted: `yes`, `1`; case-insensitive)
#               to prevent this script from printing progress and warning
#               messages. Any other value means "not quiet". A clever
#               person could make this into a command line parameter
#               like `-q`.
#
# Note that you can set environment variables on a per-invocation
# basis by prefixing NAME=VALUE before the command. For example:
#   DA_API_KEY=my-key QUIET=true extract-images.sh *.doc
#
# EXIT STATUS: the number of files we couldn't extract from, capped at
# 255 (the largest status a process can report) so that a large number
# of failures can never wrap around to 0 and look like success.

API_KEY=${DA_API_KEY:-"403l1zh3dkbakyb9"}
QUIET=${QUIET:-false}

# Returns 0 when QUIET holds a "true" value, 1 otherwise.
# The value is compared as text and never executed as a command.
is_quiet() {
  case "$QUIET" in
    [Tt][Rr][Uu][Ee] | [Yy][Ee][Ss] | 1) return 0 ;;
    *) return 1 ;;
  esac
}

# Prints its arguments as one line on stdout unless QUIET is enabled.
say() {
  is_quiet || printf '%s\n' "$*"
}

# Prints the path of the ZIP archive that will be created for document $1:
# same directory as the document, file name `images-from-<name>.zip`.
output_path_for() {
  local doc=$1
  printf '%s/images-from-%s.zip' "$(dirname -- "$doc")" "$(basename -- "$doc")"
}

# Prints the value to hand to `curl --form` for uploading file $1.
# The path is wrapped in double quotes (with embedded backslashes and
# double quotes escaped) so that `,` and `;` in a file name are not
# mistaken by curl for form-field option separators.
form_file_arg() {
  local path=$1
  path=${path//\\/\\\\}
  path=${path//\"/\\\"}
  printf 'document=@"%s"' "$path"
}

# Extracts the images from the single document $1.
# Returns 0 when the archive was created, 1 when the document was
# skipped or the service did not answer with HTTP 200.
extract_images() {
  local doc=$1
  local outfile response

  # Only regular, non-empty files can be uploaded.
  if ! [[ -f "$doc" && -s "$doc" ]]; then
    say "WARNING: File '$doc' was not found (or is empty) and will be ignored."
    return 1
  fi

  # POST the document to DocumentAlchemy. curl writes the response body
  # to $outfile and, via --write-out, prints only the HTTP status code,
  # which is what ends up in $response.
  outfile=$(output_path_for "$doc")
  say "Extracting images from '$doc' into '$(basename -- "$outfile")'..."
  response=$(curl --silent \
    --write-out '%{http_code}' -H "Authorization: da.key=$API_KEY" \
    -X POST --form "$(form_file_arg "$doc")" \
    https://documentalchemy.com/api/v1/document/-/rendition/images.zip \
    -o "$outfile")

  # Compare as text: $response may be empty or non-numeric if curl itself
  # could not be run, which a numeric test would choke on.
  if [[ "$response" != "200" ]]; then
    say "WARNING: Expected a 200 response for file '$doc', found $response instead."
    return 1
  fi

  say "...OK. File '$outfile' created."
  return 0
}

# Loops over the command line parameters, extracting images from each,
# and returns the number of documents that could not be converted
# (capped at 255).
main() {
  local failures=0
  local doc

  for doc in "$@"; do
    extract_images "$doc" || failures=$((failures + 1))
  done

  if ((failures > 255)); then
    failures=255
  fi
  return "$failures"
}

# Must stay the last command: its return value is the script's exit status.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment