Created
October 7, 2010 16:39
-
-
Save astanin/615423 to your computer and use it in GitHub Desktop.
Grep wrapper to search in PDF files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# grep wrapper to look into PDF files
# supports -i, -c, -v options
# depends on pdftotext
# known bugs:
# FIXED 1. does not support spaces in the pattern
# 2. does not support grep -r
# WONTFIX 3. does not support grep -f (-f is used internally to pass the pattern)
# Print a one-line usage summary to stdout.
# The program name is $0 with any leading directories stripped.
# Written in POSIX form (no `function` keyword) so it works under /bin/sh.
usage() {
  printf '%s <grep options> <pattern> <list of pdf files>\n' "${0##*/}"
}
# ---------------------------------------------------------------------------
# Main script: parse the arguments, then grep the text of each PDF argument.
#
# Arguments: [grep options...] <pattern> <file>...
#   - Leading arguments that start with '-' are passed through to `grep -E`.
#   - The first argument that does not start with '-' is the pattern.
#   - The remaining arguments are files; files that `file` does not identify
#     as PDF are silently skipped.
# Output: matching lines on stdout, prefixed with "<file>:" when more than
#   one file argument was given.
# Exit status: 2 on a usage error, 1 if the temporary file cannot be
#   created, 0 otherwise (match/no-match is not reflected in the status).
# ---------------------------------------------------------------------------
if [ "$#" -lt 2 ]; then
  usage >&2
  exit 2
fi

# The pattern is handed to grep through a file (-f) so that spaces and other
# special characters in it survive. mktemp gives an unpredictable name; the
# EXIT trap removes the file on every exit path, and the signal traps turn
# HUP/INT/TERM into a normal exit so the EXIT trap also runs in that case.
patternfile=$(mktemp "${TMPDIR:-/tmp}/pdfgrep.XXXXXX") || exit 1
trap 'rm -f "$patternfile"' EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

grepoptions=''
have_pattern=0
# Stop while one argument is still left: the last argument is always a file.
while [ "$#" -gt 1 ]; do
  case $1 in
    -*)
      grepoptions="$grepoptions $1"
      shift
      ;;
    *)
      # First non-option argument is the pattern. printf (not echo) keeps
      # backslashes in the pattern intact in every shell.
      printf '%s\n' "$1" > "$patternfile"
      have_pattern=1
      shift
      break
      ;;
  esac
done

# Every argument but the last one looked like an option: there is no pattern.
if [ "$have_pattern" -eq 0 ]; then
  usage >&2
  exit 2
fi

# Run grep on stdin with the collected options and the stored pattern.
# --color=always is recommended as default
run_grep() {
  # POSIX sh has no arrays, so the options are kept in one string and split
  # on whitespace here on purpose.
  # shellcheck disable=SC2086
  grep -E $grepoptions -f "$patternfile"
}

nfiles=$#
for f in "$@"; do
  # Ignore non-PDF files. `file -b` leaves the file name out of its output,
  # so a name that merely contains "PDF" is not mistaken for a PDF document.
  if file -b "$f" | grep -q PDF; then
    if [ "$nfiles" -gt 1 ]; then
      # Prefix each match with the file name. The name is passed through the
      # environment rather than spliced into a sed/awk program, so '/', '&'
      # and backslashes in the path are printed literally.
      pdftotext "$f" - | run_grep |
        PDFGREP_PREFIX=$f awk '{ print ENVIRON["PDFGREP_PREFIX"] ":" $0 }'
    else
      pdftotext "$f" - | run_grep
    fi
  fi
done

# Keep the historical exit status of a completed run; the EXIT trap removes
# the pattern file.
exit 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment