Created
June 15, 2016 11:54
-
-
Save icyrizard/6e413332284e61d3e5883eea5f8f200b to your computer and use it in GitHub Desktop.
Download images listed in a CSV file using wget and GNU Parallel
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Downloads the URLs found in one column of a csv file, running wget under
# GNU parallel. This header holds the defaults and the help-text formatting.

COLUMN=1      # csv column to extract
RENAME=false  # whether to rename downloaded files; note that it was really specific for my problem
THREADS=16    # number of jobs passed to parallel via -j

# Script name shown in the help text. Parameter expansion strips the directory
# part without forking basename and without word-splitting paths with spaces.
SCRIPT=${BASH_SOURCE[0]##*/}

# Terminal control sequences used by the help text. tput fails when no usable
# terminal is defined (e.g. TERM unset); in that case fall back to empty
# strings quietly instead of printing tput errors.
NORM=$(tput sgr0 2>/dev/null || true)
BOLD=$(tput bold 2>/dev/null || true)
REV=$(tput smso 2>/dev/null || true)
# Help function | |
# Print the usage text to stdout, then terminate the script with status 1.
# Globals read: SCRIPT (script name), COLUMN and THREADS (current defaults),
#               BOLD and NORM (terminal formatting sequences, may be empty).
# Never returns: it always calls exit.
HELP() {
  # printf '%s' prints the text verbatim, so a backslash in SCRIPT is not
  # interpreted as an escape sequence the way echo -e would interpret it.
  printf '\n%s\n\n' "Help documentation for ${SCRIPT}."
  printf '%s\n\n' "Basic usage: ./$SCRIPT"
  printf '%s\n' "Command line switches are optional. The following switches are recognized."
  printf '%s\n' "-f csv file = required should be last argument"
  printf '%s\n' "-c column, default $COLUMN"
  printf '%s\n' "-t threads, default $THREADS"
  printf '%s\n' "-r renamd, should be renamed - work in progress here because this is really specific renaming"
  printf '%s\n\n' "-h --Displays this help message. No further functions are performed."
  # Reset formatting with NORM after the bold example; without it the bold
  # attribute stays active in the terminal after the script exits.
  printf '%s\n\n' "Example: ./${BOLD}$SCRIPT -rc 2 -f file.csv${NORM}"
  exit 1
}
# Parse the command line, then download every URL found in the selected csv
# column into ./images, running wget jobs under GNU parallel.

# Check the number of arguments. If none are passed, print help and exit.
NUMARGS=$#
if (( NUMARGS == 0 )); then
  HELP
  exit 1
fi

FILE=""

# Optstring ':t:c:rhf:':
#   leading ':'  -> silent mode, so the ':' and '?' cases below do the reporting
#   t: c: f:     -> options that take an argument
#   r h          -> plain flags, so that "-rc 2" parses as -r followed by -c 2
while getopts ':t:c:rhf:' FLAG; do
  case "$FLAG" in
    t)
      THREADS=$OPTARG
      ;;
    c)
      COLUMN=$OPTARG
      ;;
    r)
      RENAME=true
      ;;
    f)
      FILE=$OPTARG
      ;;
    h) # show help (HELP exits the script)
      HELP
      ;;
    :) # option given without its required argument; OPTARG holds the option letter
      printf '\n%s\n' "Option -${BOLD}${OPTARG}${NORM} requires an argument." >&2
      HELP
      ;;
    \?) # unknown option; OPTARG holds the offending letter
      printf '\n%s\n' "Option -${BOLD}${OPTARG}${NORM} not allowed." >&2
      HELP
      ;;
  esac
done

# Drop the parsed options; what remains are positional arguments.
shift $((OPTIND - 1))

# The csv file may be given through -f or as the first positional argument.
if [[ -z "$FILE" ]]; then
  FILE=${1:-}
fi
if [[ -z "$FILE" ]]; then
  printf '\n%s\n' "No csv file given." >&2
  HELP
fi
# "-" is passed through untouched so that reading from stdin keeps working.
if [[ "$FILE" != "-" && ! -r "$FILE" ]]; then
  printf '%s\n' "Cannot read csv file: $FILE" >&2
  exit 1
fi

# Command template run by parallel for each URL. {} is the URL and {/} its
# basename (GNU parallel replacement strings).
if [[ "$RENAME" == true ]]; then
  # After the download, rename the file: dots become underscores, only the 1st
  # and 3rd underscore-separated fields are kept, and underscores are turned
  # back into dots (e.g. "a.b.c" -> "a.c").
  JOB='wget {}; mv {/} `echo "{/}" | tr "." "_" | cut -d "_" -f1,3 | tr "_" "."`'
else
  JOB='wget {};'
fi

if ! mkdir -p images; then
  printf '%s\n' "Cannot create directory: images" >&2
  exit 1
fi

# tail -n +2 : skip the first line of the csv
# cut        : keep only the requested comma-separated column
# grep http  : keep only lines containing "http"
# sed        : strip leading spaces, tabs and carriage returns
# parallel   : split the input on the "\r\n" delimiter and run JOB per item;
#              "cd images &&" ensures nothing is downloaded into the current
#              directory when the cd fails.
tail -n +2 -- "$FILE" |
  cut -d ',' -f"$COLUMN" |
  grep http |
  sed -e 's/^[ \t\r]*//' |
  (cd images && parallel -j"$THREADS" -d'\r\n' --gnu "$JOB")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment