Skip to content

Instantly share code, notes, and snippets.

@icyrizard
Created June 15, 2016 11:54
Show Gist options
  • Save icyrizard/6e413332284e61d3e5883eea5f8f200b to your computer and use it in GitHub Desktop.
Save icyrizard/6e413332284e61d3e5883eea5f8f200b to your computer and use it in GitHub Desktop.
Download images listed in a CSV file using wget and GNU Parallel
#!/bin/bash
# Default settings; each can be overridden by a command line switch.
COLUMN=1 # csv column to extract (handed to `cut -f`)
RENAME=false # whether downloaded files are renamed; the renaming rule is specific to the author's data
THREADS=16 # number of jobs handed to `parallel -j`
# Name of this script without its directory, used in the help text.
# The expansion is quoted so a path containing spaces is not word-split.
SCRIPT=$(basename -- "${BASH_SOURCE[0]}")
# Terminal control sequences used by the help text. tput fails when TERM is
# unset or unknown; in that case fall back to empty strings and stay quiet.
NORM=$(tput sgr0 2>/dev/null) || NORM=
BOLD=$(tput bold 2>/dev/null) || BOLD=
REV=$(tput smso 2>/dev/null) || REV=
#######################################
# Print the usage text for this script and terminate.
# Globals:   SCRIPT, COLUMN, THREADS, BOLD, NORM (read)
# Arguments: none
# Outputs:   the help text on stdout
# Returns:   does not return; exits the script with status 1
#######################################
HELP() {
  printf '\nHelp documentation for %s.\n\n' "${SCRIPT}"
  printf 'Basic usage: ./%s\n\n' "${SCRIPT}"
  printf '%s\n' "Command line switches are optional. The following switches are recognized."
  printf '%s\n' "-f csv file = required should be last argument"
  printf '%s\n' "-c column, default ${COLUMN}"
  printf '%s\n' "-t threads, default ${THREADS}"
  printf '%s\n' "-r rename, should be renamed - work in progress here because this is really specific renaming"
  printf '%s\n\n' "-h --Displays this help message. No further functions are performed."
  # Reset the terminal attributes after the bold example so the bold mode
  # does not leak into whatever the terminal prints after the script exits.
  printf 'Example: ./%s%s -rc 2 -f file.csv%s\n\n' "${BOLD}" "${SCRIPT}" "${NORM}"
  exit 1
}
# Remember how many arguments were given; with none at all there is nothing
# to do, so show the usage text and stop.
NUMARGS=$#
[[ "$NUMARGS" -ne 0 ]] || {
  HELP
  exit 1
}
#######################################
# Parse the command line switches into the script's settings.
# Globals:   THREADS, COLUMN, RENAME, FILE (written)
#            OPTIND (advanced by getopts; used for the shift afterwards)
#            BOLD, NORM (read, only for error messages)
# Arguments: the script's command line ("$@")
# Outputs:   an error line on stdout for a bad switch
# Returns:   0 on success; on an unknown switch or a switch that lacks its
#            value it calls HELP, which terminates the script
#######################################
parse_options() {
  local flag
  # The leading ':' selects silent error reporting so the ':' and '?' arms
  # below can print their own messages. -t, -c and -f take a value;
  # -r and -h are plain flags (so "-rc 2" means "-r -c 2").
  while getopts ':t:c:rhf:' flag; do
    case "$flag" in
      t) THREADS=$OPTARG ;;
      c) COLUMN=$OPTARG ;;
      r) RENAME=true ;;
      f) FILE=$OPTARG ;;
      h) HELP ;; # show help
      :)
        printf '\nOption -%s%s%s requires an argument.\n' "$BOLD" "$OPTARG" "$NORM"
        HELP
        ;;
      \?)
        printf '\nOption -%s%s%s not allowed.\n' "$BOLD" "$OPTARG" "$NORM"
        HELP
        ;;
    esac
  done
}

FILE=
parse_options "$@"
# Drop the switches that were parsed; what remains are positional arguments.
shift $((OPTIND-1))
# The csv file is taken from -f, or else from the first remaining argument.
FILE=${FILE:-${1:-}}
# Download every URL found in column $COLUMN of the csv file $FILE into
# ./images, running up to $THREADS wget jobs at once through GNU parallel.
# The first line of the csv is skipped as a header and only cells that
# contain "http" are used.

# Without a readable input file the pipeline would wait on stdin; fail early.
if [[ -z "${FILE:-}" || ! -r "$FILE" ]]; then
  printf 'Error: csv file "%s" is missing or not readable.\n' "${FILE:-}" >&2
  exit 1
fi

for tool in parallel wget; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    printf 'Error: required command "%s" was not found in PATH.\n' "$tool" >&2
    exit 1
  fi
done

# Command template executed by parallel for each URL ({} = URL, {/} = its
# basename). Backticks are kept on purpose: parallel runs the template in the
# user's shell, which is not necessarily bash.
if [[ "$RENAME" == true ]]; then
  # Very specific renaming: keeps the 1st and 3rd "."/"_"-separated parts of
  # the downloaded file name and joins them with a dot.
  job='wget {}; mv {/} `echo "{/}" | tr "." "_" | cut -d "_" -f1,3 | tr "_" "."`'
else
  job='wget {};'
fi

mkdir -p images || exit 1

# Carriage returns are removed up front so CRLF and LF files behave the same
# and parallel can split its input on plain newlines; surrounding whitespace
# is trimmed from every URL.
tail -n +2 <"$FILE" \
  | cut -d ',' -f "$COLUMN" \
  | grep http \
  | tr -d '\r' \
  | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' \
  | (cd images && parallel -j "$THREADS" --gnu "$job")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment