Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Scrape a website with wget and extract the unique email addresses from it, or query each website's whois record to collect email addresses
#!/bin/bash
#
# Mirror every domain listed in a file with wget, then extract the unique
# e-mail addresses found in the downloaded files.
#
# Usage:   script.sh DOMAINS.txt TMP_DIR
#   DOMAINS.txt  text file with one domain per line
#   TMP_DIR      directory that receives the mirrored sites; the result for
#                each domain is written to TMP_DIR/<domain>.emails.txt
#
# Exit status: 1 on bad usage or an unreadable domain list.
#
# `set -e` / `pipefail` are deliberately not enabled: a failing download or a
# grep without matches must not abort the processing of the other domains.

DOMAINS=${1:-}
TMP_DIR=${2:-}

if [[ -z "$DOMAINS" || -z "$TMP_DIR" ]]; then
  printf '%s\n' \
    "Usage : script.sh DOMAINS.txt (with one domain per line) TMP_DIR" \
    "Example : ./script.sh mydomains.txt /tmp" >&2
  exit 1
fi

if [[ ! -r "$DOMAINS" ]]; then
  printf '%s\n' "Cannot read domain list: $DOMAINS" >&2
  exit 1
fi

# Pass 1: download each site below TMP_DIR.
# `|| [[ -n "$domain" ]]` keeps a last line that has no trailing newline.
while read -r domain || [[ -n "$domain" ]]; do
  [[ -n "$domain" ]] || continue # skip blank lines
  printf '%s\n' "----- Start scraping $domain - Please wait -----"
  # --domains takes ONE comma-separated word. A space after the comma would
  # turn "www.$domain" into an extra start URL instead of a list entry.
  wget \
    --quiet \
    --recursive \
    --page-requisites \
    --html-extension \
    --convert-links \
    --restrict-file-names=windows \
    --domains "$domain,www.$domain" \
    --no-parent \
    --directory-prefix="$TMP_DIR" \
    --progress=bar \
    --tries=3 \
    "$domain"
done < "$DOMAINS"

### Scraping and grepping are kept as separate passes to prepare a
### multiprocess version of the script (coming soon).

# Pass 2: pull e-mail addresses out of each mirrored tree and de-duplicate.
while read -r domain || [[ -n "$domain" ]]; do
  [[ -n "$domain" ]] || continue
  printf '%s\n' "----- Start email finding and filtering for $domain - Please wait -----"
  # -o prints only the matched text, -h suppresses file names, so the output
  # is one address per line; sort -u removes duplicates.
  grep -R -E -oh "\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,6}\b" \
    "$TMP_DIR/$domain"/* \
    | sort -u > "$TMP_DIR/$domain.emails.txt"
  printf '%s\n' "----- Emails saved to $TMP_DIR/$domain.emails.txt -----"
done < "$DOMAINS"

printf '%s\n' "----- Finished -----"
#!/bin/bash
#
# Collect the e-mail addresses that appear in the whois output of each domain
# listed in a file and write them, sorted and de-duplicated, to OUTPUT.
#
# Usage:   script.sh DOMAINS.txt OUTPUT.txt
#   DOMAINS.txt  text file with one domain per line
#   OUTPUT.txt   file that receives the unique addresses (overwritten)
#
# Exit status: 1 on bad usage, an unreadable domain list, or if no scratch
# file can be created.

DOMAINS=${1:-}
OUTPUT=${2:-}

if [[ -z "$DOMAINS" || -z "$OUTPUT" ]]; then
  printf '%s\n' \
    "Usage : script.sh DOMAINS.txt (with one domain per line) OUTPUT.txt" \
    "Example : ./script.sh mydomains.txt /tmp/output.txt" >&2
  exit 1
fi

if [[ ! -r "$DOMAINS" ]]; then
  printf '%s\n' "Cannot read domain list: $DOMAINS" >&2
  exit 1
fi

# Private scratch file instead of a fixed, predictable name in /tmp: safe
# against symlink clobbering and against two runs sharing the same file.
# The EXIT trap removes it on every exit path, including interruption.
scratch=$(mktemp) || {
  printf '%s\n' "Cannot create temporary file" >&2
  exit 1
}
trap 'rm -f -- "$scratch"' EXIT

# `|| [[ -n "$domain" ]]` keeps a last line that has no trailing newline.
while read -r domain || [[ -n "$domain" ]]; do
  [[ -n "$domain" ]] || continue # skip blank lines
  printf '%s\n' "Looking $domain"
  whois "$domain" \
    | grep -E -o "\b[a-zA-Z0-9.-]+@[a-zA-Z0-9.-]+\.[a-zA-Z0-9.-]+\b" >> "$scratch"
  # Pause between lookups (presumably to avoid whois rate limiting; the
  # 10 second value comes from the original script).
  sleep 10
done < "$DOMAINS"

sort -u "$scratch" > "$OUTPUT"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment