Created
June 1, 2020 12:37
-
-
Save nknskn/bdc002d7d514143cde3d74f9a0a71067 to your computer and use it in GitHub Desktop.
crawl web contents and search hidden pages
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Crawl a web site: mirror its contents with wget, generate a wordlist
# with CeWL, and hunt for "hidden" pages referenced by href/src/action
# attributes that were never reached by the crawler.

# Print usage information to stdout and exit with status 1.
function Usage () {
    echo "Usage: $(basename "$0") -d <domain> -p <port> [-s]"
    echo " -d: domain"
    echo " -p: port"
    echo " -s: ssl option"
    echo
    echo "Depends on : wget, python3, CeWL, cutycapt, tree, openssl"
    exit 1
}
# Terminal color escape sequences (used with `echo -e`).
COLOR_RED="\e[31m"
COLOR_GRN="\e[32m"
COLOR_YLW="\e[33m"
COLOR_BLU="\e[34m"
COLOR_OFF="\e[m"
CURRENTDIR=$(pwd)

#---------------------------------
# check args
#---------------------------------
SSLFLAG=0
SCHEMA="http"
while getopts ":d:p:sh" OPT
do
    case ${OPT} in
        d) TDOMAIN=$OPTARG;;
        p) TPORT=$OPTARG;;
        # BUG FIX: was `set SCHEMA="https"`, which replaces the positional
        # parameters instead of assigning the variable.
        s) SSLFLAG=1
           SCHEMA="https";;
        h) Usage ;;
        # `:` fires when an option is missing its argument, not when the
        # option itself is unknown — report it accordingly.
        :) echo "[!] Option -${OPTARG} requires an argument."; Usage ;;
        \?) echo "[!] Undefined option."; Usage ;;
    esac
done
#---------------------------------
# Preparation
#---------------------------------
echo -e -n "[I] Updating CeWL..."
# Run `bundle install` inside CeWL's own directory so its gems are current.
cd "$(dirname "$(locate cewl.rb)")"
bundle install 1>/dev/null 2>&1
echo "done."

echo -e "[I] Preparation for working directory."
mkdir -p "${CURRENTDIR}/crawled/${TDOMAIN}" && cd "${CURRENTDIR}/crawled/${TDOMAIN}"
echo -e " pwd: $(pwd)\n"

TARGET="${SCHEMA}://${TDOMAIN}:${TPORT}"
echo -e -n "[I] Scan target:\t${TARGET}"
if [ "${SSLFLAG}" -eq 1 ]; then
    echo ", SSL"
    #---------------------------------
    # Checking certificate
    #---------------------------------
    # Pull the subjectAltName (DNS:) entries out of the served certificate.
    SSLSUBJECTS=$(echo | openssl s_client -connect "${TDOMAIN}:${TPORT}" 2>/dev/null | openssl x509 -noout -text | grep "DNS:" | sed 's/^[ \t]*//')
    echo -e "[I] Subjects:\t\t${SSLSUBJECTS}"
    # Warn when the target domain does not appear among the SAN entries.
    if echo "${SSLSUBJECTS}" | grep -q -i "${TDOMAIN}"; then
        echo -e "[-] looks certificate is ok."
    else
        echo -e "${COLOR_RED}[+]${COLOR_OFF} Subjects is mismatching with the domain!!"
    fi
else
    echo
fi
echo
#---------------------------------
# Downloading contents
#---------------------------------
# Mirror the whole target site recursively with wget (ignoring robots.txt
# restrictions), then fetch robots.txt itself for later inspection.
# Globals read: TARGET, TDOMAIN. Writes: mirror tree + ${TDOMAIN}.wget-log.
function downloadContents () {
    echo -e -n "[I] Downloading contents..."
    wget -e robots=off --strict-comments --no-check-certificate --recursive --level inf --random-wait --convert-links --no-parent --adjust-extension "${TARGET}" --output-file="${TDOMAIN}.wget-log"
    wget -e robots=off --strict-comments --no-check-certificate --recursive --level inf --random-wait --convert-links --no-parent --adjust-extension "${TARGET}/robots.txt"
    echo -e "completed!"
    echo
}
#---------------------------------
# Creating dictionary using cewl
#---------------------------------
# Serve the mirrored site over a local HTTP server and run CeWL against it
# to build a wordlist at ${TDOMAIN}.txt; the server is killed afterwards.
# Globals read: TDOMAIN.
function genWordlist () {
    echo -e -n "[I] Generating wordlist by CeWL ... "
    python3 -m http.server 1>/dev/null 2>&1 &
    SERVERPID=$!
    cewl -w "${TDOMAIN}.txt" "http://localhost:8000/${TDOMAIN}" 1>/dev/null 2>&1
    # Tear down the throwaway HTTP server.
    kill "${SERVERPID}"
    echo -e "completed!"
    echo
}
#---------------------------------
# Extract urls from "href" and "src"
#---------------------------------
# Check every URL in the given link-list file: a page whose name is absent
# from the locally mirrored tree was never reached by the crawler, so it is
# reported as a possible hidden page and screenshotted with cutycapt.
#   $1 - file containing one extracted URL per line
# Globals read: SCHEMA, TDOMAIN, TPORT, COLOR_RED, COLOR_OFF.
function searchHiddenPage () {
    linklistfile=$1
    echo -e "[I] Checking:\t${linklistfile}"
    IGNORELIST="# about:blank"
    while read -r line; do
        # Last path component with any #fragment stripped.
        PAGENAME=$(echo "${line}" | rev | cut -d"/" -f1 | rev | cut -d"#" -f1)
        if echo "${IGNORELIST}" | grep -s -q "${PAGENAME}"; then
            echo -e "\tSkip: ${line}"
        else
            # Exit status 1 means "not found in the mirror" (2 would be a
            # tree/grep error, which we deliberately do not flag).
            tree -if | grep -s -q "${PAGENAME}"
            if [ $? -eq 1 ]; then
                if echo "${line}" | grep -s -q -E "https?://"; then
                    SUSPICIOUSURL="${line}"
                else
                    SUSPICIOUSURL="${SCHEMA}://${TDOMAIN}:${TPORT}/${line}"
                fi
                PNGFILE="${TDOMAIN}-${TPORT}_${PAGENAME}.png"
                echo -e "${COLOR_RED}[+]${COLOR_OFF} Suspicious page: " "${line}"
                echo -e "    URL: ${SUSPICIOUSURL}"
                cutycapt --url="${SUSPICIOUSURL}" --out="${PNGFILE}"
            fi
        fi
    done < "${linklistfile}"
    echo
}
# Extract every href/src/action URL from the mirrored non-image files,
# deduplicate the lists, and run searchHiddenPage over each one. Any
# cutycapt screenshots produced are gathered into a browsable HTML page.
# Globals read: TDOMAIN, COLOR_BLU, COLOR_OFF.
function extractLinks () {
    echo -e "[I] Extracting links..."
    # Most recently modified directory in cwd = the wget mirror root.
    TDIR=$(ls -tF1 | grep "/" | head -n1)
    # mapfile keeps paths with spaces intact (the old `ls -d1 $(find …)`
    # array build word-split every path).
    mapfile -t FILELIST < <(find "${TDIR}" -type f -not -name "*.jpg" -not -name "*.png" -not -name "*.gif")
    # Ensure the .tmp files exist even when nothing matches, so the
    # sort/sed stage below never errors on a missing file.
    touch "${TDOMAIN}-href.tmp" "${TDOMAIN}-src.tmp" "${TDOMAIN}-action.tmp"
    for f in "${FILELIST[@]}"
    do
        grep 'href="' "${f}" | sed 's/.*href="\([^"]*\)".*$/\1/' >> "${TDOMAIN}-href.tmp"
        grep "href='" "${f}" | sed "s/.*href='\([^']*\)'.*$/\1/" >> "${TDOMAIN}-href.tmp"
        grep 'src="' "${f}" | sed 's/.*src="\([^"]*\)".*$/\1/' >> "${TDOMAIN}-src.tmp"
        grep "src='" "${f}" | sed "s/.*src='\([^']*\)'.*$/\1/" >> "${TDOMAIN}-src.tmp"
        grep 'action="' "${f}" | sed 's/.*action="\([^"]*\)".*$/\1/' >> "${TDOMAIN}-action.tmp"
        grep "action='" "${f}" | sed "s/.*action='\([^']*\)'.*$/\1/" >> "${TDOMAIN}-action.tmp"
    done
    # Deduplicate and drop empty lines (no useless `cat |`).
    sort -u "${TDOMAIN}-href.tmp"   | sed '/^$/d' > "${TDOMAIN}-href.txt"
    sort -u "${TDOMAIN}-src.tmp"    | sed '/^$/d' > "${TDOMAIN}-src.txt"
    sort -u "${TDOMAIN}-action.tmp" | sed '/^$/d' > "${TDOMAIN}-action.txt"
    rm -f ./*.tmp
    echo -e "[I] Possible hidden pages searching..."
    searchHiddenPage "${TDOMAIN}-href.txt"
    searchHiddenPage "${TDOMAIN}-src.txt"
    searchHiddenPage "${TDOMAIN}-action.txt"
    # Collect any screenshots into a single review page (stderr silenced:
    # `ls` complains loudly when no *.png exists).
    if ls *.png > /dev/null 2>&1 ; then
        echo "<HTML><BODY><BR>" > SuspiciousPagesImage.html
        ls -1 *.png | awk -F : '{ print $1":\n<BR><IMG SRC=\""$1""$2"\" width=600><BR>"}' >> SuspiciousPagesImage.html
        echo "</BODY></HTML>" >> SuspiciousPagesImage.html
        echo -e "${COLOR_BLU}[+]${COLOR_OFF} Created html for images:\t$(pwd)/SuspiciousPagesImage.html"
        firefox ./SuspiciousPagesImage.html &
    fi
    echo -e "[I] Completed!"
    echo
}
#---------------------------------
# Search version strings
#---------------------------------
# TBD
# Download the burp-suite-software-version-checks match rules (once) and
# grep every mirrored non-image file with each rule's regex to surface
# software version banners.
function searchSoftwareVersion () {
    RULEFILEURL="https://raw.githubusercontent.com/augustd/burp-suite-software-version-checks/master/src/main/resources/burp/match-rules.tab"
    RULEFILE="match_rules.tab"
    if [ ! -e "${RULEFILE}" ]; then
        wget -e robots=off "${RULEFILEURL}" -O "${RULEFILE}" -q
    fi
    # Most recently modified directory in cwd = the wget mirror root.
    TDIR=$(ls -tF1 | grep "/" | head -n1)
    mapfile -t FILELIST < <(find "${TDIR}" -type f -not -name "*.jpg" -not -name "*.png" -not -name "*.gif")
    for f in "${FILELIST[@]}"
    do
        # The rule file is TAB-separated; field 1 is the match regex.
        # Scoping IFS to `read` avoids the original global IFS mutation.
        while IFS=$'\t' read -r -a LINE
        do
            PATTERN=${LINE[0]}
            grep -E "${PATTERN}" "${f}" 2>/dev/null
        done < "./${RULEFILE}"
    done
}
#searchSoftwareVersion
#---------------------------------
# Search admin console/login page
#---------------------------------
#TBD

# Main flow: mirror the site, build a wordlist, then hunt for hidden links.
downloadContents
genWordlist
extractLinks
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment