Skip to content

Instantly share code, notes, and snippets.

@nknskn
Created June 1, 2020 12:37
Show Gist options
  • Save nknskn/bdc002d7d514143cde3d74f9a0a71067 to your computer and use it in GitHub Desktop.
Save nknskn/bdc002d7d514143cde3d74f9a0a71067 to your computer and use it in GitHub Desktop.
Crawl web contents and search for hidden pages.
#!/bin/bash
#
# Crawl a target web site and search for linked-but-missing ("hidden") pages.
# Depends on: wget, python3, CeWL, cutycapt, tree, openssl

# Print usage information and exit with status 1.
function Usage () {
  # Quote "$0" so paths containing spaces don't break basename.
  echo "Usage: $(basename "$0") -d <domain> -p <port> [-s]"
  echo " -d: domain"
  echo " -p: port"
  echo " -s: ssl option"
  echo
  echo "Depends on : wget, python3, CeWL, cutycapt, tree, openssl"
  exit 1
}
# ANSI escape sequences for colored status output (interpreted by `echo -e`).
COLOR_RED='\e[31m'
COLOR_GRN='\e[32m'
COLOR_YLW='\e[33m'
COLOR_BLU='\e[34m'
COLOR_OFF='\e[m'

# Remember where the script was launched; results land under this directory.
CURRENTDIR=$(pwd)

#---------------------------------
# check args
#---------------------------------
# Defaults: plain HTTP unless -s is given.
SSLFLAG=0
SCHEMA="http"
while getopts ":d:p:sh" OPT
do
  case ${OPT} in
    d) TDOMAIN=$OPTARG ;;
    p) TPORT=$OPTARG ;;
    s) SSLFLAG=1
       # BUG FIX: the original ran `set SCHEMA="https"`, which assigns the
       # positional parameters ($1) and leaves SCHEMA as "http". Assign the
       # variable directly so -s actually switches the target schema.
       SCHEMA="https" ;;
    h) Usage ;;
    :) echo "[!] Option -${OPTARG} requires an argument."; Usage ;;
    \?) echo "[!] Undefined option."; Usage ;;
  esac
done
#---------------------------------
# Preparation
#---------------------------------
# Both -d and -p are mandatory; bail out early instead of building a
# malformed target URL below.
if [ -z "${TDOMAIN}" ] || [ -z "${TPORT}" ]; then
  echo "[!] -d and -p are required."
  Usage
fi
echo -e -n "[I] Updating CeWL..."
# `locate` can return several matches; use the first one only so dirname
# gets a single argument. Abort if the cd fails.
cd "$(dirname "$(locate cewl.rb | head -n1)")" || exit 1
bundle install 1>/dev/null 2>&1
echo "done."
echo -e "[I] Preparation for working directory."
mkdir -p "${CURRENTDIR}/crawled/${TDOMAIN}" && cd "${CURRENTDIR}/crawled/${TDOMAIN}" || exit 1
echo -e " pwd: $(pwd)\n"
TARGET="${SCHEMA}://${TDOMAIN}:${TPORT}"
echo -e -n "[I] Scan target:\t${TARGET}"
if [ "${SSLFLAG}" -eq 1 ]; then
  echo ", SSL"
  #---------------------------------
  # Checking certificate
  #---------------------------------
  # Pull the SAN (DNS:) entries out of the certificate the server presents.
  SSLSUBJECTS=$(echo | openssl s_client -connect "${TDOMAIN}:${TPORT}" 2>/dev/null | openssl x509 -noout -text | grep "DNS:" | sed 's/^[ \t]*//')
  echo -e "[I] Subjects:\t\t${SSLSUBJECTS}"
  # Quoted pattern + `--` guards against empty/odd domain strings.
  if echo "${SSLSUBJECTS}" | grep -q -i -- "${TDOMAIN}"; then
    echo -e "[-] looks certificate is ok."
  else
    echo -e "${COLOR_RED}[+]${COLOR_OFF} Subjects is mismatching with the domain!!"
  fi
else
  echo
fi
echo
#---------------------------------
# Downloading contents
#---------------------------------
# Mirror the whole site (plus robots.txt) into the current directory.
# Reads globals: TARGET, TDOMAIN.
function downloadContents () {
  echo -e -n "[I] Downloading contents..."
  # Shared flag set for a full recursive mirror; kept in an array so both
  # wget invocations stay in sync and arguments survive word-splitting.
  local wget_opts=(-e robots=off --strict-comments --no-check-certificate \
    --recursive --level inf --random-wait --convert-links --no-parent --adjust-extension)
  wget "${wget_opts[@]}" "${TARGET}" --output-file "${TDOMAIN}.wget-log"
  wget "${wget_opts[@]}" "${TARGET}/robots.txt"
  echo -e "completed!"
  echo
}
#---------------------------------
# Creating dictionary using cewl
#---------------------------------
# Serve the mirrored tree over a throwaway local HTTP server and run CeWL
# against it to generate a wordlist. Reads global: TDOMAIN.
# Writes: ${TDOMAIN}.txt
function genWordlist () {
  echo -e -n "[I] Generating wordlist by CeWL ... "
  # NOTE(review): port 8000 is http.server's default and may already be in
  # use — CeWL would then scan the wrong service; confirm before relying on it.
  python3 -m http.server 1>/dev/null 2>&1 &
  SERVERPID=$!
  cewl -w "${TDOMAIN}.txt" "http://localhost:8000/${TDOMAIN}" 1>/dev/null 2>&1
  # Stop the temporary server and reap it so no zombie process is left.
  kill "${SERVERPID}" 2>/dev/null
  wait "${SERVERPID}" 2>/dev/null
  echo -e "completed!"
  echo
}
#---------------------------------
# Extract urls from "href" and "src"
#---------------------------------
# For each URL/path in the given list file, check whether the referenced page
# exists in the mirrored tree; pages that are linked but were never downloaded
# are reported as suspicious and screenshotted with cutycapt.
# $1 - file containing one URL/path per line.
# Reads globals: SCHEMA, TDOMAIN, TPORT, COLOR_RED, COLOR_OFF.
function searchHiddenPage () {
  local linklistfile=$1
  local line PAGENAME SUSPICIOUSURL PNGFILE
  echo -e "[I] Checking:\t${linklistfile}"
  # Link targets that never correspond to a real page.
  local IGNORELIST="# about:blank"
  # IFS= read -r preserves whitespace and backslashes in each line.
  while IFS= read -r line; do
    # Last path component with any #fragment stripped.
    PAGENAME=$(echo "${line}" | rev | cut -d"/" -f1 | rev | cut -d"#" -f1)
    # -F: treat the page name as a literal string, not a regex.
    if echo -e "${IGNORELIST}" | grep -s -q -F -- "${PAGENAME}"; then
      echo -e "\tSkip: ${line}"
    else
      # A page that is linked to but absent from the mirror is suspicious.
      if ! tree -if | grep -s -q -F -- "${PAGENAME}"; then
        if echo "${line}" | grep -s -q -E "https?://"; then
          SUSPICIOUSURL="${line}"
        else
          SUSPICIOUSURL="${SCHEMA}://${TDOMAIN}:${TPORT}/${line}"
        fi
        PNGFILE="${TDOMAIN}-${TPORT}_${PAGENAME}.png"
        echo -e "${COLOR_RED}[+]${COLOR_OFF} Suspicious page: " "${line}"
        echo -e " URL: ${SUSPICIOUSURL}"
        cutycapt --url="${SUSPICIOUSURL}" --out="${PNGFILE}"
      fi
    fi
  done < "${linklistfile}"
  echo
}
# Harvest href/src/action attribute values from every mirrored text file,
# deduplicate them, and run each resulting list through searchHiddenPage.
# Reads globals: TDOMAIN, COLOR_BLU, COLOR_OFF. Calls: searchHiddenPage.
function extractLinks () {
  echo -e "[I] Extracting links..."
  local f attr
  # Most recently modified directory == the wget mirror of the target host.
  TDIR=$(ls -tF1 | grep "/" | head -n1)
  # Pre-create the temp files so the sort step never sees a missing file
  # when a page contained no links at all.
  : > "${TDOMAIN}-href.tmp"; : > "${TDOMAIN}-src.tmp"; : > "${TDOMAIN}-action.tmp"
  # Stream file names from find instead of word-splitting `ls $(find ...)`,
  # which broke on paths containing spaces.
  while IFS= read -r f; do
    grep 'href="' "${f}" | sed 's/.*href="\([^"]*\)".*$/\1/' >> "${TDOMAIN}-href.tmp"
    grep "href='" "${f}" | sed "s/.*href='\([^']*\)'.*$/\1/" >> "${TDOMAIN}-href.tmp"
    grep 'src="' "${f}" | sed 's/.*src="\([^"]*\)".*$/\1/' >> "${TDOMAIN}-src.tmp"
    grep "src='" "${f}" | sed "s/.*src='\([^']*\)'.*$/\1/" >> "${TDOMAIN}-src.tmp"
    grep 'action="' "${f}" | sed 's/.*action="\([^"]*\)".*$/\1/' >> "${TDOMAIN}-action.tmp"
    grep "action='" "${f}" | sed "s/.*action='\([^']*\)'.*$/\1/" >> "${TDOMAIN}-action.tmp"
  done < <(find "${TDIR}" -type f -not -name "*.jpg" -not -name "*.png" -not -name "*.gif")
  # Deduplicate and drop empty lines (sort reads the file directly; no cat).
  for attr in href src action; do
    sort -u "${TDOMAIN}-${attr}.tmp" | sed '/^$/d' > "${TDOMAIN}-${attr}.txt"
  done
  rm -f ./*.tmp
  echo -e "[I] Possible hidden pages searching..."
  searchHiddenPage "${TDOMAIN}-href.txt"
  searchHiddenPage "${TDOMAIN}-src.txt"
  searchHiddenPage "${TDOMAIN}-action.txt"
  # Build a simple HTML gallery of the cutycapt screenshots, if any exist.
  # compgen -G tests the glob silently (`ls *.png` printed errors on no match).
  if compgen -G "*.png" > /dev/null; then
    echo "<HTML><BODY><BR>" > SuspiciousPagesImage.html
    for f in *.png; do
      printf '%s:\n<BR><IMG SRC="%s" width=600><BR>\n' "${f}" "${f}" >> SuspiciousPagesImage.html
    done
    echo "</BODY></HTML>" >> SuspiciousPagesImage.html
    echo -e "${COLOR_BLU}[+]${COLOR_OFF} Created html for images:\t$(pwd)/SuspiciousPagesImage.html"
    firefox ./SuspiciousPagesImage.html &
  fi
  echo -e "[I] Completed!"
  echo
}
#---------------------------------
# Search version strings
#---------------------------------
# TBD
# Download the burp-suite software-version match rules (once) and grep every
# mirrored text file for each rule's regex, printing matching lines.
function searchSoftwareVersion () {
  local f PATTERN
  RULEFILEURL="https://raw.githubusercontent.com/augustd/burp-suite-software-version-checks/master/src/main/resources/burp/match-rules.tab"
  RULEFILE="match_rules.tab"
  if [ ! -e "${RULEFILE}" ]; then
    wget -e robots=off "${RULEFILEURL}" -O "${RULEFILE}" -q
  fi
  TDIR=$(ls -tF1 | grep "/" | head -n1)
  # Stream file names from find; the old `ls $(find ...)` array word-split
  # on spaces. Per rule line, the first tab-separated field is the regex.
  while IFS= read -r f; do
    # IFS is scoped to `read` here, so the global IFS is never clobbered.
    while IFS=$'\t' read -r PATTERN _; do
      [ -n "${PATTERN}" ] || continue
      grep -E "${PATTERN}" "${f}" 2>/dev/null
    done < "./${RULEFILE}"
  done < <(find "${TDIR}" -type f -not -name "*.jpg" -not -name "*.png" -not -name "*.gif")
}
# Version-string scan is left disabled (marked TBD above).
#searchSoftwareVersion
#---------------------------------
# Search admin console/login page
#---------------------------------
#TBD
# Main flow: mirror the site, build a CeWL wordlist, then hunt for pages
# that are linked but missing from the mirror.
downloadContents
genWordlist
extractLinks
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment