Last active Oct 21, 2019
Finding WordPress sites in the Alexa top 1 million list.
# Scan the hosts listed in top-1m.csv and record the ones that look like
# WordPress installs.
#
# Files, all relative to the current directory:
#   top-1m.csv   input, one "POS,HOSTNAME" line per site
#   checked.csv  every hostname already probed, so a re-run resumes where the
#                previous one stopped
#   topwp.csv    output, "POS,HOSTNAME" for each host detected as WordPress

#######################################
# Test whether a host answers like a WordPress install.
# Arguments: $1 - hostname to probe
# Returns:   0 if the response headers of $1/wp-login.php contain the
#            "=WP+Cookie+check;" cookie value, non-zero otherwise
#######################################
is_wordpress() {
  # Alternative checks that are intentionally left disabled:
  #
  # Look for `/wp-content/` in the HTML output
  # curl -s -L -m 5 "$1" 2>&1 | tee "html/$1.txt" | grep "/wp-content/"
  #
  # Look for readme.html, skipping because we check for the cookie already
  # (NOTE(review): the grep pattern is empty in the original; it was
  # presumably lost and needs to be restored before re-enabling this.)
  # curl -s -L -m 5 "$1/readme.html" 2>&1 | grep ""
  #
  # Look for "visit" in the response headers (presumably meant to detect
  # WordPress.com sites), no need again because of the cookie check
  # curl -s -L -m 5 --head "$1" 2>&1 | grep "visit"

  # Check the login cookie: fetch only the headers of wp-login.php (following
  # redirects, 5 second limit) and look for the cookie value in them.
  curl -s -L -m 5 --head "$1/wp-login.php" 2>&1 | grep -q "=WP+Cookie+check;"
}

#######################################
# Probe every host from top-1m.csv that is not yet listed in checked.csv.
# Outputs: one status line per host on stdout; appends to topwp.csv and
#          checked.csv
# Returns: non-zero if top-1m.csv cannot be read
#######################################
scan_hosts() {
  # Locals on purpose: bash maintains its own HOSTNAME variable, which the
  # loop must not overwrite.
  local pos host

  if [[ ! -r top-1m.csv ]]; then
    printf 'top-1m.csv: cannot read input file\n' >&2
    return 1
  fi

  # `|| [[ -n "$pos" ]]` keeps a final line that lacks a trailing newline.
  while IFS=',' read -r pos host || [[ -n "$pos" ]]; do
    # Nothing to probe on blank or hostname-less lines.
    [[ -n "$host" ]] || continue

    # checked.csv does not exist before the first host has been probed.
    if [[ -f checked.csv ]] && grep -qxF -- "$host" checked.csv; then
      echo "Skipping $host, already checked."
    else
      if is_wordpress "$host"; then
        echo "$pos - $host is WP"
        echo "$pos,$host" >> topwp.csv
      else
        echo "$pos - $host is not WP"
      fi
      # Remember the host whatever the verdict so it is never probed twice.
      printf '%s\n' "$host" >> checked.csv
    fi
  done < top-1m.csv
}

scan_hosts
