Skip to content

Instantly share code, notes, and snippets.

@HexagonWin
Last active August 15, 2023 15:53
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Embed
What would you like to do?

Revisions

  1. HexagonWin revised this gist Aug 15, 2023. 2 changed files with 103 additions and 89 deletions.
    13 changes: 9 additions & 4 deletions b_scrape.sh
    @@ -1,3 +1,4 @@
    QUERYNUM=1
    # Completely free to modify or whatever. Made by hexagonwin <hexagonwin@disroot.org>
    # This thing is FREE SOFTWARE or whatever you choose to call it
    FIRST=1
    @@ -14,7 +15,6 @@ OUTFILE=urls
    NewlyAdded=999
    #QUERY="site:*.egloos.com"
    QUERYFILE=query
    QUERYNUM=1
    eko(){
    echo -ne "$1"
    echo -ne "$1" >> $LOGFILE
    @@ -46,11 +46,15 @@ rm -f $COOKIE
    eko "Init BDomainListRetrieve Agent $VER\n"
    eko " -Log $LOGFILE, cookie $COOKIE, output to $OUTFILE on $OUTPUT, $(wc -l < $QUERYFILE) queries\n"

    while read -r QUERY; do
    #while read -r QUERY_ORIG; do
    while true; do
    QUERY_ORIG=$(sed -n "${QUERYNUM}p" < $QUERYFILE)

    # Only run when $QUERY is not empty
    if [ -z "$QUERY" ]; then
    if [ -z "$QUERY_ORIG" ]; then
    eko "Skipping #$QUERYNUM; Query is empty\n\n"
    else
    QUERY="$QUERY_ORIG site:egloos.com -pds"
    eko "[$QUERY] Query #$QUERYNUM Starting\n"
    while ! [ $NewlyAdded -eq 0 ]; do
    # We count output lines, we get newly added ones via this
    @@ -147,7 +151,8 @@ while read -r QUERY; do
    ((QUERYNUM++))
    FIRST=1
    PAGE=1
    done < "$QUERYFILE"
    done
    # done < "$QUERYFILE"

    eko "\n -> Finished fetching..\n"
    Finish_Cleanup
    179 changes: 94 additions & 85 deletions g_scrape.sh
    @@ -1,11 +1,11 @@
    # Completely free to modify or whatever. Made by hexagonwin <hexagonwin@disroot.org>
    # This thing is FREE SOFTWARE or whatever you choose to call it
    FIRST=169 #0
    PAGE=18 #1
    QUERYNUM=320 #1
    FIRST=150 #0
    PAGE=16 #1
    QUERYNUM=22 #1

    UA="Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25"
    DELAY="2"
    DELAY="1"
    OUTPUT=./out
    VER=1.2-and
    LOGFILE=$OUTPUT/google_log
    @@ -31,103 +31,112 @@ rm -f $COOKIE
    eko "Init GDomainListRetrieve Agent $VER\n"
    eko " -Log $LOGFILE, cookie $COOKIE, output to $OUTFILE on $OUTPUT, $(wc -l < $QUERYFILE) queries\n"

    while read -r QUERY; do
    eko "[$QUERY] Query #$QUERYNUM Starting\n"
    while ! [ $NewlyAdded -eq 0 ]; do
    # We count output lines, we get newly added ones via this
    SavedLines=$(wc -l < $OUTPUT/$OUTFILE)
    # We save saved html -> ./WEB/GOOGLEHTML/From*.htm
    eko "[$QUERY] PAGE $PAGE (from $FIRST)"
    STATUS=$(curl -s -w "%{http_code}" -b $COOKIE -c $COOKIE "https://www.google.com/search?q=$(echo $QUERY | jq '@uri' -jRr)&start=$FIRST" -A "$UA" -o ${OUTPUT}/GOOGLEHTML/Q${QUERYNUM}_P${PAGE}.htm)
    if [ $STATUS -eq 302 ]; then
    eko "..302!\n -> We got an IP ban."
    rm $COOKIE
    eko "\n -> Cleared cookie.."
    eko "\n -> Resetting IP.."
    CURIP=$(curl -s icanhazip.com)
    eko "$CURIP"
    svc data disable
    eko "..off."
    sleep 2
    svc data enable
    eko ".on.."
    # Wait until we get inet
    while true; do
    if [ $(ip add sh dev rmnet0 | grep inet | wc -l) -ne 0 ]; then
    break
    fi
    sleep 1
    done
    eko "online"
    sleep 2
    NEWIP=$(curl -s icanhazip.com)
    eko "..$NEWIP.."
    if [ "$CURIP" = "$NEWIP" ]; then
    eko "ERROR!\n -> Old and new IPs are identical.\n"

    eko " -> Trying 3G/LTE switch mitigation"
    settings put global preferred_network_mode1 0 # Switch to 3G
    eko "..3G."
    #while read -r QUERY; do
    while true; do
    QUERY_ORIG=$(sed -n "${QUERYNUM}p" < $QUERYFILE)
    # Only run when $QUERY is not empty
    if [ -z "$QUERY_ORIG" ]; then
    eko "Skipping #$QUERYNUM; Query is empty\n\n"
    else
    QUERY="$QUERY_ORIG site:egloos.com -pds"
    eko "[$QUERY] Query #$QUERYNUM Starting\n"
    while ! [ $NewlyAdded -eq 0 ]; do
    # We count output lines, we get newly added ones via this
    SavedLines=$(wc -l < $OUTPUT/$OUTFILE)
    # We save saved html -> ./WEB/GOOGLEHTML/From*.htm
    eko "[$QUERY] PAGE $PAGE (from $FIRST)"
    STATUS=$(curl -s -w "%{http_code}" -b $COOKIE -c $COOKIE "https://www.google.com/search?q=$(echo $QUERY | jq '@uri' -jRr)&start=$FIRST" -A "$UA" -o ${OUTPUT}/GOOGLEHTML/Q${QUERYNUM}_P${PAGE}.htm)
    if [ $STATUS -eq 302 ]; then
    eko "..302!\n -> We got an IP ban."
    rm $COOKIE
    eko "\n -> Cleared cookie.."
    eko "\n -> Resetting IP.."
    CURIP=$(curl -s icanhazip.com)
    eko "$CURIP"
    svc data disable
    eko "..off."
    sleep 2
    svc data enable
    eko ".on.."
    # Wait until we get inet
    while true; do
    if [ $(ip add sh dev rmnet0 | grep inet | wc -l) -ne 0 ]; then
    break
    fi
    sleep 1
    done
    sleep 1 # Just to prevent issues lol
    settings put global preferred_network_mode1 9 # Switch to 4G/LTE
    eko ".LTE.."
    # Wait until we get inet /TODO : Make this modular
    while true; do
    if [ $(ip add sh dev rmnet0 | grep inet | wc -l) -ne 0 ]; then
    break
    fi
    sleep 1
    done
    eko "online"
    sleep 2
    CURIP=$(curl -s icanhazip.com)
    eko "..$CURIP"
    NEWIP=$(curl -s icanhazip.com)
    eko "..$NEWIP.."
    if [ "$CURIP" = "$NEWIP" ]; then
    eko "ERROR!\n -> Old and new IPs are *still* identical.\n"
    eko " -> Stopped at PAGE $PAGE from $FIRST on [$QUERY]\n"
    export PAGE=$PAGE
    export FIRST=$FIRST
    eko " -> Please modify file $0 and edit the values of PAGE and FIRST\n"
    eko " -> Please modify file $QUERYFILE and remove all lines before current query ($QUERY)\n"
    eko "Quitting program..\n"
    exit 1
    eko "ERROR!\n -> Old and new IPs are identical.\n"

    eko " -> Trying 3G/LTE switch mitigation"
    settings put global preferred_network_mode1 0 # Switch to 3G
    eko "..3G."
    # Wait until we get inet
    while true; do
    if [ $(ip add sh dev rmnet0 | grep inet | wc -l) -ne 0 ]; then
    break
    fi
    sleep 1
    done
    sleep 1 # Just to prevent issues lol
    settings put global preferred_network_mode1 9 # Switch to 4G/LTE
    eko ".LTE.."
    # Wait until we get inet /TODO : Make this modular
    while true; do
    if [ $(ip add sh dev rmnet0 | grep inet | wc -l) -ne 0 ]; then
    break
    fi
    sleep 1
    done
    eko "online"
    sleep 2
    CURIP=$(curl -s icanhazip.com)
    eko "..$CURIP"
    if [ "$CURIP" = "$NEWIP" ]; then
    eko "ERROR!\n -> Old and new IPs are *still* identical.\n"
    eko " -> Stopped at PAGE $PAGE from $FIRST on [$QUERY]\n"
    export PAGE=$PAGE
    export FIRST=$FIRST
    eko " -> Please modify file $0 and edit the values of PAGE and FIRST\n"
    eko " -> Please modify file $QUERYFILE and remove all lines before current query ($QUERY)\n"
    eko "Quitting program..\n"
    exit 1
    else
    eko " done!\n"
    eko " -> Mitigation successful\n"
    continue
    fi
    else
    eko " done!\n"
    eko " -> Mitigation successful\n"
    continue
    fi
    eko " -> Re-running current action..\n"
    continue
    elif ! [ $STATUS -eq 200 ]; then
    eko "..CODE $STATUS! Terminating\n"
    exit 1
    else
    eko " done!\n"
    eko "..200"
    fi
    eko " -> Re-running current action..\n"
    continue
    elif ! [ $STATUS -eq 200 ]; then
    eko "..CODE $STATUS! Terminating\n"
    exit 1
    else
    eko "..200"
    fi
    pup "body > div:nth-child(3) > div > div > div > div > div > a > span:nth-child(2) > span text{}" < ${OUTPUT}/GOOGLEHTML/Q${QUERYNUM}_P${PAGE}.htm >> $OUTPUT/$OUTFILE
    NewlyAdded=$(( $(wc -l < $OUTPUT/$OUTFILE) - $SavedLines ))
    eko "..Added $NewlyAdded\n"
    ((PAGE++))
    FIRST=$(($FIRST + $NewlyAdded))
    sed -i 's/ › .*//' $OUTPUT/$OUTFILE
    sleep $DELAY
    pup "body > div:nth-child(3) > div > div > div > div > div > a > span:nth-child(2) > span text{}" < ${OUTPUT}/GOOGLEHTML/Q${QUERYNUM}_P${PAGE}.htm >> $OUTPUT/$OUTFILE
    NewlyAdded=$(( $(wc -l < $OUTPUT/$OUTFILE) - $SavedLines ))
    eko "..Added $NewlyAdded\n"
    ((PAGE++))
    FIRST=$(($FIRST + $NewlyAdded))
    sed -i 's/ › .*//' $OUTPUT/$OUTFILE
    sleep $DELAY
    done
    NewlyAdded=999
    eko "[$QUERY] Query #$QUERYNUM Finished\n\n"
    fi
    ((QUERYNUM++))
    FIRST=0
    PAGE=1
    done
    NewlyAdded=999
    eko "[$QUERY] Query #$QUERYNUM Finished\n\n"
    ((QUERYNUM++))
    FIRST=0
    PAGE=1
    done < "$QUERYFILE"
    # done < "$QUERYFILE"

    eko "\n -> Finished fetching..\n"
    eko "Cleaning up.."
  2. HexagonWin created this gist Aug 15, 2023.
    153 changes: 153 additions & 0 deletions b_scrape.sh
    @@ -0,0 +1,153 @@
    # Completely free to modify or whatever. Made by hexagonwin <hexagonwin@disroot.org>
    # This thing is FREE SOFTWARE or whatever you choose to call it
    # ---- Paging state (updated while the script runs) ----
    # Bing numbers its results from 1 (Google's "start" parameter begins at 0).
    FIRST="1"
    PAGE="1"
    QUERYNUM="1"
    # Non-zero starting value for the "lines added by the last page" counter.
    NewlyAdded="999"

    # ---- Request settings ----
    UA='Mozilla/5.0 (MSIE 9.0; Windows NT 6.1; Trident/5.0)'
    # Value handed to "sleep" between pages.
    DELAY=2

    # ---- Version and file locations ----
    VER="1.2-and"
    OUTPUT="./out"
    OUTFILE="urls"
    LOGFILE="${OUTPUT}/bing_log"
    COOKIE="${OUTPUT}/bing_cookie.txt"
    QUERYFILE="query"
    # Example of a single hard-coded query (disabled):
    #QUERY="site:*.egloos.com"
    # eko MESSAGE
    # Print MESSAGE to stdout and append the same text to the log file.
    # Backslash escapes in MESSAGE (such as "\n") are expanded and no trailing
    # newline is added, so callers end their own lines.
    # Globals:   LOGFILE (read) - path of the log file that is appended to.
    # Arguments: $1 - text to print and log.
    eko(){
        # printf '%b' expands escapes like "echo -ne" does, but never mistakes
        # a message such as "-n" for an option.
        printf '%b' "$1"
        # Quoted so that a log path containing whitespace still works.
        printf '%b' "$1" >> "$LOGFILE"
    }

    # Finish_Cleanup
    # Post-process the collected result list in place:
    #   1. cut everything from " › " to the end of each line,
    #   2. sort the list,
    #   3. drop adjacent duplicate lines,
    # then report how many lines remain.
    # Globals: OUTPUT, OUTFILE (read) - directory and file name of the list.
    # Uses "<list>-tmp" next to the list as scratch space and removes it again.
    Finish_Cleanup(){
        # All paths are quoted so directories or names with spaces work.
        local list="$OUTPUT/$OUTFILE"
        local scratch="$OUTPUT/${OUTFILE}-tmp"

        eko "Cleaning up.."
        sed -i 's/ › .*//' "$list"
        eko "Sorting.."
        # Option before operand: the portable argument order for sort.
        sort -o "$list" "$list"
        eko "Unduplicating.."
        uniq "$list" "$scratch"
        # Written back with cat so the list file itself is kept, not replaced.
        cat "$scratch" > "$list"
        rm "$scratch"
        eko "Finished!\n\n"
        eko "We saved $(wc -l < "$list") items\n"
    }

    # Start-up: prepare the output tree, verify the query file, reset state.
    # The output directory and log file are created first because eko appends
    # to $LOGFILE; otherwise the "nonexistent" error below could not be logged
    # on a fresh run where $OUTPUT does not exist yet.
    mkdir -p "$OUTPUT" "$OUTPUT/BINGHTML"
    touch "$LOGFILE"

    # Nothing to do without a list of queries.
    if ! [ -f "$QUERYFILE" ]; then
        eko "Query file $QUERYFILE nonexistent. Quitting..\n"
        exit 1
    fi

    # Make sure the result list exists so it can be line-counted, and start
    # every run without a cookie file.
    touch "$OUTPUT/$OUTFILE"
    rm -f "$COOKIE"

    eko "Init BDomainListRetrieve Agent $VER\n"
    eko " -Log $LOGFILE, cookie $COOKIE, output to $OUTFILE on $OUTPUT, $(wc -l < "$QUERYFILE") queries\n"

    # Main loop: one iteration per line of $QUERYFILE (supplied on stdin by the
    # redirect on the closing "done"). For every non-empty query, result pages
    # are fetched until a page adds no new lines to $OUTPUT/$OUTFILE.
    # NOTE(review): the query file is the loop's stdin, so any command in the
    # body that reads stdin would swallow queries -- confirm none of them do.
    while read -r QUERY; do
    # Only run when $QUERY is not empty
    if [ -z "$QUERY" ]; then
    eko "Skipping #$QUERYNUM; Query is empty\n\n"
    else
    eko "[$QUERY] Query #$QUERYNUM Starting\n"
    # Paging loop: keep requesting pages while the previous page added lines.
    while ! [ $NewlyAdded -eq 0 ]; do
    # Remember the current line count of the result list; the difference after
    # this page is appended is the number of newly added entries.
    SavedLines=$(wc -l < $OUTPUT/$OUTFILE)
    # The raw page is saved as $OUTPUT/BINGHTML/Q<query number>_P<page>.htm
    eko "[$QUERY] PAGE $PAGE (from $FIRST)"
    # Fetch the page: body goes to the .htm file, STATUS receives the HTTP code.
    # The query is URL-encoded with jq's @uri filter.
    # NOTE(review): $QUERY is unquoted in the echo, so it is subject to word
    # splitting and glob expansion before being encoded.
    STATUS=$(curl -s -w "%{http_code}" -b $COOKIE -c $COOKIE "https://www.bing.com/search?q=$(echo $QUERY | jq '@uri' -jRr)&first=$FIRST" -A "$UA" -o ${OUTPUT}/BINGHTML/Q${QUERYNUM}_P${PAGE}.htm)
    # A 302 response is treated as an IP ban: drop the cookie and try to obtain
    # a different address before retrying the same page.
    if [ $STATUS -eq 302 ]; then
    eko "..302!\n -> We got an IP ban."
    rm $COOKIE
    eko "\n -> Cleared cookie.."
    eko "\n -> Resetting IP.."
    # Address reported by icanhazip.com before the reset.
    CURIP=$(curl -s icanhazip.com)
    eko "$CURIP"
    # Presumably toggles the device's mobile data off and on (Android "svc").
    svc data disable
    eko "..off."
    sleep 2
    svc data enable
    eko ".on.."
    # Poll once per second until rmnet0 lists at least one "inet" line.
    while true; do
    if [ $(ip add sh dev rmnet0 | grep inet | wc -l) -ne 0 ]; then
    break
    fi
    sleep 1
    done
    eko "online"
    sleep 2
    # Address reported after the reset.
    NEWIP=$(curl -s icanhazip.com)
    eko "..$NEWIP.."
    if [ "$CURIP" = "$NEWIP" ]; then
    eko "ERROR!\n -> Old and new IPs are identical.\n"
    # Second attempt: change the preferred network mode and change it back.
    eko " -> Trying 3G/LTE switch mitigation"
    settings put global preferred_network_mode1 0 # Switch to 3G
    eko "..3G."
    # Wait until we get inet
    while true; do
    if [ $(ip add sh dev rmnet0 | grep inet | wc -l) -ne 0 ]; then
    break
    fi
    sleep 1
    done
    sleep 1 # Just to prevent issues lol
    settings put global preferred_network_mode1 9 # Switch to 4G/LTE
    eko ".LTE.."
    # Wait until we get inet /TODO : Make this modular
    while true; do
    if [ $(ip add sh dev rmnet0 | grep inet | wc -l) -ne 0 ]; then
    break
    fi
    sleep 1
    done
    eko "online"
    sleep 2
    # CURIP is reused for the address after the mode switch; NEWIP still holds
    # the address from the first attempt (equal to the banned one here).
    CURIP=$(curl -s icanhazip.com)
    eko "..$CURIP"
    if [ "$CURIP" = "$NEWIP" ]; then
    # Both attempts failed: report where we stopped, tidy the list and quit.
    eko "ERROR!\n -> Old and new IPs are *still* identical.\n"
    eko " -> Stopped at PAGE $PAGE from $FIRST on [$QUERY]\n"
    # NOTE(review): exporting right before exit only matters if this script is
    # sourced -- confirm how it is meant to be run.
    export PAGE=$PAGE
    export FIRST=$FIRST
    eko " -> Please modify file $0 and edit the values of PAGE and FIRST\n"
    eko " -> Please modify file $QUERYFILE and remove all lines before current query ($QUERY)\n"
    eko "Quitting program..\n"
    Finish_Cleanup
    exit 1
    else
    eko " done!\n"
    eko " -> Mitigation successful\n"
    # Retry the same page: PAGE and FIRST have not been advanced yet.
    continue
    fi
    else
    eko " done!\n"
    fi
    eko " -> Re-running current action..\n"
    # Retry the same page with the new address.
    continue
    # Any other non-200 status aborts the run (without calling Finish_Cleanup).
    elif ! [ $STATUS -eq 200 ]; then
    eko "..CODE $STATUS! Terminating\n"
    exit 1
    else
    eko "..200"
    fi
    # Extract the href of each result link from the saved page.
    # awk only extracts domain portion of it i.e. areaz.egloos.com from http://areaz.egloos.com/1234/
    pup "ol#b_results > li.b_algo > h2 > a attr{href}" < ${OUTPUT}/BINGHTML/Q${QUERYNUM}_P${PAGE}.htm | awk -F[/:] '{print $4}' >> $OUTPUT/$OUTFILE
    # Lines appended by this page; zero ends the paging loop.
    NewlyAdded=$(( $(wc -l < $OUTPUT/$OUTFILE) - $SavedLines ))
    eko "..Added $NewlyAdded\n"
    ((PAGE++))
    # The next request starts after the results collected so far.
    FIRST=$(($FIRST + $NewlyAdded))
    # Cut everything from " › " to the end of each line in the result list.
    sed -i 's/ › .*//' $OUTPUT/$OUTFILE
    sleep $DELAY
    done
    eko "[$QUERY] Query #$QUERYNUM Finished\n\n"
    fi
    # Reset the paging state for the next query (Bing paging starts at 1).
    NewlyAdded=999
    ((QUERYNUM++))
    FIRST=1
    PAGE=1
    done < "$QUERYFILE"

    # All queries processed: clean, sort and de-duplicate the result list.
    eko "\n -> Finished fetching..\n"
    Finish_Cleanup
    142 changes: 142 additions & 0 deletions g_scrape.sh
    @@ -0,0 +1,142 @@
    # Completely free to modify or whatever. Made by hexagonwin <hexagonwin@disroot.org>
    # This thing is FREE SOFTWARE or whatever you choose to call it
    # ---- Paging state (updated while the script runs) ----
    # The trailing "#n" notes are presumably the values for a fresh start; the
    # numbers set here look like a resume point from an earlier run.
    FIRST="169"    #0
    PAGE="18"      #1
    QUERYNUM="320" #1
    # Non-zero starting value for the "lines added by the last page" counter.
    NewlyAdded="999"

    # ---- Request settings ----
    UA='Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25'
    # Value handed to "sleep" between pages.
    DELAY=2

    # ---- Version and file locations ----
    VER="1.2-and"
    OUTPUT="./out"
    OUTFILE="urls"
    LOGFILE="${OUTPUT}/google_log"
    COOKIE="${OUTPUT}/google_cookie.txt"
    QUERYFILE="query"
    # eko MESSAGE
    # Print MESSAGE to stdout and append the same text to the log file.
    # Backslash escapes in MESSAGE (such as "\n") are expanded and no trailing
    # newline is added, so callers end their own lines.
    # Globals:   LOGFILE (read) - path of the log file that is appended to.
    # Arguments: $1 - text to print and log.
    eko(){
        # printf '%b' expands escapes like "echo -ne" does, but never mistakes
        # a message such as "-n" for an option.
        printf '%b' "$1"
        # Quoted so that a log path containing whitespace still works.
        printf '%b' "$1" >> "$LOGFILE"
    }

    # Start-up: prepare the output tree, verify the query file, reset state.
    # The output directory and log file are created first because eko appends
    # to $LOGFILE; otherwise the "nonexistent" error below could not be logged
    # on a fresh run where $OUTPUT does not exist yet.
    mkdir -p "$OUTPUT" "$OUTPUT/GOOGLEHTML"
    touch "$LOGFILE"

    # Nothing to do without a list of queries.
    if ! [ -f "$QUERYFILE" ]; then
        eko "Query file $QUERYFILE nonexistent. Quitting..\n"
        exit 1
    fi

    # Make sure the result list exists so it can be line-counted, and start
    # every run without a cookie file.
    touch "$OUTPUT/$OUTFILE"
    rm -f "$COOKIE"

    eko "Init GDomainListRetrieve Agent $VER\n"
    eko " -Log $LOGFILE, cookie $COOKIE, output to $OUTFILE on $OUTPUT, $(wc -l < "$QUERYFILE") queries\n"

    # Main loop: one iteration per line of $QUERYFILE (supplied on stdin by the
    # redirect on the closing "done"). For every query, result pages are fetched
    # until a page adds no new lines to $OUTPUT/$OUTFILE.
    # NOTE(review): unlike the Bing script there is no empty-line check here, so
    # a blank line in the query file is sent as an empty search.
    # NOTE(review): the query file is the loop's stdin, so any command in the
    # body that reads stdin would swallow queries -- confirm none of them do.
    while read -r QUERY; do
    eko "[$QUERY] Query #$QUERYNUM Starting\n"
    # Paging loop: keep requesting pages while the previous page added lines.
    while ! [ $NewlyAdded -eq 0 ]; do
    # Remember the current line count of the result list; the difference after
    # this page is appended is the number of newly added entries.
    SavedLines=$(wc -l < $OUTPUT/$OUTFILE)
    # The raw page is saved as $OUTPUT/GOOGLEHTML/Q<query number>_P<page>.htm
    eko "[$QUERY] PAGE $PAGE (from $FIRST)"
    # Fetch the page: body goes to the .htm file, STATUS receives the HTTP code.
    # The query is URL-encoded with jq's @uri filter.
    # NOTE(review): $QUERY is unquoted in the echo, so it is subject to word
    # splitting and glob expansion before being encoded.
    STATUS=$(curl -s -w "%{http_code}" -b $COOKIE -c $COOKIE "https://www.google.com/search?q=$(echo $QUERY | jq '@uri' -jRr)&start=$FIRST" -A "$UA" -o ${OUTPUT}/GOOGLEHTML/Q${QUERYNUM}_P${PAGE}.htm)
    # A 302 response is treated as an IP ban: drop the cookie and try to obtain
    # a different address before retrying the same page.
    if [ $STATUS -eq 302 ]; then
    eko "..302!\n -> We got an IP ban."
    rm $COOKIE
    eko "\n -> Cleared cookie.."
    eko "\n -> Resetting IP.."
    # Address reported by icanhazip.com before the reset.
    CURIP=$(curl -s icanhazip.com)
    eko "$CURIP"
    # Presumably toggles the device's mobile data off and on (Android "svc").
    svc data disable
    eko "..off."
    sleep 2
    svc data enable
    eko ".on.."
    # Wait until we get inet
    # (poll once per second until rmnet0 lists at least one "inet" line)
    while true; do
    if [ $(ip add sh dev rmnet0 | grep inet | wc -l) -ne 0 ]; then
    break
    fi
    sleep 1
    done
    eko "online"
    sleep 2
    # Address reported after the reset.
    NEWIP=$(curl -s icanhazip.com)
    eko "..$NEWIP.."
    if [ "$CURIP" = "$NEWIP" ]; then
    eko "ERROR!\n -> Old and new IPs are identical.\n"

    # Second attempt: change the preferred network mode and change it back.
    eko " -> Trying 3G/LTE switch mitigation"
    settings put global preferred_network_mode1 0 # Switch to 3G
    eko "..3G."
    # Wait until we get inet
    while true; do
    if [ $(ip add sh dev rmnet0 | grep inet | wc -l) -ne 0 ]; then
    break
    fi
    sleep 1
    done
    sleep 1 # Just to prevent issues lol
    settings put global preferred_network_mode1 9 # Switch to 4G/LTE
    eko ".LTE.."
    # Wait until we get inet /TODO : Make this modular
    while true; do
    if [ $(ip add sh dev rmnet0 | grep inet | wc -l) -ne 0 ]; then
    break
    fi
    sleep 1
    done
    eko "online"
    sleep 2
    # CURIP is reused for the address after the mode switch; NEWIP still holds
    # the address from the first attempt (equal to the banned one here).
    CURIP=$(curl -s icanhazip.com)
    eko "..$CURIP"
    if [ "$CURIP" = "$NEWIP" ]; then
    # Both attempts failed: report where we stopped and quit.
    eko "ERROR!\n -> Old and new IPs are *still* identical.\n"
    eko " -> Stopped at PAGE $PAGE from $FIRST on [$QUERY]\n"
    # NOTE(review): exporting right before exit only matters if this script is
    # sourced -- confirm how it is meant to be run.
    export PAGE=$PAGE
    export FIRST=$FIRST
    eko " -> Please modify file $0 and edit the values of PAGE and FIRST\n"
    eko " -> Please modify file $QUERYFILE and remove all lines before current query ($QUERY)\n"
    eko "Quitting program..\n"
    exit 1
    else
    eko " done!\n"
    eko " -> Mitigation successful\n"
    # Retry the same page: PAGE and FIRST have not been advanced yet.
    continue
    fi
    else
    eko " done!\n"
    fi
    eko " -> Re-running current action..\n"
    # Retry the same page with the new address.
    continue
    # Any other non-200 status aborts the run.
    elif ! [ $STATUS -eq 200 ]; then
    eko "..CODE $STATUS! Terminating\n"
    exit 1
    else
    eko "..200"
    fi
    # Append the text of every element matching this selector to the result list.
    # NOTE(review): the selector depends on the exact page markup -- confirm it
    # still matches the pages being served.
    pup "body > div:nth-child(3) > div > div > div > div > div > a > span:nth-child(2) > span text{}" < ${OUTPUT}/GOOGLEHTML/Q${QUERYNUM}_P${PAGE}.htm >> $OUTPUT/$OUTFILE
    # Lines appended by this page; zero ends the paging loop.
    NewlyAdded=$(( $(wc -l < $OUTPUT/$OUTFILE) - $SavedLines ))
    eko "..Added $NewlyAdded\n"
    ((PAGE++))
    # The next request starts after the results collected so far.
    FIRST=$(($FIRST + $NewlyAdded))
    # Cut everything from " › " to the end of each line in the result list.
    sed -i 's/ › .*//' $OUTPUT/$OUTFILE
    sleep $DELAY
    done
    # Reset the paging state for the next query (Google paging starts at 0).
    NewlyAdded=999
    eko "[$QUERY] Query #$QUERYNUM Finished\n\n"
    ((QUERYNUM++))
    FIRST=0
    PAGE=1
    done < "$QUERYFILE"

    # All queries processed: post-process the result list in place.
    # Paths are quoted so an output directory or file name containing
    # whitespace still works.
    eko "\n -> Finished fetching..\n"
    eko "Cleaning up.."
    # Cut everything from " › " to the end of each line.
    sed -i 's/ › .*//' "$OUTPUT/$OUTFILE"
    eko "Sorting.."
    # Option before operand: the portable argument order for sort.
    sort -o "$OUTPUT/$OUTFILE" "$OUTPUT/$OUTFILE"
    eko "Unduplicating.."
    # Drop adjacent duplicates via a scratch file next to the list, then write
    # the result back with cat so the list file itself is kept, not replaced.
    uniq "$OUTPUT/$OUTFILE" "$OUTPUT/${OUTFILE}-tmp"
    cat "$OUTPUT/${OUTFILE}-tmp" > "$OUTPUT/$OUTFILE"
    rm "$OUTPUT/${OUTFILE}-tmp"
    eko "Finished!\n\n"
    eko "We saved $(wc -l < "$OUTPUT/$OUTFILE") items\n"