@ttycelery
Created March 31, 2018 09:16
GDS: simple Google dorker script
import re
import sys
import webbrowser  # only needed by the commented-out captcha workaround

import requests
from bs4 import BeautifulSoup

# session cookies for the commented-out captcha workaround in get_()
global_cookies = None
def the_end():
    print("\r|! end: successfully dumped {} urls from Google search query "
          "'{}' to file '{}' (end_page={})".format(
              len(cached_urls), sys.argv[1], sys.argv[3], pagenm))
    print("-" * 15)
    sys.exit()
def get_(s_query, pagenum, proxy=None):
    """Fetch one page of Google results and return the result URLs."""
    global global_cookies
    urls = []
    t_url = "http://www.google.com/search"
    # note: Google's 'start' parameter is a result offset, not a page
    # number; full result pages normally advance in steps of 10
    params = {'q': s_query, 'start': pagenum}
    headers = {'User-Agent': "Mozilla/14.0"}
    r = requests.get(t_url, headers=headers, params=params, proxies=proxy)
    if "Our systems have detected unusual traffic from your computer network" in r.text:
        the_end()
        # disabled workaround: open the captcha in a browser, wait for the
        # user to solve it, then keep the session cookies
        """webbrowser.open(r.url, new=0, autoraise=True)
        input("|! press any key after you solved the captcha")
        global_cookies = r.cookies"""
    soup = BeautifulSoup(r.text, "lxml")
    # result titles were wrapped in <h3 class="r"> when this was written
    h3_soup = soup.find_all('h3', class_='r')
    for h3s in h3_soup:
        try:
            # extract the target from Google's /url?q=...&sa=... redirect
            urls.append(re.search(r'url\?q=(.+?)&sa', h3s.a['href']).group(1))
        except (AttributeError, TypeError, KeyError):
            continue
    return urls
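# At the time this Gist was written, Google result links were redirects of
# the form /url?q=<target>&sa=...; the regex above pulls out the <target>
# part. A minimal sketch of that extraction on a made-up href (the example
# link is hypothetical, not taken from the Gist):
#
#   href = "/url?q=https://example.com/page&sa=U&ved=abc123"
#   re.search(r'url\?q=(.+?)&sa', href).group(1)
#   # -> 'https://example.com/page'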
def que_thread(query, page, output):
    s_rslt = get_(query, page)
    for url in s_rslt:
        if url in cached_urls:
            continue
        required_condition = True  # placeholder hook for per-URL filtering
        if required_condition:
            cached_urls.append(url)
            print("\r|- " + url)
            with open(output, "a") as f:
                f.write(url + "\n")
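# The name que_thread and the unused max_thread / running_threads globals
# suggest pages were meant to be fetched concurrently, but the Gist only
# ever calls it sequentially. A minimal sketch of a threaded variant,
# assuming the functions above; the batching scheme here is an assumption,
# not the author's code (concurrent appends to the output file may
# interleave):
import threading

def fetch_batch_threaded(query, start_page, output, max_thread=5):
    # one worker per page offset, max_thread pages at a time
    threads = [threading.Thread(target=que_thread, args=(query, page, output))
               for page in range(start_page, start_page + max_thread)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()  # wait for the whole batch before returning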
def main():
    global running_threads  # reserved for a threaded mode; never assigned
    print(r"""
 ________ ____
 / ___/ _ \/ __/ | Google Dorker Script
/ (_ / // /\ \ | Author: P4kL0nc4t
\___/____/___/ | https://github.com/p4kl0nc4t
""")
    if len(sys.argv) != 4:
        print("usage: {} <query> <start_page> <output_file>".format(sys.argv[0]))
        sys.exit()
    query = sys.argv[1]
    output = sys.argv[3]
    global pagenm
    pagenm = int(sys.argv[2])
    max_thread = 5  # reserved for a threaded mode; unused by the loop below
    global cached_urls
    print("-" * 15)
    print("|* begin: started Google dorker with dork='{}', start_page='{}', "
          "output='{}'".format(query, pagenm, output))
    cached_urls = []
    while True:
        # despite its name, que_thread runs synchronously: one page per pass
        que_thread(query, pagenm, output)
        pagenm += 1
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        the_end()
    # except Exception:
    #     pass
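# A minimal usage sketch, assuming the script is saved as gds.py (the file
# name, dork, and output path below are hypothetical examples):
#
#   python gds.py "inurl:admin.php" 0 results.txt
#
# URLs are appended to the output file, one per line, until Google serves
# its unusual-traffic captcha page or the user hits Ctrl-C.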