Skip to content

Instantly share code, notes, and snippets.

@Q2h1Cg
Created March 6, 2014 13:53
Show Gist options
  • Save Q2h1Cg/9390180 to your computer and use it in GitHub Desktop.
Save Q2h1Cg/9390180 to your computer and use it in GitHub Desktop.
Google Dork
#!/usr/bin/python
"""
Author: Chu
Usage: python dork.py dork page_num
Example: python dork.py "filetype:action" 5
"""
import lxml.html
import requests
import sys
import urllib2
# ASCII-art banner; the script entry point prints it on every run before
# either starting the search or showing the usage text.
__logo__ = """
..######....#######...#######...######...##.......########....########...#######..########..##....##
.##....##..##.....##.##.....##.##....##..##.......##..........##.....##.##.....##.##.....##.##...##.
.##........##.....##.##.....##.##........##.......##..........##.....##.##.....##.##.....##.##..##..
.##...####.##.....##.##.....##.##...####.##.......######......##.....##.##.....##.########..#####...
.##....##..##.....##.##.....##.##....##..##.......##..........##.....##.##.....##.##...##...##..##..
.##....##..##.....##.##.....##.##....##..##.......##..........##.....##.##.....##.##....##..##...##.
..######....#######...#######...######...########.########....########...#######..##.....##.##....##
"""
class GoogleDork(object):
    """Scrape Google result pages for a dork and group the hits by host."""

    @staticmethod
    def _result_href(item):
        """Return the link of one ``li.g`` result element, or None if absent."""
        try:
            # Positional walk through the result markup down to the anchor.
            return item[1][1][0].get("href")
        except IndexError:
            # Items with a different layout do not have this nesting.
            return None

    def google(self, dork, page_num):
        """Query Google for ``dork`` and collect the result links per page.

        Progress, errors and every newly discovered host/URL are printed to
        stdout while the pages are fetched.

        Args:
            dork: Search expression; it is URL-quoted before being sent.
            page_num: Number of result pages to request. The ``start`` query
                parameter advances by 10 per page.

        Returns:
            dict mapping each result's network location to the list of
            distinct result URLs found for it, in the order they were seen.
            A page that cannot be fetched or parsed is reported and skipped;
            result items without a usable link are ignored.
        """
        result = {}
        ua = ("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
              "(KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36")
        # "%d0": the literal 0 after the page index yields start=00, 10, 20...
        url_base = ("https://www.google.com.hk/search?q=%s&newwindow=1"
                    "&safe=strict&ei=PpStUq3mPKeziQe0hoHYCw&start=%d0"
                    "&sa=N&biw=1301&bih=656")
        print("-" * 80)
        for i in range(page_num):
            print("Page %d" % (i + 1))
            print("-" * 80)
            url = url_base % (urllib2.quote(dork), i)
            try:
                req = requests.get(url, headers={"User-Agent": ua}, timeout=5)
            except Exception as e:
                # Best effort: one failed page must not abort the whole run.
                # Format the exception itself; ``e.message`` is deprecated and
                # empty for exceptions built from several arguments.
                print("[!]ERROR: %s" % e)
                continue
            body = req.text
            req.close()
            try:
                html = lxml.html.fromstring(body)
            except lxml.etree.ParserError as e:
                # lxml rejects an empty document; skip the page instead of
                # losing everything collected so far.
                print("[!]ERROR: %s" % e)
                continue
            for j in html.xpath('//li[@class="g"]'):
                href = self._result_href(j)
                if not href:
                    continue
                netloc = urllib2.urlparse.urlparse(href).netloc
                urls = result.get(netloc)
                if urls is None:
                    print("[+]New domain found: %s" % netloc)
                    urls = result[netloc] = []
                if href not in urls:
                    print("[+]New url found: %s" % href)
                    urls.append(href)
            print("-" * 80)
        total_urls = sum(len(urls) for urls in result.values())
        print("Site: %d\nUrl:%d" % (len(result), total_urls))
        print("-" * 80)
        return result
def main():
    """Run the dork given on the command line and save what was found.

    Reads the dork from ``sys.argv[1]`` and the page count from
    ``sys.argv[2]``, runs the search, then writes every host to ``sites.txt``
    and every URL to ``urls.txt`` in the current directory, one entry per
    line terminated by ``\\r\\n``.

    Raises:
        ValueError: If ``sys.argv[2]`` is not an integer.
    """
    d = GoogleDork()
    dork = sys.argv[1]
    page_num = int(sys.argv[2])
    result = d.google(dork, page_num)
    # Context managers make sure both files are flushed and closed, even if
    # a write fails part-way through.
    with open("sites.txt", "w") as f_sites:
        with open("urls.txt", "w") as f_urls:
            for site in result:
                f_sites.write(site + "\r\n")
                for url in result[site]:
                    f_urls.write(url + "\r\n")
if __name__ == "__main__":
    # The banner is always shown. The search only runs when exactly two
    # arguments (dork and page count) were given; otherwise the module
    # docstring is printed as usage help.
    print(__logo__)
    if len(sys.argv) != 3:
        print(__doc__)
    else:
        main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment