Created
March 6, 2014 13:53
-
-
Save Q2h1Cg/9390180 to your computer and use it in GitHub Desktop.
Google Dork
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
""" | |
Author: Chu | |
Usage: python dork.py dork page_num | |
Example: python dork.py "filetype:action" 5 | |
""" | |
import lxml.html | |
import requests | |
import sys | |
import urllib2 | |
__logo__ = """ | |
..######....#######...#######...######...##.......########....########...#######..########..##....## | |
.##....##..##.....##.##.....##.##....##..##.......##..........##.....##.##.....##.##.....##.##...##. | |
.##........##.....##.##.....##.##........##.......##..........##.....##.##.....##.##.....##.##..##.. | |
.##...####.##.....##.##.....##.##...####.##.......######......##.....##.##.....##.########..#####... | |
.##....##..##.....##.##.....##.##....##..##.......##..........##.....##.##.....##.##...##...##..##.. | |
.##....##..##.....##.##.....##.##....##..##.......##..........##.....##.##.....##.##....##..##...##. | |
..######....#######...#######...######...########.########....########...#######..##.....##.##....## | |
""" | |
class GoogleDork(object):
    """Run a Google dork query and group the result links by domain."""

    def google(self, dork, page_num):
        """Fetch result pages for ``dork`` and collect the links found.

        Progress, newly discovered domains/URLs and a final summary are
        printed to stdout while the pages are processed.

        Args:
            dork: Search query string; it is URL-quoted before being placed
                in the request URL.
            page_num: Number of result pages to request (page indexes
                ``0 .. page_num - 1``).

        Returns:
            A dict mapping each link's network location (``netloc``) to the
            list of distinct hrefs found for it, in discovery order.
        """
        result = {}
        ua = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 " \
             "(KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36"
        # "start=%d0" appends a literal 0 to the page index, so the offset
        # sent to the server is 0, 10, 20, ...
        url_base = "https://www.google.com.hk/search?q=%s&newwindow=1" \
                   "&safe=strict&ei=PpStUq3mPKeziQe0hoHYCw&start=%d0" \
                   "&sa=N&biw=1301&bih=656"
        print("-" * 80)
        for i in range(page_num):
            print("Page %d" % (i + 1))
            print("-" * 80)
            url = url_base % (urllib2.quote(dork), i)
            try:
                req = requests.get(url, headers={"User-Agent": ua}, timeout=5)
            except Exception as e:
                # Deliberately broad: a failed page is reported and skipped
                # so the remaining pages are still fetched. Format the
                # exception itself; ``e.message`` is deprecated and is often
                # empty for exceptions built from several arguments.
                print("[!]ERROR: %s" % e)
                continue
            else:
                req.close()
            html = lxml.html.fromstring(req.text)
            for item in html.xpath('//li[@class="g"]'):
                # NOTE(review): the link is assumed to sit at item[1][1][0];
                # presumably this matched the result markup when the script
                # was written. Items with another shape are skipped instead
                # of aborting the whole run with an IndexError.
                try:
                    href = item[1][1][0].get("href")
                except IndexError:
                    continue
                if not href:
                    # No href attribute: nothing to parse or record.
                    continue
                netloc = urllib2.urlparse.urlparse(href).netloc
                if netloc not in result:
                    print("[+]New domain found: %s" % netloc)
                    result[netloc] = []
                if href not in result[netloc]:
                    print("[+]New url found: %s" % href)
                    result[netloc].append(href)
            print("-" * 80)
        print("Site: %d\nUrl:%d" % (
            len(result), sum(len(urls) for urls in result.values())))
        print("-" * 80)
        return result
def main():
    """Run the dork given on the command line and save the results.

    Reads the query from ``sys.argv[1]`` and the page count from
    ``sys.argv[2]``, runs ``GoogleDork.google`` with them, then writes every
    domain to ``sites.txt`` and every URL to ``urls.txt``, one entry per
    line, each terminated by CRLF.

    Raises:
        ValueError: If ``sys.argv[2]`` is not an integer literal.
    """
    d = GoogleDork()
    dork = sys.argv[1]
    page_num = int(sys.argv[2])
    result = d.google(dork, page_num)
    # The context managers guarantee both files are flushed and closed, even
    # if a write fails part-way through.
    with open("sites.txt", "w") as f_sites, open("urls.txt", "w") as f_urls:
        for site in result:
            f_sites.write(site + "\r\n")
            for url in result[site]:
                f_urls.write(url + "\r\n")
if __name__ == "__main__":
    # The banner is shown on every invocation, valid or not.
    print(__logo__)
    # Exactly two arguments (dork and page count) are required; anything
    # else prints the module docstring as usage text.
    if len(sys.argv) != 3:
        print(__doc__)
    else:
        main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment