# Q2h1Cg/dork.py

## dork.py
#!/usr/bin/python

"""
Author: Chu
Usage: python dork.py dork page_num
Example: python dork.py "filetype:action" 5
"""

import lxml.html
import requests
import sys
import urllib2

# ASCII-art "GOOGLE DORK" banner; printed when the file is run as a script.
__logo__ = """
..######....#######...#######...######...##.......########....########...#######..########..##....##
.##....##..##.....##.##.....##.##....##..##.......##..........##.....##.##.....##.##.....##.##...##.
.##........##.....##.##.....##.##........##.......##..........##.....##.##.....##.##.....##.##..##..
.##...####.##.....##.##.....##.##...####.##.......######......##.....##.##.....##.########..#####...
.##....##..##.....##.##.....##.##....##..##.......##..........##.....##.##.....##.##...##...##..##..
.##....##..##.....##.##.....##.##....##..##.......##..........##.....##.##.....##.##....##..##...##.
..######....#######...#######...######...########.########....########...#######..##.....##.##....##
"""


class GoogleDork(object):
    """Scrape Google result pages for a dork and group the hits by domain."""

    def google(self, dork, page_num):
        """Run a Google dork search and collect the result URLs.

        Requests ``page_num`` consecutive result pages, printing progress and
        every newly discovered domain/URL along the way. A page whose request
        fails is reported and skipped.

        Args:
            dork: Search expression; it is URL-quoted before being sent.
            page_num: Number of result pages to request.

        Returns:
            A dict mapping each network location to the list of distinct
            result URLs found for it, in discovery order.
        """
        result = {}
        ua = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 " \
             "(KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36"
        # The page index is formatted right in front of a literal "0", so
        # page i is requested with start=i*10.
        url_base = "https://www.google.com.hk/search?q=%s&newwindow=1" \
                   "&safe=strict&ei=PpStUq3mPKeziQe0hoHYCw&start=%d0" \
                   "&sa=N&biw=1301&bih=656"

        print("-" * 80)
        for i in range(page_num):
            print("Page %d" % (i + 1))
            print("-" * 80)
            url = url_base % (urllib2.quote(dork), i)

            try:
                resp = requests.get(url, headers={"User-Agent": ua}, timeout=5)
            except Exception as e:
                # Best effort: report the failure and move on to the next page.
                # Format the exception itself; ``e.message`` is deprecated and
                # empty for exceptions raised with several arguments.
                print("[!]ERROR: %s" % e)
                continue

            text = resp.text
            resp.close()
            html = lxml.html.fromstring(text)

            for item in html.xpath('//li[@class="g"]'):
                try:
                    href = item[1][1][0].get("href")
                except IndexError:
                    # Entry whose markup does not have the expected nesting;
                    # skip it instead of aborting the whole search.
                    continue
                if not href:
                    # No link on the element: nothing to record.
                    continue

                netloc = urllib2.urlparse.urlparse(href).netloc
                known = result.get(netloc)
                if known is None:
                    print("[+]New domain found: %s" % netloc)
                    print("[+]New url found: %s" % href)
                    result[netloc] = [href]
                elif href not in known:
                    print("[+]New url found: %s" % href)
                    known.append(href)
            print("-" * 80)

        url_total = sum(len(urls) for urls in result.values())
        print("Site: %d\nUrl:%d" % (len(result), url_total))
        print("-" * 80)
        return result

def main():
    """Run the dork given on the command line and save the results.

    Reads the dork expression from ``sys.argv[1]`` and the page count from
    ``sys.argv[2]``, runs the search, then writes every domain found to
    ``sites.txt`` and every URL to ``urls.txt`` (one entry per line,
    terminated with ``\\r\\n``). Existing files are overwritten.

    Raises:
        ValueError: If ``sys.argv[2]`` is not an integer.
    """
    d = GoogleDork()
    dork = sys.argv[1]
    page_num = int(sys.argv[2])
    result = d.google(dork, page_num)

    # ``with`` guarantees both files are flushed and closed, even if a write
    # fails part-way through.
    with open("sites.txt", "w") as f_sites, open("urls.txt", "w") as f_urls:
        for site in result:
            f_sites.write(site + "\r\n")
            for url in result[site]:
                f_urls.write(url + "\r\n")

if __name__ == "__main__":
    # Script entry point: always show the banner, then either run the search
    # or fall back to the usage text.
    print(__logo__)
    # Exactly two user-supplied arguments are expected: dork and page_num.
    if len(sys.argv[1:]) == 2:
        main()
    else:
        print(__doc__)
	#!/usr/bin/python

	"""
	Author: Chu
	Usage: python dork.py dork page_num
	Example: python dork.py "filetype:action" 5
	"""

	import lxml.html
	import requests
	import sys
	import urllib2

	# ASCII-art "GOOGLE DORK" banner; printed when the file is run as a script.
	__logo__ = """
	..######....#######...#######...######...##.......########....########...#######..########..##....##
	.##....##..##.....##.##.....##.##....##..##.......##..........##.....##.##.....##.##.....##.##...##.
	.##........##.....##.##.....##.##........##.......##..........##.....##.##.....##.##.....##.##..##..
	.##...####.##.....##.##.....##.##...####.##.......######......##.....##.##.....##.########..#####...
	.##....##..##.....##.##.....##.##....##..##.......##..........##.....##.##.....##.##...##...##..##..
	.##....##..##.....##.##.....##.##....##..##.......##..........##.....##.##.....##.##....##..##...##.
	..######....#######...#######...######...########.########....########...#######..##.....##.##....##
	"""


	class GoogleDork(object):
	    """Scrape Google result pages for a dork and group the hits by domain."""

	    def google(self, dork, page_num):
	        """Run a Google dork search and collect the result URLs.

	        Requests ``page_num`` consecutive result pages, printing progress and
	        every newly discovered domain/URL along the way. A page whose request
	        fails is reported and skipped.

	        Args:
	            dork: Search expression; it is URL-quoted before being sent.
	            page_num: Number of result pages to request.

	        Returns:
	            A dict mapping each network location to the list of distinct
	            result URLs found for it, in discovery order.
	        """
	        result = {}
	        ua = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 " \
	             "(KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36"
	        # The page index is formatted right in front of a literal "0", so
	        # page i is requested with start=i*10.
	        url_base = "https://www.google.com.hk/search?q=%s&newwindow=1" \
	                   "&safe=strict&ei=PpStUq3mPKeziQe0hoHYCw&start=%d0" \
	                   "&sa=N&biw=1301&bih=656"

	        print("-" * 80)
	        for i in range(page_num):
	            print("Page %d" % (i + 1))
	            print("-" * 80)
	            url = url_base % (urllib2.quote(dork), i)

	            try:
	                resp = requests.get(url, headers={"User-Agent": ua}, timeout=5)
	            except Exception as e:
	                # Best effort: report the failure and move on to the next page.
	                # Format the exception itself; ``e.message`` is deprecated and
	                # empty for exceptions raised with several arguments.
	                print("[!]ERROR: %s" % e)
	                continue

	            text = resp.text
	            resp.close()
	            html = lxml.html.fromstring(text)

	            for item in html.xpath('//li[@class="g"]'):
	                try:
	                    href = item[1][1][0].get("href")
	                except IndexError:
	                    # Entry whose markup does not have the expected nesting;
	                    # skip it instead of aborting the whole search.
	                    continue
	                if not href:
	                    # No link on the element: nothing to record.
	                    continue

	                netloc = urllib2.urlparse.urlparse(href).netloc
	                known = result.get(netloc)
	                if known is None:
	                    print("[+]New domain found: %s" % netloc)
	                    print("[+]New url found: %s" % href)
	                    result[netloc] = [href]
	                elif href not in known:
	                    print("[+]New url found: %s" % href)
	                    known.append(href)
	            print("-" * 80)

	        url_total = sum(len(urls) for urls in result.values())
	        print("Site: %d\nUrl:%d" % (len(result), url_total))
	        print("-" * 80)
	        return result

	def main():
	    """Run the dork given on the command line and save the results.

	    Reads the dork expression from ``sys.argv[1]`` and the page count from
	    ``sys.argv[2]``, runs the search, then writes every domain found to
	    ``sites.txt`` and every URL to ``urls.txt`` (one entry per line,
	    terminated with ``\\r\\n``). Existing files are overwritten.

	    Raises:
	        ValueError: If ``sys.argv[2]`` is not an integer.
	    """
	    d = GoogleDork()
	    dork = sys.argv[1]
	    page_num = int(sys.argv[2])
	    result = d.google(dork, page_num)

	    # ``with`` guarantees both files are flushed and closed, even if a write
	    # fails part-way through.
	    with open("sites.txt", "w") as f_sites, open("urls.txt", "w") as f_urls:
	        for site in result:
	            f_sites.write(site + "\r\n")
	            for url in result[site]:
	                f_urls.write(url + "\r\n")

	if __name__ == "__main__":
	    # Script entry point: always show the banner, then run the search when
	    # exactly two arguments (dork and page_num) were given; otherwise show
	    # the usage text.
	    print(__logo__)
	    if len(sys.argv) == 3:
	        main()
	    else:
	        print(__doc__)