tecknoh19/ghdb_ripper.py

## ghdb_ripper.py
#!/usr/bin/python
# Exploit-db.com Google Dork Hacking Database Replicator written by Andy Bricker
# Proof of concept.  You shouldn't use this script without prior consent from Exploit-db.com
# http://andybricker.com
# Contact: andy at andybricker.com

# Requirements
# Python 2.7 (has not been tested on later versions)

# Usage:
#   python ghdb_ripper.py 50 -s 51 -o output.txt

# The script crawls the exploit-db.com Google dork pages and builds a CSV output file with one line per dork:
# dork,date the dork was added,dork description

# Like the script?  Donate
#   LiteCoin: LcFU5upJyS7FsEeB5sb25vFTS69dH6fugr
#   DogeCoin: D7SPH1LYJn9Co4GCZePH3JvzR5RkZEPi5M


import urllib2
import re
import time
import os

from optparse import OptionParser

# Command-line interface: one required positional argument (how many dork
# pages to fetch) plus an optional starting dork number and output file.
options = OptionParser(usage='%prog number_of_dorks [options]', description='Exploit-db.Com GHDB Database Replicator')
# optparse expands %default, so the help text always shows the real default.
options.add_option('-s', '--start_number', type='int', default=51, help='Dork number to start with (default: %default)')
options.add_option('-o', '--output_file', type='string', default="output.txt", help='Name of the output file.  Paths accepted. User must have access to output path. (default: %default)')

opts, args = options.parse_args()
if len(args) < 1:
    options.print_help()
    exit()

try:
    number_of_dorks = int(args[0])
except ValueError:
    # Prints the usage message and exits with status 2 instead of a traceback.
    options.error('number_of_dorks must be an integer')

# Clear the terminal (and set green text on Windows) before printing progress.
if os.name == 'nt':
    os.system('color a')
    os.system('cls')
else:
    os.system('clear')

# Patterns for the three fields scraped from each page, compiled once because
# they are reused for every page.
dork_pattern = re.compile(r'<h1>(.*?)</h1>')
# NOTE(review): "Submited" is kept exactly as in the original pattern;
# presumably it mirrors the site's markup -- verify before "correcting" it.
date_pattern = re.compile(r'<p>Submited: (.*?)</p>')
desc_pattern = re.compile(r'<p class="text">(.*?)</p>')

# Fetch number_of_dorks consecutive pages beginning at start_number
# (the end of the range is exclusive).
max_range = opts.start_number + number_of_dorks
# Consecutive connection failures; reset whenever a dork is written.
failed_attempts = 0

log_file = open(opts.output_file, "a")
try:
    # Author's note from the original loop header: 3943 Max Results
    for page in range(opts.start_number, max_range):
        page_url = 'http://www.exploit-db.com/ghdb/' + str(page) + '/'
        print("Grabbing " + page_url)
        print("========================================================================")
        search_url = urllib2.Request(page_url)
        search_url.add_header('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5')

        try:
            search_response = urllib2.urlopen(search_url, timeout=6)
            try:
                search_content = search_response.read()
            finally:
                # Release the connection even if the read fails part-way.
                search_response.close()
        except Exception:
            # Best-effort crawl: any fetch error counts as a failed attempt.
            # Unlike a bare except, this lets Ctrl-C and SystemExit through.
            print("Connection interrupted.  Waiting 5 Seconds.")
            failed_attempts += 1
            if failed_attempts >= 3:
                print("Connection lost.  Exiting.")
                exit()
            time.sleep(5)
            continue

        print("Checking response")
        dork = dork_pattern.findall(search_content)
        if not dork:
            # No <h1> match on the page: skip it after a short pause.
            print("Communication error.  Waiting 3 seconds.")
            time.sleep(3)
            continue

        date_added = date_pattern.findall(search_content)
        dork_desc = desc_pattern.findall(search_content)
        # Placeholders keep every output line at three comma-separated fields.
        added_on = date_added[0] if date_added else "0000-00-00"
        description = dork_desc[0] if dork_desc else "na"

        log_file.write(dork[0] + "," + added_on + "," + description + "\n")
        failed_attempts = 0
finally:
    # Runs on normal completion, on exit() above, and on unexpected errors.
    log_file.close()
	#!/usr/bin/python
	# Exploit-db.com Google Dork Hacking Database Replicator written by Andy Bricker
	# Proof of concept. You shouldn't use this script without prior consent from Exploit-db.com
	# http://andybricker.com
	# Contact: andy at andybricker.com

	# Requirements
	# Python 2.7 (has not been tested on later versions)

	# Usage:
	# python ghdb_ripper.py 50 -s 51 -o output.txt

	# The script crawls the exploit-db.com Google dork pages and builds a CSV output file with one line per dork:
	# dork,date the dork was added,dork description

	# Like the script? Donate
	# LiteCoin: LcFU5upJyS7FsEeB5sb25vFTS69dH6fugr
	# DogeCoin: D7SPH1LYJn9Co4GCZePH3JvzR5RkZEPi5M


	import urllib2
	import re
	import time
	import os

	from optparse import OptionParser

	# Command-line interface: one required positional argument (how many dork
	# pages to fetch) plus an optional starting dork number and output file.
	options = OptionParser(usage='%prog number_of_dorks [options]', description='Exploit-db.Com GHDB Database Replicator')
	# optparse expands %default, so the help text always shows the real default.
	options.add_option('-s', '--start_number', type='int', default=51, help='Dork number to start with (default: %default)')
	options.add_option('-o', '--output_file', type='string', default="output.txt", help='Name of the output file. Paths accepted. User must have access to output path. (default: %default)')

	opts, args = options.parse_args()
	if len(args) < 1:
	    options.print_help()
	    exit()

	try:
	    number_of_dorks = int(args[0])
	except ValueError:
	    # Prints the usage message and exits with status 2 instead of a traceback.
	    options.error('number_of_dorks must be an integer')

	# Clear the terminal (and set green text on Windows) before printing progress.
	if os.name == 'nt':
	    os.system('color a')
	    os.system('cls')
	else:
	    os.system('clear')

	# Patterns for the three fields scraped from each page, compiled once because
	# they are reused for every page.
	dork_pattern = re.compile(r'<h1>(.*?)</h1>')
	# NOTE(review): "Submited" is kept exactly as in the original pattern;
	# presumably it mirrors the site's markup -- verify before "correcting" it.
	date_pattern = re.compile(r'<p>Submited: (.*?)</p>')
	desc_pattern = re.compile(r'<p class="text">(.*?)</p>')

	# Fetch number_of_dorks consecutive pages beginning at start_number
	# (the end of the range is exclusive).
	max_range = opts.start_number + number_of_dorks
	# Consecutive connection failures; reset whenever a dork is written.
	failed_attempts = 0

	log_file = open(opts.output_file, "a")
	try:
	    # Author's note from the original loop header: 3943 Max Results
	    for page in range(opts.start_number, max_range):
	        page_url = 'http://www.exploit-db.com/ghdb/' + str(page) + '/'
	        print("Grabbing " + page_url)
	        print("========================================================================")
	        search_url = urllib2.Request(page_url)
	        search_url.add_header('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5')

	        try:
	            search_response = urllib2.urlopen(search_url, timeout=6)
	            try:
	                search_content = search_response.read()
	            finally:
	                # Release the connection even if the read fails part-way.
	                search_response.close()
	        except Exception:
	            # Best-effort crawl: any fetch error counts as a failed attempt.
	            # Unlike a bare except, this lets Ctrl-C and SystemExit through.
	            print("Connection interrupted. Waiting 5 Seconds.")
	            failed_attempts += 1
	            if failed_attempts >= 3:
	                print("Connection lost. Exiting.")
	                exit()
	            time.sleep(5)
	            continue

	        print("Checking response")
	        dork = dork_pattern.findall(search_content)
	        if not dork:
	            # No <h1> match on the page: skip it after a short pause.
	            print("Communication error. Waiting 3 seconds.")
	            time.sleep(3)
	            continue

	        date_added = date_pattern.findall(search_content)
	        dork_desc = desc_pattern.findall(search_content)
	        # Placeholders keep every output line at three comma-separated fields.
	        added_on = date_added[0] if date_added else "0000-00-00"
	        description = dork_desc[0] if dork_desc else "na"

	        log_file.write(dork[0] + "," + added_on + "," + description + "\n")
	        failed_attempts = 0
	finally:
	    # Runs on normal completion, on exit() above, and on unexpected errors.
	    log_file.close()