Skip to content

Instantly share code, notes, and snippets.

@sh1nu11bi
Forked from tecknoh19/ghdb_ripper.py
Last active August 29, 2015 14:12
Show Gist options
  • Save sh1nu11bi/f43d77f1f60658e53e12 to your computer and use it in GitHub Desktop.
Save sh1nu11bi/f43d77f1f60658e53e12 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# Exploit-db.com Google Dork Hacking Database Replicator written by Andy Bricker
# Proof of concept. You shouldn't use this script without prior consent from Exploit-db.com
# http://andybricker.com
# Contact: andy at andybricker.com
# Requirements
# Python 2.7 (Has not been tested on later versions)
# Usage:
# python ghdb_ripper.py 50 -s 51 -o output.txt
# Script will crawl exploit-db.com google dork pages and build a csv output file containing line by line
# dork,date dork was added,dork description
# Like the script? Donate
# LiteCoin: LcFU5upJyS7FsEeB5sb25vFTS69dH6fugr
# DogeCoin: D7SPH1LYJn9Co4GCZePH3JvzR5RkZEPi5M
import urllib2
import re
import time
import os
from optparse import OptionParser
# ---------------------------------------------------------------------------
# Main script: fetch a run of exploit-db.com GHDB pages and append one
# "dork,date,description" line per page to the output file.
#
# Positional argument: number_of_dorks - how many consecutive dork pages to
#                      request, starting at --start_number.
# Exits early after MAX_FAILED_ATTEMPTS consecutive connection failures.
# ---------------------------------------------------------------------------

GHDB_URL_PREFIX = 'http://www.exploit-db.com/ghdb/'
USER_AGENT = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5'
MAX_FAILED_ATTEMPTS = 3

# Compiled once instead of on every loop iteration.
DORK_PATTERN = re.compile(r'<h1>(.*?)</h1>')
DATE_PATTERN = re.compile(r'<p>Submited: (.*?)</p>')
DESC_PATTERN = re.compile(r'<p class="text">(.*?)</p>')


def _extract_dork_fields(html):
    """Return (dork, date_added, description) parsed from a GHDB page.

    Returns None when the page has no <h1> dork title. A missing date falls
    back to "0000-00-00" and a missing description to "na".
    """
    dork = DORK_PATTERN.findall(html)
    if not dork:
        return None
    date_added = DATE_PATTERN.findall(html)
    dork_desc = DESC_PATTERN.findall(html)
    return (
        dork[0],
        date_added[0] if date_added else "0000-00-00",
        dork_desc[0] if dork_desc else "na",
    )


options = OptionParser(usage='%prog number_of_dorks [options]', description='Exploit-db.Com GHDB Database Replicator')
# %default is expanded by optparse, so the help text can never drift from the
# real default again (it used to claim 5 while the default was 51).
options.add_option('-s', '--start_number', type='int', default=51, help='Dork number to start with (default: %default)')
options.add_option('-o', '--output_file', type='string', default="output.txt", help='Name of the output file. Paths accepted. User must have access to output path. (default: output.txt)')
opts, args = options.parse_args()
if len(args) < 1:
    options.print_help()
    exit()

try:
    number_of_dorks = int(args[0])
except ValueError:
    # options.error() prints the usage line plus this message and exits.
    options.error('number_of_dorks must be an integer, got %r' % (args[0],))

if os.name == 'nt':
    os.system('color a')
    os.system('cls')
else:
    os.system('clear')

first_page = opts.start_number
# Exclusive upper bound: request exactly number_of_dorks pages. The previous
# "number_of_dorks - start_number" bound produced an empty or truncated range.
last_page = first_page + number_of_dorks
failed_attempts = 0

log_file = open(opts.output_file, "a")
try:
    for page in range(first_page, last_page):  # 3943 Max Results
        page_url = GHDB_URL_PREFIX + str(page) + '/'
        print("Grabbing " + page_url)
        print("========================================================================")
        search_url = urllib2.Request(page_url)
        search_url.add_header('User-agent', USER_AGENT)

        try:
            search_response = urllib2.urlopen(search_url, timeout=6)
            try:
                search_content = search_response.read()
            finally:
                # Close the connection even when read() fails part-way.
                search_response.close()
        except Exception:
            # Exception (not a bare except) so Ctrl-C and exit() still work.
            print("Connection interrupted. Waiting 5 Seconds.")
            failed_attempts = failed_attempts + 1
            time.sleep(5)
            if failed_attempts >= MAX_FAILED_ATTEMPTS:
                print("Connection lost. Exiting.")
                exit()  # the finally clause below closes log_file
            continue

        print("Checking response")
        fields = _extract_dork_fields(search_content)
        if fields is None:
            # Page was fetched but carries no dork title.
            print("Communication error. Waiting 3 seconds.")
            time.sleep(3)
            continue

        log_file.write(fields[0] + "," + fields[1] + "," + fields[2] + "\n")
        # Only a fully successful page resets the consecutive-failure count.
        failed_attempts = 0
finally:
    log_file.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment