Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
import requests
import re
import sys
from multiprocessing.dummy import Pool
def robots(host):
    """Query the Wayback Machine CDX API for archived robots.txt snapshots of *host*.

    Parameters:
        host: domain name to look up (e.g. 'example.com').

    Returns:
        A list of [timestamp, original_url] pairs, one per unique capture,
        or [] when the archive has nothing for this host.
    """
    # CDX API query: JSON output, only the timestamp/original fields,
    # successful (200) captures only, deduplicated by content digest.
    r = requests.get(
        'https://web.archive.org/cdx/search/cdx'
        '?url=%s/robots.txt&output=json&fl=timestamp,original'
        '&filter=statuscode:200&collapse=digest' % host)
    results = r.json()
    if not results:  # the API returns an empty list when nothing is archived
        return []
    results.pop(0)  # first row is the header ['timestamp', 'original']
    return results
def getpaths(snapshot):
    """Fetch one archived robots.txt capture and extract the paths it lists.

    Parameters:
        snapshot: a [timestamp, original_url] pair as returned by robots().

    Returns:
        A list of path strings found in the file, or [] when the capture
        does not look like a real robots.txt (e.g. an archived 404 page).
    """
    # Rebuild the Wayback Machine replay URL for this capture:
    # https://web.archive.org/web/<timestamp>/<original-url>
    url = 'https://web.archive.org/web/{0}/{1}'.format(snapshot[0], snapshot[1])
    robotstext = requests.get(url).text
    # Verify it's actually a robots.txt file, not an archived error page.
    if 'Disallow:' in robotstext:
        return re.findall(r'/.*', robotstext)
    return []
if __name__ == '__main__':
    # Require exactly one argument: the target domain.
    if len(sys.argv) < 2:
        print('Usage:\n\tpython3 %s <domain-name>' % sys.argv[0])
        sys.exit(1)
    host = sys.argv[1]
    snapshots = robots(host)
    print('Found %s unique results' % len(snapshots))
    if len(snapshots) == 0:
        sys.exit(0)  # nothing archived for this host
    print('This may take some time...')
    # multiprocessing.dummy.Pool is a thread pool — appropriate here
    # since the work is network-bound, not CPU-bound.
    pool = Pool(4)
    paths = pool.map(getpaths, snapshots)
    # Flatten the per-snapshot path lists and deduplicate.
    unique_paths = set()
    for path_list in paths:
        unique_paths.update(path_list)
    filename = '%s-robots.txt' % host
    with open(filename, 'w') as f:
        f.write('\n'.join(unique_paths))
    print('[*] Saved results to %s' % filename)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment