Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
import requests
import re
import sys
from multiprocessing.dummy import Pool
def robots(host):
    """Return the Wayback Machine snapshots of ``<host>/robots.txt``.

    Queries the Wayback CDX search API for captures that returned HTTP 200,
    collapsed by content digest so that identical copies are listed once.

    Args:
        host: Domain (optionally with a path prefix) whose robots.txt
            history should be looked up.

    Returns:
        A list of ``[timestamp, original_url]`` pairs. The list is empty when
        the archive has no captures or replies with something that is not a
        JSON list.

    Raises:
        requests.RequestException: If the CDX server cannot be reached or
            does not answer within the timeout.
    """
    r = requests.get(
        'https://web.archive.org/cdx/search/cdx',
        # Let requests build the query string so that special characters in
        # the host are URL-encoded instead of corrupting the query.
        params={
            'url': '%s/robots.txt' % host,
            'output': 'json',
            'fl': 'timestamp,original',
            'filter': 'statuscode:200',
            'collapse': 'digest',
        },
        # Without a timeout a stalled connection would block forever.
        timeout=30,
    )
    try:
        results = r.json()
    except ValueError:
        # An empty or non-JSON body (e.g. an error page) means there is
        # nothing usable; JSON decode errors are ValueError subclasses.
        return []
    if not isinstance(results, list) or not results:
        return []
    # The first row is the header: ['timestamp', 'original'].
    return results[1:]
def getpaths(snapshot):
    """Extract the paths mentioned in one archived robots.txt snapshot.

    Args:
        snapshot: A ``[timestamp, original_url]`` pair as returned by
            ``robots()``.

    Returns:
        A list of strings, one per ``/...`` match found in the archived
        file, with surrounding whitespace removed. The list is empty when
        the snapshot could not be downloaded or does not look like a
        robots.txt file.
    """
    url = 'https://web.archive.org/web/{0}/{1}'.format(snapshot[0], snapshot[1])
    try:
        robotstext = requests.get(url, timeout=30).text
    except requests.RequestException as err:
        # One unreachable snapshot must not abort the whole pool.map() run;
        # report it and carry on with the remaining snapshots.
        sys.stderr.write('[!] Skipping %s: %s\n' % (url, err))
        return []
    # Verify it's actually a robots.txt file, not an archived 404 page.
    if 'Disallow:' not in robotstext:
        return []
    # '.' also matches '\r', so strip each match; otherwise CRLF files yield
    # entries that differ from their LF twins only by a trailing '\r'.
    return [path.strip() for path in re.findall(r'/.*', robotstext)]
if __name__ == '__main__':
    # Command-line entry point: collect every path seen in any archived
    # robots.txt of the given domain and save the unique set to a file.
    # Accept 'example.com' as well as 'https://example.com/': drop the URL
    # scheme and any leading/trailing slashes before using the value.
    host = ''
    if len(sys.argv) >= 2:
        host = re.sub(r'^[A-Za-z][A-Za-z0-9+.-]*://', '', sys.argv[1]).strip('/')
    if not host:
        print('Usage:\n\tpython3 waybackrobots.py <domain-name>')
        sys.exit(1)

    snapshots = robots(host)
    print('Found %s unique results' % len(snapshots))
    if not snapshots:
        sys.exit()

    print('This may take some time...')
    # The work is network-bound, so a small thread pool is sufficient; the
    # context manager releases the worker threads once map() has returned.
    with Pool(4) as pool:
        paths = pool.map(getpaths, snapshots)

    unique_paths = set()
    for snapshot_paths in paths:
        unique_paths.update(snapshot_paths)

    # Replace anything that is not filename-safe (e.g. '/' or ':') so that
    # open() does not fail with FileNotFoundError on hosts that carry a path.
    filename = '%s-robots.txt' % re.sub(r'[^A-Za-z0-9._-]+', '_', host)
    # Explicit UTF-8 avoids encode errors on platforms whose default codec
    # cannot represent every character; sorting makes the output stable.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write('\n'.join(sorted(unique_paths)))
    print('[*] Saved results to %s' % filename)
@muztahidultanim
Copy link

muztahidultanim commented Sep 2, 2017

It's giving me this error most of the time: "Failed to establish a new connection: [Errno 111] Connection refused" -_-

@JaydanCrosby2021
Copy link

JaydanCrosby2021 commented Mar 21, 2020

How can I use this?

@seluard
Copy link

seluard commented Mar 22, 2020

How can I use this?

$ python3 waybackrobots.py

@ashrafed
Copy link

ashrafed commented Apr 6, 2020

Traceback (most recent call last):
File "waybackrobots.py", line 34, in
snapshots = robots(host)
File "waybackrobots.py", line 11, in robots
results = r.json()
File "/usr/local/lib/python3.8/site-packages/requests/models.py", line 897, in json
return complexjson.loads(self.text, **kwargs)
File "/usr/local/lib/python3.8/json/init.py", line 357, in loads
return _default_decoder.decode(s)
File "/usr/local/lib/python3.8/json/decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/local/lib/python3.8/json/decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
6

@ashrafed
Copy link

ashrafed commented Apr 6, 2020

any help for solving this error, please?

@Fawadkhanfk
Copy link

Fawadkhanfk commented Sep 2, 2021

any help for solving this error, please?

install the required libraries

@Harsh5922
Copy link

Harsh5922 commented Aug 24, 2022

python3 waybackrobots.py https://www.abc.in/
Found 4 unique results
This may take some time...
Traceback (most recent call last):
File "/home/wizard/BugBountyTool/waybackrobots.py", line 45, in
with open(filename, 'w') as f:
FileNotFoundError: [Errno 2] No such file or directory: 'https://www.abc.in/-robots.txt'

(Here, "abc" stands in for the actual domain.)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment