Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
import requests
import re
import sys
from multiprocessing.dummy import Pool
def robots(host):
    """Return the archived robots.txt snapshots known for *host*.

    Queries the Wayback Machine CDX API for captures of
    ``<host>/robots.txt`` that were served with HTTP 200, asking the API to
    collapse captures by content digest.

    Args:
        host: Domain name (or URL prefix) whose robots.txt history is wanted.

    Returns:
        A list of ``[timestamp, original]`` pairs, one per snapshot. The list
        is empty when the API reports no captures or answers with an empty or
        non-JSON body.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status from the API.
    """
    # Let requests build and escape the query string instead of splicing the
    # host into a hand-written URL.
    r = requests.get(
        'https://web.archive.org/cdx/search/cdx',
        params={
            'url': '%s/robots.txt' % host,
            'output': 'json',
            'fl': 'timestamp,original',
            'filter': 'statuscode:200',
            'collapse': 'digest',
        },
        timeout=30,
    )
    # Surface HTTP errors (rate limiting, outages) as a clear HTTPError rather
    # than an opaque JSON decoding failure further down.
    r.raise_for_status()
    try:
        results = r.json()
    except ValueError:
        # An empty or non-JSON body means there is nothing usable to report.
        return []
    if not results:  # might find nothing
        return []
    # The first item is the header row ['timestamp', 'original'].
    return results[1:]
def getpaths(snapshot):
    """Fetch one archived robots.txt snapshot and extract its paths.

    Args:
        snapshot: A ``[timestamp, original]`` pair as returned by ``robots``.

    Returns:
        A list of strings, each running from the first ``/`` on a line of the
        snapshot to the end of that line, with trailing whitespace removed.
        The list is empty when the request fails or the body does not look
        like a robots.txt file.
    """
    url = 'https://web.archive.org/web/{0}/{1}'.format(snapshot[0], snapshot[1])
    try:
        robotstext = requests.get(url, timeout=30).text
    except requests.RequestException as exc:
        # One unreachable snapshot should not abort the whole run; report it
        # and carry on with the remaining snapshots.
        print('[!] Skipping %s: %s' % (url, exc), file=sys.stderr)
        return []
    # Verify it's actually a robots.txt file, not an archived 404 page.
    if 'Disallow:' not in robotstext:
        return []
    # '.' matches '\r', so CRLF files would otherwise yield paths with a
    # trailing carriage return that defeats de-duplication.
    return [path.rstrip() for path in re.findall(r'/.*', robotstext)]
# Script entry point: collect every path ever listed in the archived
# robots.txt files of the given domain and save them to '<domain>-robots.txt'.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('Usage:\n\tpython3 waybackrobots.py <domain-name>')
        # A missing argument is an error, so report a non-zero exit status.
        sys.exit(1)
    host = sys.argv[1]
    snapshots = robots(host)
    print('Found %s unique results' % len(snapshots))
    if not snapshots:
        sys.exit()
    print('This may take some time...')
    # Fetch snapshots on four worker threads; the context manager releases
    # the pool once all results are in.
    with Pool(4) as pool:
        paths = pool.map(getpaths, snapshots)
    # Merge the per-snapshot lists into one de-duplicated collection.
    unique_paths = set().union(*paths)
    filename = '%s-robots.txt' % host
    # Explicit UTF-8 avoids encode errors under non-UTF-8 default locales;
    # sorting makes the output stable between runs.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write('\n'.join(sorted(unique_paths)))
    print('[*] Saved results to %s' % filename)
@muztahidultanim
Copy link

muztahidultanim commented Sep 2, 2017

It's giving me the error "Failed to establish a new connection: [Errno 111] Connection refused" most of the time -_-

@JaydanCrosby2021
Copy link

JaydanCrosby2021 commented Mar 21, 2020

How can I use this?

@seluard
Copy link

seluard commented Mar 22, 2020

How can I use this?

$ python3 waybackrobots.py <domain-name>

@ashrafed
Copy link

ashrafed commented Apr 6, 2020

Traceback (most recent call last):
File "waybackrobots.py", line 34, in <module>
snapshots = robots(host)
File "waybackrobots.py", line 11, in robots
results = r.json()
File "/usr/local/lib/python3.8/site-packages/requests/models.py", line 897, in json
return complexjson.loads(self.text, **kwargs)
File "/usr/local/lib/python3.8/json/__init__.py", line 357, in loads
return _default_decoder.decode(s)
File "/usr/local/lib/python3.8/json/decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/local/lib/python3.8/json/decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
6

@ashrafed
Copy link

ashrafed commented Apr 6, 2020

any help for solving this error, please?

@Fawadkhanfk
Copy link

Fawadkhanfk commented Sep 2, 2021

any help for solving this error, please?

install the required libraries

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment