Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
import requests
import re
import sys
from multiprocessing.dummy import Pool
def robots(host):
    """Return the archived robots.txt snapshots the Wayback Machine has for *host*.

    Queries the Wayback CDX API for captures of ``<host>/robots.txt`` that
    were served with HTTP status 200, collapsed by content digest so that
    each distinct version of the file is listed once.

    Args:
        host: Domain name whose robots.txt history should be looked up.

    Returns:
        A list of ``[timestamp, original_url]`` rows. The list is empty when
        the archive reports no captures or its reply is not valid JSON.

    Raises:
        requests.RequestException: if the HTTP request itself fails
            (connection refused, timeout, ...).
    """
    # Let requests build and URL-encode the query string instead of
    # interpolating the host into a hand-written URL.
    r = requests.get(
        'https://web.archive.org/cdx/search/cdx',
        params={
            'url': '%s/robots.txt' % host,
            'output': 'json',
            'fl': 'timestamp,original',
            'filter': 'statuscode:200',
            'collapse': 'digest',
        },
        timeout=30,  # never hang forever on an unresponsive archive
    )
    try:
        results = r.json()
    except ValueError:
        # An empty body or an HTML error page is not JSON; report it and
        # treat it as "nothing found" rather than crashing with a traceback.
        sys.stderr.write(
            '[!] Wayback CDX API returned a non-JSON reply (HTTP %s)\n'
            % r.status_code)
        return []
    if not results:  # might find nothing
        return []
    # The first row is the header: ['timestamp', 'original'].
    return results[1:]
# Captures the path argument of one Allow/Disallow directive line. The path
# stops at whitespace or '#', so trailing carriage returns and inline
# comments are not included in the captured value.
_DIRECTIVE_PATH_RE = re.compile(
    r'\s*(?:dis)?allow\s*:\s*(/[^\s#]*)', re.IGNORECASE)


def _extract_paths(robotstext):
    """Return the path of every Allow/Disallow directive found in *robotstext*."""
    matches = (_DIRECTIVE_PATH_RE.match(line)
               for line in robotstext.splitlines())
    return [m.group(1) for m in matches if m]


def getpaths(snapshot):
    """Download one archived robots.txt snapshot and return the paths it lists.

    Args:
        snapshot: A ``[timestamp, original_url]`` pair; the two items are
            substituted into the Wayback Machine replay URL.

    Returns:
        A list of path strings taken from the snapshot's Allow/Disallow
        directives (duplicates are kept). A page that contains no such
        directive lines (for example an archived error page) yields an empty
        list, as does a snapshot that could not be downloaded.
    """
    url = 'https://web.archive.org/web/{0}/{1}'.format(snapshot[0], snapshot[1])
    try:
        robotstext = requests.get(url, timeout=30).text
    except requests.RequestException as exc:
        # This function is mapped over many snapshots; one unreachable
        # snapshot should be skipped with a warning, not abort the whole run.
        sys.stderr.write('[!] Skipping %s: %s\n' % (url, exc))
        return []
    return _extract_paths(robotstext)
def main():
    """Collect every path seen in a domain's archived robots.txt files.

    Reads the domain name from ``sys.argv[1]``, fetches its robots.txt
    snapshots with ``robots``, extracts paths from each snapshot using four
    worker threads running ``getpaths``, and writes the de-duplicated, sorted
    paths to ``<domain>-robots.txt`` in the current directory.

    Exits with status 1 when no domain name is given, and with status 0
    (without writing a file) when no snapshots are found.
    """
    if len(sys.argv) < 2:
        print('Usage:\n\tpython3 waybackrobots.py <domain-name>')
        sys.exit(1)  # non-zero so calling scripts can detect the misuse
    host = sys.argv[1]
    snapshots = robots(host)
    print('Found %s unique results' % len(snapshots))
    if not snapshots:
        sys.exit()
    print('This may take some time...')
    # The context manager tears the worker threads down once map() returns.
    with Pool(4) as pool:
        paths = pool.map(getpaths, snapshots)
    unique_paths = set().union(*paths)
    filename = '%s-robots.txt' % host
    with open(filename, 'w', encoding='utf-8') as f:
        # Sorted so that repeated runs produce identical, diff-able output.
        f.write('\n'.join(sorted(unique_paths)))
    print('[*] Saved results to %s' % filename)


if __name__ == '__main__':
    main()
@muztahidultanim

This comment has been minimized.

Copy link

muztahidultanim commented Sep 2, 2017

It's giving me the error "Failed to establish a new connection: [Errno 111] Connection refused" most of the time -_-

@JaydanCrosby2021

This comment has been minimized.

Copy link

JaydanCrosby2021 commented Mar 21, 2020

How can I use this?

@seluard

This comment has been minimized.

Copy link

seluard commented Mar 22, 2020

How can I use this?

$ python3 waybackrobots.py

@ashrafed

This comment has been minimized.

Copy link

ashrafed commented Apr 6, 2020

Traceback (most recent call last):
File "waybackrobots.py", line 34, in <module>
snapshots = robots(host)
File "waybackrobots.py", line 11, in robots
results = r.json()
File "/usr/local/lib/python3.8/site-packages/requests/models.py", line 897, in json
return complexjson.loads(self.text, **kwargs)
File "/usr/local/lib/python3.8/json/__init__.py", line 357, in loads
return _default_decoder.decode(s)
File "/usr/local/lib/python3.8/json/decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/local/lib/python3.8/json/decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
6

@ashrafed

This comment has been minimized.

Copy link

ashrafed commented Apr 6, 2020

any help for solving this error, please?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.