-
-
Save mhmdiaa/2742c5e147d49a804b408bfed3d32d07 to your computer and use it in GitHub Desktop.
import re
import sys
from multiprocessing.dummy import Pool

import requests
def robots(host):
    """Return the archived robots.txt snapshots known for *host*.

    Queries the Wayback Machine CDX endpoint for captures of
    ``<host>/robots.txt`` that returned HTTP 200, collapsed by content
    digest so identical captures are reported once.

    Args:
        host: Domain name (or URL prefix) whose robots.txt history is wanted.

    Returns:
        A list of ``[timestamp, original]`` rows, one per unique capture.
        Empty when the archive has nothing for this host.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx response from the CDX endpoint.
        ValueError: if the response body is not valid JSON.
    """
    # Let requests build the query string so that characters such as '&',
    # '#' or spaces in *host* are escaped instead of corrupting the query.
    r = requests.get(
        'https://web.archive.org/cdx/search/cdx',
        params={
            'url': '%s/robots.txt' % host,
            'output': 'json',
            'fl': 'timestamp,original',
            'filter': 'statuscode:200',
            'collapse': 'digest',
        },
        timeout=60,  # without a timeout a stalled connection hangs forever
    )
    # Fail with a clear HTTP error rather than an opaque JSON decode error
    # when the endpoint answers with an error page.
    r.raise_for_status()
    results = r.json()
    if not results:  # might find nothing
        return []
    # The first row is the header: ['timestamp', 'original'].
    return results[1:]
def extract_paths(robotstext):
    """Extract path-like strings from the text of a robots.txt file.

    Args:
        robotstext: Body of the (supposed) robots.txt document.

    Returns:
        A list with, for every line containing a ``/``, the text from the
        first ``/`` to the end of that line, with trailing whitespace
        removed. Empty when the text has no ``Disallow:`` directive, which
        is used as the signal that the document is not a robots.txt file
        (for example an archived 404 page).
    """
    if 'Disallow:' not in robotstext:
        return []
    # '.' matches '\r', so on CRLF documents every match would otherwise end
    # in a carriage return and the same path would show up as two distinct
    # entries ('/admin' and '/admin\r'). Strip trailing whitespace to avoid it.
    return [path.rstrip() for path in re.findall(r'/.*', robotstext)]


def getpaths(snapshot):
    """Download one archived robots.txt capture and return its paths.

    Args:
        snapshot: A ``[timestamp, original]`` row as returned by ``robots``.

    Returns:
        The list produced by ``extract_paths`` for the capture's body.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    timestamp, original = snapshot[0], snapshot[1]
    url = 'https://web.archive.org/web/{0}/{1}'.format(timestamp, original)
    # A timeout keeps one stalled download from blocking a pool worker forever.
    robotstext = requests.get(url, timeout=60).text
    return extract_paths(robotstext)
def output_filename(host):
    """Return a filesystem-safe results file name for *host*.

    A plain domain such as ``example.com`` maps to ``example.com-robots.txt``.
    When a full URL is given, the scheme and surrounding slashes are dropped
    and any remaining character that is not a letter, digit, ``.``, ``_`` or
    ``-`` is replaced by ``_``, so the name never contains a path separator.
    """
    name = re.sub(r'^[A-Za-z][A-Za-z0-9+.-]*://', '', host).strip('/')
    name = re.sub(r'[^A-Za-z0-9._-]+', '_', name)
    # Fall back to a fixed stem if nothing usable is left (e.g. host == '/').
    return '%s-robots.txt' % (name or 'wayback')


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('Usage:\n\tpython3 waybackrobots.py <domain-name>')
        sys.exit(1)  # non-zero status: the invocation was invalid

    host = sys.argv[1]

    snapshots = robots(host)
    print('Found %s unique results' % len(snapshots))
    if len(snapshots) == 0:
        sys.exit()

    print('This may take some time...')
    # Downloads are I/O-bound, so a small thread pool overlaps the waits;
    # the context manager releases the worker threads afterwards.
    with Pool(4) as pool:
        paths = pool.map(getpaths, snapshots)

    unique_paths = set()
    for i in paths:
        unique_paths.update(i)

    # Using the raw argument as a file name fails for inputs such as
    # 'https://www.abc.in/' because of the slashes it contains.
    filename = output_filename(host)
    with open(filename, 'w', encoding='utf-8') as f:
        # Sorted so that repeated runs produce the same file.
        f.write('\n'.join(sorted(unique_paths)))
    print('[*] Saved results to %s' % filename)
How can I use this?
How can I use this?
$ python3 waybackrobots.py
Traceback (most recent call last):
File "waybackrobots.py", line 34, in
snapshots = robots(host)
File "waybackrobots.py", line 11, in robots
results = r.json()
File "/usr/local/lib/python3.8/site-packages/requests/models.py", line 897, in json
return complexjson.loads(self.text, **kwargs)
File "/usr/local/lib/python3.8/json/init.py", line 357, in loads
return _default_decoder.decode(s)
File "/usr/local/lib/python3.8/json/decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/local/lib/python3.8/json/decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
Any help solving this error, please?
Any help solving this error, please?
install the required libraries
python3 waybackrobots.py https://www.abc.in/
Found 4 unique results
This may take some time...
Traceback (most recent call last):
File "/home/wizard/BugBountyTool/waybackrobots.py", line 45, in
with open(filename, 'w') as f:
FileNotFoundError: [Errno 2] No such file or directory: 'https://www.abc.in/-robots.txt'
(Here, "abc" stands in for the actual domain.)
Oh, it's working fine on my end; just install the libraries that the script imports and save the file as waybackrobots.py.
It's giving me the error "Failed to establish a new connection: [Errno 111] Connection refused" most of the time -_-