Skip to content

Instantly share code, notes, and snippets.

@R0X4R
Last active September 8, 2022 05:35
Show Gist options
  • Save R0X4R/6e7204cd019145b7ac09b40512c3917b to your computer and use it in GitHub Desktop.
Save R0X4R/6e7204cd019145b7ac09b40512c3917b to your computer and use it in GitHub Desktop.
Fetch all links from the pages (and the Wayback Machine archives) of the URLs given on stdin
# Import modules that'll be needed to run this tool
from re import search
from sys import exit, stderr, stdin, stdout
from time import sleep

import requests
from bs4 import BeautifulSoup
# Hide insecure request warning error
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# Targets to process: every line of stdin, read up front (each entry is
# stripped of surrounding whitespace by the functions that consume it)
host = list(stdin)
# Fetch all the urls from the page source-code
def pagelinks():
    """Print the links found in the HTML of every page listed in ``host``.

    Each non-blank entry of the module-level ``host`` list is fetched with a
    browser-like User-Agent and a 20-second timeout.  The response body is
    parsed as HTML and the ``href`` of every ``<a>`` tag whose value contains
    the text ``http`` is written to stdout, one per line.

    A target that cannot be fetched (timeout, connection failure, malformed
    or schemeless URL, too many redirects, ...) is reported on stderr and
    skipped, so a single bad input line does not abort the whole run.
    """
    # Built once: the header is the same for every request.
    header = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"}
    for link in host:
        url = link.strip()
        if not url:
            continue  # Blank input line: nothing to fetch
        sleep(1)  # Pause for one second between requests
        try:
            # verify=False: certificate validation is skipped on purpose
            # (the file also silences the matching urllib3 warning).
            response = requests.get(url, headers=header, timeout=20, verify=False)
        except requests.RequestException as error:
            # RequestException is the base class of Timeout and
            # ConnectionError as well as the URL-validation errors.
            stderr.write("[!] Skipping %s: %s\n" % (url, error))
            continue
        soup = BeautifulSoup(response.content, 'html.parser')
        for anchor in soup.find_all('a'):
            href = anchor.get('href')
            # Anchors without an href yield None and are ignored.
            if href is not None and 'http' in str(href):
                stdout.write(str(href) + '\n')
# Fetch all the urls from the wayback archive
def waybacklinks():
    """Print the URLs the Wayback Machine has archived for every entry in ``host``.

    For each non-blank entry of the module-level ``host`` list, the CDX
    search endpoint of web.archive.org is queried for ``<entry>/*`` with
    ``output=txt``, ``fl=original`` and ``collapse=urlkey``, and the text
    body of the response is written to stdout.

    The query is passed through ``params=`` so that characters such as
    ``&``, ``?`` or ``#`` inside an input line are percent-encoded instead of
    being interpreted as part of the CDX query string.

    A lookup that fails (timeout, connection failure, other request error,
    or a non-success HTTP status) is reported on stderr and skipped; error
    pages are never written to stdout.
    """
    endpoint = 'http://web.archive.org/cdx/search/cdx'
    # Built once: the header is the same for every request.
    header = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"}
    for link in host:
        target = link.strip()
        if not target:
            continue  # Blank input line: nothing to look up
        sleep(1)  # Pause for one second between requests
        query = {
            'url': target + '/*',
            'output': 'txt',
            'fl': 'original',
            'collapse': 'urlkey',
        }
        try:
            response = requests.get(endpoint, params=query, headers=header, verify=False, timeout=20)
        except requests.RequestException as error:
            # RequestException is the base class of Timeout and
            # ConnectionError, so both original cases are still covered.
            stderr.write("[!] Skipping %s: %s\n" % (target, error))
            continue
        if not response.ok:
            stderr.write("[!] Skipping %s: HTTP %d from the archive\n" % (target, response.status_code))
            continue
        listing = response.text
        if listing:
            # Guarantee a trailing newline so consecutive listings never
            # run together on one line.
            stdout.write(listing if listing.endswith('\n') else listing + '\n')
# Script entry: run both collectors over the targets read from stdin.
try:
    pagelinks()  # Links present in the live page source
    waybacklinks()  # URLs recorded by the Wayback Machine
except KeyboardInterrupt:
    # The notice goes to stderr so it never mixes with the URL list that a
    # downstream pipe reads from stdout.
    print("\n[!] CTRL+C, Pressed. Exiting the program", file=stderr)
    exit(0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment