Skip to content

Instantly share code, notes, and snippets.

@0xbepresent
Last active October 26, 2021 19:20
Show Gist options
  • Save 0xbepresent/0bc39cbf3c4fd47867c5c87bfbebb605 to your computer and use it in GitHub Desktop.
Save 0xbepresent/0bc39cbf3c4fd47867c5c87bfbebb605 to your computer and use it in GitHub Desktop.
import os
import random
import requests
import argparse
import textwrap
import time
def hookFactory(*factory_args, **factory_kwargs):
    """
    Build a ``requests`` response hook that dumps each HTTP exchange to disk.

    The positional and keyword arguments are accepted so existing callers
    (which pass the URL-list file name) keep working, but the hook itself
    does not use them.

    Returns:
        The ``saveData(response, *args, **kwargs)`` callable, suitable for
        ``hooks={'response': ...}``.
    """

    def format_headers(headers):
        """Render a header mapping as one ``Name: value`` pair per line."""
        return '\n'.join("{}: {}".format(k, v) for k, v in headers.items())

    def saveData(response, *args, **kwargs):
        """
        Save the requests data.

        Formats the request and response (method/status line, headers and
        body), prints the first 200 characters as a preview, and writes
        the full text to ``output/<random 128-bit integer>.txt``.
        Returns ``None``.
        """
        raw = textwrap.dedent("""
            ---------------- request ----------------
            {req.method} {req.url}
            {reqhdrs}
            {req.body}
            ---------------- response ----------------
            {res.status_code} {res.reason} {res.url}
            {reshdrs}
            {res.text}
        """).format(
            req=response.request,
            res=response,
            reqhdrs=format_headers(response.request.headers),
            reshdrs=format_headers(response.headers),
        )
        print(raw[0:200])
        # A random 128-bit number keeps dumps of different responses from
        # overwriting each other; the name is not derived from the URL.
        file_id = random.getrandbits(128)
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs("output", exist_ok=True)
        # Explicit UTF-8 so bodies with non-ASCII text can always be written,
        # whatever the platform's default encoding is.
        with open('output/{}.txt'.format(file_id), 'w', encoding='utf-8') as f:
            f.write(raw)

    return saveData
def get_urls(name_file):
    """
    Request every URL listed in ``name_file``, one per line.

    Lines are stripped of surrounding whitespace and blank lines are
    skipped. Entries that do not start with ``http://`` or ``https://``
    are prefixed with ``https://``. Each response is passed to the hook
    built by ``hookFactory``. A failure on one URL is printed and the loop
    moves on to the next.

    Args:
        name_file: Path of the text file holding the URLs.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"}
    # The hook does not depend on the individual URL, so build it once.
    save_hook = hookFactory(name_file)
    with open(name_file) as infile:
        for line in infile:
            # Drop the trailing newline (and stray spaces) so it does not
            # end up inside the requested URL.
            url = line.strip()
            if not url:
                continue
            # Check the prefix, not a substring: a scheme-less entry may
            # still contain "http://" inside its query string.
            if not url.lower().startswith(('http://', 'https://')):
                url = 'https://{}'.format(url)
            try:
                print('Doing the request to URL: {}'.format(url))
                requests.get(
                    url,
                    headers=headers,
                    timeout=5,
                    hooks={'response': save_hook})
            except Exception as e:
                # Deliberately broad: this is a best-effort crawl, and an
                # error raised inside the hook must not abort the remaining
                # URLs either.
                print('Error {}, {}'.format(url, e))
            # NOTE(review): the original indentation was lost; the pause is
            # assumed to apply after every URL, not only after failures.
            time.sleep(5)
if __name__ == "__main__":
    # Command-line entry point: read the URL list path and process it.
    parser = argparse.ArgumentParser(description='HTML Body Extractors.')
    # Required: without it get_urls() would receive None and fail inside
    # open() with a TypeError instead of a clear usage message.
    parser.add_argument('--file', required=True, help='Urls File')
    args = parser.parse_args()
    get_urls(args.file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment