Skip to content

Instantly share code, notes, and snippets.

@Bloodb0ne
Last active August 22, 2016 06:25
Show Gist options
  • Save Bloodb0ne/a9623ea028681a920a05daac43ea57a1 to your computer and use it in GitHub Desktop.
Save Bloodb0ne/a9623ea028681a920a05daac43ea57a1 to your computer and use it in GitHub Desktop.
Fetches all JS files from a URL and formats them.
import argparse
import os
import sys
from urllib.parse import urljoin, urlsplit

import jsbeautifier
import urllib3
from bs4 import BeautifulSoup
from bs4 import UnicodeDammit
def writeScript(path, data):
    """Write ``data`` to ``path`` as UTF-8 text and announce it on stdout.

    Args:
        path: Destination file path; an existing file is overwritten.
        data: Text to store in the file.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    # The progress message is an intentional side effect of this helper.
    print('Writing to :=', path)
    # The context manager closes the handle even when write() raises.
    with open(path, 'w', encoding="utf-8") as f:
        f.write(data)
# Script body: download the page at ``url``, then save every <script> it
# contains. Inline scripts go to ``<dest>/<prefix>index_<n>.js``; scripts
# referenced through ``src`` are downloaded to ``<dest>/remote/<prefix><name>``.
# All saved code is run through jsbeautifier first.
parser = argparse.ArgumentParser(description='Fetches assets from a website')
parser.add_argument('url')
parser.add_argument('--dest', help='Where to save the files', default=os.getcwd())
# NOTE: --type is parsed but not consulted below; only <script> tags are
# collected regardless of its value.
parser.add_argument('--type', choices=['script', 'style'], default='script')
parser.add_argument('--prefix', help='Prefix of the saved files', default='')
args = parser.parse_args()

localScriptCount = 0
remoteScriptCount = 0

# urljoin() needs an absolute base URL to resolve relative ``src`` values.
# A scheme-less argument (e.g. "example.com") is treated as plain http.
burl = urllib3.util.parse_url(args.url)
baseurl = args.url if burl.scheme else 'http://' + args.url.lstrip('/')
print('Base URL := ', baseurl)

http = urllib3.PoolManager()
r = http.request('GET', baseurl)
# Hand the raw bytes to BeautifulSoup so it can detect the encoding itself;
# str(bytes) would yield the "b'...'" repr with escaped newlines.
soup = BeautifulSoup(r.data, 'html.parser')
assets = soup.find_all('script')

# Make sure the output directory exists before any inline script is written.
os.makedirs(args.dest, exist_ok=True)
remote_dir = os.path.join(args.dest, 'remote')

for asset in assets:
    asset_source = asset.get('src')

    if asset_source is None:
        # Inline script: use the contents of the tag itself.
        localScriptCount += 1
        scriptCode = jsbeautifier.beautify(asset.get_text())
        path = os.path.join(
            args.dest,
            "{0}index_{1}.js".format(args.prefix, localScriptCount),
        )
        writeScript(path, scriptCode)
        continue

    # External script: resolve absolute, root-relative, page-relative and
    # protocol-relative ("//host/...") references against the page URL.
    remoteScriptCount += 1
    url = urljoin(baseurl, asset_source)
    print('Found remote script', url)

    # Fetch the URL exactly as referenced (query string included).
    resp = http.request('GET', url)
    if resp.status != 200:
        print('Skipping, HTTP status', resp.status)
        continue

    damnUnicode = UnicodeDammit(resp.data)
    scriptCode = jsbeautifier.beautify(damnUnicode.unicode_markup)

    # Derive the file name from the URL path only, so query strings and
    # fragments never end up in it. Fall back to a numbered name when the
    # path has no usable last component (e.g. it ends with "/").
    filename = os.path.basename(urlsplit(url).path)
    if filename in ('', '.', '..'):
        filename = "remote_{0}.js".format(remoteScriptCount)

    # Created lazily so the folder only appears when something is saved in it.
    os.makedirs(remote_dir, exist_ok=True)
    path = os.path.join(remote_dir, "{0}{1}".format(args.prefix, filename))
    writeScript(path, scriptCode)

print('Local Scripts #= ', localScriptCount)
print('Remote Scripts #= ', remoteScriptCount)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment