nolanlawson/bundle_everything.py

## bundle_everything.py
#!/usr/bin/env python
#
# Input an HTML file, a js output folder, and a css output folder (both relative to the
# HTML file's directory), and this script will bundle everything the HTML references
# into three groups: head (css), head (js), and body (js).  Inline scripts are bundled too.
# Optionally deletes the original JS and CSS files.
#
# usage: bundle_everything.py my/html/file output/js/folder output/css/folder delete_originals
#

import ast
import os
import os.path
import re
import sys

# Parse the command line. Exactly four positional arguments are required:
# the HTML file, the JS output folder, the CSS output folder and the delete flag.
if len(sys.argv) != 5:
  sys.exit("usage: bundle_everything.py my/html/file output/js/folder output/css/folder delete_originals")
(html, jsFolder, cssFolder, deleteOriginals) = sys.argv[1:]

# Interpret the flag as a Python literal ("True", "False", "1", "0", ...) with
# ast.literal_eval rather than eval, so nothing typed on the command line is
# ever executed.  Words such as "true" or "yes" are accepted as well; any other
# text that is not a literal means "do not delete".
try:
  deleteOriginals = bool(ast.literal_eval(deleteOriginals))
except (ValueError, SyntaxError, TypeError):
  deleteOriginals = deleteOriginals.strip().lower() in ('true', 'yes', 'y', 'on')


# Sections of the document.  The optional attribute group lets tags such as
# <body class="x"> match; requiring whitespace before the attributes keeps
# <header> from being mistaken for <head>.  Group 1 is the section's content.
headPattern = re.compile(r"<head(?:\s[^>]*)?>(.*?)</head>", re.DOTALL)
bodyPattern = re.compile(r"<body(?:\s[^>]*)?>(.*?)</body>", re.DOTALL)
scriptPattern = re.compile('<script.*?</script>', re.DOTALL)

# Only <link rel="stylesheet"> tags are bundled and stripped; other links
# (icons, canonical URLs, ...) are not CSS and are left alone.  The tag may be
# closed with either ">" or "/>".
_stylesheetLinkStart = r"""<link\b(?=[^>]*\srel\s*=\s*["']?stylesheet\b)"""
linkPattern = re.compile(_stylesheetLinkStart + r"""[^>]*>""", re.DOTALL)

scriptFinderPattern = re.compile("""<script.*?</script>""", re.DOTALL)
# group 1 is the href of a stylesheet link
linkFinderPattern = re.compile(_stylesheetLinkStart + r"""[^>]*?\shref\s*=\s*["'](.*?)['"]""", re.DOTALL)

srcPattern = re.compile("""src=["'](.*?)['"]""", re.DOTALL)
inlinePattern = re.compile(""">(.*?)<""", re.DOTALL)

# paths of every external file that was read; removed at the end if requested
filesToRemove = []

with open(html,'r') as _htmlFile:
  htmlText = _htmlFile.read()

# Stop with a readable message instead of an IndexError when a section is missing.
_headFound = headPattern.search(htmlText)
_bodyFound = bodyPattern.search(htmlText)
if _headFound is None or _bodyFound is None:
  sys.exit("bundle_everything.py: could not find both <head>...</head> and <body>...</body> in " + html)

head = _headFound.group(1)
body = _bodyFound.group(1)

# whole <script> elements per section, and stylesheet hrefs from the head
headJs  = scriptFinderPattern.findall(head)
bodyJs  = scriptFinderPattern.findall(body)
headCss = linkFinderPattern.findall(head)

# Splits a <script> element into its opening-tag attributes (group 1) and its
# inline body (group 2).
_scriptPartsPattern = re.compile(r"""<script([^>]*)>(.*?)</script>""", re.DOTALL)
# Finds a src attribute within the opening-tag attributes; the leading
# whitespace keeps attributes such as data-src from matching.
_scriptSrcPattern = re.compile(r"""\ssrc\s*=\s*(["'])(.*?)\1""", re.DOTALL)

def jsExtractor(input):
  """Return the JavaScript carried by one <script>...</script> element.

  input is the full text of the element.  When the opening tag has a src
  attribute, the referenced file (resolved against the HTML file's directory)
  is read, its path is recorded in filesToRemove, and its content is returned.
  Otherwise the element's inline body is returned unchanged.
  """
  parts = _scriptPartsPattern.match(input)
  if parts is None:
    # malformed element with no closed opening tag: nothing to bundle
    return ''
  attributes = parts.group(1)
  inlineBody = parts.group(2)

  # Look for src in the opening tag only, so inline code such as
  # img.src="x.png" is not mistaken for an external script.
  srcMatch = _scriptSrcPattern.search(attributes)
  if srcMatch is None:
    # The whole body is returned, including any "<" characters it contains.
    return inlineBody

  filename = os.path.join(os.path.dirname(html), srcMatch.group(2))
  filesToRemove.append(filename)
  with open(filename,'r') as jsFile:
    return jsFile.read()

def cssExtractor(input):
  """Return the content of the stylesheet whose href is input.

  The href is resolved against the HTML file's directory, and the resulting
  path is recorded in filesToRemove.
  """
  filename = os.path.join(os.path.dirname(html), input)
  filesToRemove.append(filename)
  # the with-block closes the file even if read() fails
  with open(filename,'r') as cssFile:
    return cssFile.read()

# Concatenate the collected sources, one bundle per group.
headJsOut = '\n'.join(map(jsExtractor, headJs))
bodyJsOut = '\n'.join(map(jsExtractor, bodyJs))
headCssOut = '\n'.join(map(cssExtractor, headCss))

# Bundle paths relative to the HTML file's directory, in the same order as the
# texts they are zipped with below.
filenames = [os.path.join(jsFolder,'head.js'),os.path.join(jsFolder,'body.js'),os.path.join(cssFolder,'head.css')]

htmlDir = os.path.dirname(html)
writtenPaths = set()
for (filename, text) in zip(filenames, [headJsOut, bodyJsOut, headCssOut]):
  outPath = os.path.join(htmlDir, filename)
  outDir = os.path.dirname(outPath)
  # create missing output folders instead of failing inside open()
  if outDir and not os.path.isdir(outDir):
    os.makedirs(outDir)
  with open(outPath,'w') as fileout:
    fileout.write(text)
  writtenPaths.add(os.path.abspath(outPath))

headMatch = headPattern.search(htmlText)
bodyMatch = bodyPattern.search(htmlText)
headText = headMatch.group(1)
bodyText = bodyMatch.group(1)

# strip the original script and link tags from both sections
headText = scriptPattern.sub('',headText)
headText = linkPattern.sub('',headText)
bodyText = scriptPattern.sub('',bodyText)

# URLs use forward slashes even where os.path.join produced another separator.
(headJsUrl, bodyJsUrl, headCssUrl) = [bundleName.replace(os.sep, '/') for bundleName in filenames]

headText += """<link rel="stylesheet" href="%s"/><script src="%s"></script>""" % (headCssUrl,headJsUrl)
bodyText += """<script src="%s"></script>""" % bodyJsUrl
# Both matches were taken from the unmodified text, so their offsets can be
# used together to splice the new head and body content in a single pass.
htmlText = htmlText[:headMatch.start(1)] + headText + htmlText[headMatch.end(1):bodyMatch.start(1)] + bodyText + htmlText[bodyMatch.end(1):]

with open(html,'w') as fileout:
  fileout.write(htmlText)

if deleteOriginals:
  alreadyRemoved = set()
  for filename in filesToRemove:
    target = os.path.abspath(filename)
    # Skip a file referenced more than once (already removed) and any original
    # that shares its path with a bundle, which now holds the bundle itself.
    if target in alreadyRemoved or target in writtenPaths:
      continue
    alreadyRemoved.add(target)
    os.remove(filename)
	#!/usr/bin/env python
	#
	# Input an HTML file, a js output folder, and a css output folder (both relative to the
	# HTML file's directory), and this script will bundle everything the HTML references
	# into three groups: head (css), head (js), and body (js). Inline scripts are bundled too.
	# Optionally deletes the original JS and CSS files.
	#
	# usage: bundle_everything.py my/html/file output/js/folder output/css/folder delete_originals
	#

import ast
import os
import os.path
import re
import sys

# Parse the command line. Exactly four positional arguments are required:
# the HTML file, the JS output folder, the CSS output folder and the delete flag.
if len(sys.argv) != 5:
  sys.exit("usage: bundle_everything.py my/html/file output/js/folder output/css/folder delete_originals")
(html, jsFolder, cssFolder, deleteOriginals) = sys.argv[1:]

# Interpret the flag as a Python literal ("True", "False", "1", "0", ...) with
# ast.literal_eval rather than eval, so nothing typed on the command line is
# ever executed.  Words such as "true" or "yes" are accepted as well; any other
# text that is not a literal means "do not delete".
try:
  deleteOriginals = bool(ast.literal_eval(deleteOriginals))
except (ValueError, SyntaxError, TypeError):
  deleteOriginals = deleteOriginals.strip().lower() in ('true', 'yes', 'y', 'on')


# Sections of the document.  The optional attribute group lets tags such as
# <body class="x"> match; requiring whitespace before the attributes keeps
# <header> from being mistaken for <head>.  Group 1 is the section's content.
headPattern = re.compile(r"<head(?:\s[^>]*)?>(.*?)</head>", re.DOTALL)
bodyPattern = re.compile(r"<body(?:\s[^>]*)?>(.*?)</body>", re.DOTALL)
scriptPattern = re.compile('<script.*?</script>', re.DOTALL)

# Only <link rel="stylesheet"> tags are bundled and stripped; other links
# (icons, canonical URLs, ...) are not CSS and are left alone.  The tag may be
# closed with either ">" or "/>".
_stylesheetLinkStart = r"""<link\b(?=[^>]*\srel\s*=\s*["']?stylesheet\b)"""
linkPattern = re.compile(_stylesheetLinkStart + r"""[^>]*>""", re.DOTALL)

scriptFinderPattern = re.compile("""<script.*?</script>""", re.DOTALL)
# group 1 is the href of a stylesheet link
linkFinderPattern = re.compile(_stylesheetLinkStart + r"""[^>]*?\shref\s*=\s*["'](.*?)['"]""", re.DOTALL)

srcPattern = re.compile("""src=["'](.*?)['"]""", re.DOTALL)
inlinePattern = re.compile(""">(.*?)<""", re.DOTALL)

# Splits a <script> element into its opening-tag attributes (group 1) and its
# inline body (group 2).
_scriptPartsPattern = re.compile(r"""<script([^>]*)>(.*?)</script>""", re.DOTALL)
# Finds a src attribute within the opening-tag attributes; the leading
# whitespace keeps attributes such as data-src from matching.
_scriptSrcPattern = re.compile(r"""\ssrc\s*=\s*(["'])(.*?)\1""", re.DOTALL)

# paths of every external file that was read; removed at the end if requested
filesToRemove = []

with open(html,'r') as _htmlFile:
  htmlText = _htmlFile.read()

# Stop with a readable message instead of an IndexError when a section is missing.
headMatch = headPattern.search(htmlText)
bodyMatch = bodyPattern.search(htmlText)
if headMatch is None or bodyMatch is None:
  sys.exit("bundle_everything.py: could not find both <head>...</head> and <body>...</body> in " + html)

head = headMatch.group(1)
body = bodyMatch.group(1)

# whole <script> elements per section, and stylesheet hrefs from the head
headJs = scriptFinderPattern.findall(head)
bodyJs = scriptFinderPattern.findall(body)
headCss = linkFinderPattern.findall(head)

def jsExtractor(input):
  """Return the JavaScript carried by one <script>...</script> element.

  input is the full text of the element.  When the opening tag has a src
  attribute, the referenced file (resolved against the HTML file's directory)
  is read, its path is recorded in filesToRemove, and its content is returned.
  Otherwise the element's inline body is returned unchanged.
  """
  parts = _scriptPartsPattern.match(input)
  if parts is None:
    # malformed element with no closed opening tag: nothing to bundle
    return ''
  attributes = parts.group(1)
  inlineBody = parts.group(2)

  # Look for src in the opening tag only, so inline code such as
  # img.src="x.png" is not mistaken for an external script.
  srcMatch = _scriptSrcPattern.search(attributes)
  if srcMatch is None:
    # The whole body is returned, including any "<" characters it contains.
    return inlineBody

  filename = os.path.join(os.path.dirname(html), srcMatch.group(2))
  filesToRemove.append(filename)
  with open(filename,'r') as jsFile:
    return jsFile.read()

def cssExtractor(input):
  """Return the content of the stylesheet whose href is input.

  The href is resolved against the HTML file's directory, and the resulting
  path is recorded in filesToRemove.
  """
  filename = os.path.join(os.path.dirname(html), input)
  filesToRemove.append(filename)
  # the with-block closes the file even if read() fails
  with open(filename,'r') as cssFile:
    return cssFile.read()

# Concatenate the collected sources, one bundle per group.
headJsOut = '\n'.join(map(jsExtractor, headJs))
bodyJsOut = '\n'.join(map(jsExtractor, bodyJs))
headCssOut = '\n'.join(map(cssExtractor, headCss))

# Bundle paths relative to the HTML file's directory, in the same order as the
# texts they are zipped with below.
filenames = [os.path.join(jsFolder,'head.js'),os.path.join(jsFolder,'body.js'),os.path.join(cssFolder,'head.css')]

htmlDir = os.path.dirname(html)
writtenPaths = set()
for (filename, text) in zip(filenames, [headJsOut, bodyJsOut, headCssOut]):
  outPath = os.path.join(htmlDir, filename)
  outDir = os.path.dirname(outPath)
  # create missing output folders instead of failing inside open()
  if outDir and not os.path.isdir(outDir):
    os.makedirs(outDir)
  with open(outPath,'w') as fileout:
    fileout.write(text)
  writtenPaths.add(os.path.abspath(outPath))

headText = headMatch.group(1)
bodyText = bodyMatch.group(1)

# strip the original script and link tags from both sections
headText = scriptPattern.sub('',headText)
headText = linkPattern.sub('',headText)
bodyText = scriptPattern.sub('',bodyText)

# URLs use forward slashes even where os.path.join produced another separator.
(headJsUrl, bodyJsUrl, headCssUrl) = [bundleName.replace(os.sep, '/') for bundleName in filenames]

headText += """<link rel="stylesheet" href="%s"/><script src="%s"></script>""" % (headCssUrl,headJsUrl)
bodyText += """<script src="%s"></script>""" % bodyJsUrl
# Both matches were taken from the unmodified text, so their offsets can be
# used together to splice the new head and body content in a single pass.
htmlText = htmlText[:headMatch.start(1)] + headText + htmlText[headMatch.end(1):bodyMatch.start(1)] + bodyText + htmlText[bodyMatch.end(1):]

with open(html,'w') as fileout:
  fileout.write(htmlText)

if deleteOriginals:
  alreadyRemoved = set()
  for filename in filesToRemove:
    target = os.path.abspath(filename)
    # Skip a file referenced more than once (already removed) and any original
    # that shares its path with a bundle, which now holds the bundle itself.
    if target in alreadyRemoved or target in writtenPaths:
      continue
    alreadyRemoved.add(target)
    os.remove(filename)