fukasawah/jacoco-report-inlinize.py

## jacoco-report-inlinize.py
'''
usage) python jacoco-report-inlinize.py 'target/site/jacoco/**/*.html'

'''
import argparse
import functools
import glob
import html
import os
import re
from html.parser import HTMLParser
from os.path import dirname, abspath, relpath, normpath


# Command-line interface: one positional glob pattern selecting the HTML
# pages to rewrite.
parser = argparse.ArgumentParser()
parser.add_argument(
    'targets_glob',
    nargs=1,
    type=str,
    metavar='TargetGlob',
    help='target html files with glob. eg.) target/site/jacoco/**/*.html',
)
args = parser.parse_args()

# nargs=1 produces a one-element list; unpack it to the bare pattern string.
(TARGETS,) = args.targets_glob

@functools.lru_cache()
def _read_abs_file(abs_path):
    """Return the UTF-8 text of the file at *abs_path*, cached per path."""
    with open(abs_path, encoding='utf-8') as fp:
        return fp.read()


def _read_file(filepath):
    """Return the UTF-8 decoded contents of *filepath*.

    Results are cached, keyed on the absolute path.  Callers change the
    working directory between pages, so the same relative path (for example
    ``../res/report.css``) can name different files over the life of the
    process; caching on the raw string would hand back stale content.

    Raises:
        OSError: if the file cannot be opened.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    return _read_abs_file(abspath(filepath))

def normpath_s(path):
    """Normalize *path* and return it with forward slashes only.

    The path is first collapsed with ``os.path.normpath``; every backslash
    in the result is then turned into ``/``.
    """
    normalized = normpath(path)
    return '/'.join(normalized.split('\\'))

class MyHTMLParser(HTMLParser):
    """Re-emit parsed HTML with local stylesheets and scripts inlined.

    ``<link rel="stylesheet" href=...>`` is replaced by ``<style>...</style>``
    and ``<script src=...>`` by ``<script>...</script>``; the referenced
    files are read relative to the current working directory.  Every other
    parser event is written back to ``self.html``.

    Attributes:
        html: the output markup accumulated so far.
        expect_endtag: True right after an element was replaced, while its
            closing tag (if any) is still to be dropped.
    """

    # CSS ``url(...)`` token; group 1 is the raw, possibly quoted, target.
    _CSS_URL = re.compile(r'url\(([^\)]+)\)')
    # References that do not name a file on disk: ``scheme:`` (two or more
    # characters, so a Windows drive letter still counts as local) or
    # a protocol-relative ``//host``.
    _REMOTE_REF = re.compile(r'(?:[a-zA-Z][a-zA-Z0-9+.\-]+:|//)')

    def __init__(self):
        # convert_charrefs=False delivers ``&lt;`` / ``&#169;`` as separate
        # events so they can be written back verbatim; decoding them into
        # raw ``<`` / ``&`` would corrupt the re-emitted markup.
        super().__init__(convert_charrefs=False)
        self.html = ''
        self.expect_endtag = False
        self._replaced_tag = None  # tag name whose end tag is to be dropped

    def handle_starttag(self, tag, attrs):
        replacement = self._inline_replacement(tag, attrs)
        if replacement is None:
            self._emit(self._start_tag(tag, attrs))
            return

        self.html += replacement
        self.expect_endtag = True
        self._replaced_tag = tag

    def handle_startendtag(self, tag, attrs):
        # Self-closing elements have no separate end tag, so nothing is
        # pending afterwards; non-replaced ones keep their ``/>`` form.
        replacement = self._inline_replacement(tag, attrs)
        if replacement is None:
            replacement = self._start_tag(tag, attrs, self_closing=True)
        self._emit(replacement)

    def handle_endtag(self, tag):
        if self.expect_endtag:
            self.expect_endtag = False
            if tag == self._replaced_tag:
                # Closing tag of an element already replaced by inlined markup.
                return

        self.html += '</{}>'.format(tag)

    def handle_data(self, data):
        if self.expect_endtag and self._replaced_tag == 'script':
            # Inline body of a <script src=...> that was just replaced:
            # whitespace is dropped, real content cannot be merged.
            if data.strip():
                raise ValueError("unexpected data.")
            return

        self._emit(data)

    def handle_entityref(self, name):
        self._emit('&{};'.format(name))

    def handle_charref(self, name):
        self._emit('&#{};'.format(name))

    def handle_comment(self, data):
        self._emit('<!--{}-->'.format(data))

    def handle_decl(self, decl):
        self._emit('<!{}>'.format(decl))

    def handle_pi(self, data):
        self._emit('<?{}>'.format(data))

    def _emit(self, text):
        """Append *text* to the output.

        Any event other than the matching end tag means a replaced void
        ``<link>`` had no closing tag, so the pending expectation is dropped
        instead of swallowing an unrelated end tag later.
        """
        self.expect_endtag = False
        self.html += text

    def _inline_replacement(self, tag, attrs):
        """Return inlined markup for a local stylesheet/script, else None."""
        attrdict = dict(attrs)
        if tag == 'link' and attrdict.get('rel') == 'stylesheet':
            href = attrdict.get('href')
            if href and not self._REMOTE_REF.match(href):
                return self._inline_css(href)
        elif tag == 'script':
            src = attrdict.get('src')
            if src and not self._REMOTE_REF.match(src):
                return self._inline_script(src)
        return None

    def _start_tag(self, tag, attrs, self_closing=False):
        """Serialize a start tag from *tag* and its ``(name, value)`` pairs.

        Values are HTML-escaped because the parser hands them over decoded;
        a value of None (attribute without ``=``) is written as a bare name.
        """
        parts = [tag]
        for name, value in attrs:
            if value is None:
                parts.append(name)
            else:
                parts.append('{}="{}"'.format(name, html.escape(value, quote=True)))

        return '<{}{}>'.format(' '.join(parts), '/' if self_closing else '')

    def _rebase_css_urls(self, css_data, css_dir):
        """Prefix relative ``url(...)`` targets in *css_data* with *css_dir*.

        Quoting is preserved.  Targets that are scheme-qualified (``http:``,
        ``data:``), protocol-relative, root-relative or fragment-only are
        left untouched because they do not depend on the stylesheet location.
        """
        def rebase(match):
            raw = match.group(1).strip()
            quoted = len(raw) >= 2 and raw[0] in '"\'' and raw[-1] == raw[0]
            quote = raw[0] if quoted else ''
            target = raw[1:-1] if quoted else raw
            if (not target or target.startswith(('/', '#'))
                    or self._REMOTE_REF.match(target)):
                return match.group(0)
            return 'url({0}{1}/{2}{0})'.format(quote, css_dir, target)

        return self._CSS_URL.sub(rebase, css_data)

    def _inline_css(self, file):
        """Return a ``<style>`` element holding the stylesheet at *file*.

        Relative ``url(...)`` targets are rewritten so they still resolve
        from the page (the current working directory) rather than from the
        stylesheet's own directory.
        """
        # dirname() is '' for a stylesheet next to the page; relpath('')
        # raises ValueError, so fall back to the current directory.
        css_relpath = normpath_s(relpath(dirname(file) or '.'))
        css_data = self._rebase_css_urls(_read_file(file), css_relpath)
        return '<style>{}</style>'.format(css_data)

    def _inline_script(self, file):
        """Return a ``<script>`` element holding the script at *file*."""
        return '<script>{}</script>'.format(_read_file(file))


if __name__ == '__main__':
    # Resolve matches to absolute paths up front: the loop below changes the
    # working directory, which would invalidate relative glob results.
    files = [abspath(file) for file in glob.glob(TARGETS, recursive=True)]

    for file in files:
        dirpath = dirname(file)

        print("--- {} ---".format(file))
        with open(file, encoding='utf-8') as fp:
            data = fp.read()

        # href/src values are read relative to the working directory, so
        # move into the page's own directory before parsing it.
        os.chdir(dirpath)
        # A distinct name keeps the module-level argparse ``parser`` intact.
        html_parser = MyHTMLParser()
        html_parser.feed(data)
        # close() forces processing of any text the parser is still
        # buffering; without it trailing data could be left out.
        html_parser.close()

        # Overwrite the page in place with its inlined version.
        with open(file, mode='w', encoding='utf-8') as fp:
            fp.write(html_parser.html)
	'''
	usage) python jacoco-report-inlinize.py 'target/site/jacoco/**/*.html'

	'''
	import os
	import re
	from os.path import dirname, abspath, relpath, normpath
	import glob
	import functools

	from html.parser import HTMLParser

	import argparse


	# Command-line interface: one positional glob pattern selecting the HTML
	# pages to rewrite.
	parser = argparse.ArgumentParser()
	parser.add_argument('targets_glob', metavar='TargetGlob',
	                    help='target html files with glob. eg.) target/site/jacoco/**/*.html', type=str, nargs=1)
	args = parser.parse_args()

	# nargs=1 produces a one-element list; take the bare pattern string.
	TARGETS = args.targets_glob[0]

	@functools.lru_cache()
	def _read_abs_file(abs_path):
	    """Return the UTF-8 text of the file at *abs_path*, cached per path."""
	    with open(abs_path, encoding='utf-8') as fp:
	        return fp.read()


	def _read_file(filepath):
	    """Return the UTF-8 decoded contents of *filepath*.

	    Results are cached, keyed on the absolute path.  Callers change the
	    working directory between pages, so the same relative path can name
	    different files over the life of the process; caching on the raw
	    string would hand back stale content.

	    Raises:
	        OSError: if the file cannot be opened.
	        UnicodeDecodeError: if the file is not valid UTF-8.
	    """
	    return _read_abs_file(abspath(filepath))

	def normpath_s(path):
	    """Normalize *path* and return it with forward slashes only."""
	    normalized = normpath(path)
	    return '/'.join(normalized.split('\\'))

	class MyHTMLParser(HTMLParser):
	    """Re-emit parsed HTML with local stylesheets and scripts inlined.

	    ``<link rel="stylesheet" href=...>`` is replaced by ``<style>...</style>``
	    and ``<script src=...>`` by ``<script>...</script>``; the referenced
	    files are read relative to the current working directory.  Every other
	    parser event is written back to ``self.html``.

	    Attributes:
	        html: the output markup accumulated so far.
	        expect_endtag: True right after an element was replaced, while its
	            closing tag (if any) is still to be dropped.
	    """

	    # CSS ``url(...)`` token; group 1 is the raw, possibly quoted, target.
	    _CSS_URL = re.compile(r'url\(([^\)]+)\)')
	    # References that do not name a file on disk: ``scheme:`` (two or more
	    # characters, so a Windows drive letter still counts as local) or
	    # a protocol-relative ``//host``.
	    _REMOTE_REF = re.compile(r'(?:[a-zA-Z][a-zA-Z0-9+.\-]+:|//)')

	    def __init__(self):
	        # convert_charrefs=False delivers ``&lt;`` / ``&#169;`` as separate
	        # events so they can be written back verbatim; decoding them into
	        # raw ``<`` / ``&`` would corrupt the re-emitted markup.
	        super().__init__(convert_charrefs=False)
	        self.html = ''
	        self.expect_endtag = False
	        self._replaced_tag = None  # tag name whose end tag is to be dropped

	    def handle_starttag(self, tag, attrs):
	        replacement = self._inline_replacement(tag, attrs)
	        if replacement is None:
	            self._emit(self._start_tag(tag, attrs))
	            return

	        self.html += replacement
	        self.expect_endtag = True
	        self._replaced_tag = tag

	    def handle_startendtag(self, tag, attrs):
	        # Self-closing elements have no separate end tag, so nothing is
	        # pending afterwards; non-replaced ones keep their ``/>`` form.
	        replacement = self._inline_replacement(tag, attrs)
	        if replacement is None:
	            replacement = self._start_tag(tag, attrs, self_closing=True)
	        self._emit(replacement)

	    def handle_endtag(self, tag):
	        if self.expect_endtag:
	            self.expect_endtag = False
	            if tag == self._replaced_tag:
	                # Closing tag of an element already replaced by inlined markup.
	                return

	        self.html += '</{}>'.format(tag)

	    def handle_data(self, data):
	        if self.expect_endtag and self._replaced_tag == 'script':
	            # Inline body of a <script src=...> that was just replaced:
	            # whitespace is dropped, real content cannot be merged.
	            if data.strip():
	                raise ValueError("unexpected data.")
	            return

	        self._emit(data)

	    def handle_entityref(self, name):
	        self._emit('&{};'.format(name))

	    def handle_charref(self, name):
	        self._emit('&#{};'.format(name))

	    def handle_comment(self, data):
	        self._emit('<!--{}-->'.format(data))

	    def handle_decl(self, decl):
	        self._emit('<!{}>'.format(decl))

	    def handle_pi(self, data):
	        self._emit('<?{}>'.format(data))

	    def _emit(self, text):
	        """Append *text* to the output.

	        Any event other than the matching end tag means a replaced void
	        ``<link>`` had no closing tag, so the pending expectation is dropped
	        instead of swallowing an unrelated end tag later.
	        """
	        self.expect_endtag = False
	        self.html += text

	    def _inline_replacement(self, tag, attrs):
	        """Return inlined markup for a local stylesheet/script, else None."""
	        attrdict = dict(attrs)
	        if tag == 'link' and attrdict.get('rel') == 'stylesheet':
	            href = attrdict.get('href')
	            if href and not self._REMOTE_REF.match(href):
	                return self._inline_css(href)
	        elif tag == 'script':
	            src = attrdict.get('src')
	            if src and not self._REMOTE_REF.match(src):
	                return self._inline_script(src)
	        return None

	    def _start_tag(self, tag, attrs, self_closing=False):
	        """Serialize a start tag from *tag* and its ``(name, value)`` pairs.

	        Values are HTML-escaped because the parser hands them over decoded;
	        a value of None (attribute without ``=``) is written as a bare name.
	        """
	        parts = [tag]
	        for name, value in attrs:
	            if value is None:
	                parts.append(name)
	            else:
	                parts.append('{}="{}"'.format(name, html.escape(value, quote=True)))

	        return '<{}{}>'.format(' '.join(parts), '/' if self_closing else '')

	    def _rebase_css_urls(self, css_data, css_dir):
	        """Prefix relative ``url(...)`` targets in *css_data* with *css_dir*.

	        Quoting is preserved.  Targets that are scheme-qualified (``http:``,
	        ``data:``), protocol-relative, root-relative or fragment-only are
	        left untouched because they do not depend on the stylesheet location.
	        """
	        def rebase(match):
	            raw = match.group(1).strip()
	            quoted = len(raw) >= 2 and raw[0] in '"\'' and raw[-1] == raw[0]
	            quote = raw[0] if quoted else ''
	            target = raw[1:-1] if quoted else raw
	            if (not target or target.startswith(('/', '#'))
	                    or self._REMOTE_REF.match(target)):
	                return match.group(0)
	            return 'url({0}{1}/{2}{0})'.format(quote, css_dir, target)

	        return self._CSS_URL.sub(rebase, css_data)

	    def _inline_css(self, file):
	        """Return a ``<style>`` element holding the stylesheet at *file*.

	        Relative ``url(...)`` targets are rewritten so they still resolve
	        from the page (the current working directory) rather than from the
	        stylesheet's own directory.
	        """
	        # dirname() is '' for a stylesheet next to the page; relpath('')
	        # raises ValueError, so fall back to the current directory.
	        css_relpath = normpath_s(relpath(dirname(file) or '.'))
	        css_data = self._rebase_css_urls(_read_file(file), css_relpath)
	        return '<style>{}</style>'.format(css_data)

	    def _inline_script(self, file):
	        """Return a ``<script>`` element holding the script at *file*."""
	        return '<script>{}</script>'.format(_read_file(file))


	if __name__ == '__main__':
	    # Resolve matches to absolute paths up front: the loop below changes the
	    # working directory, which would invalidate relative glob results.
	    files = [abspath(file) for file in glob.glob(TARGETS, recursive=True)]

	    for file in files:
	        dirpath = dirname(file)

	        print("--- {} ---".format(file))
	        with open(file, encoding='utf-8') as fp:
	            data = fp.read()

	        # href/src values are read relative to the working directory, so
	        # move into the page's own directory before parsing it.
	        os.chdir(dirpath)
	        # A distinct name keeps the module-level argparse ``parser`` intact.
	        html_parser = MyHTMLParser()
	        html_parser.feed(data)
	        # close() forces processing of any text the parser is still
	        # buffering; without it trailing data could be left out.
	        html_parser.close()

	        # Overwrite the page in place with its inlined version.
	        with open(file, mode='w', encoding='utf-8') as fp:
	            fp.write(html_parser.html)