mimoo/README.md

## README.md

      
    Raw
  

              README.md
            
          
    I wrote a very primitive script to produce a flamegraph out of a number of asciidoc files.
See this blog post for some idea of the result.

WARNING: It's heavily tailored to my own setup, so I'm not sure it'll work directly with yours; you might have some customization to do.
(If there's enough interest I could do a more general script / Dockerfile.)
How to use?

get the flamegraph tool:
git clone --depth 1 git@github.com:brendangregg/FlameGraph.git

save the Python code included in this gist as get_sections.py
then run the following command:
python get_sections.py flamegraph | ./FlameGraph/flamegraph.pl > flamegraph.svg

then open the flamegraph.svg in your browser.
note that you can also check what your sections are with this command:
python get_sections.py

What is the expected setup

This is the expected file structure:
your_folder/
├── manuscript/ # your book
│   ├── 1_some_chapter.adoc # chapters must be named <num>_<name>.adoc
│   └── 2_some_other_chapter.adoc
├── FlameGraph/ # the clone of the FlameGraph repository
└── get_sections.py # the file contained in this gist

If you have a different file structure, you might want to replace this snippet of code with a manual list of your files:
    filenames = glob.glob("./manuscript/*_*.adoc")
    filenames = sorted(filenames, key=lambda line: int(
        os.path.basename(line).split("_", 1)[0]))
as well as get the chapter number in a different way:
            # get chapter number
            chapt_num = int(name.split("_", 1)[0])
It'll expect .adoc files that start with the name of the chapter as depth-1 heading:
= Chapter Title

== first section

=== first subsection

if you do not have such a convention, you might want to change this part:
                match depth:
                    # main title: initialization
                    case 1:
with case x if len(prefix) == 0:

  
## get_sections.py
import glob
import os
import re
import sys
from collections.abc import Iterator

####


class Chapter:
    """Per-section word counter for a single AsciiDoc chapter.

    Attributes set by the constructor:
      num  -- chapter number, stored as a string
      data -- the chapter's lines, consumed by parse()
    """

    def __init__(self, num: int, data: Iterator[str]):
        self.num = str(num)
        self.data = data

    def parse(self):
        """Count the words found under every titled section.

        The first line is always skipped, as is everything between a pair
        of "////" lines (comments) or "****" lines (formulas). A line that
        starts with "=" is a section title whose depth is the number of
        leading "=" reported by count_equal().

        Returns:
            A dict mapping a section path (built by path_from(), levels
            joined with ";") to the count_words() total of the text found
            directly under that title, in document order.

        Raises:
            ValueError: if a depth-1 title appears after another title, or
                if two titles resolve to the same path.
        """
        words = {}
        nums = []
        prefix = []
        path = None  # path of the section currently being counted
        in_comment = False
        in_formula = False

        for index, line in enumerate(self.data):
            # the first line of the chapter is never counted
            if index == 0:
                continue

            # A "////" line opens or closes a comment block. The delimiter
            # is not content, so it is skipped in both cases.
            if line.startswith("////"):
                in_comment = not in_comment
                continue
            if in_comment:
                continue

            # Same for "****", which delimits formulas.
            if line.startswith("****"):
                in_formula = not in_formula
                continue
            if in_formula:
                continue

            depth = count_equal(line)
            if depth == 0:
                # ordinary text; ignored until the first title is seen
                if path is not None:
                    words[path] += count_words(line)
                continue

            # Everything after the leading "=" run is the title, even when
            # the title itself contains "=".
            name = simplify_name(line.lstrip("=").rstrip("\n"))
            if name == "":
                continue

            if depth == 1:
                # main title: it initializes the numbering, so it must be
                # the first title of the chapter
                if prefix or words:
                    raise ValueError(
                        f"depth-1 title {name!r} found after another title"
                    )
                nums = [self.num]
                prefix = [name]
            elif depth > len(prefix):
                # nesting
                nums.append(1)
                prefix.append(name)
            else:
                # Same level or un-nesting. A title may close several
                # levels at once (e.g. "====" followed by "=="), so climb
                # until the stack is as deep as the new title.
                while len(prefix) > depth:
                    nums.pop()
                    prefix.pop()
                nums[-1] += 1
                prefix[-1] = name

            path = path_from(nums, prefix)
            if path in words:
                raise ValueError(f"duplicate section path: {path!r}")
            words[path] = 0

        return words


def path_from(nums, prefix):
    """Build the ";"-separated section path for the current position.

    Each level contributes "<dotted number> <title>", where the dotted
    number joins the stringified entries of `nums` up to that level. The
    inputs are not modified.
    """
    labels = [str(value) for value in nums]

    # pad single-character chapter numbers so chapters sort correctly
    if len(labels[0]) == 1:
        labels[0] = "0" + labels[0]

    return ";".join(
        ".".join(labels[: level + 1]) + " " + title
        for level, title in enumerate(prefix)
    )


def simplify_name(name: str):
    """Return `name` with leading and trailing spaces removed.

    Only the space character is trimmed; other whitespace and the spaces
    inside the name are left untouched.
    """
    return name.strip(" ")

####


def count_equal(line):
    """Return how many "=" start `line`, looking at its first 5 characters only.

    The result is therefore between 0 and 5; 0 means the line is not a title.
    """
    head = line[:5]
    return len(head) - len(head.lstrip("="))


def count_words(line):
    """Return the number of whitespace-separated words in `line`.

    Splitting on any run of whitespace (rather than on single spaces) means
    a blank line counts as 0 words and repeated or leading spaces do not
    inflate the total.
    """
    return len(line.split())


def parse_chapter(chapt_num, chapter):
    """Print the numbered outline (one line per title) of one chapter.

    Args:
        chapt_num: starting value of the level-0 counter; every title found
            at depth 1 increments it before it is printed.
        chapter: iterable of the chapter's lines.

    The first line is always skipped, as is everything between a pair of
    "////" lines (comments) or "****" lines (formulas). Nothing is returned;
    the outline is written by print_section_number().
    """
    section_depth = {0: chapt_num}
    in_comment = False
    in_formula = False

    for index, line in enumerate(chapter):
        # the first line of the chapter is never inspected
        if index == 0:
            continue

        # "////" toggles a comment block, "****" a formula block
        if line.startswith("////"):
            in_comment = not in_comment
            continue
        if in_comment:
            continue
        if line.startswith("****"):
            in_formula = not in_formula
            continue
        if in_formula:
            continue

        # only titles (e.g. ==== TITLE) matter for the outline
        depth = count_equal(line)
        if depth == 0:
            continue

        # Everything after the leading "=" run is the title, even when the
        # title itself contains "=". The leading space is kept because it
        # separates the number from the title in the printed line.
        name = line.lstrip("=").rstrip("\n")

        # ignore empty titles
        if not name.strip():
            continue

        section_depth = print_section_number(section_depth, depth - 1, name)


def print_section_number(section_depth, depth, name):
    """Advance the counter for `depth`, then print the numbered title.

    Args:
        section_depth: dict mapping a 0-based heading level to its current
            counter; updated in place.
        depth: 0-based level of the title being printed.
        name: title text, appended verbatim after the number.

    Returns:
        The same `section_depth` dict, after the update.
    """
    section_depth[depth] = section_depth.get(depth, 0) + 1

    # A new title at this level restarts the numbering of everything below it.
    for level in section_depth:
        if level > depth:
            section_depth[level] = 0

    # Levels the document skipped have no counter yet; number them 0 rather
    # than failing with KeyError.
    number = "".join(
        f"{section_depth.setdefault(level, 0)}." for level in range(depth + 1)
    )
    print("-" * depth + number + name)
    return section_depth


def clear_depth(section_depth, depth):
    """Zero the counters of the levels directly below `depth`.

    Walks depth+1, depth+2, ... and stops at the first level that has no
    entry. The dict is modified in place and also returned.
    """
    level = depth + 1
    while level in section_depth:
        section_depth[level] = 0
        level += 1
    return section_depth


def main():
    """Command-line entry point.

    Reads every "./manuscript/<num>_<name>.adoc" file in chapter-number
    order. The optional first argument selects the output:
      "flamegraph" -- print one "<section path> <word count>" line per
                      section, as returned by Chapter.parse()
      "verbose"    -- print each file name before its outline
      (anything else) -- print the outline produced by parse_chapter()
    """
    mode = sys.argv[1] if len(sys.argv) > 1 else ""
    flamegraph = mode == "flamegraph"
    verbose = mode == "verbose"

    # Collect the chapter files together with their number. A file that
    # matches the glob but has no integer prefix is reported and skipped
    # instead of aborting the whole run.
    chapters = []
    for filename in glob.glob("./manuscript/*_*.adoc"):
        name = os.path.basename(filename)
        try:
            chapt_num = int(name.split("_", 1)[0])
        except ValueError:
            print(f"skipping {filename}: no chapter number", file=sys.stderr)
            continue
        chapters.append((chapt_num, name, filename))
    chapters.sort()

    # parse each file one by one
    for chapt_num, name, filename in chapters:
        with open(filename, "r", encoding="utf-8") as f:
            if verbose:
                print(name)

            if flamegraph:
                sections = Chapter(chapt_num, iter(f)).parse()
                for path, count in sections.items():
                    print(path, count)
            else:
                # parse_chapter starts one below the chapter number because
                # the chapter's own depth-1 title increments the counter
                parse_chapter(chapt_num - 1, f)


# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
	import glob
	import re
	import os
	import sys

	####


	class Chapter:
	    """Per-section word counter for a single AsciiDoc chapter.

	    Attributes set by the constructor:
	      num  -- chapter number, stored as a string
	      data -- the chapter's lines, consumed by parse()
	    """

	    def __init__(self, num: int, data: Iterator[str]):
	        self.num = str(num)
	        self.data = data

	    def parse(self):
	        """Count the words found under every titled section.

	        The first line is always skipped, as is everything between a pair
	        of "////" lines (comments) or "****" lines (formulas). A line that
	        starts with "=" is a section title whose depth is the number of
	        leading "=" reported by count_equal().

	        Returns:
	            A dict mapping a section path (built by path_from(), levels
	            joined with ";") to the count_words() total of the text found
	            directly under that title, in document order.

	        Raises:
	            ValueError: if a depth-1 title appears after another title, or
	                if two titles resolve to the same path.
	        """
	        words = {}
	        nums = []
	        prefix = []
	        path = None  # path of the section currently being counted
	        in_comment = False
	        in_formula = False

	        for index, line in enumerate(self.data):
	            # the first line of the chapter is never counted
	            if index == 0:
	                continue

	            # A "////" line opens or closes a comment block. The delimiter
	            # is not content, so it is skipped in both cases.
	            if line.startswith("////"):
	                in_comment = not in_comment
	                continue
	            if in_comment:
	                continue

	            # Same for "****", which delimits formulas.
	            if line.startswith("****"):
	                in_formula = not in_formula
	                continue
	            if in_formula:
	                continue

	            depth = count_equal(line)
	            if depth == 0:
	                # ordinary text; ignored until the first title is seen
	                if path is not None:
	                    words[path] += count_words(line)
	                continue

	            # Everything after the leading "=" run is the title, even when
	            # the title itself contains "=".
	            name = simplify_name(line.lstrip("=").rstrip("\n"))
	            if name == "":
	                continue

	            if depth == 1:
	                # main title: it initializes the numbering, so it must be
	                # the first title of the chapter
	                if prefix or words:
	                    raise ValueError(
	                        f"depth-1 title {name!r} found after another title"
	                    )
	                nums = [self.num]
	                prefix = [name]
	            elif depth > len(prefix):
	                # nesting
	                nums.append(1)
	                prefix.append(name)
	            else:
	                # Same level or un-nesting. A title may close several
	                # levels at once (e.g. "====" followed by "=="), so climb
	                # until the stack is as deep as the new title.
	                while len(prefix) > depth:
	                    nums.pop()
	                    prefix.pop()
	                nums[-1] += 1
	                prefix[-1] = name

	            path = path_from(nums, prefix)
	            if path in words:
	                raise ValueError(f"duplicate section path: {path!r}")
	            words[path] = 0

	        return words


	def path_from(nums, prefix):
	    """Build the ";"-separated section path for the current position.

	    Each level contributes "<dotted number> <title>", where the dotted
	    number joins the stringified entries of `nums` up to that level. The
	    inputs are not modified.
	    """
	    labels = [str(value) for value in nums]

	    # pad single-character chapter numbers so chapters sort correctly
	    if len(labels[0]) == 1:
	        labels[0] = "0" + labels[0]

	    return ";".join(
	        ".".join(labels[: level + 1]) + " " + title
	        for level, title in enumerate(prefix)
	    )


	def simplify_name(name: str):
	    """Return `name` with leading and trailing spaces removed.

	    Only the space character is trimmed; other whitespace and the spaces
	    inside the name are left untouched.
	    """
	    return name.strip(" ")

	####


	def count_equal(line):
	    """Return how many "=" start `line`, looking at its first 5 characters only.

	    The result is therefore between 0 and 5; 0 means the line is not a title.
	    """
	    head = line[:5]
	    return len(head) - len(head.lstrip("="))


	def count_words(line):
	    """Return the number of whitespace-separated words in `line`.

	    Splitting on any run of whitespace (rather than on single spaces) means
	    a blank line counts as 0 words and repeated or leading spaces do not
	    inflate the total.
	    """
	    return len(line.split())


	def parse_chapter(chapt_num, chapter):
	    """Print the numbered outline (one line per title) of one chapter.

	    Args:
	        chapt_num: starting value of the level-0 counter; every title found
	            at depth 1 increments it before it is printed.
	        chapter: iterable of the chapter's lines.

	    The first line is always skipped, as is everything between a pair of
	    "////" lines (comments) or "****" lines (formulas). Nothing is returned;
	    the outline is written by print_section_number().
	    """
	    section_depth = {0: chapt_num}
	    in_comment = False
	    in_formula = False

	    for index, line in enumerate(chapter):
	        # the first line of the chapter is never inspected
	        if index == 0:
	            continue

	        # "////" toggles a comment block, "****" a formula block
	        if line.startswith("////"):
	            in_comment = not in_comment
	            continue
	        if in_comment:
	            continue
	        if line.startswith("****"):
	            in_formula = not in_formula
	            continue
	        if in_formula:
	            continue

	        # only titles (e.g. ==== TITLE) matter for the outline
	        depth = count_equal(line)
	        if depth == 0:
	            continue

	        # Everything after the leading "=" run is the title, even when the
	        # title itself contains "=". The leading space is kept because it
	        # separates the number from the title in the printed line.
	        name = line.lstrip("=").rstrip("\n")

	        # ignore empty titles
	        if not name.strip():
	            continue

	        section_depth = print_section_number(section_depth, depth - 1, name)


	def print_section_number(section_depth, depth, name):
	    """Advance the counter for `depth`, then print the numbered title.

	    Args:
	        section_depth: dict mapping a 0-based heading level to its current
	            counter; updated in place.
	        depth: 0-based level of the title being printed.
	        name: title text, appended verbatim after the number.

	    Returns:
	        The same `section_depth` dict, after the update.
	    """
	    section_depth[depth] = section_depth.get(depth, 0) + 1

	    # A new title at this level restarts the numbering of everything below it.
	    for level in section_depth:
	        if level > depth:
	            section_depth[level] = 0

	    # Levels the document skipped have no counter yet; number them 0 rather
	    # than failing with KeyError.
	    number = "".join(
	        f"{section_depth.setdefault(level, 0)}." for level in range(depth + 1)
	    )
	    print("-" * depth + number + name)
	    return section_depth


	def clear_depth(section_depth, depth):
	    """Zero the counters of the levels directly below `depth`.

	    Walks depth+1, depth+2, ... and stops at the first level that has no
	    entry. The dict is modified in place and also returned.
	    """
	    level = depth + 1
	    while level in section_depth:
	        section_depth[level] = 0
	        level += 1
	    return section_depth


	def main():
	    """Command-line entry point.

	    Reads every "./manuscript/<num>_<name>.adoc" file in chapter-number
	    order. The optional first argument selects the output:
	      "flamegraph" -- print one "<section path> <word count>" line per
	                      section, as returned by Chapter.parse()
	      "verbose"    -- print each file name before its outline
	      (anything else) -- print the outline produced by parse_chapter()
	    """
	    mode = sys.argv[1] if len(sys.argv) > 1 else ""
	    flamegraph = mode == "flamegraph"
	    verbose = mode == "verbose"

	    # Collect the chapter files together with their number. A file that
	    # matches the glob but has no integer prefix is reported and skipped
	    # instead of aborting the whole run.
	    chapters = []
	    for filename in glob.glob("./manuscript/*_*.adoc"):
	        name = os.path.basename(filename)
	        try:
	            chapt_num = int(name.split("_", 1)[0])
	        except ValueError:
	            print(f"skipping {filename}: no chapter number", file=sys.stderr)
	            continue
	        chapters.append((chapt_num, name, filename))
	    chapters.sort()

	    # parse each file one by one
	    for chapt_num, name, filename in chapters:
	        with open(filename, "r", encoding="utf-8") as f:
	            if verbose:
	                print(name)

	            if flamegraph:
	                sections = Chapter(chapt_num, iter(f)).parse()
	                for path, count in sections.items():
	                    print(path, count)
	            else:
	                # parse_chapter starts one below the chapter number because
	                # the chapter's own depth-1 title increments the counter
	                parse_chapter(chapt_num - 1, f)


	# Run the command-line entry point only when executed as a script.
	if __name__ == "__main__":
	    main()