brabect1/customizing_rst_syntax.rst

## customizing_rst_syntax.rst

      
    Raw
  

              customizing_rst_syntax.rst
            
          
    Customizing reStructuredText (rst) for Domain-specific Syntax


document tree
created by a Parser component (i.e. from reStructuredText input)
manually constructed


docutils components
Reader: Intended to obtain input and pass it to a Parser.
Parser: Generates a (docutils) document tree from a Reader input. docutils contains only a reStructuredText parser, but others may exist (e.g. markdown, html, etc.).
Writer: Turns a document tree into an output (e.g. html, LaTeX).


other "pieces"
transforms: Called by a Parser for on-the-fly document tree manipulation.
visitors: Operate on an existing document tree (i.e. after parsing). Can collect various information and/or manipulate the document tree.
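roles: Inline extension mechanism for interpreted text (``:rolename:`text```); a role function turns the marked-up text into inline nodes. See https://docutils.sourceforge.io/docs/ref/rst/roles.html (see the example that creates a custom date role as an alternative to the ``date::`` directive, https://docutils.sourceforge.io/docs/ref/rst/directives.html#date)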
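directives: Block-level extension mechanism (``.. name:: arguments`` followed by an indented block of options and content); a directive class turns the block into document tree nodes (see "Custom Directives" below).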


Custom Roles


Custom Directives

https://docutils.sourceforge.io/docs/ref/rst/directives.html
Directives have the following syntax:
+-------+-------------------------------+
| ".. " | directive type "::" directive |
+-------+ block                         |
        |                               |
        +-------------------------------+

Examples:
.. image:: /path/to/image.png

.. figure:: /path/to/image.png
   :width: 600px

.. note:: This is the note's 1st paragraph
   that spans multiple lines.

   This is the note's 2nd paragraph.

   Notes are general blocks and can embed usual markup elements:

   * text elements with inline markup
   * list elements
   * other directives

https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#directives
howto: https://docutils.sourceforge.io/docs/howto/rst-directives.html


## impl_custom_admonition_processing.py
import docutils
from docutils.parsers.rst.directives.admonitions import BaseAdmonition

class myadmonition(docutils.nodes.Admonition, docutils.nodes.Element):
    """Marker node type for elements derived from the `my_extension` directive.

    The class adds nothing of its own; it exists only so that such elements
    can be identified specifically.
    """

class MyExtensionAdmonition(BaseAdmonition):
    """Admonition directive whose node class is `myadmonition`."""

    # node type associated with this directive
    node_class = myadmonition

# This overrides what directive class will be used to process the
# `my_extension` directive. The directives module is addressed by its fully
# qualified name (no bare `directives` name is imported in this file) and the
# class is registered through the public registration function.
docutils.parsers.rst.directives.register_directive('my_extension', MyExtensionAdmonition)

## impl_custom_directive_processing.py
import docutils
import docutils.nodes
import docutils.parsers.rst
import docutils.parsers.rst.directives

class mydirective(docutils.nodes.Element):
    """Marker node type for elements derived from the `my_extension` directive.

    The class adds nothing of its own; it exists only so that such elements
    can be identified specifically.
    """

class MyExtensionDirective(docutils.parsers.rst.Directive):
    """Directive producing a `mydirective` element with a title and a body.

    The single required argument becomes the title; the directive content is
    parsed as nested markup and added below the title.
    """

    has_content = True
    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True

    def run(self):
        """Build the `mydirective` node and return it as a one-element list."""
        self.assert_has_content()
        raw_body = '\n'.join(self.content)
        element = mydirective(raw_body, **self.options)
        self.add_name(element)

        # The directive argument is turned into a title node. Titles may
        # contain inline markup, so the argument goes through the inline
        # parser, which gives back the inline nodes and any system messages.
        heading = self.arguments[0]
        inline_nodes, system_messages = self.state.inline_text(
            heading, self.lineno)
        # ends up calling the TextElement(rawsource, text, *children) constructor
        title = docutils.nodes.title(heading, '', *inline_nodes)
        location = self.state_machine.get_source_and_line(self.lineno)
        title.source, title.line = location

        # the title goes first, then the messages from the inline parsing
        element.append(title)
        element.extend(system_messages)

        # parse the directive content into children of the element
        self.state.nested_parse(self.content, self.content_offset, element)
        return [element]

# This overrides what directive class will be used to process the
# `my_extension` directive. The directives module is addressed by its fully
# qualified name (no bare `directives` name is imported in this file) and the
# class is registered through the public registration function.
docutils.parsers.rst.directives.register_directive('my_extension', MyExtensionDirective)

## xper_custom_directive.py
#! /usr/bin/env python

# Copyright 2024 Tomas Brabec
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Best effort: adopt the locale configured in the environment. When the
# platform does not support that locale the script simply carries on with
# the current one; unrelated exceptions (e.g. KeyboardInterrupt) are no
# longer swallowed.
import locale

try:
    locale.setlocale(locale.LC_ALL, '')
except locale.Error:
    pass

import docutils.frontend
import docutils.utils
import docutils.parsers
import docutils.parsers.rst
import docutils.parsers.rst.directives

class mydirective(docutils.nodes.Element):
    """Marker node type for elements derived from the `my_extension` directive.

    The class adds nothing of its own; it exists only so that such elements
    can be identified specifically.
    """

class MyExtensionDirective(docutils.parsers.rst.Directive):
    """Directive producing a `mydirective` element with a title and a body.

    The single required argument becomes the title, the `my_attr` option is
    accepted with its value left unchanged, and the directive content is
    parsed as nested markup and added below the title.
    """

    has_content = True
    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {
        'my_attr': docutils.parsers.rst.directives.unchanged,
    }

    def run(self):
        """Build the `mydirective` node and return it as a one-element list."""
        self.assert_has_content()
        raw_body = '\n'.join(self.content)
        element = mydirective(raw_body, **self.options)
        self.add_name(element)

        # The directive argument is turned into a title node. Titles may
        # contain inline markup, so the argument goes through the inline
        # parser, which gives back the inline nodes and any system messages.
        heading = self.arguments[0]
        inline_nodes, system_messages = self.state.inline_text(
            heading, self.lineno)
        # ends up calling the TextElement(rawsource, text, *children) constructor
        title = docutils.nodes.title(heading, '', *inline_nodes)
        location = self.state_machine.get_source_and_line(self.lineno)
        title.source, title.line = location

        # the title goes first, then the messages from the inline parsing
        element.append(title)
        element.extend(system_messages)

        # parse the directive content into children of the element
        self.state.nested_parse(self.content, self.content_offset, element)
        return [element]

# This overrides what directive class will be used to process the
# `my_extension` directive. The public registration function is used instead
# of writing to the module's private `_directives` dictionary.
docutils.parsers.rst.directives.register_directive('my_extension', MyExtensionDirective)

# reStructuredText parser instance; it is also the component whose option
# definitions feed the settings below
parser_class = docutils.parsers.get_parser_class('restructuredtext')
parser = parser_class()

# Settings are built once, from the parser component, the configuration
# files and the command line arguments handed to parse_args().
option_parser = docutils.frontend.OptionParser(
    components=(parser,),
    read_config_files=True,
    description='')

settings = option_parser.parse_args()


# empty document tree the parser will fill in
document = docutils.utils.new_document('', settings)

text = '''
My title
========

1st paragraph

Subtitle
--------

2nd paragraph

.. my_extension:: This is my extension
   :my_attr: 1 2 3
     4 5 6

   1st extension paragraph

   #. 1st numbered item
   #. 2nd numbered item

3rd paragraph

- a
- b
'''

parser.parse(text, document)

# dump the resulting document tree
print(document.pformat())

## xper_date_role.py
#! /usr/bin/env python

# Copyright 2024 Tomas Brabec
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Best effort: adopt the locale configured in the environment. When the
# platform does not support that locale the script simply carries on with
# the current one; unrelated exceptions (e.g. KeyboardInterrupt) are no
# longer swallowed.
import locale

try:
    locale.setlocale(locale.LC_ALL, '')
except locale.Error:
    pass

import docutils.frontend
import docutils.utils
import docutils.parsers
import docutils.nodes
import docutils.parsers.rst.roles


def date_role(role, rawtext, text, lineno, inliner, options=None, content=None):
    """Role function that renders today's date as ``YYYY-MM-DD``.

    The interpreted text itself (`text`) is not used; whatever is written
    inside the role is replaced by the current date.

    :param rawtext: raw source of the role, stored on the created node.
    :param options: extra attributes passed on to the created inline node;
        ``None`` is treated as "no options".
    :returns: a pair ``([inline node], [])``; the second list (here always
        empty) is the place for messages.
    """
    import datetime

    # None defaults instead of mutable `{}` / `[]`, which would be shared
    # between all calls of the function
    options = {} if options is None else options
    today = datetime.datetime.today().strftime('%Y-%m-%d')
    node = docutils.nodes.inline(rawtext, today, **options)
    return [node], []

# make `:date:` available as an interpreted text role
docutils.parsers.rst.roles.register_local_role('date', date_role)

# reStructuredText parser instance; it is also the component whose option
# definitions feed the settings below
parser_class = docutils.parsers.get_parser_class('restructuredtext')
parser = parser_class()

# Settings are built once, from the parser component, the configuration
# files and the command line arguments handed to parse_args().
option_parser = docutils.frontend.OptionParser(
    components=(parser,),
    read_config_files=True,
    description='')

settings = option_parser.parse_args()


# empty document tree the parser will fill in
document = docutils.utils.new_document('', settings)

text = '''
My title
========

1st paragraph as of :date:`whatever` today

2nd paragraph

- a
- b
'''

parser.parse(text, document)

# dump the resulting document tree
print(document.pformat())

## xper_inline_text_construct.py
#! /usr/bin/env python

# Copyright 2024 Tomas Brabec
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Best effort: adopt the locale configured in the environment. When the
# platform does not support that locale the script simply carries on with
# the current one; unrelated exceptions (e.g. KeyboardInterrupt) are no
# longer swallowed.
import locale

try:
    locale.setlocale(locale.LC_ALL, '')
except locale.Error:
    pass

import docutils.frontend
import docutils.utils
import docutils.parsers

# reStructuredText parser instance; it is also the component whose option
# definitions feed the settings below
parser_class = docutils.parsers.get_parser_class('restructuredtext')
parser = parser_class()

# Settings are built once, from the parser component, the configuration
# files and the command line arguments handed to parse_args().
option_parser = docutils.frontend.OptionParser(
    components=(parser,),
    read_config_files=True,
    description='')

settings = option_parser.parse_args()


# empty document tree the parser will fill in
document = docutils.utils.new_document('', settings)

text = '''
My title
========

1st paragraph

2nd paragraph

- a
- b
'''

parser.parse(text, document)

# dump the resulting document tree
print(document.pformat())

## xper_manipulate_doctree.py
#! /usr/bin/env python

# Copyright 2024 Tomas Brabec
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Best effort: adopt the locale configured in the environment. When the
# platform does not support that locale the script simply carries on with
# the current one; unrelated exceptions (e.g. KeyboardInterrupt) are no
# longer swallowed.
import locale

try:
    locale.setlocale(locale.LC_ALL, '')
except locale.Error:
    pass

import docutils.frontend
import docutils.utils
import docutils.parsers

def is_note(node):
    """Tell whether `node` is a `note` element (``None`` never is)."""
    # isinstance() is already False for None, so no separate None test
    return isinstance(node, docutils.nodes.note)

def my_condition(node):
    """Tell whether `node` is a `list_item` whose text is exactly ``'a'``."""
    # covers None as well: isinstance() is False for it
    if not isinstance(node, docutils.nodes.list_item):
        return False
    return node.astext() == 'a'

# reStructuredText parser instance; it is also the component whose option
# definitions feed the settings below
parser_class = docutils.parsers.get_parser_class('restructuredtext')
parser = parser_class()

# Settings are built once, from the parser component, the configuration
# files and the command line arguments handed to parse_args().
option_parser = docutils.frontend.OptionParser(
    components=(parser,),
    read_config_files=True,
    description='')

settings = option_parser.parse_args()


# empty document tree the parser will fill in
document = docutils.utils.new_document('', settings)

text = '''
My title
========

.. note:: 1st paragraph

   2nd paragraph

3rd paragraph

.. note:: 4th paragraph

   - a
   - b
'''

parser.parse(text, document)

# tree as parsed, then a separator line
print(document.pformat())
print(20 * '=')

# in-place replacement of nodes
# (the matches are copied into a list first, because the tree is modified
# inside the loop)
for node in list(document.traverse(condition=is_note)):
    parent = node.parent
    childs = node.children
    parent.replace(node, childs)
    # note: the above is equivalent to ``node.replace_self(node.children)``

# deletion of nodes (again over a snapshot of the matches)
for node in list(document.traverse(condition=my_condition)):
    node.parent.remove(node)

# tree after the manipulation
print(document.pformat())

## xper_manual_doc_construct.py
#! /usr/bin/env python

# Copyright 2024 Tomas Brabec
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Best effort: adopt the locale configured in the environment. When the
# platform does not support that locale the script simply carries on with
# the current one; unrelated exceptions (e.g. KeyboardInterrupt) are no
# longer swallowed.
import locale

try:
    locale.setlocale(locale.LC_ALL, '')
except locale.Error:
    pass

import docutils.utils

# Document tree assembled by hand, without any parser involved.
document = docutils.utils.new_document('', None)

# title followed by the two paragraphs
document.append(docutils.nodes.title(text='My title'))
for paragraph_text in ('1st paragraph', '2nd paragraph'):
    document.append(docutils.nodes.paragraph(text=paragraph_text))

# bullet list holding one single-paragraph item per label
node = docutils.nodes.bullet_list()
node['bullet'] = '-'
for label in ('a', 'b'):
    subnode = docutils.nodes.list_item('')
    subnode.append(docutils.nodes.paragraph(text=label))
    node.append(subnode)

document.append(node)

print(document.pformat())

## xper_publisher_rst2html.py
#! /usr/bin/env python

# Copyright 2024 Tomas Brabec
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Best effort: adopt the locale configured in the environment. When the
# platform does not support that locale the script simply carries on with
# the current one; unrelated exceptions (e.g. KeyboardInterrupt) are no
# longer swallowed.
import locale

try:
    locale.setlocale(locale.LC_ALL, '')
except locale.Error:
    pass

import docutils.io
import docutils.core

source = '''
My title
========

1st paragraph

2nd paragraph

- a
- b
'''

# publisher that takes its input from a string and hands its output back
# as a string
pub = docutils.core.Publisher(
    source_class=docutils.io.StringInput,
    destination_class=docutils.io.StringOutput,
)

reader_name = 'standalone'
parser_name = 'restructuredtext'
writer_name = 'html'
pub.set_components(reader_name, parser_name, writer_name)

# populates the publisher default settings (from individual components)
settings = pub.get_settings()
# assign input source
pub.set_source(source=source, source_path='')
output = pub.publish()

print(output)

## xper_title_visitor.py
#! /usr/bin/env python

# Copyright 2024 Tomas Brabec
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Best effort: adopt the locale configured in the environment. When the
# platform does not support that locale the script simply carries on with
# the current one; unrelated exceptions (e.g. KeyboardInterrupt) are no
# longer swallowed.
import locale

try:
    locale.setlocale(locale.LC_ALL, '')
except locale.Error:
    pass

import docutils.frontend
import docutils.utils
import docutils.parsers
import docutils.nodes

class TitleCollectVisitor(docutils.nodes.SparseNodeVisitor):
    """Collect document and section titles into a nested dictionary.

    ``self.titles`` maps each title text to a dictionary of the titles
    nested directly below it, in the order in which the titles are visited.
    """

    def __init__(self, document):
        super().__init__(document)
        self.titles = {}

    def visit_title(self, node):
        """Record the text of title `node` at its nesting level.

        Titles whose parent is neither the document nor a section (level -1)
        are skipped.
        """
        level = self.get_sec_level(node.parent)
        if level < 0:
            return
        siblings = self.titles
        for _ in range(level):
            # descend into the most recently recorded title of each level
            siblings = siblings[list(siblings)[-1]]
        siblings[node.astext()] = {}

    def get_sec_level(self, node):
        """
          Return the nesting level of a title whose parent is `node`: 0 for
          the top level, -1 when `node` is neither a `document` nor a
          `section`.

          There are three cases to consider (all are due to `frontmatter`
          transform happening on the parsed input). First::

              Title
              =====

              Subtitle
              --------

          which yields::

              <document>
                  <title>
                  <subtitle>

          Second::

              Title
              =====

              Subtitle 1
              ----------

              Subtitle 2
              ----------

          which yields::

              <document>
                  <title>
                  <section>
                      <title>
                  <section>
                      <title>

          And eventually third::

              Title 1
              =======

              Subtitle 1
              ----------

              Title 2
              =======

          which yields::

              <document>
                  <section>
                      <title>
                      <section>
                          <title>
                  <section>
                      <title>
        """
        if isinstance(node, docutils.nodes.document):
            # This will be called for the 1st (`title` and `subtitle`) and
            # 2nd case (`title` only)
            return 0
        if not isinstance(node, docutils.nodes.section):
            return -1

        # This will be called for the 2nd (`subtitle` only) and 3rd case.
        # To distinguish the two, we need to see if the `document` node
        # contains a `title` child.
        depth = 0
        # Walk up to the `document` node, counting the steps. The walk also
        # stops when the ancestors run out (a section that is not attached
        # to any document) instead of dereferencing ``None``.
        while node is not None and not isinstance(node, docutils.nodes.document):
            node = node.parent
            depth += 1
        has_doc_title = node is not None and any(
            isinstance(child, docutils.nodes.title) for child in node.children)
        if not has_doc_title:
            # no document-level title: the outermost sections are level 0
            depth -= 1
        return depth

    def print_titles(self, d=None, indent=''):
        """Print the titles in `d` (default: all collected titles) as a
        nested bullet list, each level indented by two more spaces."""
        if d is None:
            d = self.titles

        for title_text, subtitles in d.items():
            print(f"{indent}- {title_text}")
            self.print_titles(subtitles, indent + '  ')

# reStructuredText parser instance; it is also the component whose option
# definitions feed the settings below
parser_class = docutils.parsers.get_parser_class('restructuredtext')
parser = parser_class()

# Settings are built once, from the parser component, the configuration
# files and the command line arguments handed to parse_args().
option_parser = docutils.frontend.OptionParser(
    components=(parser,),
    read_config_files=True,
    description='')

settings = option_parser.parse_args()


# empty document tree the parser will fill in
document = docutils.utils.new_document('', settings)

text = '''
My title
========

1st paragraph

1st Subtitle
------------

2nd paragraph

1st SubSubtitle
...............

2nd SubSubtitle
...............

2nd Subtitle
------------

- a
- b
'''

parser.parse(text, document)

# collect the titles while walking the tree, then print them as an outline
visitor = TitleCollectVisitor(document)
document.walkabout(visitor)

visitor.print_titles()
	import docutils
	from docutils.parsers.rst.directives.admonitions import BaseAdmonition

	class myadmonition(docutils.nodes.Admonition, docutils.nodes.Element):
	    """Marker node type for elements derived from the `my_extension` directive.

	    The class adds nothing of its own; it exists only so that such elements
	    can be identified specifically.
	    """

	class MyExtensionAdmonition(BaseAdmonition):
	    """Admonition directive whose node class is `myadmonition`."""

	    # node type associated with this directive
	    node_class = myadmonition

	# This overrides what directive class will be used to process the
	# `my_extension` directive. The directives module is addressed by its fully
	# qualified name (no bare `directives` name is imported here) and the class
	# is registered through the public registration function.
	docutils.parsers.rst.directives.register_directive('my_extension', MyExtensionAdmonition)
	#! /usr/bin/env python

	# Copyright 2024 Tomas Brabec
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	# Best effort: adopt the locale configured in the environment. When the
	# platform does not support that locale the script simply carries on with
	# the current one; unrelated exceptions (e.g. KeyboardInterrupt) are not
	# swallowed.
	import locale

	try:
	    locale.setlocale(locale.LC_ALL, '')
	except locale.Error:
	    pass

	import docutils.frontend
	import docutils.utils
	import docutils.parsers
	import docutils.parsers.rst
	import docutils.parsers.rst.directives

	class mydirective(docutils.nodes.Element):
	    """Marker node type for elements derived from the `my_extension` directive.

	    The class adds nothing of its own; it exists only so that such elements
	    can be identified specifically.
	    """

	class MyExtensionDirective(docutils.parsers.rst.Directive):
	    """Directive producing a `mydirective` element with a title and a body.

	    The single required argument becomes the title, the `my_attr` option is
	    accepted with its value left unchanged, and the directive content is
	    parsed as nested markup and added below the title.
	    """

	    required_arguments = 1
	    optional_arguments = 0
	    final_argument_whitespace = True
	    has_content = True
	    option_spec = {
	        'my_attr': docutils.parsers.rst.directives.unchanged,
	    }

	    def run(self):
	        """Build the `mydirective` node and return it as a one-element list."""
	        self.assert_has_content()
	        text = '\n'.join(self.content)
	        n = mydirective(text, **self.options)
	        self.add_name(n)

	        # this will turn the directive argument into a title node
	        # (as we generally allow the title to contain inline markup,
	        # we use the inline parser to get (`textnodes`) list of inline
	        # elements/nodes)
	        title_text = self.arguments[0]
	        textnodes, messages = self.state.inline_text(title_text,
	                                                     self.lineno)
	        # the following would end up calling TextElement(rawsource,text,*children) constructor
	        title = docutils.nodes.title(title_text, '', *textnodes)
	        title.source, title.line = (
	                self.state_machine.get_source_and_line(self.lineno))

	        # add a new sub-node and any system messages from the inline parsing
	        n += title
	        n += messages

	        # This will parse the directive content
	        self.state.nested_parse(self.content, self.content_offset,
	                                n)
	        return [n]

	# This overrides what directive class will be used to process the
	# `my_extension` directive. The public registration function is used instead
	# of writing to the module's private `_directives` dictionary.
	docutils.parsers.rst.directives.register_directive('my_extension', MyExtensionDirective)

	# reStructuredText parser instance; it is also the component whose option
	# definitions feed the settings below
	parser_class = docutils.parsers.get_parser_class('restructuredtext')
	parser = parser_class()

	# Settings are built once, from the parser component, the configuration
	# files and the command line arguments handed to parse_args().
	option_parser = docutils.frontend.OptionParser(
	    components=(parser,),
	    read_config_files=True,
	    description='')

	settings = option_parser.parse_args()


	# empty document tree the parser will fill in
	document = docutils.utils.new_document('', settings)

	# NOTE: the option and content lines of the directive are indented
	# relative to the `.. my_extension::` line; that indentation is what makes
	# them part of the directive.
	text = '''
	My title
	========

	1st paragraph

	Subtitle
	--------

	2nd paragraph

	.. my_extension:: This is my extension
	   :my_attr: 1 2 3
	     4 5 6

	   1st extension paragraph

	   #. 1st numbered item
	   #. 2nd numbered item

	3rd paragraph

	- a
	- b
	'''

	parser.parse(text, document)

	# dump the resulting document tree
	print(document.pformat())