Skip to content

Instantly share code, notes, and snippets.

@brabect1
Last active June 29, 2024 11:48
Show Gist options
  • Save brabect1/f8260648e27bd742b6227f8af6c07c35 to your computer and use it in GitHub Desktop.
Save brabect1/f8260648e27bd742b6227f8af6c07c35 to your computer and use it in GitHub Desktop.
Customizing reStructuredText Markup #rst #docutils #python #markup #customization

Customizing reStructuredText (rst) for Domain-specific Syntax

  • document tree
    • created by a Parser component (i.e. from reStructuredText input)
    • manually constructed
  • docutils components
    • Reader: Intended to obtain input and pass it to a Parser.
    • Parser: Generates a (docutils) document tree from a Reader input. docutils contains only a reStructuredText parser, but others may potentially exist (e.g. markdown, html, etc.).
    • Writer: Turns a document tree into an output (e.g. html, LaTeX).
  • other "pieces"

Custom Roles

Custom Directives

https://docutils.sourceforge.io/docs/ref/rst/directives.html

Directives have the following syntax:

+-------+-------------------------------+
| ".. " | directive type "::" directive |
+-------+ block                         |
        |                               |
        +-------------------------------+

Examples:

.. image:: /path/to/image.png

.. figure:: /path/to/image.png
   :width: 600px

.. note:: This is the note's 1st paragraph
   that spans multiple lines.

   This is the note's 2nd paragraph.

   Notes are general blocks and can embed usual markup elements:

   * text elements with inline markup
   * list elements
   * other directives

https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#directives

howto: https://docutils.sourceforge.io/docs/howto/rst-directives.html

import docutils
from docutils.parsers.rst.directives.admonitions import BaseAdmonition
# Marker node type: it adds no behavior of its own and exists only so that
# elements created by the `my_extension` directive can be told apart from
# other admonition elements in the document tree.
class myadmonition(docutils.nodes.Admonition, docutils.nodes.Element):
    pass
class MyExtensionAdmonition(BaseAdmonition):
    # Node type instantiated for this directive; everything else is
    # inherited from BaseAdmonition.
    node_class = myadmonition
# Select the directive class used to process the `my_extension` directive.
# The `directives` module was referenced without ever being imported, so it
# is imported here; registration goes through the public helper rather than
# the private `_directives` registry.
from docutils.parsers.rst import directives
directives.register_directive('my_extension', MyExtensionAdmonition)
import docutils
# Marker node type: it adds no behavior of its own and exists only so that
# elements created by the `my_extension` directive can be identified in the
# document tree.
class mydirective(docutils.nodes.Element):
    pass
class MyExtensionDirective(docutils.parsers.rst.Directive):
    # One mandatory argument (may contain whitespace) plus a content block.
    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    has_content = True

    def run(self):
        # Build a `mydirective` element holding a title (from the directive
        # argument) followed by the parsed directive content; return it as a
        # single-element list.
        self.assert_has_content()
        raw_body = '\n'.join(self.content)
        container = mydirective(raw_body, **self.options)
        self.add_name(container)

        # The argument becomes the title. Titles may carry inline markup, so
        # the inline parser is used to obtain the list of inline nodes (and
        # any system messages raised while parsing them).
        heading_source = self.arguments[0]
        inline_nodes, system_messages = self.state.inline_text(
            heading_source, self.lineno)

        # Ends up in the TextElement(rawsource, text, *children) constructor.
        heading = docutils.nodes.title(heading_source, '', *inline_nodes)
        location = self.state_machine.get_source_and_line(self.lineno)
        heading.source, heading.line = location

        # Attach the title first, then the inline-parsing system messages.
        container += heading
        container += system_messages

        # Parse the directive content into the container element.
        self.state.nested_parse(
            self.content, self.content_offset, container)
        return [container]
# Select the directive class used to process the `my_extension` directive.
# The `directives` module was referenced without ever being imported, so it
# is imported here; registration goes through the public helper rather than
# the private `_directives` registry.
from docutils.parsers.rst import directives
directives.register_directive('my_extension', MyExtensionDirective)
#! /usr/bin/env python
# Copyright 2024 Tomas Brabec
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import locale

# Best effort: switch to the locale requested by the environment. Only the
# documented failure of setlocale() is ignored, so unrelated errors (and
# KeyboardInterrupt) are no longer silently swallowed.
try:
    locale.setlocale(locale.LC_ALL, '')
except locale.Error:
    # Requested locale is not available; continue with the current one.
    pass
import docutils.frontend
import docutils.utils
import docutils.parsers
import docutils.parsers.rst
import docutils.parsers.rst.directives
# Marker node type with no behavior of its own; its only purpose is to make
# elements produced by the `my_extension` directive recognizable in the
# document tree.
class mydirective(docutils.nodes.Element):
    pass
class MyExtensionDirective(docutils.parsers.rst.Directive):
    # One mandatory argument (may contain whitespace), a content block and a
    # single `my_attr` option whose value is passed through unchanged.
    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    has_content = True
    option_spec = {
        'my_attr': docutils.parsers.rst.directives.unchanged,
    }

    def run(self):
        # Produce one `mydirective` element: the directive options become
        # its attributes, the argument becomes its title and the directive
        # content is parsed into it.
        self.assert_has_content()
        body_source = '\n'.join(self.content)
        element = mydirective(body_source, **self.options)
        self.add_name(element)

        # Titles may contain inline markup, hence the inline parser; it
        # yields the inline nodes plus any system messages.
        argument = self.arguments[0]
        children, problems = self.state.inline_text(argument, self.lineno)

        # Ends up in the TextElement(rawsource, text, *children) constructor.
        title_node = docutils.nodes.title(argument, '', *children)
        where = self.state_machine.get_source_and_line(self.lineno)
        title_node.source, title_node.line = where

        # Title first, then the system messages from inline parsing.
        element += title_node
        element += problems

        # Parse the directive content as nested body elements.
        self.state.nested_parse(self.content, self.content_offset, element)
        return [element]
# Select the directive class used to process the `my_extension` directive.
# The public registration helper is used instead of writing to the private
# `_directives` registry directly.
docutils.parsers.rst.directives.register_directive(
    'my_extension', MyExtensionDirective)
# Create the reStructuredText parser and derive the runtime settings from its
# option specification, the configuration files and the command line.
# (A throw-away default-settings object that was immediately overwritten has
# been dropped.)
parser_class = docutils.parsers.get_parser_class('restructuredtext')
parser = parser_class()
option_parser = docutils.frontend.OptionParser(
    components=(parser,),
    read_config_files=True,
    description='')
settings = option_parser.parse_args()
document = docutils.utils.new_document('', settings)

# NOTE(review): blank lines and indentation of this sample were restored.
# Without them the section titles are not separated from the paragraphs and
# the directive has neither options nor content (which `run()` requires) --
# confirm against the intended sample.
text = '''
My title
========

1st paragraph

Subtitle
--------

2nd paragraph

.. my_extension:: This is my extension
   :my_attr: 1 2 3
      4 5 6

   1st extension paragraph

   #. 1st numbered item
   #. 2nd numbered item

3rd paragraph

- a
- b
'''

# Parse the sample into the document tree and dump its pseudo-XML form.
parser.parse(text, document)
print(document.pformat())
#! /usr/bin/env python
# Copyright 2024 Tomas Brabec
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import locale

# Best effort: switch to the locale requested by the environment. Only the
# documented failure of setlocale() is ignored, so unrelated errors (and
# KeyboardInterrupt) are no longer silently swallowed.
try:
    locale.setlocale(locale.LC_ALL, '')
except locale.Error:
    # Requested locale is not available; continue with the current one.
    pass
import docutils.frontend
import docutils.utils
import docutils.parsers
import docutils.nodes
import docutils.parsers.rst.roles
def date_role(role, rawtext, text, lineno, inliner, options=None, content=None):
    """Role function that renders today's date as an inline node.

    The interpreted text itself (``text``) is ignored; the node's text is
    the current local date formatted as ``YYYY-MM-DD``.

    :param role: role name used in the document (unused).
    :param rawtext: raw source of the role; stored as the node's raw source.
    :param text: interpreted text of the role (unused).
    :param lineno: line number of the role in the input (unused).
    :param inliner: inliner object that invoked the role (unused).
    :param options: optional mapping passed as attributes to the node.
    :param content: directive content for customization (unused).
    :returns: a ``([node], [])`` tuple: the created inline node and an
        empty list of system messages.
    """
    import datetime

    # `None` defaults replace the former shared mutable `{}` / `[]` defaults.
    if options is None:
        options = {}
    today = datetime.datetime.today().strftime('%Y-%m-%d')
    node = docutils.nodes.inline(rawtext, today, **options)
    return [node], []
# Make the `date` role available to the parser.
docutils.parsers.rst.roles.register_local_role('date', date_role)

# Create the reStructuredText parser and derive the runtime settings from its
# option specification, the configuration files and the command line.
# (A throw-away default-settings object that was immediately overwritten has
# been dropped.)
parser_class = docutils.parsers.get_parser_class('restructuredtext')
parser = parser_class()
option_parser = docutils.frontend.OptionParser(
    components=(parser,),
    read_config_files=True,
    description='')
settings = option_parser.parse_args()
document = docutils.utils.new_document('', settings)

# NOTE(review): blank lines of this sample were restored; without them the
# two paragraphs and the list items collapse into a single paragraph --
# confirm against the intended sample.
text = '''
My title
========

1st paragraph as of :date:`whatever` today

2nd paragraph

- a
- b
'''

# Parse the sample into the document tree and dump its pseudo-XML form.
parser.parse(text, document)
print(document.pformat())
#! /usr/bin/env python
# Copyright 2024 Tomas Brabec
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import locale

# Best effort: switch to the locale requested by the environment. Only the
# documented failure of setlocale() is ignored, so unrelated errors (and
# KeyboardInterrupt) are no longer silently swallowed.
try:
    locale.setlocale(locale.LC_ALL, '')
except locale.Error:
    # Requested locale is not available; continue with the current one.
    pass
import docutils.frontend
import docutils.utils
import docutils.parsers
# Create the reStructuredText parser and derive the runtime settings from its
# option specification, the configuration files and the command line.
# (A throw-away default-settings object that was immediately overwritten has
# been dropped.)
parser_class = docutils.parsers.get_parser_class('restructuredtext')
parser = parser_class()
option_parser = docutils.frontend.OptionParser(
    components=(parser,),
    read_config_files=True,
    description='')
settings = option_parser.parse_args()
document = docutils.utils.new_document('', settings)

# NOTE(review): blank lines of this sample were restored; without them the
# two paragraphs and the list items collapse into a single paragraph --
# confirm against the intended sample.
text = '''
My title
========

1st paragraph

2nd paragraph

- a
- b
'''

# Parse the sample into the document tree and dump its pseudo-XML form.
parser.parse(text, document)
print(document.pformat())
#! /usr/bin/env python
# Copyright 2024 Tomas Brabec
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import locale

# Best effort: switch to the locale requested by the environment. Only the
# documented failure of setlocale() is ignored, so unrelated errors (and
# KeyboardInterrupt) are no longer silently swallowed.
try:
    locale.setlocale(locale.LC_ALL, '')
except locale.Error:
    # Requested locale is not available; continue with the current one.
    pass
import docutils.frontend
import docutils.utils
import docutils.parsers
def is_note(node):
    """Return True when *node* is a docutils `note` element, else False."""
    if node is None:
        return False
    return isinstance(node, docutils.nodes.note)
def my_condition(node):
    """Return True for a `list_item` element whose text is exactly 'a'."""
    if node is None:
        return False
    if not isinstance(node, docutils.nodes.list_item):
        return False
    return node.astext() == 'a'
# Create the reStructuredText parser and derive the runtime settings from its
# option specification, the configuration files and the command line.
# (A throw-away default-settings object that was immediately overwritten has
# been dropped.)
parser_class = docutils.parsers.get_parser_class('restructuredtext')
parser = parser_class()
option_parser = docutils.frontend.OptionParser(
    components=(parser,),
    read_config_files=True,
    description='')
settings = option_parser.parse_args()
document = docutils.utils.new_document('', settings)

# NOTE(review): blank lines and the indentation of the first note's second
# paragraph were reconstructed; reStructuredText needs them to separate the
# block elements -- confirm against the intended sample.
text = '''
My title
========

.. note:: 1st paragraph

   2nd paragraph

3rd paragraph

.. note:: 4th paragraph

- a
- b
'''

parser.parse(text, document)
print(document.pformat())
print(20 * '=')

# Prefer `findall()` where the installed docutils offers it and fall back to
# `traverse()` otherwise. The matches are copied into a list first because
# the loops below modify the tree that is being searched.
find_nodes = getattr(document, 'findall', None) or document.traverse

# in-place replacement of nodes: every note is replaced by its own children
for node in list(find_nodes(condition=is_note)):
    parent = node.parent
    children = node.children
    parent.replace(node, children)
    # note: the above is equivalent to ``node.replace_self(node.children)``

# deletion of nodes
for node in list(find_nodes(condition=my_condition)):
    node.parent.remove(node)

print(document.pformat())
#! /usr/bin/env python
# Copyright 2024 Tomas Brabec
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import locale

# Best effort: switch to the locale requested by the environment. Only the
# documented failure of setlocale() is ignored, so unrelated errors (and
# KeyboardInterrupt) are no longer silently swallowed.
try:
    locale.setlocale(locale.LC_ALL, '')
except locale.Error:
    # Requested locale is not available; continue with the current one.
    pass
import docutils.utils
# Build a document tree by hand (no parser involved): a title, two
# paragraphs and a two-item bullet list, then dump its pseudo-XML form.
document = docutils.utils.new_document('', None)
document += docutils.nodes.title(text='My title')
for paragraph_text in ('1st paragraph', '2nd paragraph'):
    document += docutils.nodes.paragraph(text=paragraph_text)

bullets = docutils.nodes.bullet_list()
bullets['bullet'] = '-'
for label in ('a', 'b'):
    # Each list item wraps a single paragraph holding the label.
    item = docutils.nodes.list_item('')
    item += docutils.nodes.paragraph(text=label)
    bullets += item
document += bullets

print(document.pformat())
#! /usr/bin/env python
# Copyright 2024 Tomas Brabec
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import locale

# Best effort: switch to the locale requested by the environment. Only the
# documented failure of setlocale() is ignored, so unrelated errors (and
# KeyboardInterrupt) are no longer silently swallowed.
try:
    locale.setlocale(locale.LC_ALL, '')
except locale.Error:
    # Requested locale is not available; continue with the current one.
    pass
import docutils.io
import docutils.core
# NOTE(review): blank lines of this sample were restored; without them the
# two paragraphs and the list items collapse into a single paragraph --
# confirm against the intended sample.
source = '''
My title
========

1st paragraph

2nd paragraph

- a
- b
'''

# using string input and output
pub = docutils.core.Publisher(source_class=docutils.io.StringInput,
                              destination_class=docutils.io.StringOutput)
reader_name = 'standalone'
parser_name = 'restructuredtext'
writer_name = 'html'
pub.set_components(reader_name, parser_name, writer_name)

# Populates the publisher default settings (from individual components).
settings = pub.get_settings()

# Assign the input source.
pub.set_source(source=source, source_path='')

output = pub.publish()
# The string output may come back encoded; decode it so that the HTML itself
# is printed rather than the repr of a bytes object.
if isinstance(output, bytes):
    output = output.decode(settings.output_encoding)
print(output)
#! /usr/bin/env python
# Copyright 2024 Tomas Brabec
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import locale

# Best effort: switch to the locale requested by the environment. Only the
# documented failure of setlocale() is ignored, so unrelated errors (and
# KeyboardInterrupt) are no longer silently swallowed.
try:
    locale.setlocale(locale.LC_ALL, '')
except locale.Error:
    # Requested locale is not available; continue with the current one.
    pass
import docutils.frontend
import docutils.utils
import docutils.parsers
import docutils.nodes
class TitleCollectVisitor(docutils.nodes.SparseNodeVisitor):
    """Collect document and section titles into a nested dictionary.

    After walking a document, ``self.titles`` maps every title text to a
    dictionary of the titles nested one level below it (insertion order
    follows document order).
    """

    def __init__(self, document):
        super().__init__(document)
        self.document = document
        # Nested mapping: title text -> mapping of its sub-titles.
        self.titles = {}

    def visit_title(self, node):
        """Record the title *node* at the nesting level of its parent."""
        lvl = self.get_sec_level(node.parent)
        if lvl < 0:
            # Title of something other than the document or a section.
            return
        d = self.titles
        # Descend into the most recently added title of each level.
        while lvl > 0:
            if not d:
                # No title recorded at this level yet; attach the new title
                # at the deepest level reached instead of failing.
                break
            d = d[list(d)[-1]]
            lvl -= 1
        d[node.astext()] = {}

    def get_sec_level(self, node):
        """Return the nesting level of a title whose parent is *node*.

        Returns ``-1`` when *node* is neither a document nor a section
        attached to a document.

        There are three cases to consider (all are due to `frontmatter`
        transform happening on the parsed input). First::

            Title
            =====

            Subtitle
            --------

        which yields::

            <document>
                <title>
                <subtitle>

        Second::

            Title
            =====

            Subtitle 1
            ----------

            Subtitle 2
            ----------

        which yields::

            <document>
                <title>
                <section>
                    <title>
                <section>
                    <title>

        And eventually third::

            Title 1
            =======

            Subtitle 1
            ----------

            Title 2
            =======

        which yields::

            <document>
                <section>
                    <title>
                    <section>
                        <title>
                <section>
                    <title>
        """
        if isinstance(node, docutils.nodes.document):
            # This will be called for the 1st (`title` and `subtitle`) and
            # 2nd case (`title` only)
            return 0
        if not isinstance(node, docutils.nodes.section):
            return -1

        # This will be called for the 2nd (`subtitle` only) and 3rd case.
        # To distinguish the two, we need to see if the `document` node
        # contains a `title` child.
        depth = 0
        while node is not None and not isinstance(node, docutils.nodes.document):
            node = node.parent
            depth += 1
        if node is None:
            # The section is not attached to any document.
            return -1
        has_title = any(isinstance(child, docutils.nodes.title)
                        for child in node.children)
        if not has_title:
            # Top-level sections act as the first title level.
            depth -= 1
        return depth

    def print_titles(self, d=None, indent=''):
        """Print the collected titles as a nested bullet list.

        :param d: mapping to print; defaults to ``self.titles``.
        :param indent: prefix printed before every line of this level.
        """
        if d is None:
            d = self.titles
        for (k, v) in d.items():
            print(f"{indent}- {k}")
            self.print_titles(v, indent + ' ')
# Create the reStructuredText parser and derive the runtime settings from its
# option specification, the configuration files and the command line.
# (A throw-away default-settings object that was immediately overwritten has
# been dropped.)
parser_class = docutils.parsers.get_parser_class('restructuredtext')
parser = parser_class()
option_parser = docutils.frontend.OptionParser(
    components=(parser,),
    read_config_files=True,
    description='')
settings = option_parser.parse_args()
document = docutils.utils.new_document('', settings)

# NOTE(review): blank lines of this sample were restored; section titles
# must be separated from the preceding paragraph or title to be parsed as
# titles -- confirm against the intended sample.
text = '''
My title
========

1st paragraph

1st Subtitle
------------

2nd paragraph

1st SubSubtitle
...............

2nd SubSubtitle
...............

2nd Subtitle
------------

- a
- b
'''

# Parse the sample, collect all titles and print them as a nested list.
parser.parse(text, document)
visitor = TitleCollectVisitor(document)
document.walkabout(visitor)
visitor.print_titles()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment