Skip to content

Instantly share code, notes, and snippets.

@ZelphirKaltstahl
Last active October 20, 2016 23:15
Show Gist options
  • Save ZelphirKaltstahl/de2b725ef5adba0a150dd27a5f7b66dc to your computer and use it in GitHub Desktop.
Save ZelphirKaltstahl/de2b725ef5adba0a150dd27a5f7b66dc to your computer and use it in GitHub Desktop.
GTKSourceView Language File for Markdown
<?xml version="1.0" encoding="UTF-8"?>
<!--
Author: Jean-Philippe Fleury
Copyright (C) 2011 Jean-Philippe Fleury <contact@jpfleury.net>
Author: Zelphir Kaltstahl (improvements:
* mapping some styles to their own specific style in a gtksourceview style xml
* adding math block and inline support
* removing bias towards space and allowing tabs and changing some amounts to
arbitrary amounts
* adding support for sublists and multilevel list markers like "1.1." or "2.1.3."
* added support for roman numbers
* added support for letters as list symbols
* made footnotes brackets a thing)
Copyright (c) 2016 Zelphir Kaltstahl <zelphirkaltstahl@gmail.com>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with this library; if not, write to the
Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.
-->
<!-- Note: this language definition file adds support for Markdown Extra syntax,
described here:
* (fr) <http://michelf.com/projets/php-markdown/extra/>
* (en) <http://michelf.com/projects/php-markdown/extra/> -->
<language id="markdown" _name="Markdown" version="2.0" _section="Markup">
<metadata>
  <!-- File association: MIME type and file name globs for this language. -->
  <property name="mimetypes">text/x-markdown</property>
  <property name="globs">*.markdown;*.md;*.mkd;*.mdown</property>
  <!-- Block comment delimiters advertised to the editor.
       NOTE(review): both markers carry three dashes, one more than the
       standard HTML comment markers; confirm this is intentional. -->
  <property name="block-comment-start">&lt;!---</property>
  <property name="block-comment-end">---&gt;</property>
</metadata>
<styles>
  <!-- Markdown styles, mapped onto the generic "def" styles. -->
  <style id="horizontal-rule" _name="Horizontal Rule" map-to="def:type"/>
  <style id="code" _name="Code" map-to="def:identifier"/>
  <style id="blockquote-marker" _name="Blockquote Marker" map-to="def:shebang"/>
  <style id="label" _name="Label" map-to="def:preprocessor"/>
  <style id="attribute-value" _name="Attribute Value" map-to="def:constant"/>
  <style id="image-marker" _name="Image Marker" map-to="def:shebang"/>
  <style id="backslash-escape" _name="Backslash Escape" map-to="def:special-char"/>
  <style id="line-break" _name="Line Break" map-to="def:note"/>

  <!-- Xiaolong added styles.
       These map to "markdown:..." names that are not declared as style ids
       in this file. NOTE(review): presumably they are meant to be supplied by
       a matching style scheme; confirm, since other schemes may leave these
       elements unstyled. -->
  <style id="header" _name="Header" map-to="markdown:heading"/>
  <style id="list-marker" _name="List Marker" map-to="markdown:list-marker"/>
  <style id="math-inline" _name="Math Inline" map-to="markdown:single-math-marker"/>
  <style id="math-text" _name="Math Inline Text" map-to="markdown:math-text"/>
  <style id="math-block" _name="Math Block" map-to="markdown:double-math-marker"/>
  <style id="url" _name="URL" map-to="markdown:url"/>
  <style id="link-text" _name="Link Text" map-to="markdown:link-text"/>
  <style id="emphasis" _name="Emphasis" map-to="markdown:emphasis"/>
  <style id="strong-emphasis" _name="Strong Emphasis" map-to="markdown:strong-emphasis"/>
  <style id="html-block-comment" _name="HTML Block Comment" map-to="markdown:html-block-comment"/>
  <style id="inline-latex" _name="Inline Latex" map-to="markdown:inline-latex"/>
  <style id="footnote-identifier" _name="Footnote Identifier" map-to="markdown:footnote-identifier"/>
  <style id="footnote-bracket" _name="Footnote Bracket" map-to="markdown:footnote-bracket"/>

  <!-- Markdown Extra styles. -->
  <style id="header-id-marker" _name="Header Id Marker" map-to="def:shebang"/>
  <style id="definition-list-marker" _name="Definition List Marker" map-to="def:shebang"/>
  <style id="footnote-marker" _name="Footnote Marker" map-to="markdown:footnote-marker"/>
  <style id="abbreviation-marker" _name="Abbreviation Marker" map-to="def:shebang"/>
  <style id="abbreviation" _name="Abbreviation" map-to="def:preprocessor"/>
  <style id="table-separator" _name="Table Separator" map-to="def:statement"/>
</styles>
<definitions>
<!-- _______________________ Markdown contexts. _______________________ -->
<!-- Examples:
Header 1
========
Header 2
-
-->
<!-- Note: line break can't be used in regex, so only underline is matched. -->
<context id="setext-header" style-ref="header">
  <!-- An underline made only of hyphens or only of equals signs, with
       optional trailing spaces or tabs. -->
  <match>^(-+|=+)[ \t]*$</match>
</context>
<!-- Examples:
- - -
** ** ** ** **
_____
-->
<context id="horizontal-rule" style-ref="horizontal-rule">
  <!-- A whole line built from one repeated marker character. -->
  <match extended="true">
    ^[ \t]*                 # Any leading spaces or tabs.
    (
      (-[ ]{0,2}){3,} |     # At least 3 hyphens, each followed by up to 2 spaces.
      (_[ ]{0,2}){3,} |     # Same, using underscores.
      (\*[ ]{0,2}){3,}      # Same, using asterisks.
    )
    [ \t]*$                 # Any trailing spaces or tabs.
  </match>
</context>
<!-- Note about following list and code block contexts: according to the
Markdown syntax, to write several paragraphs in a list item, we have
to indent each paragraph. Example:
- Item A (paragraph 1).
Item A (paragraph 2).
Item A (paragraph 3).
- Item B.
So there is a conflict in terms of syntax highlighting between an
indented paragraph inside a list item (4 spaces or 1 tab) and an
indented line of code outside a list (also 4 spaces or 1 tab). In this
language file, since a full context analysis can't be done (because
line break can't be used in regex), the choice was made to highlight
code block only from 2 levels of indentation. -->
<!-- Example (unordered list):
* Item
+ Item
- Item
Example (ordered list):
1. Item
2. Item
3. Item
-->
<context id="list" style-ref="list-marker">
  <!-- The whole match (indentation, marker and the blanks after it) takes
       the list-marker style. -->
  <match extended="true">
    ^[ \t]*                                          # Any indentation made of spaces or tabs.
    (                                                # Group 1: the list marker.
      \*|                                            # Unordered list: asterisk,
      \+|                                            # plus,
      -|                                             # or hyphen.
      [0-9]+\.(?:[0-9]+\.)*|                         # Numbers with periods, one or more levels: "1." or "2.1.3."
      \(?[0-9]+\)|                                   # Number with parenthesis: "1)" or "(1)"
      [iIvVxXlLcCdDmM]+\.(?:[iIvVxXlLcCdDmM]+\.)*|   # Roman numerals with periods, one or more levels.
      \(?[iIvVxXlLcCdDmM]+\)|                        # Roman numeral with parenthesis: "iv)" or "(iv)"
      [a-zA-Z]\.|                                    # Single letter with period: "a."
      \(?[a-zA-Z]\)                                  # Single letter with parenthesis: "a)" or "(a)"
    )
    [ \t]+                                           # At least one space or tab must follow the marker.
  </match>
</context>
<!-- Example:
\color
-->
<context id="inline-latex" class="no-spell-check" style-ref="inline-latex">
  <!-- A backslash and an alphanumeric command name, then an optional
       square-bracket group and an optional curly-brace group. -->
  <match>\\[a-zA-Z0-9]+(\[[a-zA-Z0-9=, ]*\])?(\{[a-zA-Z0-9=, ]*\})?</match>
</context>
<!-- Example:
<em>HTML code</em> displayed <strong>literally</strong>.
-->
<context id="code-block" class="no-spell-check">
  <!-- Requires at least 8 spaces or 2 tabs of indentation. Only the text
       after the indentation (group 2) receives the code style. -->
  <match>^( {8,}|\t{2,})([^ \t]+.*)</match>
  <include>
    <context sub-pattern="2" style-ref="code"/>
  </include>
</context>
<!-- Note about following code span contexts: within a paragraph, text
wrapped with backticks indicates a code span. Markdown allows to use
one or more backticks to wrap text, provided that the number is identical
on both sides, and the same number of consecutive backticks is not
present within the text. The current language file supports code span
highlighting with up to 2 backticks surrounding text. -->
<!-- Examples:
Here's a literal HTML tag: `<p>`.
`Here's a code span containing ``backticks``.`
-->
<context id="1-backtick-code-span" class="no-spell-check" style-ref="code">
  <!-- Text wrapped in exactly one backtick on each side; runs of two or
       more backticks are allowed inside the span. -->
  <match>(?&lt;!`)`[^`]+(`{2,}[^`]+)*`(?!`)</match>
</context>
<!-- Examples:
Here's a literal HTML tag: ``<p>``.
``The grave accent (`) is used in Markdown to indicate a code span.``
``Here's another code span containing ```backticks```.``
-->
<context id="2-backticks-code-span" class="no-spell-check" style-ref="code">
  <!-- Text wrapped in exactly two backticks on each side; a single backtick
       or runs of three or more are allowed inside the span. -->
  <match>(?&lt;!`)``[^`]+((`|`{3,})[^`]+)*``(?!`)</match>
</context>
<!-- Examples:
$ math $
-->
<context id="math-span-inline" class="no-spell-check">
  <!-- Single-dollar math on one line. The two delimiters and the text
       between them are styled separately. -->
  <match extended="true">
    (?&lt;![$])       # Not preceded by a dollar sign (that would be a math block).
    ([$])           # Group 1: opening dollar sign.
    ([^\n\r$]+)     # Group 2: math text without dollar signs or line breaks.
    ([$])           # Group 3: closing dollar sign.
    (?![$])         # Not followed by a dollar sign (that would be a math block).
  </match>
  <include>
    <context sub-pattern="1" style-ref="math-inline"/>
    <context sub-pattern="2" style-ref="math-text"/>
    <context sub-pattern="3" style-ref="math-inline"/>
  </include>
</context>
<!-- Examples:
$$ math $$
-->
<context id="math-span-block" class="no-spell-check">
  <!-- Double-dollar math. The two delimiters and the text between them are
       styled separately. -->
  <match extended="true">
    ([$]{2})     # Group 1: opening pair of dollar signs.
    ([^$]+)      # Group 2: math text, anything except a dollar sign.
    ([$]{2})     # Group 3: closing pair of dollar signs.
  </match>
  <include>
    <context sub-pattern="1" style-ref="math-block"/>
    <context sub-pattern="2" style-ref="math-text"/>
    <context sub-pattern="3" style-ref="math-block"/>
  </include>
</context>
<!-- Example:
> Quoted text.
> Quoted text with `code span`.
>> Blockquote nested.
-->
<!-- Note: blockquote can contain block-level and inline Markdown elements,
but the current language file only highlights inline ones (emphasis,
link, etc.). -->
<context id="blockquote" end-at-line-end="true">
  <!-- Opens on one or more blockquote markers at the start of a line (the
       first one indented by at most 3 spaces) and ends at the end of that line. -->
  <start>^( {0,3}&gt;(?=.)( {0,4}&gt;)*)</start>
  <include>
    <!-- The marker run itself. -->
    <context sub-pattern="1" where="start" style-ref="blockquote-marker"/>

    <!-- Inline elements highlighted inside the quoted text. -->
    <context ref="1-backtick-code-span"/>
    <context ref="2-backticks-code-span"/>
    <context ref="automatic-link"/>
    <context ref="inline-link"/>
    <context ref="reference-link"/>
    <context ref="inline-image"/>
    <context ref="reference-image"/>
    <context ref="underscores-emphasis"/>
    <context ref="asterisks-emphasis"/>
    <context ref="underscores-strong-emphasis"/>
    <context ref="asterisks-strong-emphasis"/>
    <context ref="backslash-escape"/>
    <context ref="line-break"/>
  </include>
</context>
<!-- Examples:
<user@example.com>
<http://www.example.com/>
-->
<!-- Note: regular expressions are based from function `_DoAutoLinks` from
Markdown.pl (see <http://daringfireball.net/projects/markdown/>). -->
<context id="automatic-link" class="no-spell-check">
  <!-- An e-mail address or URL between angle brackets. Only the address
       (group 1) is styled, not the brackets. Matching ignores case. -->
  <match case-sensitive="false" extended="true">
    &lt;
      (((mailto:)?[a-z0-9.-]+\@[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+) |   # E-mail address.
       ((https?|ftp):[^'">\s]+))                                      # URL.
    &gt;
  </match>
  <include>
    <context sub-pattern="1" style-ref="url"/>
  </include>
</context>
<!-- Examples:
Lorem *ipsum dolor* sit amet.
Here's an *emphasized text containing an asterisk (\*)*.
-->
<context id="asterisks-emphasis" style-ref="emphasis">
  <!-- Single asterisks around text. The text may not start with an asterisk
       or blank, and the closing asterisk may not follow a backslash,
       asterisk or blank. -->
  <match>(?&lt;!\*)\*[^\* \t].*?(?&lt;!\\|\*| |\t)\*(?!\*)</match>
</context>
<!-- Examples:
Lorem **ipsum dolor** sit amet.
Here's a **strongly emphasized text containing an asterisk (\*).**
-->
<context id="asterisks-strong-emphasis" style-ref="strong-emphasis">
  <!-- Double asterisks around text. The text may not start with an asterisk
       or blank, and the closing pair may not follow a backslash, asterisk
       or blank. -->
  <match>\*\*[^\* \t].*?(?&lt;!\\|\*| |\t)\*\*</match>
</context>
<context id="backslash-escape" style-ref="backslash-escape">
  <!-- A backslash followed by one of Markdown's escapable characters:
       backslash, backtick, asterisk, underscore, curly braces, square
       brackets, parentheses, hash, plus, hyphen, period, exclamation mark.
       The hyphen is escaped inside the character class so that it is taken
       literally. Unescaped, "+-." is a range from plus to period, which also
       accepts a comma and so highlights backslash-comma as an escape. -->
  <match>\\[\\`*_{}\[\]()#+\-.!]</match>
</context>
<!-- Note: a manual line break should be followed by a line containing text,
but since line break can't be used in regex, only trailing spaces or tabs
are matched. -->
<context id="line-break">
  <!-- Two or more trailing spaces or tabs after a non-blank character.
       Only the trailing blanks (group 1) are styled. -->
  <match>(?&lt;=[^ \t])([ \t]{2,})$</match>
  <include>
    <context sub-pattern="1" style-ref="line-break"/>
  </include>
</context>
<!-- _________ Markdown contexts redefined for Markdown Extra. _________ -->
<define-regex id="header-id-attribute" extended="true">
  (?&lt;=[ \t])           # A space or tab must come right before.
  \{                     # Opening curly bracket.
  (\#)                   # Group: the hash marker.
  ([-_:a-zA-Z0-9]+)      # Group: the id itself.
  \}                     # Closing curly bracket.
  [ \t]*                 # Any trailing spaces or tabs.
</define-regex>
<!-- Examples:
# Header 1 # {#id1}
## Header 2 {#id2}
###Header 3###
-->
<context id="atx-header">
  <!-- A line starting with one or more hashes, optionally ending with a
       header id attribute. Group 1 is the header text; groups 2 and 3 are
       the hash marker and the id captured by header-id-attribute. -->
  <match>^(#+.+?)\%{header-id-attribute}?$</match>
  <include>
    <context sub-pattern="1" style-ref="header"/>
    <context sub-pattern="2" style-ref="header-id-marker"/>
    <context sub-pattern="3" class="no-spell-check" style-ref="attribute-value"/>
  </include>
</context>
<!-- Note about following link and image contexts: link and image are not matched if link text, link label, alt text or image label begin with a circumflex (because it's the syntax used for footnotes references), even if in some tricky cases, they could begin with a circumflex and still be well-formed. However, it's very unlikely that a well-formed link or image will not be highlighted. -->
<!-- Examples matched:
[link text](http://www.example.com/)
[link text](<http://www.example.com/>)
[link text]( /folder/page.html "Title" )
Example NOT matched:
[^link text](http://www.example.com/)
-->
<context id="inline-link">
  <!-- Link text in square brackets followed directly by a parenthesised
       URL and optional quoted title. -->
  <match extended="true">
    \[(?!\^)(.*?)\]        # Group 1: link text, not starting with a circumflex.
    \(                     # Opening parenthesis.
      [ \t]*               # Any spaces or tabs after it.
      (&lt;(.*?)&gt; |     # Group 3: URL written between angle brackets.
      (.*?))               # Group 4: URL written bare.
      ([ \t]+(".*?"))?     # Group 6: optional quoted title.
      [ \t]*               # Any spaces or tabs before the closing parenthesis.
    \)                     # Closing parenthesis.
  </match>
  <include>
    <context sub-pattern="1" style-ref="link-text"/>
    <context sub-pattern="3" class="no-spell-check" style-ref="url"/>
    <context sub-pattern="4" class="no-spell-check" style-ref="url"/>
    <context sub-pattern="6" style-ref="attribute-value"/>
  </include>
</context>
<!-- Examples matched:
[link text]
[link text][]
[link text][link label]
[link text] [link label]
Examples NOT matched:
[^link text][link label]
[link text][^link label]
-->
<!-- Note: some assertions are used to differentiate reference link from
link label. -->
<context id="reference-link">
  <!-- Link text in square brackets, optionally followed by a link label in
       a second pair of square brackets.
       The spaces in the lookbehind are written as bracketed classes because
       this regex is in extended mode, where bare spaces are ignored. With
       bare spaces the assertion reduced to "not at the start of the line"
       and no reference link at the start of a line was ever highlighted. -->
  <match extended="true">
    (?&lt;!^[ ]|^[ ]{2}|^[ ]{3})   # Not preceded by only 1 to 3 spaces since the line start.
    \[
      (?!\^)                      # Text must not start with a circumflex (footnote syntax).
      (.*?)                       # Group 1: link text.
    \]
    (                             # Group 2: optional label part.
      [ \t]?                      # At most one space or tab between the two bracket pairs.
      \[
        (?!\^)                    # Label must not start with a circumflex.
        (.*?)                     # Group 3: link label.
      \]
    )?
    (?!:)                         # Not followed by a colon (that would be a link definition).
  </match>
  <include>
    <context sub-pattern="1" style-ref="link-text"/>
    <context sub-pattern="3" class="no-spell-check" style-ref="label"/>
  </include>
</context>
<!-- Examples matched:
[link label]: /folder/page.html
[link label]: <http://www.example.com/>
[link label]: http://www.example.com/ "Title"
Example NOT matched:
[^link label]: /folder/page.html
-->
<context id="link-definition">
  <!-- A whole line defining a link label: label, colon, URL and optional
       quoted title. -->
  <match extended="true">
    ^[ ]{0,3}                    # At most 3 leading spaces.
    \[(?!\^)(.+?)\]:             # Group 1: link label (no leading circumflex), then a colon.
    [ \t]*                       # Any spaces or tabs.
    (&lt;([^ \t]+?)&gt; |      # Group 3: URL written between angle brackets.
    ([^ \t]+?))                  # Group 4: URL written bare.
    ([ \t]+(".*?"))?             # Group 6: optional quoted title.
    [ \t]*$                      # Any trailing spaces or tabs.
  </match>
  <include>
    <context sub-pattern="1" class="no-spell-check" style-ref="label"/>
    <context sub-pattern="3" class="no-spell-check" style-ref="url"/>
    <context sub-pattern="4" class="no-spell-check" style-ref="url"/>
    <context sub-pattern="6" style-ref="attribute-value"/>
  </include>
</context>
<!-- Examples matched:
![alt text](http://www.example.com/image.jpg)
![alt text] (/path/to/image.jpg "Title")
Example NOT matched:
![^alt text](http://www.example.com/image.jpg)
-->
<context id="inline-image">
  <!-- An exclamation mark, alternate text in square brackets, then a
       parenthesised path or URL with an optional quoted title. -->
  <match extended="true">
    (!)                              # Group 1: leading exclamation mark.
    \[(?!\^)(.*?)\][ ]?              # Group 2: alternate text, then an optional space.
    \(                               # Opening parenthesis.
      [ \t]*                         # Any spaces or tabs after it.
      (([^ \t]*?) |                  # Group 4: path or URL given without a title.
      ([^ \t]+?)[ \t]+(".*?"))       # Group 5: path or URL; group 6: quoted title.
      [ \t]*                         # Any spaces or tabs before the closing parenthesis.
    \)                               # Closing parenthesis.
  </match>
  <include>
    <context sub-pattern="1" style-ref="image-marker"/>
    <context sub-pattern="2" style-ref="attribute-value"/>
    <context sub-pattern="4" class="no-spell-check" style-ref="url"/>
    <context sub-pattern="5" class="no-spell-check" style-ref="url"/>
    <context sub-pattern="6" style-ref="attribute-value"/>
  </include>
</context>
<!-- Examples matched:
![alt text][image label]
![alt text] [image label]
Examples NOT matched:
![^alt text][image label]
![alt text][^image label]
-->
<context id="reference-image">
  <!-- An exclamation mark (group 1), alternate text in square brackets
       (group 2), an optional space, then an image label in square brackets
       (group 3). Neither bracket content may start with a circumflex. -->
  <match>(!)\[(?!\^)(.*?)\] ?\[(?!\^)(.*?)\]</match>
  <include>
    <context sub-pattern="1" style-ref="image-marker"/>
    <context sub-pattern="2" style-ref="attribute-value"/>
    <context sub-pattern="3" class="no-spell-check" style-ref="label"/>
  </include>
</context>
<!-- Examples generating an emphasis with Markdown Extra:
Lorem _ipsum dolor_ sit amet.
Here's an _emphasized text containing an underscore (\_)_.
Example NOT generating an emphasis with Markdown Extra:
The file name is "my_text_file.txt".
-->
<!-- Note: according to the Markdown Extra syntax, underscores in the middle
of a word no longer generate an emphasis. -->
<context id="underscores-emphasis" style-ref="emphasis">
  <!-- Single underscores around text. The outer underscores may not touch a
       letter, digit or underscore on their outer side, which keeps names
       with underscores in the middle of a word from matching. -->
  <match>(?&lt;![a-zA-Z0-9_])_[^_ \t].*?(?&lt;!\\|_| |\t)_(?![a-zA-Z0-9_])</match>
</context>
<!-- Examples generating a strong emphasis with Markdown Extra:
Lorem __ipsum dolor__ sit amet.
Here's a __strongly emphasized text containing an underscore (\_)__.
Example NOT generating a strong emphasis with Markdown Extra:
The file name is "my__text__file.txt".
-->
<context id="underscores-strong-emphasis" style-ref="strong-emphasis">
  <!-- Double underscores around text. The outer pairs may not touch a
       letter or digit on their outer side. -->
  <match>(?&lt;![a-zA-Z0-9])__[^_ \t].*?(?&lt;!\\|_| |\t)__(?![a-zA-Z0-9])</match>
</context>
<!-- ____________________ Markdown Extra contexts. ____________________ -->
<!-- Example:
Header 2 {#id}
========
-->
<!-- Note: line break can't be used in regex, so only the id attribute is matched. -->
<context id="setext-header-id-attribute">
  <!-- A header id attribute at the end of a line. Groups 1 and 2 are the
       hash marker and the id captured by header-id-attribute. -->
  <match>\%{header-id-attribute}$</match>
  <include>
    <context sub-pattern="1" style-ref="header-id-marker"/>
    <context sub-pattern="2" class="no-spell-check" style-ref="attribute-value"/>
  </include>
</context>
<!-- Example:
Word
: definition
-->
<!-- Note: line break can't be used in regex, so only the line containing
the colon is matched. -->
<context id="definition-list">
  <!-- A colon after at most 3 leading spaces, followed by at least one
       space or tab. Only the colon (group 1) is styled. -->
  <match>^ {0,3}(:)[ \t]+</match>
  <include>
    <context sub-pattern="1" style-ref="definition-list-marker"/>
  </include>
</context>
<!-- Example:
~~~
<em>HTML code</em> displayed <strong>literally</strong>.
~~~
-->
<context id="fenced-code-block" class="no-spell-check" style-ref="code">
  <!-- Opens on a line holding only 3 or more tildes (plus optional trailing
       blanks) and closes on a line repeating exactly the same tilde run. -->
  <start>^(~{3,})[ \t]*$</start>
  <end>^(\%{1@start})[ \t]*$</end>
</context>
<!-- Example:
A sentence containing a link to a footnote[^1].
[^1]: That's the footnote.
-->
<context id="footnote-reference">
  <!-- Square brackets whose content starts with a circumflex. Brackets,
       circumflex and identifier are captured and styled separately. -->
  <match>(\[)(\^)(.+?)(\])</match>
  <include>
    <!-- Opening bracket, circumflex, identifier, closing bracket. -->
    <context sub-pattern="1" style-ref="footnote-bracket"/>
    <context sub-pattern="2" style-ref="footnote-marker"/>
    <context sub-pattern="3" class="no-spell-check" style-ref="footnote-identifier"/>
    <context sub-pattern="4" style-ref="footnote-bracket"/>
  </include>
</context>
<!-- Example:
*[PCRE]: Perl Compatible Regular Expression
-->
<context id="abbreviation">
  <!-- An abbreviation definition line: asterisk (group 1), abbreviation in
       square brackets (group 2), colon, then the expansion (group 3). -->
  <match>^ {0,3}(\*)\[(.+?)\] ?:(.*)</match>
  <include>
    <context sub-pattern="1" style-ref="abbreviation-marker"/>
    <context sub-pattern="2" class="no-spell-check" style-ref="abbreviation"/>
    <context sub-pattern="3" style-ref="attribute-value"/>
  </include>
</context>
<!-- Example:
| Header 1 | Header 2 |
| - | - |
| Cell | Cell |
| Cell | Cell |
-->
<!-- Note: line break can't be used in regex, so only the separator line is
matched. -->
<context id="table-separator" style-ref="table-separator">
  <!-- The separator row of a table: hyphens with optional alignment colons,
       delimited by pipes.
       In the trailing character class the hyphen is placed last so that it
       is taken literally. Placed between the tab and the colon it formed a
       range that also accepted digits and most punctuation, so lines that
       are not separator rows could match. -->
  <match extended="true">
    ^[ ]{0,3}                 # At most 3 leading spaces.
    (
      \|[ \t]*:?-+:? |        # First cell written with a leading pipe.
      :?-+:?[ \t]*\|          # First cell written without a leading pipe.
    )
    [ \t:|-]*$                # Rest of the row: blanks, colons, pipes and hyphens only.
  </match>
</context>
<context id="backslash-escape-extra" style-ref="backslash-escape">
  <!-- Additional escapes: a backslash followed by a colon or a pipe. -->
  <match>\\[:|]</match>
</context>
<!-- HTML comment, possibly spanning several lines.
     A single match regex cannot cross a line break, so this is a container
     context: it opens on the comment start marker and closes on the comment
     end marker, and everything in between takes the html-block-comment
     style. Text before or after the comment on the same line is left alone. -->
<context id="html-block-comment" style-ref="html-block-comment">
  <start>&lt;!--</start>
  <end>--&gt;</end>
</context>
<context id="markdown-syntax">
  <!-- Aggregates every Markdown context defined in this file. The order of
       the references below is kept as written. -->
  <include>
    <!-- Markdown contexts: block-level elements. -->
    <context ref="atx-header"/>
    <context ref="setext-header"/>
    <context ref="horizontal-rule"/>
    <context ref="list"/>
    <context ref="code-block"/>

    <!-- Markdown contexts: code spans and blockquote. -->
    <context ref="1-backtick-code-span"/>
    <context ref="2-backticks-code-span"/>
    <context ref="blockquote"/>

    <!-- Markdown contexts: links and images. -->
    <context ref="automatic-link"/>
    <context ref="inline-link"/>
    <context ref="reference-link"/>
    <context ref="link-definition"/>
    <context ref="inline-image"/>
    <context ref="reference-image"/>

    <!-- Markdown contexts: emphasis, escapes and line breaks. -->
    <context ref="underscores-emphasis"/>
    <context ref="asterisks-emphasis"/>
    <context ref="underscores-strong-emphasis"/>
    <context ref="asterisks-strong-emphasis"/>
    <context ref="backslash-escape"/>
    <context ref="line-break"/>

    <!-- Markdown Extra contexts. -->
    <context ref="setext-header-id-attribute"/>
    <context ref="definition-list"/>
    <context ref="fenced-code-block"/>
    <context ref="footnote-reference"/>
    <context ref="abbreviation"/>
    <context ref="table-separator"/>
    <context ref="backslash-escape-extra"/>

    <!-- Xiaolong added: math, LaTeX commands and HTML comments. -->
    <context ref="math-span-inline"/>
    <context ref="inline-latex"/>
    <!--<context ref="math-text"/>-->
    <context ref="math-span-block"/>
    <context ref="html-block-comment"/>
  </include>
</context>
<!-- Substitute the markdown-syntax context for the context with id
     html:embedded-lang-hook. -->
<replace id="html:embedded-lang-hook" ref="markdown-syntax"/>
<context id="markdown">
  <!-- Combines this file's Markdown contexts with the html:html context. -->
  <include>
    <context ref="markdown-syntax"/>

    <!-- Note: Markdown Extra syntax inside HTML blocks is highlighted here,
         but it is not processed unless the block has its `markdown`
         attribute set to 1 (e.g., `<div markdown="1">`). -->
    <context ref="html:html"/>
  </include>
</context>
</definitions>
</language>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment