Skip to content

Instantly share code, notes, and snippets.

@riccardomurri
Last active August 29, 2015 14:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save riccardomurri/a79e1ce21ad117a39813 to your computer and use it in GitHub Desktop.
Save riccardomurri/a79e1ce21ad117a39813 to your computer and use it in GitHub Desktop.
#! /usr/bin/env python
import re
from markdown2 import markdown
# Patterns for markdown2's ``link-patterns`` extra.  Each entry is a
# ``(compiled_regex, replacement)`` pair; the replacement is a template
# expanded against the match to produce the link target.  The long
# patterns are written in verbose mode (``(?x)``), so whitespace outside
# character classes is ignored and ``(?# ... )`` groups are comments.
link_patterns = [
    # issueNNN --> /help/issueNNN
    (re.compile(r'\b(issue)\s*(\d+)\b'), r'/help/\g<1>\g<2>'),
    # web URL
    (re.compile(r'''(?xi)
        (?<!['"])  (?# not preceded by a quote, avoid recursive processing of href="..." )
        \b
        (  (?# capture group 1: entire matched URL )
          (?:ftp|https?)://  (?# URL scheme )
          (?:  (?# optional userinfo part: )
            [a-z0-9._~\-%!$&'()*+,;=:]+  (?# see RFC 3986, sec. 3.2.1 )
            @  (?# literal '@' mark )
          )?
          (?:  (?# host, either one of the following: )
            [a-z0-9][a-z0-9.-]*\.(?:[a-z][a-z]+)  (?# looks like an FQDN; the first label may be one character long )
            |
            [0-9]+(?:\.[0-9]+){3}  (?# IPv4 dotted-quad )
            |
            \[ [0-9a-f]+(?::[0-9a-f]+){7} \]  (?# IPv6 full address literal )
            |
            \[ (?: [0-9a-f]+ (?: :[0-9a-f]+ )* )? :: (?: [0-9a-f]+ (?: :[0-9a-f]+ )* )? \]
              (?# IPv6 abbreviated address literal; either side of the double colon may be empty )
          )
          (?: :[0-9]+ )?  (?# optional port number )
          (?:  (?# optional path: )
            /  (?# literal slash )
            [a-z0-9._~\-%!$&'()*+,;=:@]*  (?# see RFC 3986, sec. 2.2, 2.3, and 3.3 )
          )*  (?# repeat zero or more times )
          (?:  (?# optional query part: )
            [?]  (?# literal question mark )
            [a-z0-9._~\-%!$&'()*+,;=:@?/]*  (?# see RFC 3986, sec. 2.2, 2.3, and 3.4 )
          )?  (?# at most once )
          (?:  (?# optional fragment part: )
            [#]  (?# literal hash mark )
            [a-z0-9._~\-%!$&'()*+,;=:@?/]*  (?# see RFC 3986, sec. 2.2, 2.3, and 3.5 )
          )?  (?# at most once )
        )
        '''), r'\g<1>'),
    # "naked" domain, assume `http://` URL
    (re.compile(r'''(?xi)
        (?<!/)  (?# not preceded by a /, avoid matching http://example.com )
        (?<!\.)  (?# not preceded by a ., avoid matching just `com` in example.com )
        (?<!@)  (?# not preceded by a @, avoid matching foo@_gmail.com_ )
        \b
        (  (?# capture group 1: domain name )
          [a-z0-9][a-z0-9.-]+\.(?:[a-z][a-z]+)  (?# looks like a domain name )
          /?  (?# optional slash )
          (?!@)  (?# not followed by a @, avoid matching "foo.na" in "foo.na@example.com" )
        )
        \b
        '''), r'http://\g<1>'),
]
def _to_markdown(text):
    """Render Markdown *text* to HTML and return it UTF-8 encoded.

    The conversion runs markdown2 with the ``footnotes``,
    ``fenced-code-blocks`` and ``link-patterns`` extras, using the
    module-level ``link_patterns`` list for automatic linking.
    """
    html = markdown(
        text,
        # Each extra is listed exactly once.
        extras=['footnotes', 'fenced-code-blocks', 'link-patterns'],
        link_patterns=link_patterns)
    # Ensure the result is converted to UTF-8.
    return html.encode('utf-8')
def test_link_patterns():
    """Check that `_to_markdown` renders each sample to the expected HTML.

    Every case is a ``(markdown_source, expected_html)`` pair; the rendered
    output is stripped of surrounding whitespace before being compared.
    """
    cases = (
        ("issue1", '<p><a href="/help/issue1">issue1</a></p>'),
        ("issue 2", '<p><a href="/help/issue2">issue 2</a></p>'),
        ("nonlink1", '<p>nonlink1</p>'),
        ("http://www.google.com",
         '<p><a href="http://www.google.com">http://www.google.com</a></p>'),
        ("http://www.google.com/",
         '<p><a href="http://www.google.com/">http://www.google.com/</a></p>'),
        ("http://www.google.com:80/",
         '<p><a href="http://www.google.com:80/">http://www.google.com:80/</a></p>'),
        ("http://www.google.com:80",
         '<p><a href="http://www.google.com:80">http://www.google.com:80</a></p>'),
        ("http://www.google.com/search",
         '<p><a href="http://www.google.com/search">http://www.google.com/search</a></p>'),
        ("http://www.google.com/search?q=regexp",
         '<p><a href="http://www.google.com/search?q=regexp">http://www.google.com/search?q=regexp</a></p>'),
        ("[google](https://google.com)",
         '<p><a href="https://google.com">google</a></p>'),
        # Reference-style link; the source lines must start at column 0.
        ("\n[Google][1]\n[1]: http://www.google.com\n",
         '<p><a href="http://www.google.com">Google</a></p>'),
        ("This is text with http://www.example.org/foo/bar/something link",
         '<p>This is text with <a href="http://www.example.org/foo/bar/something">http://www.example.org/foo/bar/something</a> link</p>'),
        ("Dotted-quad addresses are also supported, as in http://192.0.2.5/",
         '<p>Dotted-quad addresses are also supported, as in <a href="http://192.0.2.5/">http://192.0.2.5/</a></p>'),
        ("IPv6 full address literals, as in http://[2001:DB8:0:0:0:0:0:1]",
         '<p>IPv6 full address literals, as in <a href="http://[2001:DB8:0:0:0:0:0:1]">http://[2001:DB8:0:0:0:0:0:1]</a></p>'),
        ("IPv6 abbreviated address literals, as in http://[::1]",
         '<p>IPv6 abbreviated address literals, as in <a href="http://[::1]">http://[::1]</a></p>'),
        ("IPv6 abbreviated address literals w/ port number, as in http://[::1]:8080",
         '<p>IPv6 abbreviated address literals w/ port number, as in <a href="http://[::1]:8080">http://[::1]:8080</a></p>'),
        ("FTP URLs, as in ftp://ftp.example.org/",
         '<p>FTP URLs, as in <a href="ftp://ftp.example.org/">ftp://ftp.example.org/</a></p>'),
        ("FTP URLs with explicit username, as in ftp://anonymous@ftp.example.org",
         '<p>FTP URLs with explicit username, as in <a href="ftp://anonymous@ftp.example.org">ftp://anonymous@ftp.example.org</a></p>'),
        ("FTP URLs with username and password (INSECURE!), as in ftp://anonymous:guesswho@ftp.example.org",
         '<p>FTP URLs with username and password (INSECURE!), as in <a href="ftp://anonymous:guesswho@ftp.example.org">ftp://anonymous:guesswho@ftp.example.org</a></p>'),
    )
    for src, expected in cases:
        actual = _to_markdown(src).strip()
        # `_to_markdown` returns UTF-8 encoded bytes; decode them so the
        # comparison against the text expectations is meaningful on
        # Python 3 as well (bytes never compare equal to str there).
        if isinstance(actual, bytes):
            actual = actual.decode('utf-8')
        assert actual == expected, (
            "Rendering of '%(src)s'"
            " expected to be:\n\t '%(expected)s'\n"
            " but gotten:\n\t '%(actual)s'\n instead."
            % {'src': src, 'expected': expected, 'actual': actual})
# When executed as a script (rather than imported), run the self-test.
if __name__ == "__main__":
    test_link_patterns()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment