build_parser
with AutoLinks
Line # Hits Time Per Hit % Time Line Contents
==============================================================
1678 @profile
1679 def build_parser(self) -> markdown.Markdown:
1680 # Build the parser using selected default features from py-markdown.
1681 # The complete list of all available processors can be found in the
1682 # super().build_parser() function.
1683 #
1684 # Note: for any py-markdown updates, manually check if we want any
1685 # of the new features added upstream or not; they wouldn't get
1686 # included by default.
1687 3 145.0 48.3 0.2 self.preprocessors = self.build_preprocessors()
1688 3 4506.0 1502.0 5.4 self.parser = self.build_block_parser()
1689 3 79107.0 26369.0 94.0 self.inlinePatterns = self.build_inlinepatterns()
1690 3 145.0 48.3 0.2 self.treeprocessors = self.build_treeprocessors()
1691 3 57.0 19.0 0.1 self.postprocessors = self.build_postprocessors()
1692 3 171.0 57.0 0.2 self.handle_zephyr_mirror()
1693 3 0.0 0.0 0.0 return self
build_inlinepatterns
with AutoLinks
Line # Hits Time Per Hit % Time Line Contents
==============================================================
1717 @profile
1718 def build_inlinepatterns(self) -> markdown.util.Registry:
1719 # Declare regexes for clean single line calls to .register().
1720 3 7.0 2.3 0.0 NOT_STRONG_RE = markdown.inlinepatterns.NOT_STRONG_RE
1721 # Custom strikethrough syntax: ~~foo~~
1722 3 3.0 1.0 0.0 DEL_RE = r'(?<!~)(\~\~)([^~\n]+?)(\~\~)(?!~)'
1723 # Custom bold syntax: **foo** but not __foo__
1724 # str inside ** must start and end with a word character
1725 # this is needed for things like "const char *x = (char *)y"
1726 3 4.0 1.3 0.0 EMPHASIS_RE = r'(\*)(?!\s+)([^\*^\n]+)(?<!\s)\*'
1727 3 5.0 1.7 0.0 ENTITY_RE = markdown.inlinepatterns.ENTITY_RE
1728 3 4.0 1.3 0.0 STRONG_EM_RE = r'(\*\*\*)(?!\s+)([^\*^\n]+)(?<!\s)\*\*\*'
1729 # Inline code block without whitespace stripping
1730 3 3.0 1.0 0.0 BACKTICK_RE = r'(?:(?<!\\)((?:\\{2})+)(?=`+)|(?<!\\)(`+)(.+?)(?<!`)\3(?!`))'
1731
1732 # Add Inline Patterns
1733 3 12.0 4.0 0.0 reg = markdown.util.Registry()
1734 3 3038.0 1012.7 3.9 reg.register(BacktickPattern(BACKTICK_RE), 'backtick', 105)
1735 3 1718.0 572.7 2.2 reg.register(markdown.inlinepatterns.DoubleTagPattern(STRONG_EM_RE, 'strong,em'), 'strong_em', 100)
1736 3 1589.0 529.7 2.0 reg.register(UserMentionPattern(mention.find_mentions, self), 'usermention', 95)
1737 3 1252.0 417.3 1.6 reg.register(Tex(r'\B(?<!\$)\$\$(?P<body>[^\n_$](\\\$|[^$\n])*)\$\$(?!\$)\B'), 'tex', 90)
1738 3 2986.0 995.3 3.8 reg.register(StreamPattern(verbose_compile(STREAM_LINK_REGEX), self), 'stream', 85)
1739 3 710.0 236.7 0.9 reg.register(Avatar(AVATAR_REGEX, self), 'avatar', 80)
1740 3 957.0 319.0 1.2 reg.register(ModalLink(r'!modal_link\((?P<relative_url>[^)]*), (?P<text>[^)]*)\)'), 'modal_link', 75)
1741 # Note that !gravatar syntax should be deprecated long term.
1742 3 705.0 235.0 0.9 reg.register(Avatar(GRAVATAR_REGEX, self), 'gravatar', 70)
1743 3 800.0 266.7 1.0 reg.register(UserGroupMentionPattern(mention.user_group_mentions, self), 'usergroupmention', 65)
1744 3 4404.0 1468.0 5.6 reg.register(AtomicLinkPattern(get_link_re(), self), 'link', 60)
1745 3 49927.0 16642.3 63.4 reg.register(AutoLink(get_web_link_regex(), self), 'autolink', 55)
1746 # Reserve priority 45-54 for Realm Filters
1747 3 1159.0 386.3 1.5 reg = self.register_realm_filters(reg)
1748 3 989.0 329.7 1.3 reg.register(markdown.inlinepatterns.HtmlInlineProcessor(ENTITY_RE, self), 'entity', 40)
1749 3 929.0 309.7 1.2 reg.register(markdown.inlinepatterns.SimpleTagPattern(r'(\*\*)([^\n]+?)\2', 'strong'), 'strong', 35)
1750 3 1025.0 341.7 1.3 reg.register(markdown.inlinepatterns.SimpleTagPattern(EMPHASIS_RE, 'em'), 'emphasis', 30)
1751 3 865.0 288.3 1.1 reg.register(markdown.inlinepatterns.SimpleTagPattern(DEL_RE, 'del'), 'del', 25)
1752 3 591.0 197.0 0.7 reg.register(markdown.inlinepatterns.SimpleTextInlineProcessor(NOT_STRONG_RE), 'not_strong', 20)
1753 3 645.0 215.0 0.8 reg.register(Emoji(EMOJI_REGEX, self), 'emoji', 15)
1754 3 1827.0 609.0 2.3 reg.register(EmoticonTranslation(emoticon_regex, self), 'translate_emoticons', 10)
1755 # We get priority 5 from 'nl2br' extension
1756 3 2642.0 880.7 3.4 reg.register(UnicodeEmoji(unicode_emoji_regex), 'unicodeemoji', 0)
1757 3 14.0 4.7 0.0 return reg
build_inlinepatterns
with AutoLinks x20
Line # Hits Time Per Hit % Time Line Contents
==============================================================
1717 @profile
1718 def build_inlinepatterns(self) -> markdown.util.Registry:
1719 # Declare regexes for clean single line calls to .register().
1720 60 110.0 1.8 0.1 NOT_STRONG_RE = markdown.inlinepatterns.NOT_STRONG_RE
1721 # Custom strikethrough syntax: ~~foo~~
1722 60 89.0 1.5 0.1 DEL_RE = r'(?<!~)(\~\~)([^~\n]+?)(\~\~)(?!~)'
1723 # Custom bold syntax: **foo** but not __foo__
1724 # str inside ** must start and end with a word character
1725 # this is needed for things like "const char *x = (char *)y"
1726 60 92.0 1.5 0.1 EMPHASIS_RE = r'(\*)(?!\s+)([^\*^\n]+)(?<!\s)\*'
1727 60 103.0 1.7 0.1 ENTITY_RE = markdown.inlinepatterns.ENTITY_RE
1728 60 92.0 1.5 0.1 STRONG_EM_RE = r'(\*\*\*)(?!\s+)([^\*^\n]+)(?<!\s)\*\*\*'
1729 # Inline code block without whitespace stripping
1730 60 95.0 1.6 0.1 BACKTICK_RE = r'(?:(?<!\\)((?:\\{2})+)(?=`+)|(?<!\\)(`+)(.+?)(?<!`)\3(?!`))'
1731
1732 # Add Inline Patterns
1733 60 201.0 3.4 0.1 reg = markdown.util.Registry()
1734 60 6887.0 114.8 4.0 reg.register(BacktickPattern(BACKTICK_RE), 'backtick', 105)
1735 60 4601.0 76.7 2.7 reg.register(markdown.inlinepatterns.DoubleTagPattern(STRONG_EM_RE, 'strong,em'), 'strong_em', 100)
1736 60 5718.0 95.3 3.3 reg.register(UserMentionPattern(mention.find_mentions, self), 'usermention', 95)
1737 60 4886.0 81.4 2.9 reg.register(Tex(r'\B(?<!\$)\$\$(?P<body>[^\n_$](\\\$|[^$\n])*)\$\$(?!\$)\B'), 'tex', 90)
1738 60 11651.0 194.2 6.8 reg.register(StreamPattern(verbose_compile(STREAM_LINK_REGEX), self), 'stream', 85)
1739 60 3366.0 56.1 2.0 reg.register(Avatar(AVATAR_REGEX, self), 'avatar', 80)
1740 60 4068.0 67.8 2.4 reg.register(ModalLink(r'!modal_link\((?P<relative_url>[^)]*), (?P<text>[^)]*)\)'), 'modal_link', 75)
1741 # Note that !gravatar syntax should be deprecated long term.
1742 60 3048.0 50.8 1.8 reg.register(Avatar(GRAVATAR_REGEX, self), 'gravatar', 70)
1743 60 3762.0 62.7 2.2 reg.register(UserGroupMentionPattern(mention.user_group_mentions, self), 'usergroupmention', 65)
1744 60 16811.0 280.2 9.8 reg.register(AtomicLinkPattern(get_link_re(), self), 'link', 60)
1745 60 86201.0 1436.7 50.3 reg.register(AutoLink(get_web_link_regex(), self), 'autolink', 55)
1746 # Reserve priority 45-54 for Realm Filters
1747 60 1998.0 33.3 1.2 reg = self.register_realm_filters(reg)
1748 60 1857.0 30.9 1.1 reg.register(markdown.inlinepatterns.HtmlInlineProcessor(ENTITY_RE, self), 'entity', 40)
1749 60 1691.0 28.2 1.0 reg.register(markdown.inlinepatterns.SimpleTagPattern(r'(\*\*)([^\n]+?)\2', 'strong'), 'strong', 35)
1750 60 1869.0 31.1 1.1 reg.register(markdown.inlinepatterns.SimpleTagPattern(EMPHASIS_RE, 'em'), 'emphasis', 30)
1751 60 1846.0 30.8 1.1 reg.register(markdown.inlinepatterns.SimpleTagPattern(DEL_RE, 'del'), 'del', 25)
1752 60 1442.0 24.0 0.8 reg.register(markdown.inlinepatterns.SimpleTextInlineProcessor(NOT_STRONG_RE), 'not_strong', 20)
1753 60 1447.0 24.1 0.8 reg.register(Emoji(EMOJI_REGEX, self), 'emoji', 15)
1754 60 3130.0 52.2 1.8 reg.register(EmoticonTranslation(emoticon_regex, self), 'translate_emoticons', 10)
1755 # We get priority 5 from 'nl2br' extension
1756 60 4058.0 67.6 2.4 reg.register(UnicodeEmoji(unicode_emoji_regex), 'unicodeemoji', 0)
1757 60 95.0 1.6 0.1 return reg
build_inlinepatterns
with AutoLinks x1000
Line # Hits Time Per Hit % Time Line Contents
==============================================================
1717 @profile
1718 def build_inlinepatterns(self) -> markdown.util.Registry:
1719 # Declare regexes for clean single line calls to .register().
1720 3000 5585.0 1.9 0.6 NOT_STRONG_RE = markdown.inlinepatterns.NOT_STRONG_RE
1721 # Custom strikethrough syntax: ~~foo~~
1722 3000 4336.0 1.4 0.5 DEL_RE = r'(?<!~)(\~\~)([^~\n]+?)(\~\~)(?!~)'
1723 # Custom bold syntax: **foo** but not __foo__
1724 # str inside ** must start and end with a word character
1725 # this is needed for things like "const char *x = (char *)y"
1726 3000 4221.0 1.4 0.5 EMPHASIS_RE = r'(\*)(?!\s+)([^\*^\n]+)(?<!\s)\*'
1727 3000 4556.0 1.5 0.5 ENTITY_RE = markdown.inlinepatterns.ENTITY_RE
1728 3000 4138.0 1.4 0.5 STRONG_EM_RE = r'(\*\*\*)(?!\s+)([^\*^\n]+)(?<!\s)\*\*\*'
1729 # Inline code block without whitespace stripping
1730 3000 4077.0 1.4 0.4 BACKTICK_RE = r'(?:(?<!\\)((?:\\{2})+)(?=`+)|(?<!\\)(`+)(.+?)(?<!`)\3(?!`))'
1731
1732 # Add Inline Patterns
1733 3000 9489.0 3.2 1.0 reg = markdown.util.Registry()
1734 3000 51756.0 17.3 5.6 reg.register(BacktickPattern(BACKTICK_RE), 'backtick', 105)
1735 3000 40331.0 13.4 4.4 reg.register(markdown.inlinepatterns.DoubleTagPattern(STRONG_EM_RE, 'strong,em'), 'strong_em', 100)
1736 3000 35881.0 12.0 3.9 reg.register(UserMentionPattern(mention.find_mentions, self), 'usermention', 95)
1737 3000 33446.0 11.1 3.6 reg.register(Tex(r'\B(?<!\$)\$\$(?P<body>[^\n_$](\\\$|[^$\n])*)\$\$(?!\$)\B'), 'tex', 90)
1738 3000 63048.0 21.0 6.9 reg.register(StreamPattern(verbose_compile(STREAM_LINK_REGEX), self), 'stream', 85)
1739 3000 34833.0 11.6 3.8 reg.register(Avatar(AVATAR_REGEX, self), 'avatar', 80)
1740 3000 33452.0 11.2 3.6 reg.register(ModalLink(r'!modal_link\((?P<relative_url>[^)]*), (?P<text>[^)]*)\)'), 'modal_link', 75)
1741 # Note that !gravatar syntax should be deprecated long term.
1742 3000 31730.0 10.6 3.5 reg.register(Avatar(GRAVATAR_REGEX, self), 'gravatar', 70)
1743 3000 52681.0 17.6 5.7 reg.register(UserGroupMentionPattern(mention.user_group_mentions, self), 'usergroupmention', 65)
1744 3000 61471.0 20.5 6.7 reg.register(AtomicLinkPattern(get_link_re(), self), 'link', 60)
1745 3000 138959.0 46.3 15.1 reg.register(AutoLink(get_web_link_regex(), self), 'autolink', 55)
1746 # Reserve priority 45-54 for Realm Filters
1747 3000 29352.0 9.8 3.2 reg = self.register_realm_filters(reg)
1748 3000 34615.0 11.5 3.8 reg.register(markdown.inlinepatterns.HtmlInlineProcessor(ENTITY_RE, self), 'entity', 40)
1749 3000 35690.0 11.9 3.9 reg.register(markdown.inlinepatterns.SimpleTagPattern(r'(\*\*)([^\n]+?)\2', 'strong'), 'strong', 35)
1750 3000 33910.0 11.3 3.7 reg.register(markdown.inlinepatterns.SimpleTagPattern(EMPHASIS_RE, 'em'), 'emphasis', 30)
1751 3000 32818.0 10.9 3.6 reg.register(markdown.inlinepatterns.SimpleTagPattern(DEL_RE, 'del'), 'del', 25)
1752 3000 30871.0 10.3 3.4 reg.register(markdown.inlinepatterns.SimpleTextInlineProcessor(NOT_STRONG_RE), 'not_strong', 20)
1753 3000 32698.0 10.9 3.6 reg.register(Emoji(EMOJI_REGEX, self), 'emoji', 15)
1754 3000 33658.0 11.2 3.7 reg.register(EmoticonTranslation(emoticon_regex, self), 'translate_emoticons', 10)
1755 # We get priority 5 from 'nl2br' extension
1756 3000 37423.0 12.5 4.1 reg.register(UnicodeEmoji(unicode_emoji_regex), 'unicodeemoji', 0)
1757 3000 4250.0 1.4 0.5 return reg
Line # Hits Time Per Hit % Time Line Contents
==============================================================
100 @profile
101 def get_web_link_regex() -> str:
102 # We create this one time, but not at startup. So the
103 # first message rendered in any process will have some
104 # extra costs.
105 global LINK_REGEX
106 3 4.0 1.3 0.0 if LINK_REGEX is None:
107 # NOTE: this is a very expensive step, it reads a file of tlds!
108 1 393.0 393.0 0.8 tlds = '|'.join(list_of_tlds())
109
110 # A link starts at a word boundary, and ends at space, punctuation, or end-of-input.
111 #
112 # We detect a url either by the `https?://` or by building around the TLD.
113
114 # In lieu of having a recursive regex (which Python doesn't support) to match
115 # arbitrary numbers of nested matching parentheses, we manually build a regexp that
116 # can match up to six
117 # The inner_paren_contents chunk matches the innermost non-parenthesis-holding text,
118 # and the paren_group matches text with, optionally, a matching set of parens
119 1 1.0 1.0 0.0 inner_paren_contents = r"[^\s()\"]*"
120 paren_group = r"""
121 [^\s()\"]*? # Containing characters that won't end the URL
122 (?: \( %s \) # and more characters in matched parens
123 [^\s()\"]*? # followed by more characters
124 )* # zero-or-more sets of paired parens
125 1 1.0 1.0 0.0 """
126 1 1.0 1.0 0.0 nested_paren_chunk = paren_group
127 7 4.0 0.6 0.0 for i in range(6):
128 6 15.0 2.5 0.0 nested_paren_chunk = nested_paren_chunk % (paren_group,)
129 1 3.0 3.0 0.0 nested_paren_chunk = nested_paren_chunk % (inner_paren_contents,)
130
131 1 13.0 13.0 0.0 file_links = r"| (?:file://(/[^/ ]*)+/?)" if settings.ENABLE_FILE_LINKS else r""
132 regex = r"""
133 (?<![^\s'"\(,:<]) # Start after whitespace or specified chars
134 # (Double-negative lookbehind to allow start-of-string)
135 (?P<url> # Main group
136 (?:(?: # Domain part
137 https?://[\w.:@-]+? # If it has a protocol, anything goes.
138 |(?: # Or, if not, be more strict to avoid false-positives
139 (?:[\w-]+\.)+ # One or more domain components, separated by dots
140 (?:%s) # TLDs (filled in via format from tlds-alpha-by-domain.txt)
141 )
142 )
143 (?:/ # A path, beginning with /
144 %s # zero-to-6 sets of paired parens
145 )?) # Path is optional
146 | (?:[\w.-]+\@[\w.-]+\.[\w]+) # Email is separate, since it can't have a path
147 %s # File path start with file:///, enable by setting ENABLE_FILE_LINKS=True
148 | (?:bitcoin:[13][a-km-zA-HJ-NP-Z1-9]{25,34}) # Bitcoin address pattern, see https://mokagio.github.io/tech-journal/2014/11/21/regex-bitcoin.html
149 )
150 (?= # URL must be followed by (not included in group)
151 [!:;\?\),\.\'\"\>]* # Optional punctuation characters
152 (?:\Z|\s) # followed by whitespace or end of string
153 )
154 1 5.0 5.0 0.0 """ % (tlds, nested_paren_chunk, file_links)
155 1 49367.0 49367.0 99.1 LINK_REGEX = verbose_compile(regex)
156 3 4.0 1.3 0.0 return LINK_REGEX
build_inlinepatterns
without AutoLink
Line # Hits Time Per Hit % Time Line Contents
==============================================================
1712 @profile
1713 def build_inlinepatterns(self) -> markdown.util.Registry:
1714 # Declare regexes for clean single line calls to .register().
1715 3 14.0 4.7 0.0 NOT_STRONG_RE = markdown.inlinepatterns.NOT_STRONG_RE
1716 # Custom strikethrough syntax: ~~foo~~
1717 3 7.0 2.3 0.0 DEL_RE = r'(?<!~)(\~\~)([^~\n]+?)(\~\~)(?!~)'
1718 # Custom bold syntax: **foo** but not __foo__
1719 # str inside ** must start and end with a word character
1720 # this is needed for things like "const char *x = (char *)y"
1721 3 6.0 2.0 0.0 EMPHASIS_RE = r'(\*)(?!\s+)([^\*^\n]+)(?<!\s)\*'
1722 3 7.0 2.3 0.0 ENTITY_RE = markdown.inlinepatterns.ENTITY_RE
1723 3 6.0 2.0 0.0 STRONG_EM_RE = r'(\*\*\*)(?!\s+)([^\*^\n]+)(?<!\s)\*\*\*'
1724 # Inline code block without whitespace stripping
1725 3 10.0 3.3 0.0 BACKTICK_RE = r'(?:(?<!\\)((?:\\{2})+)(?=`+)|(?<!\\)(`+)(.+?)(?<!`)\3(?!`))'
1726
1727 # Add Inline Patterns
1728 3 19.0 6.3 0.0 reg = markdown.util.Registry()
1729 3 6683.0 2227.7 14.6 reg.register(BacktickPattern(BACKTICK_RE), 'backtick', 105)
1730 3 4371.0 1457.0 9.6 reg.register(markdown.inlinepatterns.DoubleTagPattern(STRONG_EM_RE, 'strong,em'), 'strong_em', 100)
1731 3 4569.0 1523.0 10.0 reg.register(UserMentionPattern(mention.find_mentions, self), 'usermention', 95)
1732 3 3989.0 1329.7 8.7 reg.register(Tex(r'\B(?<!\$)\$\$(?P<body>[^\n_$](\\\$|[^$\n])*)\$\$(?!\$)\B'), 'tex', 90)
1733 3 5775.0 1925.0 12.6 reg.register(StreamPattern(verbose_compile(STREAM_LINK_REGEX), self), 'stream', 85)
1734 3 1161.0 387.0 2.5 reg.register(Avatar(AVATAR_REGEX, self), 'avatar', 80)
1735 3 1298.0 432.7 2.8 reg.register(ModalLink(r'!modal_link\((?P<relative_url>[^)]*), (?P<text>[^)]*)\)'), 'modal_link', 75)
1736 # Note that !gravatar syntax should be deprecated long term.
1737 3 940.0 313.3 2.1 reg.register(Avatar(GRAVATAR_REGEX, self), 'gravatar', 70)
1738 3 1083.0 361.0 2.4 reg.register(UserGroupMentionPattern(mention.user_group_mentions, self), 'usergroupmention', 65)
1739 3 5382.0 1794.0 11.8 reg.register(AtomicLinkPattern(get_link_re(), self), 'link', 60)
1740 # reg.register(AutoLink(get_web_link_regex(), self), 'autolink', 55)
1741 # Reserve priority 45-54 for Realm Filters
1742 3 1221.0 407.0 2.7 reg = self.register_realm_filters(reg)
1743 3 951.0 317.0 2.1 reg.register(markdown.inlinepatterns.HtmlInlineProcessor(ENTITY_RE, self), 'entity', 40)
1744 3 676.0 225.3 1.5 reg.register(markdown.inlinepatterns.SimpleTagPattern(r'(\*\*)([^\n]+?)\2', 'strong'), 'strong', 35)
1745 3 964.0 321.3 2.1 reg.register(markdown.inlinepatterns.SimpleTagPattern(EMPHASIS_RE, 'em'), 'emphasis', 30)
1746 3 939.0 313.0 2.1 reg.register(markdown.inlinepatterns.SimpleTagPattern(DEL_RE, 'del'), 'del', 25)
1747 3 640.0 213.3 1.4 reg.register(markdown.inlinepatterns.SimpleTextInlineProcessor(NOT_STRONG_RE), 'not_strong', 20)
1748 3 656.0 218.7 1.4 reg.register(Emoji(EMOJI_REGEX, self), 'emoji', 15)
1749 3 1870.0 623.3 4.1 reg.register(EmoticonTranslation(emoticon_regex, self), 'translate_emoticons', 10)
1750 # We get priority 5 from 'nl2br' extension
1751 3 2432.0 810.7 5.3 reg.register(UnicodeEmoji(unicode_emoji_regex), 'unicodeemoji', 0)
1752 3 2.0 0.7 0.0 return reg