Skip to content

Instantly share code, notes, and snippets.

@tomstuart
Last active December 10, 2015 16:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tomstuart/4462577 to your computer and use it in GitHub Desktop.
Save tomstuart/4462577 to your computer and use it in GitHub Desktop.
Syntax highlighting around inline markup
#!/usr/bin/env ruby
require 'nokogiri'
# Wraps the text of every programlisting/screen element that carries a
# language attribute in <phrase role="..."> elements, where the role is the
# token type reported by tokenize. Existing inline markup is left in place.
#
# The element's whole text content is tokenized once, and the resulting token
# stream is then consumed across the element's individual text nodes. A token
# that straddles an inline element boundary therefore becomes one phrase per
# text node, and an empty token yields an empty phrase.
#
# document - the Nokogiri XML document; it is modified in place.
#
# Raises RuntimeError when the token stream does not line up with the text
# (tokens exhausted early, or a token that does not match the text at all).
def highlight(document)
  listings = document.xpath('descendant::*[self::programlisting or self::screen][attribute::language]')

  listings.each do |element|
    tokens_and_types = tokenize(element.content, element[:language])
    next_token, next_type = tokens_and_types.shift

    element.xpath('descendant::text()').each do |text|
      content = text.content
      highlighted = Nokogiri::XML::DocumentFragment.new(document)

      until content.empty?
        # shift returns nil once the token list is used up.
        raise "ran out of tokens with #{content.inspect} left to highlight" if next_token.nil?

        prefix, next_token, content = remove_common_prefix(next_token, content)

        # Nothing was consumed from a non-empty token, so no later iteration
        # could make progress either: fail instead of looping forever.
        if prefix.empty? && !next_token.empty?
          raise "token #{next_token.inspect} does not match text #{content.inspect}"
        end

        highlighted.add_child(document.create_element('phrase', prefix, role: next_type))
        next_token, next_type = tokens_and_types.shift if next_token.empty?
      end

      text.replace(highlighted)
    end
  end
end
# Tokenizes string as source code written in language.
#
# Returns an array with one [token_text, token_type] pair for each raw line
# returned by get_raw_tokens.
def tokenize(string, language)
  raw_lines = get_raw_tokens(string, language)
  raw_lines.map do |raw_line|
    parse_raw_token(raw_line)
  end
end
# Runs `pygmentize -l <language> -f raw` as a subprocess, feeds string to its
# standard input and returns the lines it prints, as an array of strings.
def get_raw_tokens(string, language)
  command = ['pygmentize', '-l', language, '-f', 'raw']

  IO.popen(command, 'r+') do |pipe|
    pipe.print(string)
    # Close our writing end so the subprocess sees end-of-input.
    pipe.close_write
    pipe.readlines
  end
end
# Parses one raw tokenizer line of the form "<type>\t<python repr of text>".
#
# Returns a [token_text, token_type] pair, with the text decoded by
# python_repr_to_ruby_string.
def parse_raw_token(string)
  fields = string.chomp.split(/\t/)
  token_type = fields[0]
  token_repr = fields[1]
  [python_repr_to_ruby_string(token_repr), token_type]
end
# Converts the Python repr of a text string into the Ruby string it denotes.
#
# Accepts single- or double-quoted literals, with or without the u prefix
# (u'class', 'class', u"'"). Python switches to double quotes when the text
# contains a single quote, so both forms have to be understood.
#
# Decodes \\ \' \" \a \b \f \n \r \t \v as well as the numeric escapes \xNN,
# \uXXXX and \UXXXXXXXX, which are treated as code points. Any other
# backslash sequence is left untouched.
#
# repr - the repr text, without a trailing newline.
#
# Returns the decoded String.
# Raises RuntimeError if repr is not a quoted string literal.
def python_repr_to_ruby_string(repr)
  literal = /\Au?(['"])(.*)\1\z/.match(repr)
  raise "not a Python string repr: #{repr.inspect}" unless literal

  simple_escapes = {
    '\\\\' => '\\',
    '\\\'' => '\'',
    '\\"' => '"',
    '\\a' => "\a",
    '\\b' => "\b",
    '\\f' => "\f",
    '\\n' => "\n",
    '\\r' => "\r",
    '\\t' => "\t",
    '\\v' => "\v"
  }

  literal[2].gsub(/\\(?:x\h{2}|u\h{4}|U\h{8}|[\\'"abfnrtv])/) do |escape|
    # Numeric escapes carry a hex code point after the marker letter.
    if %w[x u U].include?(escape[1])
      [escape[2..-1].hex].pack('U')
    else
      simple_escapes.fetch(escape)
    end
  end
end
# Splits off the longest prefix shared by a and b.
#
# Returns [prefix, rest_of_a, rest_of_b].
def remove_common_prefix(a, b)
  shared = common_prefix(a, b)
  cut = shared.length
  [shared, a[cut..-1], b[cut..-1]]
end
# Returns the longest string that both a and b start with ("" if none).
def common_prefix(a, b)
  # Count the leading characters of a that b has at the same position;
  # b[index] is nil past the end of b, which ends the run.
  shared_length = a.each_char.with_index.take_while { |char, index| b[index] == char }.length
  a.slice(0, shared_length)
end
# Demo: highlight a small sample book that mixes code listings with inline
# markup, then print the resulting XML.
sample_xml = <<eod
<book>
<chapter>
<screen language="irb">>> <userinput>[:a, :b, :c].length</userinput>
=> 3</screen>
<programlisting language="ruby">class Foo
<emphasis role="bold">def foo(bar)</emphasis>
foo = b<emphasis>ar + 1</emphasis>
end
end</programlisting>
</chapter>
</book>
eod
document = Nokogiri::XML::Document.parse(sample_xml)
highlight(document)
puts document.to_xml
# <?xml version="1.0"?>
# <book>
# <chapter>
# <screen language="irb"><phrase role="Token.Operator"></phrase><phrase role="Token.Generic.Prompt">&gt;&gt; </phrase><userinput><phrase role="Token.Operator">[</phrase><phrase role="Token.Literal.String.Symbol">:a</phrase><phrase role="Token.Punctuation">,</phrase><phrase role="Token.Text"> </phrase><phrase role="Token.Literal.String.Symbol">:b</phrase><phrase role="Token.Punctuation">,</phrase><phrase role="Token.Text"> </phrase><phrase role="Token.Literal.String.Symbol">:c</phrase><phrase role="Token.Operator">]</phrase><phrase role="Token.Operator">.</phrase><phrase role="Token.Name">length</phrase></userinput><phrase role="Token.Text">
# </phrase><phrase role="Token.Generic.Output">=&gt; 3</phrase></screen>
#
# <programlisting language="ruby"><phrase role="Token.Keyword">class</phrase><phrase role="Token.Text"> </phrase><phrase role="Token.Name.Class">Foo</phrase><phrase role="Token.Text">
# </phrase><emphasis role="bold"><phrase role="Token.Keyword">def</phrase><phrase role="Token.Text"> </phrase><phrase role="Token.Name.Function">foo</phrase><phrase role="Token.Punctuation">(</phrase><phrase role="Token.Name">bar</phrase><phrase role="Token.Punctuation">)</phrase></emphasis><phrase role="Token.Text">
# </phrase><phrase role="Token.Name">foo</phrase><phrase role="Token.Text"> </phrase><phrase role="Token.Operator">=</phrase><phrase role="Token.Text"> </phrase><phrase role="Token.Name">b</phrase><emphasis><phrase role="Token.Name">ar</phrase><phrase role="Token.Text"> </phrase><phrase role="Token.Operator">+</phrase><phrase role="Token.Text"> </phrase><phrase role="Token.Literal.Number.Integer">1</phrase></emphasis><phrase role="Token.Text">
# </phrase><phrase role="Token.Keyword">end</phrase><phrase role="Token.Text">
# </phrase><phrase role="Token.Keyword">end</phrase></programlisting>
# </chapter>
# </book>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment