Skip to content

Instantly share code, notes, and snippets.

@xziyue
Last active July 5, 2020 15:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save xziyue/911236ab182bdbedc07424c46387b46c to your computer and use it in GitHub Desktop.
Save xziyue/911236ab182bdbedc07424c46387b46c to your computer and use it in GitHub Desktop.
A Simple Ruby Program For Jekyll Indexing
require 'digest/sha1'
module Jekyll
  # Liquid filters that turn inline index tags into anchored terms and build
  # a list of links to those terms.
  #
  # Tag syntax recognised inside a document (a tag must sit on one line):
  #
  #   %{term}%             index item; shown as "term", indexed as "term"
  #   %{shown@alias}%      index item; shown as "shown", indexed as "alias"
  #                        (write a literal "@" as "\@")
  #   %{#get_link alias}%  command; replaced by "#<anchor>" of item "alias"
  #   %%{...}%             escaped tag; emitted as-is minus one leading "%"
  #
  # Index keys are case-insensitive (see #str_to_key); the anchor name of an
  # item is the SHA-1 hex digest of its key.
  module JekyllIndexTermFilter
    # Group 1 is the run of leading "%" (more than one marks an escaped tag),
    # group 3 is the text between the braces.
    TAG_RE = /(%+)({)(.*?)(}%)/

    # An "@" that is not preceded by a backslash separates display name and alias.
    TAG_ALIAS_RE = /(?<!\\)@/

    # type is 'none' (unparsable), 'index-item' or 'cmd'; raw is the matched
    # tag text; segments is [display_name, alias_name] or [cmd_name, cmd_param].
    TagInfo = Struct.new(:type, :raw, :segments)

    # Per-key record: the names of the last tag seen for the key and the
    # anchor hash derived from the key.
    ConvInfo = Struct.new(:display_name, :alias_name, :key_hash)

    @show_warning = true

    class << self
      # Module-wide switch consulted by #print_error_msg. #indexedbody turns it
      # on, #indexedterms turns it off, so each document is only reported once.
      attr_accessor :show_warning
    end

    # Prints +msg+ to stdout in yellow, unless warnings are switched off.
    #
    # @param msg [String] text to print
    # @return [nil]
    def print_error_msg(msg)
      print("\e[33m#{msg}\e[0m\n") if JekyllIndexTermFilter.show_warning
    end

    # Interprets one regexp match of TAG_RE.
    #
    # @param match [MatchData] a match of TAG_RE (uses groups 0 and 3)
    # @return [TagInfo] type 'cmd', 'index-item', or 'none' when the tag could
    #   not be parsed (a warning is printed in that case)
    def parse_tag(match)
      tag = TagInfo.new('none', match[0], [])
      inner = match[3].strip

      if inner.start_with?('#')
        # Command name runs up to the first space; the rest is its parameter.
        cmd_name, _sep, rest = inner[1..-1].partition(' ')
        if cmd_name.empty?
          print_error_msg("unable to parse command name for tag '#{tag.raw}'")
          return tag
        end
        tag.type = 'cmd'
        tag.segments.push(cmd_name, rest.strip)
        return tag
      end

      # An empty tag would otherwise become an index item with an empty name.
      if inner.empty?
        print_error_msg("unable to parse display name/alias for tag '#{tag.raw}'")
        return tag
      end

      alias_match = TAG_ALIAS_RE.match(inner)
      if alias_match.nil?
        display_name = inner
        alias_name = inner
      elsif alias_match.begin(0).zero? || alias_match.end(0) == inner.length
        # The separator may not be the first or last character.
        print_error_msg("unable to parse display name/alias for tag '#{tag.raw}'")
        return tag
      else
        display_name = alias_match.pre_match.strip
        alias_name = alias_match.post_match.strip
      end

      tag.type = 'index-item'
      # Non-mutating gsub: display_name and alias_name may be the same object.
      tag.segments.push(display_name.gsub('\\@', '@'), alias_name.gsub('\\@', '@'))
      tag
    end

    # Splits a document into plain-text pieces and parsed tags, in order.
    #
    # @param doc [String, nil] document text; nil is treated as ""
    # @return [Array<String, TagInfo>] text before each tag, then the tag
    #   (TagInfo) or the unescaped text of an escaped tag, ending with the
    #   trailing text
    def scan_document(doc)
      doc = doc.to_s
      pieces = []
      pos = 0

      while (match = TAG_RE.match(doc, pos))
        pieces.push(doc[pos...match.begin(0)])
        if match[1].length > 1
          # Escaped tag: drop one leading "%" and keep the rest verbatim.
          pieces.push(match[0][1..-1])
        else
          pieces.push(parse_tag(match))
        end
        pos = match.end(0)
      end

      pieces.push(doc[pos..-1])
      pieces
    end

    # Normalises an alias into an index key; keys are not case sensitive.
    #
    # @param s [String]
    # @return [String] the lower-cased string
    def str_to_key(s)
      s.downcase
    end

    # Scans a document and collects its index items.
    #
    # Prints a warning for every key used by more than one index item; for
    # such keys the last item wins in the returned hash.
    #
    # @param doc [String, nil] document text
    # @return [Array(Array<String, TagInfo>, Hash{String => ConvInfo})]
    #   the scanned pieces and the key => ConvInfo table
    def parse_document(doc)
      scanned = scan_document(doc)
      name_converter = {}
      occurrences = Hash.new(0)

      scanned.each do |item|
        next unless item.is_a?(TagInfo) && item.type == 'index-item'

        display_name, alias_name = item.segments
        key = str_to_key(alias_name)
        occurrences[key] += 1
        name_converter[key] = ConvInfo.new(display_name, alias_name, Digest::SHA1.hexdigest(key))
      end

      occurrences.each do |key, count|
        print_error_msg("index item '#{key}' appeared #{count} times") if count > 1
      end

      [scanned, name_converter]
    end

    # Renders an index item as an anchored span.
    #
    # The displayed text is the tag's own display name, so items that share a
    # key keep their individual text; only the anchor hash comes from the table.
    #
    # @param tag_info [TagInfo] an 'index-item' tag
    # @param name_converter [Hash{String => ConvInfo}] table from #parse_document
    # @return [String] HTML fragment
    # @raise [RuntimeError] if the tag's key is missing from the table
    def process_index_item(tag_info, name_converter)
      display_name, alias_name = tag_info.segments
      key = str_to_key(alias_name)
      raise "internal error: key '#{key}' does not exist" unless name_converter.key?(key)

      format('<span class="indexed-term-style"><a name="%s"></a>%s</span>',
             name_converter[key].key_hash, display_name)
    end

    # Executes a command tag. Only 'get_link' is known.
    #
    # @param tag_info [TagInfo] a 'cmd' tag
    # @param name_converter [Hash{String => ConvInfo}] table from #parse_document
    # @return [String] "#<anchor hash>" for a resolvable get_link, otherwise ""
    #   (a warning is printed)
    def process_command(tag_info, name_converter)
      cmd_name, cmd_param = tag_info.segments

      if cmd_name == 'get_link'
        key = str_to_key(cmd_param)
        return '#' + name_converter[key].key_hash if name_converter.key?(key)

        print_error_msg("#get_link: key '#{cmd_param}'=>'#{key}' does not exist")
      else
        print_error_msg("command '#{cmd_name}' does not exist")
      end
      ''
    end

    # Liquid filter: returns the document with every tag replaced by its
    # rendering. Unparsable tags are removed. Enables warnings.
    #
    # @param doc [String, nil] document text
    # @return [String]
    def indexedbody(doc)
      JekyllIndexTermFilter.show_warning = true
      scanned, name_converter = parse_document(doc)

      rendered = scanned.map do |item|
        next item unless item.is_a?(TagInfo)

        case item.type
        when 'index-item' then process_index_item(item, name_converter)
        when 'cmd' then process_command(item, name_converter)
        when 'none' then ''
        else
          print_error_msg("unknown TagInfo type #{item.type}")
          ''
        end
      end
      rendered.join
    end

    # Liquid filter: returns links to all index items of the document, sorted
    # by key and separated by a middle dot. Disables warnings, since
    # #indexedbody reports them for the same document.
    #
    # @param doc [String, nil] document text
    # @return [String] HTML fragment, "" when there are no index items
    def indexedterms(doc)
      JekyllIndexTermFilter.show_warning = false
      _scanned, name_converter = parse_document(doc)

      links = name_converter.sort_by { |key, _info| key }.map do |_key, info|
        format('<a class="index-a" href="#%s">%s</a>', info.key_hash, info.alias_name)
      end
      links.join(" &#xB7; \n")
    end
  end
end
# Register the module with Liquid so that its filter methods (e.g.
# indexedbody and indexedterms) can be applied in templates.
Liquid::Template.register_filter(Jekyll::JekyllIndexTermFilter)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment