Last active
July 5, 2020 15:08
-
-
Save xziyue/911236ab182bdbedc07424c46387b46c to your computer and use it in GitHub Desktop.
A Simple Ruby Program For Jekyll Indexing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'digest/sha1'
module Jekyll
  # Liquid filters that turn inline `%{...}%` tags into an HTML term index.
  #
  # Tag syntax recognised inside the filtered text:
  #
  #   %{term}%            index item; display text and index name are "term"
  #   %{shown@name}%      index item displayed as "shown", indexed as "name"
  #   %{a\@b}%            "\@" is a literal "@" instead of the separator
  #   %{#get_link name}%  command; expands to "#<anchor>" of index item "name"
  #   %%{...}%            escaped tag; copied through minus one leading "%"
  #
  # Index names are compared case-insensitively (see #str_to_key) and the
  # anchor of an item is the SHA-1 hex digest of its lower-cased name.
  # Because TAG_RE uses `.` without the multiline flag, a tag cannot span
  # several lines.
  module JekyllIndexTermFilter
    # Mutable switch read by #print_error_msg. #indexedbody turns it on and
    # #indexedterms turns it off; the value persists between calls.
    @@show_warning = true

    # Groups: 1 = leading "%" run, 2 = "{", 3 = tag content, 4 = "}%".
    TAG_RE = /(%+)({)(.*?)(}%)/.freeze

    # An "@" that is not preceded by a backslash.
    TAG_ALIAS_RE = /(?<!\\)@/.freeze

    # type is 'none' (unparseable), 'cmd' or 'index-item'; raw is the matched
    # tag text; segments is [cmd_name, cmd_param] or [display_name, alias_name].
    TagInfo = Struct.new(:type, :raw, :segments)

    # Per-key index record: display text, index name and anchor hash.
    ConvInfo = Struct.new(:display_name, :alias_name, :key_hash)

    # Prints +msg+ to stdout in yellow when warnings are enabled.
    #
    # @param msg [String] text of the warning
    # @return [nil]
    def print_error_msg(msg)
      print("\e[33m#{msg}\e[0m\n") if @@show_warning
    end

    # Converts one TAG_RE match into a TagInfo.
    #
    # @param match [MatchData] a match produced by TAG_RE
    # @return [TagInfo] with type 'cmd', 'index-item', or 'none' when the tag
    #   content could not be parsed (a warning is printed in that case)
    def parse_tag(match)
      tag = TagInfo.new('none', match[0], [])
      inner = match[3].strip

      if inner.start_with?('#')
        # Command name runs up to the first space; the rest is its parameter.
        cmd_name, _separator, cmd_param = inner[1..-1].partition(' ')
        if cmd_name.empty?
          print_error_msg("unable to parse command name for tag '#{tag.raw}'")
          return tag
        end
        tag.type = 'cmd'
        tag.segments.push(cmd_name, cmd_param.strip)
        return tag
      end

      alias_match = TAG_ALIAS_RE.match(inner)
      if alias_match
        # The separator must have text on both sides.
        if alias_match.begin(0).zero? || alias_match.end(0) == inner.length
          print_error_msg("unable to parse display name/alias for tag '#{tag.raw}'")
          return tag
        end
        display_name = alias_match.pre_match.strip
        alias_name = alias_match.post_match.strip
      else
        display_name = inner
        alias_name = inner
      end

      # Non-mutating gsub: both names may start out as the same String object.
      tag.type = 'index-item'
      tag.segments.push(display_name.gsub('\\@', '@'), alias_name.gsub('\\@', '@'))
      tag
    end

    # Splits +doc+ into an alternating list of literal text and parsed tags.
    #
    # @param doc [#to_s] text to scan; nil is treated as the empty string
    # @return [Array<String, TagInfo>] pieces in document order; escaped tags
    #   appear as Strings with one leading "%" removed
    def scan_document(doc)
      text = doc.to_s
      pieces = []
      pos = 0
      while (match = TAG_RE.match(text, pos))
        pieces << text[pos...match.begin(0)]
        # More than one leading "%" marks an escaped tag: drop one "%" and
        # pass the remainder through as plain text.
        pieces << (match[1].length > 1 ? match[0][1..-1] : parse_tag(match))
        pos = match.end(0)
      end
      pieces << text[pos..-1]
      pieces
    end

    # Normalises an index name into its lookup key (keys ignore case).
    #
    # @param s [String]
    # @return [String] the lower-cased name
    def str_to_key(s)
      s.downcase
    end

    # Scans +doc+ and collects every index item it declares.
    #
    # Prints a warning for each key that is declared more than once; the
    # last declaration is the one stored in the returned hash.
    #
    # @param doc [#to_s] text to scan
    # @return [Array(Array<String, TagInfo>, Hash{String => ConvInfo})]
    #   the scanned pieces and the key => ConvInfo lookup table
    def parse_document(doc)
      scanned = scan_document(doc)
      name_converter = {}
      occurrences = Hash.new(0)

      scanned.each do |item|
        next unless item.is_a?(TagInfo) && item.type == 'index-item'

        display_name, alias_name = item.segments
        key = str_to_key(alias_name)
        occurrences[key] += 1
        name_converter[key] = ConvInfo.new(display_name, alias_name, Digest::SHA1.hexdigest(key))
      end

      occurrences.each do |key, count|
        print_error_msg("index item '#{key}' appeared #{count} times") if count > 1
      end

      [scanned, name_converter]
    end

    # Renders an index-item tag as an anchored <span>.
    #
    # The text shown is the tag's own display name, so tags that share a key
    # each keep their own wording; only the anchor hash comes from the table.
    #
    # @param tag_info [TagInfo] a tag of type 'index-item'
    # @param name_converter [Hash{String => ConvInfo}]
    # @return [String] HTML fragment
    # @raise [RuntimeError] if the tag's key is missing from +name_converter+
    def process_index_item(tag_info, name_converter)
      display_name, alias_name = tag_info.segments
      key = str_to_key(alias_name)
      raise "internal error: key '#{key}' does not exist" unless name_converter.key?(key)

      '<span class="indexed-term-style"><a name="%s"></a>%s</span>' % [name_converter[key].key_hash, display_name]
    end

    # Expands a command tag.
    #
    # @param tag_info [TagInfo] a tag of type 'cmd'
    # @param name_converter [Hash{String => ConvInfo}]
    # @return [String] "#<anchor hash>" for a resolvable get_link, otherwise
    #   the empty string (after printing a warning)
    def process_command(tag_info, name_converter)
      cmd_name, cmd_param = tag_info.segments

      case cmd_name
      when 'get_link'
        key = str_to_key(cmd_param)
        return '#' + name_converter[key].key_hash if name_converter.key?(key)

        print_error_msg("#get_link: key '#{cmd_param}'=>'#{key}' does not exist")
      else
        print_error_msg("command '#{cmd_name}' does not exist")
      end
      ''
    end

    # Liquid filter: returns +doc+ with every tag replaced by its rendering.
    # Enables warnings for the duration of (and after) the call.
    #
    # @param doc [#to_s] text containing tags; nil yields ''
    # @return [String]
    def indexedbody(doc)
      @@show_warning = true
      scanned, name_converter = parse_document(doc)

      rendered = scanned.map do |item|
        next item unless item.is_a?(TagInfo)

        case item.type
        when 'none'
          '' # unparseable tag: already reported, contributes nothing
        when 'index-item'
          process_index_item(item, name_converter)
        when 'cmd'
          process_command(item, name_converter)
        else
          print_error_msg("unknown TagInfo type #{item.type}")
          ''
        end
      end
      rendered.join
    end

    # Liquid filter: returns links to every index item in +doc+, ordered by
    # key and joined with " · \n". Disables warnings.
    #
    # @param doc [#to_s] text containing tags; nil yields ''
    # @return [String] HTML fragment
    def indexedterms(doc)
      @@show_warning = false
      _scanned, name_converter = parse_document(doc)

      links = name_converter.sort_by { |key, _info| key }.map do |_key, info|
        '<a class="index-a" href="#%s">%s</a>' % [info.key_hash, info.alias_name]
      end
      links.join(" · \n")
    end
  end
end
# Register the module with Liquid so its methods (e.g. indexedbody and
# indexedterms) can be used as template filters.
Liquid::Template.register_filter(Jekyll::JekyllIndexTermFilter)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment