Last active
August 29, 2015 13:59
-
-
Save nilium/10440827 to your computer and use it in GitHub Desktop.
Ruby script to build a Dash docset for Gambit-C (http://gambitscheme.org)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby

require 'fileutils'
require 'sqlite3'
require 'cgi'
# Usage text shown for -help. The heredoc body is indented for readability;
# the -help handler prints it through String#unindented, which strips the
# common leading indent.
HELP_TEXT = <<-EOS
  build-gsc-docset [options...]
  Options:
    -help       Displays this text and exits.
    -verbose    Log most things.
    -log-sql    Especially verbose -- logs SQL statements.
                (Implies -verbose.)
    -nop        Dry run. This still creates a DB in memory to ensure DB
                ops are correct.
                (Implies -verbose.)
    -rebuild    Forces a complete rebuild of the docset (in-place). This
                includes re-downloading the Gambit-C manual HTML unless
                -skip-dl is specified.
    -skip-dl    If specified, the manual download is skipped. This is
                only really useful if you've made changes to the script
                to categorize entry types but don't want to redownload
                the manual's HTML file. Highly recommended you use this
                when messing with things so you don't spam Gambit's server
                over and over.
EOS
# Indentation helpers used to write indented heredocs in this script.
class String
  # Number of leading spaces on the least-indented non-empty line.
  # Only spaces count as indentation (tabs are not considered).
  # Returns nil when the string has no non-empty lines.
  def min_indent
    each_line.reject { |line| line.chomp.empty? }.map { |line| line[/^ */].length }.min
  end

  # Returns a copy of the string with the common leading indent (as computed
  # by #min_indent) removed from every non-empty line. Empty lines are kept
  # as they are. The receiver is never modified.
  def unindented
    # min_indent is nil for empty or all-blank strings; treat that as
    # "nothing to strip" instead of building the range 0...nil.
    indent = min_indent.to_i
    return dup if indent.zero?

    each_line.map { |line| line.chomp.empty? ? line : line[indent..-1] }.join
  end
end
# -help short-circuits everything else: print the usage text and stop.
if ARGV.include?('-help')
  puts HELP_TEXT.unindented
  exit 0
end

# Command-line switches. Each one is detected purely by its presence in ARGV;
# unrecognized arguments are ignored.
nop     = ARGV.include?('-nop')
log_sql = ARGV.include?('-log-sql')
# -log-sql and -nop both imply -verbose.
verbose = log_sql || nop || ARGV.include?('-verbose')
rebuild = ARGV.include?('-rebuild')
skip_dl = ARGV.include?('-skip-dl')

# log is defined once, according to verbosity: it writes to stderr when
# verbose, and is a no-op that accepts any arguments otherwise.
if verbose
  def log(*args)
    $stderr.puts(*args)
  end
else
  def log(*)
  end
end
# NOTE(review): INDENT is not referenced anywhere in the visible script.
INDENT = 4

# Where the Gambit-C manual is fetched from.
GSC_MANUAL_URL = 'http://www.iro.umontreal.ca/~gambit/doc/gambit-c.html'.freeze

# Locations inside the docset bundle, relative to the working directory.
GSC_MANUAL_FILE = 'Gambit-C.docset/Contents/Resources/Documents/index.html'.freeze
GSC_INDEX_DB    = 'Gambit-C.docset/Contents/Resources/docSet.dsidx'.freeze
GSC_INFO_PLIST  = 'Gambit-C.docset/Contents/Info.plist'.freeze
# Contents written to the docset's Info.plist. The heredoc is indented here
# for readability; String#unindented removes the common leading indent so the
# XML declaration starts in column 0 of the written file.
INFO_PLIST_SOURCE = <<-ENDPLIST.unindented
  <?xml version="1.0" encoding="UTF-8"?>
  <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
  <plist version="1.0">
  <dict>
    <key>CFBundleIdentifier</key>
    <string>gambit</string>
    <key>CFBundleName</key>
    <string>Gambit-C</string>
    <key>DocSetPlatformFamily</key>
    <string>gambit</string>
    <key>isDashDocset</key>
    <true/>
  </dict>
  </plist>
ENDPLIST
# Shell command that fetches the manual into the docset.
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error page as the manual; --location follows redirects.
DOWNLOAD_GSC_MANUAL = %Q[curl --fail --location '#{GSC_MANUAL_URL}' > '#{GSC_MANUAL_FILE}'].freeze

# Directories that must exist before anything is written into the docset.
DOCSET_DIRECTORIES = %w[
  Gambit-C.docset/Contents/Resources/Documents
].freeze
# Schema for the docset search index. This string holds TWO statements (the
# table and a unique index over name/type/path), so it has to be run through
# an API that executes every statement in the string.
SQL_CREATE_TABLES = <<-SQL
  CREATE TABLE searchIndex(id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT);
  CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);
SQL

# Parameterized insert for one index entry: name, type, path.
# OR IGNORE skips rows that violate a uniqueness constraint.
SQL_CREATE_ENTRY = <<-SQL
  INSERT OR IGNORE INTO searchIndex(name, type, path) VALUES (?, ?, ?);
SQL
# Logs the INSERT statement that corresponds to one index entry, with each
# value SQL-quoted and wrapped in single quotes. Used for -log-sql output
# only; the real insert goes through a prepared statement.
#
# vars - the values of the entry, in (name, type, path) order.
def log_insert(*vars)
  quoted = vars.map { |value| "'#{SQLite3::Database.quote(value)}'" }
  log " INSERT OR IGNORE INTO searchIndex(name, type, path) VALUES (#{quoted.join(', ')});"
end
# Matches one row of the manual's index: captures the anchor id (which starts
# with "index-") and the entry name shown inside <code>.
INDEX_ENTRY_REGEX = %r[<td valign="top"><a href="#(?<link_id>index-[^"]+)"><code>(?<link_name>.+?)</code></a></td>]i

# Matches one row of the table of contents: captures the anchor id and the
# section title with its leading section number (e.g. "1.2 ") dropped.
# NOTE(review): the cell after the link is accepted whether it starts with a
# literal space or with "&nbsp;" -- the pattern as received ended in a bare
# space, which may be an HTML-decoded "&nbsp;"; confirm against the manual.
INDEX_SECTION_REGEX = %r[<tr><td align="left" valign="top"><a href="#(?<link_id>[^"]+)">\d+\.(?:\d+\.?)*\s*(?<link_name>.+?)</a></td><td>(?:&nbsp;| )]i
# Make sure the docset directory skeleton exists (skipped in a dry run).
log "Creating docset directories."
FileUtils.mkdir_p(DOCSET_DIRECTORIES, noop: nop)

# Download the manual when it is missing or a rebuild was requested, unless
# -skip-dl forbids it.
manual_present = File.exist?(GSC_MANUAL_FILE)
if !skip_dl && (rebuild || !manual_present)
  log DOWNLOAD_GSC_MANUAL
  if manual_present
    log "Deleting old manual for rebuild."
    FileUtils.rm(GSC_MANUAL_FILE) unless nop
  end
  log "Downloading Gambit-C manual."
  unless nop
    downloaded = system(DOWNLOAD_GSC_MANUAL)
    status = $?
    log "Download command finished: #{status}"
    unless downloaded
      # The shell redirect creates the target file even when curl fails;
      # remove it so a later run does not mistake it for a real manual.
      FileUtils.rm_f(GSC_MANUAL_FILE)
      $stderr.puts "Failed to download the Gambit-C manual (#{status})"
      exit 1
    end
  end
elsif manual_present
  log "'#{GSC_MANUAL_FILE}' already exists"
else
  log "Skipping download; '#{GSC_MANUAL_FILE}' does not exist."
end

# Without a manual there is nothing to index. A dry run is allowed to carry
# on so that the database steps can still be exercised.
have_manual = File.exist?(GSC_MANUAL_FILE)
unless nop || have_manual
  $stderr.puts "Unable to build docset index -- no manual file found"
  exit 1
end
# Collect [name, type, path] triples for every index row and table-of-contents
# row found in the manual. Stays empty when there is no manual (dry run).
entries = []

log "Scanning '#{GSC_MANUAL_FILE}' for entries."
if have_manual
  # NOTE(review): the file is read in binary mode, so the captured strings are
  # binary-encoded; confirm how the sqlite3 gem binds such strings (TEXT vs
  # BLOB) if search results look wrong.
  File.open(GSC_MANUAL_FILE, 'rb') do |io|
    io.each_line do |line|
      # Index rows start out as functions; table-of-contents rows are guides.
      # Lines matching neither pattern are skipped.
      link_type =
        case line
        when INDEX_ENTRY_REGEX then 'Function'
        when INDEX_SECTION_REGEX then 'Guide'
        else next
        end
      match = Regexp.last_match

      # Drop any markup nested in the name, then decode HTML entities.
      link_name = CGI.unescapeHTML(match[:link_name].gsub(%r[</?.+?/?>], ''))

      # Refine index entries by their leading character: names starting with
      # "," or "^" become commands, names starting with "-" become options.
      if link_type == 'Function'
        case link_name
        when /^[,\^]/ then link_type = 'Command'
        when /^-/ then link_type = 'Option'
        end
      end

      entries << [link_name, link_type, "index.html##{match[:link_id]}"]
    end
  end
end
# Write Info.plist when it is missing or a rebuild was requested.
# File.exist? is used because File.exists? was removed in Ruby 3.2.
if rebuild || !File.exist?(GSC_INFO_PLIST)
  log "Building '#{GSC_INFO_PLIST}'."
  File.open(GSC_INFO_PLIST, 'wb') { |io| io.write(INFO_PLIST_SOURCE) } unless nop
end
# The index is always rebuilt from scratch, so remove any previous database.
if File.exist?(GSC_INDEX_DB)
  log "Removing old '#{GSC_INDEX_DB}'."
  FileUtils.rm(GSC_INDEX_DB) unless nop
end

# A dry run uses an in-memory database so the SQL is still exercised.
log "Create DB '#{GSC_INDEX_DB}'"
db = SQLite3::Database.new(nop ? ':memory:' : GSC_INDEX_DB)
begin
  log "Create tables."
  log SQL_CREATE_TABLES if log_sql
  # SQL_CREATE_TABLES holds two statements. Database#execute runs only the
  # first one, which would silently skip the unique index that
  # INSERT OR IGNORE depends on; execute_batch runs them all.
  db.execute_batch(SQL_CREATE_TABLES)

  insert_stmt = db.prepare(SQL_CREATE_ENTRY)
  begin
    log "Insert table entries."
    # One transaction for all inserts.
    db.transaction do
      entries.each do |entry|
        log_insert(*entry) if log_sql
        insert_stmt.execute(*entry)
      end
    end
    log "Done."
  ensure
    # Release the statement even if an insert raised.
    log "Closing DB."
    insert_stmt.close
  end
ensure
  db.close
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment