Skip to content

Instantly share code, notes, and snippets.

@nilium
Last active August 29, 2015 13:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nilium/10440827 to your computer and use it in GitHub Desktop.
Save nilium/10440827 to your computer and use it in GitHub Desktop.
Ruby script to build a Dash docset for Gambit-C (http://gambitscheme.org)
#!/usr/bin/env ruby
require 'fileutils'
require 'sqlite3'
require 'cgi'
# Usage text shown for the -help switch. It is passed through
# String#unindented before being printed, so any common leading
# indentation inside the heredoc is stripped at display time.
HELP_TEXT = <<-EOS
build-gsc-docset [options...]
Options:
-help Displays this text and exits.
-verbose Log most things.
-log-sql Especially verbose -- logs SQL statements.
(Implies -verbose.)
-nop Dry run. This still creates a DB in memory to ensure DB
ops are correct.
(Implies -verbose.)
-rebuild Forces a complete rebuild of the docset (in-place). This
includes re-downloading the Gambit-C manual HTML unless
-skip-dl is specified.
-skip-dl If specified, the manual download is skipped. This is
only really useful if you've made changes to the script
to categorize entry types but don't want to redownload
the manual's HTML file. Highly recommended you use this
when messing with things so you don't spam Gambit's over
and over.
EOS
# Indentation helpers used to tidy the script's heredoc literals.
class String
  # Smallest number of leading spaces found on any non-empty line.
  # Only space characters count as indentation; tabs are not considered.
  #
  # @return [Integer] the shared indent width, or 0 when the string has no
  #   non-empty lines (previously this case produced nil)
  def min_indent
    each_line
      .reject { |line| line.chomp.empty? }
      .map { |line| line[/^ */].length }
      .min || 0
  end

  # Copy of the string with the shared leading indent removed from every
  # non-empty line. Empty lines are passed through unchanged.
  #
  # @return [String] a new string; the receiver is never modified
  def unindented
    width = min_indent
    return dup if width.zero?

    # Every non-empty line starts with at least `width` spaces, so slicing
    # from `width` onward drops exactly the shared indent.
    each_line.map { |line| line.chomp.empty? ? line : line[width..-1] }.join
  end
end
# Show the usage text and stop before doing any work when -help is given.
if ARGV.include?('-help')
  puts HELP_TEXT.unindented
  exit 0
end

# Simple presence switches read straight from the command line.
nop, log_sql, rebuild, skip_dl =
  %w[-nop -log-sql -rebuild -skip-dl].map { |switch| ARGV.include?(switch) }

# -log-sql and -nop both imply verbose logging.
verbose = [log_sql, nop, ARGV.include?('-verbose')].any?
# Diagnostic logger, selected once at load time from the verbose flag.
if verbose
  # Writes the given arguments to standard error via $stderr.puts.
  def log(*args)
    $stderr.puts(*args)
  end
else
  # Quiet mode: accept any arguments and do nothing.
  def log(*)
  end
end
# Not referenced anywhere in the visible script.
INDENT = 4
# Remote location of the Gambit-C manual HTML handed to curl.
GSC_MANUAL_URL = 'http://www.iro.umontreal.ca/~gambit/doc/gambit-c.html'
# Local copy of the manual inside the docset; curl output is redirected here
# and the file is later scanned for index entries.
GSC_MANUAL_FILE = 'Gambit-C.docset/Contents/Resources/Documents/index.html'
# SQLite database file that receives the searchIndex table.
GSC_INDEX_DB = 'Gambit-C.docset/Contents/Resources/docSet.dsidx'
# Destination of the property list written from INFO_PLIST_SOURCE.
GSC_INFO_PLIST = 'Gambit-C.docset/Contents/Info.plist'
# Property list body written to GSC_INFO_PLIST; String#unindented strips any
# common leading indentation from the heredoc.
INFO_PLIST_SOURCE = <<-ENDPLIST.unindented
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleIdentifier</key>
<string>gambit</string>
<key>CFBundleName</key>
<string>Gambit-C</string>
<key>DocSetPlatformFamily</key>
<string>gambit</string>
<key>isDashDocset</key>
<true/>
</dict>
</plist>
ENDPLIST
# Shell command line (run through Kernel#system) that downloads the manual
# with curl and redirects its output into GSC_MANUAL_FILE.
DOWNLOAD_GSC_MANUAL = %Q[curl '#{GSC_MANUAL_URL}' > '#{GSC_MANUAL_FILE}']
# Directories created with FileUtils.mkdir_p before anything is written.
DOCSET_DIRECTORIES = %w[
Gambit-C.docset/Contents/Resources/Documents
]
# Schema for the index database: the searchIndex table plus a unique index
# over (name, type, path). Note that this string holds two SQL statements.
SQL_CREATE_TABLES = <<-SQL
CREATE TABLE searchIndex(id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT);
CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);
SQL
# Parameterized insert used for every entry; the three placeholders are
# name, type and path, in that order.
SQL_CREATE_ENTRY = <<-SQL
INSERT OR IGNORE INTO searchIndex(name, type, path) VALUES (?, ?, ?);
SQL
# Logs a literal rendering of the INSERT statement for one entry, with every
# value quoted through SQLite3::Database.quote and wrapped in single quotes.
#
# @param vars [Array] the values bound to the insert (name, type, path)
def log_insert(*vars)
  quoted_values = vars.map { |value| "'#{SQLite3::Database.quote(value)}'" }
  value_list = quoted_values.join(', ')
  log " INSERT OR IGNORE INTO searchIndex(name, type, path) VALUES (#{value_list});"
end
# Matches a table cell whose link targets an "#index-..." anchor and whose
# label is wrapped in <code>; captures the anchor id (link_id) and the label
# (link_name). Lines matching this are recorded as 'Function' entries.
INDEX_ENTRY_REGEX = %r[<td valign="top"><a href="#(?<link_id>index-[^"]+)"><code>(?<link_name>.+?)</code></a></td>]i
# Matches a table row whose link label starts with a dotted section number;
# the number is consumed outside the link_name capture, so only the title is
# captured. Lines matching this are recorded as 'Guide' entries.
INDEX_SECTION_REGEX = %r[<tr><td align="left" valign="top"><a href="#(?<link_id>[^"]+)">\d+\.(\d+\.?)*\s*(?<link_name>.+?)</a></td><td>&nbsp;&nbsp;]i
# Create the docset directory tree. `noop: nop` is passed to FileUtils so
# nothing is created during a dry run.
log "Creating docset directories."
FileUtils.mkdir_p(DOCSET_DIRECTORIES, noop: nop)

# Download the manual when it is missing or a rebuild was requested, unless
# -skip-dl was given. File.exist? is used throughout because File.exists?
# was removed in Ruby 3.2.
manual_present = File.exist?(GSC_MANUAL_FILE)
if !skip_dl && (rebuild || !manual_present)
  log DOWNLOAD_GSC_MANUAL
  if manual_present
    log "Deleting old manual for rebuild."
    FileUtils.rm(GSC_MANUAL_FILE) unless nop
  end
  log "Downloading Gambit-C manual."
  unless nop
    download_ok = system DOWNLOAD_GSC_MANUAL
    puts $?
    # The shell redirect creates the target file even when curl fails, so a
    # failed download would otherwise go unnoticed.
    $stderr.puts "Warning: manual download command did not succeed" unless download_ok
  end
elsif manual_present
  log "'#{GSC_MANUAL_FILE}' already exists"
else
  # Only reachable with -skip-dl and no local copy of the manual.
  log "Skipping download; '#{GSC_MANUAL_FILE}' does not exist"
end

# A real run cannot continue without the manual; a dry run carries on and
# simply produces no entries.
have_manual = File.exist?(GSC_MANUAL_FILE)
unless nop || have_manual
  $stderr.puts "Unable to build docset index -- no manual file found"
  exit 1
end
# Collect one [name, type, path] row for every manual line that matches one
# of the two index patterns.
entries = []
log "Scanning '#{GSC_MANUAL_FILE}' for entries."
if have_manual
  # NOTE(review): binary mode yields ASCII-8BIT strings -- confirm the sqlite3
  # gem stores such values as text rather than blobs.
  File.open(GSC_MANUAL_FILE, 'rb') do |io|
    io.each_line do |line|
      # The function-index pattern is tried before the section pattern.
      if (found = INDEX_ENTRY_REGEX.match(line))
        entry_type = 'Function'
      elsif (found = INDEX_SECTION_REGEX.match(line))
        entry_type = 'Guide'
      else
        next
      end

      # Strip any markup nested inside the captured label, then decode
      # HTML entities.
      title = CGI.unescapeHTML(found[:link_name].gsub(%r[</?.+?/?>], ''))

      # Function entries are re-typed by their first character: ',' or '^'
      # becomes 'Command', '-' becomes 'Option'.
      if entry_type == 'Function'
        if title =~ /^[,\^]/
          entry_type = 'Command'
        elsif title =~ /^-/
          entry_type = 'Option'
        end
      end

      entries << [title, entry_type, "index.html##{found[:link_id]}"]
    end
  end
end
# Write Info.plist when it is missing or a rebuild was requested; skipped
# entirely during a dry run.
# File.exist? replaces File.exists?, which was removed in Ruby 3.2.
if rebuild || !File.exist?(GSC_INFO_PLIST)
  log "Building '#{GSC_INFO_PLIST}'."
  File.open(GSC_INFO_PLIST, 'wb') { |io| io.write(INFO_PLIST_SOURCE) } unless nop
end
# Start from a clean database file on every run.
# File.exist? replaces File.exists?, which was removed in Ruby 3.2.
if File.exist?(GSC_INDEX_DB)
  log "Removing old '#{GSC_INDEX_DB}'."
  FileUtils.rm(GSC_INDEX_DB) unless nop
end

# A dry run still exercises every statement, but against an in-memory DB.
log "Create DB '#{GSC_INDEX_DB}'"
db = SQLite3::Database.new(nop ? ':memory:' : GSC_INDEX_DB)
insert_stmt = nil
begin
  log "Create tables."
  log SQL_CREATE_TABLES if log_sql
  # SQL_CREATE_TABLES holds two statements. Database#execute only runs the
  # first one, which would leave the unique index uncreated and make
  # INSERT OR IGNORE unable to drop duplicates; execute_batch runs them all.
  db.execute_batch SQL_CREATE_TABLES

  insert_stmt = db.prepare(SQL_CREATE_ENTRY)
  log "Insert table entries."
  # A single transaction covers all inserts.
  db.transaction do
    entries.each do |entry|
      log_insert(*entry) if log_sql
      insert_stmt.execute(*entry)
    end
  end
  log "Done."
ensure
  # Release the statement and the connection even when a step above raises.
  log "Closing DB."
  insert_stmt.close if insert_stmt
  db.close
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment