Skip to content

Instantly share code, notes, and snippets.

@sirupsen
Created February 6, 2020 12:44
Show Gist options
  • Star 20 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sirupsen/03fb05cc7efa6a63c8d5c9737fb0686f to your computer and use it in GitHub Desktop.
Save sirupsen/03fb05cc7efa6a63c8d5c9737fb0686f to your computer and use it in GitHub Desktop.
require 'set'
require 'time'
require "sqlite3"
require 'byebug'
# The index can be rebuilt at any time, and it is updated incrementally.
db = SQLite3::Database.new("index.db")

# Keep prefix indexes for "mos*" searches.
#
# TODO: It doesn't seem like SQLite FTS5 supports synonyms well. That's ok, but
# we're going to want that. We can download this database from Princeton and
# write a parser for it (or use grind(1)). This should allow us to potentially
# do `OR` queries. Alternatively, and probably better, would be to see if
# Lucene supports this.
#
# https://wordnet.princeton.edu/download/current-version
schema_sql = <<~SQL
  CREATE VIRTUAL TABLE IF NOT EXISTS zettelkasten
  USING fts5(title, body, tags, mtime UNINDEXED, prefix = 3, tokenize = "porter unicode61");
SQL

# Weigh tags higher, and title a bit higher.
ranking_sql = <<~SQL
  INSERT INTO zettelkasten (zettelkasten, rank) VALUES('rank', 'bm25(2.0, 1.0, 5.0, 0.0)');
SQL

# Create the table first, then configure how its results are ranked.
[schema_sql, ranking_sql].each { |statement| db.execute(statement) }
# Bring the full-text index in line with the Markdown files in the current
# directory.
#
# `existing` maps every indexed title (the file path) to the mtime recorded for
# it, in whole seconds. `visited` collects the paths that are present on disk.
existing = db.execute("SELECT title, mtime FROM zettelkasten")
             .map { |title, stored_mtime| [title, Time.parse(stored_mtime).to_i] }
             .to_h
visited = Set.new

Dir["*.md"].each do |path|
  visited << path
  mtime = File.stat(path).mtime
  indexed_at = existing[path]

  # Any file that's been modified since its entry in the full-text search index
  # will get updated (or inserted if it doesn't exist, of course). Compare with
  # to_i because the stat may have more precision than the stored timestamp.
  next if indexed_at && mtime.to_i <= indexed_at

  contents = File.read(path)
  tags = contents.scan(/#[\w-]+/).join(" ")

  if indexed_at
    db.execute(<<~SQL, [contents, tags, mtime.to_s, path])
      UPDATE zettelkasten SET body = ?, tags = ?, mtime = ? WHERE title = ?
    SQL
  else
    db.execute(<<~SQL, [path, contents, tags, mtime.to_s])
      INSERT INTO zettelkasten (title, body, tags, mtime) VALUES (?, ?, ?, ?);
    SQL
  end
end

# Delete any entries in the full text index that don't have files! The result
# of the DELETE is deliberately not printed, so nothing but search results
# reaches stdout.
existing.each_key do |path|
  next if visited.include?(path)

  db.execute("DELETE FROM zettelkasten WHERE title = ?;", [path])
end
# Command-line handling:
#   -f TITLE [QUERY]  print the body of the note TITLE (for preview); when
#                     QUERY is given, matches in the body are highlighted
#   QUERY...          print the titles of matching notes, ordered by rank,
#                     with matches in the title highlighted
#   (no arguments)    print every indexed title
file_cat = ARGV.delete("-f")
title, query = ARGV

results =
  if file_cat
    if query.to_s.empty?
      # This is when it starts and there's no query input (the query argument
      # may be missing entirely or be an empty string).
      db.execute(<<~SQL, [title])
        SELECT rank, body FROM zettelkasten WHERE title = ?;
      SQL
    else
      db.execute(<<~SQL, [title, query])
        SELECT rank, highlight(zettelkasten, 1, '\x1b[0;41m', '\x1b[0m')
        FROM zettelkasten WHERE title = ? AND zettelkasten MATCH ? ORDER BY rank;
      SQL
    end
  elsif title
    # Ideally we'd use the search to also `cat` instead of using `bat`, in order
    # to provide highlighting within the document.
    db.execute(<<~SQL, [ARGV.join(" ")])
      SELECT rank, highlight(zettelkasten, 0, '\x1b[0;41m', '\x1b[0m')
      FROM zettelkasten WHERE zettelkasten MATCH ? ORDER BY rank;
    SQL
  else
    db.execute("SELECT title FROM zettelkasten;")
  end

# The text to show is always the last column: rows are either [rank, text] or,
# for the plain title listing, just [title].
results.each { |row| puts row.last }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment