require "set"
require "time" # Time.parse is defined by the stdlib "time" library, not by core Time
require "byebug"
require "sqlite3"
# Full-text index over the notes. The table is only created when it is missing,
# so an existing index.db is reused and brought up to date incrementally below.
db = SQLite3::Database.new("index.db")

# `prefix = 3` keeps a prefix index so that "mos*"-style searches are supported.
#
# TODO: SQLite FTS5 doesn't seem to support synonyms well. That's OK for now,
# but we're going to want them. We could download the WordNet database from
# Princeton and write a parser for it (or use grind(1)), which should let us
# expand a search into `OR` queries. Alternatively, and probably better, would
# be to see whether Lucene supports this.
#
# https://wordnet.princeton.edu/download/current-version
schema_sql = <<~SQL
  CREATE VIRTUAL TABLE IF NOT EXISTS zettelkasten
  USING fts5(title, body, tags, mtime UNINDEXED, prefix = 3, tokenize = "porter unicode61");
SQL
db.execute(schema_sql)

# bm25 column weights, in column order (title, body, tags, mtime): tags weigh
# the most, title a bit more than body, and mtime not at all.
ranking_sql = <<~SQL
  INSERT INTO zettelkasten (zettelkasten, rank) VALUES('rank', 'bm25(2.0, 1.0, 5.0, 0.0)');
SQL
db.execute(ranking_sql)
# Bring the index in line with the *.md files in the current directory.
#
# `existing` maps each indexed title (the file path) to the mtime recorded for
# it, as whole epoch seconds. Requires the stdlib "time" library for Time.parse.
existing = db.execute("SELECT title, mtime FROM zettelkasten")
             .map { |title, stored_mtime| [title, Time.parse(stored_mtime).to_i] }
             .to_h

# Paths found on disk during this run; anything indexed but absent from this
# set is stale and gets removed afterwards.
on_disk = Set.new

Dir["*.md"].each do |path|
  on_disk << path
  mtime = File.mtime(path)
  indexed_at = existing[path]

  # Skip files whose index entry is already current. Compare with to_i because
  # the file's mtime may carry more precision than the stored string.
  next if indexed_at && mtime.to_i <= indexed_at

  contents = File.read(path)
  # Tags are "#word" tokens anywhere in the note, stored space-separated.
  tags = contents.scan(/#[\w-]+/).join(" ")

  if indexed_at
    # Modified since it was indexed: refresh the existing row.
    db.execute(<<~SQL, [contents, tags, mtime.to_s, path])
      UPDATE zettelkasten SET body = ?, tags = ?, mtime = ? WHERE title = ?
    SQL
  else
    # Not indexed yet: add a new row.
    db.execute(<<~SQL, [path, contents, tags, mtime.to_s])
      INSERT INTO zettelkasten (title, body, tags, mtime) VALUES (?, ?, ?, ?);
    SQL
  end
end

# Delete any entries in the full-text index that no longer have a file.
# Nothing is printed here: the DELETE returns no rows, and echoing that empty
# result would write a blank line into the script's output.
existing.each_key do |path|
  db.execute("DELETE FROM zettelkasten WHERE title = ?;", [path]) unless on_disk.include?(path)
end
# Command-line modes:
#   -f TITLE [QUERY]  preview one note, highlighting QUERY matches in its body
#   QUERY...          search all notes; prints matching titles, best rank first
#   (no arguments)    list every indexed title
file_cat = ARGV.delete("-f")

results =
  if file_cat
    # For preview.
    title, query = ARGV
    if query.nil? || query.empty?
      # This is when it starts and there's no query input yet (the query may be
      # an empty string or missing altogether): show the plain body.
      db.execute(<<~SQL, [title])
        SELECT rank, body FROM zettelkasten WHERE title = ?;
      SQL
    else
      db.execute(<<~SQL, [title, query])
        SELECT rank, highlight(zettelkasten, 1, '\x1b[0;41m', '\x1b[0m')
        FROM zettelkasten WHERE title = ? AND zettelkasten MATCH ? ORDER BY rank;
      SQL
    end
  elsif ARGV[0]
    # Ideally we'd use the search to also `cat` instead of using `bat`, in order
    # to provide highlighting within the document.
    db.execute(<<~SQL, [ARGV.join(" ")])
      SELECT rank, highlight(zettelkasten, 0, '\x1b[0;41m', '\x1b[0m')
      FROM zettelkasten WHERE zettelkasten MATCH ? ORDER BY rank;
    SQL
  else
    db.execute("SELECT title FROM zettelkasten;")
  end

# The text to show is always the last column: rows are [rank, text] for the
# search/preview queries and [title] for the plain listing.
results.each { |row| puts row.last }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment