Last active
January 25, 2024 01:41
-
-
Save bburky/ba90e1aa1451be20b1e046aa9efd8191 to your computer and use it in GitHub Desktop.
EPUB full text search using SQLite FTS5
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# EPUB full text search using SQLite FTS5.
#
# Needs a sqlite3 command-line shell that provides FTS5 and the fsdir() and
# zipfile() table-valued functions used below.
#
# Update index (run with no arguments):
#   epub-fts5.sh
# FTS5 query (all arguments are joined with spaces into one query string):
#   https://www.sqlite.org/fts5.html#full_text_query_syntax
#   epub-fts5.sh foo OR bar NOT baz
#   epub-fts5.sh '"foo bar"'
#   epub-fts5.sh 'NEAR(foo bar)'
# Single quotes in the query or in EPUB_PATH are escaped before they are
# placed into the SQL text, so they no longer break the statement.

EPUB_PATH="$HOME/path/to/your/epub/library"

# Print the arguments (joined with spaces) with every single quote doubled,
# which is how a quote is written inside a single-quoted SQL string literal.
sql_quote() {
    printf '%s' "$*" | sed "s/'/''/g"
}

if [ "$#" -eq 0 ]; then
    # No arguments: create the schema if needed, then index every .html/.xhtml
    # member of every .epub found under EPUB_PATH.
    library_sql=$(sql_quote "$EPUB_PATH")
    sqlite3 "$EPUB_PATH/epub_index.db" <<EOF && echo Index updated
CREATE TABLE IF NOT EXISTS chapters(
    epub_path TEXT,
    chapter_path TEXT,
    chapter_text TEXT,
    PRIMARY KEY (epub_path, chapter_path)
);
CREATE VIRTUAL TABLE IF NOT EXISTS chapters_idx USING fts5(chapter_text, content='chapters', tokenize='porter unicode61');
-- Mirror each newly inserted chapter into the external-content FTS index.
CREATE TRIGGER IF NOT EXISTS chapters_ai AFTER INSERT ON chapters BEGIN
    INSERT INTO chapters_idx(rowid, chapter_text) VALUES (new.rowid, new.chapter_text);
END;
-- Add more triggers if desired https://www.sqlite.org/fts5.html#external_content_tables
-- OR IGNORE skips chapters whose (epub_path, chapter_path) key is already stored,
-- so re-running the update only adds chapters that are not indexed yet.
INSERT OR IGNORE INTO chapters (epub_path, chapter_path, chapter_text)
SELECT
    fsdir.name,
    zipfile.name,
    zipfile.data
FROM fsdir('', '$library_sql')
JOIN zipfile(fsdir.data)
WHERE
    fsdir.name LIKE '%.epub'
    AND (
        zipfile.name LIKE '%.html'
        OR zipfile.name LIKE '%.xhtml'
    );
EOF
else
    # Arguments given: treat them as one FTS5 query and print
    # "<epub path>: <snippet>" for every matching chapter.
    match_sql=$(sql_quote "$@")
    sqlite3 -separator ": " "$EPUB_PATH/epub_index.db" <<EOF
SELECT
    chapters.epub_path,
    snippet(chapters_idx, -1, '', '', '', 20)
FROM chapters_idx
JOIN chapters ON chapters.rowid = chapters_idx.rowid
WHERE chapters_idx MATCH '$match_sql';
EOF
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment