Skip to content

Instantly share code, notes, and snippets.

@robrich
Created October 9, 2020 22:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save robrich/3e36fe454e7e4b520e29d63b23822fb5 to your computer and use it in GitHub Desktop.
Save robrich/3e36fe454e7e4b520e29d63b23822fb5 to your computer and use it in GitHub Desktop.
-- FULL TEXT SEARCH
-- ================
-- setup schema
CREATE DATABASE library;
USE library;
CREATE TABLE books (
title VARCHAR(200) not null,
author VARCHAR(200) not null,
published DATE not null,
line INT not null,
content TEXT,
KEY (title, author, line) USING CLUSTERED COLUMNSTORE,
FULLTEXT (content)
);
-- load books
-- Thank you to http://www.gutenberg.org/ for the book text
CREATE PIPELINE books
AS LOAD DATA FS '/vagrant/books/*'
INTO TABLE books
FORMAT CSV
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
IGNORE 1 LINES;
-- start pipeline
TEST PIPELINE books LIMIT 10;
START PIPELINE books FOREGROUND LIMIT 1 BATCHES;
START PIPELINE books;
-- verify pipeline
SELECT * FROM books;
SELECT count(*) FROM books;
SELECT * FROM information_schema.PIPELINES_BATCHES_SUMMARY;
OPTIMIZE TABLE books FLUSH;
SELECT title, count(*) FROM books GROUP BY 1;
-- without the index
SELECT * FROM books WHERE content like '%Alice%';
-- use full-text index
SELECT * FROM books WHERE MATCH (content) AGAINST ('Alice');
SELECT title, count(*) FROM books WHERE MATCH (content) AGAINST ('Alice') GROUP BY 1;
SELECT * FROM books WHERE MATCH (content) AGAINST ('Peter');
SELECT title, count(*) FROM books WHERE MATCH (content) AGAINST ('Peter') GROUP BY 1;
-- find multiple words
SELECT * FROM books WHERE MATCH (content) AGAINST ('Alice OR Peter');
SELECT * FROM books WHERE MATCH (content) AGAINST ('Alice AND Peter');
SELECT * FROM books WHERE MATCH (content) AGAINST ('Huck* OR Tom');
SELECT title, count(*) FROM books WHERE MATCH (content) AGAINST ('Huck* OR Tom') GROUP BY 1;
SELECT * FROM books WHERE MATCH (content) AGAINST ('Huck* AND Tom');
SELECT title, count(*) FROM books WHERE MATCH (content) AGAINST ('Huck* AND Tom') GROUP BY 1;
-- Highlight matching lines
SELECT HIGHLIGHT (content) AGAINST ('Alice') FROM books WHERE MATCH (content) AGAINST ('Alice');
SELECT HIGHLIGHT (content) AGAINST ('Huck OR Tom') FROM books WHERE MATCH (content) AGAINST ('Huck* AND Tom');
-- Cleanup
STOP PIPELINE books;
DROP PIPELINE books;
DROP TABLE books;
DROP DATABASE library;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment