Skip to content

Instantly share code, notes, and snippets.

@apinanyogaratnam
Created June 24, 2023 02:59
Show Gist options
  • Save apinanyogaratnam/7f91016a16e9c114c3d9d79c5142c78c to your computer and use it in GitHub Desktop.
Save apinanyogaratnam/7f91016a16e9c114c3d9d79c5142c78c to your computer and use it in GitHub Desktop.
Search Text Postgres Really Fast
'''
THIS SCRIPT IS FOR TESTING pg_trgm postgres extension
1.
CREATE TABLE test_text_search (
id SERIAL PRIMARY KEY,
word TEXT
);
2.
CREATE EXTENSION IF NOT EXISTS pg_trgm;
3.
CREATE INDEX CONCURRENTLY index_t_on_word_trigram
ON test_text_search
USING gin (word gin_trgm_ops);
4.
SELECT *
FROM test_text_search
WHERE word ILIKE 'a%';
RESULTS ARE REALLY FAST COMPARED TO NO INDEX
'''
import psycopg2
from psycopg2.extras import execute_batch
connection = psycopg2.connect(
host="localhost",
database="postgres",
user="postgres",
password="postgres",
)
cursor = connection.cursor()
import requests
word_site = "https://www.mit.edu/~ecprice/wordlist.10000"
response = requests.get(word_site)
txt = response.text
WORDS = txt.splitlines()
# add 10 million records
records = []
print('adding records')
for word in WORDS * 1000:
records.append((word,))
print('executing batch insert for fast insert')
import time
start_time = time.time()
execute_batch(cursor, '''
INSERT INTO test_text_search (
word
) VALUES (
%s
);
''', records)
print(f'batch took {time.time() - start_time} seconds')
print('committing')
connection.commit()
cursor.close()
connection.close()
print('done')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment