Created
December 12, 2023 21:53
-
-
Save amotl/9a1290fa2289407df93820c0e98c3df3 to your computer and use it in GitHub Desktop.
PostgreSQL/pgvector: Exercise storing and searching by vectors using its operators.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* PostgreSQL/pgvector: Exercise storing and searching by vectors using its operators. | |
* The example uses euclidean distance for vector similarity search. | |
* | |
* Synopsis:: | |
* | |
* docker run --rm -it --publish=5432:5432 --env "POSTGRES_HOST_AUTH_METHOD=trust" ankane/pgvector postgres -c log_statement=all | |
* psql postgresql://postgres@localhost < pgvector-vector-knn-exercise.sql | |
* | |
* Resources: | |
* - https://github.com/pgvector/pgvector | |
**/ | |
CREATE EXTENSION IF NOT EXISTS vector; | |
CREATE SCHEMA IF NOT EXISTS langchain; | |
DROP TABLE IF EXISTS langchain.embedding; | |
CREATE TABLE langchain.embedding ( | |
collection_id VARCHAR, | |
embedding VECTOR(14), | |
document VARCHAR, | |
cmetadata JSON, | |
custom_id VARCHAR, | |
uuid UUID NOT NULL, | |
PRIMARY KEY (uuid) | |
); | |
INSERT INTO langchain.embedding | |
(collection_id, embedding, document, cmetadata, custom_id, uuid) | |
VALUES | |
('e67d0823-00ee-40c5-934c-b19edfe281fe', '[1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0]', 'foo', '{"tag": "something"}', 'n/a', 'f9b6e677-0542-4323-be2b-a06442384321'), | |
('e67d0823-00ee-40c5-934c-b19edfe281fe', '[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]', 'bar', '{"tag": "something else"}', 'n/a', '3f8557fc-8bd0-47f4-bb54-207184302caa'), | |
('e67d0823-00ee-40c5-934c-b19edfe281fe', '[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 4.0, 1.0, 1.0, 2.0]', 'baz', '{"tag": "just anything"}', 'n/a', 'f313c95d-7ecd-4575-9aa0-bf1d73e60535') | |
; | |
-- Get nearest neighbors by L2 distance, which is effectively Euclidean, according to the manual. | |
SELECT document FROM langchain.embedding | |
ORDER BY embedding <-> '[1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.3]' LIMIT 4; | |
SELECT document FROM langchain.embedding | |
ORDER BY embedding <-> '[9.0, 0.0, 8.0, 1.0, 1.0, 1.0, 9.0, 1.0, 1.0, 6.0, 1.0, 1.0, 1.0, 0.0]' LIMIT 4; | |
SELECT document FROM langchain.embedding | |
ORDER BY embedding <-> '[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 2.0]' LIMIT 4; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment