Skip to content

Instantly share code, notes, and snippets.

@amotl
Created December 12, 2023 21:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amotl/9a1290fa2289407df93820c0e98c3df3 to your computer and use it in GitHub Desktop.
Save amotl/9a1290fa2289407df93820c0e98c3df3 to your computer and use it in GitHub Desktop.
PostgreSQL/pgvector: Exercise storing and searching by vectors using its operators.
/**
* PostgreSQL/pgvector: Exercise storing and searching by vectors using its operators.
* The example uses euclidean distance for vector similarity search.
*
* Synopsis::
*
* docker run --rm -it --publish=5432:5432 --env "POSTGRES_HOST_AUTH_METHOD=trust" ankane/pgvector postgres -c log_statement=all
* psql postgresql://postgres@localhost < pgvector-vector-knn-exercise.sql
*
* Resources:
* - https://github.com/pgvector/pgvector
**/
CREATE EXTENSION IF NOT EXISTS vector;
CREATE SCHEMA IF NOT EXISTS langchain;
DROP TABLE IF EXISTS langchain.embedding;
CREATE TABLE langchain.embedding (
collection_id VARCHAR,
embedding VECTOR(14),
document VARCHAR,
cmetadata JSON,
custom_id VARCHAR,
uuid UUID NOT NULL,
PRIMARY KEY (uuid)
);
INSERT INTO langchain.embedding
(collection_id, embedding, document, cmetadata, custom_id, uuid)
VALUES
('e67d0823-00ee-40c5-934c-b19edfe281fe', '[1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0]', 'foo', '{"tag": "something"}', 'n/a', 'f9b6e677-0542-4323-be2b-a06442384321'),
('e67d0823-00ee-40c5-934c-b19edfe281fe', '[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]', 'bar', '{"tag": "something else"}', 'n/a', '3f8557fc-8bd0-47f4-bb54-207184302caa'),
('e67d0823-00ee-40c5-934c-b19edfe281fe', '[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 4.0, 1.0, 1.0, 2.0]', 'baz', '{"tag": "just anything"}', 'n/a', 'f313c95d-7ecd-4575-9aa0-bf1d73e60535')
;
-- Get nearest neighbors by L2 distance, which is effectively Euclidean, according to the manual.
SELECT document FROM langchain.embedding
ORDER BY embedding <-> '[1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.3]' LIMIT 4;
SELECT document FROM langchain.embedding
ORDER BY embedding <-> '[9.0, 0.0, 8.0, 1.0, 1.0, 1.0, 9.0, 1.0, 1.0, 6.0, 1.0, 1.0, 1.0, 0.0]' LIMIT 4;
SELECT document FROM langchain.embedding
ORDER BY embedding <-> '[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 2.0]' LIMIT 4;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment