Skip to content

Instantly share code, notes, and snippets.

@amotl
Created December 12, 2023 21:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amotl/62d590cad849020568e2c2e3d59d8749 to your computer and use it in GitHub Desktop.
Save amotl/62d590cad849020568e2c2e3d59d8749 to your computer and use it in GitHub Desktop.
CrateDB: Exercise storing and searching by vectors using its "FLOAT_VECTOR" and "KNN_MATCH".
/**
* CrateDB: Exercise storing and searching by vectors using its "FLOAT_VECTOR" and "KNN_MATCH".
* The example uses euclidean distance for vector similarity search.
*
* Synopsis::
*
* docker run --rm -it --publish=4200:4200 crate/crate:nightly -Cdiscovery.type=single-node
* crash < cratedb-vector-knn-exercise.sql
*
* Resources:
* - https://github.com/crate/crate
* - https://cratedb.com/docs/crate/reference/en/master/general/ddl/data-types.html#float-vector
* - https://cratedb.com/docs/crate/reference/en/master/general/builtins/scalar-functions.html#scalar-knn-match
**/
DROP TABLE IF EXISTS langchain.embedding;
CREATE TABLE langchain.embedding (
collection_id STRING,
embedding FLOAT_VECTOR(14),
document STRING,
cmetadata OBJECT,
custom_id STRING,
uuid STRING NOT NULL,
PRIMARY KEY (uuid)
);
INSERT INTO langchain.embedding
(collection_id, embedding, document, cmetadata, custom_id, uuid)
VALUES
('e67d0823-00ee-40c5-934c-b19edfe281fe', [1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0], 'foo', {tag='something'}, 'n/a', 'f9b6e677-0542-4323-be2b-a06442384321'),
('e67d0823-00ee-40c5-934c-b19edfe281fe', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 'bar', {tag='something else'}, 'n/a', '3f8557fc-8bd0-47f4-bb54-207184302caa'),
('e67d0823-00ee-40c5-934c-b19edfe281fe', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 4.0, 1.0, 1.0, 2.0], 'baz', {tag='just anything'}, 'n/a', 'f313c95d-7ecd-4575-9aa0-bf1d73e60535')
;
REFRESH TABLE langchain.embedding;
-- Get nearest neighbors by Euclidean distance (VectorSimilarityFunction.EUCLIDEAN).
SELECT document, _score FROM langchain.embedding
WHERE KNN_MATCH(embedding, [1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.3], 4)
ORDER BY _score DESC;
SELECT document, _score FROM langchain.embedding
WHERE KNN_MATCH(embedding, [9.0, 0.0, 8.0, 1.0, 1.0, 1.0, 9.0, 1.0, 1.0, 6.0, 1.0, 1.0, 1.0, 0.0], 4)
ORDER BY _score DESC;
SELECT document, _score FROM langchain.embedding
WHERE KNN_MATCH(embedding, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 2.0], 4)
ORDER BY _score DESC;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment