Created
December 12, 2023 21:53
-
-
Save amotl/62d590cad849020568e2c2e3d59d8749 to your computer and use it in GitHub Desktop.
CrateDB: Exercise storing and searching by vectors using its "FLOAT_VECTOR" and "KNN_MATCH".
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* CrateDB: Exercise storing and searching by vectors using its "FLOAT_VECTOR" and "KNN_MATCH". | |
* The example uses euclidean distance for vector similarity search. | |
* | |
* Synopsis:: | |
* | |
* docker run --rm -it --publish=4200:4200 crate/crate:nightly -Cdiscovery.type=single-node | |
* crash < cratedb-vector-knn-exercise.sql | |
* | |
* Resources: | |
* - https://github.com/crate/crate | |
* - https://cratedb.com/docs/crate/reference/en/master/general/ddl/data-types.html#float-vector | |
* - https://cratedb.com/docs/crate/reference/en/master/general/builtins/scalar-functions.html#scalar-knn-match | |
**/ | |
DROP TABLE IF EXISTS langchain.embedding; | |
CREATE TABLE langchain.embedding ( | |
collection_id STRING, | |
embedding FLOAT_VECTOR(14), | |
document STRING, | |
cmetadata OBJECT, | |
custom_id STRING, | |
uuid STRING NOT NULL, | |
PRIMARY KEY (uuid) | |
); | |
INSERT INTO langchain.embedding | |
(collection_id, embedding, document, cmetadata, custom_id, uuid) | |
VALUES | |
('e67d0823-00ee-40c5-934c-b19edfe281fe', [1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0], 'foo', {tag='something'}, 'n/a', 'f9b6e677-0542-4323-be2b-a06442384321'), | |
('e67d0823-00ee-40c5-934c-b19edfe281fe', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 'bar', {tag='something else'}, 'n/a', '3f8557fc-8bd0-47f4-bb54-207184302caa'), | |
('e67d0823-00ee-40c5-934c-b19edfe281fe', [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 4.0, 1.0, 1.0, 2.0], 'baz', {tag='just anything'}, 'n/a', 'f313c95d-7ecd-4575-9aa0-bf1d73e60535') | |
; | |
REFRESH TABLE langchain.embedding; | |
-- Get nearest neighbors by Euclidean distance (VectorSimilarityFunction.EUCLIDEAN). | |
SELECT document, _score FROM langchain.embedding | |
WHERE KNN_MATCH(embedding, [1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.3], 4) | |
ORDER BY _score DESC; | |
SELECT document, _score FROM langchain.embedding | |
WHERE KNN_MATCH(embedding, [9.0, 0.0, 8.0, 1.0, 1.0, 1.0, 9.0, 1.0, 1.0, 6.0, 1.0, 1.0, 1.0, 0.0], 4) | |
ORDER BY _score DESC; | |
SELECT document, _score FROM langchain.embedding | |
WHERE KNN_MATCH(embedding, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 2.0], 4) | |
ORDER BY _score DESC; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment