Last active
December 21, 2023 07:38
-
-
Save leeduckgo/6fe8feb61e427991342cde0268ea3b00 to your computer and use it in GitHub Desktop.
CodesOnChain.VectorAPI
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule CodesOnChain.VectorAPI do | |
@moduledoc """ | |
API for the Vector Datasets of Movespace. | |
> https://movespace.xyz | |
# TODO: record necessary data in local database. | |
# TODO: using embedbase surpase to auth. | |
""" | |
alias Components.{VectorInteractor, MovespaceDB, SmartPrompterInteractor} | |
# Rejection tuple shared by every guarded function in this module; it is
# returned whenever an allow-list or admin-key check fails.
@error_msg {:error, "this dataset is not allowed"}

@doc """
Returns the value of this module's `@moduledoc` attribute as read at compile time.
"""
def get_module_doc do
  @moduledoc
end
@doc """
Lists the dataset ids that `insert_data/3`, `insert_data/4` and `update_data/4`
accept.
"""
def allow_write_list do
  ~w(
    roles-for-autonomous-life
    npcs-for-autonomous-life
    events-for-autonomous-life
    eth-smart-contracts
    eth-smart-contracts-fragment-by-structure
    eth-smart-contracts-analysis
    all-whitepapers-test
    base-chain
    base-chain-test
    bodhi-contents
    bodhi-text-contents
    galxe-campaigns
  )
end
@doc """
Lists the dataset ids that `search_data/3` is allowed to query.
"""
def allow_read_list do
  ~w(
    roles-for-autonomous-life
    npcs-for-autonomous-life
    events-for-autonomous-life
    eth-smart-contracts
    eth-smart-contracts-fragment-by-structure
    eth-smart-contracts-analysis
    all-whitepapers-test
    base-chain
    base-chain-test
    bodhi-contents
    bodhi-text-contents
    galxe-campaigns
  )
end
@doc """
Lists the dataset ids that `delete_data/2` and `delete_dataset/1` accept.

The list is currently empty, so both delete functions reject every dataset.
"""
def allow_delete_list, do: []
@doc """
Returns the result of `MovespaceDB.get_count/1` for the local copy of `dataset_id`.

The id is first normalised with `format/1` (hyphens become underscores). No
allow-list or admin-key check is applied here.
"""
def get_count(dataset_id) do
  dataset_id
  |> format()
  |> MovespaceDB.get_count()
end
@doc """
Inserts `data` into the remote vector dataset `dataset_id` and mirrors the
first returned item into the local vector DB.

The call is accepted only when `dataset_id` is in `allow_write_list/0` and
`admin_key` equals `Constants.get_admin_key/0`. Otherwise the shared rejection
tuple `{:error, "this dataset is not allowed"}` is returned (this is also what
a wrong admin key yields).

Returns `{:ok, item}`, where `item` is the first element of the remote
`results`. Raises `MatchError` when the remote or the local insert does not
return the expected `{:ok, _}` shape, and `Enum.OutOfBoundsError` when
`results` is empty.
"""
def insert_data(admin_key, dataset_id, data) do
  guarded_insert(admin_key, dataset_id, fn ->
    VectorInteractor.insert_data(dataset_id, data)
  end)
end

@doc """
Same as `insert_data/3`, additionally passing `metadata` to the remote insert.
"""
def insert_data(admin_key, dataset_id, data, metadata) do
  guarded_insert(admin_key, dataset_id, fn ->
    VectorInteractor.insert_data(dataset_id, data, metadata)
  end)
end

# Shared body of insert_data/3 and insert_data/4: checks authorisation, runs the
# given remote insert, mirrors the result locally and returns the first item.
defp guarded_insert(admin_key, dataset_id, remote_insert) do
  # NOTE(review): `==` is not a constant-time comparison; if this key is a real
  # secret, consider a timing-safe compare (e.g. Plug.Crypto.secure_compare/2,
  # should that dependency be available) — confirm with the project owners.
  if dataset_id in allow_write_list() and admin_key == Constants.get_admin_key() do
    # insert data to remote vectorDB
    {:ok, %{results: results}} = remote_insert.()

    # insert data to local vectorDB
    {:ok, _} = insert_data_to_local_db(dataset_id, results)

    {:ok, Enum.fetch!(results, 0)}
  else
    @error_msg
  end
end
# Mirrors the first entry of `results` into the local vector DB for `dataset_id`.
# Returns whatever `MovespaceDB.insert_vector/5` returns; raises
# `Enum.OutOfBoundsError` when `results` is empty.
defp insert_data_to_local_db(dataset_id, results) do
  local_id = format(dataset_id)

  # Make sure the local dataset exists before writing into it.
  MovespaceDB.create_vector_db_if_uncreated(local_id)

  # Only the first entry is stored; further entries in `results` are ignored here.
  first = Enum.fetch!(results, 0)

  MovespaceDB.insert_vector(
    local_id,
    first.id,
    first.data,
    first.metadata,
    first.embedding
  )
end
@doc """
Converts a dataset id into the identifier used for the local vector DB by
replacing every `-` with `_`.

For example, `"base-chain-test"` becomes `"base_chain_test"`.
"""
def format(dataset_id), do: String.replace(dataset_id, "-", "_")
@doc """
Updates the item `id` of the remote dataset `dataset_id` with `data`.

Accepted only when `dataset_id` is in `allow_write_list/0` and `admin_key`
equals `Constants.get_admin_key/0`; otherwise the shared rejection tuple is
returned. On success returns `{:ok, "data updated."}`. Raises `MatchError`
when the remote update does not return `{:ok, _}`.

The local vector DB is not touched by this function.
"""
def update_data(admin_key, dataset_id, data, id) do
  authorized? = dataset_id in allow_write_list() and admin_key == Constants.get_admin_key()

  case authorized? do
    true ->
      {:ok, _} = VectorInteractor.update_data(dataset_id, data, id)
      # TODO: update a data to local vectorDB
      {:ok, "data updated."}

    false ->
      @error_msg
  end
end
@doc """
Fetches the row with `id` from the local copy of `dataset_id`, without its
last column.

The lookup goes through `MovespaceDB.fetch_data_by_id/2` using the id
normalised by `format/1`. Only the first returned row is used, and its last
element is dropped; per the original note, this default fetch carries no
embedding (presumably the last column is the embedding — confirm against the
`MovespaceDB` schema).

Raises `MatchError` when the lookup does not return `{:ok, _}` and
`Enum.OutOfBoundsError` when no row is found.
"""
def fetch_data(dataset_id, id) do
  {:ok, res} = MovespaceDB.fetch_data_by_id(format(dataset_id), id)

  res.rows
  |> Enum.fetch!(0)
  # `Enum.drop(-1)` yields the same list as `Enum.slice(0..-2)` but avoids the
  # implicit negative-step range that newer Elixir versions warn about.
  |> Enum.drop(-1)
end
@doc """
Fetches the row with `id` from the local copy of `dataset_id`, keeping the
last column and converting it with `handle_vector/1`.

Raises `MatchError` when the lookup does not return `{:ok, _}` and
`Enum.OutOfBoundsError` when no row is found.
"""
def fetch_data_with_vector(dataset_id, id) do
  local_id = format(dataset_id)
  {:ok, result} = MovespaceDB.fetch_data_by_id(local_id, id)

  first_row = Enum.fetch!(result.rows, 0)
  handle_vector(first_row)
end
@doc """
Fetches the row identified by `item_id` from the local copy of `dataset_id`,
without its last column.

The lookup goes through `MovespaceDB.fetch_data_by_id_in_embedbase/2` using the
id normalised by `format/1` (presumably `item_id` is the id assigned by
Embedbase — confirm against `MovespaceDB`). Only the first returned row is
used, and its last element is dropped; per the original note, this default
fetch carries no embedding.

Raises `MatchError` when the lookup does not return `{:ok, _}` and
`Enum.OutOfBoundsError` when no row is found.
"""
def fetch_data_by_uuid(dataset_id, item_id) do
  {:ok, res} = MovespaceDB.fetch_data_by_id_in_embedbase(format(dataset_id), item_id)

  res.rows
  |> Enum.fetch!(0)
  # `Enum.drop(-1)` yields the same list as `Enum.slice(0..-2)` but avoids the
  # implicit negative-step range that newer Elixir versions warn about.
  |> Enum.drop(-1)
end
@doc """
Fetches the row identified by `item_id` via
`MovespaceDB.fetch_data_by_id_in_embedbase/2`, keeping the last column and
converting it with `handle_vector/1`.

Raises `MatchError` when the lookup does not return `{:ok, _}` and
`Enum.OutOfBoundsError` when no row is found.
"""
def fetch_data_with_vector_by_uuid(dataset_id, item_id) do
  local_id = format(dataset_id)
  {:ok, result} = MovespaceDB.fetch_data_by_id_in_embedbase(local_id, item_id)

  first_row = Enum.fetch!(result.rows, 0)
  handle_vector(first_row)
end
@doc """
Converts the last element of the row `elem` with `Pgvector.to_list/1` and
leaves every other element untouched.

An empty list is returned unchanged, so `Pgvector.to_list/1` is never called
with `nil` for a row that has no columns.
"""
def handle_vector(elem) do
  # Index -1 addresses the last element; out-of-bounds (empty list) is a no-op.
  List.update_at(elem, -1, &Pgvector.to_list/1)
end
@doc """
Searches the remote dataset `dataset_id` for `question`.

Accepted only when `dataset_id` is in `allow_read_list/0`; otherwise the shared
rejection tuple is returned. On success returns the remote result map with its
`:similarities` replaced by at most `search_num` entries (default 5), each
stripped of its `:embedding` key. Note that the success value is the bare map,
not an `{:ok, _}` tuple.

Raises `MatchError` when the remote search does not return
`{:ok, %{similarities: _}}`.
"""
def search_data(dataset_id, question, search_num \\ 5) do
  case dataset_id in allow_read_list() do
    true ->
      {:ok, %{similarities: raw_hits} = response} =
        VectorInteractor.search_data(dataset_id, question)

      top_hits =
        raw_hits
        |> handle_similarities()
        |> Enum.take(search_num)

      Map.put(response, :similarities, top_hits)

    false ->
      @error_msg
  end
end
@doc """
Requests an embedding for `content` from the smart prompter service.

Accepted only when `admin_key` equals `Constants.get_admin_key/0`; otherwise
the shared rejection tuple is returned. On success a session is set on the
endpoint from `Constants.smart_prompter_endpoint/0` and the response of
`SmartPrompterInteractor.get_embedding/2` is returned unwrapped (not as an
`{:ok, _}` tuple). Raises `MatchError` when that call does not return
`{:ok, _}`.
"""
def get_embedding(admin_key, content) do
  if admin_key != Constants.get_admin_key() do
    @error_msg
  else
    SmartPrompterInteractor.set_session(Constants.smart_prompter_endpoint())

    {:ok, embedding_resp} =
      SmartPrompterInteractor.get_embedding(Constants.smart_prompter_endpoint(), content)

    embedding_resp
  end
end
@doc """
Deletes the items `ids` from the remote dataset `dataset_id`.

Accepted only when `dataset_id` is in `allow_delete_list/0`; otherwise the
shared rejection tuple is returned. No admin key is checked here. On success
returns `{:ok, "data deleted."}`; raises `MatchError` when the remote delete
does not return `{:ok, _}`.
"""
def delete_data(dataset_id, ids) do
  if dataset_id not in allow_delete_list() do
    @error_msg
  else
    {:ok, _} = VectorInteractor.delete_data(dataset_id, ids)
    {:ok, "data deleted."}
  end
end
@doc """
Deletes the whole remote dataset `dataset_id`.

Accepted only when `dataset_id` is in `allow_delete_list/0`; otherwise the
shared rejection tuple is returned. No admin key is checked here. On success
returns `{:ok, "dataset deleted."}`; raises `MatchError` when the remote delete
does not return `{:ok, _}`.
"""
def delete_dataset(dataset_id) do
  if dataset_id not in allow_delete_list() do
    @error_msg
  else
    {:ok, _} = VectorInteractor.delete_dataset(dataset_id)
    {:ok, "dataset deleted."}
  end
end
# Strips the `:embedding` key from every entry of `similarities`.
defp handle_similarities(similarities) do
  Enum.map(similarities, &Map.delete(&1, :embedding))
end
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment