Skip to content

Instantly share code, notes, and snippets.

@leeduckgo
Last active December 21, 2023 07:38
Show Gist options
  • Save leeduckgo/6fe8feb61e427991342cde0268ea3b00 to your computer and use it in GitHub Desktop.
Save leeduckgo/6fe8feb61e427991342cde0268ea3b00 to your computer and use it in GitHub Desktop.
CodesOnChain.VectorAPI
defmodule CodesOnChain.VectorAPI do
@moduledoc """
API for the Vector Datasets of Movespace.
> https://movespace.xyz
# TODO: record necessary data in local database.
# TODO: using embedbase surpase to auth.
"""
alias Components.{VectorInteractor, MovespaceDB, SmartPrompterInteractor}
@error_msg {:error, "this dataset is not allowed"}
@doc """
Returns this module's `@moduledoc` value.
"""
def get_module_doc do
  @moduledoc
end
@doc """
Lists the dataset ids that may be written to (insert/update).
"""
def allow_write_list do
  ~w(
    roles-for-autonomous-life
    npcs-for-autonomous-life
    events-for-autonomous-life
    eth-smart-contracts
    eth-smart-contracts-fragment-by-structure
    eth-smart-contracts-analysis
    all-whitepapers-test
    base-chain
    base-chain-test
    bodhi-contents
    bodhi-text-contents
    galxe-campaigns
  )
end
@doc """
Lists the dataset ids that may be searched via `search_data/3`.
"""
def allow_read_list do
  ~w(
    roles-for-autonomous-life
    npcs-for-autonomous-life
    events-for-autonomous-life
    eth-smart-contracts
    eth-smart-contracts-fragment-by-structure
    eth-smart-contracts-analysis
    all-whitepapers-test
    base-chain
    base-chain-test
    bodhi-contents
    bodhi-text-contents
    galxe-campaigns
  )
end
@doc """
Lists the dataset ids that may be deleted from.

The list is currently empty, so `delete_data/2` and `delete_dataset/1`
reject every dataset.
"""
def allow_delete_list, do: []
@doc """
Returns the result of `MovespaceDB.get_count/1` for the given dataset.

The dataset id is normalised with `format/1` (dashes become underscores)
before the lookup.

Original author's note: `get_count > fetch_data_with_vector/fetch_data`
(presumably a hint about how this relates to the fetch functions; TODO confirm).
"""
def get_count(dataset_id) do
  dataset_id
  |> format()
  |> MovespaceDB.get_count()
end
@doc """
Inserts `data` into the remote vector store and mirrors it into the local DB.

Returns `{:ok, first_result}` on success, or `{:error, "this dataset is not allowed"}`
when `dataset_id` is not in `allow_write_list/0` or `admin_key` does not match
`Constants.get_admin_key/0`.

Raises `MatchError` if either the remote or the local insert does not return
the expected `{:ok, _}` shape, and `Enum.OutOfBoundsError` if the remote
insert returns no results.
"""
def insert_data(admin_key, dataset_id, data) do
  insert_authorized(admin_key, dataset_id, fn ->
    VectorInteractor.insert_data(dataset_id, data)
  end)
end

@doc """
Same as `insert_data/3`, but forwards `metadata` to the remote insert.
"""
def insert_data(admin_key, dataset_id, data, metadata) do
  insert_authorized(admin_key, dataset_id, fn ->
    VectorInteractor.insert_data(dataset_id, data, metadata)
  end)
end

# Shared flow for both insert_data arities: authorise, run the remote insert
# (passed as a zero-arity fun so it only executes after authorisation),
# mirror the first result into the local DB, and return that first result.
defp insert_authorized(admin_key, dataset_id, remote_insert) do
  if dataset_id in allow_write_list() and admin_key == Constants.get_admin_key() do
    # insert data to remote vectorDB
    {:ok, %{results: results}} = remote_insert.()
    # insert data to local vectorDB
    {:ok, _} = insert_data_to_local_db(dataset_id, results)
    {:ok, Enum.fetch!(results, 0)}
  else
    @error_msg
  end
end
# Mirrors the first element of `results` into the local vector DB.
# The dataset id is normalised via format/1, the local table is created when
# missing, and the return value of MovespaceDB.insert_vector/5 is passed back.
defp insert_data_to_local_db(dataset_id, results) do
  local_id = format(dataset_id)

  # make sure the target dataset exists before writing to it
  MovespaceDB.create_vector_db_if_uncreated(local_id)

  # only the first result is stored locally
  first = Enum.fetch!(results, 0)

  MovespaceDB.insert_vector(
    local_id,
    first.id,
    first.data,
    first.metadata,
    first.embedding
  )
end
@doc """
Converts a dataset id to its local form by replacing every `-` with `_`.
"""
def format(dataset_id), do: String.replace(dataset_id, "-", "_")
@doc """
Updates the item `id` of `dataset_id` in the remote vector store.

Returns `{:ok, "data updated."}` on success, or the shared error tuple when
the dataset is not writable or `admin_key` does not match
`Constants.get_admin_key/0`. Raises `MatchError` if the remote update does
not return `{:ok, _}`.

Note: the local vector DB is not updated here (still a TODO).
"""
def update_data(admin_key, dataset_id, data, id) do
  cond do
    dataset_id not in allow_write_list() ->
      @error_msg

    admin_key != Constants.get_admin_key() ->
      @error_msg

    true ->
      {:ok, _} = VectorInteractor.update_data(dataset_id, data, id)
      # TODO: update a data to local vectorDB
      {:ok, "data updated."}
  end
end
@doc """
Fetches the first row matching `id` from the local DB, without its last column.

The last column is dropped because the default fetch does not include the
embedding (presumably the trailing column holds it; see
`fetch_data_with_vector/2` for the variant that keeps it).

Raises `MatchError` if the lookup does not return `{:ok, _}` and
`Enum.OutOfBoundsError` if no row is found.
"""
def fetch_data(dataset_id, id) do
  {:ok, res} = MovespaceDB.fetch_data_by_id(format(dataset_id), id)

  res.rows
  |> Enum.fetch!(0)
  # Enum.drop/2 with -1 removes the last element; it replaces
  # Enum.slice(0..-2), whose implicit negative-step range is deprecated.
  |> Enum.drop(-1)
end
@doc """
Fetches the first row matching `id` from the local DB, keeping the last
column and converting it to a list via `handle_vector/1`.

Raises `MatchError` if the lookup does not return `{:ok, _}` and
`Enum.OutOfBoundsError` if no row is found.
"""
def fetch_data_with_vector(dataset_id, id) do
  {:ok, result} = MovespaceDB.fetch_data_by_id(format(dataset_id), id)
  first_row = Enum.fetch!(result.rows, 0)
  handle_vector(first_row)
end
@doc """
Fetches the first row matching `item_id` via
`MovespaceDB.fetch_data_by_id_in_embedbase/2`, without its last column.

The last column is dropped because the default fetch does not include the
embedding (presumably the trailing column holds it; see
`fetch_data_with_vector_by_uuid/2` for the variant that keeps it).

Raises `MatchError` if the lookup does not return `{:ok, _}` and
`Enum.OutOfBoundsError` if no row is found.
"""
def fetch_data_by_uuid(dataset_id, item_id) do
  {:ok, res} = MovespaceDB.fetch_data_by_id_in_embedbase(format(dataset_id), item_id)

  res.rows
  |> Enum.fetch!(0)
  # Enum.drop/2 with -1 removes the last element; it replaces
  # Enum.slice(0..-2), whose implicit negative-step range is deprecated.
  |> Enum.drop(-1)
end
@doc """
Fetches the first row matching `item_id` via
`MovespaceDB.fetch_data_by_id_in_embedbase/2`, keeping the last column and
converting it to a list via `handle_vector/1`.

Raises `MatchError` if the lookup does not return `{:ok, _}` and
`Enum.OutOfBoundsError` if no row is found.
"""
def fetch_data_with_vector_by_uuid(dataset_id, item_id) do
  {:ok, result} = MovespaceDB.fetch_data_by_id_in_embedbase(format(dataset_id), item_id)
  first_row = Enum.fetch!(result.rows, 0)
  handle_vector(first_row)
end
@doc """
Replaces the last element of `elem` with `Pgvector.to_list/1` applied to it,
leaving every other element in place.
"""
def handle_vector(elem) do
  # TODO (left unspecified by the original author)
  {raw_vector, leading} = List.pop_at(elem, -1)
  decoded = Pgvector.to_list(raw_vector)
  leading ++ [decoded]
end
@doc """
Searches `dataset_id` for `question` through `VectorInteractor.search_data/2`.

On success returns the search result map with `:similarities` replaced by at
most `search_num` entries (default 5), each with its `:embedding` key removed.
Returns the shared error tuple when `dataset_id` is not in `allow_read_list/0`.
Raises `MatchError` if the remote search does not return `{:ok, %{similarities: _}}`.
"""
def search_data(dataset_id, question, search_num \\ 5) do
  case dataset_id in allow_read_list() do
    false ->
      @error_msg

    true ->
      {:ok, %{similarities: raw} = result} =
        VectorInteractor.search_data(dataset_id, question)

      # strip embeddings first, then keep only the requested number of hits
      top_hits =
        raw
        |> handle_similarities()
        |> Enum.take(search_num)

      Map.put(result, :similarities, top_hits)
  end
end
@doc """
Returns the embedding response for `content` from the smart prompter.

A session is set on the smart prompter endpoint before the embedding is
requested. When `admin_key` does not match `Constants.get_admin_key/0` the
shared error tuple is returned (its message mentions datasets even though
the failure here is the key check). Raises `MatchError` if the embedding
request does not return `{:ok, _}`.
"""
def get_embedding(admin_key, content) do
  if admin_key != Constants.get_admin_key() do
    @error_msg
  else
    # result of set_session is intentionally ignored, as in the original flow
    SmartPrompterInteractor.set_session(Constants.smart_prompter_endpoint())

    {:ok, embedding_resp} =
      SmartPrompterInteractor.get_embedding(Constants.smart_prompter_endpoint(), content)

    embedding_resp
  end
end
@doc """
Deletes the items `ids` from `dataset_id` in the remote vector store.

Returns `{:ok, "data deleted."}` on success, or the shared error tuple when
`dataset_id` is not in `allow_delete_list/0`. Raises `MatchError` if the
remote delete does not return `{:ok, _}`.
"""
def delete_data(dataset_id, ids) do
  case dataset_id in allow_delete_list() do
    true ->
      {:ok, _} = VectorInteractor.delete_data(dataset_id, ids)
      {:ok, "data deleted."}

    false ->
      @error_msg
  end
end
@doc """
Deletes the whole dataset `dataset_id` from the remote vector store.

Returns `{:ok, "dataset deleted."}` on success, or the shared error tuple when
`dataset_id` is not in `allow_delete_list/0`. Raises `MatchError` if the
remote delete does not return `{:ok, _}`.
"""
def delete_dataset(dataset_id) do
  case dataset_id in allow_delete_list() do
    true ->
      {:ok, _} = VectorInteractor.delete_dataset(dataset_id)
      {:ok, "dataset deleted."}

    false ->
      @error_msg
  end
end
# Removes the :embedding key from every similarity entry; entries without the
# key are returned unchanged.
defp handle_similarities(similarities) do
  Enum.map(similarities, &Map.delete(&1, :embedding))
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment