Skip to content

Instantly share code, notes, and snippets.

@buhii
Created April 30, 2023 15:26
Show Gist options
  • Save buhii/701753b4d07f441957692df8a8f6c969 to your computer and use it in GitHub Desktop.
Save buhii/701753b4d07f441957692df8a8f6c969 to your computer and use it in GitHub Desktop.
Merges multiple instances of GPTSimpleVectorIndex
from llama_index.vector_stores.simple import SimpleVectorStoreData, SimpleVectorStore
from llama_index.indices.vector_store import GPTVectorStoreIndex
from llama_index.data_structs.data_structs_v2 import SimpleIndexDict
from llama_index.docstore.simple_docstore import SimpleDocumentStore
def merge_vector_indices(vector_indices):
    """Merge several GPTSimpleVectorIndex instances into one GPTVectorStoreIndex.

    Written against LlamaIndex 0.5.27. The embeddings, the text-id -> doc-id
    mapping, the ``nodes_dict`` of every index struct and the docstore
    documents of all inputs are combined, and a new ``GPTVectorStoreIndex``
    (usable like a ``GPTSimpleVectorIndex``) is built from the result.

    The input indices are not modified: the merged index struct is a deep
    copy of the first index's struct, so the first index does not end up
    listing the nodes of every other index.

    When the same key occurs in more than one input, the entry from the
    index that appears later in ``vector_indices`` wins.

    This function is experimental and relies on private attributes
    (``_vector_store``, ``_docstore``) of the index objects. Ideally the
    return value would be a ``GPTSimpleVectorIndex``.

    Parameters:
        vector_indices (List[GPTSimpleVectorIndex]): Non-empty list of
            indices to merge. The first element supplies the template
            index struct.

    Returns:
        GPTVectorStoreIndex: A new index backed by the merged stores.

    Raises:
        IndexError: If ``vector_indices`` is empty.

    Example:
        >>> index1 = GPTSimpleVectorIndex(...)
        >>> index2 = GPTSimpleVectorIndex(...)
        >>> merged_index = merge_vector_indices([index1, index2])
        >>> print(type(merged_index))
        <class 'llama_index.indices.vector_store.base.GPTVectorStoreIndex'>
    """
    # Function-scope import keeps the snippet self-contained.
    import copy

    index_template = vector_indices[0]
    # Merge into a private copy; updating the template's own index_struct
    # in place would silently mutate the first input index.
    merged_struct = copy.deepcopy(index_template.index_struct)

    embedding_dict = {}
    text_id_to_doc_id = {}
    docs_map = {}
    for vi in vector_indices:
        store_data = vi._vector_store._data
        embedding_dict.update(store_data.embedding_dict)
        text_id_to_doc_id.update(store_data.text_id_to_doc_id)
        merged_struct.nodes_dict.update(vi.index_struct.nodes_dict)
        docs_map.update(vi._docstore.docs)
    # NOTE(review): only nodes_dict is merged into the index struct. If the
    # struct carries other per-document mappings (e.g. doc_id_dict), they
    # still reflect the first index only - confirm whether they need merging.

    vector_store = SimpleVectorStore(
        simple_vector_store_data_dict=dict(
            embedding_dict=embedding_dict,
            text_id_to_doc_id=text_id_to_doc_id,
        )
    )
    return GPTVectorStoreIndex(
        index_struct=merged_struct,
        vector_store=vector_store,
        docstore=SimpleDocumentStore(docs_map),
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment