Created
April 30, 2023 15:26
-
-
Save buhii/701753b4d07f441957692df8a8f6c969 to your computer and use it in GitHub Desktop.
Merges multiple instances of GPTSimpleVectorIndex
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from llama_index.vector_stores.simple import SimpleVectorStoreData, SimpleVectorStore | |
from llama_index.indices.vector_store import GPTVectorStoreIndex | |
from llama_index.data_structs.data_structs_v2 import SimpleIndexDict | |
from llama_index.docstore.simple_docstore import SimpleDocumentStore | |
def merge_vector_indices(vector_indices):
    """
    Merge several GPTSimpleVectorIndex instances into one GPTVectorStoreIndex.

    Written against LlamaIndex 0.5.27. For every input index, the
    vector-store embeddings, the text-id -> doc-id mapping, the index
    struct's ``nodes_dict`` and the docstore documents are folded into
    combined dictionaries, and a new GPTVectorStoreIndex (usable like a
    GPTSimpleVectorIndex) is built on top of them.

    The input indices are NOT modified: the returned index gets a shallow
    copy of the first index's ``index_struct`` carrying the merged
    ``nodes_dict``. All other fields of that struct are taken from the
    first index unchanged.

    When the same key appears in more than one input, the entry from the
    index that comes later in ``vector_indices`` wins.

    This function is experimental and relies on private attributes
    (``_vector_store``, ``_data``, ``_docstore``) of the input indices.

    Parameters:
        vector_indices (Iterable[GPTSimpleVectorIndex]): The indices to
            merge. Must contain at least one index.

    Returns:
        GPTVectorStoreIndex: A new index backed by the merged stores.

    Raises:
        ValueError: If ``vector_indices`` is empty.

    Example:
        >>> index1 = GPTSimpleVectorIndex(...)
        >>> index2 = GPTSimpleVectorIndex(...)
        >>> merged_index = merge_vector_indices([index1, index2])
        >>> print(type(merged_index))
        <class 'llama_index.indices.vector_store.base.GPTVectorStoreIndex'>
    """
    import copy  # function-scope import keeps this block self-contained

    # Materialise once so generators / arbitrary iterables are accepted.
    indices = list(vector_indices)
    if not indices:
        raise ValueError(
            "merge_vector_indices() needs at least one index to merge"
        )

    embedding_dict = {}
    text_id_to_doc_id = {}
    nodes_dict = {}
    docs_map = {}
    for vi in indices:
        store_data = vi._vector_store._data
        embedding_dict.update(store_data.embedding_dict)
        text_id_to_doc_id.update(store_data.text_id_to_doc_id)
        nodes_dict.update(vi.index_struct.nodes_dict)
        docs_map.update(vi._docstore.docs)

    # Copy the template struct instead of updating it in place; otherwise
    # the first input index would silently gain every other index's nodes.
    index_struct = copy.copy(indices[0].index_struct)
    index_struct.nodes_dict = nodes_dict

    vector_store = SimpleVectorStore(
        simple_vector_store_data_dict=dict(
            embedding_dict=embedding_dict,
            text_id_to_doc_id=text_id_to_doc_id,
        )
    )
    return GPTVectorStoreIndex(
        index_struct=index_struct,
        vector_store=vector_store,
        docstore=SimpleDocumentStore(docs_map),
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment