@vizovitin
Created April 24, 2024 00:11
llama-index vector store migration script from the embedded JSON-based store to another one
#!/usr/bin/env python3
# Requirements: Python>=3.10 llama-index==0.10.30 llama-index-vector-stores-qdrant
import logging
import sys
import textwrap

logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)

from llama_index.core import VectorStoreIndex, StorageContext, load_index_from_storage
from llama_index.vector_stores.qdrant import QdrantVectorStore
import qdrant_client


def get_file_vector_store_index():
    """Source vector store. Should be file-based so it allows document enumeration."""
    logging.info("Creating file vector store index")
    try:
        # You may need to adjust persist_dir below according to your setup.
        storage_context = StorageContext.from_defaults(persist_dir='storage')
        file_index = load_index_from_storage(storage_context=storage_context)
    except FileNotFoundError:
        file_index = VectorStoreIndex([])
    return file_index


def get_target_vector_store_index():
    """Target vector store. Any one can be used."""
    # Adjust this function and the respective imports according to your needs.
    host, port, db = 'localhost', 6333, 'test'
    logging.info("Creating Qdrant vector store index")
    client = qdrant_client.QdrantClient(host=host, port=port)
    vector_store = QdrantVectorStore(client=client, collection_name=db)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    qdrant_index = VectorStoreIndex([], storage_context=storage_context)
    return qdrant_index


def filter_node(node, node_id):
    """Callback to filter nodes during migration. Return None to skip the node.
    Return the original or a modified/copied/new node to insert it.
    """
    logging.debug("%r: %r %r", node_id, node.metadata, textwrap.shorten(node.text, 40))
    # E.g. 'return node' to migrate everything as is.
    # This implementation just logs every node and skips it.
    return None


file_index = get_file_vector_store_index()
target_index = get_target_vector_store_index()

logging.info("Fetching nodes from the file index")
nodes = []
for node_id, node in file_index.docstore.docs.items():
    if target_node := filter_node(node, node_id):
        target_node.embedding = file_index.vector_store.get(node_id)
        nodes.append(target_node)

logging.info("Inserting nodes into the target index")
target_index.insert_nodes(nodes)
# Assumes the target_index doesn't need explicit persistence; otherwise call
# target_index.storage_context.persist(persist_dir=target_dir)

logging.info("Finished")
This shows migration from the default JSON-based embedded vector store (which is memory-inefficient and very slow to load and persist even with a relatively small number of documents, such as 5K) to Qdrant (though realistically any other store that also keeps document texts can be used).
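With thousands of nodes, a single `insert_nodes` call can be heavy on memory and network. One hedged option is to insert in batches; a minimal stdlib-only chunking helper is sketched below (the batch size of 256 in the usage comment is an arbitrary illustration, not a tuned value):

```python
def batched(items, size):
    """Yield lists of at most `size` items from an iterable."""
    batch = []
    for item in items:
        batch.append(item)
        if len(batch) == size:
            yield batch
            batch = []
    if batch:  # trailing partial batch
        yield batch

# Hypothetical usage in the script above:
# for chunk in batched(nodes, 256):
#     target_index.insert_nodes(chunk)
```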

Note that this migration script does nothing by default, as all nodes are skipped. You need to adjust the code to your needs.
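As a hedged sketch of such an adjustment, here is a `filter_node` variant that migrates only nodes whose metadata marks them as coming from a particular source and drops a bulky metadata key before insertion. The `file_path` and `raw_html` keys are hypothetical illustrations, not part of the original script:

```python
def filter_node(node, node_id):
    """Return the node to insert it into the target index, or None to skip it."""
    # 'file_path' is an assumed metadata key; adjust to your documents.
    if not node.metadata.get("file_path", "").startswith("docs/"):
        return None
    # Drop an illustrative bulky key so it isn't copied to the target store.
    node.metadata.pop("raw_html", None)
    return node
```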

For reference, Qdrant can be spun up via `docker run -p 6333:6333 -v $(pwd)/qdrant/storage:/qdrant/storage qdrant/qdrant`.

Unfortunately, the same approach won't work for migrating from one non-default vector store to another, as document enumeration is most likely unavailable in that case.
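One hedged way to check up front whether a source index supports enumeration is to probe for the `docs` mapping that the default document store exposes. This is a duck-typing heuristic, not an official llama-index API:

```python
def supports_enumeration(index):
    """Heuristically report whether index.docstore.docs is an enumerable mapping."""
    docstore = getattr(index, "docstore", None)
    docs = getattr(docstore, "docs", None)
    return hasattr(docs, "items")
```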

If you're using IngestionPipeline, please re-check that source-document associations are also retained, as the pipeline's caching may depend on them.
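A small hedged helper for that re-check: collect the ids of migrated nodes whose source-document association (`ref_doc_id` in llama-index) is missing. It uses duck typing so it works on any node-like object:

```python
def nodes_missing_source(nodes):
    """Return ids of nodes that lack a source-document association."""
    return [
        getattr(node, "node_id", None)
        for node in nodes
        if not getattr(node, "ref_doc_id", None)
    ]
```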
