Created
January 9, 2024 13:56
-
-
Save tazarov/2e0496d5c23617a7bdd5812df6bb287e to your computer and use it in GitHub Desktop.
Clean up ChromaDB defunct binary indices.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Clean up defunct ChromaDB binary index directories.
#
# Usage: <this-script> PERSIST_DIR BACKUP_DIR
#
#   PERSIST_DIR  directory containing chroma.sqlite3 and the index
#                subdirectories to be checked
#   BACKUP_DIR   directory (created if missing) that receives every
#                subdirectory that is no longer referenced
#
# A non-hidden subdirectory of PERSIST_DIR is kept only when its name equals
# the id of a segment with scope 'VECTOR' that joins to a row in
# `collections`. Every other subdirectory is moved (never deleted) into
# BACKUP_DIR. If the query returns no ids at all, every subdirectory is
# therefore moved.
#
# Exit status: 0 on success, 1 on a validation, query or move failure.

set -u

# Print an error message to stderr and abort the script with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Validate the arguments, load the active segment ids and move every
# unreferenced subdirectory of PERSIST_DIR ($1) into BACKUP_DIR ($2).
# Returns 0 when all moves succeeded, 1 when at least one move failed.
main() {
  if (( $# < 2 )); then
    die "Usage: ${0##*/} PERSIST_DIR BACKUP_DIR"
  fi

  if ! command -v sqlite3 >/dev/null 2>&1; then
    die "sqlite3 could not be found. Please install sqlite3 and try again."
  fi

  local persist_dir backup_dir db_file

  # Check existence on the raw argument first so the message shows what the
  # user actually typed even when realpath cannot resolve it.
  [[ -d "$1" ]] || die "PERSIST_DIR does not exist: $1"
  persist_dir=$(realpath "$1") || die "Cannot resolve PERSIST_DIR: $1"

  db_file="${persist_dir}/chroma.sqlite3"
  [[ -f "$db_file" ]] || die "SQLite database not found at ${db_file}"

  # Create the backup directory BEFORE resolving it: realpath can fail on a
  # path whose parent directories do not exist yet.
  if ! mkdir -p -- "$2" 2>/dev/null; then
    die "Cannot create or do not have write permission for BACKUP_DIR: $2"
  fi
  backup_dir=$(realpath "$2") || die "Cannot resolve BACKUP_DIR: $2"
  # mkdir -p also succeeds on an existing read-only directory, so test
  # writability explicitly.
  [[ -w "$backup_dir" ]] \
    || die "Cannot create or do not have write permission for BACKUP_DIR: $2"

  # Fetch ALL active vector segment ids up front. Only s.id is selected: the
  # collection name/id were never used, and a name containing '|' would
  # break column splitting of the sqlite3 output.
  local segment_ids
  if ! segment_ids=$(sqlite3 "$db_file" \
    "select distinct s.id from collections c left join segments s on c.id=s.collection where s.scope='VECTOR'"); then
    # Without this check a failed query would look like "no active
    # segments" and every index directory would be moved away.
    die "Failed to query vector segments from ${db_file}"
  fi

  # Newline-framed id list so that a whole-line match can be done with a
  # plain `case` pattern (works on bash 3.2; no associative arrays needed).
  local active_ids=$'\n'"${segment_ids}"$'\n'

  local dir entry failed=0
  for dir in "${persist_dir}"/*/; do
    # An unmatched glob stays literal; skip it instead of feeding it to mv.
    [[ -d "$dir" ]] || continue
    entry=${dir%/}

    # Never move the directory that contains (or is) the backup target.
    if [[ "${backup_dir}/" == "${entry}/"* ]]; then
      continue
    fi

    # Keep the directory when its name is one of the active segment ids.
    # The quoted expansion makes this a literal, not a glob, comparison.
    case "$active_ids" in
      *$'\n'"${entry##*/}"$'\n'*) continue ;;
    esac

    echo "Moving $dir to $backup_dir"
    if ! mv -- "$dir" "${backup_dir}/"; then
      printf 'Failed to move %s to %s\n' "$dir" "$backup_dir" >&2
      failed=1
    fi
  done

  return "$failed"
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment