Skip to content

Instantly share code, notes, and snippets.

@chezou
Last active October 29, 2023 23:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chezou/12f52b3683dc523f5dfc855e8d3cfa72 to your computer and use it in GitHub Desktop.
Save chezou/12f52b3683dc523f5dfc855e8d3cfa72 to your computer and use it in GitHub Desktop.
Blog recommendation with prelims and chroma
annotated-types==0.6.0
anyio==3.7.1
backoff==2.2.1
bcrypt==4.0.1
cachetools==5.3.2
certifi==2023.7.22
charset-normalizer==3.3.1
chroma-hnswlib==0.7.3
chromadb==0.4.15
click==8.1.7
coloredlogs==15.0.1
Deprecated==1.2.14
exceptiongroup==1.1.3
fastapi==0.104.0
filelock==3.13.0
flatbuffers==23.5.26
fsspec==2023.10.0
google-auth==2.23.3
googleapis-common-protos==1.61.0
graphlib-backport==1.0.3
grpcio==1.59.0
h11==0.14.0
httptools==0.6.1
huggingface-hub==0.17.3
humanfriendly==10.0
idna==3.4
importlib-metadata==6.8.0
importlib-resources==6.1.0
Jinja2==3.1.2
joblib==1.3.2
kubernetes==28.1.0
MarkupSafe==2.1.3
monotonic==1.6
mpmath==1.3.0
networkx==3.1
nltk==3.8.1
numpy==1.24.4
nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==8.9.2.26
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
nvidia-cusparse-cu12==12.1.0.106
nvidia-nccl-cu12==2.18.1
nvidia-nvjitlink-cu12==12.3.52
nvidia-nvtx-cu12==12.1.105
oauthlib==3.2.2
onnxruntime==1.16.1
opentelemetry-api==1.20.0
opentelemetry-exporter-otlp-proto-common==1.20.0
opentelemetry-exporter-otlp-proto-grpc==1.20.0
opentelemetry-proto==1.20.0
opentelemetry-sdk==1.20.0
opentelemetry-semantic-conventions==0.41b0
overrides==7.4.0
packaging==23.2
Pillow==10.1.0
posthog==3.0.2
prelims==0.0.7
protobuf==4.24.4
pulsar-client==3.3.0
pyasn1==0.5.0
pyasn1-modules==0.3.0
pydantic==2.4.2
pydantic-core==2.10.1
PyPika==0.48.9
pysqlite3-binary==0.5.2.post1
python-dateutil==2.8.2
python-dotenv==1.0.0
PyYAML==6.0.1
regex==2023.10.3
requests==2.31.0
requests-oauthlib==1.3.1
rsa==4.9
safetensors==0.4.0
scikit-learn==1.3.2
scipy==1.10.1
sentence-transformers==2.2.2
sentencepiece==0.1.99
six==1.16.0
sniffio==1.3.0
starlette==0.27.0
sympy==1.12
tenacity==8.2.3
threadpoolctl==3.2.0
tokenizers==0.14.1
torch==2.1.0
torchvision==0.16.0
tqdm==4.66.1
transformers==4.34.1
triton==2.1.0
typer==0.9.0
typing-extensions==4.8.0
urllib3==2.0.7
uvicorn==0.23.2
uvloop==0.19.0
watchfiles==0.21.0
websocket-client==1.6.4
websockets==12.0
wrapt==1.15.0
zipp==3.17.0
import prelims
# Load the pysqlite3 package so it is registered in sys.modules.
__import__('pysqlite3')
import sys
# Re-register pysqlite3 under the name 'sqlite3' so that any later
# `import sqlite3` resolves to it instead of the standard-library module.
# This must run before chromadb is imported.
# NOTE(review): presumably needed because chromadb requires a newer SQLite
# than the system one provides -- confirm against the target environment.
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
import chromadb
class PostRecommender:
    """Recommend blog posts by embedding similarity using a Chroma collection.

    Posts are loaded through a ``prelims`` static-site posts handler, embedded
    with a SentenceTransformer model, and stored in a persistent Chroma
    database located at ``db_path``.
    """

    def __init__(self, db_path: str, post_path: str, model_name: str, collection_name: str):
        """Set up the post handler, the Chroma client and the embedding function.

        Args:
            db_path: Path passed to ``chromadb.PersistentClient``.
            post_path: Path passed to ``StaticSitePostsHandler``.
            model_name: SentenceTransformer model name used for embeddings.
            collection_name: Name of the Chroma collection to create or query.
        """
        self.handler = prelims.handler.StaticSitePostsHandler(post_path)
        self.client = chromadb.PersistentClient(path=db_path)
        self.collection_name = collection_name
        self.embedding_function = chromadb.utils.embedding_functions.SentenceTransformerEmbeddingFunction(
            model_name=model_name
        )
        # Resolved lazily: set by create_collection() or on the first search().
        self.collection = None

    def create_collection(self) -> None:
        """Create the collection and index every post loaded by the handler.

        Each post's content is stored as a document, keyed by the string form
        of its path.
        """
        self.collection = self.client.create_collection(
            self.collection_name, embedding_function=self.embedding_function
        )
        posts = self.handler.load_posts()
        contents = [post.content for post in posts]
        paths = [str(post.path) for post in posts]
        self.collection.add(documents=contents, ids=paths)

    def search(self, query: str, top_k: int = 3) -> dict:
        """Return the ``top_k`` stored posts most similar to ``query``.

        The collection is fetched from the client on first use if it has not
        been created or loaded yet.

        Args:
            query: Free-text query to embed and search for.
            top_k: Number of results to request.

        Returns:
            The value returned by the collection's ``query`` call, unmodified.
        """
        # Compare against None explicitly: the sentinel is None, and a
        # collection object that happens to be falsy must not be re-fetched.
        if self.collection is None:
            self.collection = self.client.get_collection(
                self.collection_name, embedding_function=self.embedding_function
            )
        return self.collection.query(query_texts=[query], n_results=top_k)
# Locations and model used by the demo run below.
POST_PATH = "../chezo.uno/content/post"
DB_PATH = "tmp/chroma.db"
MODEL_NAME = "oshizo/sbert-jsnli-luke-japanese-base-lite"
COLLECTION_NAME = "ja_posts"

if __name__ == "__main__":
    recommender = PostRecommender(
        db_path=DB_PATH,
        post_path=POST_PATH,
        model_name=MODEL_NAME,
        collection_name=COLLECTION_NAME,
    )
    # The collection is not built here; call recommender.create_collection()
    # once beforehand to index the posts.
    results = recommender.search(query="ホームページ", top_k=3)
    print(results)
sentence_transformers
prelims
chromadb
wheel
pysqlite3-binary
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment