Created
March 20, 2024 13:19
-
-
Save dcbark01/59b8ad9f12f4cbecc791c3618bdde0b0 to your computer and use it in GitHub Desktop.
Save Langchain Documents to JSONL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import typing as t

import jsonlines
from langchain.schema import Document
def save_docs_to_jsonl(documents: t.Iterable[Document], file_path: str) -> None:
    """Serialize documents to a JSON Lines file, one document per line.

    Args:
        documents: The documents to save. Each one is converted to a plain
            mapping with ``doc.dict()`` before being written.
        file_path: Path of the JSONL file, opened with ``mode="w"``.
    """
    with jsonlines.open(file_path, mode="w") as writer:
        for doc in documents:
            writer.write(doc.dict())
def load_docs_from_jsonl(file_path: str) -> t.List[Document]:
    """Load documents from a JSON Lines file.

    Args:
        file_path: Path of the JSONL file, opened with ``mode="r"``.

    Returns:
        A list with one ``Document`` per record in the file, in file order.
        Each record is passed to ``Document`` as keyword arguments.
    """
    with jsonlines.open(file_path, mode="r") as reader:
        return [Document(**record) for record in reader]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment