Skip to content

Instantly share code, notes, and snippets.

@hursh-desai
Created January 13, 2023 10:17
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save hursh-desai/61ea24bca576055f649e3b8706e2dc03 to your computer and use it in GitHub Desktop.
Save hursh-desai/61ea24bca576055f649e3b8706e2dc03 to your computer and use it in GitHub Desktop.
obsidian + langchain
import os
import re
import faiss
from langchain import FAISS
import obsidiantools.api as otools
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.llms import OpenAI
# --- Configuration -----------------------------------------------------------
# 'sk-********' is only a placeholder. setdefault() keeps a key that is already
# exported in the environment instead of clobbering it with the placeholder, so
# a real secret never has to be written into this file.
os.environ.setdefault("OPENAI_API_KEY", 'sk-********')
# Location of the Obsidian vault to index (replace <vault_name>).
dirpath = '/Users/hursh/<vault_name>'
# Vault object used below for note metadata and note source text.
vault = otools.Vault(dirpath).connect().gather()
# Embedding backend handed to FAISS when the index is built.
embeddings = OpenAIEmbeddings() # type: ignore
_HEADING_RE = re.compile(r'^#+\s')
_FENCE_RE = re.compile(r'^\s*(`{3,}|~{3,})')
_CLOSING_HASHES_RE = re.compile(r'(?:^|\s)#+$')
_TAG_RE = re.compile(r'<.*?>')


def _clean_heading(line):
    """Return the title of an ATX heading line without its ``#`` markers."""
    # Only the leading marker run and an optional closing run (which must be
    # preceded by whitespace) are removed, so a '#' that belongs to the title
    # itself -- "C#", "#tag" -- survives.
    title = line.lstrip('#').strip()
    return _CLOSING_HASHES_RE.sub('', title).strip()


def markdown_to_dict(markdown_text):
    """Split markdown text into sections keyed by heading title.

    Args:
        markdown_text: The full text of one markdown note.

    Returns:
        A dict mapping each heading title (``#`` markers stripped) to the text
        that follows it, up to the next heading, with ``<...>`` tags removed.
        Keys appear in order of first occurrence.

        * Text that precedes the first heading is stored under the key
          ``None``. A note without any heading is therefore returned as
          ``{None: <whole text>}``.
        * Sections that share the same title are concatenated in document
          order rather than overwriting one another.
        * Lines inside fenced code blocks (``` or ~~~) are never treated as
          headings, so ``# comment`` lines in code samples stay in the body.
    """
    sections = {}

    def store(header, body):
        key = None if header is None else _clean_heading(header)
        # Append instead of assign so a repeated title does not lose the
        # earlier section's text.
        sections[key] = sections.get(key, '') + _TAG_RE.sub('', body)

    current_header = None
    body_lines = []
    open_fence = None  # fence character ('`' or '~') of the block we are inside

    for line in markdown_text.split('\n'):
        fence = _FENCE_RE.match(line)
        if fence:
            marker = fence.group(1)[0]
            if open_fence is None:
                open_fence = marker
            elif marker == open_fence:
                open_fence = None
        elif open_fence is None and _HEADING_RE.match(line):
            pending = ''.join(body_lines)
            # Flush the finished section. Leading text before the first
            # heading is kept only when it has real content, so a note that
            # starts with a heading gains no empty ``None`` entry.
            if current_header is not None or pending.strip():
                store(current_header, pending)
            current_header = line
            body_lines = []
            continue
        body_lines.append(line + '\n')

    # The last section (or the whole text when no heading was found).
    store(current_header, ''.join(body_lines))
    return sections
# --- Build the search index --------------------------------------------------
# One text chunk per markdown section of every note that has a file path, with
# a parallel metadata entry recording which note/section the chunk came from.
df = vault.get_note_metadata()
all_text = []
all_metadata = []
# Only the index label is used (it is what get_source_text() is called with),
# so iterate the index directly instead of building every row with iterrows().
for index in df.loc[df['rel_filepath'].notna()].index:
    note = vault.get_source_text(index)
    clean_note = markdown_to_dict(note)
    for key, value in clean_note.items():
        # A section with no content (e.g. a heading immediately followed by
        # another heading) has nothing to retrieve; embedding it only wastes
        # an API call, and the embeddings endpoint may reject empty input.
        if not value.strip():
            continue
        all_text.append(value)
        all_metadata.append({'source' : index + '-' + str(key)})
# Fail with a clear message rather than an obscure error from inside FAISS.
if not all_text:
    raise ValueError(f"no non-empty note sections found in vault at {dirpath!r}")
docsearch = FAISS.from_texts(all_text, embeddings, metadatas=all_metadata)
# Question-answering chain that cites the 'source' metadata of the chunks used.
chain = load_qa_with_sources_chain(OpenAI(temperature=0))
def print_answer(question):
    """Print the chain's answer to *question*.

    Fetches the 4 indexed sections most similar to the question from
    ``docsearch``, passes them with the question to ``chain``, and prints the
    ``"output_text"`` entry of the result.
    """
    relevant_docs = docsearch.similarity_search(question, k=4)
    payload = {
        "input_documents": relevant_docs,
        "question": question,
    }
    result = chain(payload, return_only_outputs=True)
    print(result["output_text"])
# Demo query: look up the closest note sections and print the chain's answer.
print_answer('What is the meaning of life?')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment