Skip to content

Instantly share code, notes, and snippets.

@bioshazard
Created May 17, 2023 20:01
Show Gist options
  • Save bioshazard/48fe17a0df7ac2ae291baca88f8ab48f to your computer and use it in GitHub Desktop.
Save bioshazard/48fe17a0df7ac2ae291baca88f8ab48f to your computer and use it in GitHub Desktop.
# WIP attempt at summarizing a podcast given a timestamped transcript
# https://python.langchain.com/en/latest/modules/chains/index_examples/summarize.html
import os
from langchain import OpenAI, PromptTemplate, LLMChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from langchain.chains.summarize import load_summarize_chain
# temperature=0 asks the model for its least-random completions.
llm = OpenAI(temperature=0)

# Path of the timestamped transcript, taken from the TRANSCRIPT_FILE env var.
transcript_file = os.getenv("TRANSCRIPT_FILE")
if not transcript_file:
    # os.getenv returns None when the variable is unset; open(None) would
    # otherwise fail with an opaque TypeError, so stop with a clear message.
    raise SystemExit(
        "TRANSCRIPT_FILE is not set; point it at the transcript text file."
    )

# Read the whole transcript into memory as UTF-8 text.
with open(transcript_file, encoding="utf-8") as f:
    transcript_text = f.read()
# Cut the transcript into overlapping chunks. Sizes are measured with
# length_function (len), i.e. in characters of the transcript string.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=3000,
    chunk_overlap=200,
    length_function=len,
)

# One Document per chunk; report how many chunks were produced.
docs = text_splitter.create_documents([transcript_text])
print("Docs array length:", len(docs))
# Per-chunk prompt: the chunk text fills {text}, followed by the instruction,
# with the two parts separated by three newlines.
summary_and_bullets = "\n\n\n".join((
    "{text}",
    "GIVEN THE ABOVE, PROVIDE A SUMMARY AND EXTRACT A BULLET LIST OF 2-WORD-MAXIMUM CATCHY TOPIC WITH ASSOCIATED TIMESTAMP:",
))
PROMPTsab = PromptTemplate(
    input_variables=["text"],
    template=summary_and_bullets,
)

# Final prompt: same layout as above, but the instruction asks for one concise
# episode summary and topic list built from the earlier summaries in {text}.
podcast_description = "\n\n\n".join((
    "{text}",
    "GIVEN THE ABOVE SUMMARIES AND TIMESTAMP BULLETS, PROVIDE A VERY CONCISE CATCHY PODCAST EPISODE SUMMARY AND A BULLET LIST OF THE CATCHIEST 2-WORD-MAXIMUM TOPICS WITH ASSOCIATED TIMESTAMP:",
))
PROMPTpd = PromptTemplate(
    input_variables=["text"],
    template=podcast_description,
)
# Assemble a "map_reduce" summarize chain: PROMPTsab is passed as the map
# prompt and PROMPTpd as the combine prompt.
chain = load_summarize_chain(
    llm,
    chain_type="map_reduce",
    map_prompt=PROMPTsab,
    combine_prompt=PROMPTpd,
)

# Run the chain over the transcript chunks and print whatever it returns.
result = chain.run(docs)
print(result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment