Skip to content

Instantly share code, notes, and snippets.

@christophM
Created April 28, 2023 09:00
Show Gist options
  • Star 10 You must be signed in to star a gist
  • Fork 5 You must be signed in to fork a gist
  • Save christophM/7fd120cf94b819aaebc9b35c920eebe9 to your computer and use it in GitHub Desktop.
import sys
import os
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import MarkdownTextSplitter
from langchain.text_splitter import CharacterTextSplitter
from langchain.prompts.chat import (
ChatPromptTemplate,
SystemMessagePromptTemplate,
HumanMessagePromptTemplate,
)
from langchain.schema import (
AIMessage,
HumanMessage,
SystemMessage
)
# Prompt setup: the human message is just the raw text chunk to be edited.
human_template = """
{text}
"""
human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

# System prompt: the editing instructions the model follows for every chunk.
system_text = """You are an expert technical editor specializing in machine learning books written for machine learning engineers and data scientists. You are especially good at cutting clutter.
- Improve grammar and language
- fix errors
- cut clutter
- keep tone and voice
- don't change markdown syntax, e.g. keep [@reference]
- never cut jokes
- output 1 line per sentence (same as input)
"""
system_prompt = SystemMessage(content=system_text)

# Read the OpenAI API key from a local file (surrounding whitespace stripped).
keyfile = "oai.key"
with open(keyfile, 'r', encoding='utf-8') as f:
    key = f.read().strip()

# If you get timeouts, you might have to increase the timeout parameter.
llm = ChatOpenAI(openai_api_key=key, model="gpt-4", request_timeout=240)
def process_file(input_file):
    """Run *input_file* through the LLM editor and save the result.

    The file is split into ~1000-token chunks, each chunk is sent through
    the chat prompt, and the edited text is written incrementally to a
    sibling file with the same stem and a ``.qmd`` extension.

    :param input_file: path to the text/markdown file to edit
    """
    output_file = os.path.splitext(input_file)[0] + ".qmd"
    # Explicit UTF-8 so behavior doesn't depend on the platform's default encoding.
    with open(input_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # The Markdown-aware splitter didn't work so well; split on characters
    # with tiktoken-based chunk sizing instead.
    # splitter = MarkdownTextSplitter(chunk_size=1000, chunk_overlap=0)
    splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=1000, chunk_overlap=0)
    docs = splitter.split_text(content)
    print(f"Split into {len(docs)} docs")

    chat_prompt = ChatPromptTemplate.from_messages([system_prompt, human_message_prompt])
    # Write incrementally so partial progress survives a mid-run failure.
    with open(output_file, 'w', encoding='utf-8') as f:
        for doc in docs:
            result = llm(chat_prompt.format_prompt(text=doc).to_messages())
            print(result.content)
            f.write(result.content + '\n')
    print(f"Edited file saved as {output_file}")
if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Misuse: report on stderr and exit non-zero instead of printing
        # the usage line to stdout and exiting 0.
        print("Usage: python script.py input_file", file=sys.stderr)
        sys.exit(1)
    process_file(sys.argv[1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment