# Source: GitHub gist chrisclark/612ab8fa9c4c6dd5a85c1529162e0efd
# Author: @chrisclark. Created June 29, 2023 00:44.
import json, os, requests, openai
# The API key is read from the OPENAI_API_KEY environment variable
# (os.getenv returns None when the variable is not set).
openai_api_key = os.getenv('OPENAI_API_KEY')
openai.api_key = openai_api_key
# Directory that write_chunk writes the chunk files into.
chunks_dir = "novel/chunks/"
# Directory that copy_edit_file writes the per-chunk edit files into.
edits_dir = "novel/edits/"
# Helper function to process files in a directory.
def process_directory(d, func):
    """Call ``func(name, file_obj)`` for every file in directory ``d``.

    Files are visited in sorted filename order. ``name`` is the filename
    with its extension removed and ``file_obj`` is the file opened for
    reading in text mode; it is closed again as soon as ``func`` returns.
    The macOS ``.DS_Store`` metadata file is skipped.

    Args:
        d: Path of the directory to walk.
        func: Callable taking ``(name, file_obj)``.
    """
    # List and open from ``d`` itself, not a fixed directory, so the same
    # helper works for both the chunk and the edit directories.
    for filename in sorted(os.listdir(d)):
        name, _ = os.path.splitext(filename)
        if name == '.DS_Store':
            continue
        with open(os.path.join(d, filename), 'r') as f:
            func(name, f)
def write_chunk(chunk, line_index):
    """Write ``chunk`` to ``chunk_NNNNN.txt`` inside ``chunks_dir``.

    ``NNNNN`` is ``line_index`` zero-padded to five digits, so sorting the
    filenames as strings keeps them in numeric order. An existing file
    with the same name is overwritten.
    """
    target = os.path.join(chunks_dir, f"chunk_{line_index:05d}.txt")
    with open(target, "w") as out:
        out.write(chunk)
def chunkify(file_path, max_words=1000):
    """Split a text file into chunks of at most ``max_words`` words.

    The file is read line by line and lines are never split: a chunk is
    closed as soon as adding the next line would push it past
    ``max_words``. A single line longer than ``max_words`` therefore ends
    up as a chunk of its own. Each chunk is passed to ``write_chunk``
    together with the zero-based index of its first line.

    Empty chunks are never written, so an empty input file produces no
    chunk files.

    Args:
        file_path: Path of the text file to split.
        max_words: Word budget per chunk (words are whitespace-separated).
    """
    pending = []        # lines collected for the chunk being built
    word_count = 0      # words currently held in ``pending``
    chunk_index = 0     # index of the first line of the current chunk
    with open(file_path, 'r') as file:
        for i, line in enumerate(file):
            line_word_count = len(line.split())
            if word_count + line_word_count <= max_words:
                pending.append(line)
                word_count += line_word_count
                continue
            # The line does not fit: flush what we have (if anything) and
            # start a new chunk at this line.
            if pending:
                write_chunk("".join(pending), chunk_index)
            pending = [line]
            word_count = line_word_count
            chunk_index = i
    # Write the last chunk.
    if pending:
        write_chunk("".join(pending), chunk_index)
# Split novel/novel.txt into chunk files. This runs as soon as the script
# reaches this line.
chunkify(os.path.join('novel/', "novel.txt"))
# Instructions sent as the "system" message by copy_edit. The examples
# use the `"original" -> "replacement"` shape that is_real_correction and
# find_hallucinations later split on '->'.
prompt = """
You are a copy editor looking for issues in a novel before it is submitted to publishers.
You are looking for obvious grammar and spelling issues and any information that is obviously incorrect. Do not make suggestions related to style, or edits for clarity. Just focus on copy errors.
Please format your responses as a series of bullet points. Start with a quote of a few words from the novel that you are copy-editing (so it's easy to find in the novel), then follow with your comments/corrections.
Here are some examples of good copy edits:
- "an two hundred year" -> "a two hundred year"
- "on to the veranda" -> "onto the veranda"
- "full memory of the night" -> "full memories of the night"
- "Her and Luis's dog" -> "Her and Luis' dog"
- "Felicia stiffened almost indecipherably." -> "Felicia stiffened almost imperceptibly."
- "The is the family kitchen." -> "This is the family kitchen."
Do not suggest substitutions of one type of punctuation mark for another. For example, do not suggest replacing ` with ', or “ with ".
"""
def copy_edit(p):
    """Send one chunk of the novel to the chat model and return its reply.

    The module-level ``prompt`` is sent as the system message and the
    chunk text ``p`` is embedded in the user message. The request uses the
    "gpt-4" model with temperature 0.

    Args:
        p: Text of the chunk to copy edit.

    Returns:
        The message content of the first choice in the response.
    """
    system_turn = {"role": "system", "content": prompt}
    user_turn = {
        "role": "user",
        "content": f'Here is a chunk of the novel to copy edit: {p}',
    }
    completion = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[system_turn, user_turn],
        temperature=0,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']
def copy_edit_file(name, f):
    """Copy edit one chunk file and save the result in ``edits_dir``.

    Has the ``(name, file_obj)`` shape that ``process_directory`` calls.
    The whole of ``f`` is read and passed to ``copy_edit``; the returned
    text is written to ``<name>_edits.txt``. Progress is printed before
    and after.
    """
    print(f"Processing file: {name}...")
    out_path = os.path.join(edits_dir, f"{name}_edits.txt")
    suggestions = copy_edit(f.read())
    with open(out_path, "w") as edits_file:
        edits_file.write(suggestions)
    print("Done.")
# Copy-edit pass: copy_edit_file is called for each file that
# process_directory opens.
process_directory(chunks_dir, copy_edit_file)
def _normalize_side(text):
    """Strip bullet/quote decoration from one side of an edit and straighten curly quotes."""
    return text.strip(' -"').replace('’', "'").replace('“', '"').replace('”', '"')


def is_real_correction(input_str):
    """Return a cleaned-up edit line, or ``None`` if the line is not a real edit.

    ``input_str`` is expected to look like ``- "original" -> "replacement"``.
    Both sides are stripped of surrounding spaces, dashes and straight
    double quotes, and curly quotes are converted to straight ones.

    ``None`` is returned when:
      * the line contains no ``->`` at all (blank lines, headings, prose);
      * both sides are identical after normalization (for example the
        only difference was the style of quotation mark);
      * the replacement contains the phrase ``remove extra space``.

    Args:
        input_str: One line of model output, already stripped of the
            trailing newline.

    Returns:
        ``'- <original> -> <replacement>'`` for a real correction,
        otherwise ``None``.
    """
    parts = input_str.split('->')
    if len(parts) < 2:
        # Not an "a -> b" line; indexing parts[1] would raise IndexError.
        return None
    left_side = _normalize_side(parts[0])
    right_side = _normalize_side(parts[1])
    # A curly closing quote on the left becomes a straight quote only after
    # the strip above, so it can be left dangling; drop it when the right
    # side has no matching one.
    if left_side.endswith('"') and not right_side.endswith('"'):
        left_side = left_side[:-1]
    if left_side == right_side or 'remove extra space' in right_side:
        return None
    return f'- {left_side} -> {right_side}'
def post_process(name, f):
    """Append the real corrections found in ``f`` to ``novel/final_edits.txt``.

    Has the ``(name, file_obj)`` shape that ``process_directory`` calls;
    ``name`` is not used. Every line of ``f`` is stripped and passed
    through ``is_real_correction``; each truthy result is written on a
    line of its own. The output file is opened in append mode, so results
    accumulate across calls.
    """
    raw_lines = f.readlines()
    destination = os.path.join('novel/', "final_edits.txt")
    with open(destination, "a") as edits_file:
        checked = (is_real_correction(raw.strip()) for raw in raw_lines)
        edits_file.writelines(kept + '\n' for kept in checked if kept)
# Filtering pass: hands edits_dir and post_process to process_directory.
process_directory(edits_dir, post_process)
def find_hallucinations(novel_path=os.path.join('novel/', "novel.txt"),
                        edits_path=os.path.join('novel/', "consolidated_edits.txt")):
    """Return quoted originals from the edits file that do not occur in the novel.

    Each line of the edits file is split on ``->``; the left-hand part,
    stripped of surrounding spaces, dashes and straight double quotes, is
    searched for verbatim in the novel text. It is also tried with its
    last character removed, to tolerate a stray trailing quote character.

    Args:
        novel_path: Path of the novel text file.
        edits_path: Path of the edits file, one ``original -> replacement``
            per line.

    Returns:
        A list of the left-hand sides that were not found in the novel.
    """
    # NOTE(review): the default edits file is "consolidated_edits.txt",
    # while post_process writes "final_edits.txt". Presumably the
    # consolidated file is produced by hand; confirm.
    with open(novel_path, "r") as f:
        novel = f.read()
    hallucinations = []
    with open(edits_path, "r") as edits_file:
        for line in edits_file:
            left_side = line.split('->')[0].strip(' -"')
            # Deal with an extra " character left at the end of the quote.
            if left_side not in novel and left_side[:-1] not in novel:
                hallucinations.append(left_side)
    return hallucinations
# Print the list returned by find_hallucinations.
print(find_hallucinations())
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment