# chrisclark/chatgpt-copy-editing.py

## chatgpt-copy-editing.py
import json, os, requests, openai

# OpenAI credential, read from the OPENAI_API_KEY environment variable
# (os.getenv returns None when the variable is unset).
openai_api_key = os.getenv('OPENAI_API_KEY')
openai.api_key = openai_api_key

# Directory that write_chunk() writes the chunk files into.
chunks_dir = "novel/chunks/"
# Directory that copy_edit_file() writes each chunk's edits into.
edits_dir = "novel/edits/"

#helper function to process files in a directory
def process_directory(d, func):
    """Call ``func(name, f)`` for every file in directory ``d``, in sorted order.

    Args:
        d: Path of the directory whose entries are processed.
        func: Callback receiving ``name`` (the filename without its
            extension) and ``f`` (the file opened for reading in text mode).
            The file is closed as soon as the callback returns.

    Entries named ``.DS_Store`` are skipped.
    """
    for filename in sorted(os.listdir(d)):
        name, _ = os.path.splitext(filename)
        if name == '.DS_Store':
            continue
        # List and open relative to ``d`` (not the global chunks_dir) so the
        # directory passed by the caller is the one that is actually read.
        file_path = os.path.join(d, filename)
        with open(file_path, 'r') as f:
            func(name, f)


def write_chunk(chunk, line_index):
    """Save ``chunk`` under chunks_dir as ``chunk_<line_index>.txt``.

    ``line_index`` is zero-padded to a minimum width of five digits in the
    file name. An existing file with the same name is overwritten.
    """
    target = os.path.join(chunks_dir, f"chunk_{line_index:05d}.txt")
    with open(target, "w") as out:
        out.write(chunk)

def chunkify(file_path, max_words=1000):
    """Split the text file at ``file_path`` into chunks of at most ``max_words`` words.

    Lines are never split: whole lines are accumulated until adding the next
    line would push the running word count past ``max_words``; the accumulated
    text is then handed to ``write_chunk`` and a new chunk starts with that
    line. A single line longer than ``max_words`` therefore becomes a chunk
    of its own. Each chunk is labelled with the zero-based index of its first
    line in the file.

    Args:
        file_path: Path of the text file to split.
        max_words: Word budget per chunk (words are whitespace-separated).
    """
    lines = []
    word_count = 0
    chunk_index = 0

    with open(file_path, 'r') as file:
        for i, line in enumerate(file):
            line_word_count = len(line.split())
            if word_count + line_word_count <= max_words:
                lines.append(line)
                word_count += line_word_count
                continue
            # Budget exceeded: flush what has been collected so far. Nothing
            # has been collected when the very first line is already over
            # budget, so skip the write rather than emit an empty chunk.
            if lines:
                write_chunk("".join(lines), chunk_index)
            lines = [line]
            word_count = line_word_count
            chunk_index = i

    # Write the last chunk, unless the input file was empty.
    if lines:
        write_chunk("".join(lines), chunk_index)

# Split novel/novel.txt into chunk files; this runs whenever the script is executed.
chunkify(os.path.join('novel/', "novel.txt"))


# System message sent by copy_edit() with every request. It restricts the model
# to copy errors and asks for bullet points in the `"old" -> "new"` shape that
# is_real_correction() later splits on '->'.
prompt = """
You are a copy editor looking for issues in a novel before it is submitted to publishers.

You are looking for obvious grammar and spelling issues and any information that is obviously incorrect. Do not make suggestions related to style, or edits for clarity. Just focus on copy errors.

Please format your responses as a series of bullet points. Start with a quote of a few words from the novel that you are copy-editing (so it's easy to find in the novel), then follow with your comments/corrections.

Here are some examples of good copy edits:

- "an two hundred year" -> "a two hundred year"
- "on to the veranda" -> "onto the veranda"
- "full memory of the night" -> "full memories of the night"
- "Her and Luis's dog" -> "Her and Luis' dog"
- "Felicia stiffened almost indecipherably." -> "Felicia stiffened almost imperceptibly."
- "The is the family kitchen." -> "This is the family kitchen."

Do not suggest substitutions of one type of punctuation mark for another. For example, do not suggest replacing ` with ', or “ with ".
"""

def copy_edit(p):
    """Ask the chat model to copy edit the text ``p`` and return its reply.

    The module-level ``prompt`` is sent as the system message and ``p`` is
    embedded in the user message. The request uses model "gpt-4" with
    temperature 0; the content of the first choice is returned.
    """
    system_message = {"role": "system", "content": prompt}
    user_message = {
        "role": "user",
        "content": f'Here is a chunk of the novel to copy edit: {p}',
    }
    completion = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[system_message, user_message],
        temperature=0,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']

def copy_edit_file(name, f):
    """Copy edit the open file ``f`` and save the reply as ``<name>_edits.txt`` in edits_dir.

    Prints a progress line before the request and "Done." once the edits
    have been written.
    """
    print(f"Processing file: {name}...")
    text = f.read()
    edits = copy_edit(text)
    out_path = os.path.join(edits_dir, f"{name}_edits.txt")
    with open(out_path, "w") as out:
        out.write(edits)
    print("Done.")

# Run copy_edit_file over the chunk files; executes whenever the script is run.
process_directory(chunks_dir, copy_edit_file)

def is_real_correction(input_str):
    """Return a normalized ``- old -> new`` line, or ``None`` if it is not a real correction.

    The text before the first ``->`` is the quoted original; the text after
    it (up to any further ``->``) is the proposed replacement. Both sides are
    stripped of surrounding spaces, dashes and straight double quotes, and
    curly quotes are converted to straight ones, so suggestions that only
    swap quote styles compare equal and are dropped.

    Args:
        input_str: One line of model output.

    Returns:
        The cleaned-up correction string, or ``None`` when the line has no
        ``->``, when both sides are identical after normalization, or when
        the replacement contains "remove extra space".
    """
    parts = input_str.split('->')
    if len(parts) < 2:
        # Not an "old -> new" line (blank line, commentary, ...); indexing
        # parts[1] here would raise IndexError.
        return None

    def normalize(side):
        return side.strip(' -"').replace('’', "'").replace('“', '"').replace('”', '"')

    left_side = normalize(parts[0])
    right_side = normalize(parts[1])
    # A trailing double quote can survive on the left side only (e.g. a curly
    # closing quote converted after stripping); drop it before comparing.
    if left_side.endswith('"') and not right_side.endswith('"'):
        left_side = left_side[:-1]
    if left_side != right_side and 'remove extra space' not in right_side:
        return f'- {left_side} -> {right_side}'
    return None


def post_process(name, f):
    """Append the real corrections found in the open file ``f`` to novel/final_edits.txt.

    Every line is stripped and passed through is_real_correction; lines for
    which it returns a falsy value are dropped. ``name`` is not used.
    """
    raw_lines = f.readlines()
    with open(os.path.join('novel/', "final_edits.txt"), "a") as edits_file:
        for raw in raw_lines:
            correction = is_real_correction(raw.strip())
            if not correction:
                continue
            edits_file.write(f"{correction}\n")

# Filter the model's raw edits through post_process (edits_dir is the directory passed in).
process_directory(edits_dir, post_process)

def find_hallucinations():
    """Return quoted snippets from the edits file that do not occur in the novel.

    Reads novel/novel.txt and novel/consolidated_edits.txt. For each edit
    line, the text before the first ``->`` (with surrounding spaces, dashes
    and straight double quotes stripped) is looked up verbatim in the novel.

    Returns:
        A list of the left-hand sides that are found neither as-is nor with
        their last character removed.
    """
    # NOTE(review): nothing visible in this file writes consolidated_edits.txt
    # (post_process appends to final_edits.txt) -- confirm it is produced separately.
    with open(os.path.join('novel/', "novel.txt"), "r") as f:
        novel = f.read()

    hallucinations = []
    with open(os.path.join('novel/', "consolidated_edits.txt"), "r") as edits_file:
        for line in edits_file:
            left_side = line.split('->')[0].strip(' -"')
            # deal with extra " character: also try the quote minus its last character
            if left_side not in novel and left_side[:-1] not in novel:
                hallucinations.append(left_side)
    return hallucinations
# Print the quoted snippets from the edits that could not be found in the novel text.
print(find_hallucinations())
	import json, os, requests, openai

	# OpenAI credential, read from the OPENAI_API_KEY environment variable
	# (os.getenv returns None when the variable is unset).
	openai_api_key = os.getenv('OPENAI_API_KEY')
	openai.api_key = openai_api_key

	# Directory that write_chunk() writes the chunk files into.
	chunks_dir = "novel/chunks/"
	# Directory that copy_edit_file() writes each chunk's edits into.
	edits_dir = "novel/edits/"

	#helper function to process files in a directory
	def process_directory(d, func):
	    """Call ``func(name, f)`` for every file in directory ``d``, in sorted order.

	    Args:
	        d: Path of the directory whose entries are processed.
	        func: Callback receiving ``name`` (the filename without its
	            extension) and ``f`` (the file opened for reading in text mode).
	            The file is closed as soon as the callback returns.

	    Entries named ``.DS_Store`` are skipped.
	    """
	    for filename in sorted(os.listdir(d)):
	        name, _ = os.path.splitext(filename)
	        if name == '.DS_Store':
	            continue
	        # List and open relative to ``d`` (not the global chunks_dir) so the
	        # directory passed by the caller is the one that is actually read.
	        file_path = os.path.join(d, filename)
	        with open(file_path, 'r') as f:
	            func(name, f)


	def write_chunk(chunk, line_index):
	    """Save ``chunk`` under chunks_dir as ``chunk_<line_index>.txt``.

	    ``line_index`` is zero-padded to a minimum width of five digits in the
	    file name. An existing file with the same name is overwritten.
	    """
	    target = os.path.join(chunks_dir, f"chunk_{line_index:05d}.txt")
	    with open(target, "w") as out:
	        out.write(chunk)

	def chunkify(file_path, max_words=1000):
	    """Split the text file at ``file_path`` into chunks of at most ``max_words`` words.

	    Lines are never split: whole lines are accumulated until adding the next
	    line would push the running word count past ``max_words``; the accumulated
	    text is then handed to ``write_chunk`` and a new chunk starts with that
	    line. A single line longer than ``max_words`` therefore becomes a chunk
	    of its own. Each chunk is labelled with the zero-based index of its first
	    line in the file.

	    Args:
	        file_path: Path of the text file to split.
	        max_words: Word budget per chunk (words are whitespace-separated).
	    """
	    lines = []
	    word_count = 0
	    chunk_index = 0

	    with open(file_path, 'r') as file:
	        for i, line in enumerate(file):
	            line_word_count = len(line.split())
	            if word_count + line_word_count <= max_words:
	                lines.append(line)
	                word_count += line_word_count
	                continue
	            # Budget exceeded: flush what has been collected so far. Nothing
	            # has been collected when the very first line is already over
	            # budget, so skip the write rather than emit an empty chunk.
	            if lines:
	                write_chunk("".join(lines), chunk_index)
	            lines = [line]
	            word_count = line_word_count
	            chunk_index = i

	    # Write the last chunk, unless the input file was empty.
	    if lines:
	        write_chunk("".join(lines), chunk_index)

	# Split novel/novel.txt into chunk files; this runs whenever the script is executed.
	chunkify(os.path.join('novel/', "novel.txt"))


	# System message sent by copy_edit() with every request. It restricts the model
	# to copy errors and asks for bullet points in the `"old" -> "new"` shape that
	# is_real_correction() later splits on '->'.
	prompt = """
	You are a copy editor looking for issues in a novel before it is submitted to publishers.

	You are looking for obvious grammar and spelling issues and any information that is obviously incorrect. Do not make suggestions related to style, or edits for clarity. Just focus on copy errors.

	Please format your responses as a series of bullet points. Start with a quote of a few words from the novel that you are copy-editing (so it's easy to find in the novel), then follow with your comments/corrections.

	Here are some examples of good copy edits:

	- "an two hundred year" -> "a two hundred year"
	- "on to the veranda" -> "onto the veranda"
	- "full memory of the night" -> "full memories of the night"
	- "Her and Luis's dog" -> "Her and Luis' dog"
	- "Felicia stiffened almost indecipherably." -> "Felicia stiffened almost imperceptibly."
	- "The is the family kitchen." -> "This is the family kitchen."

	Do not suggest substitutions of one type of punctuation mark for another. For example, do not suggest replacing ` with ', or “ with ".
	"""

	def copy_edit(p):
	    """Ask the chat model to copy edit the text ``p`` and return its reply.

	    The module-level ``prompt`` is sent as the system message and ``p`` is
	    embedded in the user message. The request uses model "gpt-4" with
	    temperature 0; the content of the first choice is returned.
	    """
	    system_message = {"role": "system", "content": prompt}
	    user_message = {
	        "role": "user",
	        "content": f'Here is a chunk of the novel to copy edit: {p}',
	    }
	    completion = openai.ChatCompletion.create(
	        model="gpt-4",
	        messages=[system_message, user_message],
	        temperature=0,
	    )
	    first_choice = completion['choices'][0]
	    return first_choice['message']['content']

	def copy_edit_file(name, f):
	    """Copy edit the open file ``f`` and save the reply as ``<name>_edits.txt`` in edits_dir.

	    Prints a progress line before the request and "Done." once the edits
	    have been written.
	    """
	    print(f"Processing file: {name}...")
	    text = f.read()
	    edits = copy_edit(text)
	    out_path = os.path.join(edits_dir, f"{name}_edits.txt")
	    with open(out_path, "w") as out:
	        out.write(edits)
	    print("Done.")

	# Run copy_edit_file over the chunk files; executes whenever the script is run.
	process_directory(chunks_dir, copy_edit_file)

	def is_real_correction(input_str):
	    """Return a normalized ``- old -> new`` line, or ``None`` if it is not a real correction.

	    The text before the first ``->`` is the quoted original; the text after
	    it (up to any further ``->``) is the proposed replacement. Both sides are
	    stripped of surrounding spaces, dashes and straight double quotes, and
	    curly quotes are converted to straight ones, so suggestions that only
	    swap quote styles compare equal and are dropped.

	    Args:
	        input_str: One line of model output.

	    Returns:
	        The cleaned-up correction string, or ``None`` when the line has no
	        ``->``, when both sides are identical after normalization, or when
	        the replacement contains "remove extra space".
	    """
	    parts = input_str.split('->')
	    if len(parts) < 2:
	        # Not an "old -> new" line (blank line, commentary, ...); indexing
	        # parts[1] here would raise IndexError.
	        return None

	    def normalize(side):
	        return side.strip(' -"').replace('’', "'").replace('“', '"').replace('”', '"')

	    left_side = normalize(parts[0])
	    right_side = normalize(parts[1])
	    # A trailing double quote can survive on the left side only (e.g. a curly
	    # closing quote converted after stripping); drop it before comparing.
	    if left_side.endswith('"') and not right_side.endswith('"'):
	        left_side = left_side[:-1]
	    if left_side != right_side and 'remove extra space' not in right_side:
	        return f'- {left_side} -> {right_side}'
	    return None


	def post_process(name, f):
	    """Append the real corrections found in the open file ``f`` to novel/final_edits.txt.

	    Every line is stripped and passed through is_real_correction; lines for
	    which it returns a falsy value are dropped. ``name`` is not used.
	    """
	    raw_lines = f.readlines()
	    with open(os.path.join('novel/', "final_edits.txt"), "a") as edits_file:
	        for raw in raw_lines:
	            correction = is_real_correction(raw.strip())
	            if not correction:
	                continue
	            edits_file.write(f"{correction}\n")

	# Filter the model's raw edits through post_process (edits_dir is the directory passed in).
	process_directory(edits_dir, post_process)

	def find_hallucinations():
	    """Return quoted snippets from the edits file that do not occur in the novel.

	    Reads novel/novel.txt and novel/consolidated_edits.txt. For each edit
	    line, the text before the first ``->`` (with surrounding spaces, dashes
	    and straight double quotes stripped) is looked up verbatim in the novel.

	    Returns:
	        A list of the left-hand sides that are found neither as-is nor with
	        their last character removed.
	    """
	    # NOTE(review): nothing visible in this file writes consolidated_edits.txt
	    # (post_process appends to final_edits.txt) -- confirm it is produced separately.
	    with open(os.path.join('novel/', "novel.txt"), "r") as f:
	        novel = f.read()

	    hallucinations = []
	    with open(os.path.join('novel/', "consolidated_edits.txt"), "r") as edits_file:
	        for line in edits_file:
	            left_side = line.split('->')[0].strip(' -"')
	            # deal with extra " character: also try the quote minus its last character
	            if left_side not in novel and left_side[:-1] not in novel:
	                hallucinations.append(left_side)
	    return hallucinations
	# Print the quoted snippets from the edits that could not be found in the novel text.
	print(find_hallucinations())