Created
April 26, 2023 23:04
-
-
Save avelican/11f429d0bb62891439f144bd7c0941a4 to your computer and use it in GitHub Desktop.
Keypoints.py - Summarize a long text document to bullet points
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import openai | |
# Instruction prepended to every chunk of text sent to the model.
prompt = "I'm a busy CEO, please summarize the following in 10-20 bullet points. Include only the most important, useful or interesting information.\n\n"

# Model name is chosen in gpt_chat_completion, since both 3.5 and 4 are used.

# Token budget for one request (gpt-3.5-turbo window; use 2048 for GPT-3).
CONTEXT_SIZE = 4096
# CONTEXT_SIZE = 1024  # tiny context for testing

# Split the window: roughly two thirds for input text, the rest for output.
MAX_TOKENS_IN = int(CONTEXT_SIZE * 2 / 3)
MAX_TOKENS_OUT = CONTEXT_SIZE - MAX_TOKENS_IN
def get_file_size(file_path):
    """Return the size of *file_path* in bytes, or None if it cannot be stat'd.

    Errors (missing file, permissions, ...) are printed rather than raised.
    """
    try:
        return os.path.getsize(file_path)
    except OSError as err:
        print(f"Error: {err}")
        return None
def counttokens(text):
    """Rough token estimate: ~3 characters per token. TODO(avelican): use tiktoken."""
    return len(text) // 3
def gpt(text, gpt4=False):
    """Summarize *text* via the chat API; use GPT-4 when *gpt4* is True.

    Thin convenience wrapper around gpt_chat_completion. The original
    if/else made the identical call in both branches, so the flag is now
    simply forwarded.
    """
    return gpt_chat_completion(text, gpt4=gpt4)
def gpt_chat_completion(text, gpt4=False):
    """Send *text* as one user message and return the model's reply text.

    Uses gpt-3.5-turbo by default; with gpt4=True switches to gpt-4 and
    doubles the output-token budget (gpt-4's context window is larger).
    Note: *text* is expected to already include the summarization prompt.
    """
    # Original code picked the model with a conditional expression and then
    # redundantly reassigned 'gpt-4' inside `if gpt4:` — consolidated here.
    model = 'gpt-4' if gpt4 else 'gpt-3.5-turbo'
    # Local variable so the module-level MAX_TOKENS_OUT is never mutated.
    max_tokens_out = MAX_TOKENS_OUT * 2 if gpt4 else MAX_TOKENS_OUT
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": text},
    ]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0,  # deterministic output for repeatable summaries
        max_tokens=max_tokens_out,
    )
    return response.choices[0].message["content"]
def save_key_points(filename_in, filename_out, gpt4=False):
    """Summarize *filename_in* chunk-by-chunk, writing bullets to *filename_out*.

    Input lines are packed into chunks of at most ~MAX_TOKENS_IN tokens
    (approximated at 3 characters per token, matching counttokens); each
    chunk is prefixed with the module-level `prompt` and summarized
    independently. Summaries are separated by blank lines in the output.
    """
    max_length_in = MAX_TOKENS_IN * 3  # chars; 3 chars/token heuristic
    if gpt4:
        max_length_in *= 2  # gpt-4 has a larger context window
    chunk = prompt
    # buffering=1 -> line-buffered, so partial progress survives a crash.
    with open(filename_in, "r", encoding='utf-8') as input_file, \
         open(filename_out, "w", encoding='utf-8', buffering=1) as output_file:
        for line in input_file:
            line = line.strip()
            if not line:
                # Fix: blank lines previously appended a stray ' ' each,
                # silently padding the chunk with whitespace.
                continue
            # Flush the current chunk before it would exceed the budget.
            if len(chunk) + len(line) > max_length_in:
                summary = gpt(chunk, gpt4)
                output_file.write(summary + '\n\n')
                chunk = prompt
            chunk += ' ' + line
        # Summarize the remaining partial chunk, if any text was added.
        if len(chunk) > len(prompt):
            summary = gpt(chunk, gpt4)
            output_file.write(summary + '\n\n')
def main():
    """Two-pass summarization: input.txt -> summary.txt (gpt-3.5),
    then summary.txt -> summary-summary.txt (gpt-4).

    Each phase is skipped if its output file already exists, so the
    script can be re-run to resume after an interruption.
    """
    print('starting')
    if os.path.exists('summary.txt'):
        print('Found summary.txt, skipping phase 1')
    else:
        save_key_points('input.txt', 'summary.txt')
    if os.path.exists('summary-summary.txt'):
        print('Found summary-summary.txt, skipping phase 2')
    else:
        save_key_points('summary.txt', 'summary-summary.txt', gpt4=True)
    print('done')


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment