# Skip to content
#
# Instantly share code, notes, and snippets.
#
# @e-p-armstrong
# Last active May 6, 2024 03:04
# Show Gist options
#   • Save e-p-armstrong/edfe99929e5ae0fbf7088c482e5caf0e to your computer and use it in GitHub Desktop.
import json
import traceback
# Was this helpful? I have another open source project you can check out if you're interested at https://github.com/e-p-armstrong/augmentoolkit/tree/master
def _first_part_text(message, blocked_types):
    """Return the value of the message's first content part, or None if unusable.

    None is returned when the message has no non-empty ``content['parts']``
    list, when any dict part has a ``content_type`` listed in
    ``blocked_types``, or when the first part is neither a string nor a dict
    carrying a ``'content'`` key.
    """
    content = message.get("content") or {}
    parts = content.get("parts")
    if not parts:
        # e.g. a content object that carries 'text' instead of 'parts'.
        return None
    if any(
        isinstance(part, dict) and part.get("content_type") in blocked_types
        for part in parts
    ):
        return None
    first = parts[0]
    if isinstance(first, str):
        return first
    if isinstance(first, dict) and "content" in first:
        return first["content"]
    return None


def process_openai_data(input_file, output_file, system_prompt_func):
    """Convert a JSON list of conversations into a JSON Lines file.

    ``input_file`` must contain a JSON array of objects that each have a
    ``'title'`` and a ``'mapping'`` dict of nodes (presumably a ChatGPT data
    export -- confirm against the producer). For every conversation, one line
    ``{"conversations": [...]}`` is written to ``output_file``. The list starts
    with a ``"system"`` entry built by ``system_prompt_func(title)``, followed
    by ``"human"`` entries for ``user`` messages and ``"gpt"`` entries for
    ``assistant`` messages. Messages with any other role are ignored.

    A conversation is cut off at the first user/assistant message that cannot
    be rendered as text (see ``_first_part_text``). Conversations that end up
    with only the system entry are not written.

    Messages are taken in the iteration order of ``mapping``.
    NOTE(review): if ``mapping`` encodes a tree with alternative branches,
    this order may interleave them -- confirm against real exports.

    Args:
        input_file: Path of the JSON file to read (decoded as UTF-8).
        output_file: Path of the JSON Lines file to write (overwritten).
        system_prompt_func: Callable taking the conversation title and
            returning the system prompt string.

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If ``input_file`` is not valid JSON.
    """
    # source role -> (output "from" label, part content types that end the
    # conversation). The asymmetry between roles is intentional: it mirrors
    # the checks this function has always applied.
    role_rules = {
        "user": ("human", ("code", "image_asset_pointer")),
        "assistant": ("gpt", ("image_asset_pointer",)),
    }

    # Parse the input before opening the output so that a missing or malformed
    # input does not truncate an existing output file.
    with open(input_file, "r", encoding="utf-8") as f_in:
        data = json.load(f_in)

    with open(output_file, "w", encoding="utf-8") as f_out:
        for obj in data:
            try:
                system_prompt = system_prompt_func(obj["title"])
                conversation = [{"from": "system", "value": system_prompt}]

                for node in obj["mapping"].values():
                    message = node.get("message")
                    if not message:
                        continue
                    rule = role_rules.get(message["author"]["role"])
                    if rule is None:
                        continue
                    speaker, blocked_types = rule
                    text = _first_part_text(message, blocked_types)
                    if text is None:
                        # Unsupported content: keep what was collected so far
                        # and stop reading this conversation.
                        break
                    conversation.append({"from": speaker, "value": text})

                if len(conversation) > 1:
                    f_out.write(json.dumps({"conversations": conversation}) + "\n")
            except Exception as e:
                # Best effort: one malformed conversation must not abort the
                # whole export, so report it and move on to the next one.
                print(f"Error processing conversation: {str(e)}")
                traceback.print_exc()
def generate_system_prompt(title):
    """Build the system-prompt text for a conversation with the given title."""
    template = "The following conversation is related to the topic: {}"
    return template.format(title)
if __name__ == "__main__":
    # Script entry point: convert the conversations file in gpt_data_export/
    # using the default title-based system prompt.
    process_openai_data(
        input_file='gpt_data_export/conversations.json',
        output_file='gpt_data_export.json',
        system_prompt_func=generate_system_prompt,
    )
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment