Skip to content

Instantly share code, notes, and snippets.

@lukestanley
Created February 14, 2024 00:31
Show Gist options
  • Save lukestanley/eb1037478b1129a5ca0560eea761967e to your computer and use it in GitHub Desktop.
Save lukestanley/eb1037478b1129a5ca0560eea761967e to your computer and use it in GitHub Desktop.
A method to get easily parsable conversations from a ChatGPT data export of Feb 2024 vintage
def extract_messages(file_path="conversations.json", conversation_limit=None, message_limit=None):
    """Extract role/message pairs from a ChatGPT ``conversations.json`` export.

    Args:
        file_path: Path to the exported JSON file. Its top level must be a
            list of conversation dicts, each with a ``"mapping"`` dict and
            optionally a ``"title"``.
        conversation_limit: Maximum number of conversations to read, counted
            from the start of the file. ``None`` or ``0`` means no limit.
        message_limit: Maximum number of messages kept per conversation.
            ``None`` or ``0`` means no limit.

    Returns:
        A tuple ``(conversations, titles)``. ``conversations`` holds one list
        per conversation; each entry is ``{"role": ..., "message": ...}``,
        emitted in the iteration order of that conversation's ``"mapping"``.
        ``titles`` is the parallel list of conversation titles (``""`` when a
        conversation has no title).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If a conversation has no ``"mapping"`` or a kept message
            has no ``author``/``role``.
    """
    import json  # local import so the function works when pasted on its own

    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)

    extracted_conversations = []
    conv_titles = []

    # A falsy limit (None or 0) slices with None, i.e. the whole list.
    for conversation in data[: conversation_limit or None]:
        conv_titles.append(conversation.get("title", ""))
        extracted_messages = []

        # Only the node payloads are needed; the mapping keys are unused.
        for message_info in conversation["mapping"].values():
            if message_limit and len(extracted_messages) >= message_limit:
                break

            message = message_info.get("message")
            if not message:  # Node without a message payload
                continue

            parts = (message.get("content") or {}).get("parts")
            if not parts:  # Content without a non-empty "parts" list
                continue

            # Only the first part is used; it is passed through unchanged,
            # so a non-string part ends up in the output as-is.
            first_part = parts[0]
            if first_part:  # Skip empty (or None) first parts
                extracted_messages.append(
                    {"role": message["author"]["role"], "message": first_part}
                )

        extracted_conversations.append(extracted_messages)

    return extracted_conversations, conv_titles
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment