Skip to content

Instantly share code, notes, and snippets.

@PriNova
Last active October 30, 2024 13:06
Show Gist options
  • Select an option

  • Save PriNova/d3d9393eb84d112106703820146c11b4 to your computer and use it in GitHub Desktop.

Select an option

Save PriNova/d3d9393eb84d112106703820146c11b4 to your computer and use it in GitHub Desktop.
This script transforms an exported chat history from Cody into separate Markdown-formatted files.
"""
Processes a JSON file containing chat history and converts each session to a Markdown file.

The conversion is driven by process_chat_history(input_json, output_dir):

Args:
    input_json (str): Path to the input JSON file containing chat history.
    output_dir (str): Directory where the generated Markdown files will be saved.

Raises:
    FileNotFoundError: If the input JSON file does not exist.
    json.JSONDecodeError: If the JSON file is improperly formatted.

Outline of the processing:

    # Load and parse the JSON file
    with open(input_json, "r", encoding="utf-8") as f:
        chat_history = json.load(f)
    # Process each session
    for session in chat_history:
        create_markdown_file(session, output_dir)
"""
import argparse
import json
from pathlib import Path
def create_markdown_file(session, output_dir):
    """
    Generates a Markdown file from a chat session object,
    with separate sections for human and assistant messages.

    Args:
        session (dict): A dictionary representing a chat session. Its optional
            "id" is used in the file name and title; its optional
            "interactions" list supplies the messages.
        output_dir (str): The output directory where the Markdown file will be
            saved. It is created, including missing parents, if necessary.

    Returns:
        pathlib.Path: The path of the generated Markdown file.
    """
    # parents=True so that nested output paths (e.g. "data/chat_sessions")
    # work even when the parent directory does not exist yet.
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    # Extract session ID and create filename
    session_id = session.get("id", "unknown_session")
    filename = target_dir / f"chat_session_{session_id}.md"

    # Start building markdown content
    markdown_content = [f"# Chat Session {session_id}\n\n"]

    # Message key -> section heading, in the order they are written
    sections = (
        ("humanMessage", "## Human\n"),
        ("assistantMessage", "## Assistant\n"),
    )

    # "or []" also covers an explicit null "interactions" value
    for interaction in session.get("interactions") or []:
        if not isinstance(interaction, dict):
            continue
        for key, heading in sections:
            message = interaction.get(key)
            # Guard against a null message or one without "text": skip the
            # section instead of raising TypeError/KeyError for the whole file.
            if not isinstance(message, dict) or message.get("text") is None:
                continue
            markdown_content.append(heading)
            markdown_content.append(f"{message['text']}\n\n")

    # Write to file
    with open(filename, "w", encoding="utf-8") as f:
        f.write("".join(markdown_content))
    return filename
def process_chat_history(input_json, output_dir):
    """
    Convert every chat session found in a JSON export into its own Markdown file.

    Args:
        input_json (str): Location of the JSON file holding the chat history.
        output_dir (str): Directory that receives the generated Markdown files.

    Returns:
        list: Whatever create_markdown_file returned for each converted
            session, in the order the sessions appear in the JSON file.
    """
    # Load the whole export in one go
    raw_text = Path(input_json).read_text(encoding="utf-8")
    sessions = json.loads(raw_text)

    # Only dict entries are treated as sessions; anything else is ignored
    return [
        create_markdown_file(entry, output_dir)
        for entry in sessions
        if isinstance(entry, dict)
    ]
# Command-line entry point
if __name__ == "__main__":
    # Describe the two optional command-line switches
    cli = argparse.ArgumentParser(description="Convert chat history JSON to markdown files")
    cli.add_argument(
        "-i", "--input",
        default="data/ChatHistory.json",
        help="Input JSON file path (default: data/ChatHistory.json)",
    )
    cli.add_argument(
        "-o", "--output",
        default="data/chat_sessions",
        help="Output directory for markdown files (default: data/chat_sessions)",
    )
    options = cli.parse_args()

    # Run the conversion, then report every file that was written
    written = process_chat_history(options.input, options.output)
    print(f"Generated {len(written)} markdown files:")
    for path in written:
        print(f"- {path}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment