Last active
October 30, 2024 13:06
-
-
Save PriNova/d3d9393eb84d112106703820146c11b4 to your computer and use it in GitHub Desktop.
This script transforms an exported chat history from Cody into separate Markdown-formatted files.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Processes a JSON file containing an exported chat history and converts each session to a Markdown file. | |
| Arguments of process_chat_history, the main entry point: | |
| input_json (str): Path to the input JSON file containing chat history. | |
| output_dir (str): Directory where the generated Markdown files will be saved. | |
| Raises: | |
| FileNotFoundError: If the input JSON file does not exist. | |
| json.JSONDecodeError: If the JSON file is improperly formatted. | |
| Outline of the processing steps: | |
| # Load and parse the JSON file | |
| with open(input_json, "r", encoding="utf-8") as f: | |
| chat_history = json.load(f) | |
| # Process each session | |
| for session in chat_history: | |
| create_markdown_file(session, output_dir) | |
| """ | |
| import argparse | |
| import json | |
| from pathlib import Path | |
def _message_text(interaction, key):
    """Return the ``text`` of the message stored under *key*, or ``None`` if it is unavailable."""
    message = interaction.get(key)
    # A message may be absent or null (e.g. an unanswered prompt); treat both as "no text".
    if not isinstance(message, dict):
        return None
    return message.get("text")


def create_markdown_file(session, output_dir):
    """
    Generates a Markdown file from a chat session object,
    with separate sections for human and assistant messages.

    Args:
        session (dict): A dictionary representing a chat session. The optional
            "id" key names the session and the optional "interactions" key holds
            a list of dicts with "humanMessage" / "assistantMessage" entries,
            each carrying a "text" field.
        output_dir (str): The output directory where the Markdown file will be
            saved. It is created, including missing parent directories, if it
            doesn't exist.

    Returns:
        pathlib.Path: The path of the generated Markdown file.

    Raises:
        OSError: If the directory or the file cannot be created.
    """
    # Create the output directory (and any missing parents) if it doesn't exist
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    # Extract session ID and create filename. Path separators inside the ID
    # would otherwise point the file into a (usually missing) subdirectory.
    session_id = session.get("id", "unknown_session")
    safe_id = str(session_id).replace("/", "_").replace("\\", "_")
    filename = target_dir / f"chat_session_{safe_id}.md"

    # Start building markdown content; the heading keeps the unmodified ID
    markdown_content = [f"# Chat Session {session_id}\n\n"]

    # Process each interaction; a null "interactions" value counts as empty
    for interaction in session.get("interactions") or []:
        if not isinstance(interaction, dict):
            continue
        for key, heading in (
            ("humanMessage", "Human"),
            ("assistantMessage", "Assistant"),
        ):
            text = _message_text(interaction, key)
            if text is None:
                # Skip messages without text instead of failing the whole export
                continue
            markdown_content.append(f"## {heading}\n")
            markdown_content.append(f"{text}\n\n")

    # Write to file
    with open(filename, "w", encoding="utf-8") as f:
        f.write("".join(markdown_content))
    return filename
def process_chat_history(input_json, output_dir):
    """
    Reads a chat-history JSON file and writes one Markdown file per session
    into the given output directory.

    Args:
        input_json (str): The path to the input JSON file containing the chat history.
        output_dir (str): The path to the output directory where the Markdown files will be saved.

    Returns:
        list: The values returned by create_markdown_file, one per processed
            session, in the order the sessions appear in the input.
    """
    # Load the whole history up front
    with open(input_json, "r", encoding="utf-8") as source:
        sessions = json.load(source)

    # Only dict entries are treated as sessions; anything else is skipped
    return [
        create_markdown_file(entry, output_dir)
        for entry in sessions
        if isinstance(entry, dict)
    ]
# Script entry point: parse the command line, convert, and report the results
if __name__ == "__main__":
    cli = argparse.ArgumentParser(
        description="Convert chat history JSON to markdown files"
    )

    # Each row: short flag, long flag, default value, help text
    for short_flag, long_flag, fallback, description in (
        (
            "-i",
            "--input",
            "data/ChatHistory.json",
            "Input JSON file path (default: data/ChatHistory.json)",
        ),
        (
            "-o",
            "--output",
            "data/chat_sessions",
            "Output directory for markdown files (default: data/chat_sessions)",
        ),
    ):
        cli.add_argument(short_flag, long_flag, default=fallback, help=description)

    options = cli.parse_args()

    # Run the conversion and list every file that was written
    written = process_chat_history(options.input, options.output)
    print(f"Generated {len(written)} markdown files:")
    for path in written:
        print(f"- {path}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment