Skip to content

Instantly share code, notes, and snippets.

@amane-katagiri
Last active July 31, 2023 06:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amane-katagiri/e68857257e144544b9cca4f3ab7fdabf to your computer and use it in GitHub Desktop.
Save amane-katagiri/e68857257e144544b9cca4f3ab7fdabf to your computer and use it in GitHub Desktop.
Export every conversation branch from a ChatGPT data export (conversations.json) to Markdown files.
#!/usr/bin/env python3
# Usage: ./export_chatgpt.py conversations.json ./path/to/export/dir
from dataclasses import dataclass
import itertools
import json
import logging
from logging import getLogger, StreamHandler
from pathlib import Path
import sys
# Module metadata.
__author__ = "Amane Katagiri"
__author_email__ = "amane@ama.ne.jp"
__url__ = ""
__copyright__ = "Copyright (C) 2023 Amane Katagiri"
__credits__ = ["Amane Katagiri"]
__date__ = "2023-07-31"
__license__ = "MIT License"
__version__ = "0.0.1"

# Module logger: both the logger and its handler let everything from DEBUG
# upwards through. StreamHandler() without arguments writes to stderr.
logger = getLogger(__name__)
logger.setLevel(logging.DEBUG)
stream_handler = StreamHandler()
stream_handler.setLevel(logging.DEBUG)
logger.addHandler(stream_handler)
@dataclass
class Event:
    """A single message: who wrote it and what it says."""

    # Role of the message author.
    author: str
    # Text of the message.
    content: str
@dataclass
class Conversation:
    """One node of a session's conversation tree."""

    # Message carried by this node.
    event: Event
    # ID of the parent node, or None when the node has no parent (a root).
    parent: str | None
    # IDs of the nodes that follow this one; several children mean the
    # conversation branches here.
    children: list[str]
@dataclass
class Session:
    """One exported chat: its title plus all of its nodes."""

    # Human-readable title of the chat.
    title: str
    # Every node of the chat, keyed by node ID.
    mapping: dict[str, Conversation]
def new_session(session: dict) -> Session:
    """Build a Session from one decoded entry of the export file.

    Args:
        session: Decoded JSON object of a single chat. It must contain a
            "mapping" object whose values are nodes carrying an "id". The
            "title" key and each node's "message", "parent" and "children"
            keys are optional and may also be null.

    Returns:
        A Session whose mapping is keyed by node ID.

    Raises:
        KeyError: If "mapping" or a node's "id" is missing.
    """
    mapping: dict[str, Conversation] = {}
    for node in session["mapping"].values():
        # A key may be present with a null value, in which case a .get()
        # default is not used; `or` covers both "missing" and "null".
        message = node.get("message") or {}
        author = message.get("author") or {}
        content = message.get("content") or {}
        parts = content.get("parts") or []
        # str.join only accepts strings. Non-string parts (presumably
        # structured entries such as attachments -- verify against real
        # exports) are kept as their JSON text instead of raising TypeError.
        text = "\n".join(
            part if isinstance(part, str) else json.dumps(part, ensure_ascii=False)
            for part in parts
        )
        mapping[node["id"]] = Conversation(
            Event(author.get("role") or "someone", text),
            node.get("parent"),
            node.get("children") or [],
        )
    return Session(session.get("title") or "untitled", mapping)
def parse(cursor: str, conversations: dict[str, Conversation]) -> list[list[Event]]:
    """Flatten the tree below *cursor* into one event list per branch.

    Every path from *cursor* down to a leaf becomes one list of events in
    top-down order. Branches are returned in depth-first order, following
    each node's ``children`` order. Events with empty content are left out.

    The walk uses an explicit stack rather than recursion, so a very long
    conversation cannot exceed the interpreter's recursion limit.

    Args:
        cursor: ID of the node to start from.
        conversations: All nodes of the session, keyed by node ID.

    Returns:
        A list of branches. ``[[]]`` when *cursor* is not in *conversations*.
    """
    if cursor not in conversations:
        return [[]]

    branches: list[list[Event]] = []
    # Each entry pairs a node ID with the events collected on the way to it.
    # Prefix lists are never mutated, so siblings can safely share one.
    pending: list[tuple[str, list[Event]]] = [(cursor, [])]
    while pending:
        node_id, prefix = pending.pop()
        node = conversations.get(node_id)
        if node is None:
            # A child ID with no matching node ends the branch at its parent.
            branches.append(list(prefix))
            continue
        # Filter each event once, here, instead of re-filtering whole paths.
        path = [*prefix, node.event] if node.event.content else prefix
        if not node.children:
            branches.append(list(path))
            continue
        # Push in reverse so the first child is popped, and emitted, first.
        pending.extend((child, path) for child in reversed(node.children))
    return branches
def _safe_filename(title: str) -> str:
    """Return *title* with path separators and other unsafe characters replaced by "_"."""
    unsafe = '\\/:*?"<>|'
    return "".join("_" if ch in unsafe or ord(ch) < 32 else ch for ch in title)


def main(infile: str, outdir: str):
    """Write every branch of every exported session to its own Markdown file.

    Files are named "<session index>_<title>_<branch index>.md". Each starts
    with a "# <title> <session index>" heading, followed by the branch's
    messages as "**<author>**: <content>" paragraphs.

    Args:
        infile: Path of the exported JSON file; its top level is iterated as
            a sequence of session objects.
        outdir: Directory to write the Markdown files to. It is created,
            including parents, when it does not exist.
    """
    with open(infile, encoding="utf-8") as file:
        sessions: list[Session] = [new_session(raw) for raw in json.load(file)]

    out_path = Path(outdir)
    out_path.mkdir(parents=True, exist_ok=True)

    for i, session in enumerate(sessions):
        title = session.title or "untitled"
        conversations = session.mapping
        # The root is the first node without a parent.
        root_id = next(
            (
                node_id
                for node_id, node in conversations.items()
                if node.parent is None
            ),
            None,
        )
        if root_id is None:
            # Nothing to walk from; skip this session rather than abort the
            # whole export with a bare StopIteration.
            logging.getLogger(__name__).warning(
                "Skipping session %d (%s): no root node found", i, title
            )
            continue
        # The heading keeps the real title; only the file name is sanitised,
        # because a "/" in the title would otherwise point into another
        # directory.
        file_title = _safe_filename(title)
        for j, branch in enumerate(parse(root_id, conversations)):
            body = "\n\n".join(
                f"**{event.author}**: {event.content}" for event in branch
            )
            target = out_path / f"{i:03d}_{file_title}_{j:03d}.md"
            with open(target, "w", encoding="utf-8") as file:
                file.write(f"# {title} {i}\n\n" + body + "\n")
if __name__ == "__main__":
    # Both paths are required. Without them, print the usage line to stderr
    # and exit non-zero instead of failing with an IndexError traceback.
    if len(sys.argv) < 3:
        sys.exit(f"Usage: {sys.argv[0]} conversations.json ./path/to/export/dir")
    main(sys.argv[1], sys.argv[2])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment