Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save 14790897/29533887b0b5aba7c8cecb419c4be72d to your computer and use it in GitHub Desktop.
Save 14790897/29533887b0b5aba7c8cecb419c4be72d to your computer and use it in GitHub Desktop.
提取ChatGPT官方json数据中的完整对话
import json
def find_bottom_most_node(conversation_data):
    """
    Find the last leaf node of the conversation mapping.

    Nodes are scanned from the last inserted entry to the first, and the ID of
    the first node whose "children" value is empty is returned.

    NOTE(review): when the mapping contains several branches this picks the
    leaf that was inserted last, which is presumably the most recent one --
    confirm against the data being processed.

    :param conversation_data: Dictionary mapping node IDs to node dictionaries.
    :return: The ID of the last leaf node, or None if there is no leaf or a
        malformed node is met before one is found (a diagnostic is printed in
        the latter case).
    :raises TypeError: If conversation_data is not a dictionary.
    """
    if not isinstance(conversation_data, dict):
        raise TypeError("Conversation data must be a dictionary.")

    # Materialise the items first: dict views are only reversible on
    # Python 3.8+, a list is reversible everywhere.
    for node_id, node in reversed(list(conversation_data.items())):
        # Malformed nodes are reported directly instead of raising and then
        # catching the exception locally; formatting a KeyError with str()
        # would also wrap the message in an extra pair of quotes.
        if not isinstance(node, dict):
            print(
                f"Error finding bottom-most node: Node {node_id} is not a dictionary."
            )
            return None
        if "children" not in node:
            print(
                "Error finding bottom-most node: "
                f"'children' key not found in node {node_id}."
            )
            return None
        if not node["children"]:  # No children: this is a leaf
            return node_id
    return None
def extract_conversation_path(conversation_data, start_node_id):
    """
    Collect the nodes on the path from the root down to the given node.

    The walk starts at start_node_id and follows each node's "parent" value
    upwards. It stops when the parent is empty/None, when an ID is not present
    in the mapping, or when an ID is met a second time (a cyclic parent chain
    would otherwise never terminate).

    :param conversation_data: Dictionary mapping node IDs to node dictionaries.
    :param start_node_id: The ID of the node to start from (typically a leaf).
    :return: A list of node dictionaries ordered from the top-most node
        reached down to the start node. Empty if the start node is unknown.
    """
    path = []
    visited = set()
    current_node_id = start_node_id
    while current_node_id and current_node_id not in visited:
        node = conversation_data.get(current_node_id)
        if node is None:
            break  # Dangling reference: the ID is not in the mapping
        visited.add(current_node_id)
        path.append(node)
        # Move one level up
        current_node_id = node.get("parent")
    # Nodes were collected leaf-first; callers expect root-first
    path.reverse()
    return path
def extract_full_conversation(conversation_data, start_node_id):
    """
    Extract (node ID, message text) pairs on the path from the root to a node.

    The walk starts at start_node_id and follows each node's "parent" value
    upwards. It stops when the parent is empty/None, when an ID is not present
    in the mapping, or when an ID is met a second time (a cyclic parent chain
    would otherwise never terminate).

    The text of a node is the space-joined string entries of
    node["message"]["content"]["parts"]. A node whose "message", "content" or
    "parts" is missing or null yields an empty string, and entries of "parts"
    that are not strings are skipped.

    Each extracted text is also printed to stdout.

    :param conversation_data: Dictionary mapping node IDs to node dictionaries.
    :param start_node_id: The ID of the node to start from (typically a leaf).
    :return: A list of (node_id, message_text) tuples ordered from the
        top-most node reached down to the start node.
    """
    full_conversation = []
    visited = set()
    current_node_id = start_node_id
    while current_node_id and current_node_id not in visited:
        node = conversation_data.get(current_node_id)
        if node is None:
            break  # Dangling reference: the ID is not in the mapping
        visited.add(current_node_id)

        # dict.get() only applies its default when the key is absent; a key
        # that is present with a null value returns None, so fall back with
        # `or` at every level instead.
        message = node.get("message") or {}
        content = message.get("content") or {}
        parts = content.get("parts") or []
        # str.join() rejects non-string items, so keep only the text entries.
        message_text = " ".join(part for part in parts if isinstance(part, str))
        print("message_text:", message_text)

        full_conversation.append((current_node_id, message_text))
        # Move one level up
        current_node_id = node.get("parent")
    # Pairs were collected leaf-first; callers expect root-first
    full_conversation.reverse()
    return full_conversation
file_path = "./example.json"  # Replace with the actual file path

# Load the JSON file and pull out its "mapping" object (node ID -> node).
# conversation_data stays None when anything goes wrong, so the processing
# below is skipped instead of failing on an unbound name.
conversation_data = None
try:
    with open(file_path, "r", encoding="utf-8") as file:
        export = json.load(file)
except (OSError, ValueError) as e:
    # OSError: file missing/unreadable; ValueError: invalid JSON or encoding
    print(f"Error reading or processing the file: {e}")
else:
    if isinstance(export, dict) and isinstance(export.get("mapping"), dict):
        conversation_data = export["mapping"]
    else:
        print(
            "Error reading or processing the file: "
            'expected a JSON object containing a "mapping" object'
        )

if conversation_data is not None:
    # Get the ID of the bottom-most node
    bottom_most_node_id = find_bottom_most_node(conversation_data)
    print("find bottom_most_node_id:", bottom_most_node_id)

    # Then use that ID to extract the conversation path
    if bottom_most_node_id:
        conversation_text = extract_full_conversation(
            conversation_data, bottom_most_node_id
        )
        # Name of the file the conversation is saved to
        file_name = "conversation_output.txt"
        # The with statement makes sure the file is closed properly
        with open(file_name, "w", encoding="utf-8") as file:
            # Write the node ID and message content of every node on the path
            file.writelines(
                f"Node ID: {node_id}\nMessage Content:\n{message_content}\n\n"
                for node_id, message_content in conversation_text
            )
    else:
        print("No bottom-most node found.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment