Skip to content

Instantly share code, notes, and snippets.

@Elijah-Bodden
Created May 28, 2024 00:12
Show Gist options
  • Save Elijah-Bodden/5307365cbf35a3250c77e43e63669c66 to your computer and use it in GitHub Desktop.
Save Elijah-Bodden/5307365cbf35a3250c77e43e63669c66 to your computer and use it in GitHub Desktop.
import json
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Paths of the exported chat JSON files to convert. Each file must be a JSON
# object with a "messages" list whose items carry ["author"]["name"] and
# ["content"] (presumably a Discord chat export -- confirm against your tool).
filenames = []
# Output file; one JSON record per line is APPENDED on every run.
out_name = "processed.json"
# Author name whose turns are labelled "gpt"; everyone else becomes "human".
myname = "YOURDISCORDUSERNAME"


def merge_turns(messages):
    """Collapse consecutive messages by the same author into single turns.

    Args:
        messages: iterable of ``(author_name, content)`` pairs in
            chronological order.

    Returns:
        A list of ``(author_name, text)`` tuples, one per uninterrupted run of
        messages by the same author. ``text`` is every message of the run
        followed by a newline, concatenated in order. The final run is
        included (the original script dropped it).
    """
    runs = []
    for author, content in messages:
        if runs and runs[-1][0] == author:
            runs[-1][1].append(content)
        else:
            runs.append((author, [content]))
    # Each message contributes "content\n", matching the original accumulation.
    return [(author, "\n".join(parts) + "\n") for author, parts in runs]


def build_conversations(turns, my_name, pairs_per_chunk=10):
    """Pair up turns as human/gpt exchanges and split them into records.

    Args:
        turns: list of ``(author_name, text)`` tuples as returned by
            :func:`merge_turns`.
        my_name: author whose turns should land in the "gpt" slot. If this
            author speaks first, that opening turn is discarded so every
            record starts with a "human" turn.
        pairs_per_chunk: maximum number of human/gpt pairs per record.
            Must be >= 1 (``range`` raises ``ValueError`` for 0).

    Returns:
        A list of ``{"conversations": [...]}`` dicts. Each inner list holds
        alternating ``{"from": "human", ...}`` / ``{"from": "gpt", ...}``
        entries, at most ``2 * pairs_per_chunk`` of them. A trailing turn
        with no partner is dropped. Returns ``[]`` for empty input.

    Note:
        Slots are assigned purely by position after merging, so the
        human/gpt labelling is only meaningful for two-participant chats.
    """
    texts = [text for _, text in turns]
    if turns and turns[0][0] == my_name:
        texts = texts[1:]

    # Walk the turns two at a time; an unpaired last turn is skipped.
    exchange = []
    for idx in range(0, len(texts) - 1, 2):
        exchange.append({"from": "human", "value": texts[idx]})
        exchange.append({"from": "gpt", "value": texts[idx + 1]})

    step = pairs_per_chunk * 2
    return [
        {"conversations": exchange[start:start + step]}
        for start in range(0, len(exchange), step)
    ]


for filename in filenames:
    with open(filename, encoding="utf-8") as file:
        data = json.load(file)["messages"]

    # Ignore messages with empty content (e.g. attachment-only messages).
    newdata = [
        (message["author"]["name"], message["content"])
        for message in data
        if message["content"] != ""
    ]

    jsons = build_conversations(merge_turns(newdata), myname)

    # Add the records to the output file, one JSON document per line.
    with open(out_name, "a", encoding="utf-8") as f:
        f.writelines(json.dumps(this_json) + "\n" for this_json in jsons)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment