Skip to content

Instantly share code, notes, and snippets.

@qfeuilla
Last active August 22, 2023 17:43
Show Gist options
  • Save qfeuilla/06cc7f2980a87b2b265725a849acb56c to your computer and use it in GitHub Desktop.
Save qfeuilla/06cc7f2980a87b2b265725a849acb56c to your computer and use it in GitHub Desktop.
Extract info from ChatGPT history
# Step 1: Download your ChatGPT history.
# In ChatGPT, click on your profile (...) -> Settings & Beta -> Data controls -> Export data.
# You should then receive a zip archive by email from OpenAI.
PATH_TO_EXPORT_CONVERSATION = ""  # Set this to the path of the "conversations.json" file from the zip you received
# Step 2: Execute this code.
import pandas as pd
import tiktoken
WORDS_PER_PAGE = 250  # Rule of thumb used for the page estimate: 250 words per page.
WEEKDAYS_PER_WEEK = 5


def extract_text_parts(mapping):
    """Return the string parts of every message in one conversation ``mapping``.

    Args:
        mapping: The value of a conversation's "mapping" field. It is read as
            a dict whose values may hold ``["message"]["content"]["parts"]``.

    Returns:
        A list of the ``str`` entries found in the "parts" lists, in iteration
        order. Nodes with no message, content without "parts", and non-string
        parts are skipped. Anything that is not a dict yields an empty list.
    """
    if not isinstance(mapping, dict):
        return []
    texts = []
    for node in mapping.values():
        try:
            parts = node["message"]["content"]["parts"]
        except (KeyError, TypeError):
            # No message on this node (e.g. "message" is None) or content
            # that carries no "parts" list: nothing to count.
            continue
        if not isinstance(parts, (list, tuple)):
            continue
        # Skip only the non-string parts instead of abandoning the rest of
        # the message when one of them is encountered.
        texts.extend(part for part in parts if isinstance(part, str))
    return texts


def pages_per_weekday(word_count, days_of_interactions, words_per_page=WORDS_PER_PAGE):
    """Estimate the number of pages written per weekday.

    Args:
        word_count: Total number of words.
        days_of_interactions: Whole days between first and last interaction.
        words_per_page: Words assumed to fit on one page.

    Returns:
        The estimate as a float, or ``None`` when the history spans less than
        one day (the average would otherwise require dividing by zero).
    """
    weekdays = (days_of_interactions / 7) * WEEKDAYS_PER_WEEK
    if weekdays <= 0:
        return None
    return word_count / words_per_page / weekdays


def main():
    """Print token, page and pages-per-weekday statistics for the export."""
    if not PATH_TO_EXPORT_CONVERSATION:
        raise SystemExit(
            'Set PATH_TO_EXPORT_CONVERSATION to the path of your "conversations.json" file.'
        )

    encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
    data = pd.read_json(PATH_TO_EXPORT_CONVERSATION).sort_values(by="create_time", ascending=True)
    if data.empty:
        raise SystemExit("The export does not contain any conversation.")

    days_of_interactions = (data.iloc[-1]["create_time"] - data.iloc[0]["create_time"]).days
    print(f"""Amount of days between first and last interactions: {days_of_interactions}""")

    texts = []
    for mapping in data["mapping"]:
        texts.extend(extract_text_parts(mapping))
    # Every part is followed by a single space, exactly as the text was laid
    # out before, so token and word counts stay comparable.
    all_text = "".join(f"{text} " for text in texts)

    # The history may literally contain special-token text such as
    # "<|endoftext|>"; encode it as ordinary text instead of raising.
    tokens = encoding.encode(all_text, disallowed_special=())
    words = all_text.split()

    print(f"Amount of tokens: {len(tokens)}")
    print(f"Estimated number of pages: {len(words) / WORDS_PER_PAGE} pages")
    per_weekday = pages_per_weekday(len(words), days_of_interactions)
    if per_weekday is None:
        print("Estimated number of pages per weekdays: n/a (history spans less than one day)")
    else:
        print(f"Estimated number of pages per weekdays: {per_weekday}")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment