Last active
August 22, 2023 17:43
-
-
Save qfeuilla/06cc7f2980a87b2b265725a849acb56c to your computer and use it in GitHub Desktop.
Extract info from ChatGPT history
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Step 1: Download ChatGPT history
# Go to ChatGPT -> click on profile (...) -> Settings & Beta -> Data controls -> Export data
# You should receive a zip by email from OpenAI
PATH_TO_EXPORT_CONVERSATION = ""  # Set this as the path of the "conversations.json" file in the zip you received

# Step 2: Execute this code
import pandas as pd
import tiktoken

encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")

# Load the export and sort chronologically so the first/last rows bound the date range.
data = pd.read_json(PATH_TO_EXPORT_CONVERSATION).sort_values(by="create_time", ascending=True)
days_of_interactions = (data.iloc[-1]["create_time"] - data.iloc[0]["create_time"]).days
print(f"Amount of days between first and last interactions: {days_of_interactions}")

# Collect every message text part, then join once — avoids the quadratic
# cost of repeated string concatenation on a large history.
collected_parts = []
for mapping in data["mapping"]:
    for node in mapping.values():
        # Some nodes have no message (e.g. the root node), and parts may
        # contain non-text entries; skip those explicitly instead of a
        # bare `except: pass` that would also hide real errors.
        try:
            parts = node["message"]["content"]["parts"]
        except (KeyError, TypeError):
            continue
        for text in parts:
            if isinstance(text, str):
                collected_parts.append(text)

all_text = " ".join(collected_parts)
tokens = encoding.encode(all_text)
words = all_text.split()
print(f"Amount of tokens: {len(tokens)}")

# Rough print-layout estimate: 250 words per page.
print(f"Estimated number of pages: {len(words) / 250} pages")
# Guard against a zero-day span (all interactions on the same calendar day),
# which would otherwise raise ZeroDivisionError.
weekdays = (days_of_interactions / 7) * 5
if weekdays > 0:
    print(f"Estimated number of pages per weekdays: {len(words) / 250 / weekdays}")
else:
    print("History spans less than a day; cannot estimate pages per weekday.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment