Last active
August 22, 2023 17:43
-
-
Save qfeuilla/06cc7f2980a87b2b265725a849acb56c to your computer and use it in GitHub Desktop.
Extract info from ChatGPT history
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Step 1: Download ChatGPT history
# Go to ChatGPT -> click on profile (...) -> Settings & Beta -> Data controls -> Export data
# You should receive a zip by email from OpenAI
PATH_TO_EXPORT_CONVERSATION = ""  # Set this as the path of the "conversations.json" file in the zip you received

# Step 2: Execute this code
import pandas as pd
import tiktoken

encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")

# Load the export and sort chronologically so the first/last rows bound the date range.
data = pd.read_json(PATH_TO_EXPORT_CONVERSATION).sort_values(by="create_time", ascending=True)
days_of_interactions = (data.iloc[-1]["create_time"] - data.iloc[0]["create_time"]).days
print(f"Amount of days between first and last interactions: {days_of_interactions}")

# Collect every message text part, then join once — avoids the quadratic
# cost of repeated string concatenation on a large history.
collected_parts = []
for mapping in data["mapping"]:
    for node in mapping.values():
        # Some nodes have no message (e.g. the root node), and parts may
        # contain non-text entries; skip those explicitly instead of a
        # bare `except: pass` that would also hide real errors.
        try:
            parts = node["message"]["content"]["parts"]
        except (KeyError, TypeError):
            continue
        for text in parts:
            if isinstance(text, str):
                collected_parts.append(text)

all_text = " ".join(collected_parts)
tokens = encoding.encode(all_text)
words = all_text.split()
print(f"Amount of tokens: {len(tokens)}")

# Rough print-layout estimate: 250 words per page.
print(f"Estimated number of pages: {len(words) / 250} pages")
# Guard against a zero-day span (all interactions on the same calendar day),
# which would otherwise raise ZeroDivisionError.
weekdays = (days_of_interactions / 7) * 5
if weekdays > 0:
    print(f"Estimated number of pages per weekdays: {len(words) / 250 / weekdays}")
else:
    print("History spans less than a day; cannot estimate pages per weekday.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment