Skip to content

Instantly share code, notes, and snippets.

@RamenJunkie
Created July 1, 2024 14:47
Show Gist options
  • Save RamenJunkie/f294b952ca71d48eab090bccf2849a17 to your computer and use it in GitHub Desktop.
Save RamenJunkie/f294b952ca71d48eab090bccf2849a17 to your computer and use it in GitHub Desktop.
# A simple script that takes an exported tweets.js file and outputs it to a markdown text file for archiving.
# In pulling data for this, I noticed that older Twitter exports use a csv file instead of a .js file.
# As such, this is for newer exports.
# The Tweets.js file is in the 'data' directory of a standard Twitter archive export file.
# Input and output locations. tweets.js is expected in the same folder as
# this script (copy it out of the archive's 'data' directory).
TWEETS_PATH = "tweets.js"
OUTPUT_PATH = "output.md"

# Separator between a key and its string value in the pretty-printed export,
# e.g.      "created_at" : "Sat Jun 29 01:02:03 +0000 2024",
KEY_VALUE_SEPARATOR = "\" : \""


def _extract_value(line):
    """Return the raw string value of a `"key" : "value",` line.

    The value is returned exactly as written in the file (JSON escapes such
    as \\n or \\" are left untouched). The trailing newline, the optional
    trailing comma and the closing quote are removed individually, so a key
    that is last in its object (no comma) or a final line without a newline
    does not lose a real character.

    Returns None when the line does not contain the `" : "` separator.
    """
    _, separator, value = line.partition(KEY_VALUE_SEPARATOR)
    if not separator:
        return None
    value = value.rstrip()
    if value.endswith(","):
        value = value[:-1]
    if value.endswith("\""):
        value = value[:-1]
    return value


def _year_of(posted_at):
    """Return the trailing year of a date string, or 0 if there is none."""
    try:
        return int(posted_at.rsplit(" ", 1)[-1])
    except ValueError:
        return 0


def parse_tweets(lines):
    """Collect (posted_at, full_text) pairs from the lines of tweets.js.

    This is a simple line scan, not a JSON parse: it relies on the export
    being pretty-printed with one key per line and on "created_at" appearing
    before "full_text" within each tweet. Images and other media are ignored.

    A line only counts when it *starts* with the key, so a tweet whose text
    merely mentions created_at or full_text is not mistaken for a key line.
    A "full_text" with no preceding date is kept with an empty date.

    Args:
        lines: iterable of text lines (an open file works).

    Returns:
        List of (posted_at, full_text) tuples in file order. The " +0000 "
        UTC offset is removed from posted_at.
    """
    tweets = []
    posted_at = ""
    for line in lines:
        key_line = line.lstrip()
        if key_line.startswith("\"created_at\""):
            value = _extract_value(line)
            if value is not None:
                posted_at = value.replace(" +0000 ", " ")
        elif key_line.startswith("\"full_text\""):
            value = _extract_value(line)
            if value is not None:
                # The text always follows the date, so the tweet is complete.
                tweets.append((posted_at, value))
                posted_at = ""
    return tweets


def group_by_year(tweets):
    """Group tweets by the year at the end of their date, oldest year first.

    Years are discovered from the data instead of a fixed 2004-2025 range, so
    tweets from any year are kept. Tweets whose year cannot be determined go
    into a year-0 group, which sorts first. Within a group the tweets stay in
    the order given; they are not sorted by full date.

    Args:
        tweets: iterable of (posted_at, full_text) tuples.

    Returns:
        List of per-year lists, ordered by ascending year.
    """
    by_year = {}
    for tweet in tweets:
        by_year.setdefault(_year_of(tweet[0]), []).append(tweet)
    return [by_year[year] for year in sorted(by_year)]


def main():
    """Read tweets.js, then write and print one "date : text" line per tweet."""
    with open(TWEETS_PATH, encoding="utf-8") as file:
        tweets = parse_tweets(file)

    with open(OUTPUT_PATH, encoding="utf-8", mode="w") as output:
        for year_tweets in group_by_year(tweets):
            # Each year's tweets are emitted in reverse file order.
            for posted_at, text in reversed(year_tweets):
                entry = f"{posted_at} : {text}"
                output.write(entry + "\n")
                print(entry)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment