Created
July 1, 2024 14:47
-
-
Save RamenJunkie/f294b952ca71d48eab090bccf2849a17 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A simple script that takes an exported tweets.js file and outputs it to a markdown text file for archiving.
# In pulling data for this, I noticed that older Twitter exports use a csv file instead of a .js file.
# As such, this is for newer exports.
# The tweets.js file is in the 'data' directory of a standard Twitter archive export file.
# The export file is expected in the same folder as this script; the archive
# is written next to it.
INPUT_FILE = "tweets.js"
OUTPUT_FILE = "output.md"

# In the export every string field sits on its own line in the form
#     "key" : "value",
# so this token separates the key from the start of the value.
FIELD_SEPARATOR = "\" : \""


def split_field(line):
    """Split one export line into ``(key, raw_value)``.

    Returns ``None`` when the line is not a ``"key" : "value"`` string field.
    The value is returned as written in the file (JSON escapes are left
    untouched) with the line ending, the optional trailing comma and the
    closing quote removed.
    """
    key, separator, value = line.partition(FIELD_SEPARATOR)
    if not separator:
        return None
    value = value.rstrip("\r\n")
    # The last field of an object has no trailing comma, so remove the comma
    # and the closing quote one at a time instead of blindly cutting 3 chars.
    if value.endswith(","):
        value = value[:-1]
    if value.endswith("\""):
        value = value[:-1]
    return key.strip().lstrip("\""), value


def parse_tweets(lines):
    """Collect ``(posted_at, text)`` pairs from the lines of a tweets.js file.

    Only the posting date and the full text are extracted; images and other
    media are ignored.  A date is paired with the next ``full_text`` field
    that follows it.  A ``full_text`` with no preceding date is skipped.
    """
    tweets = []
    posted_at = None
    for line in lines:
        field = split_field(line)
        if field is None:
            continue
        # Compare the actual key rather than searching the whole line, so a
        # tweet whose text merely mentions "created_at" is not taken for a
        # date line.
        key, value = field
        if key == "created_at":
            posted_at = value.replace(" +0000 ", " ")
        elif key == "full_text" and posted_at is not None:
            tweets.append((posted_at, value))
            posted_at = None
    return tweets


def tweet_year(posted_at):
    """Return the year that ends a date such as ``Wed Oct 10 20:19:24 2018``.

    Returns 0 when the last word is not a number, so such tweets are kept
    (grouped ahead of every real year) instead of being dropped.
    """
    last_word = posted_at.rsplit(" ", 1)[-1]
    return int(last_word) if last_word.isdecimal() else 0


def group_by_year(tweets):
    """Bucket tweets by year, keeping file order inside each bucket.

    Buckets are created on demand, so no year is dropped for falling
    outside a fixed range.
    """
    by_year = {}
    for tweet in tweets:
        by_year.setdefault(tweet_year(tweet[0]), []).append(tweet)
    return by_year


def archive_lines(tweets):
    """Return the archive lines, one ``date : text`` string per tweet.

    Years are emitted in ascending order; within a year the tweets appear in
    reverse file order.  Tweets are not sorted by exact date within a year.
    """
    by_year = group_by_year(tweets)
    lines = []
    for year in sorted(by_year):
        for posted_at, text in reversed(by_year[year]):
            lines.append(posted_at + " : " + text)
    return lines


def main():
    """Read tweets.js, write output.md and echo every archived line."""
    with open(INPUT_FILE, encoding="utf-8") as source:
        tweets = parse_tweets(source)
    with open(OUTPUT_FILE, encoding="utf-8", mode="w") as output:
        for entry in archive_lines(tweets):
            output.write(entry + "\n")
            print(entry)


# Running the file as a script behaves as before; importing it no longer
# touches the file system.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment