# RamenJunkie/tweets2markdown.py

## tweets2markdown.py
# A simple script that takes an exported tweets.js file and outputs it to a markdown text file for archiving.
# In pulling data for this, I noticed that older Twitter exports use a csv file instead of a .js file.
# As such, this is for newer exports.
# The tweets.js file is in the 'data' directory of a standard Twitter archive export file.

# Reads tweets.js (expected in the current folder by default), pulls out the
# date and text of every tweet, and writes them to output.md, one per line.
# This does not do anything with images; only the date and full text are kept.

# Text that introduces the two values we care about.  Matching the whole
# `"key" : "` prefix (not just the key name) keeps a tweet whose text merely
# mentions created_at or full_text from being mistaken for a key line, because
# quotes inside a JSON string value are always escaped.
DATE_MARKER = "\"created_at\" : \""
TEXT_MARKER = "\"full_text\" : \""


def extract_value(line, marker):
    """Return the raw string value that follows *marker* on *line*.

    The closing quote and an optional trailing comma/newline are removed, so
    the result is correct whether or not the key is the last one in its
    object.  JSON escapes inside the value are left untouched.  Returns None
    when *marker* does not occur on the line.
    """
    _, found, value = line.partition(marker)
    if not found:
        return None
    value = value.rstrip()
    if value.endswith(","):
        value = value[:-1]
    if value.endswith("\""):
        value = value[:-1]
    return value


def parse_tweets(lines):
    """Collect (posted_at, text) pairs from the lines of a tweets.js file.

    *lines* is any iterable of text lines (an open file works).  The date
    always comes before the full text inside a tweet, so each text line is
    paired with the most recent date seen.  A text line with no date in front
    of it is skipped because it cannot be placed in the archive.
    """
    tweets = []
    posted_at = None
    for line in lines:
        date_value = extract_value(line, DATE_MARKER)
        if date_value is not None:
            # Drop the constant UTC offset so the date reads more cleanly.
            posted_at = date_value.replace(" +0000 ", " ")
            continue
        text_value = extract_value(line, TEXT_MARKER)
        if text_value is not None and posted_at is not None:
            tweets.append((posted_at, text_value))
            posted_at = None
    return tweets


def tweet_year(posted_at):
    """Return the year at the end of a cleaned date string, or 0 if absent."""
    last_token = posted_at.rsplit(" ", 1)[-1]
    return int(last_token) if last_token.isdecimal() else 0


def order_by_year(tweets):
    """Return *tweets* grouped by ascending year.

    The export is not in order, so tweets are bucketed by whatever years
    actually occur in the data (no fixed range, so nothing is dropped).
    Within a year the file order is reversed, which is the order the archive
    has always been written in; it is not a strict sort by date.
    """
    by_year = {}
    for tweet in tweets:
        by_year.setdefault(tweet_year(tweet[0]), []).append(tweet)
    ordered = []
    for year in sorted(by_year):
        ordered.extend(reversed(by_year[year]))
    return ordered


def main(source="tweets.js", destination="output.md"):
    """Convert the export at *source* into the markdown file *destination*.

    Every line written to the file is also printed to standard output.
    """
    with open(source, encoding="utf-8") as tweets_file:
        tweets = parse_tweets(tweets_file)

    with open(destination, encoding="utf-8", mode="w") as output:
        for posted_at, text in order_by_year(tweets):
            entry = posted_at + " : " + text
            output.write(entry + "\n")
            print(entry)


if __name__ == "__main__":
    main()