Skip to content

Instantly share code, notes, and snippets.

@samhenrigold
Created July 5, 2023 14:09
Show Gist options
  • Save samhenrigold/a10478246e5e69a501868ca532c31843 to your computer and use it in GitHub Desktop.
Save samhenrigold/a10478246e5e69a501868ca532c31843 to your computer and use it in GitHub Desktop.
Node script to generate a cleaned, sorted list of your tweets. Drop this into the base directory of your unzipped Twitter archive & run from there.
const fs = require('fs');
// Location of the tweet data file that the script reads. The path is relative,
// so it is resolved against the directory the script is run from.
const TWEETS_PATH = './data/tweets.js';
/**
 * Read a tweet data file and parse the JSON payload it contains.
 *
 * The data file is JavaScript rather than plain JSON: the payload is assigned
 * to a global, e.g. `window.YTD.tweets.part0 = [ ... ]`. That leading
 * assignment is stripped before the remainder is handed to `JSON.parse`.
 * A file that is already plain JSON (no prefix) is parsed unchanged.
 *
 * @param {string} filePath - Path of the data file to read (UTF-8 text).
 * @returns {*} The parsed JSON value.
 * @throws {Error} If the file cannot be read.
 * @throws {SyntaxError} If what remains after stripping the prefix is not valid JSON.
 */
function readAndParseTweets(filePath) {
  const rawData = fs.readFileSync(filePath, 'utf8');
  // Accept any `window.YTD.<name> = ` prefix instead of only the literal
  // `tweets.part0` one, so a different part number or collection name still
  // parses. The pattern is anchored to the start of the file, which keeps it
  // from ever touching text inside the JSON payload. `\s` also matches a
  // leading byte-order mark.
  const jsonData = rawData.replace(/^\s*window\.YTD\.[\w.]+\s*=\s*/, '');
  return JSON.parse(jsonData);
}
/**
 * Build a slimmed-down copy of every tweet in an archive array.
 *
 * For each entry, the object under its `tweet` property is shallow-copied and
 * then:
 *   1. a fixed list of top-level fields is deleted from the copy;
 *   2. every media object under `extended_entities.media` and `entities.media`
 *      is passed to `removeMediaCruft`;
 *   3. every `indices` property, at any depth, is removed via `removeIndices`.
 *
 * Because the copy is shallow, nested objects are shared with the input, so
 * steps 2 and 3 also modify the nested objects of the original tweets.
 *
 * @param {Array<{tweet: Object}>} tweets - Archive entries, each wrapping a tweet.
 * @returns {Object[]} One cleaned tweet object per input entry, in input order.
 */
function cleanTweetData(tweets) {
  // Top-level fields dropped from every tweet.
  const fieldsToRemove = [
    'display_text_range', 'edit_info', 'favorited', 'id_str', 'lang',
    'possibly_sensitive', 'retweeted', 'source', 'truncated',
  ];
  // Properties of a tweet that may carry a `media` array.
  const mediaContainers = ['extended_entities', 'entities'];

  return tweets.map((tweet) => {
    const cleanedTweet = { ...tweet.tweet };

    for (const field of fieldsToRemove) {
      delete cleanedTweet[field];
    }

    // Tweets without media simply have no `media` array. Check for that
    // explicitly instead of catching the resulting TypeError, so that a
    // missing array in one container cannot skip the other container and
    // genuine errors are not silently discarded.
    for (const containerName of mediaContainers) {
      const container = cleanedTweet[containerName];
      const media = container ? container.media : undefined;
      if (Array.isArray(media)) {
        media.forEach(removeMediaCruft);
      }
    }

    // Remove any 'indices' property. This is only used for display purposes.
    removeIndices(cleanedTweet);
    return cleanedTweet;
  });
}
/**
 * Strip the `sizes` property from a media object, modifying it in place.
 *
 * @param {Object} media - The media object to trim.
 * @returns {void}
 */
function removeMediaCruft(media) {
  delete media['sizes'];
}
/**
 * Delete every property named `indices` from a value and from everything
 * nested inside it, modifying the value in place.
 *
 * Each enumerable property is visited; one named `indices` is deleted, and
 * any other property whose value has type 'object' (arrays included) is
 * descended into.
 *
 * @param {*} obj - The value to scrub.
 * @returns {void}
 */
function removeIndices(obj) {
  for (const key in obj) {
    if (key !== 'indices') {
      const child = obj[key];
      if (typeof child === 'object') {
        removeIndices(child);
      }
      continue;
    }
    delete obj[key];
  }
}
/**
 * Sort an array of objects by the numeric value of one property, largest first.
 *
 * The array is sorted in place and the same array is returned. Property
 * values are converted with `Number()`, so numeric strings such as "12" sort
 * by their numeric value. Items whose value is missing or not numeric are
 * placed after all numeric ones.
 *
 * @param {Object[]} array - The objects to sort (modified in place).
 * @param {string} key - Name of the property to sort by.
 * @returns {Object[]} The same array instance, now sorted in descending order.
 */
function sortByKeyDesc(array, key) {
  // A plain `b[key] - a[key]` yields NaN when a value is missing or not
  // numeric, which makes the comparator inconsistent and the resulting order
  // unpredictable. Rank such values below every real number instead.
  const rank = (item) => {
    const value = Number(item[key]);
    return Number.isNaN(value) ? -Infinity : value;
  };

  return array.sort((a, b) => {
    const rankA = rank(a);
    const rankB = rank(b);
    // Compare rather than subtract: (-Infinity) - (-Infinity) would be NaN.
    if (rankA === rankB) {
      return 0;
    }
    return rankB > rankA ? 1 : -1;
  });
}
// Main execution: load the tweet data, clean each tweet, sort, and save.
const rawTweets = readAndParseTweets(TWEETS_PATH);
const cleanedTweets = cleanTweetData(rawTweets);
// Sorted by `favorite_count`, highest first; `retweet_count` can be used as the key instead.
const sortedTweets = sortByKeyDesc(cleanedTweets, "favorite_count");
// Serialize with a two-space indent, then write the result to a JSON file.
const serializedTweets = JSON.stringify(sortedTweets, null, 2);
fs.writeFileSync('tweets.json', serializedTweets);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment