Skip to content

Instantly share code, notes, and snippets.

@tylor
Created December 16, 2012 23:07
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tylor/4314006 to your computer and use it in GitHub Desktop.
Save tylor/4314006 to your computer and use it in GitHub Desktop.
Scan your Twitter archive to get a basic count of the people you've talked to and the words that you've used.
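/*
* Scan your official Twitter archive and print basic counts of the words
* you've used, the people you've mentioned, the hashtags and tweet sources
* you've used, and when you tweet (by hour of the day and day of the week).
*
* $ node tweet-counts.js ~/path/to/tweets-archive/
*
*/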
var fs = require('fs');
// The archive directory is a required argument; without this guard a missing
// argument surfaces as an opaque TypeError from calling .replace on undefined.
if (process.argv.length < 3) {
  console.error('Usage: node tweet-counts.js ~/path/to/tweets-archive/');
  process.exit(1);
}
var base = process.argv[2].replace(/\/$/, ''); // Strip trailing slash.
var Grailbird = { data: {} }; // The Twitter archive uses this.
// Running tallies. `total` is the number of tweets seen; every other entry
// maps a key (word, screen name, hashtag, source, hour, weekday) to a count.
var statistics = {
  total: 0,
  text: {},
  mentions: {},
  hashtags: {},
  sources: {},
  hourly: {},
  daily: {}
};
/**
 * Increment the counter stored under `key` in `statistic`, starting at 1 the
 * first time the key is seen.
 *
 * Only own properties are treated as existing counters. Keys that collide
 * with inherited Object.prototype members (e.g. "constructor", "toString")
 * would otherwise pick up the inherited function and turn the count into a
 * concatenated string.
 *
 * @param {Object} statistic - Plain object mapping keys to counts; mutated.
 * @param {string|number} key - The key to count.
 */
function addOne(statistic, key) {
  if (Object.prototype.hasOwnProperty.call(statistic, key)) {
    statistic[key] += 1;
  } else {
    // defineProperty (rather than assignment) so that a "__proto__" key
    // becomes a real own, enumerable counter instead of hitting the
    // inherited __proto__ setter, which silently ignores numbers.
    Object.defineProperty(statistic, key, {
      value: 1,
      writable: true,
      enumerable: true,
      configurable: true
    });
  }
}
// SECURITY NOTE: the archive's .js files are executed with eval(), so any
// code they contain runs with this script's privileges. Only point this
// script at an archive you trust.
//
// Gross eval... load up the index. The index script is presumably what
// defines `tweet_index` in this scope, since it is used right after the eval
// and is not declared anywhere else in this file.
eval(fs.readFileSync(base + '/data/js/tweet_index.js').toString());
tweet_index.forEach(function(tweet_file) {
  // Gross eval... load up a monthly tweet archive. After it runs, the tweets
  // are read from Grailbird.data[tweet_file.var_name].
  eval(fs.readFileSync(base + '/' + tweet_file.file_name).toString());
  Grailbird.data[tweet_file.var_name].forEach(function(tweet) {
    // Total.
    statistics.total++;
    // Text: count each whitespace-separated token, case-insensitively.
    tweet.text.split(/\s+/).forEach(function(word) {
      // split() yields '' when the text starts or ends with whitespace;
      // an empty token is not a word, so skip it.
      if (word !== '') {
        addOne(statistics.text, word.toLowerCase());
      }
    });
    // Mentions.
    tweet.entities.user_mentions.forEach(function(user) {
      addOne(statistics.mentions, user.screen_name.toLowerCase());
    });
    // Hashtags.
    tweet.entities.hashtags.forEach(function(hashtag) {
      addOne(statistics.hashtags, hashtag.text.toLowerCase());
    });
    // Sources.
    addOne(statistics.sources, tweet.source.toLowerCase());
    // Hourly and Daily. getHours()/getDay() report in the local time zone of
    // the machine running the script (getDay(): 0 = Sunday).
    var created_at = new Date(tweet.created_at);
    addOne(statistics.hourly, created_at.getHours());
    addOne(statistics.daily, created_at.getDay());
  });
});
/**
 * Format `count` as a percentage of statistics.total (without the % sign).
 *
 * Values keep two significant digits, as before. Anything that rounds to 100
 * or more is printed as a whole number instead, because toPrecision(2)
 * switches to exponential notation there (e.g. "1.0e+2"). That case is
 * reachable whenever a count equals or exceeds the tweet total, such as a
 * word used more than once per tweet on average.
 *
 * @param {number} count - The tally to express as a percentage.
 * @returns {string} The formatted percentage; "0.0" when no tweets were counted.
 */
function formatPercent(count) {
  var percent = statistics.total > 0 ? count * 100 / statistics.total : 0;
  return percent >= 99.5 ? percent.toFixed(0) : percent.toPrecision(2);
}

/**
 * Print "<count> <percent>% <key>" for every key whose count is strictly
 * greater than `threshold`, ordered from the smallest count to the largest.
 *
 * @param {Object} statistic - Map of key to count.
 * @param {number} threshold - Counts at or below this value are not printed.
 */
function printSortedByValues(statistic, threshold) {
  Object.keys(statistic).sort(function(a, b) {
    return statistic[a] - statistic[b];
  }).forEach(function(key) {
    if (statistic[key] > threshold) {
      console.log(statistic[key] + ' ' + formatPercent(statistic[key]) + '% ' + key);
    }
  });
}

/**
 * Print "<key> <count> <percent>%" for every key, in Object.keys() order.
 * No explicit sort is done: integer-like keys (such as hours) are enumerated
 * in ascending numeric order by the engine, other keys in insertion order.
 *
 * @param {Object} statistic - Map of key to count.
 */
function printSortedByKeys(statistic) {
  Object.keys(statistic).forEach(function(key) {
    console.log(key + ' ' + statistic[key] + ' ' + formatPercent(statistic[key]) + '%');
  });
}

/**
 * Print "<count> <percent>% <weekday>" for Sunday through Saturday.
 *
 * @param {Object} statistic - Map of weekday index (0 = Sunday) to count.
 */
function printDays(statistic) {
  var days = ['Sunday','Monday','Tuesday','Wednesday','Thursday','Friday','Saturday'];
  for (var day = 0; day < days.length; day++) {
    // A weekday with no tweets has no entry; report it as 0 rather than
    // printing "undefined NaN%".
    var count = statistic[day] || 0;
    console.log(count + ' ' + formatPercent(count) + '% ' + days[day]);
  }
}
// Report sections in output order: [heading, printer, statistic, threshold].
// Only printSortedByValues takes a threshold; the other printers ignore the
// missing fourth entry.
var reports = [
  ['Popular words:', printSortedByValues, statistics.text, 10],
  ['Who you talk to:', printSortedByValues, statistics.mentions, 2],
  ['Hashtags used:', printSortedByValues, statistics.hashtags, 1],
  ['Tweet sources:', printSortedByValues, statistics.sources, 0],
  ['What time of the day:', printSortedByKeys, statistics.hourly],
  ['What day of the week:', printDays, statistics.daily]
];
reports.forEach(function(report, index) {
  // A blank line separates consecutive sections (none before the first or
  // after the last).
  if (index > 0) {
    console.log('');
  }
  console.log(report[0]);
  report[1](report[2], report[3]);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment