Skip to content

Instantly share code, notes, and snippets.

@divamgupta
Created April 11, 2019 08:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save divamgupta/a3693858dff15351c6b6a361d7000d08 to your computer and use it in GitHub Desktop.
Save divamgupta/a3693858dff15351c6b6a361d7000d08 to your computer and use it in GitHub Desktop.
Fetch tweets from twitter via phantomjs
/*
* Author Divam Gupta
A small PhantomJS script that fetches tweets from a given mobile.twitter.com URL (a user handle page or a search page) and saves them to a JSON file.
*/
// PhantomJS built-in modules: a headless page to load URLs, file access for
// the output file, and `system` for the command-line arguments.
var page = require('webpage').create();
var fs = require('fs');
var system = require('system');
var userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36";
page.settings.userAgent = userAgent;
// Only text content and link hrefs are read from the pages, so skip images.
page.settings.loadImages = false;
// Accumulates one {text, time} record per scraped tweet across all pages.
// Declared explicitly instead of leaking an implicit global.
var allTweets = [];
var args = system.args;
// Expected invocation: <script> <url> <output file>  (args[0] is the script name).
if (args.length !== 3)
{
    console.log( args[0] + ' "https://mobile.twitter.com/search?q=search" tweets.json');
    // Report the usage error with a non-zero exit status.
    // NOTE(review): phantom.exit() may not halt the script synchronously, so
    // the statements below can still run before the process ends — confirm.
    phantom.exit(1);
}
// The `||` fallbacks apply when an argument is missing or an empty string.
var url = args[1] || 'https://mobile.twitter.com/search?q=mein+hoon';
var path = args[2] || 'tweets.json';
/**
 * Serialise the tweets collected so far (`allTweets`) to JSON and write them
 * to the output file at `path`, opened in 'w' mode.
 *
 * Any exception raised while serialising or writing is logged to the console
 * and not rethrown.
 */
function save() {
    try {
        fs.write(path, JSON.stringify(allTweets), 'w');
    } catch (writeError) {
        console.log(writeError);
    }
}
/**
 * Open `link`, append every tweet found on that page to `allTweets`, then
 * follow the page's "more" link and repeat until no such link is present.
 *
 * Progress is written to disk via save() after every page, so an interrupted
 * run still leaves the tweets gathered so far in the output file.
 *
 * Outcomes:
 *  - page failed to load: log it, save what was collected, exit with status 1;
 *  - rate-limit notice on the page: wait 5 seconds and retry the same link;
 *  - a "more" link exists: save and continue with the next page;
 *  - no "more" link: save and exit.
 *
 * @param {string} link URL of the page to open.
 */
function scrapeMore(link)
{
    page.open(link, function(status) {
        // Without this check a failed load looks like a page with no tweets
        // and no "more" link, i.e. indistinguishable from a finished run.
        if (status !== 'success')
        {
            console.log("failed to load " + link + " (status: " + status + ")");
            save();
            phantom.exit(1);
            return;
        }

        // Rate-limited: retry the same page after a pause.
        if (page.content.indexOf("You've made a few too many attempts. Please try again later") !== -1)
        {
            console.log("too many attempts");
            setTimeout(function() {
                scrapeMore(link);
            }, 5000);
            return;
        }

        // Text of every tweet on the page, in document order.
        var tweetTexts = page.evaluate(function() {
            return [].map.call(document.querySelectorAll('div.tweet-text div.dir-ltr'), function(node) {
                return node.textContent;
            });
        }) || [];

        // Timestamp label of every tweet on the page, in document order.
        var tweetTimes = page.evaluate(function() {
            return [].map.call(document.querySelectorAll('td.timestamp a'), function(node) {
                return node.textContent;
            });
        }) || [];

        // Texts and timestamps are paired by position.
        // NOTE(review): this assumes each tweet yields exactly one match for
        // both selectors — verify against the page markup.
        for (var i = 0; i < tweetTexts.length; i++)
        {
            allTweets.push({
                text : tweetTexts[i],
                time : tweetTimes[i]
            });
        }

        // hrefs of the "load more" button(s); empty on the last page.
        var moreLinks = page.evaluate(function() {
            return [].map.call(document.querySelectorAll('div.w-button-more a'), function(anchor) {
                return anchor.getAttribute('href');
            });
        }) || [];
        console.log(moreLinks);

        if (moreLinks.length > 0)
        {
            // The href is prefixed with the site origin to form the next URL.
            var nextUrl = "https://mobile.twitter.com" + moreLinks[0];
            save();
            scrapeMore(nextUrl);
            return;
        }

        console.log("doneeeee");
        save();
        phantom.exit();
    });
}
// Start scraping only when the argument count passed the usage check.
// NOTE(review): phantom.exit() appears not to stop the script synchronously,
// so without this guard a page load would still be started after the usage
// message was printed — confirm against the PhantomJS version in use.
if (args.length === 3)
{
    scrapeMore(url);
}
@divamgupta
Copy link
Author

You would need phantomjs version 1.9.8 for this.

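The command to run it is shown below (passing an empty URL, as here, makes the script fall back to its built-in default search URL):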

phantomjs script.js "" out.json

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment