Skip to content

Instantly share code, notes, and snippets.

@linux08
Last active April 22, 2019 15:46
Show Gist options
  • Save linux08/e4ff8dcd9c0262904b333de7f679674c to your computer and use it in GitHub Desktop.
Save linux08/e4ff8dcd9c0262904b333de7f679674c to your computer and use it in GitHub Desktop.
exports.scrapeTwittter = async (req, res) => {
let ret = [];
const { search } = req.query;
try {
const browser = await puppeteer.launch({ args: ['--no-sandbox', '--disable-setuid-sandbox'] });
const page = await browser.newPage();
await page.goto(`https://twitter.com/search?f=tweets&vertical=default&q=${search}&src=typd`);
//set viewport for the autoscroll function
await page.setViewport({
width: 1200,
height: 800
});
const tweets = await page.evaluate(function () {
//constant selector for the actual tweets on the screen
const TWEET_SELECTOR = '.js-stream-tweet';
//grab the DOM elements for the tweets
let elements = Array.from(document.querySelectorAll(TWEET_SELECTOR));
//create an array to return
let ret = [];
//get the info from within the tweet DOM elements
for (var i = 0; i < elements.length; i += 1) {
//object to store data
let tweet = {};
//get text of tweet
const TWEET_TEXT_SELECTOR = ".tweet-text";
tweet.text = elements[i].querySelector(TWEET_TEXT_SELECTOR).textContent;
//get timestamp
const TWEET_TIMESTAMP_SELECTOR = '.tweet-timestamp';
tweet.timestamp = elements[i].querySelector(TWEET_TIMESTAMP_SELECTOR).getAttribute('title');
//get tweet id
const TWEET_ID_SELECTOR = 'data-tweet-id';
tweet.id = elements[i].getAttribute(TWEET_ID_SELECTOR);
//get likes/retweets
const ACTIONS_SELECTOR = ".ProfileTweet-actionCountForPresentation";
let actions = elements[i].querySelectorAll(ACTIONS_SELECTOR);
//loop through the DOM elements for the actions
for (var j = 0; j < actions.length; j += 1) {
//for some reason, retweets are the 2nd action and likes are the 4th
tweet.retweets = actions[1].innerHTML ? actions[1].innerHTML : 0;
tweet.likes = actions[3].innerHTML ? actions[3].innerHTML : 0;
}
//add tweet data to return array
ret.push(tweet);
}
return ret;
});
ret.push(tweets);
//close the page
await page.close();
//close the browser
await browser.close();
res.send(ret);
}
catch (err) {
res.status(500).send(err.message);
}
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment