Last active
April 22, 2019 15:46
-
-
Save linux08/e4ff8dcd9c0262904b333de7f679674c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
exports.scrapeTwittter = async (req, res) => { | |
let ret = []; | |
const { search } = req.query; | |
try { | |
const browser = await puppeteer.launch({ args: ['--no-sandbox', '--disable-setuid-sandbox'] }); | |
const page = await browser.newPage(); | |
await page.goto(`https://twitter.com/search?f=tweets&vertical=default&q=${search}&src=typd`); | |
//set viewport for the autoscroll function | |
await page.setViewport({ | |
width: 1200, | |
height: 800 | |
}); | |
const tweets = await page.evaluate(function () { | |
//constant selector for the actual tweets on the screen | |
const TWEET_SELECTOR = '.js-stream-tweet'; | |
//grab the DOM elements for the tweets | |
let elements = Array.from(document.querySelectorAll(TWEET_SELECTOR)); | |
//create an array to return | |
let ret = []; | |
//get the info from within the tweet DOM elements | |
for (var i = 0; i < elements.length; i += 1) { | |
//object to store data | |
let tweet = {}; | |
//get text of tweet | |
const TWEET_TEXT_SELECTOR = ".tweet-text"; | |
tweet.text = elements[i].querySelector(TWEET_TEXT_SELECTOR).textContent; | |
//get timestamp | |
const TWEET_TIMESTAMP_SELECTOR = '.tweet-timestamp'; | |
tweet.timestamp = elements[i].querySelector(TWEET_TIMESTAMP_SELECTOR).getAttribute('title'); | |
//get tweet id | |
const TWEET_ID_SELECTOR = 'data-tweet-id'; | |
tweet.id = elements[i].getAttribute(TWEET_ID_SELECTOR); | |
//get likes/retweets | |
const ACTIONS_SELECTOR = ".ProfileTweet-actionCountForPresentation"; | |
let actions = elements[i].querySelectorAll(ACTIONS_SELECTOR); | |
//loop through the DOM elements for the actions | |
for (var j = 0; j < actions.length; j += 1) { | |
//for some reason, retweets are the 2nd action and likes are the 4th | |
tweet.retweets = actions[1].innerHTML ? actions[1].innerHTML : 0; | |
tweet.likes = actions[3].innerHTML ? actions[3].innerHTML : 0; | |
} | |
//add tweet data to return array | |
ret.push(tweet); | |
} | |
return ret; | |
}); | |
ret.push(tweets); | |
//close the page | |
await page.close(); | |
//close the browser | |
await browser.close(); | |
res.send(ret); | |
} | |
catch (err) { | |
res.status(500).send(err.message); | |
} | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment