Skip to content

Instantly share code, notes, and snippets.

@slaughtr
Last active December 13, 2019 20:29
Show Gist options
  • Save slaughtr/626e4066b344bbc42a0d5f8aabc9978a to your computer and use it in GitHub Desktop.
Save slaughtr/626e4066b344bbc42a0d5f8aabc9978a to your computer and use it in GitHub Desktop.
Twitter JS Scraper
const fetch = require('node-fetch');
const cheerio = require('cheerio');
/** Path prefix of hashtag links inside a tweet, e.g. `/hashtag/javascript?src=hash`. */
const HASHTAG_PATH_PREFIX = '/hashtag/';

/**
 * Renders one child node of a tweet paragraph as a text fragment.
 *
 * Text nodes contribute their trimmed content; hashtag links become `#tag`;
 * links carrying a `data-mentioned-user-id` attribute become `@user`.
 * Anything else (other links, other tags, whitespace-only text) is dropped.
 *
 * @param {object} child - Parsed DOM node exposing `type`, `data`, `name` and `attribs`.
 * @returns {string|null} The fragment, or `null` when the node contributes nothing.
 */
const renderTweetFragment = child => {
  if (child.type === 'text' && child.data) {
    // Trim so that joining fragments with a single space does not double up
    // the whitespace that already surrounds links in the markup.
    return child.data.trim() || null;
  }
  if (child.type !== 'tag' || child.name !== 'a') return null;

  const attribs = child.attribs || {};
  const href = attribs.href;
  if (!href) return null;

  // Match the prefix rather than any occurrence of "hashtag": a mention of a
  // user such as `/hashtagfan` must not be sliced as if it were a hashtag link.
  if (href.startsWith(HASHTAG_PATH_PREFIX)) {
    const tag = href.slice(HASHTAG_PATH_PREFIX.length).split('?')[0];
    return `#${tag}`;
  }
  if (attribs['data-mentioned-user-id']) {
    // Mention hrefs look like `/username`; swap the leading slash for `@`.
    return href.replace('/', '@');
  }
  return null;
};

/**
 * Converts one `.tweet-text` node into plain text.
 *
 * @param {object} node - Parsed DOM node matched by the `.tweet-text` selector.
 * @returns {string} Space-joined fragments, or `''` when the node is not a
 *   `<p>` element or yields no fragments.
 */
const extractTweetText = node => {
  if (node.type !== 'tag' || node.name !== 'p' || !node.children) return '';
  return node.children
    .map(renderTweetFragment)
    .filter(fragment => fragment !== null)
    .join(' ');
};

/**
 * Fetches a user's Twitter profile page and extracts the text of every
 * element matching `.tweet-text`.
 *
 * The resulting list is logged to the console and also returned to the caller.
 *
 * @param {string} username - Twitter handle, without the leading `@`.
 * @returns {Promise<string[]>} Text of each non-empty tweet, in document order.
 * @throws {Error} When the profile request does not return a successful HTTP status.
 */
const getTweets = async username => {
  const res = await fetch(`https://twitter.com/${encodeURIComponent(username)}`);
  if (!res.ok) {
    // Without this check an error page would be parsed as if it were a timeline.
    throw new Error(`Failed to fetch tweets for "${username}": HTTP ${res.status}`);
  }
  const page = await res.text();
  // NOTE(review): xmlMode is carried over unchanged from the original code.
  const $ = cheerio.load(page, {xmlMode: true});
  const actualTweets = $('.tweet-text')
    .toArray()
    .map(extractTweetText)
    .filter(text => text.length > 0);
  console.log(actualTweets);
  return actualTweets;
};
// Run the scraper for a sample account. getTweets logs the tweets it finds, so
// only failures need handling here: report them and signal a non-zero exit
// instead of leaving the promise rejection unhandled.
getTweets('iamdevloper').catch(err => {
  console.error(err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment