Created
May 28, 2021 01:42
-
-
Save Ephellon/497684b35e02f20efe3fb166389b9445 to your computer and use it in GitHub Desktop.
Tweet Search Scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// How to - Search for tweets on Twitter, then run the code below. Scroll (click the mouse wheel for "auto" scrolling) for as long as needed.
// Scraping stops on its own once no new tweets have been collected for about 15 seconds.
/** Tweet Search Scraper - Scrapes Tweets from the search results page: https://twitter.com/search
 *
 * Function $(query:string[, multiple:boolean[, container:Node]]) -> Array|Node
 *   # Returns an array or node using `querySelectorAll` or `querySelector` (respectively)
 *
 * Object Tweets
 *   // The main object, containing the scraped tweets
 *   Getter size -> Number
 *     // Returns the current number of scraped tweets
 *   Object [TweetID]
 *     // Each tweet will be structured like this
 *     String content // the text contained in the tweet
 *     Node html // the HTML node that contains the tweet
 *     String id // the ID of the tweet (same as the `TweetID`)
 *     String link // the link to the tweet
 *     String timestamp // the UTC timestamp of the tweet (read from the `datetime` attribute)
 *     Object user
 *       // An object of user (tweet creator) data
 *       String displayName // the user's display name (may contain non-alphanumeric characters)
 *       String handle // the user's handle (begins with "@")
 *       String location // the link to the user's page
 *       String picture // the link to the user's profile picture
 *
 */
/**
 * Query helper wrapping `querySelector` / `querySelectorAll`.
 *
 * @param {string} query - CSS selector to look up.
 * @param {boolean} [multiple=false] - When true, collect every match into an array.
 * @param {ParentNode} [container=document] - Node whose descendants are searched.
 * @returns {Element[]|Element|null} An array of all matches when `multiple` is
 *   true; otherwise whatever `container.querySelector(query)` returns.
 */
let $ = (query, multiple = false, container = document) =>
  (multiple ? [...container.querySelectorAll(query)] : container.querySelector(query));
// Store of scraped tweets, keyed by tweet ID.
// `size` is defined as a NON-enumerable getter so that it is not counted by
// `Object.keys` (an enumerable getter would make an empty store report 1) and
// does not show up when the store is iterated or serialized.
let Tweets = Object.defineProperty({}, 'size', {
  get() { return Object.keys(this).length; },
  enumerable: false,
  configurable: true,
});
// Store size observed on the most recent tick that saw a change.
let Counter = 0;
// Number of consecutive ticks during which the store size did not change.
let Counted = 0;
/**
 * Interval handle for the scraper loop.
 *
 * Every 100 ms the callback queries the tweets currently in the search results,
 * stores one record per tweet in `Tweets` (keyed by tweet ID, so re-reading a
 * tweet simply overwrites its entry), and tracks how many consecutive ticks
 * passed without the store growing. After more than 150 idle ticks (about
 * 15 seconds) the interval is cleared and the result is logged.
 *
 * Tweets whose markup lacks the expected pieces are skipped or stored with
 * empty fields instead of throwing: an exception here would abort the tick
 * before the idle counter runs, so the loop would neither finish the remaining
 * tweets nor ever stop.
 */
let Collector = setInterval(() => {
  let tweets = $(`[aria-label*="search"i] [data-testid="tweet"i]`, true);

  for (let tweet of tweets) {
    // The first three anchors are read positionally: avatar link, author link, permalink.
    let [profile, user, date] = $('a', true, tweet);
    if (!profile || !user || !date)
      continue;

    // Without a <time> inside the third anchor it cannot be trusted as the
    // permalink, so no reliable ID is available for this tweet.
    let time = $('time', false, date);
    if (!time)
      continue;

    // Split at the LAST "@" so display names that themselves contain "@" stay intact.
    let label = user.textContent,
        split = label.lastIndexOf('@'),
        name = split < 0 ? label : label.slice(0, split),
        handle = split < 0 ? '' : '@' + label.slice(split + 1).trim(),
        twid = date.href.split('/').pop(),
        body = $('[dir][lang]', false, tweet),
        avatar = $('img', false, profile);

    Tweets[twid] = ({
      html: tweet,
      link: date.href,
      id: twid,
      // No text node was found for this tweet: store an empty string.
      content: body ? body.textContent : '',
      timestamp: time.getAttribute('datetime'),
      user: {
        picture: avatar ? avatar.src : '',
        location: user.href,
        displayName: name.trim(),
        handle: handle,
      },
    });
  }

  if (Tweets.size === Counter) {
    ++Counted;
  } else {
    // New tweets arrived: remember the new size and restart the idle count.
    Counter = Tweets.size;
    Counted = 0;
  }

  // Waits 15s (150 idle ticks x 100 ms) to stop scraping
  if (Counted > 150) {
    clearInterval(Collector);
    console.log('Done.', Tweets);
  }
}, 100);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment