Skip to content

Instantly share code, notes, and snippets.

@CryogenicPlanet
Last active May 1, 2020 16:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save CryogenicPlanet/b2dd54a8c946999e9fe497b33ae2037a to your computer and use it in GitHub Desktop.
Save CryogenicPlanet/b2dd54a8c946999e9fe497b33ae2037a to your computer and use it in GitHub Desktop.
A client-side Twitter DOM scraper. Full post: https://medium.com/etwas/twitter-client-side-dom-scrapping-6f5a36ce3243
// Functions
/**
 * Parse one rendered tweet element into a plain object.
 *
 * The element's `innerText` is split on newlines and read positionally:
 * line 0 is taken as the display name, line 1 as the username, the last three
 * lines as the reply / retweet / like values, and everything between the
 * timestamp line and those three lines as the tweet content.
 *
 * @param {Element} tweetDom - Element wrapping a single tweet. Only `innerText`
 *   and `getElementsByTagName` are used.
 * @returns {Promise<{name: string, username: string, time: string,
 *   content: string, interaction: {reply: string, retweets: string, like: string}}>}
 *   The parsed tweet. `time` is the `datetime` attribute of the first `<time>`
 *   descendant, or "" when the element has no `<time>` descendant.
 */
let tweetParser = async function (tweetDom) {
  const lines = tweetDom.innerText.split(/\n/);
  // Index of the last content line; the three lines after it are the counters.
  const endContent = lines.length - 4;

  // Not every element handed to this parser is guaranteed to contain a <time>
  // node (presumably e.g. promoted tweets -- TODO confirm). Without one, fall
  // back to the defaults instead of throwing and aborting the caller's scan.
  const timeElm = tweetDom.getElementsByTagName("time")[0];
  let time = "";
  let breakpoint = 4; // default header length when the timestamp line is not found
  if (timeElm) {
    time = timeElm.getAttribute("datetime");
    // The displayed timestamp closes the header; the content starts right after
    // it. The last matching line wins.
    const timeLine = lines.lastIndexOf(timeElm.innerText);
    if (timeLine !== -1) {
      breakpoint = timeLine;
    }
  }

  return {
    name: lines[0],
    username: lines[1],
    time,
    content: lines.slice(breakpoint + 1, endContent + 1).join("\n"),
    interaction: {
      reply: lines[endContent + 1],
      retweets: lines[endContent + 2],
      like: lines[endContent + 3],
    },
  };
};
/**
 * Collect every tweet currently present in the document and parse it.
 *
 * Tweet elements are the `<div>`s whose `data-testid` attribute equals
 * "tweet". A tweet's ID is read from the `href` of the `<a>` tags inside it
 * that contain "/status/"; each distinct ID is parsed once with `tweetParser`.
 *
 * @returns {Promise<Object<string, object>>} Parsed tweets keyed by tweet ID.
 */
async function getTweets() {
  // All working state is local: nothing is leaked onto the global object, so
  // the function also works in strict mode / ES modules.
  const tweetNodes = document.querySelectorAll('div[data-testid="tweet"]');
  const parsedTweets = {};

  for (const tweetNode of tweetNodes) {
    // Find the tweet ID by inspecting every <a> tag within the tweet.
    for (const anchor of tweetNode.getElementsByTagName("a")) {
      const href = anchor.getAttribute("href");
      // getAttribute returns null for anchors without an href.
      if (!href || !href.includes("/status/")) {
        continue;
      }
      // Keep only the ID segment: ".../status/123/photo/1" and
      // ".../status/123?s=20" both refer to tweet 123 and must not be stored
      // (and re-parsed) under separate keys.
      const tweetId = href.split("/status/")[1].split(/[/?#]/)[0];
      if (tweetId === "" || tweetId in parsedTweets) {
        continue;
      }
      // NOTE(review): if a tweet element contains status links with different
      // IDs, the same element is parsed once per ID -- confirm this is intended.
      parsedTweets[tweetId] = await tweetParser(tweetNode);
    }
  }
  return parsedTweets;
}
// ------- End of Functions ------
// Main
/**
 * Entry point: scrape the tweets currently on the page, then re-scrape on
 * every window "scroll" event and merge the results into a running
 * collection kept in this closure. Progress is reported via console.log.
 *
 * @returns {Promise<void>} Resolves once the initial scrape has finished and
 *   the scroll listener has been registered.
 */
let main = async function () {
  // Running collection of every tweet parsed so far, keyed by tweet ID.
  let parsedTweetsGlobal = await getTweets();

  const onScroll = async () => {
    const latest = await getTweets();
    console.log("From Scroll Event Listener ", Object.keys(latest).length);

    // Report each tweet ID that was not in the collection before this scrape.
    for (const tweetId of Object.keys(latest)) {
      if (tweetId in parsedTweetsGlobal) {
        continue;
      }
      console.log("New Distinct Tweet from Scroll Event");
    }

    // Later keys win, so a re-parsed tweet replaces its earlier entry.
    parsedTweetsGlobal = { ...parsedTweetsGlobal, ...latest };
    console.log(
      "New Key Length From Scroll Even",
      Object.keys(parsedTweetsGlobal).length
    );
  };

  window.addEventListener("scroll", onScroll);
};
// Start the scraper. main() returns a promise; report a start-up failure
// instead of leaving the rejection unhandled.
main().catch((error) => {
  console.error("Twitter DOM scraper failed to start", error);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment