Skip to content

Instantly share code, notes, and snippets.

@heyjoeway
Last active October 16, 2019 05:01
Show Gist options
  • Save heyjoeway/236f04bf2cfa8c0f8c12f35d016fbc5f to your computer and use it in GitHub Desktop.
Save heyjoeway/236f04bf2cfa8c0f8c12f35d016fbc5f to your computer and use it in GitHub Desktop.
SocialBlade Top Channels Scraper
// Paste this code into the developer console while on this page:
// https://socialblade.com/youtube/top/5000/mostsubscribed
// In Chrome, open the Request Blocking tool, Enable Request Blocking, and add the following URL:
// ring.socialblade.com/ding.json
// SocialBlade seems to auto-refresh, so let's cancel every pending timeout to
// try to prevent that. Scheduling a no-op timeout yields the newest timer id;
// every id below it is then cleared.
// NOTE(review): assumes the browser hands out increasing integer timer ids.
let id = window.setTimeout(() => {}, 0);
for (id -= 1; id >= 0; id -= 1) {
  // Clearing an id that has no timeout attached is a harmless no-op.
  window.clearTimeout(id);
}
// We'll be waiting in between requests so we look less like a spambot.
/**
 * Returns a promise that resolves (with no value) after the given delay.
 *
 * @param {number} milliseconds - Delay before the promise resolves.
 * @returns {Promise<undefined>}
 */
const sleep = (milliseconds) =>
  new Promise((done) => {
    setTimeout(done, milliseconds);
  });
// Gather the href of every anchor found under the parent of the #sort-by
// element; anchors without an href attribute yield undefined here.
let allHrefUrls = $("#sort-by")
  .parent()
  .find("a")
  .toArray()
  .map((anchor) => $(anchor).attr("href"));
// Keep only the SocialBlade channel links, i.e. hrefs that begin with "/youtube/".
let filteredHrefUrls = allHrefUrls.filter(
  (href) => typeof href !== "undefined" && href.startsWith("/youtube/")
);
// Channel ids collected so far; the scrape loop stores each result at the
// index of the URL it was fetched from.
let channelIds = [];
// Periodically download data, just in case.
// Remember to allow automatic downloads for this site!
// https://ourcodeworld.com/articles/read/189/how-to-create-a-file-and-generate-a-download-with-javascript-in-the-browser-without-a-server
/**
 * Triggers a browser download of `text` as a plain-text file.
 *
 * A hidden anchor pointing at a data: URI is attached to the page, clicked
 * programmatically, and removed again.
 *
 * @param {string} filename - Name suggested for the downloaded file.
 * @param {string} text - File contents; URI-encoded into the data: URI.
 */
function download(filename, text) {
  const link = document.createElement('a');
  const attributes = {
    href: `data:text/plain;charset=utf-8,${encodeURIComponent(text)}`,
    download: filename,
  };
  for (const [attrName, attrValue] of Object.entries(attributes)) {
    link.setAttribute(attrName, attrValue);
  }
  // The anchor only exists to be clicked; keep it invisible.
  link.style.display = 'none';

  const { body } = document;
  body.appendChild(link);
  link.click();
  body.removeChild(link);
}
/**
 * Downloads the channel ids gathered so far as "backup.json".
 *
 * The current contents of `channelIds` are serialised with JSON.stringify,
 * so slots that have not been filled yet are written as null.
 */
function downloadChannelIds() {
  const snapshot = JSON.stringify(channelIds);
  download("backup.json", snapshot);
}
// Once a minute, download whatever has been collected so far as a backup.
let downloadInterval = setInterval(() => downloadChannelIds(), 60 * 1000);
// If we're aiming to get all 5000 top channels, firing the requests alone takes:
// 5000 * 0.5s = 2500s = ~42 min
(async () => {
  // Number of leading characters stripped from the YouTube link's href to
  // leave the bare channel id.
  // NOTE(review): 28 is the length of "https://youtube.com/channel/" —
  // confirm this still matches the SocialBlade markup.
  const CHANNEL_URL_PREFIX_LENGTH = 28;

  // Give up on a single page after this long so one hung request cannot
  // hold up the final download indefinitely.
  const REQUEST_TIMEOUT_MS = 30 * 1000;

  // One promise per request fired, so the final download can wait for
  // responses that are still in flight when the loop ends.
  const pendingRequests = [];

  // Fetches one SocialBlade channel page and, when a YouTube link is found,
  // stores its channel id at channelIds[index]. The returned promise resolves
  // once the request has completed, whether it succeeded or failed; failures
  // are logged and leave the slot empty.
  const fetchChannelId = (sbUrl, index) =>
    new Promise((resolve) => {
      $.ajax({
        url: sbUrl,
        type: "GET",
        dataType: "html",
        timeout: REQUEST_TIMEOUT_MS,
        success: function(data) {
          // Never let an exception escape this callback: it would keep
          // `complete` from running and the promise would never settle.
          try {
            const href = $(data)
              .find(".fa-youtube-play")
              .parent("a")
              .attr("href");
            if (typeof href !== "string") {
              console.warn("No YouTube link found on " + sbUrl);
              return;
            }
            channelIds[index] = href.slice(CHANNEL_URL_PREFIX_LENGTH);
          } catch (err) {
            console.warn("Could not parse " + sbUrl, err);
          }
        },
        error: function(jqXHR, textStatus, errorThrown) {
          console.warn("Request failed for " + sbUrl + ": " + textStatus, errorThrown);
        },
        // Resolve with no value: passing the jqXHR (a thenable) through would
        // make this promise adopt its possibly-rejected state.
        complete: function() {
          resolve();
        }
      });
    });

  try {
    for (let i = 0; i < filteredHrefUrls.length; i++) {
      const sbUrl = filteredHrefUrls[i];
      console.log("Current SB URL: " + sbUrl);
      console.log("Current index: " + i + "/" + (filteredHrefUrls.length - 1));
      pendingRequests.push(fetchChannelId(sbUrl, i));
      // Pace the requests; responses are collected in the background.
      await sleep(500);
    }
    // Wait for stragglers so the final file is not missing the last ids.
    await Promise.all(pendingRequests);
  } catch (err) {
    console.error("Scrape stopped early:", err);
  } finally {
    // Always stop the periodic backup and save what we have.
    clearInterval(downloadInterval);
    downloadChannelIds();
  }
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment