Skip to content

Instantly share code, notes, and snippets.

@titomus
Last active May 18, 2017 21:03
Show Gist options
  • Save titomus/cd9db4cfad9010da6e84a6a3ee4f87b5 to your computer and use it in GitHub Desktop.
Save titomus/cd9db4cfad9010da6e84a6a3ee4f87b5 to your computer and use it in GitHub Desktop.
Bookmarklet pour Scraper Les Résultats Google
javascript: (function() {
/**
 * Extract the host name from a URL string using plain string operations.
 *
 * Accepts absolute URLs ("https://host/path"), protocol-relative URLs
 * ("//host/path") and scheme-less strings ("host/path"). The scheme, any
 * "user:password@" prefix, the port, the path, the query string and the
 * fragment are all removed. A bracketed IPv6 literal is returned with its
 * brackets ("[::1]").
 *
 * @param {string} url - The URL (or URL-like string) to inspect.
 * @returns {string} The host name, or "" when the input has none.
 */
function extractHostname(url) {
  /* Only strip a scheme that is at the very start of the string, so a
     "://" buried in a query string is not mistaken for the scheme. */
  var rest = url.replace(/^(?:[a-z][a-z0-9+.\-]*:)?\/\//i, '');

  /* The authority part ends at the first "/", "?" or "#". */
  var authority = rest.split(/[\/?#]/)[0];

  /* Drop "user:password@" so the port split below does not return the user. */
  var atIndex = authority.lastIndexOf('@');
  if (atIndex > -1) {
    authority = authority.slice(atIndex + 1);
  }

  /* IPv6 literals contain colons; keep everything up to the closing bracket. */
  if (authority.charAt(0) === '[') {
    var closingIndex = authority.indexOf(']');
    if (closingIndex > -1) {
      return authority.slice(0, closingIndex + 1);
    }
  }

  /* Whatever follows the first colon is the port. */
  return authority.split(':')[0];
}
/*
 * Bookmarklet body: walk every <a> element of the current page, keep the
 * links that pass the URL and id filters below, and write a report (table of
 * links plus three flat lists: URLs, anchor texts, host names) into a newly
 * opened window.
 *
 * Only block comments are used here on purpose: a bookmarklet is often
 * collapsed to a single line, where a line comment would swallow the code
 * that follows it.
 */

/* Escape a value so it can be inserted into HTML markup as plain text. */
function escapeHtml(text) {
  return String(text)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/* Matches only URLs that contain none of the listed fragments. */
var allowedUrlPattern = /^((?!google\.|cache|blogger.com|\.yahoo\.|youtube\.com\/\?gl=|youtube\.com\/results|javascript:|api\.technorati\.com|botw\.org\/search|del\.icio\.us\/url\/check|digg\.com\/search|search\.twitter\.com\/search|search\.yahoo\.com\/search|siteanalytics\.compete\.com|tools\.seobook\.com\/general\/keyword\/suggestions|web\.archive\.org\/web\/|whois\.domaintools\.com|www\.alexa\.com\/data\/details\/main|www\.bloglines\.com\/search|www\.majesticseo\.com\/search\.php|www\.semrush\.com\/info\/|www\.semrush\.com\/search\.php|www\.stumbleupon\.com\/url|wikipedia.org\/wiki\/Special:Search).)*$/i;
/* Matches only element ids that contain none of the listed fragments
   (presumably ids of Google toolbar links; confirm against the live page). */
var allowedIdPattern = /^((?!hdtb_more|hdtb_tls|uh_hl).)*$/i;

var output = '<html><head><title>SEO: Urls Google</title><style type=\'text/css\'>body,table{font-family:Tahoma,Verdana,Segoe,sans-serif;font-size:11px;color:#000}h1,h2,th{color:#405850}th{text-align:left}h2{font-size:11px;margin-bottom:3px}</style></head><body>';
var pageAnchors = document.getElementsByTagName('a');
var linkcount = 0;
var linkLocation = '';
var linkAnchorText = '';
var formated_links = '';
output += '<table><tr><th>ID</th><th>Lien</th><th>Ancre</th></tr>';
for (var i = 0; i < pageAnchors.length; i++) {
  var anchor = pageAnchors[i];

  /* Skip anchors whose grandparent carries the class "_Rm" (presumably a
     Google result sub-block; confirm). A missing grandparent, or one without
     getAttribute such as the document node, is treated as "not _Rm". */
  var grandParent = anchor.parentNode && anchor.parentNode.parentNode;
  var wrapperClass = null;
  if (grandParent && typeof grandParent.getAttribute === 'function') {
    wrapperClass = grandParent.getAttribute('class');
  }
  if (wrapperClass === '_Rm') {
    continue;
  }

  /* href is not a string on SVG <a> elements; those are skipped. */
  var anchorLink = anchor.href;
  if (typeof anchorLink !== 'string' || anchorLink === '') {
    continue;
  }
  if (!allowedUrlPattern.test(anchorLink)) {
    continue;
  }
  if (!allowedIdPattern.test(anchor.id)) {
    continue;
  }

  /* Resolve the anchor text before it is used anywhere below. */
  var anchorText = anchor.textContent;
  if (anchorText === undefined) {
    anchorText = anchor.innerText;
  }
  if (anchorText == null) {
    anchorText = '';
  }

  /* Everything taken from the page is escaped before being written out. */
  var safeLink = escapeHtml(anchorLink);
  var safeText = escapeHtml(anchorText);

  linkLocation += safeLink + '<br/>';
  formated_links += escapeHtml(extractHostname(anchorLink)) + '<br/>';
  linkAnchorText += safeText + '<br/>';
  linkcount++;
  output += '<tr>';
  output += '<td>' + linkcount + '</td>';
  output += '<td>' + safeLink + '</td>';
  output += '<td>' + safeText + '</td>';
  output += '</tr>\n';
}
output += '</table><br/><h2>Liste formatée</h2><div>';
output += linkLocation;
output += '</div><br/><h2>Ancres formatées</h2><div>';
output += linkAnchorText;
output += '</div><br/><h2>Liens formatés</h2><div>';
output += formated_links;
output += '</div><br/><br/><p align=center><a href="http://jeremy-allard.com">www.jeremy-allard.com</a></p></body></html>';

/* window.open() returns null when the popup is blocked. */
var resultWindow = window.open();
if (resultWindow) {
  resultWindow.document.write(output);
  resultWindow.document.close();
} else {
  window.alert("Impossible d'ouvrir la fenêtre de résultats : autorisez les pop-ups pour cette page.");
}
})();
@titomus
Copy link
Author

titomus commented May 18, 2017

Code du Bookmarklet pour Scraper Les Résultats Google

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment