Last active
May 18, 2017 21:03
-
-
Save titomus/cd9db4cfad9010da6e84a6a3ee4f87b5 to your computer and use it in GitHub Desktop.
Bookmarklet pour Scraper Les Résultats Google
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
javascript: (function() {
/**
 * Extract the host name from a URL-like string.
 *
 * Accepts absolute URLs ("https://host:8080/path"), protocol-relative URLs
 * ("//host/path") and scheme-less strings ("host/path"). The port, path,
 * query string and fragment are dropped; for URLs with an explicit "//"
 * authority, any "user:password@" prefix is dropped as well.
 *
 * Only block comments are used so the code still works if the bookmarklet
 * is collapsed onto a single line.
 *
 * @param {string} url - URL or URL-like string to inspect.
 * @returns {string} The host name, or '' when none can be determined
 *     (including for null/undefined input).
 */
function extractHostname(url) {
  if (url === null || url === undefined) {
    return '';
  }

  var rest = String(url);

  /* Strip "scheme://" or a bare leading "//" so `rest` starts at the authority. */
  var authorityPrefix = /^(?:[a-z][a-z0-9+.\-]*:)?\/\//i.exec(rest);
  if (authorityPrefix) {
    rest = rest.slice(authorityPrefix[0].length);
  }

  /* The authority ends at the first path, query or fragment delimiter. */
  var authority = rest.split(/[\/?#]/)[0];

  if (authorityPrefix) {
    /* Drop "user:password@"; lastIndexOf yields -1 when absent, so slice(0). */
    authority = authority.slice(authority.lastIndexOf('@') + 1);

    /* Bracketed IPv6 literal: keep "[...]" whole instead of cutting at ':'. */
    if (authority.charAt(0) === '[') {
      var closingBracket = authority.indexOf(']');
      if (closingBracket !== -1) {
        return authority.slice(0, closingBracket + 1);
      }
    }
  }

  /* Whatever follows the first ':' is the port. */
  return authority.split(':')[0];
}
/*
 * Main bookmarklet body.
 *
 * Walks every <a> element of the current page, skips anchors whose
 * grandparent has class "_Rm" and anchors whose URL or id matches one of the
 * exclusion lists below, then writes an HTML report (numbered table plus
 * three flat lists: URLs, anchor texts, host names) into a new window.
 *
 * Only block comments are used so the code still works if the bookmarklet
 * is collapsed onto a single line.
 */

/**
 * Escape a value for inclusion in HTML text or attribute content.
 * Values scraped from the page must not be interpreted as markup in the report.
 *
 * @param {*} text - Value to escape; converted with String().
 * @returns {string} The HTML-escaped string.
 */
function escapeHtml(text) {
  return String(text)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/* Matches only URLs that contain none of the excluded fragments. Hoisted out of the loop. */
var allowedLinkPattern = /^((?!google\.|cache|blogger.com|\.yahoo\.|youtube\.com\/\?gl=|youtube\.com\/results|javascript:|api\.technorati\.com|botw\.org\/search|del\.icio\.us\/url\/check|digg\.com\/search|search\.twitter\.com\/search|search\.yahoo\.com\/search|siteanalytics\.compete\.com|tools\.seobook\.com\/general\/keyword\/suggestions|web\.archive\.org\/web\/|whois\.domaintools\.com|www\.alexa\.com\/data\/details\/main|www\.bloglines\.com\/search|www\.majesticseo\.com\/search\.php|www\.semrush\.com\/info\/|www\.semrush\.com\/search\.php|www\.stumbleupon\.com\/url|wikipedia.org\/wiki\/Special:Search).)*$/i;
/* Matches only element ids that contain none of the excluded id fragments. */
var allowedIdPattern = /^((?!hdtb_more|hdtb_tls|uh_hl).)*$/i;

var output = '<html><head><title>SEO: Urls Google</title><style type=\'text/css\'>body,table{font-family:Tahoma,Verdana,Segoe,sans-serif;font-size:11px;color:#000}h1,h2,th{color:#405850}th{text-align:left}h2{font-size:11px;margin-bottom:3px}</style></head><body>';
output += '<table><tr><th>ID</th><th>Lien</th><th>Ancre</th></tr>';

var pageAnchors = document.getElementsByTagName('a');
var linkcount = 0;
var linkLocation = '';
var linkAnchorText = '';
var formated_links = '';

for (var i = 0; i < pageAnchors.length; i++) {
  var anchor = pageAnchors[i];

  /* Guard the ancestor chain: it may be short or end on a node without getAttribute. */
  var grandParent = anchor.parentNode && anchor.parentNode.parentNode;
  if (grandParent && typeof grandParent.getAttribute === 'function' &&
      grandParent.getAttribute('class') === '_Rm') {
    continue;
  }

  /* href may be empty, or a non-string on non-HTML <a> elements (e.g. SVG). */
  var anchorLink = anchor.href;
  if (typeof anchorLink !== 'string' || anchorLink === '') {
    continue;
  }

  if (!allowedLinkPattern.test(anchorLink) || !allowedIdPattern.test(anchor.id)) {
    continue;
  }

  /* Resolve the text fallback BEFORE the text is used anywhere. */
  var anchorText = anchor.textContent;
  if (anchorText === undefined) {
    anchorText = anchor.innerText;
  }

  var safeLink = escapeHtml(anchorLink);
  var safeText = escapeHtml(anchorText);

  linkcount++;
  linkLocation += safeLink + '<br/>';
  formated_links += escapeHtml(extractHostname(anchorLink)) + '<br/>';
  linkAnchorText += safeText + '<br/>';

  output += '<tr>';
  output += '<td>' + linkcount + '</td>';
  output += '<td>' + safeLink + '</td>';
  output += '<td>' + safeText + '</td>';
  output += '</tr>\n';
}

output += '</table><br/><h2>Liste formatée</h2><div>';
output += linkLocation;
output += '</div><br/><h2>Ancres formatées</h2><div>';
output += linkAnchorText;
output += '</div><br/><h2>Liens formatés</h2><div>';
output += formated_links;
output += '</div><br/><br/><p align=center><a href="http://jeremy-allard.com">www.jeremy-allard.com</a></p>';
output += '</body></html>';

/* window.open() returns null when the popup is blocked; avoid `with` and handle that case. */
var reportWindow = window.open();
if (reportWindow) {
  reportWindow.document.write(output);
  reportWindow.document.close();
} else {
  window.alert('Impossible d\'ouvrir une nouvelle fenêtre : autorisez les pop-ups pour cette page.');
}
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Code du Bookmarklet pour Scraper Les Résultats Google