Last active
August 4, 2023 07:44
-
-
Save estevecastells/f070c602042d31caf4031bcb7a6f8392 to your computer and use it in GitHub Desktop.
Bookmarklet - Visualize URLs blocked by robots.txt on a page
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
javascript:(function(){
  /*
   * Bookmarklet: visualize which same-origin links on the current page are
   * blocked by the site's robots.txt.
   *
   * Steps:
   *   1. Collect every <a href> that resolves to the page's own origin and
   *      contains no '#', grouped by absolute URL.
   *   2. POST the unique URLs plus the robots.txt URL to the checking API.
   *   3. Highlight the anchors whose URL comes back with a falsy value
   *      (treated as blocked) and show a small shadow-DOM panel with the
   *      count, a link to robots.txt and a "copy" button.
   *
   * Only block comments are used in this file: a bookmarklet may be
   * collapsed onto a single line, where a line comment would swallow all
   * the code that follows it.
   */
  const API_URL = 'https://tools.estevecastells.com/api/bulk-robots-txt/v1';
  const PANEL_STYLE = 'position:fixed;bottom:5px;right:5px;background-color:white;width:300px;padding:10px;border:1px solid grey;z-index:9999;';
  const CLOSE_STYLE = 'position:absolute;right:5px;top:0px;cursor:pointer;padding:0 4px;background-color:white;';
  const HIGHLIGHT_STYLE = [
    ['background-color', 'red'],
    ['color', 'black'],
    ['border', '2px solid black'],
    ['padding', '3px'],
  ];

  const origin = window.location.origin;
  const robotsTxtUrl = `${origin}/robots.txt`;

  /*
   * Absolute URL -> anchors pointing at it. Keeping the elements here means
   * blocked links can be highlighted later without re-querying the DOM by
   * attribute value, which misses hrefs written in relative form.
   */
  const anchorsByUrl = new Map();
  document.querySelectorAll('a[href]').forEach((anchor) => {
    /* SVG <a> elements expose href as an object, not a string; skip them. */
    if (typeof anchor.href !== 'string') {
      return;
    }
    let urlObj;
    try {
      urlObj = new URL(anchor.href, origin);
    } catch (err) {
      /* A single malformed href must not abort the whole scan. */
      console.error(`Error parsing URL: ${anchor.href}`);
      return;
    }
    const url = urlObj.href;
    if (urlObj.origin !== origin || url.includes('#')) {
      return;
    }
    const group = anchorsByUrl.get(url);
    if (group) {
      group.push(anchor);
    } else {
      anchorsByUrl.set(url, [anchor]);
    }
  });
  const links = Array.from(anchorsByUrl.keys());

  /* Create an element with optional text content and inline style. */
  const createElement = (tag, text, cssText) => {
    const node = document.createElement(tag);
    if (text) {
      /* textContent, never innerHTML: the text may come from the remote API. */
      node.textContent = text;
    }
    if (cssText) {
      node.style.cssText = cssText;
    }
    return node;
  };

  /*
   * Build the floating panel inside a shadow root (so page CSS does not leak
   * in) and attach it to the page only once it is fully assembled.
   */
  const showPanel = (children) => {
    const host = document.createElement('div');
    const shadow = host.attachShadow({ mode: 'open' });
    const panel = createElement('div', '', PANEL_STYLE);
    panel.id = `box-${Math.random().toString(36).slice(2, 11)}`;
    const closeButton = createElement('span', 'X', CLOSE_STYLE);
    closeButton.id = 'close';
    closeButton.addEventListener('click', () => { host.remove(); });
    panel.append(closeButton, ...children);
    shadow.appendChild(panel);
    document.body.appendChild(host);
  };

  /* Return the collected anchors for a URL key of the API response. */
  const findAnchors = (url) => {
    const direct = anchorsByUrl.get(url);
    if (direct) {
      return direct;
    }
    /* Fall back to the normalized form in case the API echoed the URL differently. */
    try {
      return anchorsByUrl.get(new URL(url, origin).href) || [];
    } catch (err) {
      return [];
    }
  };

  /* Paint one anchor so that page styles cannot override the highlight. */
  const highlight = (anchor) => {
    for (const [property, value] of HIGHLIGHT_STYLE) {
      anchor.style.setProperty(property, value, 'important');
    }
  };

  fetch(API_URL, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({ robots_txt_url: robotsTxtUrl, links: links })
  })
    .then((response) => response.json())
    .then((data) => {
      if (data === null || typeof data !== 'object') {
        throw new Error('Unexpected response from the robots.txt API');
      }
      if (data.error) {
        showPanel([createElement('p', String(data.error))]);
        return;
      }

      /* A falsy value for a URL is treated as "blocked by robots.txt". */
      const blockedLinks = [];
      for (const [url, isAllowed] of Object.entries(data)) {
        if (isAllowed) {
          continue;
        }
        const anchors = findAnchors(url);
        if (anchors.length > 0) {
          anchors.forEach(highlight);
          blockedLinks.push(url);
        }
      }

      const count = createElement('p', `Blocked URLs count: ${blockedLinks.length}`);

      const robotsLink = createElement('a', 'Open robots.txt', 'display: block; margin-bottom: 10px;');
      robotsLink.href = robotsTxtUrl;
      robotsLink.target = '_blank';
      robotsLink.rel = 'noopener';

      const copyButton = createElement('button', 'Copy Blocked URLs', 'margin-bottom: 10px;');
      copyButton.id = 'copy';
      copyButton.addEventListener('click', () => {
        navigator.clipboard.writeText(blockedLinks.join('\n'))
          .then(() => {
            alert('Blocked URLs copied to clipboard.');
          })
          .catch((err) => {
            /* Clipboard access can be denied (permissions, insecure context). */
            console.error(`Error copying blocked URLs: ${err}`);
          });
      });

      const note = document.createElement('p');
      note.appendChild(createElement('small', 'Note: Blocked by robots.txt links will be highlighted as red, if there is any.'));

      showPanel([count, robotsLink, copyButton, note]);
    })
    .catch((error) => {
      console.error(`Error fetching or processing data: ${error}`);
    });
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment