Skip to content

Instantly share code, notes, and snippets.

@estevecastells
Last active August 4, 2023 07:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save estevecastells/f070c602042d31caf4031bcb7a6f8392 to your computer and use it in GitHub Desktop.
Save estevecastells/f070c602042d31caf4031bcb7a6f8392 to your computer and use it in GitHub Desktop.
Bookmarklet - Visualize URLs blocked by robots.txt on a page
javascript:(function () {
  /*
   * Bookmarklet: highlight the same-origin links on the current page that a
   * remote checking service reports as blocked by the site's robots.txt.
   *
   * Flow:
   *   1. Collect every same-origin, fragment-free link on the page.
   *   2. POST the link list plus the robots.txt URL to the checking API.
   *   3. Paint the anchors of blocked URLs red and show a small fixed panel
   *      with the blocked count, a robots.txt link and a copy button.
   *
   * The response is treated as an object keyed by URL whose value is falsy
   * when the URL is blocked, or as an object carrying an `error` property.
   *
   * Only block comments are used here: bookmarklet source is commonly
   * collapsed onto a single line, where a line comment would swallow all of
   * the code that follows it. Every statement is explicitly terminated for
   * the same reason.
   */
  const pageOrigin = window.location.origin;
  const robotsTxtUrl = `${pageOrigin}/robots.txt`;
  const apiUrl = 'https://tools.estevecastells.com/api/bulk-robots-txt/v1';
  const closeStyle = 'position:absolute;right:5px;top:0px;cursor:pointer;padding:0 4px;background-color:white;';
  const panelStyle = 'position:fixed;bottom:5px;right:5px;background-color:white;width:300px;padding:10px;border:1px solid grey;z-index:9999;';

  /*
   * Index anchors by their normalized absolute URL. Keeping the element
   * references avoids rebuilding CSS attribute selectors from URLs later,
   * which breaks on quotes/backslashes and misses relative hrefs.
   * Anchors without an href attribute are skipped: they are not links.
   */
  const anchorsByUrl = new Map();
  document.querySelectorAll('a[href]').forEach((anchor) => {
    let normalized;
    try {
      normalized = new URL(anchor.href, pageOrigin);
    } catch (err) {
      /* A single malformed href must not abort the whole run. */
      console.error(`Error parsing URL: ${anchor.href}`);
      return;
    }
    if (normalized.origin !== pageOrigin || normalized.href.includes('#')) {
      return;
    }
    const group = anchorsByUrl.get(normalized.href);
    if (group) {
      group.push(anchor);
    } else {
      anchorsByUrl.set(normalized.href, [anchor]);
    }
  });
  const links = Array.from(anchorsByUrl.keys());

  /* Return the anchors collected for `url`, or an empty array if none. */
  function findAnchors(url) {
    const direct = anchorsByUrl.get(url);
    if (direct) {
      return direct;
    }
    try {
      /* Retry with the normalized form in case the key is spelled differently. */
      return anchorsByUrl.get(new URL(url, pageOrigin).href) || [];
    } catch (err) {
      return [];
    }
  }

  /* Mark one anchor as blocked; `important` wins over the page's own CSS. */
  function highlight(anchor) {
    anchor.style.setProperty('background-color', 'red', 'important');
    anchor.style.setProperty('color', 'black', 'important');
    anchor.style.setProperty('border', '2px solid black', 'important');
    anchor.style.setProperty('padding', '3px', 'important');
  }

  /*
   * Create the floating panel inside a shadow root (so page styles do not
   * leak into it), including its close control, and return the content node.
   */
  function openPanel() {
    const host = document.createElement('div');
    const root = host.attachShadow({ mode: 'open' });
    const content = document.createElement('div');
    content.style.cssText = panelStyle;
    content.id = `box-${Math.random().toString(36).slice(2, 11)}`;

    const closeControl = document.createElement('span');
    closeControl.id = 'close';
    closeControl.style.cssText = closeStyle;
    closeControl.textContent = 'X';
    closeControl.addEventListener('click', () => {
      host.remove();
    });
    content.appendChild(closeControl);

    root.appendChild(content);
    document.body.appendChild(host);
    return content;
  }

  /* Append a <p> whose text is set via textContent, never parsed as HTML. */
  function appendParagraph(parent, text) {
    const paragraph = document.createElement('p');
    paragraph.textContent = text;
    parent.appendChild(paragraph);
    return paragraph;
  }

  fetch(apiUrl, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({ robots_txt_url: robotsTxtUrl, links: links }),
  })
    .then((response) => response.json())
    .then((data) => {
      if (data === null || typeof data !== 'object') {
        throw new Error('Unexpected response format');
      }
      const content = openPanel();

      if (data.error) {
        /* The error text comes from a remote service: render it as text only. */
        appendParagraph(content, String(data.error));
        return;
      }

      const blockedLinks = [];
      for (const [url, isAllowed] of Object.entries(data)) {
        if (isAllowed) {
          continue;
        }
        const anchors = findAnchors(url);
        if (anchors.length === 0) {
          continue;
        }
        anchors.forEach((anchor) => {
          highlight(anchor);
        });
        blockedLinks.push(url);
      }

      appendParagraph(content, `Blocked URLs count: ${blockedLinks.length}`);

      const robotsLink = document.createElement('a');
      robotsLink.href = robotsTxtUrl;
      robotsLink.target = '_blank';
      robotsLink.rel = 'noopener';
      robotsLink.style.cssText = 'display: block; margin-bottom: 10px;';
      robotsLink.textContent = 'Open robots.txt';
      content.appendChild(robotsLink);

      const copyButton = document.createElement('button');
      copyButton.id = 'copy';
      copyButton.style.cssText = 'margin-bottom: 10px;';
      copyButton.textContent = 'Copy Blocked URLs';
      copyButton.addEventListener('click', () => {
        /* navigator.clipboard is undefined outside secure contexts. */
        if (!navigator.clipboard) {
          console.error('Error copying blocked URLs: Clipboard API unavailable');
          return;
        }
        navigator.clipboard
          .writeText(blockedLinks.join('\n'))
          .then(() => {
            alert('Blocked URLs copied to clipboard.');
          })
          .catch((err) => {
            console.error(`Error copying blocked URLs: ${err}`);
          });
      });
      content.appendChild(copyButton);

      const note = document.createElement('p');
      const noteText = document.createElement('small');
      noteText.textContent = 'Note: Blocked by robots.txt links will be highlighted as red, if there is any.';
      note.appendChild(noteText);
      content.appendChild(note);
    })
    .catch((error) => {
      console.error(`Error fetching or processing data: ${error}`);
    });
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment