Skip to content

Instantly share code, notes, and snippets.

@giehlman
Created November 15, 2021 19:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save giehlman/18a6195280a69eebe3925d32d50876e3 to your computer and use it in GitHub Desktop.
Save giehlman/18a6195280a69eebe3925d32d50876e3 to your computer and use it in GitHub Desktop.
Gelbe Seiten DOM Scraper
// Copy & paste this whole script into the Chrome DevTools console.
// Collects one record per element with class "mod-Treffer" into `all`
// and logs each record as it is produced.
//
// `hits` and `all` are intentionally kept as `var`: they are the script's
// outputs and stay reachable as globals in the console after the run.
// The per-iteration temporaries are block-scoped so they do not leak onto
// the page's global object.
var hits = document.getElementsByClassName("mod-Treffer");
var all = [];
for (const hit of hits) {
  const record = getLineFromDomHit(hit);
  console.log(record);
  all.push(record);
}
/**
 * Builds a plain record from one hit element.
 *
 * Reads the text of the first <h2> as the title and the text of the first
 * <p> inside the first <address> as the address line. The address line is
 * split on "," — the part before the first comma becomes `street`, the part
 * after it becomes `postalcity` (presumably "postal code + city"; verify
 * against the page markup).
 *
 * Missing pieces no longer throw: any part that cannot be found is returned
 * as an empty string, so one malformed hit does not abort a whole scrape.
 *
 * @param {Element} hit - Root element of a single hit.
 * @returns {{title: string, street: string, postalcity: string}}
 */
function getLineFromDomHit(hit) {
  const heading = hit.getElementsByTagName("h2")[0];
  const title = heading ? heading.textContent : "";

  const address = hit.getElementsByTagName("address")[0];
  const paragraph = address ? address.getElementsByTagName("p")[0] : undefined;
  if (!paragraph) {
    return { title: title, street: "", postalcity: "" };
  }

  // Work on a deep clone so that stripping the distance element does not
  // modify the live page.
  const cloned = paragraph.cloneNode(true);
  const distance = cloned.getElementsByClassName("mod-AdresseKompakt__entfernung")[0];
  if (distance) {
    // Not every hit carries a distance element; only remove it when present.
    distance.remove();
  }

  // Collapse runs of whitespace (including line breaks) into single spaces.
  const clean = cloned.innerText.replace(/\s\s+/g, ' ').trim();
  const parts = clean.split(",");

  return {
    title: title,
    street: parts[0].trim(),
    // Address text without a comma yields an empty postal/city part.
    postalcity: parts.length > 1 ? parts[1].trim() : ""
  };
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment