Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save myselfhimself/85897805926fe89d6cffb1511662dd30 to your computer and use it in GitHub Desktop.
Save myselfhimself/85897805926fe89d6cffb1511662dd30 to your computer and use it in GitHub Desktop.
Parish contact details scraping into CSV
// Just paste everything in any web browser's developer console and press enter to obtain a CSV file
// This is intended for scraping parish address details from a famous French city's website
// Tailor it at will
// Public domain - no attribution required
(() => {
// Text accumulated by console_log; it is written out with a single
// console.log call once scraping is finished.
let consoleLogBuffer = "";

/**
 * Appends `s` followed by a newline to the output buffer.
 *
 * @param {*} s - Value to append (concatenated with "\n").
 * @returns {string} The whole buffer after the append.
 */
const console_log = (s) => {
  const line = s + "\n";
  consoleLogBuffer += line;
  return consoleLogBuffer;
};
// Delimiter placed between the columns of every emitted CSV line.
const csvSeparator = "|";

// Columns describing the parish itself.
const identityColumns = ["parishName", "phoneNumber", "email"];

// Every kind of address takes up two consecutive columns ("...1" and "...2").
const addressColumns = [
  "recommendedPostAddress1",
  "recommendedPostAddress2",
  "postAddress1",
  "postAddress2",
  "churchAddress1",
  "churchAddress2",
  "address1",
  "address2",
];

// Full header row, in output order.
const csvHeader = [...identityColumns, ...addressColumns];
// Matches runs of one or two consecutive digits.
const re = /\d{1,2}/g;

/**
 * Re-groups the digits of a phone number into space-separated pairs,
 * e.g. "0142365789" -> "01 42 36 57 89".
 *
 * All whitespace (including non-breaking spaces) is removed before grouping;
 * a string `.replace(" ", "")` would only drop the first space and leave the
 * later ones acting as group boundaries. Non-digit, non-space characters are
 * not removed and therefore still split digit groups.
 *
 * @param {string|null|undefined} phoneNumberRaw - Phone number as scraped.
 * @returns {string} Digit groups joined by single spaces, or "" when the
 *   input is nullish or contains no digit.
 */
const asSpacedPhoneNumber = (phoneNumberRaw) => {
  const compact = String(phoneNumberRaw ?? "").replace(/\s+/g, "");
  // String.prototype.match returns null (not an empty array) when nothing matches.
  const groups = compact.match(re);
  return groups === null ? "" : groups.join(" ");
};
console_log(csvHeader.join(csvSeparator));
/**
 * Builds one CSV row for a parish.
 *
 * Column order: name (prefixed with "Paroisse "), phone number, email, then
 * four address pairs: recommended, postal, church and plain address. The
 * recommended pair is the first non-empty one among postal, plain and church
 * address, in that order.
 *
 * @param {object} parishData - Parish record; reads `name`, `phoneNumber`,
 *   `email`, `postAddress`, `churchAddress` and `address` (the address fields
 *   are arrays of strings when present).
 * @returns {string} The row, with columns joined by `csvSeparator`.
 */
const asCsvLine = (parishData) => {
  const { postAddress, address, churchAddress } = parishData;

  // A missing (or empty) address collapses to a lone separator, which turns
  // into two empty columns once the row itself is joined.
  const toColumns = (addressParts) =>
    addressParts?.join(csvSeparator) || csvSeparator;

  const recommended = [postAddress, address, churchAddress].find((parts) =>
    parts?.join(csvSeparator),
  );

  const columns = [
    `Paroisse ${parishData.name}`,
    parishData.phoneNumber,
    parishData.email,
    toColumns(recommended),
    toColumns(postAddress),
    toColumns(churchAddress),
    toColumns(address),
  ];
  return columns.join(csvSeparator);
};
/**
 * Splits a raw address line on ", " and requires exactly two parts.
 *
 * @param {string} rawAddressLine - Address text containing one ", " separator.
 * @returns {string[]} The two parts, in their original order.
 * @throws {TypeError} When `rawAddressLine` is not a string.
 * @throws {Error} When the line does not split into exactly two parts.
 */
const asAddressObject = (rawAddressLine) => {
  if (typeof rawAddressLine !== "string") {
    throw new TypeError(`address line is not a string: ${String(rawAddressLine)}`);
  }
  const parts = rawAddressLine.split(", ");
  if (parts.length !== 2) {
    // JavaScript has no built-in `Exception`; `new Exception(...)` would raise
    // a ReferenceError and hide this message.
    throw new Error(`address lines count issue: ${rawAddressLine}`);
  }
  return parts;
};
// <h4> texts that are skipped because they are not parish names
// (presumably menu/navigation headings of the page — verify against the site).
const nonParishHeadings = [
  "Choisir un arrondissement",
  "Prier, Célébrer",
  "S’informer",
  "Agir",
  "Donner à l’Église",
];

// Visit every <h4>: its text is the parish name and the <p> that immediately
// follows holds "label : value" details. `.each` is used because the callback
// runs only for its side effects (one CSV line appended per parish).
$("h4").each(function () {
  const parishData = { name: $(this).text() };
  if (nonParishHeadings.includes(parishData.name)) {
    return;
  }

  // Turn the paragraph text into a flat token list: line breaks/tabs and the
  // " : " label separator all become "|", then empty tokens are dropped.
  parishData.rawDetails = $(this)
    .next("p")
    .text()
    .replace(/[\f\t\n\v\r]+| : /g, "|")
    .split("|")
    .filter(Boolean);

  const details = parishData.rawDetails;
  let i = 0;
  // Stop once no token follows the current one: a trailing label has no value,
  // and must not wrap around to pick up the first token as its value.
  while (i + 1 < details.length) {
    const label = details[i].trim();
    const value = details[i + 1].trim();

    // NOTE(review): this label uses a straight apostrophe while the ignored
    // headings above use a typographic one (’) — confirm it matches the page.
    if (label === "Adresse de l'église") {
      parishData.churchAddress = asAddressObject(value);
    } else if (label === "Adresse postale") {
      parishData.postAddress = asAddressObject(value);
    } else if (label === "Adresse") {
      parishData.address = asAddressObject(value);
    } else if (label.startsWith("Tél")) {
      parishData.phoneNumber = asSpacedPhoneNumber(value);
    } else if (label === "Email") {
      parishData.email = value;
    } else if (label === "Site") {
      // Collected but not part of the CSV columns.
      parishData.website = value;
    } else {
      // Not a known label: move on by a single token.
      i++;
      continue;
    }
    // A label and its value were both consumed.
    i += 2;
  }

  console_log(asCsvLine(parishData));
});

// Print the header and all parish lines in one go.
console.log(consoleLogBuffer);
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment