Created
May 16, 2019 14:39
-
-
Save revolunet/b88f6d049a4f6f2c2a0021c856bf7cc9 to your computer and use it in GitHub Desktop.
Extracted place info from google search
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Scrape location data from a Google search.
const fetch = require("node-fetch"); | |
const jsdom = require("jsdom"); | |
const serialExec = require("promise-serial-exec"); | |
const { JSDOM } = jsdom; | |
/**
 * Fetch the Google search results page for a query.
 *
 * The query is percent-encoded into the `q` parameter and the request is sent
 * with a desktop Chrome user-agent header.
 *
 * The HTTP status is not inspected: whatever body the server returns is
 * handed back as text.
 *
 * @param {string} query - Free-text search terms.
 * @returns {Promise<string>} Resolves with the response body as text.
 */
const search = query => {
  const url = `https://www.google.com/search?q=${encodeURIComponent(query)}`;
  const headers = {
    "user-agent":
      "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36"
  };
  return fetch(url, { headers }).then(response => response.text());
};
/**
 * Extract place details from the knowledge panel of a Google results page.
 *
 * The HTML is parsed with JSDOM and the first `.knowledge-panel` element is
 * inspected. Fields whose node is missing are reported as `null` (title,
 * address, phone) or `undefined` (lat, lng) instead of throwing.
 *
 * @param {string} html - Markup of a Google search results page.
 * @returns {{
 *   title: ?string,
 *   lat: (string|undefined),
 *   lng: (string|undefined),
 *   address: ?string,
 *   phone: ?string,
 *   hours: Array<{day: string, hours: string}>
 * }|undefined} The extracted details, or `undefined` when the page has no
 *   knowledge panel. Coordinates are the strings captured from the map link.
 */
const extractKnowledge = html => {
  const { document } = new JSDOM(html).window;
  const knowledge = document.querySelector(".knowledge-panel");
  if (!knowledge) {
    return undefined;
  }

  // Text of the first node matching `selector` in the panel, or null.
  const readText = selector => {
    const node = knowledge.querySelector(selector);
    return node ? node.textContent : null;
  };

  // Remove a leading label such as "Adresse :" while keeping null as null.
  const stripLabel = (text, labelPattern) =>
    text === null ? null : text.replace(labelPattern, "");

  // Coordinates are read from the "/@lat,lng," segment of the map link URL.
  // The sign is optional so southern and western locations match as well.
  let lat;
  let lng;
  const positionNode = knowledge.querySelector("a[jsaction='jsa.logVedAndGo']");
  const mapUrl = positionNode && positionNode.dataset.url;
  const match = mapUrl
    ? mapUrl.match(/\/@(-?\d+(?:\.\d+)?),(-?\d+(?:\.\d+)?),/)
    : null;
  if (match) {
    lat = match[1];
    lng = match[2];
  }

  const title = readText("div[data-local-attribute='d3bn']");
  const address = stripLabel(
    readText("div[data-attrid='kc:/location/location:address']"),
    /^Adresse\s*:\s*/
  );
  const phone = stripLabel(
    readText(
      "div[data-attrid='kc:/collection/knowledge_panels/has_phone:phone']"
    ),
    /^Téléphone\s*:\s*/
  );

  // One entry per table row; rows without <td> cells are skipped.
  const hours = [];
  const rows = Array.from(
    knowledge.querySelectorAll("div[jsaction='oh.handleHoursAction'] table tr")
  );
  for (const row of rows) {
    const dayCell = row.querySelector("td:first-child");
    const hoursCell = row.querySelector("td:last-child");
    if (dayCell && hoursCell) {
      hours.push({ day: dayCell.textContent, hours: hoursCell.textContent });
    }
  }

  // The original returned an undeclared `dpt` here, which threw a
  // ReferenceError; that key is intentionally not part of the result.
  return {
    title,
    lat,
    lng,
    address,
    phone,
    hours
  };
};
// Example run: look up one shop and print whatever the knowledge panel yields.
// Failures (network errors, parsing errors) are logged and reflected in the
// process exit code instead of being left as an unhandled rejection.
search("boutique orange auxerre")
  .then(extractKnowledge)
  .then(console.log)
  .catch(error => {
    console.error(error);
    process.exitCode = 1;
  });
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment