Skip to content

Instantly share code, notes, and snippets.

@joshski
Created July 24, 2018 10:00
Show Gist options
  • Save joshski/0f767f897c4564a56455ca38a325dfcd to your computer and use it in GitHub Desktop.
Save joshski/0f767f897c4564a56455ca38a325dfcd to your computer and use it in GitHub Desktop.
const express = require("express");
const cheerio = require("cheerio");
const httpism = require("httpism");
// Entry point: when PORT is set in the environment the server is started
// immediately; otherwise the module only exports startApp so that the caller
// can choose the port and supply its own fetchCities.
if (process.env.PORT) {
  // startApp returns a promise; handle its rejection here so a start-up
  // failure is reported and reflected in the exit code instead of surfacing
  // as an unhandled rejection.
  startApp({ port: process.env.PORT, fetchCities }).catch(err => {
    console.error(err);
    process.exitCode = 1;
  });
} else {
  module.exports = startApp;
}
/**
 * Creates an Express app with a single `GET /` route and starts listening.
 *
 * On each request the route calls `fetchCities()`, passes the resolved value
 * to `parseCities`, and sends the result as JSON.
 *
 * @param {object} options
 * @param {number|string} options.port - Port handed to `app.listen`.
 * @param {Function} options.fetchCities - Called with no arguments on every
 *   request; its awaited result is passed to `parseCities` as HTML.
 * @returns {Promise<void>} Resolves once the server emits `listening`;
 *   rejects if the server emits `error` before that.
 */
async function startApp({ port, fetchCities }) {
  const app = express();

  app.get("/", async (_, res, next) => {
    try {
      const html = await fetchCities();
      res.json(parseCities(html));
    } catch (err) {
      // Hand fetch/parse failures to Express so the request gets an error
      // response instead of hanging on an unhandled rejection.
      next(err);
    }
  });

  await new Promise((resolve, reject) => {
    // Listen for the server events directly rather than relying on the
    // app.listen callback being invoked with an error argument.
    const server = app.listen(port);
    server.once("error", reject);
    server.once("listening", () => {
      // After a successful start, later server errors must not be swallowed
      // by a listener that can no longer reject anything.
      server.removeListener("error", reject);
      resolve();
    });
  });
}
/**
 * Requests the Wikipedia "List of metropolitan areas in Asia" page.
 *
 * @returns {*} Whatever `httpism.get` returns for that URL; startApp treats
 *   it as a promise whose resolved value is the page HTML.
 */
function fetchCities() {
  const listPageUrl = "https://en.wikipedia.org/wiki/List_of_metropolitan_areas_in_Asia";
  return httpism.get(listPageUrl);
}
/**
 * Extracts one object per data row from the table that immediately follows
 * an `<h2>` containing the text "List".
 *
 * Columns are located by matching header-cell text against a regex, and the
 * position of the matching `<th>` among all of the table's `<th>` elements is
 * used as the `<td>` index within each row.
 *
 * When two columns produce the same key (both population columns yield
 * `population`), the value from the column listed first wins; a later column
 * only fills the key if the earlier one left it out.
 *
 * @param {string} html - Page markup to load with cheerio.
 * @returns {Array<object>} One object per row without header cells, with
 *   `name`, optional `url`, `country` and optional `population`.
 * @throws {Error} If no header cell matches one of the expected headings.
 */
function parseCities(html) {
  const $ = cheerio.load(html);
  const table = $("h2:contains('List') + table");
  const headings = table.find("th").toArray()
    .map((th, index) => ({ text: $(th).text().trim(), index }));

  const columns = [
    {
      heading: /Area/,
      parse: td => {
        const area = { name: td.text().trim() };
        const href = td.find("a").attr("href");
        // Only emit a url when the cell actually contains a link; otherwise
        // the string "undefined" would be glued onto the site origin.
        if (href !== undefined) {
          area.url = new URL(href, "https://en.wikipedia.org").href;
        }
        return area;
      }
    },
    { heading: /Country/, parse: td => ({ country: td.text().trim() }) },
    { heading: /Population/, parse: parsePopulation },
    { heading: /Demographia/, parse: parsePopulation }
  ].map(({ heading, parse }) => {
    const match = headings.find(h => heading.test(h.text));
    if (!match) {
      // Fail with a message naming the missing column instead of an opaque
      // "cannot read property 'index' of undefined".
      throw new Error(`Cities table has no column heading matching ${heading}`);
    }
    return { index: match.index, parse };
  });

  return table.find("tr:not(:has(th))").toArray().map(tr => {
    const cells = $(tr).find("td");
    // Already-collected values are applied last so earlier columns take
    // precedence over later ones.
    return columns.reduce(
      (city, column) => Object.assign({}, column.parse(cells.eq(column.index)), city),
      {}
    );
  });
}
/**
 * Parses a population figure out of a table cell.
 *
 * Bracketed annotations such as "[12]" and thousands-separator commas are
 * removed before the remaining text is converted with `Number`.
 *
 * @param {{ text: function(): string }} td - Cell wrapper exposing `text()`.
 * @returns {{ population: number }|{}} `{ population }` when the text yields
 *   a finite number greater than zero, otherwise an empty object.
 */
function parsePopulation(td) {
  const digits = td.text()
    // Strip each bracketed annotation on its own; a greedy match would also
    // delete any figure sitting between two annotations.
    .replace(/\[[^\]]*\]/g, "")
    .replace(/,/g, "")
    .trim();
  const population = Number(digits);
  return Number.isFinite(population) && population > 0 ? { population } : {};
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment