Skip to content

Instantly share code, notes, and snippets.

@swashcap
Created August 17, 2019 17:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save swashcap/d6786f14ff340e285d4b0ab6d785a6bf to your computer and use it in GitHub Desktop.
Save swashcap/d6786f14ff340e285d4b0ab6d785a6bf to your computer and use it in GitHub Desktop.
/**
* Transform RSS-style XML with geolocation data to YAML
*
* {@link https://yaml.org}
*
* ```
* mkdir -p data
* yarn add hard-rejection cheerio he fast-xml-parser js-yaml
* # ...
* node xml-to-yaml.js
* ```
*/
require('hard-rejection/register')
const cheerio = require('cheerio')
const he = require('he')
const path = require('path')
const xml = require('fast-xml-parser')
const yaml = require('js-yaml')
const { readFile, writeFile } = require('fs').promises
/**
 * Entry point: reads `data.xml` from this script's directory, groups its RSS
 * items by decoded title and writes one YAML document per group into the
 * `data/` directory next to this script.
 *
 * Each document has the shape
 * `{ name, links, locations: [{ name, address: { street, zip }, geo }] }`.
 * Once every write has finished, the written paths are printed one per line.
 *
 * The returned promise rejects when the XML fails validation, or when an item
 * lacks numeric coordinates, an address, or a trailing ZIP code.
 */
;(async () => {
  const data = await readFile(path.join(__dirname, 'data.xml'), 'utf8')

  // `validate` returns `true` for well-formed input and a (truthy) error
  // object otherwise, so the result must be compared against `true`; a plain
  // negation never fires.
  const validation = xml.validate(data)
  if (validation !== true) {
    const detail =
      validation && validation.err && validation.err.msg
        ? `: ${validation.err.msg}`
        : ''
    throw new Error(`data is invalid XML${detail}`)
  }

  const addressTag = /<\/?address>/g
  const brTag = /<br\s*\/?>/g
  const zipPattern = /(\d{5})(-\d+)?$/

  // The parser only produces an array for repeated tags: a feed with a single
  // <item> yields a bare object, and one without items yields nothing.
  const parsedItems = xml.parse(data).rss.channel.item
  const feedItems = parsedItems === undefined ? [] : [].concat(parsedItems)

  // A Map (rather than a plain object) keeps titles such as "constructor"
  // from colliding with properties inherited from Object.prototype.
  const groups = new Map()

  for (const item of feedItems) {
    const name = he.decode(item.title)

    if (!groups.has(name)) {
      groups.set(name, {
        name,
        links: [item.link],
        locations: [],
      })
    }
    const group = groups.get(name)

    // Later items sharing a title may carry a different link; keep each
    // distinct one instead of only the first item's.
    if (item.link && !group.links.includes(item.link)) {
      group.links.push(item.link)
    }

    if (
      typeof item['geo:lat'] !== 'number' ||
      typeof item['geo:long'] !== 'number'
    ) {
      throw new Error(`${item.title} lacks coordinates`)
    }

    const encoded = item['content:encoded']
    if (typeof encoded !== 'string') {
      throw new Error(`${item.title} lacks an address`)
    }

    // Strip the <address> wrapper, then treat every <br>-separated,
    // non-empty chunk as one address line.
    const address = encoded
      .replace(addressTag, '')
      .split(brTag)
      .map(line => line.trim())
      .filter(Boolean)

    // The ZIP code is expected at the very end of the last address line.
    const zipMatch =
      address.length > 0 ? address[address.length - 1].match(zipPattern) : null
    if (zipMatch === null) {
      throw new Error(`${item.title} lacks a ZIP code`)
    }

    // The captured group is always five digits, so this never yields NaN.
    // NOTE(review): storing the ZIP as a number drops leading zeros
    // (e.g. 02139 becomes 2139); kept numeric to preserve the output schema.
    const zip = Number.parseInt(zipMatch[1], 10)

    group.locations.push({
      name: item.description ? he.decode(item.description) : name,
      address: {
        // With exactly three lines the street is the second one; otherwise
        // the first line is used.
        street: address[address.length === 3 ? 1 : 0],
        zip,
      },
      geo: {
        latitude: item['geo:lat'],
        longitude: item['geo:long'],
      },
    })
  }

  // Start every write up front so they run concurrently; each entry pairs the
  // target path with its pending write.
  const writes = Array.from(groups, ([name, group]) => {
    const slug = name
      .toLowerCase()
      .replace(/\.|&/g, ' ')
      .replace(/’/g, '')
      .trim()
      .replace(/ +/g, '-')
    const filename = path.join(__dirname, 'data', `${slug}.yml`)

    return [filename, writeFile(filename, yaml.safeDump(group))]
  })

  await Promise.all(writes.map(([, pending]) => pending))
  console.log(writes.map(([filename]) => filename).join('\n'))
})()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment