Skip to content

Instantly share code, notes, and snippets.

@bertspaan
Created February 9, 2017 16:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bertspaan/84461a16a0da0b5d9542e9bf597a60a1 to your computer and use it in GitHub Desktop.
Save bertspaan/84461a16a0da0b5d9542e9bf597a60a1 to your computer and use it in GitHub Desktop.
Building Inspector ETL script for NYC Space/Time Directory (adapted)
'use strict'
const fs = require('fs')
const path = require('path')
const request = require('request')
const async = require('async')
const H = require('highland')
const JSONStream = require('JSONStream')
// Root URL of the Building Inspector API. Dataset names are appended to it
// directly (`${baseUrl}${name}`), so the trailing slash is required.
const baseUrl = 'http://buildinginspector.nypl.org/api/'
/**
 * Adapt a Node-style callback function to a Promise.
 *
 * `fun` is invoked with `args` followed by an `(err, ...results)` callback.
 * A truthy `err` rejects the promise; otherwise the promise resolves with the
 * first result (any further results are dropped, as a Promise carries a
 * single value).
 *
 * @param {Function} fun - Function whose last parameter is a Node-style callback.
 * @param {...*} args - Arguments passed to `fun` ahead of the callback.
 * @returns {Promise<*>} Promise settled by the callback `fun` receives.
 */
const createPromise = (fun, ...args) => {
  const executor = (resolve, reject) => {
    const settle = (err, ...results) => {
      if (err) {
        reject(err)
        return
      }
      resolve(...results)
    }

    fun.call(this, ...args, settle)
  }

  return new Promise(executor)
}
/**
 * Fetch `url` and pass the response body, parsed as JSON, to `callback`.
 *
 * @param {number} sleep - Milliseconds to wait after a successful response
 *   before invoking `callback`; a falsy value calls back immediately.
 * @param {string} url - URL to request.
 * @param {Function} callback - Node-style callback: receives the request
 *   error or the JSON parse error, or `null` plus the parsed body.
 */
var requestCallback = function (sleep, url, callback) {
  request(url, (err, response, body) => {
    if (err) {
      callback(err)
      return
    }

    // Parse before (and outside of) invoking the callback: a non-JSON body
    // must be reported through `callback`, not thrown from an async handler
    // where nothing can catch it.
    let json
    try {
      json = JSON.parse(body)
    } catch (parseErr) {
      parseErr.message = `Invalid JSON from ${url}: ${parseErr.message}`
      callback(parseErr)
      return
    }

    if (sleep) {
      setTimeout(() => {
        callback(null, json)
      }, sleep)
    } else {
      callback(null, json)
    }
  })
}
/**
 * Download a dataset to `filename`.
 *
 * Paginated mode requests `<baseUrl>/page/1`, `<baseUrl>/page/2`, ... (one
 * second apart) until a page has no features, and writes all features as a
 * single GeoJSON FeatureCollection. Non-paginated mode streams the response
 * body of `baseUrl` to the file unchanged (the HTTP status is not checked).
 *
 * @param {string} baseUrl - Dataset URL (without the `/page/N` suffix).
 * @param {boolean} paginated - Whether the dataset is served page by page.
 * @param {string} filename - Path of the file to write.
 * @param {?Function} onPage - Optional `(err, page, url, json)` hook called
 *   after every page request (paginated mode only).
 * @param {Function} callback - Called exactly once: with an error if a
 *   request or the file write failed, otherwise once the file is flushed.
 */
var allPagesToFile = function (baseUrl, paginated, filename, onPage, callback) {
  // Several streams can signal completion or failure; report only the first.
  let settled = false
  const done = (err) => {
    if (settled) {
      return
    }
    settled = true
    callback(err)
  }

  const fileStream = fs.createWriteStream(filename)
  // Without a listener, an unwritable path would crash the process.
  fileStream.on('error', done)

  if (paginated) {
    let loopError = null
    // Report back only after everything has been flushed to disk.
    fileStream.on('finish', () => {
      done(loopError)
    })

    const writeStream = H()
    writeStream.flatten()
      .pipe(JSONStream.stringify('{\n"type":"FeatureCollection","features":[', '\n,\n', '\n]}\n'))
      .pipe(fileStream)

    let page = 0
    let nextPage = false
    async.doWhilst(
      (next) => {
        page += 1
        const url = baseUrl + '/page/' + page
        requestCallback(1000, url, (err, json) => {
          if (onPage) {
            onPage(err, page, url, json)
          }
          nextPage = Boolean(json && json.features && json.features.length > 0)
          if (nextPage) {
            writeStream.write(json.features)
          }
          next(err)
        })
      },
      // Stop at the first empty page, or as soon as the file write has failed.
      () => nextPage && !settled,
      (err) => {
        loopError = err
        // Ending the source closes the pipeline, which triggers 'finish'.
        writeStream.end()
      })
  } else {
    const source = request(baseUrl)
    source.on('error', (err) => {
      done(err)
      // pipe() does not end the destination when the source fails.
      fileStream.end()
    })
    fileStream.on('error', () => {
      // Nothing left to write to; stop the download if the source supports it.
      if (typeof source.abort === 'function') {
        source.abort()
      }
    })
    fileStream.on('finish', () => {
      done()
    })
    source.pipe(fileStream)
  }
}
/**
 * Download the three Building Inspector datasets, one after another, into
 * `dirs.current` as `<name>.geojson` files.
 *
 * @param {{current: string}} dirs - Directories object; files are written to
 *   `dirs.current`.
 * @param {Function} callback - Called without arguments once all datasets are
 *   downloaded, or with the error that interrupted the sequence.
 */
function download (dirs, callback) {
  // Progress reporter passed along to allPagesToFile.
  const reportPage = (err, page, url) => {
    if (!err) {
      console.log(` Downloaded ${url}`)
      return
    }
    console.error(` Error downloading ${url}`)
  }

  const fetchDataset = (name, paginated) => {
    const target = path.join(dirs.current, `${name}.geojson`)
    return createPromise(allPagesToFile, `${baseUrl}${name}`, paginated, target, reportPage)
  }

  // Download three Building Inspector datasets:
  // - http://buildinginspector.nypl.org/api/consolidated/page/:page
  // - http://buildinginspector.nypl.org/api/toponyms
  // - http://buildinginspector.nypl.org/api/sheets/
  const datasets = [
    { name: 'consolidated', paginated: true, doneMessage: ' Done downloading consolidated polygons!' },
    { name: 'toponyms', paginated: false, doneMessage: ' Done downloading toponyms!' },
    { name: 'sheets', paginated: false, doneMessage: ' Done downloading sheets!' }
  ]

  // The first download starts right away; each later one starts only after
  // its predecessor has finished.
  let pending = fetchDataset(datasets[0].name, datasets[0].paginated)
  datasets.forEach((dataset, index) => {
    const upcoming = datasets[index + 1]
    pending = pending.then(() => {
      console.log(dataset.doneMessage)
      if (upcoming) {
        return fetchDataset(upcoming.name, upcoming.paginated)
      }
      callback()
    })
  })

  pending.catch((err) => {
    callback(err)
  })
}
// ==================================== API ====================================
// Write the downloaded files next to this script.
const dirs = {
  current: __dirname
}

download(dirs, (err) => {
  if (err) {
    // Report the failure and exit non-zero instead of claiming success.
    console.error('Download failed:', err.message || err)
    process.exitCode = 1
    return
  }
  console.log('Done!')
})
{
"name": "spacetime-etl-building-inspector-clay-test",
"version": "0.1.0",
"description": "Download and convert Building Inspector data to Space/Time NDJSON",
"main": "building-inspector.js",
"author": "Bert Spaan",
"license": "MIT",
"dependencies": {
"JSONStream": "^1.0.7",
"async": "^1.5.2",
"highland": "^2.6.0",
"request": "^2.69.0"
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment