View download.js
var request = require('request')
// First CLI argument: the link handed to dl().
var link = process.argv[2]
// Second CLI argument: optional start value. A missing or empty argument
// becomes 0; anything else is coerced to a number with unary plus.
var start = process.argv[3] ? +process.argv[3] : 0

// Run dl() and rethrow whatever error it reports through the callback;
// on success, report completion on stderr.
dl(link, start, function (failure) {
  if (failure) throw failure
  console.error('All done')
})
View links.json
View epaurls.txt
This file has been truncated, but you can view the full file.
639 "https://www.epa.gov/enviroatlas/enviroatlas-data"
276 "https://enviroatlas.epa.gov/arcgis/rest/services"
194 "https://enviroatlas.epa.gov/arcgis/rest/services/Communities"
142 "https://edg.epa.gov/metadata/"
118 "https://www3.epa.gov/enviro/html/fii/downloads/state_files/Facility%20State%20File%20Documentation.pdf"
54 "http://www.epa.gov/geospatial/"
41 "https://edg.epa.gov/clipship/"
34 "https://epa.maps.arcgis.com/home/webmap/viewer.html?&url=https%3A%2F%2Fenviroatlas.epa.gov%2Farcgis%2Frest%2Fservices%2FSupplemental%2FConnectivity_AllCommunities%2FMapServer"
34 "https://enviroatlas.epa.gov/arcgis/rest/services/Supplemental/Connectivity_AllCommunities/MapServer/kml/mapImage.kmz"
View index.js
// modified version of http://va2577.github.io/post/51/ to produce ndjson
// Load the sqlite3 binding with its verbose() mode switched on.
const sqlite3 = require('sqlite3').verbose();
// Open the database file located one directory above this script.
const db = new sqlite3.Database('../imessage.sqlite')
const Iconv = require('iconv').Iconv;
// Converter from UTF-8 to Shift_JIS. The "//TRANSLIT//IGNORE" suffixes presumably
// ask iconv to transliterate or skip unconvertible characters — confirm against the iconv docs.
const sjis = new Iconv('UTF-8', 'Shift_JIS//TRANSLIT//IGNORE');
// NOTE(review): the header comment says this version produces ndjson, yet this
// path ends in .csv; how `filename` is used is not visible here — confirm.
const filename = './sms.csv';
db.serialize(() => {
const sql = [];
View readme.md

proposal 1

CKAN mirror

Deploy a full-fledged CKAN mirror as a live, database-backed web service and load it with all of the data.gov metadata.

pros

  • could mirror the data.gov functionality completely, including all the rendered views
  • brings the full CKAN feature set, including search, user accounts, and plugins
View index.js
// UDP/IPv4 socket created with address reuse enabled.
var socket = dgram.createSocket({ reuseAddr: true, type: 'udp4' })

// After binding to port 5004, join multicast group 239.255.42.42 using the
// address that getIpForInterface() returns for 'en2', then set the multicast TTL to 255.
socket.bind(5004, () => {
  var interfaceAddress = getIpForInterface('en2')
  socket.addMembership('239.255.42.42', interfaceAddress)
  socket.setMulticastTTL(255)
})

// Incoming datagrams are received here; the handler body is still empty.
socket.on('message', (m) => {
  // m is a buffer
})
View readme.md

For Ubuntu: how to set up a dynamic-DNS-style service that tells you the external IP address of a machine.

  • npm install dat lil-pids run-every add-to-systemd -g
  • mkdir ipdat; cd ipdat; dat create; cd ..;
  • edit the file named services so that it contains:
cd ipdat && dat sync
cd ipdat && run-every 3600 curl ipinfo.io/ip > ip.txt
View ftp-servers.txt
901441 "rockyftp.cr.usgs.gov"
259 "ghrc.nsstc.nasa.gov"
185 "acdisc.gsfc.nasa.gov"
158 "ftp2.census.gov"
119 "podaac-ftp.jpl.nasa.gov"
73 "gpm.nsstc.nasa.gov"
71 "airbornescience.nsstc.nasa.gov"
65 "hydro1.sci.gsfc.nasa.gov"
56 "ftp.nhtsa.dot.gov"
53 "measures.gsfc.nasa.gov"
View index.sh
#!/bin/sh
# Create the download directory; -p keeps reruns from failing when it already exists.
mkdir -p bags
# Fetch up to 1000 package records from the datarefuge search API into datarefuge.json.
nugget "https://www.datarefuge.org/api/3/action/package_search?rows=1000" -o datarefuge.json
# Pull every resource URL out of the results, keep those containing "bag",
# and pass them to nugget (-c / -d presumably mean "resume" and "target
# directory" — confirm against the nugget docs).
jsonfilter result.results.*.resources.*.url < datarefuge.json | grep bag | xargs nugget -c -d bags