View index.js
// Listen for UDP datagrams sent to the 239.255.42.42 multicast group on port 5004.
var MULTICAST_GROUP = '239.255.42.42'
var LISTEN_PORT = 5004

var socket = dgram.createSocket({ type: 'udp4', reuseAddr: true })

// Runs once the socket is bound: join the multicast group, then set the multicast TTL.
function onBound () {
  // getIpForInterface is defined elsewhere; presumably it resolves 'en2' to that
  // interface's local address -- confirm against its definition.
  var interfaceAddress = getIpForInterface('en2')
  socket.addMembership(MULTICAST_GROUP, interfaceAddress)
  socket.setMulticastTTL(255)
}

// Called for every received datagram; `payload` is a buffer.
function onMessage (payload) {
}

socket.bind(LISTEN_PORT, onBound)
socket.on('message', onMessage)
View readme.md

For Ubuntu: how to set up a dynamic DNS service that tells you the external IP of a machine.

  • npm install dat lil-pids run-every add-to-systemd -g
  • mkdir ipdat; cd ipdat; dat create; cd ..;
  • edit the file `services` so that it contains:
cd ipdat && dat sync
cd ipdat && run-every 3600 curl ipinfo.io/ip > ip.txt
View ftp-servers.txt
901441 "rockyftp.cr.usgs.gov"
259 "ghrc.nsstc.nasa.gov"
185 "acdisc.gsfc.nasa.gov"
158 "ftp2.census.gov"
119 "podaac-ftp.jpl.nasa.gov"
73 "gpm.nsstc.nasa.gov"
71 "airbornescience.nsstc.nasa.gov"
65 "hydro1.sci.gsfc.nasa.gov"
56 "ftp.nhtsa.dot.gov"
53 "measures.gsfc.nasa.gov"
View index.sh
#!/bin/sh
# Download every resource whose URL contains "bag" from the datarefuge.org
# package_search API into ./bags.

# -p: do not fail when bags/ already exists, so the script can be re-run.
mkdir -p bags

# Save the search results (up to 1000 packages) to datarefuge.json.
nugget "https://www.datarefuge.org/api/3/action/package_search?rows=1000" -o datarefuge.json

# Pull out every resource URL, keep the ones matching "bag", and pass them to nugget.
# The selector is quoted so the shell never glob-expands the `*` characters.
# NOTE(review): -c / -d are presumably nugget's "continue" and "target directory"
# options -- confirm against the nugget documentation.
jsonfilter 'result.results.*.resources.*.url' < datarefuge.json | grep bag | xargs nugget -c -d bags
View readme.md
~/Desktop 🐈  cat results-opendataphilly.json | jsonfilter url | awk -F/ '{print $1}' | sort | uniq -c | sort -rn
1183 "http:
466 "https:
36 "ftp:

~/Desktop 🐈  cat results-opendataphilly.json | jsonfilter status | sort | uniq -c | sort -rn
1575 200
  35 404
  10 500
View datagovmetadata.json
{"help": "https://catalog.data.gov/api/3/action/help_show?name=package_search", "success": true, "result": {"count": 48, "sort": "views_recent desc", "facets": {}, "results": [{"license_title": "License not specified", "maintainer": "New Media", "relationships_as_object": [], "private": false, "maintainer_email": "newmedia@whitehouse.gov", "num_tags": 5, "id": "59694770-b6b6-4ae0-a4b9-4ae69c0be2f6", "metadata_created": "2016-07-02T10:06:26.199575", "metadata_modified": "2016-07-02T10:06:26.199575", "author": null, "author_email": null, "state": "active", "version": null, "creator_user_id": "47303a9e-1187-4290-85a3-1fc02dc49e4a", "type": "dataset", "resources": [{"cache_last_updated": null, "package_id": "59694770-b6b6-4ae0-a4b9-4ae69c0be2f6", "webstore_last_updated": null, "id": "3a8a0ad1-19e7-4153-bb2f-d70cf88aaaf8", "size": null, "state": "active", "hash": "", "description": "", "format": "CSV", "tracking_summary": {"total": 32, "recent": 1}, "last_modified": null, "url_type": null, "no_real_name": "True",
View readme.md
View dlcollection.js
// NOTE: the item count shown on a collection's /detail page matches the count
// returned by /advancedsearch, yet after deduping the results the number of
// unique items is much lower (e.g. a 350-item collection yielded only ~250
// unique items). Either this script is wrong or the count reported by
// advancedsearch is wrong -- not sure which.
var request = require('request')

// Query pieces: search by collection, return only the identifier field,
// 50 rows per response, JSON output.
var searchPrefix = 'http://archive.org/advancedsearch.php?q=collection%3A'
var searchSuffix = '&fl%5B%5D=identifier&sort%5B%5D=&sort%5B%5D=&sort%5B%5D=&rows=50&output=json'

// `collection` is not defined in this part of the file; it must come from elsewhere.
var url = searchPrefix + collection + searchSuffix

var page = 1
View getissues.js
// Fetch the issues of daniellecrobinson/data-rescue-pdx from the GitHub API and
// write them to ./issues.json as newline-delimited JSON (one issue per line).
var fs = require('fs')
var request = require('request')
request({url: 'https://api.github.com/repos/daniellecrobinson/data-rescue-pdx/issues', json: true, headers: {'user-agent': 'nodejs'}}, function (err, resp, json) {
  // On a transport failure `json` is undefined, so stop before dereferencing it.
  if (err) throw err
  // A non-array body (e.g. an error object instead of an issue list) has no forEach.
  if (!Array.isArray(json)) {
    throw new Error('unexpected response body: ' + JSON.stringify(json))
  }
  console.log(json.length)
  var bod = ''
  json.forEach(function (j) { bod += JSON.stringify(j) + '\n' })
  // fs.writeFile needs a callback: current Node versions throw a TypeError
  // without one, and write errors would otherwise go unnoticed.
  fs.writeFile('./issues.json', bod, function (writeErr) {
    if (writeErr) throw writeErr
  })
})
View readme.md

line count of all files in a tree

( find ./ -type f -print0 | xargs -0 cat ) | wc -l

convert cdx to ndjson

ls cdx/*.gz | xargs -I {} sh -c "cat {} | gunzip-maybe | csv-parser -s ' ' -h 'N b a m s k r M S V g'" | sort | uniq > cdx.json