View index.js
var path = require('path')
var fs = require('fs')
var ytdl = require('youtube-dl')
// Kick off a youtube-dl download of `url`, requesting mp3 audio output,
// and log any error the download stream emits.
// NOTE(review): this fragment is truncated — the closing brace of
// `playlist` (and any success/progress handlers) is not visible here.
function playlist (url) {
var video = ytdl(url, ['--audio-format=mp3'])
// The 'error 2' tag presumably distinguishes this handler from another
// error log elsewhere in the file — TODO confirm.
video.on('error', function error (err) {
console.log('error 2:', err)
})
View index.js
// $ node read.js nt.00.nin
var fs = require('fs')
var int53 = require('int53')
// Input filename from the CLI.
// NOTE(review): assigned without var/let/const — this creates an
// implicit global; confirm that is intentional.
f = process.argv[2]
// Read the inclusive byte range [start, end] from open file descriptor
// `fd` and pass the filled buffer to cb(err, buf).
// NOTE(review): `new Buffer(len)` is deprecated (uninitialized memory);
// Buffer.alloc(len) is the modern equivalent. Fragment is truncated —
// the callback and function closers are not visible here.
function read (fd, start, end, cb) {
var len = end - start + 1
var buf = new Buffer(len)
fs.read(fd, buf, 0, len, start, function (err) {
cb(err, buf)
View Containerfile
# NOTE(review): only ARG may legally precede FROM in a Dockerfile; ENV
# before FROM is invalid for docker build. Presumably a custom
# nspawn bootstrap tool reads this value — confirm before reordering.
ENV NSPAWN_BOOTSTRAP_IMAGE_SIZE=10GB
FROM ubuntu:xenial
# set unlimited bash history
# nspawn needs resolv.conf to be set up for internet to work
# password gets changed so we can login later
# NOTE(review): this RUN writes .bashrc inside /usr/local/anacapa, not a
# home directory — confirm the shell actually sources it. The command is
# truncated here (continuation continues past this view).
RUN mkdir /usr/local/anacapa && \
cd /usr/local/anacapa && \
echo "export HISTFILESIZE=" >> .bashrc && \
echo "export HISTSIZE=" >> .bashrc && \
View index.js
var $ = require('cheerio')
var fs = require('fs')
var walker = require('folder-walker')
var transform = require('parallel-transform')
var ndjson = require('ndjson')
// Stream of file entries under ./pageblobs (blobs written earlier by
// abstract-blob-store).
var walk = walker('./pageblobs') // generated by abstract-blob-store
// Run `scrape` (defined later in the file, not visible in this view)
// over up to 10 entries concurrently, preserving order.
var scraper = transform(10, scrape)
// Serialize the scraped objects as newline-delimited JSON.
var out = ndjson.serialize()
View index.js
var hyperdb = require('hyperdb')
var hyperdiscovery = require('hyperdiscovery')
// Public key of the shared npm hyperdb feed to replicate (sparse mode:
// only requested blocks are downloaded).
var npmkey = '0f8a60595af5387d52b053af4a8a4aecd5d6d3799741c3993916798e71ea0730'
var db = hyperdb('./npm.db', npmkey, {sparse: true, valueEncoding: 'json'})
db.on('ready', function () {
// Join the peer discovery swarm and keep replicating live.
var swarm = hyperdiscovery(db, {live: true})
// Once the first update arrives from a peer, fetch a single module
// record. NOTE(review): fragment is truncated — the callback bodies and
// closers are not visible here.
db.once('remote-update', function () {
db.get('/modules/aws.js', function (err, data) {
View index.js
var fs = require('fs')
var request = require('request')
var through = require('through2')
var ndjson = require('ndjson')
var once = require('once')
var pump = require('pump')
var concat = require('concat-stream')
var parallel = require('parallel-transform')
var hyperdb = require('hyperdb')
// Local writable hyperdb (no key argument = author a new feed) storing
// JSON values at ./npm.db. NOTE(review): the script is truncated here —
// the streaming pipeline that uses these modules is not visible.
var db = hyperdb('./npm.db', {valueEncoding: 'json'})
View index.sh
# data from geojson lines file from https://mapzen.com/data/metro-extracts/
# (fixed: the original used `//`, which is not a shell comment and would
# have been executed as a command)
# Pull each feature of the FeatureCollection out as one JSON object per line.
cat portland_oregon_osm_line.geojson | jsonfilter features.* > lines.ndjson
# Keep ways explicitly tagged for bicycles (excluding bicycle=no)...
cat lines.ndjson | jsonfilter --match="this.properties.bicycle && this.properties.bicycle !== 'no'" > sharedpaths.json
# ...plus anything mentioning cycleway tags.
cat lines.ndjson | grep "cycleway" >> sharedpaths.json
# Drop duplicate features matched by both filters above.
cat sharedpaths.json | sort | uniq > dedupe.json
mv dedupe.json sharedpaths.json
# Re-wrap the line-delimited features into a single FeatureCollection.
cat sharedpaths.json | ndjson-reduce | ndjson-map '{type: "FeatureCollection", features: d}' > sharedpaths.geojson
# Convert the GeoJSON to an ESRI shapefile.
mkdir shp
cd shp
ogr2ogr -f "ESRI Shapefile" sharedpaths.shp ../sharedpaths.geojson OGRGeoJSON
View index.txt
379490 referenceworks.brillonline.com
377682 doi.apa.org
244045 primarysources.brillonline.com
189587 f1000.com
106769 www.iucnredlist.org
78961 www.e-enlightenment.com
67194 doi.namesforlife.com
20335 www.degruyter.com
17940 www.icpsr.umich.edu
17044 www.scivee.tv
View index.txt
672055 www.ccdc.cam.ac.uk
618996 figshare.com
493410 rgdoi.net
487454 plutof.ut.ee
378396 ba.e-pics.ethz.ch
376822 retro.seals.ch
373193 www.die-bonn.de
358476 doi.pangaea.de
313951 www.gbif.org
237629 www.hepdata.net
View index.sh
# Install the streaming CLI tools used by the pipelines below.
npm install gunzip-maybe xml-json jsonfilter nugget -g
# Fetch DataCite's gzipped sitemap index, extract every sitemap URL from
# the XML, and download them all into ./datacite.
curl "https://search.datacite.org/sitemaps/sitemap.xml.gz" | gunzip-maybe | xml-json sitemapindex | jsonfilter sitemap.*.loc | xargs nugget -d datacite
# Expand each downloaded sitemap and collect only the /works URLs.
ls datacite | xargs -I {} sh -c "cat datacite/{} | gunzip-maybe | xml-json urlset | jsonfilter url.*.loc | grep works" >> urls.txt