Created
October 2, 2017 23:29
-
-
Save remasis/87ae71df0f5ce7ae5a78ebc3725e3e9a to your computer and use it in GitHub Desktop.
FCC comment scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Yes, this scraper sucks but it worked and we have 44 gigs of comments
var https = require('https');
var fs = require('fs');

// Number of filings requested per API call (sent as the `limit` query parameter).
var pagesize = 100;
// Index of the first filing to request (sent as the `offset` query parameter).
// getChunk() advances it by `pagesize` after every page it writes.
var offset = 0;
// Paging continues only while `offset` is below this value.
var stop = 300;
// Output file, named after the starting offset; getChunk() appends one JSON
// document per line to it.
var outfile = fs.createWriteStream("./fcc-comments-" + offset + ".json");
// Base query: filings whose proceedings.name is 17-108, sorted by
// date_disseminated ascending. The trailing "&" lets getChunk() append the
// paging parameters directly, e.g. limit=3&offset=0
var url = "https://ecfsapi.fcc.gov/filings?proceedings.name=17-108&sort=date_disseminated,ASC&";
/**
 * Fetch one page of filings, append them to the output file, and request the
 * next page until `offset` reaches `stop` or a response carries no filings.
 *
 * Reads the file-level `url`, `pagesize`, `offset`, `stop` and `outfile`
 * variables and advances `offset` by `pagesize` after every page written.
 * Each filing is written as one JSON document per line. Progress is reported
 * on stdout; the reason for stopping early is reported on stderr.
 */
function getChunk() {
    var pageUrl = url + "limit=" + pagesize + "&offset=" + offset;

    // Report where the scrape stopped, then exit only after the output stream
    // has finished: process.exit() does not wait for pending stream writes.
    function finish(reason) {
        console.error(reason);
        console.error("Offset", offset);
        console.error("pagesize", pagesize);
        process.stdout.write("stopped at offset " + offset + "\n");
        outfile.end(function() {
            process.exit();
        });
    }

    https.get(pageUrl, function(res) {
        var data = "";
        // Decode as UTF-8 across chunk boundaries; concatenating raw Buffers
        // corrupts multi-byte characters that are split between two chunks.
        res.setEncoding('utf8');
        res.on('data', function(chunk) {
            data += chunk;
        });
        res.on('end', function() {
            var comments;
            try {
                comments = JSON.parse(data);
            } catch (e) {
                // An unparseable body is handled like an empty result below,
                // but say why so it is not mistaken for a clean end of data.
                console.error("Could not parse response:", e.message);
            }
            if (!comments || !Array.isArray(comments.filings)) {
                finish("NO MORE RESULTS");
                return;
            }
            comments.filings.forEach(function(filing) {
                outfile.write(JSON.stringify(filing) + "\n");
            });
            // stream.write() takes (chunk, encoding, callback), not a list of
            // values to print, so the message is built as a single string.
            process.stdout.write("got: " + comments.filings.length + " of range " +
                offset + " - " + (offset + pagesize - 1) + "\n");
            offset += pagesize;
            if (offset < stop) {
                getChunk();
            }
        });
    }).on('error', function(err) {
        // Without this handler a network failure is an uncaught exception.
        finish("REQUEST FAILED: " + err.message);
    });
}
// Start scraping from the configured offset; getChunk() schedules each following page itself.
getChunk();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment