Skip to content

Instantly share code, notes, and snippets.

@remasis
Created October 2, 2017 23:29
Show Gist options
  • Save remasis/87ae71df0f5ce7ae5a78ebc3725e3e9a to your computer and use it in GitHub Desktop.
Save remasis/87ae71df0f5ce7ae5a78ebc3725e3e9a to your computer and use it in GitHub Desktop.
FCC comment scraper
//Yes, this scraper sucks but it worked and we have 44 gigs of comments
var https = require('https');
var fs = require('fs');
// Number of filings requested per API call (sent as the `limit` parameter).
var pagesize = 100;
// Offset of the first filing to request; getChunk() advances it by `pagesize`
// after every page it writes.
var offset = 0;
// Paging stops once `offset` is no longer below this value.
var stop = 300;
// Destination file: one JSON-serialized filing per line. The file name embeds
// the offset the run started from.
var outfile = fs.createWriteStream(["./fcc-comments-", offset, ".json"].join(""));
// Base query for the proceeding, sorted by dissemination date. It ends in "&"
// so the paging parameters can be appended directly, e.g. limit=3&offset=0
var url = "https://ecfsapi.fcc.gov/filings?proceedings.name=17-108&sort=date_disseminated,ASC&";
/**
 * Fetches one page of filings starting at the module-level `offset`, appends
 * each filing to `outfile` as a single line of JSON, then advances `offset`
 * by `pagesize` and requests the next page while `offset` is below `stop`.
 *
 * Takes no arguments and returns nothing; all state lives in the module-level
 * variables `url`, `pagesize`, `offset`, `stop` and `outfile`.
 *
 * Terminates the process (after flushing `outfile`) when the response cannot
 * be parsed or carries no `filings` array (exit code 0), or when the request
 * itself fails (exit code 1). In both cases the current offset is reported so
 * the scrape can be resumed from there.
 */
function getChunk() {
    // Close the output stream first so buffered lines reach disk, and only
    // then exit; calling process.exit() directly can drop pending writes.
    function finish(exitCode) {
        outfile.end(function() {
            process.exit(exitCode);
        });
    }

    https.get(url + "limit=" + pagesize + "&offset=" + offset, function(res) {
        // Decode at the stream level: concatenating raw Buffer chunks onto a
        // string decodes each chunk separately and corrupts multi-byte UTF-8
        // characters that happen to straddle a chunk boundary.
        res.setEncoding('utf8');
        var data = "";
        res.on('data', function(chunk) {
            data += chunk;
        });
        res.on('end', function() {
            var comments;
            try {
                comments = JSON.parse(data);
            } catch (e) {
                // Report the failure instead of hiding it; `comments` stays
                // undefined and the stop branch below handles the shutdown.
                console.error("Could not parse response as JSON:", e.message);
            }
            if (!comments || !Array.isArray(comments.filings)) {
                console.error("NO MORE RESULTS");
                console.error("Offset", offset);
                console.error("pagesize", pagesize);
                // stdout.write() takes a single chunk (extra arguments are
                // encoding/callback), so the message is built as one string.
                process.stdout.write("stopped at offset " + offset + "\n");
                finish(0);
                return;
            }
            comments.filings.forEach(function(filing) {
                outfile.write(JSON.stringify(filing) + "\n");
            });
            console.log("got:", comments.filings.length, 'of range', offset, "-", offset + pagesize - 1);
            offset += pagesize;
            if (offset < stop) {
                getChunk();
            } else {
                // Last page written: close the file so it is flushed cleanly.
                outfile.end();
            }
        });
    }).on('error', function(err) {
        // Without this listener a network failure is an unhandled 'error'
        // event; report where the scrape stopped so it can be resumed.
        console.error("Request failed at offset", offset + ":", err.message);
        finish(1);
    });
}
// Start the scrape with the first page; each completed page requests the next
// one from its 'end' handler while `offset` is still below `stop`.
getChunk();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment