Skip to content

Instantly share code, notes, and snippets.

@NeuronQ
Created February 5, 2019 23:54
Show Gist options
  • Save NeuronQ/0a60dfca054810db47841184460dcc14 to your computer and use it in GitHub Desktop.
// callbacks_async_scrape.js (tested with node 11.3)
const http = require('http');
const https = require('https');
// Fetch `url` and deliver the full response body (as a string) to `onSuccess`;
// request-level failures are routed to `onError`. Chooses the `https` module
// when the URL starts with "https", plain `http` otherwise. Each request's
// wall-clock time is reported via console.time/timeEnd.
const fetchUrl = (url, onSuccess, onError) => {
  console.time(`fetchUrl(${url})`);
  const client = url.indexOf('https') === 0 ? https : http;
  client
    .get(url, resp => {
      let html = '';
      resp.on('data', chunk => { html += chunk; });
      resp.on('end', () => {
        console.timeEnd(`fetchUrl(${url})`);
        onSuccess(html);
      });
    })
    .on('error', onError);
};
// Extract the value of every href="..." attribute in `html`,
// returned as an array of strings in document order.
const scrapeData = html => {
  const hrefPattern = /href="([^"]+)"/g;
  const links = [];
  let match = hrefPattern.exec(html);
  while (match !== null) {
    links.push(match[1]);
    match = hrefPattern.exec(html);
  }
  return links;
};
const urls = [
  "http://neverssl.com/",
  "https://www.ietf.org/wiki/rfc2616.txt".replace('/wiki/', '/rfc/'),
  "https://en.wikipedia.org/wiki/Asynchronous_I/O",
];
// Scrape results keyed by URL; filled in as each fetch completes.
const extractedData = {};
let nUrlsProcessed = 0;

// Print the collected data and stop the timer once every URL has been
// accounted for (success or failure).
const reportIfDone = () => {
  // BUG FIX: original checked `nUrlsProcessed <= 0`, which is never true
  // after an increment, so the report/timer never fired.
  if (nUrlsProcessed === urls.length) {
    console.log("> extracted data:", extractedData);
    console.timeEnd("elapsed");
  }
};

console.time("elapsed");
// BUG FIX: `const` is required — the original `for (url of urls)` created a
// single implicit global, so every callback saw the last URL and all results
// collapsed onto one key.
for (const url of urls) {
  fetchUrl(
    url,
    html => {
      nUrlsProcessed++;
      extractedData[url] = scrapeData(html);
      reportIfDone();
    },
    err => {
      // BUG FIX: failures were silently swallowed and never triggered the
      // completion check, so a single failed URL stalled the report forever.
      console.error(`> failed to fetch ${url}:`, err.message);
      nUrlsProcessed++;
      reportIfDone();
    }
  );
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment