Last active
August 29, 2015 14:06
-
-
Save rxaviers/87e089c35d46fd3a1492 to your computer and use it in GitHub Desktop.
cldr-data: npm (custom fetch) approach (See https://github.com/rxaviers/cldrjs/issues/18#issuecomment-56438757)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * A simplified HTTP request client that accepts glob patterns v@VERSION
 *
 * Copyright 2013 Rafael Xavier de Souza
 * Released under the MIT license
 * https://github.com/rxaviers/request-glob/blob/master/LICENSE-MIT
 *
 * Date: @DATE
 */
var minimatch = require("minimatch"); | |
var path = require("path"); | |
var request = require("request"); | |
var url = require("url") | |
/**
 * Write diagnostic output for the crawler.
 *
 * Every argument is forwarded unchanged to `console.log`.
 */
function debug() {
  // Use `console` (not the `log` function itself) as the receiver so the call
  // is equivalent to `console.log(a, b, ...)` even where `log` is not pre-bound.
  console.log.apply(console, arguments);
}
/**
 * Crawl HTML listing pages starting at `initialUrl` and report every non-HTML
 * resource whose full URL matches the glob `globbedUrl`.
 *
 * URLs are fetched one at a time from a queue. A response whose content-type
 * starts with "text/html" is scanned for `a href="..."` links; links that can
 * still lead to a match are queued. Any other response is reported through
 * `callback` when its URL matches the glob.
 *
 * @param {string} initialUrl First URL to fetch.
 * @param {string} globbedUrl Glob pattern (tested with `minimatch`) against full URLs.
 * @param {Object} [options] Forwarded to `request` when provided.
 * @param {Function} callback Invoked as `callback(error)` when a request fails
 *   or when the queue empties without a single match, and as
 *   `callback(error, thisRequest, response, body, remaining)` (with a falsy
 *   `error`) for each match, where `remaining` is the number of URLs still queued.
 */
function globRequest(initialUrl, globbedUrl, options, callback) {
  var found = 0;
  var urls = [initialUrl];
  // URLs that have already been queued. Without this, duplicate or
  // self-referential links (e.g. href="./") are fetched again and again.
  var seen = Object.create(null);

  // Support the three-argument form: globRequest(initialUrl, globbedUrl, callback).
  if (!callback) {
    callback = options;
    options = undefined;
  }

  // urlJoin() strips a trailing slash, so store the initial URL the same way.
  seen[String(initialUrl).replace(/\/$/, "")] = true;

  // Queue every link found in `body` that may still lead to a glob match.
  function crawl(baseUrl, body) {
    var linkPattern = /a href="([^"]*)"/g;
    var match, localPath, target;

    while ((match = linkPattern.exec(body)) !== null) {
      localPath = match[1];

      // Skip "../".
      if (localPath === "../") {
        continue;
      }

      target = urlJoin(baseUrl, localPath);

      // Skip anything already queued or that doesn't lead to matching glob.
      if (seen[target] || !mayMatchGlob(target)) {
        continue;
      }

      // Add url to urls.
      seen[target] = true;
      debug("+ ", target);
      urls.push(target);
    }
  }

  // Fetch one URL, then either crawl it (HTML) or report it (matching content).
  function get(target) {
    debug("GET", target);
    var thisRequest = _request(target, options, function(error, response, body) {
      if (error) {
        debug("Whops, error!");
        return callback(error);
      }

      if ((/^text\/html/).test(response.headers["content-type"])) {
        debug("Crawl");
        crawl(target, body);
        next();
        return;
      }

      if (!matchGlob(target)) {
        debug("Content skipped");
        return next();
      }

      debug("Got content");
      found++;
      // Resume the queue once this request emits "end".
      // NOTE(review): this listener is attached from inside the completion
      // callback; confirm "end" can still fire afterwards with the `request`
      // version in use.
      thisRequest.on("end", next);
      return callback(error, thisRequest, response, body, urls.length);
    });
  }

  // True when the full URL matches the glob.
  function matchGlob(target) {
    return minimatch(target, globbedUrl);
  }

  // True when `target` is a possible ancestor of (or is) a matching URL.
  function mayMatchGlob(target) {
    var prefixGlob, targetParts, globParts, minLength;

    // If globbedUrl contains "**", any url up to the initial ** may match glob.
    // For example: both /a/b/c and /a/b/c/d may match /a/b/**/x, because both
    // match /a/b/**.
    if ((/\*\*/).test(globbedUrl)) {
      prefixGlob = globbedUrl.split("**")[0] + "**";
    } else {
      prefixGlob = globbedUrl;
    }

    // Test url and globbedUrl against each other until their minimum common
    // path length are reached.
    targetParts = target.split("/");
    globParts = prefixGlob.split("/");
    minLength = Math.min(targetParts.length, globParts.length);

    return minimatch(
      targetParts.slice(0, minLength).join("/"),
      globParts.slice(0, minLength).join("/")
    );
  }

  // Take the next queued URL, or report "Nothing found" if the queue drained
  // without any match.
  function next() {
    var target = urls.shift();
    if (target) {
      get(target);
    } else if (!found) {
      return callback(new Error("Nothing found"));
    }
  }

  // Call `request`, passing `options` only when the caller supplied them.
  function _request(target, options, callback) {
    if (options) {
      return request(target, options, callback);
    } else {
      return request(target, callback);
    }
  }

  // Append `b` to the path of URL `a`, dropping any trailing slash.
  function urlJoin(a, b) {
    var parsed = url.parse(a);
    // URL paths always use "/", so join with the POSIX rules on every platform.
    parsed.pathname = path.posix.join(parsed.pathname, b).replace(/\/$/, "");
    return url.format(parsed);
  }

  next();
}
// Expose the crawler as this module's only export.
module.exports = globRequest;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Proof-of-concept driver: crawl the CLDR JSON listing and log the size of
// every file that matches the glob.
// Declared with `var` so the script does not create an implicit global.
var requestGlob = require("./request-glob");

requestGlob(
  // Initial url to start crawling
  "http://www.unicode.org/repos/cldr-aux/json/26",

  // Glob pattern to find. Examples:
  // - "http://www.unicode.org/repos/cldr-aux/json/26/main/en/numbers.json"
  // - "http://www.unicode.org/repos/cldr-aux/json/26/main/*/numbers.json"
  // - "http://www.unicode.org/repos/cldr-aux/json/26/**/numbers.json"
  "http://www.unicode.org/repos/cldr-aux/json/26/main/*/numbers.json",

  // Callback function: called with an error, or once per matching file with
  // the number of URLs still queued in `remaining`.
  function(error, request, response, body, remaining) {
    if (error) {
      return console.log("Whops", error.message);
    }
    console.log("=-=-=-= (", body.length, ") +", remaining);
  }
);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
To run this POC, execute: