Skip to content

Instantly share code, notes, and snippets.

@davidmz
Last active August 29, 2015 14:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save davidmz/74ea660def073168a952 to your computer and use it in GitHub Desktop.
Save davidmz/74ea660def073168a952 to your computer and use it in GitHub Desktop.
/**
 * Finds URL-like substrings in plain text.
 *
 * A candidate is either something that starts with `http://`, `https://`,
 * `ftp://` or `www.`, or a bare host name ending in one of the zones listed
 * in the regular expression (ru, com, net, org, рф, xn--…). Trailing
 * punctuation that most likely belongs to the surrounding sentence is cut
 * off, while closing brackets that balance an opening bracket inside the URL
 * are kept.
 *
 * @param {string} text - Text to scan.
 * @returns {Array<{match: string, pos: number, url: string}>} One entry per
 *   URL in order of appearance: `match` is the trimmed text as found, `pos`
 *   is its index in `text`, and `url` is `match` prefixed with `http://`
 *   when the match carries no explicit scheme.
 */
var findURLs = (function() {
    // Each bracket family has its own weight so one number can track all of
    // them at once; a sum of 0 is treated as "balanced".
    var BRACKET_WEIGHTS = {
        "(": 1,
        ")": -1,
        "[": 100,
        "]": -100,
        "{": 10000,
        "}": -10000
    };

    // Run of base Latin punctuation (everything except '/') or an ellipsis
    // at the very end of a string.
    var finalPuncts = /[\x21-\x2e\x3a-\x3f\x5b-\x60\x7b-\x7e\u2026]+$/;
    var closingBracket = /[)}\]]/;
    var trailingNonClosers = /[^)}\]]+$/;

    /**
     * Sums the bracket weights of every character in `text`.
     *
     * @param {string} text
     * @returns {number} 0 when opening and closing brackets cancel out.
     */
    var bracketBalance = function(text) {
        var balance = 0;
        for (var i = 0; i < text.length; i++) {
            var c = text.charAt(i);
            if (Object.prototype.hasOwnProperty.call(BRACKET_WEIGHTS, c)) {
                balance += BRACKET_WEIGHTS[c];
            }
        }
        return balance;
    };

    /**
     * Strips sentence punctuation from the end of a raw match, keeping the
     * closing brackets needed to balance brackets opened inside the URL.
     *
     * @param {string} match - Raw regex match.
     * @returns {string} The trimmed match.
     */
    var trimTail = function(match) {
        var m = finalPuncts.exec(match);
        if (m === null) return match;

        var tail = m[0];
        var head = match.slice(0, match.length - tail.length);
        var balance = bracketBalance(head);

        // Nothing in the tail can close a bracket, or there is nothing left
        // to close: the whole tail is sentence punctuation.
        if (!closingBracket.test(tail) || balance === 0) return head;

        // Drop whatever follows the last closing bracket, then keep the
        // shortest prefix of the remaining tail that balances the head.
        var closers = tail.replace(trailingNonClosers, "");
        for (var i = 0; i < closers.length; i++) {
            balance += bracketBalance(closers.charAt(i));
            if (balance === 0) return head + closers.slice(0, i + 1);
        }
        // Never balanced: keep everything up to the last closing bracket.
        return head + closers;
    };

    return function(text) {
        // Groups: 1 = whole match, 2 = "scheme://" or "www.", 3 = scheme only,
        // 4 = last host label, 5 = zone.
        // NOTE(review): without the `u` flag `\b` only recognises ASCII word
        // characters, so a match can only begin next to an ASCII letter,
        // digit or underscore — confirm whether all-Cyrillic hosts are
        // expected to be found.
        var urlRe = /\b(((https?|ftp):\/\/|www\.)[^\s<>]+|([a-zа-я0-9][a-zа-я0-9-]*\.)+(ru|com|net|org|рф|xn--[a-z0-9]+)(?![a-zа-я0-9-])[^\s<>]*)/ig;
        var founds = [];
        var found;

        while ((found = urlRe.exec(text)) !== null) {
            founds.push({
                match: found[0],
                pos: found.index,
                // Group 3 is the scheme itself. Group 2 also matches a bare
                // "www." prefix, which still needs "http://" prepended.
                withProtocol: !!found[3]
            });
        }

        return founds
            .map(function(f) {
                f.match = trimTail(f.match);
                return f;
            })
            .filter(function(f) {
                // Trimming may leave something that is no longer a URL
                // (e.g. "www"). The regex is global, so reset its cursor
                // before every test.
                urlRe.lastIndex = 0;
                return urlRe.test(f.match);
            })
            .map(function(f) {
                f.url = f.withProtocol ? f.match : "http://" + f.match;
                delete f.withProtocol;
                return f;
            });
    };
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment