Last active
August 29, 2015 14:23
-
-
Save davidmz/74ea660def073168a952 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Finds URL-like substrings in plain text.
 *
 * Two shapes are recognised:
 *   - anything starting with "http://", "https://", "ftp://" or "www.";
 *   - a bare domain ending in one of the TLDs ru / com / net / org / рф /
 *     xn--…, optionally followed by a path.
 *
 * Trailing punctuation is stripped from every candidate. A trailing closing
 * bracket is kept only while it balances an opening bracket inside the link,
 * so "http://host/Foo_(bar)." yields "http://host/Foo_(bar)".
 *
 * @param {string} text - Text to scan.
 * @returns {Array<{match: string, pos: number, url: string}>} One entry per
 *   link, in order of appearance: `match` is the link as written, `pos` its
 *   index in `text`, and `url` is `match` prefixed with "http://" when the
 *   link has no explicit scheme.
 */
var findURLs = (function() {
    // Group 1 is the boundary before the link (start of text or one character
    // that is not a Latin/Cyrillic letter, digit or "_"). It replaces "\b",
    // which only knows ASCII word characters and therefore never allowed a
    // link to begin with a Cyrillic letter.
    // Group 2 is the link itself; group 4 is the explicit scheme, if any.
    // Kept non-global so it can be reused statelessly for validation.
    var urlRe = /(^|[^a-zа-я\u04510-9_])(((https?|ftp):\/\/|www\.)[^\s<>]+|([a-zа-я\u04510-9][a-zа-я\u04510-9-]*\.)+(ru|com|net|org|рф|xn--[a-z0-9]+)(?![a-zа-я\u04510-9-])[^\s<>]*)/i;

    // Run of trailing ASCII punctuation (0x21-0x2e, 0x3a-0x3f, 0x5b-0x60,
    // 0x7b-0x7e) or the ellipsis U+2026. "/" is deliberately not included.
    var finalPuncts = /[\x21-\x2e\x3a-\x3f\x5b-\x60\x7b-\x7e\u2026]+$/;
    var hasCloser = /[)}\]]/;
    var trailingNonClosers = /[^)}\]]+$/;

    // Each bracket family gets its own weight so that different families do
    // not cancel each other out in the summed balance.
    var bracketWeights = {
        "(": 1,
        ")": -1,
        "[": 100,
        "]": -100,
        "{": 10000,
        "}": -10000
    };

    // Returns the weighted sum of all brackets in `text` (0 means balanced).
    var bracketBalance = function(text) {
        var balance = 0;
        var i, ch;
        for (i = 0; i < text.length; i++) {
            ch = text.charAt(i);
            if (Object.prototype.hasOwnProperty.call(bracketWeights, ch)) {
                balance += bracketWeights[ch];
            }
        }
        return balance;
    };

    // Strips trailing punctuation from a raw match, keeping just enough
    // closing brackets to balance the brackets opened inside the link.
    var trimTail = function(match) {
        var tailMatch = finalPuncts.exec(match);
        if (tailMatch === null) return match;

        var tail = tailMatch[0];
        var base = match.slice(0, match.length - tail.length);

        // No closing brackets in the tail: all of it is plain punctuation.
        if (!hasCloser.test(tail)) return base;

        var balance = bracketBalance(base);
        // Link body is already balanced: the closers belong to the outer text.
        if (balance === 0) return base;

        // Drop whatever follows the last closing bracket, then keep the
        // shortest prefix of the tail that brings the balance back to zero.
        var kept = tail.replace(trailingNonClosers, "");
        var i;
        for (i = 0; i < kept.length; i++) {
            balance += bracketBalance(kept.charAt(i));
            if (balance === 0) return base + kept.slice(0, i + 1);
        }
        // Never balanced: keep the tail up to its last closing bracket.
        return base + kept;
    };

    return function(text) {
        // A fresh global scanner per call keeps `lastIndex` state local.
        var scanner = new RegExp(urlRe.source, "ig");
        var results = [];
        var found, link;

        while ((found = scanner.exec(text)) !== null) {
            link = trimTail(found[2]);
            // Trimming may leave something that is no longer a link
            // (e.g. "http://..." becomes "http://"); discard those.
            if (!urlRe.test(link)) continue;
            results.push({
                match: link,
                // Skip the boundary character captured in group 1.
                pos: found.index + found[1].length,
                // Only an explicit scheme counts; "www." links still need
                // "http://" prepended.
                url: found[4] ? link : "http://" + link
            });
        }
        return results;
    };
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment