Created
June 11, 2013 12:59
-
-
Save mmgj/5756597 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.ctrloptcmd.string { | |
/** | |
* Copyright 2009 Martin Jacobsen. | |
* | |
* @langversion ActionScript 3.0 | |
* @playerversion Flash 10.0 | |
* | |
* @author Martin Jacobsen | |
* @since 2009-02-02 | |
* @version 1.0 | |
* @description : Utility class for finding, validating and transforming URLs. | |
* | |
* Several URLParsers exist, but those I found were all too generic, | |
* mainly by being too restrictive (only accepting URLs with 'http' and .com | .org | .net) | |
* or too lax (accepting .egg, .bacon and so forth). Hence this class. | |
* | |
* The URLParsing logic here is an unholy matrimony of RegExp and String utilities. | |
* The ActionScript implementation of RegExp leaves something to be desired, and I decided | |
* to store the TLDs in a Vector partly because I've been told that running through a Vector | |
* is more effective than using RegExp when you're looking for exact matches and partly | |
* because I just couldn't get the RegExp to work when it reached a certain length... | |
* Might just be my incompetence. | |
* | |
* The usage is pretty self-explanatory, but to sum it up; | |
* | |
* var validator : URLValidator = new URLValidator(); | |
* validator.validate("http://google.com") // returns true; | |
* validator.validate("google.com") // returns true unless protocolOptional is set to false; | |
* | |
* var a:Array = validator.find(longStringWithScatteredURLs) // returns an Array of objects | |
* a[0].url // the URL. | |
* a[0].startIndex // the startIndex of the URL in the text | |
* a[0].endIndex // you get it... | |
* | |
* validator.tag(longStringWithScatteredURLs) // returns the original string | |
* // with all URLs neatly <a href'ed | |
* // optionally add tags and substitute text. | |
* | |
* | |
* @license: Do what thou wilt shall be the whole of the law. (But, hey: Credit is always welcome.) | |
* | |
*/ | |
public class URLValidator {

	// Punctuation that commonly trails a URL in running text without being part of it.
	private static const TRAILING_PUNCTUATION : RegExp = /[.)?!",]+$/;

	// Matches a supported protocol at the very start of a URL (case-insensitive).
	private static const HAS_PROTOCOL : RegExp = /^(https?|ftp):\/\//i;

	/** Structural URL pattern; rebuilt by buildUrlStructure(). */
	private var urlStructure : RegExp;

	/** Known top-level domains, each including its leading dot (e.g. ".com"). */
	private var domainList : Vector.<String>;

	/** Characters allowed directly after a TLD; "" stands for end of string. */
	private var legal : Vector.<String>;

	/** Backing store for the protocolOptional accessor. */
	private var _protocolOptional : Boolean = true;

	/** When true, find() strips trailing punctuation such as "." or ")" from each URL. */
	public var trimHotChars : Boolean = true;

	/**
	 * Creates a validator with the default rules (protocol optional).
	 */
	public function URLValidator () {
		createRules();
	}

	/**
	 * Whether a URL may omit its "http://", "https://" or "ftp://" prefix.
	 * Assigning a value rebuilds the URL pattern immediately, so the change
	 * takes effect without a manual call to createRules().
	 */
	public function get protocolOptional() : Boolean {
		return _protocolOptional;
	}

	public function set protocolOptional(value:Boolean) : void {
		_protocolOptional = value;
		buildUrlStructure();
	}

	/**
	 * Tests whether the whole string is a single URL.
	 *
	 * @param stringToValidate Candidate URL; compared case-insensitively.
	 * @return true when the string contains a known TLD and the URL pattern
	 *         matches it from first to last character; false otherwise,
	 *         including for null or empty input.
	 */
	public function validate(stringToValidate:String) : Boolean {
		if (!stringToValidate) return false;
		// Strings are immutable: the lower-cased copy has to be kept.
		var candidate : String = stringToValidate.toLowerCase();
		if (!domainsValid(candidate)) return false;
		var match : Object = urlStructure.exec(candidate);
		return match != null && match[0].length == candidate.length;
	}

	/**
	 * Tests whether the string contains something URL-like anywhere in it.
	 *
	 * @param stringToValidate Text to inspect; compared case-insensitively.
	 * @return true when the text contains a known TLD and the URL pattern
	 *         matches some part of it; false for null or empty input.
	 */
	public function findValid(stringToValidate:String) : Boolean {
		if (!stringToValidate) return false;
		var haystack : String = stringToValidate.toLowerCase();
		return domainsValid(haystack) && urlStructure.test(haystack);
	}

	/**
	 * Collects every valid URL in a text, in order of appearance.
	 *
	 * @param stringToSearch Text that may contain URLs.
	 * @return Array of objects with the properties url (String),
	 *         startIndex (int) and endIndex (int, exclusive). The indices
	 *         refer to stringToSearch and stay correct when the same URL
	 *         occurs more than once. Empty array for null or empty input.
	 */
	public function find(stringToSearch:String) : Array {
		var urlArray : Array = [];
		if (!stringToSearch) return urlArray;
		var offset : int = 0;
		// Nothing shorter than four characters is treated as a URL candidate.
		while (stringToSearch.length - offset > 3) {
			var match : Object = urlStructure.exec(stringToSearch.substring(offset));
			if (match == null) break;
			var rawURL : String = match[0];
			// match.index is relative to the substring; convert to an absolute position.
			var start : int = offset + int(match.index);
			if (validate(rawURL)) {
				var theURL : String = trimHotChars ? trimTrailingPunctuation(rawURL) : rawURL;
				urlArray.push({ url: theURL, startIndex: start, endIndex: start + theURL.length });
			}
			// Continue after the raw match; it is never empty, so the loop always advances.
			offset = start + rawURL.length;
		}
		return urlArray;
	}

	/**
	 * Wraps every URL found in a text in an anchor tag.
	 *
	 * @param stringToSearch Text that may contain URLs.
	 * @param startTags Markup inserted directly before each anchor.
	 * @param endTags Markup inserted directly after each anchor.
	 * @param linkText Text shown for every link; the URL itself when null or empty.
	 * @return The text with each URL replaced by
	 *         startTags + "<a href='URL'>" + text + "</a>" + endTags.
	 *         URLs without a protocol get "http://" prepended in the href.
	 */
	public function tag( stringToSearch : String,
						 startTags : String = "",
						 endTags : String = "",
						 linkText : String = null) : String {
		var urls : Array = find(stringToSearch);
		if (urls.length == 0) return stringToSearch;
		// Rebuild the text from slices instead of using String.replace(): replace()
		// only rewrites the first occurrence (which may sit inside an href that was
		// just inserted) and interprets "$" codes in the replacement string.
		var pieces : Array = [];
		var cursor : int = 0;
		for (var i:int = 0; i < urls.length; i++) {
			var urlObj : Object = urls[i];
			var originalURL : String = urlObj.url;
			var href : String = originalURL;
			if (!HAS_PROTOCOL.test(href)) href = "http://" + href;
			// A single quote would terminate the href attribute early.
			href = href.split("'").join("%27");
			pieces.push(stringToSearch.substring(cursor, urlObj.startIndex));
			pieces.push(startTags, "<a href='", href, "'>",
						linkText ? linkText : originalURL,
						"</a>", endTags);
			cursor = urlObj.endIndex;
		}
		pieces.push(stringToSearch.substring(cursor));
		return pieces.join("");
	}

	/**
	 * Rebuilds the URL pattern, the TLD list and the list of characters that
	 * may follow a TLD.
	 */
	public function createRules() : void {
		buildUrlStructure();
		domainList = new Vector.<String>();
		domainList.push (".ac",".ad",".ae",".aero",".af",".ag",".ai",".al",".am",".an",".ao",".aq",".ar",".arpa",".as",".asia",".at",".au",".aw",".ax",".az",".ba",".bb",".bd",".be",".bf",".bg",".bh",".bi",".biz",".bj",".bm",".bn",".bo",".br",".bs",".bt",".bv",".bw",".by",".bz",".ca",".cat",".cc",".cd",".cf",".cg",".ch",".ci",".ck",".cl",".cm",".cn",".com",".coop",".cr",".cu",".cv",".cx",".cy",".cz",".de",".dj",".dk",".dm",".do",".dz",".ec",".edu",".ee",".eg",".er",".es",".et",".eu",".fi",".fj",".fk",".fm",".fo",".fr",".ga",".gb",".gd",".ge",".gf",".gg",".gh",".gi",".gl",".gm",".gn",".gov",".gp",".gq",".gr",".gs",".gt",".gu",".gw",".gy",".hk",".hm",".hn",".hr",".ht",".hu",".id",".ie",".il",".im",".in",".info",".int",".io",".iq",".ir",".is",".it",".je",".jm",".jo",".jobs",".jp",".ke",".kg",".kh",".ki",".km",".kn",".kp",".kr",".kw",".ky",".kz",".la",".lb",".lc",".li",".lk",".lr",".ls",".lt",".lu",".lv",".ly",".ma",".mc",".md",".me",".mg",".mh",".mil",".mk",".ml",".mm",".mn",".mo",".mobi",".mp",".mq",".mr",".ms",".mt",".mu",".museum",".mv",".mw",".mx",".my",".mz",".na",".name",".nc",".net",".nf",".ng",".ni",".nl",".no",".np",".nr",".nu",".nz",".om",".org",".pa",".pe",".pf",".pg",".ph",".pk",".pl",".pm",".pn",".pr",".pro",".ps",".pt",".pw",".py",".qa",".re",".ro",".rs",".ru",".rw",".sa",".sb",".sc",".sd",".se",".sg",".sh",".si",".sj",".sk",".sl",".sm",".sn",".so",".sr",".st",".su",".sv",".sy",".sz",".tc",".td",".tel",".tf",".tg",".th",".tj",".tk",".tl",".tm",".tn",".to",".tp",".tr",".travel",".tt",".tv",".tw",".tz",".ua",".ug",".uk",".us",".uy",".uz",".va",".vc",".ve",".vg",".vi",".vn",".vu",".wf",".ws",".ye",".yt",".yu",".za",".zm",".zw");
		domainList.sort(byLengthDescending);
		legal = new Vector.<String>();
		legal.push(" ","/","~","*","?",".",",",";",":","-","%","=","_","$","&","'","@","!","+","");
	}

	/**
	 * Builds urlStructure from one shared pattern so that the protocol-required
	 * and protocol-optional variants differ only in the "?" after the protocol group.
	 */
	private function buildUrlStructure() : void {
		var protocol : String = "((http|https|ftp):\\/\\/)";
		if (_protocolOptional) protocol += "?";
		// Case-insensitive, so find() also recognises upper-case URLs in running text.
		urlStructure = new RegExp(
			"(" + protocol + "([-a-z0-9]*\\.+)?[-a-z0-9]*\\.[-a-z]+)" +
			"([\\/&\\?=\\.;%\\+\\$@~][\\S]*)?", "i");
	}

	/**
	 * Checks whether the string contains a known TLD that is followed by the
	 * end of the string or by one of the characters in the legal list.
	 * Every occurrence of every TLD is examined, so ".com" in
	 * "www.company.com" is found even though ".company" starts with it.
	 */
	private function domainsValid(hasDomainString:String) : Boolean {
		if (!hasDomainString) return false;
		var haystack : String = hasDomainString.toLowerCase();
		for each (var item:String in domainList) {
			var at : int = haystack.indexOf(item);
			while (at > -1) {
				// charAt() yields "" past the end, which the legal list contains.
				if (legal.indexOf(haystack.charAt(at + item.length)) > -1) return true;
				at = haystack.indexOf(item, at + 1);
			}
		}
		return false;
	}

	/** Removes trailing ".", ")", "?", "!", '"' and "," characters. */
	private function trimTrailingPunctuation(s:String) : String {
		return s.replace(TRAILING_PUNCTUATION, "");
	}

	/** Sort comparator: longer strings first, equal lengths compare as equal. */
	private static function byLengthDescending(x:String, y:String) : Number {
		return y.length - x.length;
	}
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment