mmgj/URLValidator.as

## URLValidator.as
package com.ctrloptcmd.string {

  	/**
		*	Copyright 2009 Martin Jacobsen.
		*
		*	@langversion ActionScript 3.0
		*	@playerversion Flash 10.0
		*
		*	@author Martin Jacobsen
		*	@since  2009-02-02
		*	@version 1.0
		*	@description :  Utility class for finding, validating and transforming URLs.
		*
		*	Several URLParsers exist, but those I found were all too generic,
		*	mainly by being too restrictive (only accepting URLs with 'http' and .com | .org.| .net)
		*	or too lax (accepting .egg, .bacon and so forth). Hence this class.
		*
		*	The URLParsing logic here is a unholy matrimony of RegExp and String utilities.
		*	The ActionScript implementation of RegExp leaves something to be desired, and I decided
		*	to store the TLDs in a Vector partly because I've been told that running through a Vector
		*	is more effective than using RegExp when you're looking for exact matches and partly
		*	because I just couldn't get the RegExp to work when it reached a certain length...
		*	Might just be my incompetence.
		*
		*	The usage is pretty self-explanatory, but to sum it up;
		*
		*	var validator : URLValidator = new URLValidator();
		*		validator.validate("http://google.com") // returns true;
		*		validator.validate("google.com") // returns true unless protocolOptional is set to false;
		*
		*		var a:Array = validator.find(longStringWithScatteredURLs) // returns an Array of objects
		*			a[0].url // the URL.
		*			a[0].startIndex // the startIndex of the URL in the text
		*			a[0].endIndex // you get it...
		*
		*		validator.tag(longStringWithScatteredURLs) // returns the original string
		*														// with all URLs neatly <a href'ed
		*														// optionally add tags and substitute text.
		*
		*
		*	@license: Do what thou wilt shall be the whole of the law. (But, hey: Credit is always welcome.)
		*
		*/


	public class URLValidator  {

		private var urlStructure 	: RegExp;
		private var domainList		: Vector.<String>;
		private var legal 			: Vector.<String>;

		public var trimHotChars		 : Boolean = true;
		public var protocolOptional : Boolean = true;


	public function URLValidator () {
		createRules();
	}


	public function validate(stringToValidate:String) : Boolean {
		var isURL : Boolean = false;
			stringToValidate.toLowerCase();
		if(domainsValid(stringToValidate)) {
			if (urlStructure.exec(stringToValidate) != null &&
			stringToValidate.length == urlStructure.exec(stringToValidate)[0].length){
				isURL = true;
			}
		}
		return isURL;
	}


	public function findValid(stringToValidate:String) : Boolean {
		var hasURL : Boolean = false;
			stringToValidate.toLowerCase();
		if(domainsValid(stringToValidate)) {
			if (urlStructure.exec(stringToValidate) != null){
				hasURL = true;
			}
		}
		return hasURL;
	}


	public function find(stringToSearch:String) : Array {
		var subToSearch : String = stringToSearch;
		var urlArray : Array = [];
		var searchPos : uint = 0;
			while(subToSearch.length > 3){
				if (urlStructure.exec(subToSearch) != null){
					var theURL : String = urlStructure.exec(subToSearch)[0];
					if(validate(theURL)){
						if(trimHotChars) theURL = trim(theURL);
						var o:Object = new Object;
							o.url = theURL;
							o.startIndex = stringToSearch.indexOf(theURL);
							o.endIndex = o.startIndex + theURL.length;
							urlArray.push(o);
					}
					var n : uint = subToSearch.indexOf(theURL) + theURL.length;
					subToSearch = subToSearch.substring(n);
				} else {
					subToSearch = "";
				}
			}

			function trim(s:String) : String {
				for (var i:int = s.length; i > 0; i++){
					if(s.substr(s.length-1).match(/[\.\)?!\"",]/) != null){
						s = s.slice(0,s.length-1);
					} else {
						break;
					}
				}
				return s
			}
		return urlArray;
	}

	public function tag( stringToSearch : String,
								startTags : String = "",
								endTags : String = "",
								linkText : String = null) : String {


		var urls:Array = find(stringToSearch);
			for each (var urlObj:Object in urls){
				var theURL		: String;
				var originalURL: String;
					theURL = originalURL = urlObj.url;

				var link 		: String	 = "";
				var start 		: String = startTags + "<a href='";
				var end 		: String = "</a>" + endTags;

					if(theURL.search(/((http|https|ftp):\/\/)/) < 0){
						theURL = "http://" + theURL;
					}

					if (linkText) link = linkText;
					else link = originalURL;

					theURL =  start + theURL + "'>" + link + end;
					stringToSearch = stringToSearch.replace(originalURL, theURL);
			}

		return stringToSearch;
	}


	private function domainsValid(hasDomainString:String) : Boolean {
		var hasDomain : Boolean = false;
		for each(var item:String in domainList){
			var tld : RegExp = new RegExp("(\\"+item+")");
			if (hasDomainString.search(tld) >-1){
				var nextChar : String;
				nextChar = hasDomainString.substr(hasDomainString.search(tld) + item.length, 1);
					hasDomain = true;
				/*if(isLegal(nextChar)){
					hasDomain = true;	              //FIXME!
					break;
					}*/
				}
			}

		function isLegal(q:String) : Boolean {
			var isLegit : Boolean = false;
			for each (var legalChar:String in legal){
			if(q == legalChar){
				isLegit = true;
				break;
				}
			}
			return isLegit;
		}
		return hasDomain;
	}


	public function createRules() : void {

		if(protocolOptional)
		urlStructure = /(((http|https|ftp):\/\/)?([-a-z0-9]*\.+)?[-a-zA-Z0-9]*\.[-a-zA-Z]+)([\/&\?=\.;%\+\$@~][\S]*)?/
		else
		urlStructure = /(((http|https|ftp):\/\/)([-a-z0-9]*\.+)?[a-zA-Z0-9]*\.[a-zA-Z]+)([\/&\?=\.;%\+\$@~][\S]*)?/

		domainList = new Vector.<String>();
		domainList.push (".ac",".ad",".ae",".aero",".af",".ag",".ai",".al",".am",".an",".ao",".aq",".ar",".arpa",".as",".asia",".at",".au",".aw",".ax",".az",".ba",".bb",".bd",".be",".bf",".bg",".bh",".bi",".biz",".bj",".bm",".bn",".bo",".br",".bs",".bt",".bv",".bw",".by",".bz",".ca",".cat",".cc",".cd",".cf",".cg",".ch",".ci",".ck",".cl",".cm",".cn",".com",".coop",".cr",".cu",".cv",".cx",".cy",".cz",".de",".dj",".dk",".dm",".do",".dz",".ec",".edu",".ee",".eg",".er",".es",".et",".eu",".fi",".fj",".fk",".fm",".fo",".fr",".ga",".gb",".gd",".ge",".gf",".gg",".gh",".gi",".gl",".gm",".gn",".gov",".gp",".gq",".gr",".gs",".gt",".gu",".gw",".gy",".hk",".hm",".hn",".hr",".ht",".hu",".id",".ie",".il",".im",".in",".info",".int",".io",".iq",".ir",".is",".it",".je",".jm",".jo",".jobs",".jp",".ke",".kg",".kh",".ki",".km",".kn",".kp",".kr",".kw",".ky",".kz",".la",".lb",".lc",".li",".lk",".lr",".ls",".lt",".lu",".lv",".ly",".ma",".mc",".md",".me",".mg",".mh",".mil",".mk",".ml",".mm",".mn",".mo",".mobi",".mp",".mq",".mr",".ms",".mt",".mu",".museum",".mv",".mw",".mx",".my",".mz",".na",".name",".nc",".net",".nf",".ng",".ni",".nl",".no",".np",".nr",".nu",".nz",".om",".org",".pa",".pe",".pf",".pg",".ph",".pk",".pl",".pm",".pn",".pr",".pro",".ps",".pt",".pw",".py",".qa",".re",".ro",".rs",".ru",".rw",".sa",".sb",".sc",".sd",".se",".sg",".sh",".si",".sj",".sk",".sl",".sm",".sn",".so",".sr",".st",".su",".sv",".sy",".sz",".tc",".td",".tel",".tf",".tg",".th",".tj",".tk",".tl",".tm",".tn",".to",".tp",".tr",".travel",".tt",".tv",".tw",".tz",".ua",".ug",".uk",".us",".uy",".uz",".va",".vc",".ve",".vg",".vi",".vn",".vu",".wf",".ws",".ye",".yt",".yu",".za",".zm",".zw");

		domainList.sort(strLenSort);
		function strLenSort(x:String,y:String) : Number {
			if (x.length > y.length){
				return -1;
			} else {
				return 1;
			}
		}

	legal = new Vector.<String>();
	legal.push(" ","/","~","*","?",".",",",";",":","-","%","=","_","$","&","'","@","!","+","");

	}
	}

}
	package com.ctrloptcmd.string {

	/**
	* Copyright 2009 Martin Jacobsen.
	*
	* @langversion ActionScript 3.0
	* @playerversion Flash 10.0
	*
	* @author Martin Jacobsen
	* @since 2009-02-02
	* @version 1.0
	* @description : Utility class for finding, validating and transforming URLs.
	*
	* Several URLParsers exist, but those I found were all too generic,
	* mainly by being too restrictive (only accepting URLs with 'http' and .com \| .org.\| .net)
	* or too lax (accepting .egg, .bacon and so forth). Hence this class.
	*
	* The URLParsing logic here is a unholy matrimony of RegExp and String utilities.
	* The ActionScript implementation of RegExp leaves something to be desired, and I decided
	* to store the TLDs in a Vector partly because I've been told that running through a Vector
	* is more effective than using RegExp when you're looking for exact matches and partly
	* because I just couldn't get the RegExp to work when it reached a certain length...
	* Might just be my incompetence.
	*
	* The usage is pretty self-explanatory, but to sum it up;
	*
	* var validator : URLValidator = new URLValidator();
	* validator.validate("http://google.com") // returns true;
	* validator.validate("google.com") // returns true unless protocolOptional is set to false;
	*
	* var a:Array = validator.find(longStringWithScatteredURLs) // returns an Array of objects
	* a[0].url // the URL.
	* a[0].startIndex // the startIndex of the URL in the text
	* a[0].endIndex // you get it...
	*
	* validator.tag(longStringWithScatteredURLs) // returns the original string
	* // with all URLs neatly <a href'ed
	* // optionally add tags and substitute text.
	*
	*
	* @license: Do what thou wilt shall be the whole of the law. (But, hey: Credit is always welcome.)
	*
	*/


	public class URLValidator {

	private var urlStructure : RegExp;
	private var domainList : Vector.<String>;
	private var legal : Vector.<String>;

	public var trimHotChars : Boolean = true;
	public var protocolOptional : Boolean = true;


	public function URLValidator () {
	createRules();
	}



	public function validate(stringToValidate:String) : Boolean {
	var isURL : Boolean = false;
	stringToValidate.toLowerCase();
	if(domainsValid(stringToValidate)) {
	if (urlStructure.exec(stringToValidate) != null &&
	stringToValidate.length == urlStructure.exec(stringToValidate)[0].length){
	isURL = true;
	}
	}
	return isURL;
	}


	public function findValid(stringToValidate:String) : Boolean {
	var hasURL : Boolean = false;
	stringToValidate.toLowerCase();
	if(domainsValid(stringToValidate)) {
	if (urlStructure.exec(stringToValidate) != null){
	hasURL = true;
	}
	}
	return hasURL;
	}


	public function find(stringToSearch:String) : Array {
	var subToSearch : String = stringToSearch;
	var urlArray : Array = [];
	var searchPos : uint = 0;
	while(subToSearch.length > 3){
	if (urlStructure.exec(subToSearch) != null){
	var theURL : String = urlStructure.exec(subToSearch)[0];
	if(validate(theURL)){
	if(trimHotChars) theURL = trim(theURL);
	var o:Object = new Object;
	o.url = theURL;
	o.startIndex = stringToSearch.indexOf(theURL);
	o.endIndex = o.startIndex + theURL.length;
	urlArray.push(o);
	}
	var n : uint = subToSearch.indexOf(theURL) + theURL.length;
	subToSearch = subToSearch.substring(n);
	} else {
	subToSearch = "";
	}
	}

	function trim(s:String) : String {
	for (var i:int = s.length; i > 0; i++){
	if(s.substr(s.length-1).match(/[\.\)?!\"",]/) != null){
	s = s.slice(0,s.length-1);
	} else {
	break;
	}
	}
	return s
	}
	return urlArray;
	}

	public function tag( stringToSearch : String,
	startTags : String = "",
	endTags : String = "",
	linkText : String = null) : String {


	var urls:Array = find(stringToSearch);
	for each (var urlObj:Object in urls){
	var theURL : String;
	var originalURL: String;
	theURL = originalURL = urlObj.url;

	var link : String = "";
	var start : String = startTags + "<a href='";
	var end : String = "</a>" + endTags;

	if(theURL.search(/((http\|https\|ftp):\/\/)/) < 0){
	theURL = "http://" + theURL;
	}

	if (linkText) link = linkText;
	else link = originalURL;

	theURL = start + theURL + "'>" + link + end;
	stringToSearch = stringToSearch.replace(originalURL, theURL);
	}

	return stringToSearch;
	}


	private function domainsValid(hasDomainString:String) : Boolean {
	var hasDomain : Boolean = false;
	for each(var item:String in domainList){
	var tld : RegExp = new RegExp("(\\"+item+")");
	if (hasDomainString.search(tld) >-1){
	var nextChar : String;
	nextChar = hasDomainString.substr(hasDomainString.search(tld) + item.length, 1);
	hasDomain = true;
	/*if(isLegal(nextChar)){
	hasDomain = true; //FIXME!
	break;
	}*/
	}
	}

	function isLegal(q:String) : Boolean {
	var isLegit : Boolean = false;
	for each (var legalChar:String in legal){
	if(q == legalChar){
	isLegit = true;
	break;
	}
	}
	return isLegit;
	}
	return hasDomain;
	}


	public function createRules() : void {

	if(protocolOptional)
	urlStructure = /(((http\|https\|ftp):\/\/)?([-a-z0-9]\.+)?[-a-zA-Z0-9]\.[-a-zA-Z]+)([\/&\?=\.;%\+\$@~][\S]*)?/
	else
	urlStructure = /(((http\|https\|ftp):\/\/)([-a-z0-9]\.+)?[a-zA-Z0-9]\.[a-zA-Z]+)([\/&\?=\.;%\+\$@~][\S]*)?/

	domainList = new Vector.<String>();
	domainList.push (".ac",".ad",".ae",".aero",".af",".ag",".ai",".al",".am",".an",".ao",".aq",".ar",".arpa",".as",".asia",".at",".au",".aw",".ax",".az",".ba",".bb",".bd",".be",".bf",".bg",".bh",".bi",".biz",".bj",".bm",".bn",".bo",".br",".bs",".bt",".bv",".bw",".by",".bz",".ca",".cat",".cc",".cd",".cf",".cg",".ch",".ci",".ck",".cl",".cm",".cn",".com",".coop",".cr",".cu",".cv",".cx",".cy",".cz",".de",".dj",".dk",".dm",".do",".dz",".ec",".edu",".ee",".eg",".er",".es",".et",".eu",".fi",".fj",".fk",".fm",".fo",".fr",".ga",".gb",".gd",".ge",".gf",".gg",".gh",".gi",".gl",".gm",".gn",".gov",".gp",".gq",".gr",".gs",".gt",".gu",".gw",".gy",".hk",".hm",".hn",".hr",".ht",".hu",".id",".ie",".il",".im",".in",".info",".int",".io",".iq",".ir",".is",".it",".je",".jm",".jo",".jobs",".jp",".ke",".kg",".kh",".ki",".km",".kn",".kp",".kr",".kw",".ky",".kz",".la",".lb",".lc",".li",".lk",".lr",".ls",".lt",".lu",".lv",".ly",".ma",".mc",".md",".me",".mg",".mh",".mil",".mk",".ml",".mm",".mn",".mo",".mobi",".mp",".mq",".mr",".ms",".mt",".mu",".museum",".mv",".mw",".mx",".my",".mz",".na",".name",".nc",".net",".nf",".ng",".ni",".nl",".no",".np",".nr",".nu",".nz",".om",".org",".pa",".pe",".pf",".pg",".ph",".pk",".pl",".pm",".pn",".pr",".pro",".ps",".pt",".pw",".py",".qa",".re",".ro",".rs",".ru",".rw",".sa",".sb",".sc",".sd",".se",".sg",".sh",".si",".sj",".sk",".sl",".sm",".sn",".so",".sr",".st",".su",".sv",".sy",".sz",".tc",".td",".tel",".tf",".tg",".th",".tj",".tk",".tl",".tm",".tn",".to",".tp",".tr",".travel",".tt",".tv",".tw",".tz",".ua",".ug",".uk",".us",".uy",".uz",".va",".vc",".ve",".vg",".vi",".vn",".vu",".wf",".ws",".ye",".yt",".yu",".za",".zm",".zw");

	domainList.sort(strLenSort);
	function strLenSort(x:String,y:String) : Number {
	if (x.length > y.length){
	return -1;
	} else {
	return 1;
	}
	}

	legal = new Vector.<String>();
	legal.push(" ","/","~","*","?",".",",",";",":","-","%","=","_","$","&","'","@","!","+","");

	}
	}

	}