uogbuji/gruber_urlintext.py

## gruber_urlintext.py
#See: http://daringfireball.net/2010/07/improved_regex_for_matching_urls
import re, urllib

GRUBER_URLINTEXT_PAT = re.compile(ur'(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?\xab\xbb\u201c\u201d\u2018\u2019]))')

for line in urllib.urlopen("http://daringfireball.net/misc/2010/07/url-matching-regex-test-data.text"):
    print [ mgroups[0] for mgroups in GRUBER_URLINTEXT_PAT.findall(line) ]
	#See: http://daringfireball.net/2010/07/improved_regex_for_matching_urls
	import re, urllib

	GRUBER_URLINTEXT_PAT = re.compile(ur'(?i)\b((?:https?://\|www\d{0,3}[.]\|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+\|\(([^\s()<>]+\|(\([^\s()<>]+\)))\))+(?:\(([^\s()<>]+\|(\([^\s()<>]+\)))\)\|[^\s`!()\[\]{};:\'".,<>?\xab\xbb\u201c\u201d\u2018\u2019]))')

	for line in urllib.urlopen("http://daringfireball.net/misc/2010/07/url-matching-regex-test-data.text"):
	print [ mgroups[0] for mgroups in GRUBER_URLINTEXT_PAT.findall(line) ]