Skip to content

Instantly share code, notes, and snippets.

@exavolt
Created January 19, 2011 16:37
Show Gist options
  • Save exavolt/786418 to your computer and use it in GitHub Desktop.
Save exavolt/786418 to your computer and use it in GitHub Desktop.
The name says...
#!/usr/bin/env python
import cgi
import re
import elml
# Matches a leading URL scheme such as "http:" or "mailto:". It is the same
# sub-pattern the URL regex below uses for its "protocol and colon" branch.
_scheme_re = re.compile(r'[a-z][\w-]+:', re.IGNORECASE)

# Typographic quotes and guillemets that must not be the last character of a
# URL. Kept in a non-raw unicode literal so the escapes are resolved by the
# Python parser on both Python 2 and Python 3 (the original ``ur"..."`` prefix
# is a SyntaxError on Python 3).
_URL_END_QUOTES = u'\u00AB\u00BB\u201C\u201D\u2018\u2019'


def _strip_www(label):
    """Return *label* without a leading ``www.``, if it has one."""
    if label.startswith('www.'):
        return label[4:]
    return label


def _urlify_re_proc(match):
    """Build the ``<a class="autourl">`` replacement for one URL match.

    *match* is a match object produced by ``_urlify_re``; group 1 is the whole
    URL. Returns the anchor markup: the ``href`` is the URL (made absolute with
    ``http://`` when it has no scheme) and the link text is a shortened form
    without the scheme, without a leading ``www.`` for web URLs, and without a
    lone trailing slash.
    """
    #TODO: Truncate the display text for long URLs, e.g. turn
    #   http://example.com/post/this-url-is-too-long-so-it-needs-to-be-truncated-nicely
    # into
    #   example.com/post/this-...-nicely
    # (see how Google search results shorten URLs).
    href = match.group(1)
    scheme = _scheme_re.match(href)
    if scheme is None:
        # Naked web URL ("www.example.com", "example.com/path"). A colon
        # further along (host:port, "a:b" in the path) is not a scheme.
        label = _strip_www(href)
        if '/' not in href:
            href += '/'
        href = 'http://' + href
    else:
        # Drop only the scheme itself; later colons (ports, "Talk:Page")
        # belong to the visible part of the URL.
        label = href[scheme.end():].lstrip('/')
        if label.endswith('/') and label.count('/') == 1:
            label = label[:-1]
        if href.startswith('http'):
            label = _strip_www(label)
    # The URL pattern allows '"' inside a URL; left raw it would terminate the
    # href attribute and let the rest of the URL inject attributes.
    # NOTE(review): only the quote is escaped because it is not visible here
    # whether callers HTML-escape the text beforehand; escaping '&' as well
    # could double-escape existing entities.
    # NOTE(review): schemes such as "javascript:" are linked as-is.
    href = href.replace('"', '&quot;')
    return '<a class="autourl" href="%s">%s</a>' % (href, label)


# Adapted from
# http://daringfireball.net/2010/07/improved_regex_for_matching_urls
# The original leading ``([^"]*?)`` group was dropped: it never changed which
# URLs matched, but it made the scan quadratic on text without URLs.
_urlify_re = re.compile(
    r"""
    \b
    (                               # Capture 1: entire matched URL
      (?:
        [a-z][\w-]+:                # URL protocol and colon
        (?:
          /{1,3}                    # 1-3 slashes
          |                         #   or
          [a-z0-9%]                 # Single letter or digit or '%'
                                    # (Trying not to match e.g. "URI::Escape")
        )
        |                           #   or
        www\d{0,3}[.]               # "www.", "www1.", "www2." ... "www999."
        |                           #   or
        [a-z0-9.\-]+[.][a-z]{2,4}/  # looks like domain name followed by a slash
      )
      (?:                           # One or more:
        [^\s()<>]+                  # Run of non-space, non-()<>
        |                           #   or
        \(([^\s()<>]+|(\([^\s()<>]+\)))*\)  # balanced parens, up to 2 levels
      )+
      (?:                           # End with:
        \(([^\s()<>]+|(\([^\s()<>]+\)))*\)  # balanced parens, up to 2 levels
        |                           #   or
        [^\s`!()\[\]{};:'".,<>?""" + _URL_END_QUOTES + r"""]  # not a space or one of these punct chars
      )
    )
    """,
    re.VERBOSE | re.IGNORECASE)


def urlify(text):
    """Return *text* with every URL found in it wrapped in an anchor tag.

    URLs are located with ``_urlify_re`` and rendered by ``_urlify_re_proc``;
    all other text is returned unchanged.
    """
    #TODO: naked email address
    #TODO: use Google's Safe Browsing API to check for malicious sites
    return _urlify_re.sub(_urlify_re_proc, text)
# An "@name" mention: "@" not preceded by a word character, then the name.
_user_re = re.compile(r"""\B@([0-9a-zA-Z_]+)""")

# One anchor as emitted by urlify(). The capturing group makes split() keep
# the anchors, so they can be passed through untouched.
_autourl_re = re.compile(r'(<a class="autourl" [^>]*>[^<]*</a>)')


#TODO: custom params
def convert(text):
    """Return *text* with URLs and ``@user`` mentions turned into links.

    URLs are linked first via ``urlify``; each ``@name`` in the remaining
    text then becomes ``@<a href="/user/name">name</a>``.
    """
    # The original passed the not-yet-assigned ``outp`` to urlify(), which
    # raised on every call; the input is ``text``.
    pieces = _autourl_re.split(urlify(text))
    # split() alternates plain text (even indexes) with URL anchors (odd
    # indexes). Mentions are linked only in plain text, so an "@" inside a URL
    # cannot break the anchor's href or nest a second anchor in it.
    return ''.join(
        piece if index % 2
        else _user_re.sub(r'@<a href="/user/\1">\1</a>', piece)
        for index, piece in enumerate(pieces))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment