Skip to content

Instantly share code, notes, and snippets.

@knzm
Created October 13, 2011 16:55
Show Gist options
  • Save knzm/1284785 to your computer and use it in GitHub Desktop.
Save knzm/1284785 to your computer and use it in GitHub Desktop.
import socket
import re
import urllib
# Give every socket created from here on a 5-second default timeout, so a
# stalled network operation during the fetch in get_content times out
# instead of blocking forever.
socket.setdefaulttimeout(5)
def get_content(url):
    """Fetch *url* and return the raw response body.

    This is a best-effort fetch: a failure while opening the URL or while
    reading the response (connection error, timeout, missing file,
    malformed URL) yields an empty string instead of raising.

    Args:
        url: The URL to retrieve.

    Returns:
        The body exactly as returned by the response's ``read()`` (an
        undecoded byte string), or ``""`` if the URL could not be opened
        or read.
    """
    # ``urllib.urlopen`` only exists on Python 2; on Python 3 the
    # equivalent function lives in ``urllib.request``.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    try:
        f = urlopen(url)
    except (IOError, ValueError):
        # ValueError is how Python 3 rejects a malformed / scheme-less URL.
        return ""
    try:
        return f.read()
    except IOError:
        # A timeout or reset while reading is as much a fetch failure as a
        # failed open, so report it the same way.
        return ""
    finally:
        f.close()
def head(s, n):
    """Return the first *n* lines of *s* joined with newline characters.

    The text is split with ``splitlines()``, so the original line
    terminators are dropped and the result has no trailing newline.
    *n* is used as a slice bound: asking for more lines than exist
    returns all of them, and ``0`` returns an empty string.

    Args:
        s: The text to truncate.
        n: Number of leading lines to keep.

    Returns:
        The selected lines joined by a single newline character each.
    """
    return "\n".join(s.splitlines()[:n])
def get_title(url):
    """Return the text of the first ``<title>`` element of the page at *url*.

    The page is fetched with ``get_content`` and searched as raw bytes.
    The title bytes are decoded by trying UTF-8, EUC-JP and Shift_JIS in
    that order; if none of them fits, they are decoded as UTF-8 with
    undecodable bytes replaced, so a found title is always returned as
    decoded text rather than as a raw byte string.

    Args:
        url: Address of the HTML page to inspect.

    Returns:
        The stripped, decoded title, or ``""`` when the fetched content is
        empty or contains no ``<title>`` element.
    """
    content = get_content(url)
    if not content:
        return ""

    # Byte pattern, because the content has not been decoded yet.  The
    # optional group lets tags that carry attributes (<title lang="ja">)
    # match as well as the bare <title>.
    m = re.search(br'<title(?:\s[^>]*)?>(.*?)</title>', content,
                  re.DOTALL | re.IGNORECASE)
    if not m:
        return ""

    title = m.group(1).strip()
    for enc in ('utf-8', 'euc-jp', 'shift-jis'):
        try:
            return title.decode(enc)
        except UnicodeDecodeError:
            continue
    # No candidate encoding fits: degrade gracefully instead of handing
    # the caller undecoded bytes.
    return title.decode('utf-8', 'replace')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment