Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@zed
Last active August 29, 2015 13:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zed/9903390 to your computer and use it in GitHub Desktop.
Save zed/9903390 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""Lookup word definitions on http://www.urbandictionary.com
Print the result as a JSON list.
Example:
$ python -murbandictionary Honorificabilitudinitatibus
Output:
[
{
"word": "honorificabilitudinitatibus",
"example": "\nFor thou art not so long by the head as honorificabilitudinitatibus: thou art easier swallowed than a flap-dragon.\n",
"def": "\nhonorableness\n\nUsed by Shakespeare in Loves Labors Lost (Costartd; Act V Scene I)\n"
}
]
Based on https://github.com/novel/py-urbandict/blob/e2c7a2cccdd0067573429965d1d955bad0382cb3/urbandict.py
"""
from itertools import count
try:
from html.parser import HTMLParser
from urllib.request import urlopen, Request
from urllib.parse import quote as urlquote
except ImportError: # Python 2
from HTMLParser import HTMLParser
from urllib2 import urlopen, Request
from urllib import quote as urlquote
# Names exported by a star-import of this module.
__all__ = ["UrbanDictParser", "lookup"]
# Module version; lookup() also sends it in the User-Agent header.
__version__ = '0.1.2'
class UrbanDictParser(HTMLParser):
    """Extract word definitions from html.

    Text found inside ``<div class="word">``, ``<div class="meaning">`` and
    ``<div class="example">`` elements is collected into a list of dicts
    with the keys ``'word'``, ``'def'`` and ``'example'``.  Every "word"
    div starts a new dict; "meaning" and "example" text is appended to the
    most recent dict.

    Pass ``data=<html text>`` to the constructor to feed the parser right
    away; ``close()`` returns the collected list.
    """

    # Maps the class of a recognised <div> to the key its text is stored
    # under ('def' instead of 'meaning' for compatibility with urbandict.py).
    _SECTION_KEYS = {'word': 'word', 'meaning': 'def', 'example': 'example'}

    def __init__(self, *args, **kwargs):
        """Create the parser and feed the optional ``data`` keyword to it.

        All other arguments are passed through to ``HTMLParser``.
        """
        data = kwargs.pop('data', None)
        HTMLParser.__init__(self, *args, **kwargs)
        self.translations = []  # for compatibility with urbandict.py
        self._section = None  # result key of the <div> being read, if any
        if data is not None:
            self.feed(data)

    def handle_starttag(self, tag, attrs):
        """Start collecting text when a recognised <div> opens."""
        if tag != 'div':
            return
        key = self._SECTION_KEYS.get(dict(attrs).get('class'))
        if key is None:
            return  # unrelated <div>: leave the current section untouched
        self._section = key
        # A 'word' div normally opens each definition.  An entry is also
        # started when a 'meaning'/'example' div shows up before any 'word'
        # div, so that handle_data() always has a dict to append to instead
        # of failing with IndexError on an empty list.
        if key == 'word' or not self.translations:
            self.translations.append({'word': '', 'def': '', 'example': ''})

    def handle_endtag(self, tag):
        """Stop collecting text at any closing </div>."""
        if tag == 'div':
            # NOTE: assume there is no nested <div> in the known sections
            self._section = None

    def handle_data(self, data):
        """Append text to the current section of the latest definition."""
        if not self._section:
            return  # text outside of the known sections is ignored
        if self._section == 'word':
            data = data.strip()  # for compatibility with urbandict.py
        self.translations[-1][self._section] += normalize_newlines(data)

    def close(self):
        """Finish parsing and return the list of collected definitions."""
        HTMLParser.close(self)
        return self.translations
def normalize_newlines(text):
    """Return *text* with every CRLF pair and every lone CR turned into LF."""
    # Collapse the two-character sequence first so that it becomes a single
    # line feed rather than two.
    without_crlf = text.replace('\r\n', '\n')
    return '\n'.join(without_crlf.split('\r'))
def lookup(word, maxpages=None):
    """Lookup word definitions on urbandictionary.com

    Non-zero maxpages specifies how many pages to download
    (default: unlimited).

    Pages are requested one after another, starting with page 1.  The
    download stops at the first page that cannot be fetched or whose final
    URL no longer carries a ``page=`` parameter (treated as a non-existing
    page).

    Return a list of dicts with 'word', 'def' and 'example' keys, as
    produced by UrbanDictParser.
    """
    url = 'http://www.urbandictionary.com/define.php?term={}&page='.format(
        urlquote(word))
    headers = {'User-Agent': 'urbandictionary.py/' + __version__}
    result = []
    for page in range(1, maxpages + 1) if maxpages else count(1):
        try:
            r = urlopen(Request(url + str(page), headers=headers))
        except EnvironmentError:
            break  # best effort: keep what has been collected so far
        try:
            # Look for 'page=' rather than 'page': the quoted term cannot
            # contain '=', whereas a term such as "homepage" contains
            # 'page' and would hide the redirect, so the loop would never
            # end when maxpages is not given.
            if 'page=' not in r.url:  # non-existing page
                break
            html = r.read().decode('utf-8')
        finally:
            r.close()  # do not leak the connection, whatever happens
        result.extend(UrbanDictParser(data=html).close())
    return result
if __name__ == "__main__":
    # Command-line entry point: look up the word given as the first
    # argument and write its definitions to stdout as indented JSON.
    import json
    import sys

    # Without a word there is nothing to look up: show the usage instead of
    # failing with an IndexError traceback.
    if len(sys.argv) < 2:
        sys.exit('Usage: {} WORD'.format(sys.argv[0]))
    definitions = lookup(sys.argv[1])
    json.dump(definitions, sys.stdout, indent=2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment