Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
A Google Geocoder with a MongoDB memoizer. For the ratelim module, look at https://gist.github.com/3006305
import collections
import functools
import sys
import textwrap
import time

import pymongo
import ratelim
import requests
# Numeric status codes that the geocoder reports in the "Status" -> "code"
# field of its JSON response.
G_GEO_SUCCESS = 200
G_GEO_SERVER_ERROR = 500
G_GEO_MISSING_QUERY = 601
G_GEO_UNKNOWN_ADDRESS = 602
G_GEO_UNAVAILABLE_ADDRESS = 603
G_GEO_BAD_KEY = 610
G_GEO_TOO_MANY_QUERIES = 620

# Human-readable explanation for every status code declared above.
# The bodies are indented for readability only; textwrap.dedent() strips the
# common leading whitespace again, so the stored text starts at column 0.
ERR_MESSAGES = {
    G_GEO_SUCCESS: textwrap.dedent("""\
        No errors occurred; the address was
        successfully parsed and its geocode was returned."""),
    G_GEO_SERVER_ERROR: textwrap.dedent("""\
        A geocoding or directions request
        could not be successfully processed, yet the exact reason for the failure
        is unknown."""),
    G_GEO_MISSING_QUERY: textwrap.dedent("""\
        An empty address was specified in the HTTP q parameter."""),
    G_GEO_UNKNOWN_ADDRESS: textwrap.dedent("""\
        No corresponding geographic location could be found for the specified address,
        possibly because the address is relatively new, or because it may be incorrect."""),
    G_GEO_UNAVAILABLE_ADDRESS: textwrap.dedent("""\
        The geocode for the given
        address or the route for the given directions query cannot be returned due
        to legal or contractual reasons."""),
    G_GEO_BAD_KEY: textwrap.dedent("""\
        The given key is either invalid or does
        not match the domain for which it was given."""),
    G_GEO_TOO_MANY_QUERIES: textwrap.dedent("""\
        The given key has gone over the
        requests limit in the 24 hour period or has submitted too many requests in
        too short a period of time. If you're sending multiple requests in parallel
        or in a tight loop, use a timer or pause in your code to make sure you
        don't send the requests too quickly."""),
}
try:  # Python 3.3+ keeps the container ABCs in collections.abc
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:  # Python 2
    from collections import MutableMapping as _MutableMapping


class MongoDict(_MutableMapping):
    """A dictionary which stores values in a MongoDB collection.

    Every entry is kept as one document of the form
    ``{<keyname>: <key>, 'value': <value>}``.  Keys that have a ``lower()``
    method (strings) are lower-cased before every lookup or store, so the
    mapping is case-insensitive for them.  Deleting entries is not supported.
    """

    def __init__(self, hostname, port, dbname, collection, keyname,
                 *args, **kwargs):
        """Connect to MongoDB and optionally seed the mapping.

        hostname, port -- address of the MongoDB server.
        dbname -- name of the database to use.
        collection -- name of the collection that holds the entries.
        keyname -- name of the document field that holds the key.
        *args, **kwargs -- initial items, interpreted as by ``dict()``.
        """
        # NOTE(review): pymongo.Connection (and Collection.update, used in
        # __setitem__) are legacy PyMongo APIs -- confirm that the installed
        # PyMongo version still provides them.
        database = pymongo.Connection(hostname, port)[dbname]
        # `collection` selects the collection and `keyname` the key field;
        # using `keyname` as the collection name would leave `collection`
        # without any effect.
        self.store = database[collection]
        self.keyname = keyname
        self.update(dict(*args, **kwargs))  # use the free update to set keys

    def __getitem__(self, key):
        """Return the value stored for *key*; raise KeyError if absent."""
        document = self.store.find_one(
            {self.keyname: self._transformkey(key)})
        if document is None:
            raise KeyError(key)
        return document['value']

    def __setitem__(self, key, value):
        """Store *value* under *key*, replacing any previous entry."""
        lookup = self._transformkey(key)
        document = {self.keyname: lookup, 'value': value}
        # Upsert instead of a blind insert: assigning the same key twice must
        # replace the entry, not pile up duplicate documents.
        self.store.update({self.keyname: lookup}, document, upsert=True)

    def __delitem__(self, key):
        """Deletion is not supported; always raises NotImplementedError."""
        raise NotImplementedError

    def __iter__(self):
        """Iterate over the stored (already transformed) keys."""
        # The mapping protocol requires keys here, not whole documents;
        # keys(), items() and values() are all derived from this iterator.
        for document in self.store.find():
            yield document[self.keyname]

    def __len__(self):
        """Return the number of documents in the collection."""
        return self.store.find().count()

    # This dictionary has case-insensitive keys
    def _transformkey(self, key):
        """Return *key* lower-cased, or unchanged if it has no ``lower()``."""
        try:
            return key.lower()
        except AttributeError:
            # Not a string-like key (number, tuple, ...): use it as it is.
            return key
class mongomemoized(object):
    """Decorator. Caches a function's return value each time it is called.

    If called later with the same argument, the cached value is returned
    (not reevaluated).  The cache is a MongoDict, so results are kept in
    MongoDB.  The decorated function must take exactly one positional
    argument, which is used as the cache key.

    Usage::

        @mongomemoized(hostname, port, dbname, collection, keyname)
        def lookup(text):
            ...
    """

    def __init__(self, hostname, port, dbname, collection, keyname):
        """Create the backing MongoDict; arguments are passed on unchanged."""
        self.cache = MongoDict(hostname, port, dbname, collection, keyname)
        self.func = None  # set by __call__ once a function is decorated

    def __call__(self, f):
        """Return a caching wrapper around the one-argument function *f*."""
        self.func = f

        def wrapped_f(args):
            try:
                hash(args)
            except TypeError:
                # uncacheable. a list, for instance.
                # better to not cache than blow up.
                return f(args)
            # One lookup instead of "in" followed by a second read.  Only the
            # cache access is guarded, so a KeyError raised by f itself is
            # never mistaken for a cache miss.
            try:
                return self.cache[args]
            except KeyError:
                pass
            value = f(args)
            self.cache[args] = value
            return value

        try:
            functools.update_wrapper(wrapped_f, f)
        except AttributeError:
            # On Python 2 this fails for callables without __name__ (e.g.
            # class instances); the metadata copy is only cosmetic.
            pass
        return wrapped_f

    def __repr__(self):
        """Return the decorated function's docstring, if there is one."""
        func = getattr(self, "func", None)
        doc = getattr(func, "__doc__", None) if func is not None else None
        # __repr__ must return a string even before anything was decorated
        # or when the function has no docstring.
        return doc if doc else object.__repr__(self)

    def __get__(self, obj, objtype):
        """Support instance methods."""
        # NOTE(review): this binds *obj* as the argument of __call__, which
        # expects the function to decorate; it looks like a leftover from a
        # memoize recipe whose __call__ took the call arguments -- confirm
        # whether it is still needed.
        return functools.partial(self.__call__, obj)
# Make sure the decorator order is not changed: the rate limit works only for
# the inner function, while the memoized function is not rate limited
@mongomemoized("localhost", 27017, "geocoded", "google", "query")
@ratelim.rate_evenly_limited(14000, 86400)
def geocode(text):
    """Geocode *text* with the Google geocoder and return its placemarks.

    Returns the ``'Placemark'`` entry of the JSON response on success, or
    None when the service reports an unknown (602) or unavailable (603)
    address.

    Terminates the process with exit status 1 when the service reports a bad
    key (610) or too many queries (620).  Raises AssertionError for every
    other non-success status, and re-raises whatever goes wrong while
    decoding the response.
    """
    # NOTE(review): this looks like the legacy v2 geocoding endpoint --
    # confirm that it is still served.
    url = "http://maps.googleapis.com/maps/geo"
    # NOTE(review): an API key is hard-coded here; consider loading it from
    # configuration instead of keeping a credential in source control.
    params = {
        "q": text,
        "key": "AIzaSyCeFJ26MUa7qNYLBUXfXD8kQPWkevqzCK4",
        "output": "json",
        "sensor": "false",
    }
    resp = requests.get(url, params=params)
    try:
        # Response.json is a property in old Requests releases and a method
        # in newer ones; accept both.
        payload = resp.json() if callable(resp.json) else resp.json
        code = payload['Status']['code']
    except Exception:
        print("Error on unpacking the response")
        raise

    if code == G_GEO_SUCCESS:
        return payload['Placemark']

    # .get(): a status code missing from the table must not hide the real
    # failure behind a KeyError.
    print(ERR_MESSAGES.get(
        code, "Unrecognised geocoder status code: %r" % (code,)))
    if code in (G_GEO_UNKNOWN_ADDRESS, G_GEO_UNAVAILABLE_ADDRESS):
        return None
    if code in (G_GEO_TOO_MANY_QUERIES, G_GEO_BAD_KEY):
        # Retrying cannot help with a rejected key or an exhausted quota.
        sys.exit(1)
    # Raised explicitly (an assert statement is stripped under -O); the
    # exception type is kept for callers that already catch AssertionError.
    raise AssertionError("geocoder returned status code %r" % (code,))
@DenisCarriere

This comment has been minimized.

Copy link

commented Oct 8, 2014

I wouldn't use MongoDB for that purpose; it would be far easier to use SQLite to store those values.
Thanks for the code preview. I'll try to work something out with SQLite: it's a lot easier to manage and requires no setup, since it's part of the Python standard library.

@DenisCarriere

This comment has been minimized.

Copy link

commented Dec 16, 2014

I should remove this comment; I love MongoDB together with its awesome PyMongo library. I still don't know how I would incorporate it into the Geocoder.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.