Created
August 11, 2009 20:15
-
-
Save mattdennewitz/166086 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# models.py:
# ----------
from django.contrib.gis.db import models as geomodels | |
class Place(geomodels.Model):
    """
    A named place together with the polygon that outlines it.

    Rows are created by import_flickr_shapefiles.py, which copies the
    attributes of each ``place`` element in the Flickr shapefile dataset
    into the fields below.
    """
    # Identifiers and type information, copied as-is from the attributes of
    # the source <place> element.
    woe_id = geomodels.IntegerField()
    place_id = geomodels.CharField(max_length=255)
    place_type = geomodels.CharField(max_length=255)
    place_type_id = geomodels.IntegerField()
    label = geomodels.CharField(max_length=255)
    # Creation time of the shape the geometry was taken from.
    created = geomodels.DateTimeField()
    # Outline of the place, stored with SRID 4326.
    geometry = geomodels.PolygonField(srid=4326)

    # Manager from django.contrib.gis, used for geographic queries.
    objects = geomodels.GeoManager()

    class Meta:
        verbose_name_plural = u"Alpha Shapes"

    # Returns the string representation of the model.
    def __unicode__(self):
        # A unicode format string already yields a unicode result, so no
        # extra unicode() conversion is needed.
        return u'%s: %s' % (self.woe_id, self.label)
# ---
# import_flickr_shapefiles.py
# ---------------------------
# -*- coding: utf8 -*-
from datetime import datetime | |
import logging | |
from lxml import etree | |
import sys | |
from django.core.management import setup_environ | |
import settings | |
setup_environ(settings) | |
from django.contrib.gis.geos import GEOSGeometry, fromstr | |
from django.db import transaction | |
from places.models import Place | |
# Configure the root logger. With level=ERROR only error messages (and above)
# are emitted, so the logging.debug()/logging.info() calls in this script stay
# silent unless the level is lowered here.
logging.basicConfig(
    level=logging.ERROR,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
def sort_list_by_obj_attr(seq, attr):
    """
    Return a new list with the items of ``seq`` ordered by ``item.get(attr)``.

    Borrowed from ActiveState recipe #52230, adjusted for using lxml elements.

    ``seq`` is not modified. Each item only needs a ``get(name)`` method
    (lxml elements and dicts both qualify). Values are compared exactly as
    ``get`` returns them, with no type conversion, so string attributes sort
    lexicographically. Items with equal values keep their input order.
    """
    # sorted() is stable, which gives the same tie-breaking as the original
    # decorate/sort/undecorate recipe that used the item index as second key.
    return sorted(seq, key=lambda item: item.get(attr))
###########################
# parse the flickr data set
###########################

# Namespace prefix mapping used for every XPath query against the dataset.
FLICKR_NS = {'t': "x-urn:flickr:"}

# remove old places
logging.debug("Removing old Place objects")
Place.objects.filter().delete()
logging.debug("Removed old Place objects")

shapes_file = "/path/to/flickr_shapefiles_public_dataset_1.0.xml"

# Stream the file one <place> element at a time instead of loading the whole
# document into memory.
context = etree.iterparse(shapes_file, tag="{x-urn:flickr:}place")

for event, elem in context:
    # The try/finally guarantees the element is released even when a place is
    # skipped with `continue`; otherwise skipped places would stay in the
    # partially built tree for the rest of the run.
    try:
        # extract basic info
        place_label = elem.get('label').encode('utf8')
        woe_id = elem.get('woe_id')
        place_id = elem.get('place_id')
        place_type = elem.get('place_type').encode('utf8')
        place_type_id = elem.get('place_type_id')

        logging.debug("Parsing %s", place_label)

        # extract shapes
        # The leading "." keeps the search inside this <place>; an absolute
        # "//" path is evaluated against the whole document tree.
        shapes = elem.xpath(".//t:shape", namespaces=FLICKR_NS)
        if not shapes:
            logging.error("No shapes defined for %s", place_label)
            continue

        # order by created, get the most recently generated shape
        # NOTE(review): "created" values are compared as strings here; that
        # matches numeric order only while all timestamps have the same
        # number of digits.
        latest_shape = sort_list_by_obj_attr(shapes, "created")[-1]
        latest_shape_created_date = datetime.fromtimestamp(
            float(latest_shape.get('created')))

        logging.info("%s created on %s", place_label,
                     latest_shape_created_date)

        # get all polylines of the latest shape (relative path, see above)
        polylines = latest_shape.xpath(".//t:polyline", namespaces=FLICKR_NS)
        if not polylines:
            logging.error("No polylines for %s", place_label)
            continue

        # Only the first polyline is imported; any others are ignored.
        p = polylines[0]

        # translate pairs for `POLYGON` compatibility
        # 45.289924621582,-64.774787902832 45.294815063477,-64.777793884277
        # becomes:
        # -64.774787902832 45.289924621582, -64.777793884277 45.294815063477
        raw_polyline = p.text
        if not raw_polyline:
            logging.error("No polyline data for %s", place_label)
            continue

        fixed_pairs = []
        # split() with no argument splits on any whitespace run and drops
        # empty tokens, so repeated spaces or trailing newlines are not
        # reported as invalid pairs.
        for raw_pair in raw_polyline.split():
            pair = raw_pair.split(',')
            if len(pair) != 2:
                # A malformed pair is reported and left out; the remaining
                # pairs are still used.
                logging.error("Invalid pair for %s: %s", place_label, pair)
            else:
                lat, lng = pair
                fixed_pairs.append("%s %s" % (lng, lat))

        polyline = ','.join(fixed_pairs)

        # create new Place
        try:
            place = Place.objects.create(
                woe_id=woe_id,
                place_id=place_id,
                place_type=place_type,
                place_type_id=place_type_id,
                label=place_label,
                created=latest_shape_created_date,
                geometry=GEOSGeometry('POLYGON((%s))' % polyline))
        except Exception:
            # Best effort: report the failure, reset the transaction and move
            # on to the next place. KeyboardInterrupt/SystemExit are not
            # caught, so the import can still be aborted.
            logging.error("Could not create %s: %s", place_label,
                          sys.exc_info()[1])
            transaction.rollback_unless_managed()
            continue

        logging.debug("Created %s (%s): pk: %s", place_label, place_type,
                      place.pk)
    finally:
        # It's safe to call clear() here because no descendants will be
        # accessed again.
        elem.clear()
        # Also drop the already-processed preceding siblings from the parent
        # so the tree does not keep growing.
        while elem.getprevious() is not None:
            del elem.getparent()[0]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment