Skip to content

Instantly share code, notes, and snippets.

@mattdennewitz
Created August 11, 2009 20:15
Show Gist options
  • Save mattdennewitz/166086 to your computer and use it in GitHub Desktop.
Save mattdennewitz/166086 to your computer and use it in GitHub Desktop.
# models.py:
# ----------
from django.contrib.gis.db import models as geomodels
# One place outline ("alpha shape") stored as a WGS84 polygon, together with
# the identifying attributes that accompany it.
class Place(geomodels.Model):
    # Identification and classification of the place.
    woe_id = geomodels.IntegerField()
    place_id = geomodels.CharField(max_length=255)
    place_type = geomodels.CharField(max_length=255)
    place_type_id = geomodels.IntegerField()
    label = geomodels.CharField(max_length=255)

    # Timestamp associated with the stored outline.
    created = geomodels.DateTimeField()

    # The outline itself; SRID 4326 is longitude/latitude on WGS84.
    geometry = geomodels.PolygonField(srid=4326)

    # GeoManager enables spatial lookups on this model's querysets.
    objects = geomodels.GeoManager()

    class Meta:
        verbose_name_plural = u"Alpha Shapes"

    def __unicode__(self):
        """Return the display form ``<woe_id>: <label>`` as unicode."""
        text = '%s: %s' % (self.woe_id, self.label)
        return unicode(text)
# ---
# import_flickr_shapefiles.py
# ---------------------------
# -*- coding: utf8 -*-
from datetime import datetime
import logging
from lxml import etree
import sys
from django.core.management import setup_environ
import settings
setup_environ(settings)
from django.contrib.gis.geos import GEOSGeometry, fromstr
from django.db import transaction
from places.models import Place
# Configure the root logger: only ERROR and above is emitted, and every record
# is prefixed with a timestamp, the logger name and the level name.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.ERROR,
)
def sort_list_by_obj_attr(seq, attr):
    """
    Return a new list with the items of ``seq`` ordered by ``item.get(attr)``.

    Borrowed from ActiveState recipe #52230, adjusted for using lxml elements.

    ``seq`` may be any iterable whose items expose a ``get(name)`` method
    (lxml elements do). Values are compared exactly as ``get`` returns them,
    so string attributes sort lexicographically. The sort is stable: items
    with equal values keep their original relative order, and the items
    themselves are never compared with each other. ``seq`` is not modified.
    """
    return sorted(seq, key=lambda item: item.get(attr))
###########################
# parse the flickr data set
###########################

# Prefix map used by every XPath query against the shapefile XML.
_FLICKR_NS = {'t': "x-urn:flickr:"}


def _polyline_to_wkt_ring(raw_polyline, place_label):
    """
    Translate a raw polyline string into `POLYGON`-compatible coordinates.

    The input holds space-separated "lat,lng" pairs; the result holds
    comma-separated "lng lat" pairs:
        45.289924621582,-64.774787902832 45.294815063477,-64.777793884277
    becomes:
        -64.774787902832 45.289924621582,-64.777793884277 45.294815063477

    Tokens that do not split into exactly two comma-separated values are
    logged (``place_label`` is only used for that message) and left out.
    """
    fixed_pairs = []
    for raw_pair in raw_polyline.split(' '):
        pair = raw_pair.split(',')
        if len(pair) != 2:
            logging.error("Invalid pair for %s: %s", place_label, str(pair))
            continue
        lat, lng = pair
        fixed_pairs.append("%s %s" % (lng, lat))
    return ','.join(fixed_pairs)


def _import_place(elem):
    """
    Create one Place from a parsed <place> element.

    The shape with the greatest ``created`` attribute is used, and the first
    polyline of that shape becomes the polygon. A place with no shapes, no
    polylines, an empty polyline, or a failing insert is logged and skipped.
    """
    # extract basic info
    place_label = elem.get('label').encode('utf8')
    woe_id = elem.get('woe_id')
    place_id = elem.get('place_id')
    place_type = elem.get('place_type').encode('utf8')
    place_type_id = elem.get('place_type_id')
    logging.debug("Parsing %s", place_label)

    # extract shapes
    # The leading "." keeps the query inside this element. A bare "//" is an
    # absolute path and would match shapes of every place still in the tree.
    shapes = elem.xpath(".//t:shape", namespaces=_FLICKR_NS)
    if not shapes:
        logging.error("No shapes defined for %s", place_label)
        return

    # order by created, get the most recently generated shape
    # NOTE(review): `created` values are compared as strings here -- confirm
    # they always have the same number of digits.
    latest_shape = sort_list_by_obj_attr(shapes, "created")[-1]
    # fromtimestamp() yields a naive datetime in the local timezone.
    latest_shape_created_date = datetime.fromtimestamp(
        float(latest_shape.get('created')))
    logging.info("%s created on %s", place_label, latest_shape_created_date)

    # get all polylines (again relative to the chosen shape, not the document)
    polylines = latest_shape.xpath(".//t:polyline", namespaces=_FLICKR_NS)
    if not polylines:
        logging.error("No polylines for %s", place_label)
        return

    raw_polyline = polylines[0].text
    if not raw_polyline:
        logging.error("No polyline data for %s", place_label)
        return

    polyline = _polyline_to_wkt_ring(raw_polyline, place_label)

    # create new Place
    try:
        place = Place.objects.create(
            woe_id=woe_id,
            place_id=place_id,
            place_type=place_type,
            place_type_id=place_type_id,
            label=place_label,
            created=latest_shape_created_date,
            geometry=GEOSGeometry('POLYGON((%s))' % polyline))
    except Exception:
        # Exception (not a bare except) so Ctrl-C and SystemExit still stop
        # the import instead of being logged as a failed place.
        logging.error("Could not create %s: %s",
                      place_label, sys.exc_info()[1])
        transaction.rollback_unless_managed()
        return

    logging.debug("Created %s (%s): pk: %s",
                  place_label, place_type, place.pk)


def _release_element(elem):
    """Free a processed element and drop the siblings that precede it."""
    # It's safe to call clear() here because no descendants will be accessed
    elem.clear()
    # Also eliminate now-empty references from the root node to earlier
    # <place> elements
    while elem.getprevious() is not None:
        del elem.getparent()[0]


# remove old places
logging.debug("Removing old Place objects")
Place.objects.filter().delete()
logging.debug("Removed old Place objects")

shapes_file = "/path/to/flickr_shapefiles_public_dataset_1.0.xml"
context = etree.iterparse(shapes_file, tag="{x-urn:flickr:}place")

for event, elem in context:
    try:
        _import_place(elem)
    finally:
        # Release every element, including skipped ones; otherwise skipped
        # places accumulate in memory for the rest of the parse.
        _release_element(elem)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment