Skip to content

Instantly share code, notes, and snippets.

@i-like-bikes
Created April 26, 2018 14:46
Show Gist options
  • Save i-like-bikes/eecbff1f3041d5006bb307d3d34a896d to your computer and use it in GitHub Desktop.
Save i-like-bikes/eecbff1f3041d5006bb307d3d34a896d to your computer and use it in GitHub Desktop.
LGA and year filtering
import datetime
import logging
import sys
from mongoengine import connect
from mongoengine import DateTimeField, Document, EmbeddedDocument, EmbeddedDocumentField, IntField, ListField, StringField
from pymongo.errors import OperationFailure
# Solution comes from following this:
# https://stackoverflow.com/questions/28982285/mongodb-projection-of-nested-arrays
conf = {
'user': '',
'password': '',
'port': ,
'ds': ''
}
host= 'mongodb://{user}:{password}@{ds}.mlab.com:{port}/lga_data'.format(**conf)
class YearRecord(EmbeddedDocument):
year = StringField(max_length=52, required=True)
crime_rate = StringField(max_length=52, required=True)
number_incidents = StringField(max_length=52, required=True)
class OffenceRecord(EmbeddedDocument):
offence_group = StringField(max_length=52, required=True)
offence_type = StringField(max_length=52, required=True)
year_record = ListField(EmbeddedDocumentField(YearRecord))
class CouncilRecord(Document):
lga = StringField(max_length=52, required=True)
title = StringField(max_length=52, required=True)
name = StringField(max_length=52, required=True)
offence_record = ListField(EmbeddedDocumentField(OffenceRecord))
data = [{
'lga': 'cowra',
'title': 'some stat',
'name': 'detailed name',
'offence_record': [
{
'offence_group': 'SRS!',
'offence_type': 'MURDER!',
'year_record': [
{ 'year': '2015',
'crime_rate': '100%',
'number_incidents': '200'}
]
}
]
},
{
'lga': 'bathurst',
'title': 'some stat',
'name': 'detailed name',
'offence_record': [
{
'offence_group': 'SRS!',
'offence_type': 'MURDER!',
'year_record': [
{
'year': '2015',
'crime_rate': '100%',
'number_incidents': '200'
},
{
'year': '2016',
'crime_rate': '20%',
'number_incidents': '100'}
]
},
{
'offence_group': 'NOT SRS!',
'offence_type': 'MURDER!',
'year_record': [
{
'year': '2015',
'crime_rate': '100%',
'number_incidents': '200'
},
{
'year': '2016',
'crime_rate': '20%',
'number_incidents': '100'}
]
}
]
}]
connect('test_db', host=host)
# Comment this out when running for the second time
# or test for existance, so the DB isn't polluted.
# d = [CouncilRecord(**d) for d in data]
# for _ in d:
# print(_.lga)
# CouncilRecord.objects.insert(d)
lga_filter = "bathurst"
year_filter = "2015"
pipeline = [
{ "$match": { "lga": lga_filter } },
{ "$unwind": { "path": "$offence_record" } },
{ "$unwind": { "path": "$offence_record.year_record" } },
{ "$match": { "offence_record.year_record.year": year_filter } },
{ "$group" : {
"_id" : "$_id",
"lga": {"$first": "$lga"},
"title": {"$first": "$title"},
"name": {"$first": "$name"},
"offence_record": {"$push": "$offence_record"},}
}
]
for res in CouncilRecord.objects.aggregate(*pipeline):
print(res)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment