Created
October 8, 2011 20:41
-
-
Save pudo/1272840 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from openspending.model import meta as db | |
from openspending.model.attribute import Attribute | |
from openspending.model.common import TableHandler | |
class Dimension(object):
    """ A base class for dimensions.

    Stores the dataset, the dimension name and the raw description
    mapping, and exposes a few descriptive fields read from that
    mapping. Subclasses override the table-related hooks as needed.
    """

    def __init__(self, dataset, name, data):
        """ Set up the dimension from its description mapping.

        ``data`` must support ``get()`` and ``copy()``. ``label``,
        ``type`` and ``description`` fall back to ``name`` when missing
        from ``data``; ``facet`` falls back to ``None``.
        """
        self._data = data
        self.dataset = dataset
        self.name = name
        self.label = data.get('label', name)
        self.type = data.get('type', name)
        self.description = data.get('description', name)
        self.facet = data.get('facet')

    def join(self, from_clause):
        """ Return the object to be joined in when this dimension
        is part of a query. The base class joins nothing and hands
        back ``from_clause`` unchanged. """
        return from_clause

    def flush(self, bind):
        """ Only applies to dimensions with their own table. """
        pass

    def drop(self, bind):
        """ Only applies to dimensions with their own table.

        Forgets the ``column`` attribute if there is one. A dimension
        that never had a column assigned is left alone instead of
        failing with an AttributeError.
        """
        try:
            del self.column
        except AttributeError:
            # Nothing to forget: no column was ever attached.
            pass

    def __getitem__(self, name):
        """ Only applies to dimensions with their own attributes.

        Always raises KeyError, carrying the requested name so the
        error message says what was looked up.
        """
        raise KeyError(name)

    def __repr__(self):
        return "<Dimension(%s)>" % self.name

    def as_dict(self):
        """ Return a copy of the description mapping with the dimension
        name added under ``key``. """
        # FIXME: legacy support
        d = self._data.copy()
        d['key'] = self.name
        return d
class AttributeDimension(Dimension, Attribute):
    """ A plain dimension without a values table of its own: it combines
    the Dimension and Attribute base classes, so its values sit directly
    in a column of the facts table.
    """

    def __init__(self, dataset, name, data):
        # Attribute is initialised first and Dimension second, so the
        # fields Dimension assigns (name, label, ...) take precedence over
        # any same-named fields Attribute.__init__ may have set.
        Attribute.__init__(self, dataset, data)
        Dimension.__init__(self, dataset, name, data)

    def __repr__(self):
        template = "<AttributeDimension(%s)>"
        return template % self.name
class Measure(Attribute):
    """ A value on the facts table that can be subject to aggregation,
    and is specific to this one fact. """

    def __init__(self, dataset, name, data):
        """ Initialise the underlying attribute and record the measure's
        name; ``label`` falls back to ``name`` when ``data`` has none. """
        Attribute.__init__(self, dataset, data)
        self.name = name
        self.label = data.get('label', name)

    def __getitem__(self, name):
        """ Measures have no sub-attributes: always raises KeyError,
        carrying the requested name. """
        raise KeyError(name)

    def join(self, from_clause):
        """ Return the object to be joined in when this measure is part
        of a query. Nothing is joined, so ``from_clause`` is handed back
        unchanged. """
        return from_clause

    def __repr__(self):
        # Report the actual class name, in line with the other model
        # classes in this file (previously printed as "Metric").
        return "<Measure(%s)>" % self.name
class CompoundDimension(Dimension, TableHandler):
    """ A compound dimension is an outer table on the star schema, i.e. an
    associated table that is referenced from the fact table. It can have
    any number of attributes but in the case of OpenSpending it will not
    have sub-dimensions (i.e. snowflake schema).
    """

    def __init__(self, dataset, name, data):
        """ Build the attribute list from ``data['attributes']`` (or the
        legacy ``data['fields']``). A ``name`` attribute of datatype
        ``id`` is added only when the description does not define one.
        """
        Dimension.__init__(self, dataset, name, data)
        self.taxonomy = data.get('taxonomy', 'entity')
        self.attributes = []
        names = []
        for attr in data.get('attributes', data.get('fields', [])):
            # Record the declared names so an explicitly configured
            # 'name' attribute is not duplicated below.
            # Assumes each attribute description is a mapping, like the
            # default one appended below.
            names.append(attr.get('name'))
            self.attributes.append(Attribute(self, attr))
        if 'name' not in names:
            self.attributes.append(Attribute(self,
                {'name': 'name', 'datatype': 'id'}))

    def join(self, from_clause):
        """ This will return a query fragment that can be used to establish
        a join between the scheme table and the dimension, aliased to
        represent this dimension (i.e. there can be multiple joins to the
        same table from different dimensions).
        """
        return from_clause.join(self.alias,
                                self.alias.c.id == self.column_alias)

    def flush(self, bind):
        """ Delegate to the table handler's ``_flush``. """
        self._flush(bind)

    def drop(self, bind):
        """ Delegate to the table handler's ``_drop`` and forget the
        foreign key column set up by ``generate``. """
        self._drop(bind)
        del self.column

    @property
    def column_alias(self):
        """ This is an aliased pointer to the FK column on the fact
        table. """
        return self.dataset.alias.c[self.column.name]

    @property
    def selectable(self):
        """ The aliased dimension table created by ``generate``. """
        return self.alias

    def __getitem__(self, name):
        """ Return the attribute called ``name``; raise KeyError (carrying
        the name) when this dimension has no such attribute. """
        for attr in self.attributes:
            if attr.name == name:
                return attr
        raise KeyError(name)

    def generate(self, meta, entry_table):
        """ Create the table and column associated with this dimension
        if it does not already exist and propagate this call to the
        associated attributes.
        """
        self._ensure_table(meta, self.dataset.name + '_' + self.taxonomy)
        for attr in self.attributes:
            attr.generate(meta, self.table)
        fk = self.name + '_id'
        if fk not in entry_table.c:
            self.column = db.Column(fk, db.Integer, index=True)
            self.column.create(entry_table, index_name=fk + '_index')
        else:
            self.column = entry_table.c[fk]
        self.alias = self.table.alias(self.name)

    def load(self, bind, row):
        """ Load a row of data into this dimension by having the attributes
        perform type casting and then upserting the values.

        Returns a one-item dict mapping the FK column name to the value
        returned by the upsert. Only keys of ``row`` that match an
        attribute name are read; a missing key raises KeyError.
        """
        dim = dict()
        for attr in self.attributes:
            dim.update(attr.load(bind, row[attr.name]))
        pk = self._upsert(bind, dim, ['name'])
        return {self.column.name: pk}

    def members(self, conditions="1=1", limit=0, offset=0):
        """ Generate the rows of the aliased dimension table that match
        ``conditions``, each as a dict with an extra ``taxonomy`` key.

        NOTE(review): how ``limit=0`` is rendered depends on the
        SQLAlchemy version in use -- confirm the default is intended.
        """
        query = db.select([self.alias], conditions,
                          limit=limit, offset=offset)
        rp = self.dataset.bind.execute(query)
        # fetchone() returns None once the result is exhausted.
        for row in iter(rp.fetchone, None):
            member = dict(row.items())
            member['taxonomy'] = self.taxonomy
            yield member

    def __repr__(self):
        return "<CompoundDimension(%s/%s:%s)>" % (self.taxonomy, self.name,
                                                  self.attributes)
class DateDimension(CompoundDimension):
    """ A compound dimension with the fixed taxonomy ``date`` whose
    attributes are the calendar fields listed in ``DATE_FIELDS``, all
    derived from a single date value on load.
    """

    DATE_FIELDS = [
        {'name': 'name', 'datatype': 'string'},
        {'name': 'year', 'datatype': 'string'},
        {'name': 'quarter', 'datatype': 'string'},
        {'name': 'month', 'datatype': 'string'},
        {'name': 'week', 'datatype': 'string'},
        {'name': 'day', 'datatype': 'string'},
        # legacy query support:
        {'name': 'yearmonth', 'datatype': 'string'},
    ]

    def __init__(self, dataset, name, data):
        """ Initialise the base dimension and install the fixed date
        attributes; attribute definitions in ``data`` are not read. """
        # CompoundDimension.__init__ is bypassed on purpose: it would
        # build the attribute list from ``data`` instead of DATE_FIELDS.
        Dimension.__init__(self, dataset, name, data)
        self.taxonomy = 'date'
        self.attributes = []
        for attr in self.DATE_FIELDS:
            self.attributes.append(Attribute(self, attr))

    def load(self, bind, value):
        """ Break ``value`` down into its calendar components and load
        them through ``CompoundDimension.load``.

        ``value`` must provide ``isoformat()``, ``strftime()`` and a
        ``month`` attribute (e.g. a ``datetime.date``).
        """
        data = {
            'name': value.isoformat(),
            # 'label' is computed here, but the parent load() only reads
            # keys that match an attribute, i.e. those in DATE_FIELDS.
            'label': value.strftime("%d. %B %Y"),
            'year': value.strftime('%Y'),
            # Calendar quarter 1-4: Jan-Mar -> 1, ..., Oct-Dec -> 4.
            # Floor division keeps this an integer on Python 2 and 3.
            'quarter': str((value.month - 1) // 3 + 1),
            'month': value.strftime('%m'),
            'week': value.strftime('%W'),
            'day': value.strftime('%d'),
            'yearmonth': value.strftime('%Y%m')
        }
        return super(DateDimension, self).load(bind, data)

    def __repr__(self):
        # Two placeholders for two values; the earlier three-placeholder
        # template raised TypeError whenever repr() was called.
        return "<DateDimension(%s:%s)>" % (self.name, self.attributes)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment