@eevee
Created July 1, 2014 06:23
proof of concept of fancy-ass yaml schema (this code is a travesty)
glaceon: !!python/object/apply:collections.OrderedDict
  dictitems:
    base-happiness: 70
    capture-rate: 45
    color: blue
    hatch-counter: 0
    identifier: glaceon
    is-dimorphic: false
    types: [ice]
from __future__ import print_function
from collections import OrderedDict
from pprint import pprint
import re
from sqlalchemy import and_
from sqlalchemy.orm import (
    relationship,
)
from sqlalchemy.orm.collections import (
    attribute_mapped_collection,
)
from sqlalchemy.schema import (
    Column,
    ForeignKey,
    Table,
)
from sqlalchemy.types import (
    Boolean,
    Integer,
    Unicode,
    UnicodeText,
)
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.ext.declarative import DeclarativeMeta
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.ext.declarative import declared_attr
from sqlalchemy.orm import RelationshipProperty
from sqlalchemy.orm import configure_mappers
# TODO this is probably not gonna fly
from spline.models.columns import deferred_attr_factory
registry = dict()
_Base = declarative_base(class_registry=registry)
# GENERAL GAME PLAN:
# The idea here is to write a simplified schema in a very native-object way
# that maps in obvious ways to both a compact and readable YAML representation
# and an easily queried (both via SQLA and SQL) database schema.
# SPECIFIC DB FEATURES THIS NEEDS (TO EXIST, AND TO BE REASONABLY TRANSPARENT):
# - translation of names and prose, and some sort of obvious distinction
#   between official prose/languages and fan
# - temporal versioning -- very many objects, and bridges in particular, are
#   linked to only a single generation/group/game
# THINGS THIS NEEDS TO BE ABLE TO DO:
# - express the entire current schema
# - be queryable via sqla in a natural way
# - be queryable via http -> json in a natural way
# - load (FAST) (preferably with e.g. translations or side games optional)
# - dump (FAST)
# MORE MINOR TODO:
# - loading oughta explicitly check all the primary key conditions early? i
#   think?
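# For illustration (not part of the original gist), the kind of usage the
# schema below is aiming for -- hypothetical, assuming the classes defined
# further down and a populated session:
#
#     # YAML like `color: blue` loads straight into an attribute...
#     glaceon = session.query(PokemonSpecies).get('glaceon')
#     # ...and relationships are queryable by identifier, not just by a
#     # mapped object:
#     session.query(PokemonSpecies).filter(PokemonSpecies.color == 'blue')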
###############################################################################
# Library of helpers
def attr_factory(callable):
    def wrapped(*args, **kwargs):
        return declared_attr(callable(*args, **kwargs))
    wrapped.__name__ = callable.__name__
    wrapped.__doc__ = callable.__doc__
    return wrapped
class TransparentRelationshipComparator(RelationshipProperty.Comparator):
    """Extension of the default relationship comparator that supports testing
    for equality against a scalar value (or values), rather than a mapped
    object.  That is, instead of this:

        bar = session.query(Bar).get(bar_id)
        session.query(Foo).filter(Foo.bar == bar)

    or this:

        session.query(Foo).filter(Foo.bar_id == bar_id)

    you can just do this:

        session.query(Foo).filter(Foo.bar == bar_id)

    This is particularly useful for API classes, which deliberately hide the
    "raw" underlying FK column.
    """
    def __eq__(self, other):
        # Atrocious typechecking, alas!
        if isinstance(other, (int, str)):
            other = (other,)
        if isinstance(other, (list, tuple)):
            # This is a plain scalar, not an object; redirect the comparison to
            # compare against the local columns instead.
            # There's a `local_columns` property, but it's a set, so it's
            # unordered, which won't work with comparison to a tuple
            local_columns = [l for l, r in self.property.local_remote_pairs]
            if len(local_columns) != len(other):
                raise ValueError("Expected {} values, but got {!r}".format(
                    len(local_columns), other))
            return and_(*[
                col == value
                for col, value in zip(local_columns, other)
            ])

        return super(TransparentRelationshipComparator, self).__eq__(other)
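# Illustrative only (not in the original gist): because __eq__ zips a given
# tuple against local_remote_pairs, a relationship backed by a composite
# foreign key could also be compared against a tuple of values, e.g.:
#
#     session.query(Foo).filter(Foo.bar == (bar_key_1, bar_key_2))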
class DummyDescriptor(object):
    def __get__(self, inst, owner):
        raise NotImplementedError(
            "I'm supposed to be replaced by a real value, "
            "but something has gone terribly wrong.")
###############################################################################
# Library of helpers
def Identifier():
    return Column(Unicode(100), nullable=False, primary_key=True)
def Scalar(type_):
    if type_ is int:
        sqla_type = Integer
    elif type_ is str:
        # always using 'text' means no worrying about length limits
        sqla_type = UnicodeText
    elif type_ is bool:
        sqla_type = Boolean
    else:
        # guard so an unsupported type fails loudly instead of with an
        # UnboundLocalError below
        raise TypeError("Scalar() doesn't know how to map {!r}".format(type_))
    # TODO might want unsigned or length limits here? maybe?
    return Column(sqla_type, nullable=False)
# TODO maybe replace this with the cleaned-up version i have in spline?
# NOTE: this class is shadowed by the deferred_attr_factory version of
# Reference defined further down, so the schema below uses that one.
@attr_factory
class Reference(object):
    key = DummyDescriptor()
    column = DummyDescriptor()
    column_key = DummyDescriptor()

    def __init__(self, target):
        self.target = target
        self.configured = False

    def __call__(self, mapped_class):
        self.column = Column(ForeignKey(*self.target.__table__.primary_key))

        from sqlalchemy.event import listen
        listen(mapped_class, "mapper_configured", self._on_mapper_configured)

        # Minor hackery: return the column now so declarative tells it what its
        # name is, and we can read it back later
        return self.column

    def _on_mapper_configured(self, mapper, mapped_class):
        if self.configured:
            return
        self.configured = True

        rel = relationship(
            self.target,
            comparator_factory=TransparentRelationshipComparator,
            info=dict(api_descriptor=self),
        )
        self.key = self.column.key
        self.column_key = '_' + self.key
        setattr(mapped_class, self.key, rel)
        setattr(mapped_class, self.column_key, self.column)

    def populate_from_primitive(self, obj, yaml_value):
        """Deserialize a YAML value and appropriately populate the given owner
        object.
        """
        # yaml_value should be a string or integer, depending on the foreign
        # key of the other table; SQLA will yell if it's not
        setattr(obj, self.column_key, yaml_value)
def yaml_load_reference(prop, obj, yaml_value):
    """Deserialize a YAML value and appropriately populate the given owner
    object.
    """
    # yaml_value should be a string or integer, depending on the foreign
    # key of the other table; SQLA will yell if it's not
    column, = prop.property.local_columns
    setattr(obj, '_' + column.key, yaml_value)


def yaml_dump_reference(prop, obj):
    value = getattr(obj, prop.key)
    return value.identifier
@deferred_attr_factory
def Reference(key, target):
    # (Presumably deferred_attr_factory fills in `key` with the attribute name
    # and sends the mapped class back into this generator after the yield.)
    column = Column(key, ForeignKey(*target.__table__.primary_key))
    rel = relationship(
        target,
        uselist=False,
        comparator_factory=TransparentRelationshipComparator,
        info=dict(
            api_loader=yaml_load_reference,
            api_dumper=yaml_dump_reference,
        ),
    )
    mapped_class = yield rel
    setattr(mapped_class, '_' + key, column)
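# Example of what this wires up (inferred from the code above; the exact
# deferred_attr_factory protocol is an assumption):
#
#     class PokemonSpecies(Locus):
#         color = Reference(Color)
#
#     # ends up with two attributes on the mapped class:
#     #   .color   -- relationship to Color, used for querying and dumping
#     #   ._color  -- the underlying FK column, populated directly when
#     #               loading YAML (see yaml_load_reference above)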
def yaml_load_multi_reference(prop, obj, yaml_value):
    """Deserialize a YAML value and appropriately populate the given owner
    object.
    """
    # yaml_value should be a /list of/ strings or integers
    rel = getattr(obj, prop.key)
    print(prop.property, prop.property.__dict__)
    other_side = prop.property.argument
    # TODO i might like to just populate the bridge table directly, but unclear
    # how to do that while keeping the relationship in sync
    # (note: this leans on the module-level `session` created in __main__)
    for remote_key in yaml_value:
        rel.append(session.query(other_side).get(remote_key))


def yaml_dump_multi_reference(prop, obj):
    values = getattr(obj, prop.key)
    return [value.identifier for value in values]
@deferred_attr_factory
def MultiReference(key, target):
    column = Column(key, ForeignKey(*target.__table__.primary_key))
    rel = relationship(
        target,
        secondary=lambda: bridge_table,
        uselist=True,
        comparator_factory=TransparentRelationshipComparator,
        info=dict(
            api_loader=yaml_load_multi_reference,
            api_dumper=yaml_dump_multi_reference,
        ),
    )
    mapped_class = yield rel

    bridge_columns = []
    for side in mapped_class, target:
        pk = side.__table__.primary_key
        for col in pk:
            if len(pk) == 1:
                colname = side.__tablename__
            else:
                colname = side.__tablename__ + '_' + col.key
            # no explicit type here: let the ForeignKey copy the type of the
            # referenced pk column (the identifiers are Unicode, not Integer)
            bridge_columns.append(Column(
                colname, ForeignKey(col), primary_key=True, nullable=False))
    bridge_table = Table(
        mapped_class.__tablename__ + '__' + target.__tablename__,
        mapped_class.metadata,
        *bridge_columns
    )
    print("created:", bridge_table)
# -----------------------------------------------------------------------------
class Base(_Base):
    __abstract__ = True

    @declared_attr
    def __tablename__(cls):
        # TODO dashes for filenames...
        return re.sub(r'([A-Z])', r'_\1', cls.__name__).lstrip('_').lower()

    @classmethod
    def from_raw(cls, identifier, yaml_dict):
        self = cls(identifier=identifier)
        for key, yaml_value in yaml_dict.items():
            key = key.replace('-', '_')
            prop = getattr(cls, key, None)
            if prop is None:
                print("warning: no such property {} on {}".format(key, cls))
                continue
            if 'api_loader' in prop.property.info:
                api_loader = prop.property.info['api_loader']
                api_loader(prop, self, yaml_value)
            else:
                # TODO assert this is a scalar? or just beef up Scalar?
                setattr(self, key, yaml_value)
        return self
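# Example (not in the original gist) of what from_raw is fed, using the
# species YAML at the bottom of this gist:
#
#     PokemonSpecies.from_raw('eevee', {
#         'color': 'brown',        # handled by yaml_load_reference
#         'types': ['normal'],     # handled by yaml_load_multi_reference
#         'base-happiness': 70,    # plain Scalar column, set directly
#     })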
class Locus(Base):
    __abstract__ = True

    identifier = Identifier()

    def __repr__(self):
        return "<{0}: {1}>".format(type(self).__name__, self.identifier)


class Enum(Base):
    __abstract__ = True

    identifier = Identifier()

    def __repr__(self):
        return "<{0}: {1}>".format(type(self).__name__, self.identifier)
###############################################################################
# Schema definition
class Color(Enum):
    # No special properties; just a hook for names to hang on
    pass


class DamageClass(Enum):
    # No special properties; just a hook for names to hang on
    pass
# -----------------------------------------------------------------------------
# TODO surely these will end up needing to be treated specially
# TODO what IS the identifier for a generation? is it really a number? or a
# string number? i don't like "generation-i". "generation-1"?
class Generation(Locus):
    #identifier = Column(Integer)???
    pass
# -----------------------------------------------------------------------------
class Type(Locus):
    #identifier
    generation = Reference(Generation)
    gen12_damage_class = Reference(DamageClass)
    #efficacy... should be in a separate table now it seems


class PokemonSpecies(Locus):
    #id(entifier) is implied
    #generation = Reference(Generation)
    # TODO this might be tricky.
    #evolves_from = Reference(PokemonSpecies)
    color = Reference(Color)
    types = MultiReference(Type)

    base_happiness = Scalar(int)
    capture_rate = Scalar(int)
    hatch_counter = Scalar(int)
    is_dimorphic = Scalar(bool)
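# Rough sketch (an assumption, inferred from Reference/MultiReference above)
# of the tables this should generate:
#
#     pokemon_species        (identifier PK, color FK -> color.identifier,
#                             base_happiness, capture_rate, hatch_counter,
#                             is_dimorphic)
#     pokemon_species__type  (bridge table: pokemon_species FK + type FK,
#                             composite PK)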
###############################################################################
# Test loading the thing
def load_everything(session):
    import yaml
    classes = [cls for cls in registry.values() if isinstance(cls, type)]
    table_to_class = {cls.__table__: cls for cls in classes}
    everything = {}
    # NOTE 1: need to create them in order, then load them in reverse order,
    # maybe?
    # NOTE 2: not guaranteed that Type comes before Pokemon in either
    # direction; may need to do my own dependency sort
    for cls in [Generation, DamageClass, Type, Color, PokemonSpecies]:
        table = cls.__table__
        from sqlalchemy.schema import CreateTable
        print(CreateTable(table))
        if table not in table_to_class:
            continue
        cls = table_to_class[table]
        everything[cls] = []
        raw_data = yaml.safe_load(open('pokedex/data/{0}.yaml'.format(cls.__tablename__)))
        for key, value in raw_data.items():
            new = cls.from_raw(key, value)
            everything[cls].append(new)
            print(repr(new))
            print(repr(new.__dict__))
            session.add(new)

    return everything
def yaml_serialize(obj):
    d = OrderedDict()
    from sqlalchemy.orm import object_mapper
    mapper = object_mapper(obj)
    for prop in mapper.iterate_properties:
        if prop.key.startswith('_'):
            # TODO hackery for shadowed columns
            continue
        dumper = prop.info.get('api_dumper')
        if dumper:
            value = dumper(prop, obj)
        else:
            value = getattr(obj, prop.key)
        d[prop.key.replace('_', '-')] = value

    return {obj.identifier.replace('_', '-'): d}
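# For reference (not in the original gist): serializing glaceon with this
# function and yaml.dump() is what produces the OrderedDict-tagged block at
# the top of this gist, i.e. roughly:
#
#     yaml_serialize(glaceon)
#     # => {'glaceon': OrderedDict([..., ('color', 'blue'),
#     #                             ('types', ['ice']), ...])}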
if __name__ == '__main__':
    from sqlalchemy.orm import sessionmaker
    from sqlalchemy import create_engine
    session = sessionmaker(bind=create_engine('sqlite:///:memory:'))()

    # Important: make sure everything's configured (so the magical setup stuff
    # runs) before trying to metaprogram
    configure_mappers()
    Base.metadata.create_all(session.bind)

    everything = load_everything(session)
    from pprint import pprint; pprint(everything)
    session.flush()
    for species in everything[PokemonSpecies]:
        session.expire(species, ['color'])
        print(species.color, species._color)
    print(repr(PokemonSpecies.color))
    print(repr(PokemonSpecies.color.property))

    print()
    print()
    print()
    print()
    print("serializing glaceon:")
    glaceon = session.query(PokemonSpecies).get('glaceon')
    serialized = yaml_serialize(glaceon)
    import yaml
    print(yaml.dump(serialized))
black: {}
blue: {}
brown: {}
gray: {}
green: {}
pink: {}
purple: {}
red: {}
white: {}
yellow: {}
eevee:
  generation: 1
  types: [normal]
  evolves-from: null
  color: brown
  shape: quadruped
  habitat: urban
  capture-rate: 45
  base-happiness: 70
  hatch-counter: 0
  is-dimorphic: false
  growth-rate: medium
glaceon:
  generation: 3
  types: [ice]
  evolves-from: eevee
  color: blue
  shape: quadruped
  habitat: urban
  capture-rate: 45
  base-happiness: 70
  hatch-counter: 0
  is-dimorphic: false
  growth-rate: medium
normal:
  generation: 1
  gen12-damage-class: physical
fighting:
  generation: 1
  gen12-damage-class: physical
flying:
  generation: 1
  gen12-damage-class: physical
poison:
  generation: 1
  gen12-damage-class: physical
ground:
  generation: 1
  gen12-damage-class: physical
rock:
  generation: 1
  gen12-damage-class: physical
bug:
  generation: 1
  gen12-damage-class: physical
ghost:
  generation: 1
  gen12-damage-class: physical
steel:
  generation: 2
  gen12-damage-class: physical
fire:
  generation: 1
  gen12-damage-class: special
water:
  generation: 1
  gen12-damage-class: special
grass:
  generation: 1
  gen12-damage-class: special
electric:
  generation: 1
  gen12-damage-class: special
psychic:
  generation: 1
  gen12-damage-class: special
ice:
  generation: 1
  gen12-damage-class: special
dragon:
  generation: 1
  gen12-damage-class: special
dark:
  generation: 2
  gen12-damage-class: special
fairy:
  generation: 6
unknown: {}
shadow: {}