Skip to content

Instantly share code, notes, and snippets.

@wroscoe
Created January 15, 2010 06:45
Show Gist options
  • Save wroscoe/277865 to your computer and use it in GitHub Desktop.
Save wroscoe/277865 to your computer and use it in GitHub Desktop.
"""
Author: William Roscoe
Date: 2010.1
This is an attempt at a flat database with single and dynamically created indexes to entity ids.
If you know of a better or comprable simple system please let me know.
This idea is a simple mimic of Google's BigFile system and Friend Feed's python/mysql implementation.
"""
import os, cPickle, time, sqlite3, uuid
import config
INDEX_PREFIX = "index_"
class Datastore():
"""Datastore is a way to store data in a blob it is called through indexes"""
def __init__(self, path, indexed_properties):
self.conn = sqlite3.connect(path)
self.indexed_properties = indexed_properties
#create datastore and index tables if they don't exist
c = self.conn.cursor()
if not self.table_exists('datastore'):
c.execute('create table datastore ( entity_id TEXT, blob BLOB, updated INTEGER)' )
for property_name in indexed_properties:
if not self.table_exists(INDEX_PREFIX+property_name):
c.execute('create table %s ( entity_id TEXT, %s TEXT)' %(INDEX_PREFIX+property_name, property_name) )
self.conn.commit()
def table_exists(self, table_name):
"""check if table_name exists"""
c = self.conn.cursor()
c.execute('SELECT name from sqlite_master WHERE name=?', [table_name])
if c.fetchone() > 0:
return True
return False
def put(self, new_entity):
"""Insert new_entity into datastore table and add indexed properties to index tables.
Create a unique entity id with uuid1 and an integer time value."""
new_entity['id'] = uuid.uuid1().get_hex()
new_entity['updated'] = int(time.time())
c = self.conn.cursor()
c.execute('INSERT INTO datastore(entity_id, blob, updated) VALUES(?, ?, ?);', [new_entity['id'], cPickle.dumps(new_entity), int(time.time()) ] )
self.conn.commit()
for property_name in self.indexed_properties:
if new_entity.has_key(property_name):
self.put_in_index(property_name, new_entity['id'], new_entity[property_name])
return new_entity['id']
def put_in_index(self, property_name, id, property_value):
"""put an (entity_id, property_name) row into the property's index"""
query = 'INSERT INTO %s (entity_id, %s) VALUES(?, ?);'%(INDEX_PREFIX+property_name, property_name)
c = self.conn.cursor()
c.execute(query, [id, property_value])
self.conn.commit()
def get_id(self, id):
"""SELECT an entity with its id"""
c = self.conn.cursor()
c.execute('SELECT blob FROM datastore WHERE entity_id = ?', [id])
self.conn.commit()
return cPickle.loads(str(c.fetchone()[0]))
def get_id_list(self, id_list):
"""get a list of entities by passing a list of entity ids"""
in_list = self.make_list_a_string(id_list)
print in_list
c = self.conn.cursor()
query = 'SELECT blob FROM datastore WHERE entity_id IN (%s)' % (in_list)
c.execute(query)
self.conn.commit()
results = []
for row in c.fetchall():
results.append(cPickle.loads(str(row[0])))
return results
def get_where(self, property_name, property_value_list):
"""get a list of entities that match a passed list of property names"""
in_list = self.make_list_a_string(property_value_list)
c = self.conn.cursor()
query = 'SELECT * FROM %s WHERE %s IN (%s)' %(INDEX_PREFIX+property_name, property_name, in_list)
c.execute(query)
self.conn.commit()
out_list = []
for row in c.fetchall():
out_list.append(row[0])
print out_list
return self.get_id_list(out_list)
def get_index_older(self, property_name, older):
"""SELECT entities indexted in the properties index. Limit the number of values returned"""
index_name = INDEX_PREFIX+property_name
query = 'SELECT datastore.blob FROM datastore, %s WHERE %s.entity_id = datastore.entity_id AND updated < %s LIMIT 20' %(index_name, index_name, older)
c = self.conn.cursor()
c.execute(query)
results = []
for row in c.fetchall():
results.append(cPickle.loads(str(row[0])))
return results
def make_list_a_string(self, list1):
"""convert a list to a string that can be inserted into an SQL "IN" statment"""
list2 = []
for i in list1:
list2.append(str(i))
return str(list2).strip("[]")
if __name__ == '__main__':
d = Datastore(config.PATH_DB, ['test'])
d.put({'test':'hello', 'other':'blob..lasdfasdf;lasdjflasd'})
print d.get_where('test', ['hello'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment