Skip to content

Instantly share code, notes, and snippets.

@martin-vi
Last active August 29, 2015 14:02
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save martin-vi/dc174d3c45358387b4ee to your computer and use it in GitHub Desktop.
Save martin-vi/dc174d3c45358387b4ee to your computer and use it in GitHub Desktop.
#!/usr/bin/env python2
# licensed under WTFPL
from cassandra.cluster import Cluster
import os, time, sys, argparse
# tileCache settings
# Root directory that the -update mode walks with os.walk() to find tiles.
tileCachePath = '/mnt/hd/marz/tilecacheNew/'
# cassandra settings
# Contact points handed to cassandra.cluster.Cluster.
cassandraHosts = ['cassandra1.docker', 'ca2.docker']
# Keyspace that setupCassandra() connects to (creating it on demand) and
# that the -drop mode removes.
keySpaceName = 'tilestore'
# Table holding one row per tile.
columnFamily = 'tiles_inspirecrs84quad'
# Delimiter between the fields of a row key.
separator = '|'
# Shared driver session; stays None until setupCassandra() assigns it.
session = None
# Zoom levels accepted by putCassandra(); the -z option may replace this.
zoomRange = range(0, 21)
def setupCassandra():
    """Open the module-level ``session`` and make sure the schema exists.

    A direct connection to ``keySpaceName`` is tried first.  If that fails,
    a keyspace-less connection is opened, the keyspace is created
    (SimpleStrategy, replication factor 2) and selected with ``USE``.
    Afterwards the tile table is created with ``IF NOT EXISTS``, so calling
    this function repeatedly is harmless.

    Returns None; the result is stored in the global ``session``.
    """
    global session
    cluster = Cluster(cassandraHosts)
    try:
        session = cluster.connect(keySpaceName)
    except Exception:
        # ``Exception`` instead of a bare ``except`` so that Ctrl-C and
        # SystemExit are not mistaken for "keyspace is missing".
        # NOTE(review): presumably the driver fails here because the
        # keyspace does not exist yet -- any other connect error ends up in
        # this branch too and will surface on the second connect().
        session = cluster.connect()
        keyspaceDef = '''
CREATE KEYSPACE IF NOT EXISTS %s WITH replication = {
'class': 'SimpleStrategy',
'replication_factor': '2'
};''' % keySpaceName
        session.execute(keyspaceDef)
        session.execute('USE %s' % keySpaceName)
    # NOTE(review): the empty 'sstable_compression' value presumably turns
    # table compression off -- confirm against the Cassandra version in use.
    columnFamDef = '''
CREATE TABLE IF NOT EXISTS %s (
key ascii,
z int,
img blob,
tileTimestamp timestamp,
PRIMARY KEY (key)
) WITH compression={'sstable_compression': ''}''' % columnFamily
    session.execute(columnFamDef)
    # No secondary index is created on ``z``; the -test query therefore
    # has to use ALLOW FILTERING.
def pathToKey(path, filetype):
    """Translate a tile's file system path into its row key.

    The key consists of ``filetype``, the (up to six) directory components
    directly above the file, and the file name with its last four
    characters cut off, all joined with ``separator``.
    """
    parts = path.split(os.sep)
    directories = separator.join(parts[-7:-1])
    stem = parts[-1][:-4]  # drops a four character suffix such as '.png'
    return separator.join([filetype, directories, stem])
def getZ(key):
    """Return the zoom level, i.e. the second field of a row key, as int."""
    fields = key.split(separator)
    zoom = fields[1]
    return int(zoom)
def putCassandra(path, filetype='png'):
    """Insert one tile image into Cassandra unless it is already stored.

    path     -- file system path of the tile image
    filetype -- image type; becomes the first field of the row key

    The tile is skipped when its zoom level cannot be parsed, when the zoom
    level is not in ``zoomRange``, when a row with the same key exists, or
    when the existence check itself fails (the error is printed).
    Returns None in every case.
    """
    key = pathToKey(path, filetype)
    try:
        zoom = getZ(key)
    except ValueError:
        # The path does not follow the <zoom>/... layout (e.g. a stray
        # image in the cache root); skip it instead of aborting the walk.
        print('skip %s' % key)
        return
    if zoom not in zoomRange:
        return
    try:
        # Identifiers cannot be bound, so only the table name is
        # interpolated; the key travels as a bound parameter.
        query = 'SELECT key FROM %s WHERE key=%%s LIMIT 1' % columnFamily
        row = session.execute(query, (key,))
        if row:
            print('skip %s' % key)
            return
    except Exception as e:
        print(e)
        return
    # Tiles are binary images: read them in binary mode.
    with open(path, 'rb') as f:
        data = f.read()
    timestamp = int(time.time() * 1000)  # milliseconds since the epoch
    # ``tileTimestamp`` is the column declared by setupCassandra(); the
    # former ``LRU`` column is not part of that table definition.
    query = ('INSERT INTO %s (key, z, img, tileTimestamp) '
             'VALUES (%%s, %%s, %%s, %%s)' % columnFamily)
    # bytearray makes the driver encode the image as a blob literal.
    session.execute(query, (key, zoom, bytearray(data), timestamp))
    print('insert %s - %s' % (key, timestamp))
def dispatchFile(path):
    """Forward a PNG or JPG tile to putCassandra(); ignore other files.

    path -- file name as yielded by os.walk().  It is resolved against the
            module-level ``dirname`` set by the walking loop in the
            ``__main__`` block, so that loop must be running.
    """
    # Work on the argument itself rather than on the ``filename`` variable
    # leaked by the caller's list comprehension.
    ftype = path[-4:]
    if ftype in ('.png', '.jpg'):
        # os.path.join() returns ``path`` unchanged if it is absolute.
        putCassandra(os.path.join(dirname, path), ftype[-3:])
if __name__ == "__main__":
    # Command line entry point.  The first of -drop / -update / -test that
    # is set decides what happens; without any of them the help is shown.
    parser = argparse.ArgumentParser()
    parser.add_argument('-update', action='store_true', help='update cassandra tilecache, skip existing tiles')
    parser.add_argument('-drop', action='store_true', help='drop entire cassandra keyspace')
    parser.add_argument('-test', action='store_true', help='test single row')
    exgroup = parser.add_argument_group(title='additonal arguments')
    exgroup.add_argument('-z', '--zoom-range', nargs=2, dest='zoomLvl', help='only update defined zoom range')
    args = parser.parse_args()
    if args.drop:
        setupCassandra()
        print('droping keyspace \"%s\"' % keySpaceName)
        session.execute('DROP KEYSPACE %s' % keySpaceName)
        sys.exit()
    elif args.update:
        # Without -z the default zoomRange stays in effect.
        if args.zoomLvl is not None:
            try:
                # The two bounds may be given in either order.
                minZ, maxZ = sorted(int(z) for z in args.zoomLvl)
            except ValueError:
                print('none integer zoomlevel %s' % args.zoomLvl)
                # Non-zero status so callers can detect the bad argument.
                sys.exit(1)
            zoomRange = range(minZ, maxZ + 1)
        print('doing update..')
        setupCassandra()
        # ``dirname`` and ``filename`` deliberately stay module-level
        # names: dispatchFile() resolves file names against ``dirname``.
        for dirname, dirnames, filenames in os.walk(tileCachePath):
            for filename in filenames:
                dispatchFile(filename)
        sys.exit()
    elif args.test:
        setupCassandra()
        try:
            # ``z`` is not indexed, hence ALLOW FILTERING.
            query = 'SELECT key from %s WHERE z=10 LIMIT 1 ALLOW FILTERING' % columnFamily
            row = session.execute(query)
            print(row)
        except Exception as e:
            print(e)
        sys.exit()
    else:
        # Nothing was requested: say so instead of exiting silently.
        parser.print_help()
#!/usr/bin/env python2
from pycassa.pool import ConnectionPool
from pycassa.columnfamilymap import ColumnFamilyMap
from pycassa.system_manager import (
SystemManager,
SIMPLE_STRATEGY,
ASCII_TYPE,
UTF8_TYPE,
BYTES_TYPE
)
from pycassa.types import *
from pycassa.cassandra.ttypes import NotFoundException
import os, sys, argparse
# tileCache settings
# Root directory walked by the -update mode.
tileCachePath = '/mnt/hd/marz/tilecache/'
# cassandra settings
# Host passed to both SystemManager and ConnectionPool.
cassandraHost = 'vagrant'
keySpaceName = 'tilecache'
# The column family shares its name with the keyspace.
columnFamily = keySpaceName
super_cf=False # consider super columns to be deprecated
# global vars
# Placeholders; pool and cfmap are assigned in the __main__ block.
sysm, pool, cfmap = [None] * 3
# fs path ./17/000/141/090/000/102/702.png
# cassandra rowkey 17|000|141|090|000|102|702 (path components joined with '|')
# col_names img | filetype | key
class Tile(object):
    """Record class given to ColumnFamilyMap; one instance is one tile."""
    # raw image content, assigned from the file data in putCassandra()
    img = BytesType()
    # image type, e.g. 'png'
    filetype = UTF8Type()
    # '|'-joined key produced by pathToKey()
    key = UTF8Type()
def setupCassandra():
    """Create the keyspace and the tile column family when they are missing.

    Uses a SystemManager connection to ``cassandraHost`` that is closed
    again before returning, also when one of the schema calls raises.
    Returns None.
    """
    # Local on purpose: there is no ``global`` statement, so the
    # module-level ``sysm`` placeholder is left untouched.
    sysm = SystemManager(cassandraHost)
    try:
        if keySpaceName not in sysm.list_keyspaces():
            sysm.create_keyspace(keySpaceName, SIMPLE_STRATEGY, {'replication_factor': '1'})
        if columnFamily not in sysm.get_keyspace_column_families(keySpaceName):
            print('creating new table')
            cf_kwargs = {
                # data type for a column name is called a comparator
                'comparator_type': UTF8_TYPE,
                # data type for a column (or row key) value is called a validator
                'key_validation_class': UTF8_TYPE,
                'default_validation_class': UTF8_TYPE,
            }
            sysm.create_column_family(keySpaceName, columnFamily, super=super_cf, **cf_kwargs)
            # 'img' carries raw image bytes, so it gets a bytes validator
            # instead of the UTF-8 default.
            sysm.alter_column(keySpaceName, columnFamily, 'img', BYTES_TYPE)
    finally:
        # Release the connection even if a schema call failed.
        sysm.close()
def pathToKey(path):
    """Translate a tile's file system path into its row key.

    The (up to six) directory components directly above the file and the
    file name minus its last four characters are joined with '|'.
    """
    parts = path.split(os.sep)
    directories = '|'.join(parts[-7:-1])
    stem = parts[-1][:-4]  # drops a four character suffix such as '.png'
    return '%s|%s' % (directories, stem)
def putCassandra(path, filetype='png'):
    """Insert one tile image through ``cfmap`` unless its key exists.

    path     -- file system path of the tile image
    filetype -- value stored in the tile's ``filetype`` column

    Returns None.  Errors other than NotFoundException from the lookup
    propagate to the caller.
    """
    key = pathToKey(path)
    try:
        cfmap.get(key)
    except NotFoundException:
        # Not stored yet -- fall through to the insert below.
        pass
    else:
        print('skip %s' % key)
        return
    # Tiles are binary images: read them in binary mode.
    with open(path, 'rb') as f:
        data = f.read()
    tile = Tile()
    tile.key = key
    tile.filetype = filetype
    tile.img = data
    timestamp = cfmap.insert(tile)
    print('insert %s - %s' % (key, timestamp))
def dispatchFile(path):
    """Forward a PNG tile to putCassandra(); ignore every other file.

    path -- file name as yielded by os.walk().  It is resolved against the
            module-level ``dirname`` set by the walking loop in the
            ``__main__`` block, so that loop must be running.
    """
    # Work on the argument itself rather than on the ``filename`` variable
    # leaked by the caller's list comprehension.
    if path.endswith('png'):
        # os.path.join() returns ``path`` unchanged if it is absolute.
        putCassandra(os.path.join(dirname, path))
if __name__ == "__main__":
    # Command line entry point: the schema is ensured and the connection
    # pool opened first, then the first flag that is set is acted on.
    parser = argparse.ArgumentParser()
    parser.add_argument('-update', action='store_true',
                        help='update cassandra tilecache, skip existing tiles')
    parser.add_argument('-drop', action='store_true',
                        help='drop entire cassandra keyspace')
    parser.add_argument('-test', action='store_true',
                        help='test single row')
    args = parser.parse_args()

    setupCassandra()
    pool = ConnectionPool(keySpaceName, [cassandraHost])
    cfmap = ColumnFamilyMap(Tile, pool, columnFamily)

    if args.drop:
        # Only prints a message; nothing is dropped here.
        print('drop keyspace \"\"')
        sys.exit()
    if args.update:
        print('update..')
        # ``dirname`` and ``filename`` stay module-level names because
        # dispatchFile() reads them.
        for dirname, dirnames, filenames in os.walk(tileCachePath):
            for filename in filenames:
                dispatchFile(filename)
        sys.exit()
    if args.test:
        try:
            tile = cfmap.get('13|000|008|818|000|006|428')
            print('it works ... asking for key \"%s\"' % tile.key)
        except Exception as e:
            print(e)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment