Skip to content

Instantly share code, notes, and snippets.

@upbit
Created July 15, 2015 09:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save upbit/d3555455d9ef12cd65a2 to your computer and use it in GitHub Desktop.
Save upbit/d3555455d9ef12cd65a2 to your computer and use it in GitHub Desktop.
Dump a big Cassandra table
# -*- coding:utf-8 -*- #
import sys
reload(sys)
sys.setdefaultencoding("utf-8")
import codecs
try:
import cassandra
import cassandra.concurrent
except ImportError:
sys.exit('Python Cassandra driver not installed. You might try \"pip install cassandra-driver\".')
from cassandra.auth import PlainTextAuthProvider #For protocol_version 2
from cassandra.cluster import Cluster
class CassandraHelper(object):
    """Small wrapper around a Cassandra session for streaming whole tables.

    The session is configured so rows come back as ordered dicts and are
    requested from the server ``fetch_size`` rows at a time, which is what
    keeps dumping a big table from loading it all at once.
    """

    def __init__(self, host="127.0.0.1", keyspace="", fetch_size=100):
        """Connect to the cluster and open a session.

        Args:
            host: Contact point used to reach the cluster.
            keyspace: Keyspace to use for the session; an empty value opens
                the session without a default keyspace.
            fetch_size: Value assigned to the session's default fetch size.

        Raises:
            Whatever ``Cluster.connect`` raises; the cluster is shut down
            before the exception propagates.
        """
        # Imported here so the row factory does not depend on
        # ``cassandra.query`` having been loaded as a side effect of
        # another import.
        from cassandra.query import ordered_dict_factory

        self.cluster = Cluster([host])
        try:
            # ``None`` is the driver's documented "no keyspace" value.
            self.session = self.cluster.connect(keyspace or None)
        except Exception:
            # Do not leave a half-initialised cluster object behind when the
            # connection attempt fails.
            self.cluster.shutdown()
            raise
        self.session.default_fetch_size = fetch_size
        self.session.row_factory = ordered_dict_factory

    def cleanup_cluster(self):
        """Shut down the session and then the cluster it belongs to."""
        try:
            self.session.shutdown()
        finally:
            # Always release the cluster, even if closing the session failed.
            self.cluster.shutdown()

    def dump(self, tablename):
        """Yield every row of ``tablename`` one at a time.

        Args:
            tablename: Name of the table to read. It is interpolated into the
                query text unchanged, so it must come from a trusted source.

        Yields:
            Each row returned by ``SELECT * FROM <tablename>``.
        """
        # NOTE: CQL cannot bind identifiers as parameters, hence the string
        # formatting; never pass an untrusted table name here.
        query = "SELECT * FROM %s" % tablename
        for row in self.session.execute(query):
            yield row
def main():
    """Dump one table to stdout as ``uid,ts,data`` lines.

    Command line: ``<tablename> (keyspace) (host)``. The keyspace defaults to
    ``"test"`` and the host to ``"192.168.1.100"``. Rows that lack one of the
    three columns, or whose ``ts`` cannot be converted to an integer, are
    skipped; the number of skipped rows is reported on stderr at the end.
    The connection is always closed, even if the dump fails part-way.
    """
    if len(sys.argv) < 2:
        # Single parenthesised argument: prints identically as a Python 2
        # print statement and as a print() call.
        print("Usage: %s <tablename> (keyspace) (host)" % sys.argv[0])
        return

    tablename = sys.argv[1]
    keyspace = sys.argv[2] if len(sys.argv) >= 3 else "test"
    host = sys.argv[3] if len(sys.argv) >= 4 else "192.168.1.100"

    cass = CassandraHelper(host=host, keyspace=keyspace)
    skipped = 0
    try:
        for row in cass.dump(tablename):
            try:
                # int() promotes to long automatically on Python 2, so the
                # formatted text matches what long() produced.
                line = "%s,%s,%s" % (row['uid'], int(row['ts']), row['data'])
            except (KeyError, TypeError, ValueError):
                # Best-effort dump: skip malformed rows, but only for the
                # expected data errors so Ctrl-C and real failures still
                # propagate.
                skipped += 1
                continue
            print(line)
    finally:
        cass.cleanup_cluster()

    if skipped:
        sys.stderr.write("Skipped %d malformed row(s)\n" % skipped)
# Run the dump only when executed as a script, so importing this module to
# reuse CassandraHelper does not start a dump as a side effect.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment