Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Batch insert Python script. It uses the Python driver provided by DataStax.
#!/usr/bin/env python
from cassandra.cluster import Cluster
from cassandra.query import *
from cassandra import *
import hashlib
import sys
# Keyspace the session is bound to when connecting below.
KEYSPACE = 'test'
# Name of the table this script writes to.
TABLE = 'user'
# Number of insert statements placed into each batch by batchInsert.
BATCH_SIZE = 1000
# NOTE: the connection is opened at import time, so importing this module
# requires a reachable Cassandra node.
# No contact points are passed, so the driver's defaults apply
# (presumably localhost -- confirm against the driver documentation).
cluster = Cluster()
session = cluster.connect(KEYSPACE)
def getHash(val):
    """Return the hex-encoded SHA-256 digest of ``str(val)``.

    Works on both Python 2 and Python 3: ``hashlib`` requires bytes, so the
    textual form of *val* is UTF-8 encoded when it is not already a byte
    string (on Python 2 ``str`` is already bytes and is used unchanged).
    """
    data = str(val)
    if not isinstance(data, bytes):
        # Python 3: str is text and must be encoded before hashing.
        data = data.encode("utf-8")
    return hashlib.sha256(data).hexdigest()
def batchInsert(start, num):
    """Send one batch of BATCH_SIZE inserts for every value in range(start, num).

    For each ``i`` in ``range(start, num)`` a ``BatchStatement`` is filled with
    BATCH_SIZE rows ``(i, hash(j), hash(j), i + j)`` for ``j`` in
    ``1..BATCH_SIZE`` and executed on the module-level ``session``.

    Args:
        start: first ``user_id`` value (inclusive).
        num: upper bound of the ``user_id`` range (exclusive).
            NOTE(review): the name suggests a count, but the code uses it as
            the end of the range -- confirm the intent with callers.

    A ``WriteTimeout`` on a batch is reported on stdout and the loop moves on
    to the next batch; any other exception propagates to the caller.
    """
    insert_q = session.prepare(
        "insert into %s.%s (user_id, fname, lname, number) values (?, ?, ?, ?)"
        % (KEYSPACE, TABLE)
    )
    # The hash depends only on j, so compute the values once instead of
    # re-hashing the same BATCH_SIZE numbers for every batch.
    hashed = [(j, getHash(j)) for j in range(1, BATCH_SIZE + 1)]
    for i in range(start, num):
        batch = BatchStatement()
        for j, hashval in hashed:
            batch.add(insert_q, (i, hashval, hashval, i + j))
        try:
            session.execute(batch)
        except WriteTimeout:
            # Best effort: report the timeout and continue with the next batch.
            # The call form of print behaves the same on Python 2 and 3.
            print("write timeout occurred.")
if __name__ == "__main__":
    # Usage: <script> <start> <num>
    # Both values are parsed as integers and passed straight to batchInsert.
    if len(sys.argv) < 3:
        # Exit with a readable message instead of an IndexError traceback.
        sys.exit("usage: %s <start> <num>" % sys.argv[0])
    start = int(sys.argv[1])
    num = int(sys.argv[2])
    try:
        batchInsert(start, num)
    finally:
        # Release the driver's connections even if an insert fails.
        cluster.shutdown()
@reddikih
Copy link
Author

reddikih commented Aug 18, 2016

Setup

See: Installation

Install dependencies:

sudo yum install gcc python-devel
sudo yum install libev libev-devel

To install the Cassandra driver, you can use pip:

pip install cassandra-driver

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment