Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Insertion script
"""Simple Data loader.
Usage:
insert_bigsavings.py [-c] [-n=<x>] [-p=<y>]
Options:
-c enable compression (disabled by default)
-n=<x> total number of rows to insert (default: 1M)
-p=<y> number of partitions to insert the row into (default: 100)
"""
from __future__ import print_function
from uuid import uuid4, uuid1
from docopt import docopt
from cassandra.cluster import Cluster
from random import randint, choice, random
from string import ascii_letters
# Upper bound on in-flight asynchronous inserts before the script stops to
# wait for the outstanding ones to complete.
max_async_query = 1000

# Largest value used for the `my_first_value` column, which is declared as a
# CQL `int` (32-bit signed).
MAX_INT32 = 2 ** 31 - 1


def int_option(value, default):
    """Return a docopt option value as an int, or *default* when it is None.

    docopt hands option arguments back as strings (None when the option was
    not given), so they must be converted before any comparison or division.

    Raises:
        ValueError: if *value* is not None and is not a valid integer.
    """
    return default if value is None else int(value)


def random_row(key):
    """Build the bind parameters for one random row in partition *key*.

    Returns a list ``[key, text, number, floats]`` where *text* is 100 random
    ASCII letters, *number* is a random integer in ``[0, MAX_INT32]`` and
    *floats* is a set of up to 50 random floats in ``[0.0, 1.0)``.
    """
    random_text = ''.join(choice(ascii_letters) for _ in range(100))
    float_set = {random() for _ in range(50)}
    return [key, random_text, randint(0, MAX_INT32), float_set]


def wait_for_all(futures):
    """Block until every future in *futures* has produced its result."""
    for future in futures:
        future.result()


def main():
    """Create ks.bigsavings, load it with random rows and verify the count.

    Reads ``-c``, ``-n`` and ``-p`` from the command line via docopt, inserts
    ``rows // partitions`` rows into each of ``partitions`` partitions, then
    reads the table back and checks that every inserted row is present.

    Raises:
        SystemExit: on invalid options or when the final row count is wrong.
    """
    args = docopt(__doc__)
    use_compression = args['-c']
    try:
        rows = int_option(args['-n'], 1000000)
        partitions = int_option(args['-p'], 100)
    except ValueError as err:
        raise SystemExit('-n and -p must be integers (%s)' % err)
    # Explicit check rather than `assert`, which is stripped under `python -O`.
    if partitions <= 0 or rows <= partitions:
        raise SystemExit('-p must be positive and -n must be greater than -p')

    cluster = Cluster()
    try:
        session = cluster.connect()
        session.execute("CREATE KEYSPACE ks WITH replication = { 'class': 'SimpleStrategy', 'replication_factor': '1' };")
        # When -c is not given an empty compressor class name is sent.
        # NOTE(review): presumably this leaves compression off on the target
        # Cassandra version -- confirm.
        session.execute("""
            CREATE TABLE ks.bigsavings (
                k int,
                c text,
                my_first_value int,
                a_set_of_floats set<float>,
                PRIMARY KEY (k, c)
            ) WITH compression = {'sstable_compression': '%s'}
        """ % ('LZ4Compressor' if use_compression else ''))
        insert = session.prepare('INSERT INTO ks.bigsavings (k, c, my_first_value, a_set_of_floats) VALUES (?, ?, ?, ?)')

        row_per_partition = rows // partitions
        # Integer division may drop a remainder, so this (not `rows`) is the
        # number of rows actually written.
        expected_rows = row_per_partition * partitions

        futures = []
        for key in range(partitions):
            for _ in range(row_per_partition):
                futures.append(session.execute_async(insert, random_row(key)))
                # Throttle: drain the batch once too many writes are pending.
                if len(futures) > max_async_query:
                    wait_for_all(futures)
                    futures = []
        wait_for_all(futures)

        # Count while iterating instead of materializing every row in a list.
        stored_rows = sum(1 for _ in session.execute('SELECT * from ks.bigsavings;'))
        if stored_rows != expected_rows:
            raise SystemExit('expected %d rows in ks.bigsavings, found %d'
                             % (expected_rows, stored_rows))
    finally:
        # Always release the driver's connections, even on failure.
        cluster.shutdown()


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment