Created
December 9, 2015 13:44
-
-
Save pcmanus/b731ba4278d4a6210589 to your computer and use it in GitHub Desktop.
Insertion script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Simple Data loader. | |
Usage: | |
insert_bigsavings.py [-c] [-n=<x>] [-p=<y>] | |
Options: | |
-c enable compression (disabled by default) | |
-n=<x> total number of rows to insert (default: 1M) | |
-p=<y> number of partitions to insert the row into (default: 100) | |
""" | |
from __future__ import print_function | |
from uuid import uuid4, uuid1 | |
from docopt import docopt | |
from cassandra.cluster import Cluster | |
from random import randint, choice, random | |
from string import ascii_letters | |
# Maximum number of in-flight asynchronous inserts before we stop and wait
# for all of them to complete.
max_async_query = 1000


def parse_count(value, default):
    """Return a command-line count option as a positive int.

    docopt returns option arguments as strings (or None when the option is
    not supplied), so the value has to be converted before it is used in
    comparisons or arithmetic.

    value: raw option value from docopt, or None when the option is absent.
    default: integer returned when value is None.
    Raises ValueError if value is not a positive integer.
    """
    if value is None:
        return default
    count = int(value)
    if count <= 0:
        raise ValueError('expected a positive integer, got %r' % (value,))
    return count


def partition_sizes(rows, partitions):
    """Return how many rows each partition receives.

    The result has one entry per partition and always sums to exactly rows:
    when rows is not a multiple of partitions, the first (rows % partitions)
    partitions get one extra row each.
    """
    base, extra = divmod(rows, partitions)
    return [base + 1 if key < extra else base for key in range(partitions)]


if __name__ == '__main__':
    args = docopt(__doc__)
    use_compression = args['-c']
    try:
        rows = parse_count(args['-n'], 1000000)
        partitions = parse_count(args['-p'], 100)
    except ValueError as err:
        raise SystemExit('invalid argument: %s' % err)
    if rows <= partitions:
        raise SystemExit('the number of rows (-n) must be greater than the number of partitions (-p)')

    cluster = Cluster()
    try:
        session = cluster.connect()
        session.execute("CREATE KEYSPACE ks WITH replication = { 'class': 'SimpleStrategy', 'replication_factor': '1' };")
        # The compressor name is substituted into the table options; the
        # empty string is used when -c was not given.
        session.execute("""
            CREATE TABLE ks.bigsavings (
                k int,
                c text,
                my_first_value int,
                a_set_of_floats set<float>,
                PRIMARY KEY (k, c)
            ) WITH compression = {'sstable_compression': '%s'}
        """ % ('LZ4Compressor' if use_compression else ''))
        insert = session.prepare('INSERT INTO ks.bigsavings (k, c, my_first_value, a_set_of_floats) VALUES (?, ?, ?, ?)')

        futures = []
        for key, row_count in enumerate(partition_sizes(rows, partitions)):
            for _ in range(row_count):
                random_text = ''.join(choice(ascii_letters) for _ in range(100))
                float_set = {random() for _ in range(50)}
                # 2 ** 31 - 1 is the largest value of a 32-bit signed int
                # (note that `2 ^ 31` would be a bitwise XOR, i.e. 29).
                first_value = randint(0, 2 ** 31 - 1)
                futures.append(session.execute_async(insert, [key, random_text, first_value, float_set]))
                # Bound the number of outstanding requests: once the batch
                # is full, wait for every pending insert before continuing.
                if len(futures) > max_async_query:
                    for f in futures:
                        f.result()
                    futures = []
        # Drain whatever is left from the last, partially filled batch.
        for f in futures:
            f.result()

        # Sanity check: the table must now contain exactly the requested rows.
        inserted = len(list(session.execute('SELECT * from ks.bigsavings;')))
        if inserted != rows:
            raise AssertionError('expected %d rows in ks.bigsavings, found %d' % (rows, inserted))
    finally:
        # Always release the driver's connections, even when an insert fails.
        cluster.shutdown()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment