Skip to content

Instantly share code, notes, and snippets.

@pcmanus
Created December 9, 2015 13:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pcmanus/db5adca5bcdd400d9f63 to your computer and use it in GitHub Desktop.
Save pcmanus/db5adca5bcdd400d9f63 to your computer and use it in GitHub Desktop.
Insertion script
"""Simple Data loader.
Usage:
insert_smallsavings.py [-c] [-n=<x>]
Options:
-c enable compression (disabled by default)
-n=<x> total number of rows to insert (default: 1M)
"""
from __future__ import print_function
from uuid import uuid4, uuid1
from docopt import docopt
from cassandra.cluster import Cluster
from random import randint, choice, random
from string import ascii_letters
# Upper bound on the number of asynchronous INSERTs kept in flight before the
# script blocks and waits for every pending request to complete.
max_async_query = 1000

if __name__ == '__main__':
    # Script entry point: create keyspace `ks` and table `ks.smallsavings`,
    # insert `rows` rows of random data, then check the resulting row count.
    args = docopt(__doc__)
    use_compression = args['-c']
    # docopt returns option arguments as strings; convert so the value can be
    # passed to range() and compared against the final row count.
    rows = 1000000 if args['-n'] is None else int(args['-n'])

    cluster = Cluster()
    try:
        session = cluster.connect()
        session.execute("CREATE KEYSPACE ks WITH replication = { 'class': 'SimpleStrategy', 'replication_factor': '1' };")
        # An empty 'sstable_compression' value is what the table is created
        # with when -c is not given.
        session.execute("""
CREATE TABLE ks.smallsavings (
k int PRIMARY KEY,
v1 int,
v2 text
) WITH compression = {'sstable_compression': '%s'}
""" % ('LZ4Compressor' if use_compression else ''))
        insert = session.prepare('INSERT INTO ks.smallsavings (k, v1, v2) VALUES (?, ?, ?)')

        futures = []
        for row in range(rows):
            random_text = ''.join(choice(ascii_letters) for _ in range(1000))
            # `**` is exponentiation (`^` is bitwise XOR, so `2^31` is 29).
            # 2 ** 31 - 1 is the largest value a signed 32-bit int can hold.
            v1 = randint(0, 2 ** 31 - 1)
            futures.append(session.execute_async(insert, [row, v1, random_text]))
            # Drain the batch once the in-flight limit is reached; result()
            # blocks and re-raises any error reported for that request.
            if len(futures) >= max_async_query:
                for f in futures:
                    f.result()
                futures = []
        # Wait for the final, partially filled batch.
        for f in futures:
            f.result()

        inserted = len(list(session.execute('SELECT * from ks.smallsavings;')))
        assert inserted == rows, 'expected %d rows, found %d' % (rows, inserted)
    finally:
        # Release the driver's connections and threads even when a step fails.
        cluster.shutdown()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment