Skip to content

Instantly share code, notes, and snippets.

@artem-mindrov
Last active November 7, 2018 15:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save artem-mindrov/7b15f038e0e2dfae6830c3112ee06481 to your computer and use it in GitHub Desktop.
Save artem-mindrov/7b15f038e0e2dfae6830c3112ee06481 to your computer and use it in GitHub Desktop.
Rough estimation of a Cassandra (C*) table size
#!/usr/bin/env python
import subprocess, shlex, re
from argparse import ArgumentParser
def tsize(type):
    """Return a rough per-value size estimate, in bytes, for a CQL type name.

    Args:
        type: The CQL type name as a string (e.g. ``"text"``, ``"bigint"``).
            The name shadows the builtin but is kept for compatibility with
            existing callers.

    Returns:
        16 for text, varchar, inet, varint, blob, uuid and timeuuid;
        8 for bigint, date, decimal, double, timestamp and time;
        1 for boolean and tinyint; 4 for any other type name.
    """
    sizes = {
        ("text", "varchar", "inet", "varint", "blob", "uuid", "timeuuid"): 16,
        ("bigint", "date", "decimal", "double", "timestamp", "time"): 8,
        ("boolean", "tinyint"): 1,
    }
    # dict.items() exists on both Python 2 and Python 3, whereas iteritems()
    # is Python 2 only and raises AttributeError on Python 3.
    for names, size in sizes.items():
        if type in names:
            return size
    # Fallback for every type not listed above.
    return 4
def _describe_columns(table):
    """Return the non-blank body lines of the CREATE statement for smart.<table>.

    Runs ``cqlsh -e "desc smart.<table>"`` and pipes the result through a sed
    script that prints the lines inside the ``/^CREATE/`` .. ``/^)/`` range
    while dropping the two delimiter lines themselves.
    """
    # NOTE(review): the cqlsh user name and password are hard-coded here;
    # consider taking them from the environment or a cqlshrc file.
    # The command is given as an argument list so that a table name containing
    # quotes or whitespace cannot change how the command line is split.
    cqlsh = subprocess.Popen(
        ["cqlsh", "-u", "ocs_superuser", "-p", "password",
         "-e", "desc smart.%s" % table],
        stdout=subprocess.PIPE)
    # universal_newlines=True makes communicate() return text on Python 3,
    # so the substring tests done by the caller do not fail on bytes.
    sed = subprocess.Popen(shlex.split("sed -n '/^CREATE/,/^)/{//!p}'"),
                           stdin=cqlsh.stdout, stdout=subprocess.PIPE,
                           universal_newlines=True)
    # Close our copy of the pipe so cqlsh gets SIGPIPE if sed exits early.
    cqlsh.stdout.close()
    output = sed.communicate()[0]
    cqlsh.wait()
    return [line for line in output.splitlines() if line.strip()]


def _parse_primary_key(line):
    """Split a table-level ``PRIMARY KEY (...)`` line into (primary_keys, clustering).

    ``primary_keys`` lists every key column (partition columns followed by
    clustering columns); ``clustering`` lists only the clustering columns.
    """
    # Drop everything up to "KEY ", then the outer pair of parentheses.
    pk = re.sub(r'^.*KEY ', '', line.strip())[1:-1]
    if pk.startswith('('):
        # Composite partition key: "(a, b), c, d"
        close = pk.find(")")
        partition = pk[1:close].split(", ")
        rest = pk[close + 1:].lstrip(", ")
        # With no clustering columns the remainder is empty; splitting it
        # would produce [''] and a KeyError on the size lookup later.
        clustering = rest.split(", ") if rest else []
        return partition + clustering, clustering
    # Single partition column followed by clustering columns: "a, b, c"
    columns = pk.split(", ")
    return columns, columns[1:]


if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('-t', '--table', required=True)
    # NOTE(review): presumably the number of partitions — confirm.
    parser.add_argument('-n', '--number', default=1, type=int)
    # NOTE(review): presumably the number of rows per partition — confirm.
    parser.add_argument('-p', '--partsize', default=1, type=int)
    args = parser.parse_args()

    desc = _describe_columns(args.table)
    if not desc:
        raise SystemExit("no column definitions found for smart.%s" % args.table)

    colsizes = {}
    primary_keys = []
    clustering = []
    if "PRIMARY KEY" in desc[0]:
        # Inline form: "<column> <type> PRIMARY KEY," on the first line.
        primary_keys.append(desc[0].split()[0])
    elif "PRIMARY KEY" in desc[-1]:
        # Table-level form: the last line is the key clause, not a column.
        primary_keys, clustering = _parse_primary_key(desc[-1])
        desc = desc[:-1]

    for line in desc:
        linesplit = line.split()
        if len(linesplit) < 2:
            continue
        # Strip only a trailing comma; the type token has none when it is
        # followed by "PRIMARY KEY"/"static" or ends the column list.
        colsizes[linesplit[0]] = tsize(linesplit[1].rstrip(','))

    print(colsizes)
    print(primary_keys)
    print(clustering)

    # Flat 8 bytes of overhead per column.
    metadata_size = 8 * len(colsizes)
    pk_size = sum(colsizes[col] for col in primary_keys)
    ck_size = sum(colsizes[col] for col in clustering)
    reg_col_sizes = sum(colsizes.values()) - pk_size
    print(args.number * (pk_size + args.partsize * (reg_col_sizes + ck_size) + metadata_size))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment