Skip to content

Instantly share code, notes, and snippets.

@gnumoreno
Created December 15, 2020 18:46
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gnumoreno/7440ac0765ccd3b1cc6fb859a160185c to your computer and use it in GitHub Desktop.
Save gnumoreno/7440ac0765ccd3b1cc6fb859a160185c to your computer and use it in GitHub Desktop.
Full Table Scan Example
#!/usr/bin/python3
from cassandra.cluster import Cluster
from cassandra.policies import TokenAwarePolicy
from cassandra.policies import DCAwareRoundRobinPolicy
from cassandra.metadata import *
from cassandra.concurrent import *
from timeit import default_timer as timer
# Token span scanned by this script; both bounds are queried inclusively.
min_token = -(2**63-1)
max_token = 2**63-1
num_nodes = 3
cores_per_nodes = 2
# Three concurrent queries per core across the whole cluster.
parallel_queries = num_nodes * cores_per_nodes * 3
# Number of token sub-ranges the full span is divided into.
segment_size = parallel_queries * 1000
# Floor division keeps the arithmetic in exact integers; true division would
# round-trip the ~2**64 wide span through a float and lose precision.
chunk_size = (max_token - min_token) // segment_size


def build_token_ranges(first_token, last_token, step):
    """Split the inclusive span [first_token, last_token] into sub-ranges.

    Each returned ``(start, end)`` pair is meant to be bound to a query of
    the form ``token(pk) >= start AND token(pk) <= end``. Because both ends
    are inclusive, consecutive pairs must not share a boundary token,
    otherwise a partition sitting exactly on that boundary is counted twice.

    Args:
        first_token: Lowest token to cover (inclusive).
        last_token: Highest token to cover (inclusive).
        step: Number of tokens per sub-range; must be positive.

    Returns:
        A list of ``(start, end)`` tuples that are contiguous, do not
        overlap, and together cover every token in the span exactly once.
        The final pair may be shorter than ``step``.

    Raises:
        ValueError: If ``step`` is not positive.
    """
    if step <= 0:
        raise ValueError("step must be a positive integer")
    # Iterating up to last_token + 1 guarantees last_token itself starts (or
    # falls inside) the final sub-range; "- 1" keeps neighbours disjoint.
    return [
        (start, min(start + step - 1, last_token))
        for start in range(first_token, last_token + 1, step)
    ]


def main():
    """Count every row of the table by summing parallel per-range counts.

    Connects to the cluster, prepares one ``count(*)`` statement restricted
    to a token range, executes it concurrently for every sub-range produced
    by ``build_token_ranges`` and prints the total together with the
    elapsed wall-clock time in seconds.
    """
    cluster = Cluster(["172.19.0.2", "172.19.0.3"])
    try:
        session = cluster.connect()
        # NOTE(review): ConsistencyLevel is not imported by name in this
        # file; presumably it arrives through one of the wildcard imports.
        session.default_consistency_level = ConsistencyLevel.LOCAL_ONE
        print("Parallel queries = ",parallel_queries)
        print("Segment size = ", segment_size)

        # Count all records using chunks of tokens.
        start = timer()
        statement = session.prepare("SELECT count(*) from keyspace.table where token(partition_key)>=? and token(partition_key)<=? BYPASS CACHE;")
        token_ranges = build_token_ranges(min_token, max_token, chunk_size)
        results = execute_concurrent_with_args(session, statement, token_ranges, concurrency=parallel_queries)
        # Every result holds a single row whose "count" column is the number
        # of rows in that sub-range. The success flag is not inspected here:
        # with the driver's default settings a failed query is expected to
        # raise from the call above rather than be returned — TODO confirm
        # for the driver version in use.
        record_count = sum(result[0].count for _success, result in results)
        print ("Sum of Ranges using token chuncks with parallelism = ", record_count)
        end = timer()
        print(end - start)
    finally:
        # Release the driver's connections even when the scan fails.
        cluster.shutdown()


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment