Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Simple CQL to Thrift Comparison in ruby
require 'cassandra-cql'
require 'simple_uuid'
require 'cassandra/0.8'
require 'benchmark'
require 'forgery'
# Rebuilds the Testing keyspace and its four benchmark column families, then
# opens the benchmark connections through setup_connections.
#
# Every DROP is best-effort: CassandraCQL::Error::InvalidRequestException is
# swallowed (presumably raised when the target does not exist yet), so a
# first run against an empty cluster does not abort.
def setup
  @cassandra_cql = CassandraCQL::Database.new('127.0.0.1:9160')

  # Executes one statement and ignores an InvalidRequestException from it.
  drop_quietly = lambda do |statement|
    begin
      @cassandra_cql.execute(statement)
    rescue CassandraCQL::Error::InvalidRequestException
      nil
    end
  end

  drop_quietly.call("DROP KEYSPACE Testing")
  @cassandra_cql.execute("CREATE KEYSPACE Testing WITH strategy_class='org.apache.cassandra.locator.SimpleStrategy' AND strategy_options:replication_factor=1")
  @cassandra_cql.execute("use Testing")

  # For each row shape: drop the thrift then the cql family, and recreate
  # them in cql-then-thrift order.
  %w[wide_row narrow_row].each do |shape|
    %w[thrift cql].each do |api|
      drop_quietly.call("DROP COLUMNFAMILY #{shape}_#{api}")
    end
    %w[cql thrift].each do |api|
      @cassandra_cql.execute("CREATE COLUMNFAMILY #{shape}_#{api} (id uuid PRIMARY KEY)")
    end
  end

  setup_connections
end
# Opens one Thrift client and one CQL client against the local node, both
# bound to the Testing keyspace, and stores them in @cassandra_thrift and
# @cassandra_cql. Returns the CQL client (the last assignment).
def setup_connections
  node = '127.0.0.1:9160'
  keyspace = 'Testing'

  @cassandra_thrift = Cassandra.new(keyspace, node)
  @cassandra_cql = CassandraCQL::Database.new(node, :keyspace => keyspace)
end
# Inserts `count` rows into narrow_row_cql through the CQL driver, one
# INSERT per row, each with a fresh UUID key and four Forgery-generated
# values.
#
# @param count [Integer] number of rows to insert (default 1000)
# @return [Integer] count, as returned by Integer#times
def insert_narrow_row_cql(count = 1000)
  count.times do
    bind_values = [
      CassandraCQL::UUID.new,
      Forgery(:internet).email_address,
      Forgery(:basic).password,
      Forgery(:name).first_name,
      Forgery(:name).last_name
    ]
    @cassandra_cql.execute(
      "INSERT INTO narrow_row_cql (id, email, password, first_name, last_name) VALUES (?, ?, ?, ?, ?)",
      *bind_values
    )
  end
end
# Inserts `count` rows into narrow_row_thrift through the Thrift client,
# each keyed by a fresh SimpleUUID string and carrying four
# Forgery-generated columns.
#
# @param count [Integer] number of rows to insert (default 1000)
# @return [Integer] count, as returned by Integer#times
def insert_narrow_row_thrift(count = 1000)
  count.times do
    row_key = SimpleUUID::UUID.new.to_s
    profile = {
      'email'      => Forgery(:internet).email_address,
      'password'   => Forgery(:basic).password,
      'first_name' => Forgery(:name).first_name,
      'last_name'  => Forgery(:name).last_name
    }
    @cassandra_thrift.insert(:narrow_row_thrift, row_key, profile)
  end
end
# Inserts `row_count` rows into wide_row_cql through the CQL driver, each
# with a fresh UUID key and `column_count` Forgery-generated text columns
# named 'column_0000', 'column_0001', ...
#
# The INSERT statement depends only on `column_count`, so it is built once
# with Array#join rather than grown with `+=` for every column of every row.
# That keeps client-side string building out of the timed region. The same
# statement String is handed to every execute call.
#
# @param row_count [Integer] number of rows to insert
# @param column_count [Integer] number of generated columns per row
# @return [Integer] row_count, as returned by Integer#times
def insert_wide_row_cql(row_count, column_count)
  column_names = Array.new(column_count) { |index| "'column_#{index.to_s.rjust(4, '0')}'" }
  placeholders = Array.new(column_count + 1, '?')
  cql = "INSERT INTO wide_row_cql (#{['id', *column_names].join(',')}) VALUES (#{placeholders.join(',')})"

  row_count.times do
    # The key comes first so the bind values line up with the column list.
    bind_values = [CassandraCQL::UUID.new] + Array.new(column_count) { Forgery(:basic).text }
    @cassandra_cql.execute(cql, *bind_values)
  end
end
# Inserts `row_count` rows into wide_row_thrift through the Thrift client.
# Each row is keyed by a fresh SimpleUUID string and holds `column_count`
# Forgery-generated text columns named column_0000, column_0001, ...
#
# @param row_count [Integer] number of rows to insert
# @param column_count [Integer] number of generated columns per row
# @return [Integer] row_count, as returned by Integer#times
def insert_wide_row_thrift(row_count, column_count)
  row_count.times do
    row = (0...column_count).each_with_object({}) do |position, cells|
      cells["column_#{position.to_s.rjust(4, '0')}"] = Forgery(:basic).text
    end
    @cassandra_thrift.insert(:wide_row_thrift, SimpleUUID::UUID.new.to_s, row)
  end
end
# Reads wide_row_cql back through the CQL driver with a single
# "SELECT *" and counts the rows handed to the fetch block.
#
# The counter used to be discarded (the method returned whatever `fetch`
# returned); it is now the return value so callers can check how much
# data the timed read actually covered.
#
# @return [Integer] number of rows yielded by fetch
def read_wide_row_cql
  rows_read = 0
  @cassandra_cql.execute("SELECT * FROM wide_row_cql").fetch { |_row| rows_read += 1 }
  rows_read
end
# Iterates wide_row_thrift through the Thrift client and counts the rows
# handed to the block.
#
# The counter used to be discarded (the method returned whatever `each`
# returned); it is now the return value so callers can check how much
# data the timed read actually covered.
#
# @return [Integer] number of rows yielded by each
def read_wide_row_thrift
  rows_read = 0
  # NOTE(review): :count => 1000 presumably raises the per-row column limit so
  # all 1000 columns come back -- confirm against the cassandra gem's options.
  @cassandra_thrift.each(:wide_row_thrift, :count => 1000) { |_row| rows_read += 1 }
  rows_read
end
# Reads narrow_row_cql back through the CQL driver with a single
# "SELECT *" and counts the rows handed to the fetch block.
#
# The counter used to be discarded (the method returned whatever `fetch`
# returned); it is now the return value so callers can check how much
# data the timed read actually covered.
#
# @return [Integer] number of rows yielded by fetch
def read_narrow_row_cql
  rows_read = 0
  @cassandra_cql.execute("SELECT * FROM narrow_row_cql").fetch { |_row| rows_read += 1 }
  rows_read
end
# Iterates narrow_row_thrift through the Thrift client and counts the rows
# handed to the block.
#
# The counter used to be discarded (the method returned whatever `each`
# returned); it is now the return value so callers can check how much
# data the timed read actually covered.
#
# @return [Integer] number of rows yielded by each
def read_narrow_row_thrift
  rows_read = 0
  @cassandra_thrift.each(:narrow_row_thrift) { |_row| rows_read += 1 }
  rows_read
end
# Times the four insert workloads (wide and narrow rows, CQL and Thrift)
# with Benchmark.bm, using a 35-character label column.
#
# @return [Array<Benchmark::Tms>] one measurement per workload, in run order
def benchmark_inserts
  workloads = [
    ["CQL Insert 100 rows 1000 cols:",    lambda { insert_wide_row_cql(100, 1000) }],
    ["Thrift Insert 100 rows 1000 cols:", lambda { insert_wide_row_thrift(100, 1000) }],
    ["CQL Insert 1000 rows 5 cols: ",     lambda { insert_narrow_row_cql(1000) }],
    ["Thrift Insert 1000 rows 5 cols: ",  lambda { insert_narrow_row_thrift(1000) }]
  ]

  Benchmark.bm(35) do |reporter|
    workloads.each { |label, work| reporter.report(label, &work) }
  end
end
# Times the four read workloads (wide and narrow rows, CQL and Thrift)
# with Benchmark.bm, using a 35-character label column.
#
# @return [Array<Benchmark::Tms>] one measurement per workload, in run order
def benchmark_reads
  workloads = [
    ["CQL Read 100 rows 1000 cols:", :read_wide_row_cql],
    ["Thrift 100 rows 1000 cols:",   :read_wide_row_thrift],
    ["CQL Read 1000 rows 5 cols: ",  :read_narrow_row_cql],
    ["Thrift 1000 rows 5 cols: ",    :read_narrow_row_thrift]
  ]

  Benchmark.bm(35) do |reporter|
    workloads.each do |label, reader|
      reporter.report(label) { send(reader) }
    end
  end
end
# Script entry point: rebuild the schema, then time the inserts followed by
# the reads (the reads scan what the insert pass just wrote).
if __FILE__ == $0
  # setup ends by calling setup_connections, so calling it again here would
  # only open a second, redundant pair of connections.
  setup
  benchmark_inserts
  benchmark_reads

  # Optional profiling hooks, left disabled:
  #require 'perftools'
  #PerfTools::CpuProfiler.start("/tmp/read_wide_row_cql") do
  # read_wide_row_cql
  #end
  #
  #require 'rbtrace'
  #while true
  # read_wide_row_cql
  #end
end
# Gemfile for the CQL vs. Thrift benchmark script.
#
# The symbol form `source :rubygems` is deprecated by Bundler; name the
# HTTPS index explicitly instead.
source 'https://rubygems.org'

gem 'rake'
gem 'forgery'
# Both Cassandra drivers are loaded from sibling checkouts, not released gems.
gem 'cassandra-cql', :path => '../cassandra-cql'
gem 'cassandra', :path => '../cassandra'
# Used only by the commented-out profiling hooks in the benchmark script.
gem 'perftools.rb'
gem 'rbtrace'
# ruby cql_speed_comparison.rb
user system total real
CQL Insert 100 rows 1000 cols: 7.640000 0.080000 7.720000 ( 9.216009)
Thrift Insert 100 rows 1000 cols: 11.690000 0.140000 11.830000 ( 12.782685)
CQL Insert 1000 rows 5 cols: 0.500000 0.050000 0.550000 ( 1.246638)
Thrift Insert 1000 rows 5 cols: 0.730000 0.050000 0.780000 ( 1.265024)
user system total real
CQL Read 100 rows 1000 cols: 3.260000 0.030000 3.290000 ( 3.862104)
Thrift 100 rows 1000 cols: 4.540000 0.060000 4.600000 ( 4.999242)
CQL Read 1000 rows 5 cols: 0.160000 0.000000 0.160000 ( 0.187445)
Thrift 1000 rows 5 cols: 0.240000 0.010000 0.250000 ( 0.292730)

Not that it particularly matters, since I already like the results, but changing the wide-row insertion test to map a range to an array and call join makes it much faster. A fair amount of time in that test is spent on string concatenation, which has nothing to do with benchmarking the driver or Cassandra.

Owner

rwjblue commented Sep 6, 2011

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment