Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Simple CQL to Thrift comparison in Ruby
require 'cassandra-cql'
require 'simple_uuid'
require 'cassandra/0.8'
require 'benchmark'
require 'forgery'
# Runs a single DROP statement on the bootstrap CQL connection and ignores the
# InvalidRequestException the server raises when the statement is rejected
# (presumably because the target does not exist yet, e.g. on a first run).
# Any other error still propagates.
def drop_if_exists(statement)
  @cassandra_cql.execute(statement)
rescue CassandraCQL::Error::InvalidRequestException
  nil
end

# Rebuilds the benchmark schema from scratch and then opens the connections
# used by the benchmarks.
#
# Steps, in order:
#   1. connect without a keyspace and drop/recreate the "Testing" keyspace;
#   2. for the wide and the narrow layouts, drop and recreate the *_thrift and
#      *_cql column families (each with a single uuid primary key);
#   3. call setup_connections, whose result is returned.
def setup
  @cassandra_cql = CassandraCQL::Database.new('127.0.0.1:9160')

  drop_if_exists("DROP KEYSPACE Testing")
  @cassandra_cql.execute("CREATE KEYSPACE Testing WITH strategy_class='org.apache.cassandra.locator.SimpleStrategy' AND strategy_options:replication_factor=1")
  @cassandra_cql.execute("use Testing")

  # Same statement order as before: drop thrift, drop cql, create cql, create thrift.
  %w[wide_row narrow_row].each do |prefix|
    drop_if_exists("DROP COLUMNFAMILY #{prefix}_thrift")
    drop_if_exists("DROP COLUMNFAMILY #{prefix}_cql")
    @cassandra_cql.execute("CREATE COLUMNFAMILY #{prefix}_cql (id uuid PRIMARY KEY)")
    @cassandra_cql.execute("CREATE COLUMNFAMILY #{prefix}_thrift (id uuid PRIMARY KEY)")
  end

  setup_connections
end
# Opens the two clients the benchmarks talk to, both against the local node and
# the "Testing" keyspace: a Thrift client in @cassandra_thrift and a CQL client
# in @cassandra_cql. Returns the CQL client (the last assignment).
def setup_connections
  server = '127.0.0.1:9160'
  keyspace = 'Testing'

  @cassandra_thrift = Cassandra.new(keyspace, server)
  @cassandra_cql = CassandraCQL::Database.new(server, :keyspace => keyspace)
end
# Inserts `count` rows into narrow_row_cql through the CQL connection.
# Each row gets a fresh CassandraCQL::UUID as its id plus four Forgery-generated
# values (email, password, first name, last name) passed as bind variables.
# Returns `count` (the result of Integer#times).
def insert_narrow_row_cql(count = 1000)
  count.times do
    # Bind values are generated in column order: id, email, password, names.
    bind_values = [
      CassandraCQL::UUID.new,
      Forgery(:internet).email_address,
      Forgery(:basic).password,
      Forgery(:name).first_name,
      Forgery(:name).last_name
    ]
    @cassandra_cql.execute(
      "INSERT INTO narrow_row_cql (id, email, password, first_name, last_name) VALUES (?, ?, ?, ?, ?)",
      *bind_values
    )
  end
end
# Inserts `count` rows into :narrow_row_thrift through the Thrift client.
# Each row is keyed by a new SimpleUUID (as a string) and carries four
# Forgery-generated columns: email, password, first_name and last_name.
# Returns `count` (the result of Integer#times).
def insert_narrow_row_thrift(count = 1000)
  count.times do
    # The key is generated before the column values, as in the insert call order.
    row_key = SimpleUUID::UUID.new.to_s
    profile = {
      'email' => Forgery(:internet).email_address,
      'password' => Forgery(:basic).password,
      'first_name' => Forgery(:name).first_name,
      'last_name' => Forgery(:name).last_name
    }
    @cassandra_thrift.insert(:narrow_row_thrift, row_key, profile)
  end
end
# Inserts `row_count` rows of `column_count` text columns each into wide_row_cql
# through the CQL connection.
#
# Columns are named column_0000, column_0001, ... (zero-padded to four digits)
# and are quoted in the statement; every value is a bind variable. Each row gets
# a fresh CassandraCQL::UUID id followed by `column_count` Forgery text values.
# Returns `row_count` (the result of Integer#times).
def insert_wide_row_cql(row_count, column_count)
  # The statement text depends only on column_count, so build it once with
  # map/join instead of re-concatenating it with += for every row. That string
  # building is not part of what the benchmark is meant to measure.
  column_names = (0...column_count).map { |index| "'column_#{index.to_s.rjust(4, '0')}'" }
  cql = "INSERT INTO wide_row_cql (#{['id', *column_names].join(',')}) VALUES (?#{',?' * column_count})"

  row_count.times do
    values = [CassandraCQL::UUID.new]
    column_count.times { values << Forgery(:basic).text }
    @cassandra_cql.execute(cql, *values)
  end
end
# Inserts `row_count` rows of `column_count` text columns each into
# :wide_row_thrift through the Thrift client.
#
# Columns are named column_0000, column_0001, ... (zero-padded to four digits)
# and filled with Forgery text; each row is keyed by a new SimpleUUID string,
# generated after the row's columns have been built.
# Returns `row_count` (the result of Integer#times).
def insert_wide_row_thrift(row_count, column_count)
  row_count.times do
    payload = (0...column_count).each_with_object({}) do |position, row|
      row["column_#{position.to_s.rjust(4, '0')}"] = Forgery(:basic).text
    end
    @cassandra_thrift.insert(:wide_row_thrift, SimpleUUID::UUID.new.to_s, payload)
  end
end
# Reads every row of wide_row_cql through the CQL connection, counting rows as
# they are fetched. The count is local and discarded; the method returns
# whatever `fetch` returns.
def read_wide_row_cql
  fetched = 0
  result = @cassandra_cql.execute("SELECT * FROM wide_row_cql")
  result.fetch do |_row|
    fetched += 1
  end
end
# Iterates :wide_row_thrift through the Thrift client (passing :count => 1000),
# counting the yielded rows. The count is local and discarded; the method
# returns whatever `each` returns.
def read_wide_row_thrift
  seen = 0
  @cassandra_thrift.each(:wide_row_thrift, :count => 1000) do |_row|
    seen += 1
  end
end
# Reads every row of narrow_row_cql through the CQL connection, counting rows
# as they are fetched. The count is local and discarded; the method returns
# whatever `fetch` returns.
def read_narrow_row_cql
  fetched = 0
  result = @cassandra_cql.execute("SELECT * FROM narrow_row_cql")
  result.fetch do |_row|
    fetched += 1
  end
end
# Iterates :narrow_row_thrift through the Thrift client with no extra options,
# counting the yielded rows. The count is local and discarded; the method
# returns whatever `each` returns.
def read_narrow_row_thrift
  seen = 0
  @cassandra_thrift.each(:narrow_row_thrift) do |_row|
    seen += 1
  end
end
# Times the four insert scenarios with Benchmark.bm (label column width 35),
# in this order: wide rows via CQL, wide rows via Thrift, narrow rows via CQL,
# narrow rows via Thrift. Returns whatever Benchmark.bm returns.
def benchmark_inserts
  scenarios = [
    ["CQL Insert 100 rows 1000 cols:", lambda { insert_wide_row_cql(100, 1000) }],
    ["Thrift Insert 100 rows 1000 cols:", lambda { insert_wide_row_thrift(100, 1000) }],
    ["CQL Insert 1000 rows 5 cols: ", lambda { insert_narrow_row_cql(1000) }],
    ["Thrift Insert 1000 rows 5 cols: ", lambda { insert_narrow_row_thrift(1000) }]
  ]

  Benchmark.bm(35) do |bench|
    # `each` (not `map`) so the block does not hand Benchmark an array of
    # timings, which it would print a second time.
    scenarios.each { |label, work| bench.report(label, &work) }
  end
end
# Times the four read scenarios with Benchmark.bm (label column width 35),
# in this order: wide rows via CQL, wide rows via Thrift, narrow rows via CQL,
# narrow rows via Thrift. Returns whatever Benchmark.bm returns.
def benchmark_reads
  scenarios = [
    ["CQL Read 100 rows 1000 cols:", lambda { read_wide_row_cql }],
    ["Thrift 100 rows 1000 cols:", lambda { read_wide_row_thrift }],
    ["CQL Read 1000 rows 5 cols: ", lambda { read_narrow_row_cql }],
    ["Thrift 1000 rows 5 cols: ", lambda { read_narrow_row_thrift }]
  ]

  Benchmark.bm(35) do |bench|
    # `each` (not `map`) so the block does not hand Benchmark an array of
    # timings, which it would print a second time.
    scenarios.each { |label, work| bench.report(label, &work) }
  end
end
# Script entry point: rebuild the schema, then run the insert benchmarks
# followed by the read benchmarks (reads rely on the rows the inserts wrote).
if __FILE__ == $0
  # setup finishes by calling setup_connections itself, so calling it again
  # here would only open a second, redundant pair of connections.
  setup
  benchmark_inserts
  benchmark_reads

  # Optional profiling helpers, kept disabled:
  #require 'perftools'
  #PerfTools::CpuProfiler.start("/tmp/read_wide_row_cql") do
  # read_wide_row_cql
  #end
  #
  #require 'rbtrace'
  #while true
  # read_wide_row_cql
  #end
end
# Gemfile for the CQL vs. Thrift benchmark script.
# Use the explicit HTTPS source URL; the :rubygems symbol shorthand is deprecated.
source 'https://rubygems.org'

gem 'rake'
gem 'forgery'

# Both Cassandra drivers are loaded from sibling checkouts, not released gems.
gem 'cassandra-cql', :path => '../cassandra-cql'
gem 'cassandra', :path => '../cassandra'

# Referenced only by the commented-out profiling code in the script.
gem 'perftools.rb'
gem 'rbtrace'
# ruby cql_speed_comparison.rb
user system total real
CQL Insert 100 rows 1000 cols: 7.640000 0.080000 7.720000 ( 9.216009)
Thrift Insert 100 rows 1000 cols: 11.690000 0.140000 11.830000 ( 12.782685)
CQL Insert 1000 rows 5 cols: 0.500000 0.050000 0.550000 ( 1.246638)
Thrift Insert 1000 rows 5 cols: 0.730000 0.050000 0.780000 ( 1.265024)
user system total real
CQL Read 100 rows 1000 cols: 3.260000 0.030000 3.290000 ( 3.862104)
Thrift 100 rows 1000 cols: 4.540000 0.060000 4.600000 ( 4.999242)
CQL Read 1000 rows 5 cols: 0.160000 0.000000 0.160000 ( 0.187445)
Thrift 1000 rows 5 cols: 0.240000 0.010000 0.250000 ( 0.292730)
@kreynolds

Not that it particularly matters, since I already like the results, but changing the wide-row insertion test to map a range to an array and call join makes it much faster. A fair amount of time in that test is spent on string concatenation, which has nothing to do with benchmarking the driver or Cassandra.

@rwjblue
Owner
rwjblue commented Sep 6, 2011
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment