Skip to content

Instantly share code, notes, and snippets.

@calonso
Last active August 29, 2015 14:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save calonso/6dba841659000fdd7959 to your computer and use it in GitHub Desktop.
Save calonso/6dba841659000fdd7959 to your computer and use it in GitHub Desktop.
Ruby script to benchmark a candidate Cassandra model
#!/usr/bin/env ruby
require 'rubygems'
require 'bundler'
Bundler.setup
Bundler.require
require 'yaml'
require 'csv'
require 'logger'
require 'date'
require 'set'
require 'benchmark'
require 'time'
# Loads the Cassandra connection settings from cassandra.yml.
#
# The path is resolved against this file's own path, so '../..' lands in the
# directory above the one that contains this script.
#
# @return [Object] whatever the YAML document deserializes to
def config
  settings_path = File.expand_path('../../cassandra.yml', __FILE__)
  YAML.load_file(settings_path)
end
# Builds the random data set that the benchmark writes to Cassandra.
#
# The result is a three-level Hash:
#   field_1 (String) => { field_2 => { day (Time) => bins (Array<Integer>) } }
#
# Called without arguments it produces the original fixed-size set: 101
# consecutive field_1 ids starting at "123456789", field_2 in 1..12, 101 days
# counting back from today (today included) and 100 random bins in 0...100 per
# day. The keyword arguments allow a smaller, larger or repeatable set.
#
# @param field_1_count [Integer] number of consecutive field_1 ids to generate
# @param field_2_values [Enumerable] field_2 values created under every field_1
# @param day_count [Integer] number of days, counting back from +today+
# @param bin_count [Integer] number of bins stored per day
# @param bin_max [Integer] exclusive upper bound for each bin value
# @param today [Date] most recent day of the set
# @param rng [#rand] random source; pass a seeded Random for repeatable data
# @return [Hash{String => Hash}] the nested data set described above
def generate_data(field_1_count: 101, field_2_values: 1..12, day_count: 101,
                  bin_count: 100, bin_max: 100, today: Date.today, rng: Random)
  first_id = 123_456_789

  (0...field_1_count).each_with_object({}) do |offset, by_field_1|
    field_1 = (first_id + offset).to_s
    by_field_1[field_1] = field_2_values.each_with_object({}) do |field_2, by_field_2|
      by_field_2[field_2] = (0...day_count).each_with_object({}) do |days_ago, by_day|
        # Date#to_time yields local midnight of that day, used as the row's date.
        by_day[(today - days_ago).to_time] = Array.new(bin_count) { rng.rand(bin_max) }
      end
    end
  end
end
# Debug logger for the driver. It is not wired in by default; see the
# commented-out merge on the cluster line to enable it.
logger = Logger.new(STDOUT)
logger.level = Logger::DEBUG

# Connect to the cluster described by cassandra.yml.
cluster = Cassandra.cluster(config) # .merge(logger: logger)
session = cluster.connect

# Schema bootstrap, executed in order: create the benchmark keyspace, switch to
# it, then create the table under test. The table is partitioned by
# (field_1, field_2), clustered by date, and stores the bins as a list<int>.
schema_statements = [
  "CREATE KEYSPACE IF NOT EXISTS mydrive_models_benchmarks WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 2 };",
  "USE mydrive_models_benchmarks",
  "\n" \
  "CREATE TABLE IF NOT EXISTS bins_as_list (\n" \
  "field_1 varchar,\n" \
  "field_2 int,\n" \
  "date timestamp,\n" \
  "bins list<int>,\n" \
  "PRIMARY KEY ((field_1, field_2), date)\n" \
  ")\n"
]
schema_statements.each { |cql| session.execute(cql) }

# Statements reused by every benchmark phase: full-row insert, date-range read
# within one partition, and single-day read.
prepared_insert = session.prepare(
  "INSERT INTO bins_as_list (field_1, field_2, date, bins) VALUES (?, ?, ?, ?)"
)
prepared_select = session.prepare(
  "SELECT * FROM bins_as_list WHERE field_1 = ? AND field_2 = ? AND date >= ? AND date < ?"
)
prepared_select_day = session.prepare(
  "SELECT * FROM bins_as_list WHERE field_1 = ? AND field_2 = ? AND date = ?"
)

# In-memory data set that the phases below write, read and update.
data = generate_data
# Waits for a batch of driver futures to complete. An empty batch is skipped,
# which is the guard the read phase always had, now applied to every phase.
await_all = lambda do |futures|
  Cassandra::Future.all(futures).get unless futures.empty?
end

# Runs the write, read and update phases against bins_as_list and prints one
# timing line per phase. Uses `session`, the prepared statements and `data`
# defined earlier in the script.
Benchmark.benchmark(Benchmark::CAPTION, 19, Benchmark::FORMAT) do |report|
  # Write phase: insert every generated row at consistency ONE. All inserts of
  # one field_1 are issued asynchronously and awaited together.
  report.report("Writing:") do
    data.each do |field_1, field_2_arr|
      inserts = field_2_arr.flat_map do |field_2, dates|
        dates.map do |date, bins|
          session.execute_async(prepared_insert, arguments: [field_1, field_2, date, bins], consistency: :one)
        end
      end
      await_all.call(inserts)
    end
  end

  # Manual pause between timed reports so the operator can flush/compact.
  # $stdin.gets is used because bare `gets` reads through ARGF and would try to
  # open command-line arguments as files.
  puts 'Writing completed. Flush and compact and press INTRO to continue'
  $stdin.gets

  # Resolve the calendar day once so every query in the phases below uses the
  # same window, and the timed loops do not recompute it per query.
  today = Date.today
  window_start = (today - 90).to_time
  window_end = (today + 1).to_time

  # Read phase: for every partition fetch the last 90 days (up to and including
  # today) at QUORUM.
  report.report("Reading:") do
    data.each do |field_1, field_2_arr|
      reads = field_2_arr.keys.map do |field_2|
        session.execute_async(prepared_select, arguments: [field_1, field_2, window_start, window_end],
                                               consistency: :quorum)
      end
      await_all.call(reads)
    end
  end

  # Update phase: for each field_1 pick one random day within the last 90, read
  # that day's row in every partition at QUORUM, then overwrite it with fresh
  # random bins at ONE.
  report.report("Updating:") do
    data.each do |field_1, field_2_arr|
      day = (today - rand(90)).to_time
      field_2_values = field_2_arr.keys
      # One fresh list of 100 bins per partition, however many partitions exist.
      new_bins = field_2_values.map { Array.new(100) { rand(100) } }

      reads = field_2_values.map do |field_2|
        session.execute_async(prepared_select_day, arguments: [field_1, field_2, day], consistency: :quorum)
      end
      await_all.call(reads)

      writes = field_2_values.zip(new_bins).map do |field_2, bins|
        session.execute_async(prepared_insert, arguments: [field_1, field_2, day, bins], consistency: :one)
      end
      await_all.call(writes)
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment