Skip to content

Instantly share code, notes, and snippets.

@calonso
Last active August 29, 2015 14:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save calonso/f14a0d9673110d6f9578 to your computer and use it in GitHub Desktop.
Save calonso/f14a0d9673110d6f9578 to your computer and use it in GitHub Desktop.
Ruby script to benchmark a candidate Cassandra model
#!/usr/bin/env ruby
require 'rubygems'
require 'bundler'
Bundler.setup
Bundler.require
require 'yaml'
require 'csv'
require 'logger'
require 'date'
require 'set'
require 'benchmark'
require 'time'
# Presumably intended as a cap on simultaneous read requests.
# NOTE(review): nothing in the visible script references this constant; the
# read phase issues one async query per field_2 value without consulting it.
# Confirm whether it is still needed.
MAX_CONCURRENT_READS = 7
# Reads the Cassandra connection settings from `cassandra.yml`, resolved
# relative to this script's own path (one directory above the script's folder).
#
# @return [Object] whatever YAML.load_file parses from the file; the caller
#   hands it straight to Cassandra.cluster
def config
  settings_path = File.expand_path('../../cassandra.yml', __FILE__)
  YAML.load_file(settings_path)
end
# Builds the in-memory dataset used by the benchmark.
#
# The result is a three-level Hash:
#
#   { field_1 (String) => { field_2 => { day (Time) => [Integer, ...] } } }
#
# Called without arguments it produces exactly what the script has always
# produced: 101 field_1 values ("123456789".."123456889"), field_2 values
# 1..12, 101 daily timestamps (today back to 100 days ago) and 100 random
# integers in 0...100 per day.
#
# @param field_1_count [Integer] number of consecutive field_1 values
# @param field_2_values [Enumerable] field_2 values generated per field_1
# @param day_count [Integer] number of days, counted backwards from today
# @param bin_count [Integer] random values per day; the benchmark's INSERT
#   binds one value per bin column (b0..b99), so keep the default of 100 when
#   the data is going to be written
# @param max_value [Integer] exclusive upper bound handed to Kernel#rand
# @return [Hash] the nested structure described above
def generate_data(field_1_count: 101, field_2_values: 1..12, day_count: 101,
                  bin_count: 100, max_value: 100)
  today = Date.today
  # The Date -> Time conversion is independent of field_1/field_2, so it is
  # done once per day instead of once per (field_1, field_2, day) triple.
  timestamps = Array.new(day_count) { |days_ago| (today - days_ago).to_time }

  (0...field_1_count).each_with_object({}) do |i, acc|
    field_1 = (123_456_789 + i).to_s
    acc[field_1] = field_2_values.each_with_object({}) do |field_2, sub_acc|
      sub_acc[field_2] = timestamps.each_with_object({}) do |timestamp, days_acc|
        days_acc[timestamp] = Array.new(bin_count) { rand(max_value) }
      end
    end
  end
end
# Debug logger on STDOUT. It is only handed to the driver if the merge hinted
# at on the cluster line below is re-enabled.
logger = Logger.new(STDOUT).tap { |log| log.level = Logger::DEBUG }

# Connect using the settings returned by `config`.
cluster = Cassandra.cluster(config) # .merge(logger: logger)
session = cluster.connect

# Make sure the benchmark keyspace exists, then switch the session to it.
[
  "CREATE KEYSPACE IF NOT EXISTS mydrive_models_benchmarks WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 2 };",
  "USE mydrive_models_benchmarks"
].each { |cql| session.execute(cql) }
# Schema of the candidate model: rows are partitioned by (field_1, field_2),
# clustered by date, and carry one int column per bin (b0..b99).
create_table_cql = "
CREATE TABLE IF NOT EXISTS binned_sp_2 (
field_1 varchar,
field_2 int,
date timestamp,
b0 int, b1 int, b2 int, b3 int, b4 int, b5 int, b6 int, b7 int, b8 int, b9 int,
b10 int, b11 int, b12 int, b13 int, b14 int, b15 int, b16 int, b17 int, b18 int, b19 int,
b20 int, b21 int, b22 int, b23 int, b24 int, b25 int, b26 int, b27 int, b28 int, b29 int,
b30 int, b31 int, b32 int, b33 int, b34 int, b35 int, b36 int, b37 int, b38 int, b39 int,
b40 int, b41 int, b42 int, b43 int, b44 int, b45 int, b46 int, b47 int, b48 int, b49 int,
b50 int, b51 int, b52 int, b53 int, b54 int, b55 int, b56 int, b57 int, b58 int, b59 int,
b60 int, b61 int, b62 int, b63 int, b64 int, b65 int, b66 int, b67 int, b68 int, b69 int,
b70 int, b71 int, b72 int, b73 int, b74 int, b75 int, b76 int, b77 int, b78 int, b79 int,
b80 int, b81 int, b82 int, b83 int, b84 int, b85 int, b86 int, b87 int, b88 int, b89 int,
b90 int, b91 int, b92 int, b93 int, b94 int, b95 int, b96 int, b97 int, b98 int, b99 int,
PRIMARY KEY ((field_1, field_2), date)
)
"
session.execute(create_table_cql)
# Insert statement for one full row: the three key columns followed by the
# 100 bin columns, i.e. 103 bind markers in total.
insert_cql = "
INSERT INTO binned_sp_2 (field_1, field_2, date,
b0, b1, b2, b3, b4, b5, b6, b7, b8, b9,
b10, b11, b12, b13, b14, b15, b16, b17, b18, b19,
b20, b21, b22, b23, b24, b25, b26, b27, b28, b29,
b30, b31, b32, b33, b34, b35, b36, b37, b38, b39,
b40, b41, b42, b43, b44, b45, b46, b47, b48, b49,
b50, b51, b52, b53, b54, b55, b56, b57, b58, b59,
b60, b61, b62, b63, b64, b65, b66, b67, b68, b69,
b70, b71, b72, b73, b74, b75, b76, b77, b78, b79,
b80, b81, b82, b83, b84, b85, b86, b87, b88, b89,
b90, b91, b92, b93, b94, b95, b96, b97, b98, b99)
VALUES (?, ?, ?,
?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"
prepared_insert = session.prepare(insert_cql)
# Range read: every row of one (field_1, field_2) partition whose date falls
# in the half-open interval [from, to).
prepared_select = session.prepare(
  "SELECT * FROM binned_sp_2 WHERE field_1 = ? AND field_2 = ? AND date >= ? AND date < ?"
)

# Point read: the single row of a partition for one exact date.
prepared_select_day = session.prepare(
  "SELECT * FROM binned_sp_2 WHERE field_1 = ? AND field_2 = ? AND date = ?"
)

# Generate the whole dataset up front so that building it is not part of any
# timed phase below.
data = generate_data
# Blocks until every future in +futures+ has resolved. An empty list is
# skipped, the same guard the read phase already had, now applied to all phases.
# NOTE(review): whether Cassandra::Future.all resolves for zero futures may
# depend on the driver version; confirm before removing the guard.
await_all = lambda do |futures|
  Cassandra::Future.all(futures).get unless futures.empty?
end

# Runs the three timed phases (write, read, update) against the prepared
# statements and the pre-generated `data`, printing one Benchmark row per phase.
Benchmark.benchmark(Benchmark::CAPTION, 19, Benchmark::FORMAT) do |report|
  # Phase 1: insert every generated row. All inserts for one field_1 are fired
  # asynchronously and awaited together before moving to the next field_1.
  report.report("Writing:") do
    data.each do |field_1, field_2_arr|
      inserts = field_2_arr.flat_map do |field_2, dates|
        dates.map do |date, bins|
          session.execute_async(prepared_insert,
                                arguments: [field_1, field_2, date, *bins],
                                consistency: :one)
        end
      end
      await_all.call(inserts)
    end
  end

  # Pause so the operator can flush/compact the table before reads are timed.
  # `puts` prints the prompt without the surrounding quotes `p` would add, and
  # `$stdin.gets` waits on standard input even when the script is started with
  # command-line arguments (Kernel#gets would try to read those as files).
  puts 'Writing completed. Flush and compact and press INTRO to continue'
  $stdin.gets

  # Phase 2: for every (field_1, field_2) partition, read the rows dated from
  # 90 days ago up to (but excluding) tomorrow.
  report.report("Reading:") do
    # The window is the same for every query, so compute it once.
    window_start = (Date.today - 90).to_time
    window_end = (Date.today + 1).to_time

    data.each do |field_1, field_2_arr|
      reads = field_2_arr.each_key.map do |field_2|
        session.execute_async(prepared_select,
                              arguments: [field_1, field_2, window_start, window_end],
                              consistency: :quorum)
      end
      await_all.call(reads)
    end
  end

  # Phase 3: read-then-overwrite. For each field_1 pick one random day within
  # the last 90 days, read that day's row for every field_2, then write a
  # fresh set of bins over it.
  report.report("Updating:") do
    data.each do |field_1, field_2_arr|
      timestamp = (Date.today - rand(90)).to_time
      field_2_values = field_2_arr.keys
      # One replacement row (100 values, one per bin column) per field_2 value.
      # Sized from the data instead of a hard-coded 12, so a different field_2
      # count can never leave an insert without bins.
      new_bins = field_2_values.map { Array.new(100) { rand(100) } }

      reads = field_2_values.map do |field_2|
        session.execute_async(prepared_select_day,
                              arguments: [field_1, field_2, timestamp],
                              consistency: :quorum)
      end
      await_all.call(reads)

      writes = field_2_values.zip(new_bins).map do |field_2, bins|
        session.execute_async(prepared_insert,
                              arguments: [field_1, field_2, timestamp, *bins],
                              consistency: :one)
      end
      await_all.call(writes)
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment