Skip to content

Instantly share code, notes, and snippets.

@jnunemaker jnunemaker/Gemfile
Created Mar 11, 2013

Embed
What would you like to do?
Eventually consistent alternate indexes using cassanity.
# Based on articles/presentations by Ed Anuff. The difference below is
# I converted it to use CQL from straight up thrift, which is what I
# assume Ed was using.
#
# Related reading:
# * http://anuff.wpengine.com/2010/07/secondary-indexes-in-cassandra/
# * http://anuff.wpengine.com/2011/02/indexing-in-cassandra/
# * http://cl.ly/NNmt - pdf
#
# **Please** let me know if I didn't translate something correctly, but I
# think this should work. It does stink that it has to be quite specific
# due to using CQL/Schemas instead of dynamic columns/comparators.
#
# Usage:
# * git clone
# * bundle
# * bundle exec ruby example.rb
#
require 'pp'
require 'cassanity'
# Connection settings; CASSANDRA_HOST / CASSANDRA_PORT override the defaults.
host = ENV.fetch('CASSANDRA_HOST') { '127.0.0.1' }
port = ENV.fetch('CASSANDRA_PORT') { 9160 }
server = [host, port].join(':')
client = Cassanity::Client.new(server)
# keyspace that every column family below is created in
keyspace = client[:research]
# column family to store full entity
users_schema = {
  primary_key: :id,
  columns: {
    id: :timeuuid,
    username: :text, # text column to index
  },
}
users = keyspace.column_family(:users, schema: users_schema)
# column family for reading indexes
index_schema = {
  primary_key: [:property_name, :property_value, :id, :ts],
  columns: {
    property_name: :text,  # entity property name (ie: 'username')
    property_value: :text, # value of that property (ie: 'jnunemaker')
    id: :timeuuid,         # user id
    ts: :timeuuid,         # unique uuid
    value: :text,          # could be serialized document or whatever
  },
}
index = keyspace.column_family(:users_by_text_property, schema: index_schema)
# column family for history of indexes to help deal with eventual consistency
index_entries_schema = {
  primary_key: [:id, :property_name, :ts],
  columns: {
    id: :timeuuid,         # user id
    property_name: :text,  # entity property name (ie: 'username')
    ts: :timeuuid,         # unique uuid
    property_value: :text, # value of that property (ie: 'jnunemaker')
  },
}
index_entries = keyspace.column_family(:users_by_text_property_entries, schema: index_entries_schema)
# start from a clean slate: recreate the keyspace, then create each
# column family in it
keyspace.recreate
column_families = [users, index, index_entries]
column_families.each { |column_family| column_family.create }
# proc that handles updating entity, index and index entries
#
# Everything is sent as one keyspace.batch call that:
#   1. deletes every existing history entry for (id, property_name),
#   2. deletes the index rows those history entries describe,
#   3. writes a new history entry and a new index row under a fresh uuid,
#   4. writes the new value to the entity column named by property_name.
#
# property_name  - name of the indexed property (ie: 'username'); it is also
#                  the users column that receives the new value.
# id             - user id; must respond to #to_guid (used in the log line).
# property_value - new value of the property (ie: 'jnunemaker').
#
# Returns whatever keyspace.batch returns.
update_property = ->(property_name, id, property_value) {
  puts "Updating #{property_name} for #{id.to_guid} to #{property_value}"
  modifications = []
  # one fresh uuid ties the new history entry to the new index row
  timestamp = SimpleUUID::UUID.new

  # get all values matching id and property name
  rows = index_entries.select(where: {id: id, property_name: property_name})

  # remove old index entries
  rows.each do |row|
    modifications << [:delete, {
      column_family_name: index_entries.name,
      where: {
        id: row['id'],
        property_name: property_name,
        ts: row['ts'],
      },
    }]
  end

  # remove old index rows (located through the value stored in the history)
  rows.each do |row|
    modifications << [:delete, {
      column_family_name: index.name,
      where: {
        property_name: property_name,
        property_value: row['property_value'],
        id: row['id'],
        ts: row['ts'],
      },
    }]
  end

  # update index entries with new value
  modifications << [:update, {
    column_family_name: index_entries.name,
    set: {property_value: property_value},
    where: {
      id: id,
      property_name: property_name,
      ts: timestamp,
    },
  }]

  # update index with new value
  modifications << [:update, {
    column_family_name: index.name,
    set: {value: nil}, # could be serialized user, or a few fields, whatever
    where: {
      property_name: property_name,
      property_value: property_value,
      id: id,
      ts: timestamp,
    },
  }]

  # update entity with new value; the column is chosen by property_name so
  # that indexing another property does not overwrite the username column
  modifications << [:update, {
    column_family_name: users.name,
    set: {property_name.to_sym => property_value},
    where: {id: id},
  }]

  keyspace.batch(modifications: modifications)
}
# some test data to insert and use: username => newly generated user id
UserIds = %w[i jnunemaker k].each_with_object({}) do |name, ids|
  ids[name] = SimpleUUID::UUID.new
end
# the property we are going to index.
PropertyName = 'username'
# insert some fake users
UserIds.each_pair do |username, id|
  update_property.call(PropertyName, id, username)
end
# a few ranges for searching for users that start with "j" or "n"
# (each range runs from the letter to its successor; the trailing `true` is
# presumably exclude_end as in ::Range.new -- confirm against Cassanity.range)
starts_with = ->(letter) { Cassanity.range(letter, letter.succ, true) }
j_range = starts_with.call('j')
n_range = starts_with.call('n')
# proc to select all index values matching range
#
# range - value used as the property_value condition of the index query.
#
# Returns the 'property_value' of every row the index query yields.
select_range_from_index = lambda do |range|
  query = {
    where: {property_name: PropertyName, property_value: range},
    order: 'property_value',
  }
  matches = index.select(query)
  matches.map { |match| match['property_value'] }
end
# prints a heading followed by the index values found for the given range
show_matches = ->(heading, range) {
  puts heading
  pp select_range_from_index.call(range)
}

# before the rename: jnunemaker is found under "j"
show_matches.call("J's", j_range)
puts
# rename jnunemaker to nunes
update_property.call(PropertyName, UserIds.fetch('jnunemaker'), 'nunes')
puts
# after the rename: query both the "j" and the "n" ranges again
show_matches.call("J's", j_range)
puts
show_matches.call("N's", n_range)
# --- Gemfile (presumably a separate file of the gist, not part of the script above) ---
# Bundler reads these lines to install the cassanity gem from rubygems.org.
source 'https://rubygems.org'
gem 'cassanity'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.