public
Last active

Couchbase EBS RAID Backup on Scalarium

  • Download Gist
backup.rb
Ruby
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
#!/usr/bin/env ruby
 
# Backs up the mounted EBS RAID storage used by Couchbase of an EC2 instance using the Scalarium API.
#
# Before running the snapshot it stops Couchbase disk writes and then freezes the file system.
# File system must be XFS.
#
# Usage: Backup.new(<scalarium api token>, <mount point for ebs raid>, <name of couchbase bucket to back up>).start (must be run on the EC2 instance).
 
require 'rest_client'
require 'json'
require 'timeout'
 
# Snapshots the EBS RAID volumes attached to this instance through the
# Scalarium API.
#
# Sequence performed by #start: stop the Couchbase bucket's disk writes, wait
# for its uncommitted items to reach zero, freeze the (XFS) file system,
# request one snapshot per RAID volume, wait for the snapshots to complete and
# finally unfreeze the file system and restart the bucket's disk writes.
class Backup
  API_BASE_URL = 'https://manage.scalarium.com/api'.freeze
  CLUSTER_STATE_FILE = '/var/lib/scalarium/cluster_state.json'.freeze
  COUCHBASE_BIN = '/opt/couchbase/bin'.freeze
  COUCHBASE_NODE = '127.0.0.1:11210'.freeze

  # Volume attributes kept from the API response.
  VOLUME_KEYS = %w[instance_id name id raid_array_id].freeze

  # Upper bounds (seconds) so a stuck flush or snapshot cannot hang forever.
  FLUSH_TIMEOUT = 60 * 60
  SNAPSHOT_TIMEOUT = 60 * 60

  # Pauses (seconds) between polls.
  FLUSH_POLL_INTERVAL = 1
  SNAPSHOT_POLL_INTERVAL = 5

  # Total number of GET attempts, and the pause (seconds) between them.
  API_MAX_ATTEMPTS = 5
  API_RETRY_DELAY = 2

  # @param api_token [String] Scalarium API token, sent as X-Scalarium-Token
  # @param mount_point [String] mount point passed to xfs_freeze
  # @param couch_bucket_name [String] Couchbase bucket to stop, flush and restart
  def initialize(api_token, mount_point, couch_bucket_name)
    @headers = {
      'X-Scalarium-Token' => api_token,
      'Accept' => 'application/vnd.scalarium-v1+json'
    }
    @mount_point = mount_point
    @couch_bucket_name = couch_bucket_name
  end

  # Runs the whole backup. Any error is printed to STDERR and the process
  # exits with status 1. The unfreeze/restart steps run in every case.
  def start
    puts "Instance #{instance_id}"

    # Resolve the volumes before touching the bucket or the file system so a
    # missing volume or an API failure aborts early and the freeze stays short.
    volumes = instance_volumes
    raise "No RAID volumes found for instance #{instance_id}" if volumes.empty?

    puts "Stopping bucket"
    run!(cbflushctl('stop'))

    puts "Waiting for bucket to flush data"
    wait_for_flush
    puts

    puts "Freezing file system"
    # Must not continue on failure: snapshots of an unfrozen file system
    # would not be consistent.
    run!("xfs_freeze -f #{mount_point}")

    volumes.each { |volume| create_snapshot(volume['id']) }

    puts 'Waiting for snapshots to complete'
    Timeout.timeout(SNAPSHOT_TIMEOUT) { wait_for_completion }
  rescue => e
    STDERR.puts e.message
    exit(1)
  ensure
    # Best effort and unconditional: both steps are attempted even if the
    # first one fails, and even if the failure happened before the freeze.
    puts "Unfreezing file system"
    run("xfs_freeze -u #{mount_point}")
    puts "Restarting bucket"
    run(cbflushctl('start'))
  end

  private

  attr_reader :headers, :couch_bucket_name, :mount_point

  # RAID volumes (those with a raid_array_id) attached to this instance,
  # reduced to VOLUME_KEYS. Memoized.
  #
  # @return [Array<Hash>] empty when the instance has no RAID volumes
  def instance_volumes
    @instance_volumes ||= api_get('/volumes')
      .map { |volume| volume.select { |key, _value| VOLUME_KEYS.include?(key) } }
      .select { |volume| volume['raid_array_id'] && volume['instance_id'] == instance_id }
  end

  # Instance id read from the local Scalarium cluster state file. Memoized.
  def instance_id
    @instance_id ||= JSON.parse(File.read(CLUSTER_STATE_FILE))['instance']['id']
  end

  # Absolute API URL for +path+ (which must start with a slash).
  def url(path)
    "#{API_BASE_URL}#{path}"
  end

  # cbflushctl command line for +action+ ("stop" or "start") on the bucket.
  def cbflushctl(action)
    "#{COUCHBASE_BIN}/cbflushctl #{COUCHBASE_NODE} #{action} #{couch_bucket_name}"
  end

  # Runs +cmd+ through backticks and prints FAILED when it does not succeed.
  #
  # @return [Boolean] whether the command exited successfully
  def run(cmd)
    `#{cmd}`
    succeeded = $?.success?
    puts "FAILED\n" unless succeeded
    succeeded
  rescue SystemCallError => e
    # The command could not be started at all (e.g. binary not found).
    puts "FAILED\n"
    STDERR.puts e.message
    false
  end

  # Same as #run but raises when the command fails.
  def run!(cmd)
    raise "Command failed: #{cmd}" unless run(cmd)
  end

  # GETs +path+ and parses the JSON body. Retries on
  # RestClient::InternalServerError, up to API_MAX_ATTEMPTS attempts in total;
  # the last failure is re-raised without a trailing sleep.
  def api_get(path)
    attempts = 0
    begin
      attempts += 1
      JSON.parse(RestClient.get(url(path), headers).body)
    rescue RestClient::InternalServerError
      raise if attempts >= API_MAX_ATTEMPTS

      sleep API_RETRY_DELAY
      retry
    end
  end

  # Requests a snapshot of +volume_id+ and raises unless the API answers 201,
  # so the caller never waits on a snapshot that was not created.
  def create_snapshot(volume_id)
    puts "Snapshotting #{volume_id}"
    res = RestClient.post(url("/volumes/#{volume_id}/snapshot"), '', headers)
    return if res.code == 201

    puts 'FAILED'
    puts res.body
    puts
    raise "Snapshot of volume #{volume_id} failed with HTTP status #{res.code}"
  end

  # True when cbstats reports ep_uncommitted as 0 for the bucket.
  def bucket_flushed?
    uncommitted = `#{COUCHBASE_BIN}/cbstats #{COUCHBASE_NODE} all -b #{couch_bucket_name} | grep ep_uncommitted | awk '{print $2}'`
    uncommitted.strip == '0'
  end

  # Polls #bucket_flushed?, printing a dot per poll, until the bucket is
  # flushed or FLUSH_TIMEOUT elapses.
  def wait_for_flush
    Timeout.timeout(FLUSH_TIMEOUT) do
      until bucket_flushed?
        print '.'
        sleep FLUSH_POLL_INTERVAL
      end
    end
  rescue Timeout::Error
    raise "Timed out after #{FLUSH_TIMEOUT}s waiting for bucket #{couch_bucket_name} to flush"
  end

  # Polls the first snapshot listed for every volume until all of them have
  # status "completed", printing the average progress on each round.
  # NOTE(review): assumes the API lists the most recent snapshot first - confirm.
  def wait_for_completion
    return if instance_volumes.empty?

    loop do
      # A volume with no snapshot listed yet counts as 0% and not completed.
      snapshots = instance_volumes.map do |volume|
        api_get("/volumes/#{volume['id']}/snapshots").first || {}
      end
      progress = snapshots.map { |snapshot| snapshot['progress'].to_i }
      print "#{progress.reduce(:+) / progress.size.to_f}% ... "

      break if snapshots.all? { |snapshot| snapshot['status'] == 'completed' }

      sleep SNAPSHOT_POLL_INTERVAL
    end
  end
end
 
# Run the backup with the placeholder settings and report how long it took.
started_at = Time.now
backup = Backup.new('<scalarium api token>', '<mount point>', '<couch bucket>')
backup.start
elapsed = Time.now - started_at
puts "Backup took #{elapsed}s"

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.