Skip to content

Instantly share code, notes, and snippets.

@piotrmaslanka
Last active April 7, 2023 12:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save piotrmaslanka/b56f92962ad962d744009b40ddb54c78 to your computer and use it in GitHub Desktop.
Save piotrmaslanka/b56f92962ad962d744009b40ddb54c78 to your computer and use it in GitHub Desktop.
Moving data between different Cassandras if you're running them using Docker
#!/usr/bin/python3
import os
import sys
"""
A tool to copy a named Cassandra snapshot to a remote host using rsync.
Uses ssh to create the target directories on the remote host.
Requires: rsync ssh
Keyspaces with names starting with system will be omitted.
Usage:
- nodetool snapshot -t name-of-the-snapshot
- backup_cassandra.py target_host:/base/dir/on_the_target_host /local/path/to/cassandra/data name-of-the-snapshot
- backup_cassandra.py target_host:/base/dir/on_the_target_host /local/path/to/cassandra/data name-of-the-snapshot
Additionally, if you define an environment variable called EXCLUDE_KEYSPACES, the space-separated keyspaces listed in it won't be synced.
Additionally, if you define an environment variable called EXCLUDE_TABLES, the space-separated tables listed in it won't be synced.
"""
if __name__ == '__main__':
    # Imported locally: this script's import header only provides os and sys.
    import subprocess

    if len(sys.argv) != 4 or ':' not in sys.argv[1]:
        sys.exit('Usage: backup_cassandra.py target_host:/base/dir/on_the_target_host '
                 '/local/path/to/cassandra/data name-of-the-snapshot')

    # Split on the first ':' only, so the remote directory may contain colons.
    target_host, target_host_dir = sys.argv[1].split(':', 1)
    local_cassandra_directory = sys.argv[2]
    snap_name = sys.argv[3]

    # split() without arguments tolerates repeated/leading/trailing whitespace
    # and yields nothing for an unset or empty variable, so no bogus '' entry
    # ends up in the exclusion sets.
    excluded_keyspaces = set(os.environ.get('EXCLUDE_KEYSPACES', '').split())
    tables_to_kill = set(os.environ.get('EXCLUDE_TABLES', '').split())

    # Filtering (instead of list.remove) means an excluded keyspace that does
    # not exist locally is simply ignored rather than raising ValueError.
    keyspaces = [
        ks for ks in os.listdir(local_cassandra_directory)
        if not ks.startswith('system')
        and ks not in excluded_keyspaces
        and os.path.isdir(os.path.join(local_cassandra_directory, ks))
    ]

    failures = 0
    for keyspace in keyspaces:
        keyspace_dir = os.path.join(local_cassandra_directory, keyspace)
        for table_directory in os.listdir(keyspace_dir):
            # The table name is the directory name without its last
            # '-'-separated component (presumably the table id). A directory
            # name without any '-' is used unchanged as the table name.
            table_name = table_directory.rpartition('-')[0] or table_directory
            if table_name in tables_to_kill:
                continue

            src_path = os.path.join(keyspace_dir, table_directory, 'snapshots', snap_name)
            if not os.path.isdir(src_path):
                # No snapshot with the requested name for this table: nothing to copy.
                print('Skipping {0}:{1} (no snapshot named {2})'.format(
                    keyspace, table_name, snap_name))
                continue

            print('Proceeding with {keyspace}:{table_name}'.format(
                keyspace=keyspace, table_name=table_name))
            remote_dir = '{0}/{1}/{2}'.format(target_host_dir, keyspace, table_name)

            # Argument lists bypass the local shell, so local paths containing
            # spaces or shell metacharacters are passed through intact.
            # NOTE(review): ssh still hands the command to the remote shell, so
            # the remote directory is interpreted there (e.g. '~' expansion).
            if subprocess.call(['ssh', target_host, 'mkdir', '-p', remote_dir]) != 0:
                print('Could not create {0} on {1}'.format(remote_dir, target_host),
                      file=sys.stderr)
                failures += 1
                continue

            # A trailing slash on the source makes rsync copy the directory's
            # contents rather than the directory itself.
            status = subprocess.call([
                'rsync', '--progress', '-r', '-v',
                src_path + '/',
                '{0}:{1}'.format(target_host, remote_dir),
            ])
            if status != 0:
                print('rsync failed for {0}:{1} (exit status {2})'.format(
                    keyspace, table_name, status), file=sys.stderr)
                failures += 1

    if failures:
        # Keep going on individual failures, but do not report overall success.
        sys.exit('{0} table(s) could not be backed up'.format(failures))
#!/usr/bin/python3
import os
import sys
import subprocess
"""
A tool to restore the data to Cassandra
Requires: docker + all relevant schemas to be available
Usage:
- restore_cassandra.py cassandra-container-name RPC_ADDRESS /var/lib/cassandra/local/container/site/of/backup/made/by/backup_cassandra.py
"""
def do_exec(path, **kwargs):
    """
    Run a command inside a Docker container and return its output as tokens.

    :param path: command template; ``{name}`` placeholders are filled from kwargs
    :param kwargs: placeholder values; must include ``container_name``
    :return: list of the non-empty tokens of the command's standard output,
        split on spaces, tabs, carriage returns and newlines
    :raises KeyError: if the template references a name missing from kwargs
    :raises subprocess.CalledProcessError: if the command exits with a
        non-zero status
    """
    # Format the complete template exactly once. Formatting the already
    # substituted string a second time would re-interpret any '{' or '}'
    # contained in the substituted values and fail or corrupt the command.
    cmd = ('docker exec -it {container_name} ' + path).format(**kwargs)
    print('Attempting', cmd)
    # NOTE(review): the command goes through the shell unquoted, so values
    # containing spaces or shell metacharacters are not safe here.
    output = subprocess.check_output(cmd, shell=True).decode('utf-8')
    # Normalise the other separators to spaces, then drop empty tokens.
    for separator in ('\t', '\r', '\n'):
        output = output.replace(separator, ' ')
    return [token for token in output.split(' ') if token.strip()]
if __name__ == '__main__':
    # Positional arguments: container name, Cassandra address, and the backup
    # root directory (listed through do_exec, i.e. from inside the container).
    container_name, cassandra_host, backup_path = sys.argv[1], sys.argv[2], sys.argv[3]

    # Walk the <backup_path>/<keyspace>/<table> layout and load every table.
    for keyspace in do_exec('ls {backup_path}',
                            container_name=container_name,
                            backup_path=backup_path):
        table_names = do_exec('ls {backup_path}/{keyspace}',
                              container_name=container_name,
                              backup_path=backup_path,
                              keyspace=keyspace)
        for table in table_names:
            do_exec('sstableloader -d {cassandra_host} {backup_path}/{keyspace}/{table}',
                    container_name=container_name,
                    cassandra_host=cassandra_host,
                    backup_path=backup_path,
                    keyspace=keyspace,
                    table=table)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment