Skip to content

Instantly share code, notes, and snippets.

@amorton
Created November 6, 2011 09:02
Show Gist options
  • Save amorton/1342624 to your computer and use it in GitHub Desktop.
Save amorton/1342624 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# encoding: utf-8
"""
Changes the format specifier on SSTables **IN PLACE**
e.g. change
TwitterUserHistoricalValues-h-20983-Data.db
to be
TwitterUserHistoricalValues-g-20983-Data.db
Useful if you need to downgrade an install.
NOTE: check that the file versions are compatible.
"""
import argparse
import glob
import logging
import os.path
import sys
# Configure the root logger so every message from DEBUG upwards is emitted.
logging.basicConfig(level=logging.DEBUG)
# Module-wide logger; it is named after this script's file path.
log = logging.getLogger(__file__)
def split_filename(filename):
    """Split an SSTable file name into its components.

    Two layouts are understood:

    * ``TwitterUserHistoricalValues-g-20983-Data.db`` (with a version token)
    * ``TwitterUserHistoricalValues-20983-Data.db`` (no version token; the
      version is reported as ``"h"``)

    :param filename: Bare file name, without any directory part.
    :returns: Tuple of (cf_name, file_version, file_number, component) where
        file_number is an int and component has the ``.db`` suffix removed.
    :raises ValueError: If the name does not follow either layout.
    """
    suffix = ".db"
    tokens = filename.split("-")

    # The version token is optional, so a well formed name has exactly three
    # or four dash separated tokens. Anything else is rejected rather than
    # silently truncated, because callers rebuild file names from the result.
    if len(tokens) == 3:
        cf_name, file_num, component = tokens
        # No version token in the name; report the default version.
        version = "h"
    elif len(tokens) == 4:
        cf_name, version, file_num, component = tokens
        if not version.isalpha():
            raise ValueError(
                "Bad version %r in sstable file name %r" % (version, filename))
    else:
        raise ValueError("Not an sstable file name: %r" % (filename,))

    try:
        file_num = int(file_num)
    except ValueError:
        raise ValueError(
            "Bad file number %r in sstable file name %r" % (file_num, filename))

    # Explicit check instead of assert, which is stripped when run with -O.
    if not component.endswith(suffix):
        raise ValueError(
            "Component %r of %r does not end with %s"
            % (component, filename, suffix))

    # Strip only the trailing suffix, not every occurrence of ".db".
    return (cf_name, version, file_num, component[:-len(suffix)])
def match_files(keyspace_dir, match_spec):
    """Find the sstable files we want to change.

    Only the files directly inside keyspace_dir are considered; sub
    directories are not descended into.

    :param keyspace_dir: keyspace dir to check.
    :param match_spec: File format spec to match.
    :return: List of sstable paths, each one keyspace_dir joined with the
        file name. Empty if the directory is missing or nothing matches.
    """
    # The next() builtin works on both Python 2.6+ and Python 3, unlike the
    # generator's .next() method. The default covers a missing or unreadable
    # directory, for which os.walk() yields nothing at all.
    _, _, files = next(os.walk(keyspace_dir), (keyspace_dir, [], []))

    matched = []
    for f in files:
        # skip any compacted markers
        if f.endswith("-Compacted"):
            log.debug("Skipping compacted marker %s", f)
            continue

        parts = split_filename(f)
        log.debug("For file %s got parts %s", f, parts)
        # parts[1] is the file version token.
        if parts[1] == match_spec:
            matched.append(os.path.join(keyspace_dir, f))

    log.info("Matched files %s", matched)
    return matched
def change_file_spec(match_files, to_spec):
    """Compute the renamed path for every matched sstable file.

    Nothing is renamed on disk here; only the new names are worked out.

    :param match_files: Full file paths.
    :param to_spec: File version to change to. A falsy value leaves the
        existing version token in place.
    :returns: List of the new file paths, in the same order as match_files.
    """
    renamed = []
    for path in match_files:
        directory, name = os.path.split(path)

        # Fields are (cf_name, file_version, file_number, component).
        fields = [str(field) for field in split_filename(name)]
        if to_spec:
            fields[1] = str(to_spec)

        renamed.append(os.path.join(directory, "-".join(fields) + ".db"))

    log.info("New file names %s", renamed)
    return renamed
def rename_files(from_files, to_files, test_only, once_only):
    """Renames all of the files in from_files to be to_files.

    :param from_files: Existing file paths.
    :param to_files: New file paths, positionally paired with from_files.
    :param test_only: If true only log what would be renamed.
    :param once_only: If true stop after the first pair.
    :raises RuntimeError: If the two lists differ in length, or if a real
        rename would replace a file that already exists.
    """
    if len(from_files) != len(to_files):
        raise RuntimeError("File name length mismatch")

    pairs = list(zip(from_files, to_files))
    if once_only:
        pairs = pairs[:1]

    if not test_only:
        # os.rename() silently replaces an existing destination on POSIX.
        # Check every destination before touching anything so we neither
        # clobber an sstable nor stop half way through the batch.
        clashes = [
            to_f for from_f, to_f in pairs
            if to_f != from_f and os.path.exists(to_f)
        ]
        if clashes:
            raise RuntimeError(
                "Refusing to overwrite existing files %s" % (clashes,))

    for from_f, to_f in pairs:
        if test_only:
            log.info("TEST Renaming %s to %s", from_f, to_f)
        else:
            log.info("Renaming %s to %s", from_f, to_f)
            os.rename(from_f, to_f)

        if once_only:
            # Logger.warn() is a deprecated alias of Logger.warning().
            log.warning("once_only specified, exiting.")
            break
def get_parser():
    """Build the command line parser for the script.

    The parser takes one positional ``keyspace`` argument, the string
    options ``--from_spec`` (default "h") and ``--to_spec`` (default "g"),
    and the boolean switches ``--test`` and ``--once_only``.

    :returns: The configured argparse.ArgumentParser.
    """
    # (flag, default, help) for the options that take a string value.
    spec_options = (
        ("--from_spec", "h", "Format spec to change from."),
        ("--to_spec", "g", "Format spec to change to."),
    )
    # (flag, help) for the on/off switches; all of them default to off.
    switches = (
        ("--test", "Test only, do not rename."),
        ("--once_only", "Only rename one file."),
    )

    parser = argparse.ArgumentParser(
        prog="SSTable Rename",
        description="Changes the format specifier of SSTables *inplace*.",
    )
    parser.add_argument(
        "keyspace",
        metavar="keyspace",
        type=str,
        help="Keyspace to link sstables from.",
    )
    for flag, default, help_text in spec_options:
        parser.add_argument(flag, type=str, default=default, help=help_text)
    for flag, help_text in switches:
        parser.add_argument(
            flag, action="store_true", default=False, help=help_text)

    return parser
def main():
    """Run the rename from the command line arguments.

    Finds the sstables of the requested keyspace that carry the
    ``--from_spec`` version, works out their names under ``--to_spec`` and
    hands both lists to rename_files().

    :returns: 0 on success.
    :raises RuntimeError: If no file matched or no new names were produced.
    """
    args = get_parser().parse_args()
    log.debug("Got args %s", args)

    # The keyspace is resolved below the fixed Cassandra data directory.
    keyspace_dir = os.path.join("/var/lib/cassandra/data", args.keyspace)
    log.debug("Keyspace dir is %s", keyspace_dir)

    sources = match_files(keyspace_dir, args.from_spec)
    if not sources:
        raise RuntimeError("No matched files.")

    targets = change_file_spec(sources, args.to_spec)
    if not targets:
        raise RuntimeError("No renamed files.")

    rename_files(sources, targets, args.test, args.once_only)
    return 0
if __name__ == "__main__":
    # Use main()'s result as the process exit status; a falsy result means 0.
    exit_status = main()
    sys.exit(exit_status if exit_status else 0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment