Skip to content

Instantly share code, notes, and snippets.

@ykhrustalev
Last active December 11, 2015 08:48
Show Gist options
  • Save ykhrustalev/5926e3ddb6a72b5e3597 to your computer and use it in GitHub Desktop.
Save ykhrustalev/5926e3ddb6a72b5e3597 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import grp
import json
import logging
import optparse
import os
import pwd
import re
import subprocess
import sys
# Configure the root logger with the default handler so records are emitted.
logging.basicConfig()
# Module-wide logger used by every helper below; main() sets its level.
logger = logging.getLogger('helper')
def execute(cmd, **kwargs):
    """Run *cmd* as a subprocess and return ``(returncode, stdout, stderr)``.

    Extra keyword arguments are forwarded to ``subprocess.Popen``.  When the
    caller does not supply ``stdout`` / ``stderr`` they default to pipes so
    the output can be captured.  The command line and its results are logged
    at debug level on success and at error level on failure.

    Raises:
        Exception: when the command exits with a non-zero return code.
    """
    popen_kwargs = dict(kwargs)
    popen_kwargs.setdefault('stdout', subprocess.PIPE)
    popen_kwargs.setdefault('stderr', subprocess.PIPE)

    proc = subprocess.Popen(cmd, **popen_kwargs)
    out, err = proc.communicate()
    rc = proc.returncode
    succeeded = rc == 0

    # Same four records either way; only the level and first line differ.
    if succeeded:
        log, headline = logger.debug, "running: %s"
    else:
        log, headline = logger.error, "Failed to run: %s"
    log(headline, ' '.join(cmd))
    log("return code: %s", rc)
    log("stdout: %s", out)
    log("stderr: %s", err)

    if not succeeded:
        raise Exception('Failed to run command')
    return rc, out, err
def current_node():
    """Return the first ``Hostname:`` value printed by ``gluster peer status``.

    Raises:
        Exception: when no line of the output carries a hostname.
    """
    _, out, _ = execute(['gluster', 'peer', 'status'])
    hostname_re = re.compile(r'^Hostname:\s*(.+)')
    for line in out.split('\n'):
        found = hostname_re.match(line)
        if found is None:
            continue
        hostname = found.group(1)
        logger.debug("current node: %s", hostname)
        return hostname
    raise Exception('can not identify node >>%s<<' % out)
def get_bricks(volume):
    """Return the brick paths of *volume* that are listed for this node.

    The node name comes from ``current_node()`` and the brick list is parsed
    from the ``BrickN: <node>:<path>`` lines of ``gluster volume info``.

    Args:
        volume: name of the gluster volume to inspect.

    Returns:
        A non-empty list of brick path strings.

    Raises:
        Exception: when no brick line matches the node.
    """
    node = current_node()
    _, out, _ = execute(['gluster', 'volume', 'info', volume])

    # Escape the node name: host names contain dots, which would otherwise
    # act as regex wildcards and could match an unrelated host.
    brick_re = re.compile(r'^Brick\d+: %s:(.+)' % re.escape(node))

    bricks = []
    for line in out.split('\n'):
        found = brick_re.match(line)
        if found:
            bricks.append(found.group(1))

    if not bricks:
        # Format the message here; Exception does not interpolate its args.
        raise Exception("Failed to read bricks on volume: %s" % volume)

    logger.debug("bricks: %s", bricks)
    return bricks
def get_file_attr(brick, path):
    """Return the ``getfattr`` dump (hex encoded) of *path* on *brick*.

    The inspected file is the concatenation of *brick* and *path*.  Empty
    lines are removed from the command output before it is returned.
    """
    target = '%s%s' % (brick, path)
    command = ['getfattr', '-d', '-m', '.', '-e', 'hex', target]
    _, out, _ = execute(command)
    return '\n'.join(line for line in out.split('\n') if line)
def get_brick_gfid(brick, path):
    """Return the hex gfid of *path* on *brick*, or None if unavailable.

    None is returned both when the file does not exist on the brick and when
    the attribute dump has no ``trusted.gfid`` line; each case is logged at
    debug level.
    """
    if not os.path.exists(get_brick_data_path(brick, path)):
        logger.debug("brick %s misses %s", brick, path)
        return None

    gfid_re = re.compile(r'^trusted\.gfid=0x(.+)')
    for line in get_file_attr(brick, path).split('\n'):
        found = gfid_re.match(line)
        if found:
            return found.group(1)

    logger.debug("no gfid in attribute output")
    return None
def get_gfid(volume, path):
    """Return the gfid of *path* from the first brick of *volume* that has it.

    Bricks are queried lazily in the order reported by ``get_bricks``; a
    warning is logged and None returned when no brick yields a gfid.
    """
    candidates = (get_brick_gfid(brick, path) for brick in get_bricks(volume))
    gfid = next((value for value in candidates if value), None)
    if gfid is None:
        logger.warning('no brick contains file on path %s', path)
    return gfid
def get_brick_data_path(brick, path):
    """Return the location of *path* inside *brick*.

    The two values are concatenated as-is, so *path* is expected to carry
    its own leading separator.
    """
    template = '%s%s'
    return template % (brick, path)
def get_brick_attrs_path(brick, gfid):
    """Return the ``.glusterfs`` entry path for *gfid* inside *brick*.

    The layout is ``<brick>/.glusterfs/<aa>/<bb>/<8-4-4-4-12 gfid>``, where
    ``aa`` and ``bb`` are the first two pairs of characters of *gfid*.
    """
    # Slice boundaries of the dashed 8-4-4-4-12 grouping.
    bounds = (0, 8, 12, 16, 20, 32)
    groups = [gfid[start:stop] for start, stop in zip(bounds, bounds[1:])]
    relative = '%s/%s/%s' % (gfid[0:2], gfid[2:4], '-'.join(groups))
    return '%s/.glusterfs/%s' % (brick, relative)
class FileStats(dict):
    """Dictionary of file properties with a line-per-entry text form."""

    def __str__(self):
        """Return ``key: value`` lines sorted by key, omitting ``path``."""
        return '\n'.join(
            " %s: %s" % (key, value)
            for key, value in sorted(self.items())
            if key != 'path'
        )
def get_file_stats(path):
    """Collect a ``FileStats`` description of *path*.

    For a missing path only ``path`` and ``exists`` (False) are set.
    Otherwise the result holds size, timestamps, the ``oct()`` form of the
    mode, owner and group (names, or numeric ids when unknown), plus the
    output of ``ls`` and ``stat``.  Directories get ``isdir``; other files
    get the ``md5sum`` output.

    Raises:
        Exception: propagated from ``execute`` when a helper command fails.
    """
    res = FileStats()
    res["path"] = path

    if not os.path.exists(path):
        res["exists"] = False
        return res

    stats = os.stat(path)

    if os.path.isdir(path):
        res['isdir'] = True
        # -d lists the directory entry itself rather than its contents.
        ls_out = execute(['ls', '-ld', path])[1]
    else:
        ls_out = execute(['ls', '-la', path])[1]
        res["md5sum"] = execute(['md5sum', path])[1]

    # Fall back to the numeric ids when there is no passwd/group entry.
    try:
        username = pwd.getpwuid(stats.st_uid).pw_name
    except KeyError:
        username = stats.st_uid
    try:
        groupname = grp.getgrgid(stats.st_gid).gr_name
    except KeyError:
        groupname = stats.st_gid

    mode = oct(stats.st_mode)
    listing = ls_out.rstrip()
    stat_out = execute(['stat', path])[1]

    res["size"] = stats.st_size
    res["ctime"] = stats.st_ctime
    res["atime"] = stats.st_atime
    res["mtime"] = stats.st_mtime
    res["mode"] = mode
    res["user"] = username
    res["group"] = groupname
    res["ls-la"] = listing
    res["stat"] = stat_out
    return res
class BrickFileInfo(object):
    """Information gathered about one file on a single brick."""

    def __init__(self, attributes, data_file, attributes_file):
        """Store the three optional parts of the report.

        Args:
            attributes: extended-attribute dump, or None to omit it.
            data_file: mapping with a ``path`` key describing the data file,
                or None to omit it.
            attributes_file: mapping with a ``path`` key describing the
                attributes file, or None to omit it.
        """
        self.attributes = attributes
        self.data_file = data_file
        self.attributes_file = attributes_file

    def __str__(self):
        """Return the report, one heading plus body per part that is set."""
        lines = []
        if self.attributes:
            lines.append("attributes:")
            lines.append(str(self.attributes))
        if self.data_file:
            lines.append("data file: %s" % self.data_file['path'])
            lines.append(str(self.data_file))
        if self.attributes_file:
            lines.append("attributes file: %s" % self.attributes_file['path'])
            lines.append(str(self.attributes_file))
        return '\n'.join(lines)
class VolumeFileInfo(object):
    """Per-brick reports for one path on one volume."""

    def __init__(self, volume, path):
        """Start an empty report for *path* on *volume*."""
        self.volume = volume
        self.path = path
        self.brick_files = []

    def __str__(self):
        """Return a header line followed by each brick report.

        Every brick report is preceded by a separator line of 79 asterisks.
        """
        separator = "*" * 79
        lines = ["** Info on %s on '%s'" % (self.path, self.volume)]
        for brick_file in self.brick_files:
            lines.extend((separator, str(brick_file)))
        return '\n'.join(lines)

    def add_brick(self, item):
        """Append the report *item* for one more brick."""
        self.brick_files.append(item)
def get_info(volume, path, show_attrs, show_data_file, show_attr_file):
    """Build a ``VolumeFileInfo`` for *path* across the bricks of *volume*.

    Bricks on which no gfid can be read for *path* are left out.  The three
    ``show_*`` flags select which parts are collected for each brick: the
    extended attributes, the data file stats and the ``.glusterfs`` file
    stats; a part that is switched off is stored as None.
    """
    info = VolumeFileInfo(volume, path)
    for brick in get_bricks(volume):
        gfid = get_brick_gfid(brick, path)
        if gfid:
            data_path = get_brick_data_path(brick, path)
            attrs_path = get_brick_attrs_path(brick, gfid)

            attributes = get_file_attr(brick, path) if show_attrs else None
            data_stats = get_file_stats(data_path) if show_data_file else None
            attr_stats = get_file_stats(attrs_path) if show_attr_file else None

            info.add_brick(BrickFileInfo(attributes, data_stats, attr_stats))
    return info
def set_gf_attr(volume, brick, path, key, value):
    """Print the ``setfattr`` command for *path* on *brick* and re-read attrs.

    Args:
        volume: accepted for interface symmetry; not used here.
        brick: brick directory that holds the file.
        path: file path relative to the brick (with leading separator).
        key: extended attribute name.
        value: extended attribute value.

    Exits the process with status 1 when the file is missing on the brick.
    """
    file_path = get_brick_data_path(brick, path)
    if not os.path.exists(file_path):
        logger.warning("File is missing %s", file_path)
        # sys.exit instead of the interactive-only ``exit`` helper from site.
        sys.exit(1)

    # NOTE(review): the command is only printed, never executed; this looks
    # like a deliberate dry run, so it is kept as is - confirm.
    print(['setfattr', '-n', key, '-v', value, file_path])

    # get_file_attr() prepends the brick itself, so it must receive the
    # brick-relative path; passing file_path would duplicate the brick prefix.
    get_file_attr(brick, path)
def _load_gfid_cache(path):
    """Return the gfid -> filename mapping stored as JSON at *path*.

    A missing file yields an empty mapping silently.  An unreadable file,
    invalid JSON or a JSON document that is not an object is reported on
    stderr and also yields an empty mapping, so the script can still run.
    """
    if not os.path.exists(path):
        return {}
    try:
        with open(path) as f:
            data = json.load(f)
    except (IOError, OSError, ValueError):
        # ValueError covers malformed JSON; IOError/OSError cover read errors.
        sys.stderr.write("failed to open %s\n" % path)
        return {}
    if not isinstance(data, dict):
        # The cache is used through dict methods; reject any other shape.
        sys.stderr.write("failed to open %s\n" % path)
        return {}
    return data


# The cache file lives next to this script.
_filecache = os.path.join(os.path.dirname(__file__), '.filecache')
# In-memory gfid -> filename cache, pre-populated from the cache file.
_gfid_cache = _load_gfid_cache(_filecache)
def gfid_to_filename_from_cache(gfid):
    """Return the filename cached for *gfid*, or None when it is not cached."""
    cached_name = _gfid_cache.get(gfid, None)
    return cached_name
def gfid_to_filename_set_cache(gfid, filename):
    """Remember *filename* for *gfid* and rewrite the cache file.

    The whole in-memory cache is serialized as indented JSON on every call.
    """
    _gfid_cache[gfid] = filename
    with open(_filecache, 'w') as cache_file:
        serialized = json.dumps(_gfid_cache, indent=2)
        cache_file.write(serialized)
def _split_copies_by_mode(bricks, brick_file):
    """Classify every brick's copy of *brick_file* as good or bad.

    *brick_file* is an absolute path on one brick.  Its first three
    '/'-separated components (the empty root plus a two-level brick
    directory) are dropped and the remainder is looked up under each brick
    in *bricks*.  Copies whose mode string is '0101000' (regular file, sticky
    bit only, no permission bits) are bad; other existing copies are good;
    bricks that do not have the file are skipped.

    Returns:
        A ``(good, bad)`` pair of sets of paths.
    """
    relative = '/'.join(brick_file.split('/')[3:])
    good, bad = set(), set()
    for brick in bricks:
        candidate = os.path.join(brick, relative)
        stats = get_file_stats(candidate)
        if 'mode' not in stats:
            # get_file_stats() sets no mode for a path that does not exist.
            continue
        if stats['mode'] == '0101000':
            bad.add(candidate)
        else:
            good.add(candidate)
    return good, bad


def _resolve_data_file(brick, gfid):
    """Return the data file for *gfid* from the cache or the resolver script.

    On a cache miss ``./gfid-resolver.sh`` is run and the last tab-separated
    field of its first line containing 'File' is taken and cached.  None is
    returned (after reporting on stderr) when nothing was found.
    """
    data_file = gfid_to_filename_from_cache(gfid)
    if data_file:
        return data_file

    print('looking for file by gfid %s %s' % (brick, gfid))
    _, out, _ = execute(['./gfid-resolver.sh', brick, gfid])

    data_file = None
    for line in out.split('\n'):
        if 'File' in line:
            data_file = line.split('\t')[-1].strip()
            break

    if data_file:
        gfid_to_filename_set_cache(gfid, data_file)
        return data_file

    sys.stderr.write("can't find file %s\n" % out)
    return None


def clean_t_copies(volume, gffile, safe):
    """Remove the bad (mode '0101000') copies of a file and re-add the good one.

    Args:
        volume: gluster volume whose bricks are examined.
        gffile: absolute path of a ``.glusterfs`` gfid file on a brick, e.g.
            /data2/gluster/.glusterfs/eb/af/ebaf76d7-24fb-41a6-8f78-8904a537f7db
        safe: when true nothing is deleted or moved; the planned actions are
            only printed.

    The gfid file and the data file it belongs to are located on every
    brick and split into good and bad copies.  Unless exactly one good copy
    and a matching, non-empty set of bad copies is found, the findings are
    printed and the process exits with status 1.  Otherwise the bad copies
    are unlinked and the good data file is moved to /tmp and from there to
    the same relative location under /shared (presumably the client mount of
    the volume - TODO confirm).

    Exits with status 1 as well when the data file cannot be resolved or a
    move fails.
    """
    parts = gffile.split('/')
    bricks = get_bricks(volume)

    good_dot_files, bad_dot_files = _split_copies_by_mode(bricks, gffile)

    particular_brick = '/'.join(parts[:3])
    gfid = parts[-1]
    particular_data_file = _resolve_data_file(particular_brick, gfid)
    if not particular_data_file:
        # Without a data file there is nothing to compare or repair; the
        # original code went on and crashed on None here.
        sys.exit(1)

    good_data_files, bad_data_files = _split_copies_by_mode(
        bricks, particular_data_file)

    warnings = []
    if not bad_dot_files:
        warnings.append("there are no bad dot files")
    if not bad_data_files:
        warnings.append("there are no bad data files")
    if len(bad_dot_files) != len(bad_data_files):
        warnings.append(
            "there is different number of bad dot and data files %s vs %s"
            % (len(bad_dot_files), len(bad_data_files))
        )
    if not good_dot_files:
        warnings.append("there are no good dot files")
    if not good_data_files:
        warnings.append("there are no good data files")
    if len(good_dot_files) != len(good_data_files):
        warnings.append(
            "there is different number of good dot and data files %s vs %s"
            % (len(good_dot_files), len(good_data_files))
        )
    if len(good_data_files) > 1:
        warnings.append("there is more then one good copy")

    if warnings:
        sys.stderr.write('doing nothing becase %s\n' % "\n".join(warnings))
        print('good_dot_files %s' % (good_dot_files,))
        print('good_data_files %s' % (good_data_files,))
        print('bad_dot_files %s' % (bad_dot_files,))
        print('bad_data_files %s' % (bad_data_files,))
        sys.exit(1)

    # Past this point all four sets are non-empty and exactly one good data
    # file exists; any other combination produced a warning above.
    statements = []
    for name in list(bad_dot_files) + list(bad_data_files):
        if not safe:
            os.unlink(name)
        statements.append("removing %s" % name)

    good_file_path = next(iter(good_data_files))
    original_name = os.path.basename(good_file_path)
    original_folder = os.path.dirname(good_file_path)
    tmp_path = '/tmp/%s' % original_name
    shared_path = os.path.join(
        '/shared',
        '/'.join(original_folder.split('/')[3:]),
        original_name
    )

    for source, target in ((good_file_path, tmp_path),
                           (tmp_path, shared_path)):
        statements.append('mv %s %s' % (source, target))
        if safe:
            continue
        # Argument list instead of a shell string: paths with spaces or
        # shell metacharacters are passed through untouched.
        if subprocess.call(['mv', source, target]) != 0:
            # Stop here so a stale file in /tmp is never moved to /shared.
            sys.stderr.write('failed to run: mv %s %s\n' % (source, target))
            print('\n'.join(statements))
            sys.exit(1)

    print('ok')
    print('\n'.join(statements))
def main():
    """Parse the command line and run the selected action.

    Exactly one of ``--fix-copies``, ``--info``, ``--gfid`` or
    ``--set-gf-attr`` selects the action (checked in that order); without
    any of them the usage text is printed and the process exits with
    status 1.  Positional arguments are the paths to operate on.
    """
    parser = optparse.OptionParser()
    parser.add_option("--volume", dest="volume", default="shared",
                      help="which volume to use")
    parser.add_option("--fix-copies", dest="clean_t_copies",
                      action="store_true", default=False,
                      help="clean ------T copies")
    parser.add_option("--safe", dest="safe",
                      action="store_true", default=False,
                      help="clean ------T copies dry run")
    parser.add_option("--info", dest="get_info",
                      action="store_true", default=False,
                      help="get info about specified path")
    parser.add_option("-A", "--show-attrs", dest="show_attrs",
                      action="store_true", default=False,
                      help="with --info: show extended attributes")
    parser.add_option("-d", "--show-data-file", dest="show_data_file",
                      action="store_true", default=False,
                      help="with --info: show stats of the data file")
    parser.add_option("-a", "--show-attr-file", dest="show_attr_file",
                      action="store_true", default=False,
                      help="with --info: show stats of the .glusterfs file")
    parser.add_option("--gfid", dest="get_gfid",
                      action="store_true", default=False,
                      help="get GFID on specified path")
    parser.add_option("--set-gf-attr", dest="set_gf_attr",
                      action="store_true", default=False,
                      help="set info about specified path")
    parser.add_option("--key", dest="key", default=None,
                      help="key to set attribute on")
    parser.add_option("--value", dest="value", default=None,
                      help="value to set for the attribute")
    parser.add_option("--brick", dest="brick", default=None,
                      help="brick on which path should be looked up")
    parser.add_option("-v", dest="verbose",
                      action="store_true", default=False,
                      help="print status messages to stdout")

    options, args = parser.parse_args()
    logger.setLevel(logging.DEBUG if options.verbose else logging.INFO)

    if options.clean_t_copies:
        if not args:
            # Report a usage error instead of failing with IndexError.
            parser.error("path of the gfid file should be supplied")
        clean_t_copies(options.volume, args[0], options.safe)
    elif options.get_info:
        show_attrs = options.show_attrs
        show_data_file = options.show_data_file
        show_attr_file = options.show_attr_file
        # No section selected means "show everything".
        if not (show_attrs or show_data_file or show_attr_file):
            show_attrs = show_data_file = show_attr_file = True
        for path in args:
            print(get_info(
                options.volume,
                path,
                show_attrs,
                show_data_file,
                show_attr_file
            ))
    elif options.get_gfid:
        for path in args:
            print(get_gfid(options.volume, path))
    elif options.set_gf_attr:
        # All three are required.  The previous test,
        # ``not key or value or brick``, rejected exactly the valid calls.
        if not (options.key and options.value and options.brick):
            parser.error("--key, --value and --brick should be set")
        if len(args) != 1:
            parser.error("only one path should be supplied")
        set_gf_attr(
            options.volume,
            options.brick,
            args[0],
            options.key,
            options.value
        )
    else:
        parser.print_usage()
        sys.exit(1)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment