@luk
Created March 30, 2011 08:03
HBase recovery tools
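HBaseRegionLoops scans .META. for the given table, finds start keys claimed by more than one region ("loopy" regions), and, when run with --fix against a disabled table, keeps the fork path holding the most rows while moving the other regions' directories to a backup folder and deleting their .META. entries. A minimal invocation sketch, assuming the compiled class is on the HBase classpath (the table name and backup path below are illustrative):

hbase luk.hbase.utils.HBaseRegionLoops --fix --destination /hbase-loopy-backup mytable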
package luk.hbase.utils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.catalog.CatalogTracker;
import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.MetaScanner;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Writables;
public class HBaseRegionLoops {
private String mTable = null;
private boolean mDoFix;
private Configuration mConf;
ClusterStatus mStatus;
HConnection mConnection;
CatalogTracker mCatalogTracker;
HBaseAdmin mAdmin;
FileSystem mFileSystem;
Path mTableDir;
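// maps each region start key to all regions in .META. that claim that start key;
// in a healthy table every start key maps to exactly one region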
TreeMap<byte[], TreeSet<HRegionInfo>> mStartKeyToRegionsMap = new TreeMap<byte[], TreeSet<HRegionInfo>>(Bytes.BYTES_COMPARATOR);
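// directory to which removed region directories are moved instead of being deleted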
private Path mBackupFolder = null;
public HBaseRegionLoops(Configuration conf) throws IOException, InterruptedException {
this.mConf = conf;
mAdmin = new HBaseAdmin(conf);
mStatus = mAdmin.getMaster().getClusterStatus();
mConnection = mAdmin.getConnection();
mCatalogTracker = new CatalogTracker(mConnection);
mFileSystem = FileSystem.get(mConf);
}
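/**
* Scans the table's regions, reports start keys with more than one region and,
* if fixing is enabled, repairs each loopy start key in turn.
* @return true if the table had no loopy regions
*/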
private boolean doit() throws Exception {
// get regions for this table
populateStartKeyToRegionsMap(Bytes.toBytes(getTable()));
// find startkeys with multiple regions
Queue<byte[]> corruptedKeys = getCorruptedKeys(mStartKeyToRegionsMap);
if (corruptedKeys.size() > 0) {
System.out.println("Found "+ corruptedKeys.size() +" loopy regions");
} else {
System.out.println("Table '"+ getTable() +"' is has no loopy regions");
return true;
}
// fix if desired
if (doFix())
{
if (!mAdmin.isTableDisabled(getTable())){
throw new Exception("Won't fix enabled table. Disable it first");
}
// fix until no more corrupted keys
while (corruptedKeys.size() > 0){
// get the key
byte[] key = corruptedKeys.remove();
// try to fix.
if (!fixLoopyKey(key))
{
// could not fix. add it to the end of the queue
corruptedKeys.add(key);
}
System.out.println("--");
}
System.out.println("Moved corrupted regions to '"+getBackupFolder() +"'.");
}
return false;
}
/**
* Tries to fix the region with startkey key
* @param key
* @return false, if this is not possible yet (e.g. if it is a multi loop)
* @throws Exception
*/
private boolean fixLoopyKey(final byte[] key) throws Exception {
try{
mCatalogTracker.start();
// find the conjunction after the fork
ArrayList<Set<byte[]>> regionKeys = new ArrayList<Set<byte[]>>();
final byte[] conjunctionKey = getForkConjunction(key, regionKeys);
if (conjunctionKey == null){
return false;
}
// find path where most rows were found
int maxRowIndex = 0;
{
int maxRows = 0;
for (int i = 0; i < regionKeys.size(); i++){
if (regionKeys.get(i).size() > maxRows){
maxRows = regionKeys.get(i).size();
maxRowIndex = i;
}
}
}
// remove the other regions from the file system and from the .META. index
final ArrayList<HRegionInfo> startRegions = new ArrayList<HRegionInfo>(mStartKeyToRegionsMap.get(key));
for (int i = 0; i < startRegions.size(); i++){
// skip correct region
if (i == maxRowIndex){
continue;
}
cleanUpRegionPath(startRegions.get(i), conjunctionKey);
}
return true;
} finally {
mCatalogTracker.stop();
}
}
/**
* Deletes all regions from the specified startRegion to the region with the
* conjunctionKey as start key from .META. and moves the corresponding
* directory to the backup directory
* @param startRegion
* @param conjunctionKey
* @throws IOException
*/
private void cleanUpRegionPath(HRegionInfo startRegion, final byte[] conjunctionKey) throws IOException{
HRegionInfo currentRegion = startRegion;
do {
Path regionDir = getRegionDir(currentRegion);
//copy region to backup path
Path destDir = new Path(mBackupFolder, mTable + Path.SEPARATOR + regionDir.getName());
if (!mFileSystem.exists(destDir.getParent())){
mFileSystem.mkdirs(destDir.getParent());
}
mFileSystem.rename(regionDir, destDir);
System.out.println("Moving "+ regionDir + " to "+ destDir);
// remove from startkey map
mStartKeyToRegionsMap.get(currentRegion.getStartKey()).remove(currentRegion);
// advance
HRegionInfo regionToDelete = currentRegion;
assert(mStartKeyToRegionsMap.get(currentRegion.getEndKey()).size() == 1);
currentRegion = mStartKeyToRegionsMap.get(currentRegion.getEndKey()).first();
// remove from meta
MetaEditor.deleteRegion(mCatalogTracker, regionToDelete);
} while (!Arrays.equals(currentRegion.getStartKey(), conjunctionKey));
System.out.println("-");
}
/**
* for a given region start key, find the conjunction and the row keys
* in the different paths.
*
* A
* / | \
* B C E
* | | |
* C | |
* \ | /
* F
*
* @return null in case it is a multi fork, e.g.:
* A
* / \
* B C
* / \ |
* D E |
* \ | /
* F
*/
private byte[] getForkConjunction(final byte[] startKey, ArrayList<Set<byte[]>> regionKeys) throws Exception
{
byte[] conjunctionKey = startKey;
ArrayList<HRegionInfo> regionsHRegionInfos = new ArrayList<HRegionInfo>(mStartKeyToRegionsMap.get(startKey));
regionKeys.clear();
for (int i = 0; i < regionsHRegionInfos.size(); i++){
regionKeys.add(new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR));
}
while (true){
int smallestKeyIndex = 0;
byte[] smallestEndKey = regionsHRegionInfos.get(0).getEndKey();
for (int i = 1; i < regionsHRegionInfos.size(); i++){
final HRegionInfo info = regionsHRegionInfos.get(i);
// find smallest element
if (Bytes.BYTES_COMPARATOR.compare(info.getEndKey(), smallestEndKey) < 0){
smallestEndKey = info.getEndKey();
smallestKeyIndex = i;
}
}
// check, if new min key is a valid region (i.e. only one region per key)
if (Arrays.equals(smallestEndKey, HConstants.EMPTY_END_ROW)){
//TODO: Handle
throw new Exception("Reached table end without finding an ancestor. table is seriously corrupted");
}
if (mStartKeyToRegionsMap.get(smallestEndKey).size() > 1){
System.out.println("MULTILOOP at "+ Bytes.toStringBinary(startKey) + " conjunction at "+ Bytes.toStringBinary(smallestEndKey));
return null;
}
// if the smallest end key equals the current conjunction key: break, we found the conjunction
if (Bytes.BYTES_COMPARATOR.compare(conjunctionKey, smallestEndKey) == 0){
break;
}
// continue
HRegionInfo nextRegion = mStartKeyToRegionsMap.get(smallestEndKey).first();
regionsHRegionInfos.set(smallestKeyIndex, nextRegion);
// collect the row keys stored in the next region
Set<byte[]> newKeys = getRowKeysInRegion(nextRegion);
if(newKeys.removeAll(regionKeys.get(smallestKeyIndex))){
System.out.println("Key found in nextRegion is already in old region!");
}
regionKeys.get(smallestKeyIndex).addAll(newKeys);
conjunctionKey = smallestEndKey;
}
System.out.println("Table forks at '"+ Bytes.toStringBinary(startKey) + "'. Conjunction at '"+ Bytes.toStringBinary(conjunctionKey) +"'");
return conjunctionKey;
}
/**
* Returns all HFiles for a HRegion dir
* @param fs
* @param regionDir
* @return
* @throws IOException
*/
static List<Path> getStoreFiles(FileSystem fs, Path regionDir)
throws IOException {
List<Path> res = new ArrayList<Path>();
PathFilter dirFilter = new FSUtils.DirFilter(fs);
FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter);
for (FileStatus dir : familyDirs) {
FileStatus[] files = fs.listStatus(dir.getPath());
for (FileStatus file : files) {
if (!file.isDir()) {
res.add(file.getPath());
}
}
}
return res;
}
/**
* Opens the HFiles of the specified region and scans them for row keys.
* @param region HRegion to inspect
* @return Set of all row keys found in the HFiles of the region.
*/
private Set<byte[]> getRowKeysInRegion(HRegionInfo region){
// byte[] has identity-based equals/hashCode, so compare row keys by content
Set<byte[]> keys = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
Path regionDir = getRegionDir(region);
try {
List<Path> regionFiles = getStoreFiles(mFileSystem, regionDir);
for (Path file : regionFiles) {
if (!mFileSystem.exists(file)) {
System.err.println("ERROR, file doesnt exist: " + file);
continue;
}
// create reader and load file info
HFile.Reader reader = new HFile.Reader(mFileSystem, file, null,true);
reader.loadFileInfo();
// scan over file and read key/values
HFileScanner scanner = reader.getScanner(false, false);
// scan the whole HFile and record row keys; guard against empty files
if (scanner.seekTo()) {
do {
keys.add(scanner.getKeyValue().getRow());
} while (scanner.next());
}
reader.close();
}
} catch (Exception e) {
System.out.println("Coult not read region "+ region.getRegionNameAsString() +": "+e);
keys.clear();
}
return keys;
}
/**
* Returns the directory of the specified region
* @param region
* @return
*/
private Path getRegionDir(final HRegionInfo region) {
return new Path(mTableDir, region.getEncodedName());
}
/**
* Returns all region start keys for which more than one region exists.
* @param startKeyToRegionMap
* @return Set of keys
*/
private static Queue<byte[]> getCorruptedKeys(final Map<byte[], TreeSet<HRegionInfo>> startKeyToRegionMap){
Queue<byte[]> corruptedKeys = new LinkedList<byte[]>();
for (Map.Entry<byte[], TreeSet<HRegionInfo>> e : startKeyToRegionMap.entrySet()){
TreeSet<HRegionInfo> regions = e.getValue();
if (regions.size() < 2 ){
continue;
}
System.out.println(regions.size() +" regions for start key '"+ Bytes.toStringBinary(e.getKey()) +"'" );
corruptedKeys.add(e.getKey());
}
return corruptedKeys;
}
/**
* Populates the mStartKeyToRegionsMap for a given table
* @param tableNameBytes
* @throws IOException
*/
public void populateStartKeyToRegionsMap(final byte[] tableNameBytes) throws IOException {
MetaScannerVisitor listener = new MetaScannerVisitor() {
final Comparator<HRegionInfo> comp = new Comparator<HRegionInfo>() {
public int compare(HRegionInfo k1, HRegionInfo k2) {
return Bytes.BYTES_COMPARATOR.compare(k1.getRegionName(), k2.getRegionName());
}
};
@Override
public boolean processRow(Result result) throws IOException {
byte[] value = result.getValue(HConstants.CATALOG_FAMILY,
HConstants.REGIONINFO_QUALIFIER);
if (value == null || value.length == 0) {
return true;
}
HRegionInfo info = Writables.getHRegionInfo(value);
if (!Arrays.equals(info.getTableDesc().getName(),
tableNameBytes)) {
return true;
}
byte[] startKey = info.getStartKey();
if (!mStartKeyToRegionsMap.containsKey(startKey)) {
mStartKeyToRegionsMap.put(startKey, new TreeSet<HRegionInfo>(comp));
}
mStartKeyToRegionsMap.get(startKey).add(new HRegionInfo(info));
return true;
}
};
mStartKeyToRegionsMap.clear();
MetaScanner.metaScan(mConf, listener);
}
public void setFix(boolean b) {
mDoFix = b;
}
public boolean doFix(){
return mDoFix;
}
void setTable(final String table) throws IOException {
mTable = table;
mTableDir= new Path( FSUtils.getRootDir(mConf), mTable );
}
String getTable() {
return mTable;
}
public void setBackupFolder(Path folderName) {
mBackupFolder = folderName;
}
public Path getBackupFolder()
{
return mBackupFolder;
}
public static void main(String[] args) throws Exception {
// create a fsck object
Configuration conf = HBaseConfiguration.create();
conf.set("fs.defaultFS",conf.get(org.apache.hadoop.hbase.HConstants.HBASE_DIR));
conf.set("fs.default.name",conf.get(org.apache.hadoop.hbase.HConstants.HBASE_DIR));
HBaseRegionLoops fsck = new HBaseRegionLoops(conf);
// Process command-line args.
for (int i = 0; i < args.length; i++) {
String cmd = args[i];
if (cmd.equals("--fix")){
fsck.setFix(true);
}
else if (cmd.equals("--destination")){
if (i == args.length - 1){
System.err.println("HBaseRegionLoops: --destination needs a value.");
printUsageAndExit();
}
fsck.setBackupFolder(new Path(args[i+1]));
i++;
}
else if (fsck.getTable() == null){
fsck.setTable(cmd);
}
else {
String str = "Unknown command line option : " + cmd;
System.out.println(str);
}
}
// check, if we have all info
if (fsck.getTable() == null){
System.err.println("Please specify table");
printUsageAndExit();
}
if (fsck.doFix() && fsck.getBackupFolder() == null)
{
Path backupPath = new Path("/hbase-loopy-regions");
int suffix = 1;
while (fsck.mFileSystem.exists(backupPath)) {
backupPath = new Path("/hbase-loopy-regions-"+suffix);
suffix++;
}
fsck.setBackupFolder(backupPath);
}
// do stuff
fsck.doit();
Runtime.getRuntime().exit(0);
}
private static void printUsageAndExit() {
System.err.println("Usage: HBaseRegionLoops [opts] table-name ");
System.err.println( "Fixes Tables whose regions look like \n"+
" A\n"+
" / \\\n"+
" B C\n"+
" / \\ |\n"+
" D E |\n"+
" \\ | /\n"+
" F\n" +
"instead of A-C-F");
System.err.println(" where [opts] are:");
System.err.println(" --fix Try to fix the errors.");
System.err.println(" --destination Change the destination where the \n" +
" corrupted regions are moved to " +
" (Only needed when --fix is specified).");
Runtime.getRuntime().exit(-2);
}
}