#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Drop all empty regions for a table. Dropping a region is implemented as a merge into
# an adjacent region.
#
# $ hbase org.jruby.Main drop_empty_regions.rb mytable
include Java
import java.lang.Exception
import java.lang.RuntimeException
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.Admin
import org.apache.hadoop.hbase.client.ConnectionFactory
import org.apache.hadoop.hbase.ServerName
import org.apache.hadoop.hbase.HRegionInfo
import org.apache.hadoop.hbase.util.Bytes
import org.apache.commons.logging.LogFactory

NAME = "drop_empty_regions.rb"
# Create a logger and quiet the noisy DEBUG-level client logging
def configureLogging()
  # Configure log4j to not spew so much
  logger = org.apache.log4j.Logger.getLogger("org.apache.hadoop.hbase.client")
  logger.setLevel(org.apache.log4j.Level::INFO)
  logger = org.apache.log4j.Logger.getLogger("org.apache.zookeeper")
  logger.setLevel(org.apache.log4j.Level::WARN)
  return LogFactory.getLog(NAME)
end
# Get a configuration instance
def getConfiguration()
  config = HBaseConfiguration.create()
  # No prefetching on hbase:meta. This applies to versions before 0.99; newer versions do not prefetch.
  config.setInt("hbase.client.prefetch.limit", 1)
  # Make a config that retries at short intervals many times
  config.setInt("hbase.client.pause", 500)
  config.setInt("hbase.client.retries.number", 100)
  return config
end
# Find the HRegionInfo for the named region
def getTargetInfo(infos, emptyRegion)
  infos.each do |region|
    return region if region.getRegionNameAsString() == emptyRegion
  end
  raise RuntimeException.new("Unable to locate region " + emptyRegion)
end
# Find a region adjacent to the target region
def getAdjacent(infos, targetRegionInfo)
  infos.each do |region|
    return region if HRegionInfo.areAdjacent(region, targetRegionInfo)
  end
  raise RuntimeException.new("No adjacent region found for " + targetRegionInfo.getRegionNameAsString())
end
if ARGV.length != 1
  puts "usage: drop_empty_regions.rb <tableName>"
  exit 1
end
$TABLE_NAME_STRING = ARGV[0]
$TABLE_NAME = TableName.valueOf(ARGV[0])

# Create a logger and save it to a ruby global
$LOG = configureLogging()
conn = nil
admin = nil
config = getConfiguration()
conn = ConnectionFactory.createConnection(config)
admin = conn.getAdmin()

regionArray = []
clusterStatus = admin.getClusterStatus()
# Fetch all regions for this table whose storefileSizeMB == 0
clusterStatus.getServers().each do |serverName|
  serverLoad = clusterStatus.getLoad(serverName)
  serverLoad.getRegionsLoad().entrySet().each do |entry|
    region = entry.getValue().getName()
    storeFileSize = entry.getValue().getStorefileSizeMB()
    # The trailing ',' delimits the table name within the region name, so tables
    # sharing a common prefix are not matched by mistake.
    regionArray.push(Bytes.toStringBinary(region)) if storeFileSize == 0 and entry.getValue().getNameAsString().start_with?($TABLE_NAME_STRING + ",")
  end
end
regionArray.each do |emptyRegion|
  begin
    tableRegions = admin.getTableRegions($TABLE_NAME)
    targetInfo = getTargetInfo(tableRegions, emptyRegion)
    adjacentInfo = getAdjacent(tableRegions, targetInfo)
    $LOG.info("'dropping' " + emptyRegion.to_s + " by merging into " + adjacentInfo.to_s)
    admin.mergeRegions(targetInfo.getEncodedNameAsBytes(), adjacentInfo.getEncodedNameAsBytes(), false)
    sleep(1)
  rescue RuntimeError, Exception => e
    $LOG.info("Encountered unhandled exception.", e)
  end
end
admin.close() unless admin.nil?
conn.close() unless conn.nil?
Hi @eangelou. Yeah, that's probably a good change to make.
I haven't used this script in ages; I think the normalizer is supposed to handle this use case. Mind trying it out?
Hello again,
Yes, it is working fine now, thanks for approving the change!
I used the script to merge regions, however it created inconsistencies; the root-cause analysis (RCA) is below.
RCA of the inconsistencies from the merge script triggered on 1st Feb, per Cloudera's comments:
It was observed that the script picks up the IDs of already merged (child) regions while the merge procedure on the HBase side has not yet completed, which causes this issue. Consider giving more sleep time between every merge for a safer execution.
I am still figuring out how the logic ends up taking a child region for a merge (the only possibility is when it calls getAdjacent).
I will post here again once I find a solution; maybe a simple check is missing.
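One possible guard, sketched below as an untested assumption rather than a verified fix: instead of the fixed sleep(1), poll until the table's region count actually drops before issuing the next merge. It reuses only the Admin#getTableRegions and Admin#mergeRegions calls the script already makes (a real implementation would also bound the wait in case a merge fails):
# Untested sketch: wait for the merge procedure to complete (region count
# shrinks by one) before continuing to the next merge.
expected = admin.getTableRegions($TABLE_NAME).size - 1
admin.mergeRegions(targetInfo.getEncodedNameAsBytes(), adjacentInfo.getEncodedNameAsBytes(), false)
while admin.getTableRegions($TABLE_NAME).size > expected
  sleep(1)
end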
Hi @pandeysa. Please don't use this script. It was written against an HBase version circa 1.2, and intended to execute under a very specific set of circumstances. It went on to inspire the much more modern implementation of the normalizer. I advise you to get a modern version of HBase (2.4+) and allow the normalizer to do this type of work.
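For reference, a minimal sketch of the normalizer route in a recent (2.x) HBase shell; 'mytable' is a placeholder table name, and exact attribute names may vary by version:
hbase> normalizer_switch true
hbase> alter 'mytable', {NORMALIZATION_ENABLED => 'true'}
hbase> normalize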
Hello,
Great script! I would propose changing line 102:
regionArray.push(Bytes.toStringBinary(region)) if storeFileSize == 0 and entry.getValue().getNameAsString().start_with?($TABLE_NAME_STRING)
to:
regionArray.push(Bytes.toStringBinary(region)) if storeFileSize == 0 and entry.getValue().getNameAsString().start_with?($TABLE_NAME_STRING + ",")
as I had problems due to multiple tables starting with the same string. The ',' acts as a nice table-name delimiter there (or at least it worked in my setup).
Thanks!