Skip to content

@noomerikal /cellcounter.rb
Created

Embed URL

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
CellCounter for HBase Scan with TimeRange
=begin
Usage: /bin/hbase shell cellcounter.rb
=end
import java.text.SimpleDateFormat
import java.text.ParsePosition
import java.util.Date
import org.apache.hadoop.hbase.client.HTable
import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.util.Bytes
# Scan parameters: the table to read and the single "family:qualifier"
# column whose stored versions are counted.
table_name = 'lead'
column_name = 'binary:object'

# Bounds of the time range, written in the MMddyyyyHHmmss pattern used below.
start_date = '08012010000000'
end_date = '08022010000000'

# One formatter serves both bounds; each parse starts at position 0 of its
# input, and getTime() turns the parsed Date into the numeric timestamp that
# is later handed to Scan#setTimeRange.
timestamp_format = SimpleDateFormat.new("MMddyyyyHHmmss")
timerange_start = timestamp_format.parse(start_date, ParsePosition.new(0)).getTime()
timerange_end = timestamp_format.parse(end_date, ParsePosition.new(0)).getTime()
=begin
puts timerange_start
puts timerange_end
puts Date.new(timerange_start).toString()
puts Date.new(timerange_end).toString()
=end
# Scan the table for cells whose timestamps fall inside the configured time
# range, print how many versions of `column_name` each returned row holds,
# then print the overall row and version totals.
scanner = Scan.new
scanner.setMaxVersions() # no-argument form: do not cap the number of versions returned
scanner.setTimeRange(timerange_start, timerange_end)

table = HTable.new(@hbase.configuration, table_name)
row_counter = 0
total_cell_counter = 0

begin
  table_scanner = table.getScanner(scanner)
  table_scan_iter = table_scanner.iterator

  while table_scan_iter.hasNext
    table_row = table_scan_iter.next
    table_row_key = Bytes::toStringBinary(table_row.getRow)

    # Count only the key-values whose "family:qualifier" matches the target.
    cell_counter = 0
    table_row.list.each do |kv|
      family = String.from_java_bytes(kv.getFamily)
      qualifier = Bytes::toStringBinary(kv.getQualifier)
      cell_counter += 1 if "#{family}:#{qualifier}".eql?(column_name)
    end

    puts "#{table_name}_id - #{table_row_key} | versions - #{cell_counter}"
    total_cell_counter += cell_counter
    row_counter += 1
  end
ensure
  # Release the scanner and the table handle even when the scan fails
  # part-way; table_scanner is still nil if getScanner itself raised.
  table_scanner.close if table_scanner
  table.close
end

puts "total row count - #{row_counter}"
puts "total version count - #{total_cell_counter}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.