public
Last active

CellCounter for HBase Scan with TimeRange

  • Download Gist
cellcounter.rb
Ruby
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
=begin
Usage: /bin/hbase shell cellcounter.rb
=end
 
import java.text.SimpleDateFormat
import java.text.ParsePosition
import java.util.Date
 
import org.apache.hadoop.hbase.client.HTable
import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.util.Bytes
 
# Scan parameters.
#   table_name  - HBase table to scan.
#   column_name - "family:qualifier" of the column whose cells are counted.
#   start_date / end_date - bounds of the cell-timestamp range, written in
#                           the MMddyyyyHHmmss pattern parsed below.
table_name = 'lead'
column_name = 'binary:object'
start_date = '08012010000000'
end_date = '08022010000000'

# Convert both date strings to epoch milliseconds for Scan#setTimeRange.
# A single formatter is shared by both conversions.
# NOTE(review): SimpleDateFormat parses in the JVM's default time zone --
# confirm that matches the time zone the cell timestamps were written in.
date_format = SimpleDateFormat.new("MMddyyyyHHmmss")
timerange_start, timerange_end = [start_date, end_date].map do |date_text|
  parsed = date_format.parse(date_text, ParsePosition.new(0))
  # parse(String, ParsePosition) signals failure by returning null instead of
  # raising, so fail here with a clear message rather than on nil.getTime.
  raise ArgumentError, "cannot parse date '#{date_text}' (expected MMddyyyyHHmmss)" if parsed.nil?
  parsed.getTime
end
 
=begin
puts timerange_start
puts timerange_end
puts Date.new(timerange_start).toString()
puts Date.new(timerange_end).toString()
=end
 
# Scan table_name restricted to cells whose timestamp falls inside the
# configured time range, and count how many cells of column_name each row has.
# Prints one line per returned row, then the overall row and cell totals.
scanner = Scan.new
scanner.setMaxVersions() # no-arg form: ask for every stored version, not just the latest
scanner.setTimeRange(timerange_start, timerange_end)

row_counter = 0
total_cell_counter = 0

table = HTable.new(@hbase.configuration, table_name)
begin
  table_scanner = table.getScanner(scanner)
  begin
    table_scan_iter = table_scanner.iterator
    while table_scan_iter.hasNext
      table_row = table_scan_iter.next
      table_row_key = Bytes::toStringBinary(table_row.getRow)

      # The scan returns cells from every column, so the target column is
      # selected client-side by comparing the rebuilt "family:qualifier" name.
      cell_counter = 0
      table_row.list.each do |kv|
        family = String.from_java_bytes(kv.getFamily)
        qualifier = Bytes::toStringBinary(kv.getQualifier)
        column = "#{family}:#{qualifier}"
        if column.eql? column_name
          cell_counter += 1
          total_cell_counter += 1
        end
      end

      # Every row returned by the scan is reported and counted, even when it
      # holds no cells of the target column (versions - 0).
      puts "#{table_name}_id - #{table_row_key} | versions - #{cell_counter}"
      row_counter += 1
    end
  ensure
    # Release the scanner even if the loop above raised.
    table_scanner.close
  end
ensure
  # Release the table's client-side resources whether or not the scan succeeded.
  table.close
end

puts "total row count - #{row_counter}"
puts "total version count - #{total_cell_counter}"

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.