Skip to content

Instantly share code, notes, and snippets.

@noomerikal
Created July 28, 2011 00:11
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save noomerikal/1110646 to your computer and use it in GitHub Desktop.
Save noomerikal/1110646 to your computer and use it in GitHub Desktop.
CellCounter for HBase Scan with TimeRange
=begin
Usage: /bin/hbase shell cellcounter.rb
=end
import java.text.SimpleDateFormat
import java.text.ParsePosition
import java.util.Date
import org.apache.hadoop.hbase.client.HTable
import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.util.Bytes
# Counts, for every row of an HBase table, how many stored versions of one
# column carry a timestamp inside a given time range, then prints per-row
# counts followed by grand totals.
#
# Runs inside the HBase shell (JRuby); relies on the shell-provided @hbase
# object for the cluster configuration.

# --- Parameters -------------------------------------------------------------
table_name = 'lead'
column_name = 'binary:object' # "family:qualifier" of the column to count
start_date = '08012010000000' # MMddyyyyHHmmss
end_date = '08022010000000'   # MMddyyyyHHmmss

# Parse both bounds with a single formatter. The one-argument parse raises
# java.text.ParseException on malformed input instead of returning null.
# NOTE(review): dates are presumably interpreted in the JVM default time
# zone, since no zone is set on the formatter -- confirm this is intended.
date_format = SimpleDateFormat.new('MMddyyyyHHmmss')
timerange_start = date_format.parse(start_date).getTime
timerange_end = date_format.parse(end_date).getTime

# Uncomment to inspect the parsed bounds:
# puts timerange_start
# puts timerange_end
# puts Date.new(timerange_start).toString()
# puts Date.new(timerange_end).toString()

# --- Scan setup -------------------------------------------------------------
scan = Scan.new
scan.setMaxVersions # no-arg form: ask for all versions, not only the newest
scan.setTimeRange(timerange_start, timerange_end)

row_counter = 0
total_cell_counter = 0

table = HTable.new(@hbase.configuration, table_name)
begin
  table_scanner = table.getScanner(scan)
  begin
    # --- Per-row counting ---------------------------------------------------
    table_scanner.each do |table_row|
      table_row_key = Bytes.toStringBinary(table_row.getRow)

      # Count only the key-values belonging to the requested column.
      cell_counter = table_row.list.count do |kv|
        family = String.from_java_bytes(kv.getFamily)
        qualifier = Bytes.toStringBinary(kv.getQualifier)
        "#{family}:#{qualifier}" == column_name
      end

      total_cell_counter += cell_counter
      puts "#{table_name}_id - #{table_row_key} | versions - #{cell_counter}"
      row_counter += 1
    end
  ensure
    # Always release the scanner, even if the scan fails part-way.
    table_scanner.close
  end
ensure
  # Always release the table's client-side resources.
  table.close
end

# --- Totals -----------------------------------------------------------------
puts "total row count - #{row_counter}"
puts "total version count - #{total_cell_counter}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment