-
-
Save SergeyKozlov/dccf8ce7da858c1e4d9e107b12e8202f to your computer and use it in GitHub Desktop.
RowDeleter for HBase Scan with TimeRange
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
=begin
Usage: /bin/hbase shell rowdeleter.rb

Scans the table named by `table_name` with a time range of
[start_date, end_date) and issues a row-level Delete for every row the
scan returns. Dates use the pattern MMddyyyyHHmmss and are parsed by
java.text.SimpleDateFormat (i.e. in the JVM's default time zone).

NOTE(review): the Delete is built with HConstants::LATEST_TIMESTAMP and no
column/family restriction, so it appears to remove the whole row rather than
only the cells inside the time range -- confirm this is the intent before
running against production data.
=end
import java.text.SimpleDateFormat
import java.text.ParsePosition
import java.util.Date
import org.apache.hadoop.hbase.client.Delete
import org.apache.hadoop.hbase.client.HTable
import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.util.Bytes

table_name = 'lead'
# NOTE(review): column_name is never read below; the scan is not restricted
# to this column. Kept so existing edits to this setting do not break.
column_name = 'binary:object'
start_date = '07302011000000'
end_date = '08032011000000'

# One formatter is enough; the script is single-threaded.
date_format = SimpleDateFormat.new("MMddyyyyHHmmss")

# Converts a MMddyyyyHHmmss string to epoch milliseconds.
# parse(String, ParsePosition) returns nil instead of throwing on bad input,
# so fail loudly here rather than with an opaque nil error further down.
parse_millis = lambda do |text|
  parsed = date_format.parse(text, ParsePosition.new(0))
  raise ArgumentError, "cannot parse date '#{text}' as MMddyyyyHHmmss" if parsed.nil?
  parsed.getTime()
end

timerange_start = parse_millis.call(start_date)
timerange_end = parse_millis.call(end_date)
=begin
puts timerange_start
puts timerange_end
puts Date.new(timerange_start).toString()
puts Date.new(timerange_end).toString()
=end
scanner = Scan.new
scanner.setMaxVersions()
scanner.setTimeRange(timerange_start, timerange_end)

table = HTable.new(@hbase.configuration, table_name)
row_counter = 0
begin
  table_scanner = table.getScanner(scanner)
  begin
    table_scan_iter = table_scanner.iterator
    while table_scan_iter.hasNext
      table_row = table_scan_iter.next
      # Use the raw key bytes for the Delete. Bytes::toStringBinary escapes
      # non-printable bytes (e.g. "\x00"), so converting that display string
      # back to bytes would address a different row for binary keys.
      row_key_bytes = table_row.getRow
      table_row_key = Bytes::toStringBinary(row_key_bytes)
      delete = Delete.new(row_key_bytes, org.apache.hadoop.hbase.HConstants::LATEST_TIMESTAMP, nil)
      table.delete(delete)
      puts "deleted #{table_name}_id - #{table_row_key}"
      row_counter += 1
    end
  ensure
    # Release the scanner even if a delete fails part-way through.
    table_scanner.close
  end
ensure
  # Release the table's client-side resources.
  table.close
end
puts "total deleted rows - #{row_counter}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment