Created
March 16, 2010 18:49
-
-
Save kovyrin/334351 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/core/src/main/ruby/hbase.rb b/core/src/main/ruby/hbase.rb | |
index de9d006..4ba2a98 100644 | |
--- a/core/src/main/ruby/hbase.rb | |
+++ b/core/src/main/ruby/hbase.rb | |
@@ -31,6 +31,8 @@ module HBaseConstants | |
MAXLENGTH = "MAXLENGTH" | |
CACHE_BLOCKS = "CACHE_BLOCKS" | |
REPLICATION_SCOPE = "REPLICATION_SCOPE" | |
+ INTERVAL = 'INTERVAL' | |
+ CACHE = 'CACHE' | |
# Load constants from hbase java API | |
def self.promote_constants(constants) | |
diff --git a/core/src/main/ruby/hbase/table.rb b/core/src/main/ruby/hbase/table.rb | |
index 7e1c808..51115bf 100644 | |
--- a/core/src/main/ruby/hbase/table.rb | |
+++ b/core/src/main/ruby/hbase/table.rb | |
@@ -63,11 +63,11 @@ module Hbase | |
#---------------------------------------------------------------------------------------------- | |
# Count rows in a table | |
- def count(interval = 1000) | |
+ def count(interval = 1000, caching_rows = 10) | |
# We can safely set scanner caching with the first key only filter | |
scan = Scan.new | |
scan.cache_blocks = false | |
- scan.caching = 10 | |
+ scan.caching = caching_rows | |
scan.setFilter(FirstKeyOnlyFilter.new) | |
# Run the scanner | |
diff --git a/core/src/main/ruby/shell/commands/count.rb b/core/src/main/ruby/shell/commands/count.rb | |
index 4341776..f65b98c 100644 | |
--- a/core/src/main/ruby/shell/commands/count.rb | |
+++ b/core/src/main/ruby/shell/commands/count.rb | |
@@ -6,17 +6,26 @@ module Shell | |
Count the number of rows in a table. This operation may take a LONG | |
time (Run '$HADOOP_HOME/bin/hadoop jar hbase.jar rowcount' to run a | |
counting mapreduce job). Current count is shown every 1000 rows by | |
- default. Count interval may be optionally specified. Examples: | |
+ default. Count interval may be optionally specified. Scan caching | |
+ is enabled on count scans by default. Default cache size is 10 rows. | |
+ If your rows are small in size, you may want to increase this | |
+ parameter. Examples: | |
hbase> count 't1' | |
- hbase> count 't1', 100000 | |
+ hbase> count 't1', INTERVAL => 100000 | |
+ hbase> count 't1', CACHE => 1000 | |
EOF | |
end | |
- def command(table, interval = 1000) | |
+ def command(table, params) | |
+ params = { | |
+ 'INTERVAL' => 1000, | |
+ 'CACHE' => 10 | |
+ }.merge(params) | |
+ | |
now = Time.now | |
formatter.header | |
- count = table(table).count(interval) do |cnt, row| | |
+ count = table(table).count(params['INTERVAL'].to_i, params['CACHE'].to_i) do |cnt, row| | |
formatter.row([ "Current count: #{cnt}, row: #{row}" ]) | |
end | |
formatter.footer(now, count) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment