Created
November 6, 2009 06:21
-
-
Save igrigorik/227771 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'rubygems'
require 'ruport'
require 'pp'
# Prints a table showing how much disk space each kind of Lucene index file
# occupies under the path given as the first command-line argument.

# Short descriptions of the Lucene index file types, keyed by file extension.
LUCENE_FILE_TYPES = {
  'gen' => 'Segments File: stores information about segments',
  'lock' => 'Lock File: the Write lock prevents multiple IndexWriters from writing to the same file.',
  'cfs' => 'Compound File: an optional "virtual" file consisting of all the other index files for systems that frequently run out of file handles.',
  'fnm' => 'Fields: stores information about the fields',
  'fdx' => 'Field Index: contains pointers to field data',
  'fdt' => 'Field Data: the stored fields for documents',
  'tis' => 'Term Infos: part of the term dictionary, stores term info',
  'tii' => 'Term Info Index: the index into the Term Infos file',
  'frq' => 'Frequencies: contains the list of docs which contain each term along with frequency',
  'prx' => 'Positions: stores position information about where a term occurs in the index',
  'nrm' => 'Norms: encodes length and boost factors for docs and fields',
  'tvx' => 'Term Vector Index: stores offset into the document data file',
  'tvd' => 'Term Vector Documents: contains information about each document that has term vectors',
  'tvf' => 'Term Vector Fields: the field level info about term vectors',
  'del' => 'Deleted Documents: info about what files are deleted'
}.freeze

# Builds the glob pattern used to enumerate the index files.
#
# @param path [String] an index directory (with or without a trailing slash)
#   or a path prefix
# @return [String] "<dir>/*" when +path+ is a directory, otherwise "<path>*"
#   (the original prefix-matching behaviour)
def index_glob(path)
  File.directory?(path) ? File.join(path, '*') : "#{path}*"
end

# Derives the report key for a file: its extension without the leading dot,
# or the base name when the file has no extension (e.g. "segments_2").
# Working on the extension of the base name keeps dots in parent directory
# names from leaking into the key.
#
# @param file [String] path to an index file
# @return [String]
def file_type(file)
  ext = File.extname(file).sub(/\A\./, '')
  ext.empty? ? File.basename(file) : ext
end

# Sums the on-disk size of the given files per file type.
# Entries that are not regular files (e.g. sub-directories) are skipped.
#
# @param files [Array<String>] paths to inspect
# @return [Hash{String => Integer}] total bytes keyed by file type
def collect_sizes(files)
  files.each_with_object(Hash.new(0)) do |file, sizes|
    next unless File.file?(file)

    sizes[file_type(file)] += File.size(file)
  end
end

# Turns per-type byte totals into report rows, largest type first.
#
# @param sizes [Hash{String => Integer}] total bytes keyed by file type
# @param types [Hash{String => String}] descriptions keyed by file type
# @return [Array<Array>] rows of [type, size in MB (String, 2 decimals),
#   percent of total (Float, 2 decimals), description or nil]
def report_rows(sizes, types = LUCENE_FILE_TYPES)
  total = sizes.values.inject(0) { |sum, size| sum + size }

  # Order by raw byte count (then name) so the result is deterministic even
  # when two types round to the same percentage.
  sizes.sort_by { |type, size| [-size, type] }.map do |type, size|
    # An index made only of empty files has no meaningful share; report 0.0.
    percent = total.zero? ? 0.0 : format('%.2f', size.to_f / total * 100).to_f
    [type, format('%.2f', size.to_f / (1024 * 1024)), percent, types[type]]
  end
end

# Builds the Ruport table for the index found at +path+.
#
# @param path [String] index directory or path prefix
# @return [Ruport::Data::Table]
def lucene_index_report(path)
  rows = report_rows(collect_sizes(Dir.glob(index_glob(path))))
  Ruport::Data::Table.new({
    :column_names => ['filetype', 'size (MB)', '%', 'description'],
    :data => rows
  })
end

path = ARGV[0]
if path
  puts lucene_index_report(path).to_s
else
  # Without an argument there is nothing to scan; say so instead of crashing.
  warn "usage: #{File.basename($PROGRAM_NAME)} <path-to-lucene-index>"
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment