This is experimental code demonstrating the usage of Rover::DataFrame#summary
in Ruby.
This method is not implemented in the official release.
module My
  # Statistics for a vector that cast the underlying @data to Numo::DFloat
  # and pass Numo's `nan: true` option to the reduction.
  # Meant to be prepended to Rover::Vector; once prepended, these
  # definitions take precedence over same-named methods of that class.
  module RoverVectorPrepender
    # Mean of @data cast to Float64, computed with `nan: true`.
    def mean
      @data.cast_to(Numo::DFloat).mean(nan: true)
    end

    # Median of @data cast to Float64, computed with `nan: true`.
    def median
      @data.cast_to(Numo::DFloat).median(nan: true)
    end

    # Standard deviation (Numo `stddev`) of @data cast to Float64,
    # computed with `nan: true`.
    def std
      @data.cast_to(Numo::DFloat).stddev(nan: true)
    end

    # Variance of @data cast to Float64, computed with `nan: true`.
    def var
      @data.cast_to(Numo::DFloat).var(nan: true)
    end
  end

  # Statistical summaries for a data frame.
  # Meant to be prepended to Rover::DataFrame; relies on the host's
  # `keys`, `[]` and `shape`.
  module RoverDataFramePrepender
    # Ordered table of the reported statistics: label => callable taking
    # (vector, digits). Shared by #summary and #summary_T so both always
    # report the same rows in the same order.
    # "count" is the number of non-missing entries; min and max are
    # reported as-is, everything else is rounded to `digits`.
    # NOTE(review): percentile, min and max are called without a NaN
    # option -- confirm their NaN handling is what the summary should show.
    SUMMARY_STATS = {
      "count" => ->(vec, _digits) { vec.missing.to_numo.count_false },
      "mean" => ->(vec, digits) { vec.mean.round(digits) },
      "std" => ->(vec, digits) { vec.std.round(digits) },
      "min" => ->(vec, _digits) { vec.min },
      "25%" => ->(vec, digits) { vec.percentile(25).round(digits) },
      "50%" => ->(vec, digits) { vec.percentile(50).round(digits) },
      "75%" => ->(vec, digits) { vec.percentile(75).round(digits) },
      "max" => ->(vec, _digits) { vec.max }
    }.freeze
    private_constant :SUMMARY_STATS

    # Show statistical summary of self
    # - Returns DataFrame with one row per numeric column and one column
    #   per statistic
    # - Make stats for numeric columns only
    # - 1st column header indicates [n of rows, n of numeric columns] and
    #   the column itself lists the numeric column keys
    # - "count" shows the number of non-missing values
    #
    # @param digits [Integer] decimal places used when rounding mean, std
    #   and the percentiles (defaults to 6)
    # @return [Rover::DataFrame]
    def summary(digits: 6)
      num_keys, key0 = summary_layout
      vectors = num_keys.map { |key| self[key] }

      table = { key0 => num_keys }
      SUMMARY_STATS.each do |label, stat|
        table[label] = vectors.map { |vec| stat.call(vec, digits) }
      end
      Rover::DataFrame.new(table)
    end

    # Transposed variant of #summary: one row per statistic and one
    # column per numeric column.
    # - This method may be abandoned (first implementation)
    # - Author's note: counts should be Int but are cast to Float
    #
    # @param digits [Integer] decimal places used when rounding mean, std
    #   and the percentiles (defaults to 6)
    # @return [Rover::DataFrame]
    def summary_T(digits: 6)
      num_keys, key0 = summary_layout

      rows = SUMMARY_STATS.map do |label, stat|
        # The label becomes cell data here, so hand out an unfrozen copy
        # instead of the frozen hash key.
        num_keys.each_with_object({ key0 => label.dup }) do |key, row|
          row[key] = stat.call(self[key], digits)
        end
      end
      Rover::DataFrame.new(rows)
    end

    private

    # Returns the numeric column keys together with the header symbol
    # :"[n_rows,n_numeric_columns]" used for the first summary column.
    def summary_layout
      num_keys = keys.select { |key| self[key].numeric? }
      [num_keys, :"[#{shape.first},#{num_keys.size}]"]
    end
  end
end
# Activate the extensions. `prepend` places each module ahead of the class
# in method lookup, so methods defined in the module are found first.
Rover::Vector.prepend(My::RoverVectorPrepender)
Rover::DataFrame.prepend(My::RoverDataFramePrepender)