Last active
January 28, 2020 14:52
-
-
Save glurp/33775db6e47564975998 to your computer and use it in GitHub Desktop.
ruby filter: Unix pipe filter, Ruby-based. Like awk (more or less), but in the Ruby language: filter, calculation, format, plot curves & bars, summarize, group-by . . .
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/ruby
#
##########################################################
# rfilter.rb : filter on stdin, each line split on blank(s)
##########################################################
require 'date'
require 'fileutils'
require 'json'
require 'pp'
require 'time'
require 'tmpdir'
########################################################
# Filter primitives
########################################################
# Filter primitives available inside the user's expression.
#
# The expression is compiled into Code#execute and run once per input line.
# State that must survive from one line to the next lives in globals
# ($line, $pending, $plotconf, ...), because a new Code instance is created
# for every line. The accessors `_`, `_0`, `_1`... and `nol` are added to this
# class later, when the expression is compiled.
class Code
  # Timestamp shape recognised by older/newer: "YYYY-MM-DD hh:mm:ss", with
  # '-' or '/' between the date parts and a blank or 'T' before the time.
  TIMESTAMP_RE = /\d\d\d\d[-\/]\d\d[-\/]\d\d[\sT]\d\d:\d\d:\d\d/

  # Print the current line if it matches the pattern built from +str+.
  # When +no+ is given, only field _[no] is tested instead of the whole line.
  def grep(str, no = nil) # print if match
    subject = no.nil? ? $line : _[no]
    puts $line if subject =~ /#{str}/
  end

  $skip = true
  # Ignore input until a line matches +filter+; from that line on (included)
  # the block is run for every line.
  def skip_until(filter)
    return if $skip && $line !~ filter
    yield
    $skip = false
  end

  $dateref = nil
  # Yield the last timestamp found in the line (as a String) when it is
  # strictly before +dateref+ (a String accepted by Time.parse, or a Time).
  # Lines without a parsable timestamp are ignored.
  def older(dateref)
    stamp, time = last_timestamp
    yield(stamp) if time && time < reference_time(dateref)
  end

  # Yield the last timestamp found in the line (as a String) when it is at or
  # after +dateref+. Lines without a parsable timestamp are ignored.
  def newer(dateref)
    stamp, time = last_timestamp
    yield(stamp) if time && time >= reference_time(dateref)
  end

  # Detect holes in a log: compare the date of this line with the date of the
  # previous call. When the gap exceeds +delta_max+ seconds, yield
  # (gap, current_time) if a block is given, otherwise print the current and
  # the previous line.
  # +date+ may be a String or an Array of fields; by default the first
  # date-like token of the line is used. Lines without a date are ignored.
  def timediff(delta_max, date = nil)
    date ||= $line[/(\d\d\d\d[-\/]\d\d[-\/]\d\d[\sT][\S]*)/, 1]
    return unless date
    dt = Time.parse(date.kind_of?(String) ? date : date.join(" "))
    if defined?($last_date) && $last_date
      d = dt - $last_date
      if d > delta_max
        if block_given?
          yield(d, dt)
        else
          puts "#{$line} | #{d} secs"
          puts $oldline
          puts "====="
        end
      end
    end
    $last_date = dt
  end

  # Yield (previous_value, field) each time +field+ differs from the value
  # seen on the previous call.
  def ifdiff(field)
    yield($ifdiff, field) if defined?($ifdiff) && field != $ifdiff
    $ifdiff = field
  end

  # Print the requested fields (indexes into `_`), tab separated.
  def cut(*fieldsnum) # extract some column(s)
    fields = _
    puts fieldsnum.map { |num| fields[num] }.join("\t")
  end

  # Clear the terminal by printing the output of the `clear` command.
  def clear
    puts `clear`
  end

  # Return capture group 1 of the pattern built from +word+ (nil when the
  # line does not match or the pattern has no group).
  def extr(word)
    $line[/#{word}/, 1]
  end

  # First run of digits of +str+ (default: the current line) as an Integer;
  # 0 when there is none.
  def atoi(str = nil)
    (str || $line)[/\d+/].to_i
  end

  # Words following +str+ in the line: +n+ words starting at the +x+-th one
  # (1-based). Quotes, colons, commas and blanks right after +str+ are
  # skipped. Returns [] when +str+ is not found in the line.
  def after(str, x = 1, n = 1)
    tail = $line[/#{str}[":,\s]*(.*)$/, 1]
    rest = tail ? tail.split(/\s+/) : []
    rest[x - 1, n]
  end

  # Debug variant of extr: print the line and what follows +word+.
  def extrd(word)
    return unless $line.match(/#{word}/)
    puts $line
    p $line[/(#{word}.........\S+)/, 1]
  end

  # Print the part of the line matched by +regexp+, if any.
  def match(regexp)
    found = $line.match(regexp)
    puts found[0] if found
  end

  # Print the first line and its fields as _1, _2, ... and exit the program.
  def show # helper for debugging what is receiving on stdin
    puts "line: #{$line}"
    _.each_with_index { |v, i| puts " _#{i+1} => '#{v}'" }
    exit(1)
  end

  # Accumulate the integer value of +i+; the total is printed at end of input.
  def sum(i) # add all data in a column/data
    $pending ||= 0
    $pending += i.to_i
  end

  # Accumulate count/min/max/sum/sum of squares of +i+ (as Float); the
  # summary is computed by Code.end_statistics at end of input.
  def statistics(i) # mean/variance/stddev
    value = i.to_f
    # Infinite sentinels: any real value replaces them on the first call.
    $pending ||= { type: :statistics, count: 0, min: Float::INFINITY, max: -Float::INFINITY, sum: 0, sum2: 0 }
    $pending[:count] += 1
    $pending[:min] = value if value < $pending[:min]
    $pending[:max] = value if value > $pending[:max]
    $pending[:sum] += value
    $pending[:sum2] += value * value
  end

  # Turn the accumulator built by #statistics into the printable summary:
  # count, min, max, mean, population variance and standard deviation.
  def self.end_statistics(data)
    summary = {}
    %i{count min max}.each { |k| summary[k] = data[k] }
    count = data[:count]
    mean = data[:sum] / count
    # Rounding can push the difference slightly below zero; clamp it so the
    # square root stays a real number.
    variance = [data[:sum2] / count - mean * mean, 0.0].max
    summary[:mean] = mean
    summary[:variance] = variance
    summary[:stddev] = Math.sqrt(variance)
    summary
  end

  ###################### Plotting
  # plot()  several curves given in one call per line
  # splot() several curves, one call per curve and per line
  # bplot() bar chart, one labelled group of values per line
  # rplot() several curves, each line gives a curve name and its values
  #
  # for n in {1..10}; do echo $n ; done | rfilter 'plot(_1,title:"Vmstat")'
  # for n in {1..10}; do echo $n $((n+10)); done | rfilter 'plot(_1,_2,title:"Vmstat",name:%w{a b})'
  # for n in {1..10}; do echo $n $((n+10)); done | rfilter 'splot("a",_1);splot("b",_2)'
  # vmstat 1 | rfilter 'bplot(nol,_2,title:"Vmstat"); stop_after(10)'
  # (echo "a 1 2 3 4 5 6" ; echo "b 6 5 4 3 1") | rfilter 'rplot(_1,_[1..-1])'
  #
  # :name holds the curve names read by the renderers; :names is kept only
  # because earlier versions initialised that key.
  $plotconf = { title: "RFilter", name: [], names: [] }

  # Add one point per curve. A trailing Hash (title:, name:) is merged into
  # the plot configuration.
  def plot(*v)
    $pending ||= { type: :plot, data: [] }
    merge_plot_options(v)
    $pending[:data] << v.map { |a| a.to_f }
  end

  # Append +value+ to the curve called +name+.
  def splot(name, value)
    $pending ||= { type: :rplot, data: {} }
    series = $pending[:data]
    ($plotconf[:name] ||= []) << name unless series.key?(name)
    (series[name] ||= []) << value.to_f
  end

  # Append the list +lvalue+ to the curve called +name+; +h+ is an optional
  # configuration Hash merged into the plot configuration.
  def rplot(name, lvalue, h = nil)
    $pending ||= { type: :rplot, data: {} }
    $plotconf = $plotconf.merge(h) if h
    ($pending[:data][name] ||= []).concat(lvalue.map { |v| v.to_f })
  end

  # Add one labelled group of bars. A trailing Hash is merged into the plot
  # configuration.
  def bplot(label, *v)
    $pending ||= { type: :bplot, data: [] }
    merge_plot_options(v)
    $pending[:data] << [label, v.map(&:to_f)]
  end

  def self.end_plot(data) end_allplot(data) end
  def self.end_bplot(data) end_allplot(data) end

  # Convert the name => values Hash collected by splot/rplot into rows of
  # points (short curves are padded with 0) and render it as a line plot.
  def self.end_rplot(data)
    series = data[:data]
    return if series.nil? || series.empty?
    series.each { |k, lv| p [k, lv[0..10]] }
    $plotconf[:name] = series.keys
    len = series.values.map(&:size).max
    puts "nb mesures : #{len}\n\n"
    rows = (0...len).map { |index| series.values.map { |lv| lv[index] || 0 } }
    end_allplot({ type: :plot, data: rows })
  end

  # Render +data+ ({type: :plot|:bplot, data: [...]}) to a PNG file with the
  # gruff gem and try to display it with `ruiby`.
  # Returns a Hash describing the result, or nil when there is nothing to plot.
  def self.end_allplot(data)
    points = data[:data]
    return unless points && points.size > 0 && points.first.size > 0
    # gruff is loaded lazily so that the non-plotting filters work without it.
    # LoadError is not a StandardError, so it has to be rescued by name.
    begin
      require 'gruff'
    rescue LoadError
      puts "gruff not installed!\n install libmagickwand-dev and libmagickcore-dev ;\n and the : gem install gruff "
      exit(1)
    end
    # The image is written inside its own fresh temporary directory.
    filename = File.join(Dir.mktmpdir("plot-"), "plot.png")
    names = Array($plotconf[:name])
    case data[:type]
    when :plot
      g = Gruff::Line.new(600)
      g.title = $plotconf[:title]
      puts points.inspect[0..100]
      points.first.size.times do |no|
        g.data(names[no] || "measures #{no+1}", points.map { |pt| pt[no] })
      end
      g.write(filename)
    when :bplot
      g = Gruff::Bar.new(600)
      g.title = $plotconf[:title]
      points.first.last.size.times do |no|
        g.data(names[no] || "measures #{no+1}", points.map { |pt| pt.last[no] })
      end
      g.labels = points.each_with_index.map { |(label, _values), idx| [idx, label || idx.to_s] }.to_h
      g.write(filename)
    else
      puts "unknown plot type #{data[:type]}"
    end
    %x{ruiby 'image("#{filename}")' &}
    { filename: filename, count: points.size, usage: "Use display or eog or feh for view image..." }
  end

  # Accumulate the product of the integer value of +i+.
  def mult(i) # multiply all data in a column
    $pending ||= 1
    $pending *= i.to_i
  end

  def toh(a, b) # put in Hash h[a]=b ; print Hash at exit
    $pending ||= {}
    $pending[a] = b
  end

  def tohcount(a, b = "1") # put in Hash h[a]+=b ; print Hash at exit
    $pending ||= {}
    $pending[a] = ($pending[a] || 0) + b.to_i
  end

  def tohlist(a, b = "?") # push in Hash h[a] b ; print Hash of Array at exit
    $pending ||= {}
    ($pending[a] ||= []) << b
  end

  def tohh(a, b, c) # put in Hash h[a][b]=c ; print Hash of Hash at exit
    $pending ||= {}
    ($pending[a] ||= {})[b] = c
  end

  def sela(&b) # selection and print as table
    puts _.join("\t") if yield($line, _)
  end

  def sell(&b) # selection and print raw line
    puts $line if yield($line, _)
  end

  def format_if(fstr, *args, &b) # format some fields if the block is true
    puts(fstr % args) if yield($line, _)
  end

  def format(fstr, *args) # format some fields, unconditionally
    puts(fstr % args)
  end

  # Difference between +value+ and the value given on the previous call for
  # the same slot +no+ (0.0 on the first call).
  def delta(value, no = 0)
    value = value.to_f
    $hdelta ||= {}
    previous = $hdelta.fetch(no, value)
    $hdelta[no] = value
    value - previous
  end

  # Stop reading input once +n+ seconds have elapsed since startup.
  def stop_after(n) $stop = true if (Time.now - $startup).to_i >= n end
  # Stop reading input once the line number reaches +n+.
  def stop_nol(n) $stop = true if nol >= n end
  # Stop reading input when +v+ is true.
  def stop_if(v) $stop = true if v end

  private

  # Last timestamp of the current line as [text, Time], or [nil, nil] when
  # the line has none or it cannot be parsed.
  def last_timestamp
    stamp = $line.scan(TIMESTAMP_RE).last
    return [nil, nil] unless stamp
    [stamp, Time.parse(stamp)]
  rescue ArgumentError
    [nil, nil]
  end

  # Parse a reference date once per distinct value; $dateref keeps the most
  # recently used reference.
  def reference_time(dateref)
    return dateref if dateref.is_a?(Time)
    $dateref_cache ||= {}
    $dateref = ($dateref_cache[dateref] ||= Time.parse(dateref))
  end

  # Remove a trailing options Hash from +values+ and merge it into $plotconf,
  # so that options which are not given keep their current value.
  def merge_plot_options(values)
    $plotconf = $plotconf.merge(values.pop) if Hash === values.last
    values
  end
end
########################################################
# Print any kind of result (Array, Hash or scalar)
########################################################
# Print the final result of a filter run.
#
# * Array of Arrays : one row per line, cells separated by a space
# * other Array     : one element per line
# * Hash            : "key => value" lines, keys right-aligned on the longest
#                     key. A Hash carrying a :type entry is first handed to
#                     the matching Code.end_<type> finaliser, and what that
#                     finaliser returns is printed instead.
# * anything else   : printed with puts
#
# Nothing is printed for an empty Hash or when the finaliser returns nil.
def ppr(data)
  case data
  when Array
    if Array === data.first
      data.each { |row| puts row.join(" ") }
    else
      data.each { |item| puts item.to_s }
    end
  when Hash
    finalizer = data[:type] && "end_#{data[:type]}".to_sym
    data = Code.send(finalizer, data) if finalizer && Code.respond_to?(finalizer)
    return if data.nil? || data.empty?
    # Width is measured on the printed form of the key, whatever its class.
    width = data.keys.map { |k| k.to_s.size }.max
    data.each { |(k, v)| puts "%#{width+1}s => %s" % [k, v] }
  else
    puts data
  end
end
########################################################
# Help
########################################################
# Print the usage text and exit when called without arguments or with
# -h / --help (any prefix of "help" is accepted).
# The heredoc is single-quoted so that the backslashes of the examples
# (\s, \;) are printed literally instead of being read as escape sequences.
if ARGV.size==0 || ARGV.first =~ /^--?h(e(l(p)?)?)?/
  puts <<'EEND'
RubyFilter : as simple as awk, with ruby expression

Usage by pipe or by filenames arguments:
  > cat ddd | rfilter 'expression'
 or
  > rfilter 'expression' *.txt

With
  expression = ruby code, with ( sum(arg)|mult(arg)|toh(a,b)|
               sela {}|sell {} || format(str,args*) {}
  arg can be : _ (line splited); _0 (all line as string), _1 (first word), _2 ... or any ruby data
  At the end of input, current result is printed, if exist

Specials Filters
=================
  cut(*fieldsnum)         | extract some column(s)
  extr(word)              | extract 'data' after word, if detected
  extrd(word)             | debug extract
  match(regexp)           | extract data which match with regexp argument
  delta(value)            | return delta of value on each line
  show()                  | helper for debugging what is receiving on stdin
  grep(exp)               | selection line on regexpression
  sum(i)                  | add all data in a column/data
  mult(i)                 | mutliply all data in a column
  toh(a,b)                | put in Hash h[a]=b ; print Hash at exit
  tohcount(a,b)           | cummulation with key in Hash : h[a]+=b.to_i ; print Hash at exit
  timediff(delta,date)    | print line.last and current if dates differ form more than delta seconds
  sela(&b)                | selection and print as table
  sell(&b)                | selection and print brut line
  format_if(fstr,*args,&b)|format some field if condition eval to true
  format(fstr,*args)      | format some field, inconditionaly

  cut()      : utils when cut whith separator as \s+
  extr()     : utils for extacts some data in xml/json
  delta()    : compare current line with las one
  toh()      : select last data grouped by criterium
  tohcount() : count some event, grouped by criterium
  tohlist()  : add some event, grouped by criterium
  tohh()     : add in hash grouped by criterium
  timediff() : detect some holl in log (inactivity...)
  format()   : print beautiful text

Exemples:
========
Count number of file which have size bigger than 1K:
  > ls -l | rfilter 'sell {_5.to_i>1024}' | rfilter 'sum 1'
Count volumes of files which have size bigger than 1K:
  > ls -l | rfilter 'sell {_5.to_i>1024}' | rfilter 'sum _5'
Count LOC of shell-script files
  > find . -type f -name '*.sh' -exec cat {} \; | rfilter 'sell {_0!~/^\s*#/ && _0.size>0}' | rfilter 'sum 1'
Format lines
  > ls -l | rfilter "format('%-15s %10s',_9,_5)"
Format lines if condition
  > ls -l a*.rb | rfilter "format_if('%15s | %10s',_9,_5) { _5.to_i>1}"
           a.rb |        864
       abool.rb |        888
     allways.rb |        698
        anac.rb |       5777
statistics: "",
Calcultator :
  >echo 10.33 22.44 | rfilter 'puts Math.sin(_1.to_f) ; puts _2.to_f*2'
  -0.7865622859965424
  44.88
EEND
  exit(1)
end
########################################################
# Main
########################################################
# Command line: rfilter [-F|--field-separator REGEXP] 'expression' [files...]
# Each input line is split on the separator (blanks by default) and the
# expression is run once per line.
$cc=nil
regexp=/\s+/
if ARGV.first == "-F" || ARGV.first == "--field-separator"
  ARGV.shift
  regexp = /#{ARGV.shift}/
end
code = ARGV.shift
# Without an expression every line would be read and silently dropped.
abort "rfilter: missing expression (try --help)" unless code

# The expression becomes the body of Code#execute, next to the per-line
# accessors: _ (all fields), _0 (whole line), _1.._55 (one field, "" when
# absent) and nol (line number, starting at 0).
# NOTE(review): the expression is evaluated as Ruby code. That is the purpose
# of the tool, but it must only ever come from the person running it.
clazz=<<EEND
class Code
  (1..55).each {|nop| define_method("_#\{nop}") { make unless $array ; $array[nop-1]||"" } }
  define_method("nol") { @nol }
  define_method("_") { make unless $array ; $array }
  define_method("_0") { $line }
  def initialize(nol,regexp,line)
    @nol=nol
    # scrub: bytes that are not valid in the input encoding would make
    # split and the regexp helpers raise.
    $line=line.scrub.chomp
    $array=nil
    @regexp=regexp
  end
  # Fields are split lazily, only when an expression asks for them.
  def make()
    $array=($line||"").split(@regexp)
  end
  def execute()
    #{code}
  end
end
EEND
#puts clazz
eval clazz

nol=0
STDOUT.sync=true
$oldline=""
$o=0
$stop=false
$startup=Time.now
begin
  ARGF.each do |line|
    Code.new(nol,regexp,line).execute
    $oldline=$line
    nol+=1
    break if $stop
  end
  # Accumulating filters (sum, toh, plot, ...) leave their result in $pending.
  ppr $pending if defined?($pending)
rescue Errno::EPIPE
  # The reader closed the pipe (e.g. `rfilter ... | head`): stop quietly.
  exit(0)
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment