Skip to content

Instantly share code, notes, and snippets.

@glurp
Last active January 28, 2020 14:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save glurp/33775db6e47564975998 to your computer and use it in GitHub Desktop.
Save glurp/33775db6e47564975998 to your computer and use it in GitHub Desktop.
ruby filter: a Unix pipe filter written in Ruby. Like awk (more or less), but using the Ruby language: filtering, calculation, formatting, curve & bar plotting, summarizing, group-by . . .
#!/usr/bin/ruby
#
##########################################################
# rfilter.rb : filter on stdin, split on blank(s)
##########################################################
require 'time'
require 'date'
require 'fileutils'
require 'pp'
require 'json'
########################################################
# Filters primitives
########################################################
class Code
# Print the current line when it matches a pattern.
# str : regexp source, interpolated into /.../
# no  : optional index into the split fields (`_`); when given, only that
#       field is tested, but the whole line is still what gets printed.
def grep(str, no = nil)
  subject = no.nil? ? $line : _[no]
  puts $line if subject =~ /#{str}/
end
# Latch shared by skip_until: true until the first matching line is seen.
$skip = true

# Run the block for the first line matching `filter` and for every line
# after it; earlier lines are ignored.
def skip_until(filter)
  return if $skip && $line !~ filter
  yield
  $skip = false
end
$dateref=nil
# Yield the last timestamp found in the current line when it is strictly
# older than `dateref`.
# dateref : reference date as a String understood by Time.parse
# Yields  : the timestamp text (String) as it appears in the line.
# Lines without a "YYYY-MM-DD hh:mm:ss" style timestamp are ignored.
def older(dateref)
  stamp = $line.scan(%r{\d\d\d\d[-/]\d\d[-/]\d\d[\sT]\d\d:\d\d:\d\d}).last
  return unless stamp
  # Compare parsed times, not raw text: "2020/01/01" and "2020-01-01" do not
  # order correctly as strings. The reference is parsed once per distinct value.
  limit = ($dateref_cache ||= {})[dateref] ||= Time.parse(dateref)
  yield(stamp) if Time.parse(stamp) < limit
end
# Yield the last timestamp found in the current line when it is equal to or
# newer than `dateref`.
# dateref : reference date as a String understood by Time.parse
# Yields  : the timestamp text (String) as it appears in the line.
# Lines without a "YYYY-MM-DD hh:mm:ss" style timestamp are ignored.
def newer(dateref)
  stamp = $line.scan(%r{\d\d\d\d[-/]\d\d[-/]\d\d[\sT]\d\d:\d\d:\d\d}).last
  return unless stamp
  # Compare parsed times, not raw text: "2020/01/01" and "2020-01-01" do not
  # order correctly as strings. The reference is parsed once per distinct value.
  limit = ($dateref_cache ||= {})[dateref] ||= Time.parse(dateref)
  yield(stamp) if Time.parse(stamp) >= limit
end
# Detect gaps between the timestamps of consecutive lines.
# delta_max : threshold in seconds
# date      : optional timestamp (String, or Array of parts joined with a
#             space); by default the first "YYYY-MM-DD<sep>..." token of the
#             current line is used.
# When the gap since the previous timestamp exceeds delta_max, yields
# (gap_seconds, current_time) if a block is given, otherwise prints the
# current line, the gap, and the previous line.
# Lines without a timestamp are ignored.
def timediff(delta_max, date = nil)
  date ||= $line[/(\d\d\d\d[-\/]\d\d[-\/]\d\d[\sT][\S]*)/, 1]
  return unless date
  dt = Time.parse(date.kind_of?(String) ? date : date.join(" "))
  if $last_date
    d = dt - $last_date
    if d > delta_max
      if block_given?
        yield(d, dt)
      else
        puts "#{$line} | #{d} secs"
        puts $oldline
        puts "====="
      end
    end
  end
  $last_date = dt
end
# Yield (previous_value, field) whenever `field` differs from the value
# passed on the previous call; the first call only records the value.
def ifdiff(field)
  changed = defined?($ifdiff) && field != $ifdiff
  yield($ifdiff, field) if changed
  $ifdiff = field
end
# Extract some column(s): print the selected fields of the current line,
# tab-separated. Indexes refer to the split fields array `_`.
def cut(*fieldsnum)
  fields = _
  picked = fieldsnum.collect { |idx| fields[idx] }
  puts picked.join("\t")
end
# Run the external `clear` command and print whatever it outputs.
def clear
  puts %x{clear}
end
# Return the first capture group of /word/ in the current line, or nil when
# the pattern does not match (or contains no capture group).
def extr(word)
  found = $line.match(Regexp.new(word.to_s))
  found && found[1]
end
# Return the first run of digits in `str` (or in the current line when `str`
# is nil) as an Integer; 0 when there are no digits.
def atoi(str = nil)
  text = str || $line
  text[/\d+/].to_i
end
# Return the words that follow `str` in the current line.
# str : regexp source to look for; quotes, colons, commas and blanks right
#       after it are skipped
# x   : 1-based position of the first word to return
# n   : number of words to return
# Returns an Array of words (nil when x is past the end, as Array#[] does),
# or an empty Array when `str` is not present in the line.
def after(str, x = 1, n = 1)
  tail = $line[/#{str}[":,\s]*(.*)$/, 1]
  return [] unless tail
  tail.split(/\s+/)[x - 1, n]
end
# Debug helper for extraction: when the current line matches /word/, print
# the line, then inspect-print `word` followed by nine characters and the
# next run of non-blank characters.
def extrd(word)
  return unless $line.match(/#{word}/)
  puts $line
  p $line[/(#{word}.........\S+)/, 1]
end
# Print the part of the current line matched by `regexp`; print nothing when
# there is no match.
def match(regexp)
  found = $line.match(regexp)
  return unless found
  puts found.values_at(0)
end
# Helper for debugging what is received on stdin: print the current line and
# each of its fields with its 1-based name (_1, _2, ...), then exit with
# status 1.
def show
  puts "line: #{$line}"
  _.each.with_index(1) { |value, pos| puts " _#{pos} => '#{value}'" }
  exit(1)
end
# Add all data in a column: accumulate i (converted with to_i) into the
# pending result, which starts at 0.
def sum(i)
  $pending = ($pending || 0) + i.to_i
end
# Accumulate running statistics for one value per line.
# i : the value (converted with to_f)
# Maintains count, min, max, sum and sum of squares in the pending result;
# the summary is computed from those when the pending result is printed.
def statistics(i)
  value = i.to_f
  # Start min/max at +/- infinity so that any finite value replaces them
  # (fixed sentinels silently gave wrong extrema for very large magnitudes).
  $pending ||= {type: :statistics, count: 0, min: Float::INFINITY, max: -Float::INFINITY, sum: 0, sum2: 0}
  $pending[:count] += 1
  $pending[:min] = value if value < $pending[:min]
  $pending[:max] = value if value > $pending[:max]
  $pending[:sum] += value
  $pending[:sum2] += value * value
end
# Build the printable summary from the accumulators kept by `statistics`.
# data : Hash with :count, :min, :max, :sum, :sum2
# Returns a Hash with :count, :min, :max, :mean, :variance and :stddev
# (population variance / standard deviation).
def self.end_statistics(data)
  summary = {}
  %i{count min max}.each { |k| summary[k] = data[k] }
  count = data[:count]
  return summary if count.nil? || count.zero?
  summary[:mean] = data[:sum].to_f / count
  # n*sum(x^2) - sum(x)^2 can come out slightly negative through float
  # rounding; clamp it so the square root stays real instead of Complex.
  radicand = [count * data[:sum2] - data[:sum] * data[:sum], 0.0].max
  summary[:variance] = radicand / (count * count)
  summary[:stddev] = Math.sqrt(radicand) / count
  summary
end
###################### Plotting
# plot() multi-curve with one call; multi shot curve
# splot() multi-curve, multi-shot curve
# bplot() mono-curve, multi shot bar
# rplot() multi-curve, mono shot, with x-label curve
#
# for n in {1..10}; do echo $n ; done | rfilter 'plot(_1,title:"Vmstat")'
# for n in {1..10}; do echo $n $((n+10)); done | rfilter 'plot(_1,_2,title:"Vmstat",name:%w{a b})'
# for n in {1..10}; do echo $n $((n+10)); done | rfilter 'splot("a",_1);splot("b",_2)'
# vmstat 1 | rfilter 'bplot(nol,_2,title:"Vmstat"); stop_after(10)'
# (echo "a 1 2 3 4 5 6" ; echo "b 6 5 4 3 1") | rfilter 'rplot(_1,_[1..-1])'
#
# Plot settings shared by the plot filters: :title is the chart title and
# :name the list of series names.
$plotconf = {title: "RFilter", name: []}

# Record one point per curve for the current line.
# v : one value per curve (converted with to_f); a trailing Hash, if any, is
#     taken as plot options (e.g. title:, name:) and merged into the shared
#     settings so that options not given keep their current value.
def plot(*v)
  $pending ||= {type: :plot, data: []}
  $plotconf = ($plotconf || {}).merge(v.pop) if Hash === v.last
  $pending[:data] << v.map(&:to_f)
end
# Append one value to the named curve; curves are plotted together when the
# pending result is printed.
# name  : curve name, used as the key of the series
# value : the value (converted with to_f)
def splot(name, value)
  # Series are keyed by name, so the container must be a Hash.
  $pending ||= {type: :rplot, data: {}}
  $plotconf ||= {}
  unless $pending[:data].key?(name)
    ($plotconf[:name] ||= []) << name
    $pending[:data][name] = []
  end
  $pending[:data][name] << value.to_f
end
# Append a list of values to the named curve.
# name   : curve name, used as the key of the series
# lvalue : list of values (each converted with to_f)
# h      : optional Hash of plot options, merged into the shared settings so
#          that options not given (e.g. the title) keep their current value
def rplot(name, lvalue, h = nil)
  $pending ||= {type: :rplot, data: {}}
  $plotconf = ($plotconf || {}).merge(h) if h
  ($pending[:data][name] ||= []).concat(lvalue.map(&:to_f))
end
# Record one labelled group of bars for the current line.
# label : label of the group
# v     : one value per series (converted with to_f); a trailing Hash, if
#         any, is taken as plot options and merged into the shared settings
#         so that options not given keep their current value.
def bplot(label, *v)
  $pending ||= {type: :bplot, data: []}
  $plotconf = ($plotconf || {}).merge(v.pop) if Hash === v.last
  $pending[:data] << [label, v.map(&:to_f)]
end
# Finalizer for results of type :plot; rendering is delegated to end_allplot.
def self.end_plot(data)
  end_allplot(data)
end
# Finalizer for results of type :bplot; rendering is delegated to end_allplot.
def self.end_bplot(data)
  end_allplot(data)
end
# Finalizer for results of type :rplot.
# data : Hash whose :data maps each curve name to its list of values
# Prints a short preview of each curve, uses the curve names as series
# names, converts the per-curve lists into one row per sample (missing
# samples become 0), retags the result as :plot and hands it to end_allplot.
# Returns nil when there is nothing to plot.
def self.end_rplot(data)
  series = data[:data]
  return nil if series.nil? || series.empty?
  series.each { |name, values| p [name, values[0..10]] }
  ($plotconf ||= {})[:name] = series.keys
  len = series.values.map(&:size).max
  puts "nb mesures : #{len}\n\n"
  curves = series.values
  data[:data] = (0...len).map { |index| curves.map { |values| values[index] || 0 } }
  data[:type] = :plot
  end_allplot(data)
end
# Render accumulated plot data to a PNG with the gruff gem and try to open
# it with the external `ruiby` viewer.
# data : Hash with :type (:plot or :bplot) and :data
#        :plot  -> list of rows, one value per curve in each row
#        :bplot -> list of [label, values] pairs
# Returns nil when there is nothing to plot, otherwise a Hash with the image
# :filename (nil when the type is unknown), the :count of rows and a :usage
# hint. Prints a message and exits with status 1 when gruff is missing.
def self.end_allplot(data)
  points = data[:data]
  return unless points && points.size > 0 && points.first.size > 0

  require 'tempfile'
  begin
    require 'gruff'
  rescue LoadError
    # LoadError is not a StandardError, so a `rescue` modifier cannot catch it.
    puts "gruff not installed!\n install libmagickwand-dev and libmagickcore-dev ;\n and the : gem install gruff "
    exit(1)
  end

  conf = $plotconf || {}
  names = Array(conf[:name])
  series_name = lambda { |no| names[no] || "measures #{no+1}" }

  graph = nil
  case data[:type]
  when :plot
    graph = Gruff::Line.new(600)
    graph.title = conf[:title]
    puts points.inspect[0..100]
    points.first.size.times do |no|
      graph.data(series_name.call(no), points.map { |pt| pt[no] })
    end
  when :bplot
    graph = Gruff::Bar.new(600)
    graph.title = conf[:title]
    points.first.last.size.times do |no|
      graph.data(series_name.call(no), points.map { |pt| pt.last[no] })
    end
    graph.labels = points.each_with_index.each_with_object({}) do |(pt, idx), labels|
      labels[idx] = pt.first || idx.to_s
    end
  else
    puts "unknown plot type #{data[:type]}"
  end

  filename = nil
  if graph
    # Reserve a unique .png path in the temp directory (no stray directory).
    image = Tempfile.create(['plot-', '.png'])
    image.close
    filename = image.path
    graph.write(filename)
    %x{ruiby 'image("#{filename}")' &}
  end
  {filename: filename, count: points.size, usage: "Use display or eog or feh for view image..."}
end
# Multiply all data in a column: fold i (converted with to_i) into the
# pending result, which starts at 1.
def mult(i)
  $pending = ($pending || 1) * i.to_i
end
# Put in Hash: h[a] = b (a later value for the same key replaces the earlier
# one); the Hash is printed at exit.
def toh(a, b)
  ($pending ||= {})[a] = b
end
# Cumulate in Hash: h[a] += b (b converted with to_i, 1 by default); the
# Hash is printed at exit.
def tohcount(a, b = "1")
  counts = ($pending ||= {})
  counts[a] = (counts[a] || 0) + b.to_i
end
# Push in Hash: append b to the list stored under h[a]; the Hash of Arrays
# is printed at exit.
def tohlist(a, b = "?")
  buckets = ($pending ||= {})
  buckets[a] = (buckets[a] || []) << b
end
# Put in Hash of Hashes: h[a][b] = c; the Hash is printed at exit.
def tohh(a, b, c)
  (($pending ||= {})[a] ||= {})[b] = c
end
# Selection printed as a table: when the block, called with (line, fields),
# returns a truthy value, print the fields joined with tabs.
def sela(&b)
  selected = yield($line, _)
  puts _.join("\t") if selected
end
# Selection printed as a raw line: when the block, called with
# (line, fields), returns a truthy value, print the current line unchanged.
def sell(&b)
  selected = yield($line, _)
  puts $line if selected
end
# Format some fields if a condition holds: when the block, called with
# (line, fields), returns a truthy value, print fstr % args.
def format_if(fstr, *args, &b)
  return unless yield($line, _)
  puts(fstr % args)
end
# Format some fields, unconditionally: print fstr % args.
def format(fstr, *args)
  text = fstr % args
  puts(text)
end
# Return the difference between `value` (converted with to_f) and the value
# passed on the previous call for the same slot `no`; 0.0 on the first call.
def delta(value, no = 0)
  current = value.to_f
  history = ($hdelta ||= {})
  previous = history[no] || current
  history[no] = current
  current - previous
end
# Request the end of processing once at least n whole seconds have elapsed
# since $startup.
def stop_after(n)
  elapsed = (Time.now - $startup).to_i
  $stop = true if elapsed >= n
end
# Request the end of processing once the current line number (`nol`) has
# reached n.
def stop_nol(n)
  reached = nol >= n
  $stop = true if reached
end
# Request the end of processing when v is truthy.
def stop_if(v)
  return unless v
  $stop = true
end
end
########################################################
# output anything in string
########################################################
# Print the final result in a readable form.
# data : Array  -> one element per line (nested Arrays joined with a space)
#        Hash   -> "key => value" lines, keys right-aligned; a Hash carrying
#                  a :type entry is first passed to Code.end_<type> and the
#                  value returned by that finalizer is printed instead
#        other  -> printed with puts
# Prints nothing for an empty Hash or when the finalizer returns nil.
def ppr(data)
  case data
  when Array
    if Array === data.first
      data.each { |row| puts row.join(" ") }
    else
      data.each { |item| puts item.to_s }
    end
  when Hash
    data = Code.send("end_#{data[:type]}".to_sym, data) if data[:type]
    return if data.nil? || data.empty?
    # Width of the printed key, not key.size (which is a byte count for Integers).
    width = data.keys.map { |k| k.to_s.size }.max
    data.each { |k, v| puts "%#{width + 1}s => %s" % [k, v] }
  else
    puts data
  end
end
########################################################
# Help
########################################################
# Print the usage text and exit with status 1 when no argument is given or
# when the first argument starts with -h or --h (e.g. -h, --he, --help).
# The text between the EEND markers is an interpolating heredoc, printed
# as-is by puts.
if ARGV.size==0 || ARGV.first =~ /^--?h(e(l(p)?)?)?/
puts <<EEND
RubyFilter : as simple as awk, with ruby expression
Usage by pipe or by filenames arguments:
> cat ddd | rfilter 'expression'
or
> rfilter 'expression' *.txt
With
expression = ruby code, with ( sum(arg)|mult(arg)|toh(a,b)|
sela {}|sell {} || format(str,args*) {}
arg can be : _ (line splited); _0 (all line as string), _1 (first word), _2 ... or any ruby data
At the end of input, current result is printed, if exist
Specials Filters
=================
cut(*fieldsnum) | extract some column(s)
extr(word) | extract 'data' after word, if detected
extrd(word) | debug extract
match(regexp) | extract data which match with regexp argument
delta(value) | return delta of value on each line
show() | helper for debugging what is receiving on stdin
grep(exp) | selection line on regexpression
sum(i) | add all data in a column/data
mult(i) | mutliply all data in a column
toh(a,b) | put in Hash h[a]=b ; print Hash at exit
tohcount(a,b) | cummulation with key in Hash : h[a]+=b.to_i ; print Hash at exit
timediff(delta,date) | print line.last and current if dates differ form more than delta seconds
sela(&b) | selection and print as table
sell(&b) | selection and print brut line
format_if(fstr,*args,&b)|format some field if condition eval to true
format(fstr,*args) | format some field, inconditionaly
cut() : tils when cut whith separator as \s+
extr() : utils for extacts some data in xml/json
dalta() : compare current line with las one
toh() : select last data grouped by criterium
tohcount() : count some event, grouped by criterium
tohlist() : add some event, grouped by criterium
tohh() : ,dd in hash grouped by criterium
timediff() : detect some holl in log (inactivity...)
format() : print beautiful text
Exemples:
========
Count number of file which have size bigger than 1K:
> ls -l | rfilter 'sell {_5.to_i>1024}' | rfilter 'sum 1'
Count volumes of files which have size bigger than 1K:
> ls -l | rfilter 'sell {_5.to_i>1024}' | rfilter 'sum _5'
Count LOC of shell-script files
> find . -type f -name '*.sh' -exec cat {} \; | rfilter 'sell {_0!=/^\s*#/ && _0.size>0}' | rfilter 'sum 1'
Format lines
> ls -l | rfilter "format('%-15s %10s',_9,_5)"
Format lines if condition
> ls -l a*.rb | rfilter "format_if('%15s | %10s',_9,_5) { _5.to_i>1}"
a.rb | 864
abool.rb | 888
allways.rb | 698
anac.rb | 5777
statistics: "",
Calcultator :
>echo 10.33 22.44 | rfilter 'puts Math.sin(_1.to_f) ; puts _2.to_f*2'
-0.7865622859965424
44.88
EEND
exit(1)
end
########################################################
# Main
########################################################
# Command-line parsing.
$cc=nil
# Default field separator: runs of whitespace.
regexp=/\s+/
# Optional "-F <sep>" / "--field-separator <sep>": the next argument is used
# as the source of the field-separator regexp.
if ARGV.first == "-F"|| ARGV.first == "--field-separator"
ARGV.shift
regexp= /#{ARGV.shift}/
end
# The next argument is the Ruby expression to run for each line; whatever
# remains in ARGV is left for ARGF below.
code=ARGV.shift
# Source text that reopens class Code to add the per-line machinery:
#   _1 .. _55 : the n-th field of the current line ("" when absent)
#   nol       : the line number given to the constructor
#   _         : all fields of the current line (split lazily, on first use)
#   _0        : the whole current line
#   execute   : the user's expression
# "#{code}" is interpolated when this heredoc is built, so the user's
# expression becomes the body of execute; "#\{nop}" is escaped so that it is
# only interpolated when the generated text is evaluated.
clazz=<<EEND
class Code
(1..55).each {|nop| define_method("_#\{nop}") { make unless $array ; $array[nop-1]||"" } }
define_method("nol") { @nol }
define_method("_") { make unless $array ; $array }
define_method("_0") { $line }
def initialize(nol,regexp,line)
@nol=nol
$line=line.chomp
$array=nil
@regexp=regexp
end
def make()
$array=($line||"").split(@regexp)
end
def execute()
#{code}
end
end
EEND
#puts clazz
# NOTE(review): this evaluates the command-line expression as Ruby code.
eval clazz
# Main loop: run the user's expression once per input line.
nol=0
# Unbuffered output.
STDOUT.sync=true
# Previous line, as printed by the default timediff report.
$oldline=""
$o=0
# Set by the stop_* filters to end processing early.
$stop=false
# Start time, read by stop_after.
$startup=Time.now
# ARGF iterates over the lines of the files named in the remaining
# arguments, or of stdin when there are none.
ARGF.each do |line|
Code.new(nol,regexp,line).execute
$oldline=$line
nol+=1
break if $stop
end
# Print the accumulated result, if any filter produced one.
if defined?($pending)
ppr $pending
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment