Skip to content

Instantly share code, notes, and snippets.

@vznvzn
Created January 8, 2021 02:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vznvzn/72b1221fa4c92437975be8d6ff74a698 to your computer and use it in GitHub Desktop.
Save vznvzn/72b1221fa4c92437975be8d6ff74a698 to your computer and use it in GitHub Desktop.
require 'statsample'
# Reads a gnuplot command file and parses its inline data block into row hashes.
#
# Expected layout: a first line `$dat << eof`, then a whitespace-separated line of
# column names, then one whitespace-separated row per record, then a line `eof`.
# Anything after the `eof` line is ignored.
#
# Every cell is evaluated as a Ruby expression; the 'ns' column is then converted
# back to a String with to_s.
#
# @param fn [String] path of the file to read
# @return [Array<Hash>] one hash per data row, keyed by column name
# @raise [RuntimeError] if the header, the column line or the `eof` terminator is missing
def outin(fn)
  # File.readlines opens and closes the file itself, so no handle can leak.
  lines = File.readlines(fn)
  raise "#{fn}: missing '$dat << eof' header" if lines.empty? || lines.shift.chomp != '$dat << eof'
  header = lines.shift
  raise "#{fn}: missing column name line" if header.nil?
  cols = header.split
  # Compare chomped lines so a final `eof` without a trailing newline still terminates the block.
  stop = lines.index { |line| line.chomp == 'eof' }
  raise "#{fn}: missing 'eof' terminator" if stop.nil?
  rows = lines[0...stop].map do |line|
    # NOTE(review): Kernel.eval executes whatever the file contains; only read trusted files.
    # A literal NaN cell is not valid Ruby and makes eval raise NameError.
    Hash[[cols, line.split.map { |cell| Kernel.eval(cell) }].transpose]
  end
  rows.each { |row| row['ns'] = row['ns'].to_s }
  $stderr.puts([fn, rows.size].inspect)
  rows
end
# Reads every file "gnuplot<suffix>.cmd" for the given suffixes through outin and
# returns all parsed rows as one flat array, in suffix order.
#
# NOTE: being a top-level def named `load`, this takes precedence over Kernel#load
# for receiver-less calls in this script.
#
# @param l1 [Array] file name suffixes
# @return [Array<Hash>] concatenated rows
def load(l1 = [])
  l1.flat_map { |suffix| outin("gnuplot#{suffix}.cmd") }
end
# Writes the rows as a gnuplot inline data block: a `$dat << eof` line, a
# tab-separated header taken from the first row's keys, one tab-separated line of
# values per row, and a closing `eof` line.
#
# @param f [IO] anything responding to puts
# @param l [Array<Hash>] rows; the first row supplies the column names
# @return [Array] the column names written in the header
def outd(f, l)
  f.puts('$dat << eof')
  columns = l.first.keys
  f.puts(columns.join("\t"))
  l.each do |row|
    # Values are written in each row's own key order.
    f.puts(row.values.join("\t"))
  end
  f.puts('eof')
  columns
end
# Writes a complete gnuplot script to f: the inline data block (via outd), a few
# `set` commands, and a `plot` command with one clause per column.
#
# If a column named 't' exists it is used as the x value of every clause and is not
# plotted itself. Per-column options come from `a`:
#   * key present with a nil value -> the column is skipped
#   * otherwise the option string is used, with ' lw 2 ' appended when it has no 'lw'
#     and ' with line ' appended when it has neither 'with' nor 'pt'
#
# @param f [IO] output stream
# @param l [Array<Hash>] rows to plot
# @param a [Hash] column name => gnuplot option string (or nil to hide the column)
# @param t [String] plot title
# @return [nil]
def outa(f, l, a = {}, t = '')
  columns = outd(f, l)
  f.puts("set colors classic; set title '#{t}'; ")
  f.puts("set ytics nomirror; set y2tics;")
  f.puts("plot \\")
  x_prefix = nil
  if columns.member?('t')
    columns = columns - ['t']
    x_prefix = "(column('t')):"
  end
  columns.each do |name|
    next if a.member?(name) && a[name].nil?
    style = a.fetch(name, '')
    style += ' lw 2 ' unless style.include?('lw')
    style += ' with line ' unless style.include?('with') || style.include?('pt')
    f.puts("'$dat' using #{x_prefix}(column('#{name}')) #{style} title '#{name}',\\")
  end
  # The blank line ends the backslash-continued plot command.
  f.puts
  # f.puts("reset; pause -1;")
end
# Writes the plot script for `l` (see outa) to the file "gnuplot<fno>.cmd" in the
# current directory and reports file name, title and row count on stderr.
#
# @param l [Array<Hash>] rows to plot
# @param a [Hash] per-column gnuplot options, passed through to outa
# @param fno [Object, nil] file name suffix; nil yields "gnuplot.cmd"
# @param t [String] plot title
# @return [nil]
def outafn(l, a = {}, fno = nil, t = '')
  fn = "gnuplot#{fno}.cmd"
  # Block form closes (and flushes) the file even when outa raises.
  File.open(fn, 'w') { |f| outa(f, l, a, t) }
  $stderr.puts([fn, t, l.size].inspect)
end
# Splits the reversed string into maximal runs of identical characters.
#
# @param ns [String]
# @return [Array<Array>] one [run, index] pair per run, in order of appearance in the
#   reversed string; index is the position of the run's last character in the
#   reversed string
def runs(ns)
  reversed = ns.reverse
  found = []
  current = ''
  reversed.each_char.with_index do |ch, idx|
    current += ch
    # A run ends where the next character differs (or the string ends).
    next if reversed[idx + 1] == ch
    found << [current, idx]
    current = ''
  end
  found
end
# Debug logger, currently switched off: the leading bare `return` makes every call a
# no-op that returns nil. The statements after it are kept so logging can be
# re-enabled by deleting that line; they would print each distinct value once to
# stderr, remembering seen values in the global $x.
#
# @param x [Object] value to log
# @return [nil]
def log(x)
  return
  $x ||= []
  return if $x.include?(x)
  $x << x
  $stderr.puts(x.inspect)
end
# Mean and population standard deviation of a list of numbers.
#
# The variance is computed as mean(x^2) - mean(x)^2 and clamped at 0 before the
# square root, so small negative rounding errors do not produce NaN.
#
# @param k [Object] suffix appended to the result keys
# @param l [Array<Numeric>] values
# @return [Hash] {"a<k>" => mean, "s<k>" => standard deviation}
# @raise [RuntimeError] if the standard deviation is NaN (e.g. for an empty list)
def stat(k, l)
  count = l.size
  total = l.reduce(:+)
  total_sq = l.reduce(0) { |acc, v| acc + (v ** 2) }
  mean = total.to_f / count
  variance = total_sq.to_f / count - mean ** 2
  clamped = variance < 0 ? 0 : variance
  deviation = Math.sqrt(clamped)
  raise [l, count, variance].inspect if deviation.nan?
  {"a#{k}" => mean, "s#{k}" => deviation}
end
# Adds up the elements of l with `+`, starting from the first element.
# Returns nil for an empty list.
#
# NOTE: this file defines `sum` a second time further down; since Ruby keeps the
# most recent definition, that later one is what callers get once the whole file
# has been loaded.
#
# @param l [Array]
# @return [Object, nil] the total, or nil when l is empty
def sum(l)
  l.reduce(:+)
end
# Arithmetic mean of l: sum(l) converted to Float, divided by the element count.
# For an empty list the division is by zero in floating point (NaN).
#
# @param l [Array<Numeric>]
# @return [Float]
def avg(l)
  total = sum(l)
  total.to_f / l.size
end
# Like avg, but yields the Integer 0 for an empty list instead of dividing by zero.
#
# @param l [Array<Numeric>]
# @return [Float, Integer]
def avg2(l)
  return 0 if l.empty?
  avg(l)
end
# Like stat, but yields zeros for an empty list instead of raising.
#
# @param k [Object] suffix appended to the result keys
# @param l [Array<Numeric>]
# @return [Hash] {"a<k>" => mean, "s<k>" => standard deviation}
def stat2(k, l)
  return {"a#{k}" => 0, "s#{k}" => 0} if l.empty?
  stat(k, l)
end
# Summary statistics of a list of numbers and of the gaps between its sorted values.
#
# NOTE: sorts `l` in place.
#
# Result keys:
#   'm'  element at index size/2 of the sorted list
#   'ma' avg2 of the elements greater than 'm'
#   'am' mean divided by 'm'
#   'mn', 'mx', 'c' minimum, maximum, element count
#   'a', 's'   stat2 of the list itself
#   'a2', 's2' stat2 of the differences between consecutive sorted elements
#
# @param l [Array<Numeric>] values (mutated: sorted)
# @return [Hash]
def diffs(l)
  base = stat2(nil, l)
  l.sort!
  median = l[l.size / 2]
  above = l.select { |v| v > median }
  gaps = l.each_cons(2).map { |lo, hi| hi - lo }
  summary = {
    'm' => median,
    'ma' => avg2(above),
    'am' => base['a'] / median,
    'mn' => l.min,
    'mx' => l.max,
    'c' => l.size,
  }
  summary.merge(base).merge(stat2('2', gaps))
end
# Selects entries from a list of [string, index] pairs (the shape produced by runs).
#
# Steps, in order:
#   1. if b is given, keep only pairs whose string starts with b.to_s
#   2. if t is given, return the first t pairs after ordering by [string length, index]
#      descending
#   3. otherwise, if t2 is given, return the pairs whose string length is >= t2
#   4. otherwise return the (possibly b-filtered) list
#
# @param l [Array<Array>] [string, index] pairs
# @param b [Object, nil] required first character (compared via to_s)
# @param t [Integer, nil] how many of the longest entries to keep
# @param t2 [Integer, nil] minimum string length
# @return [Array<Array>]
def filter(l, b = nil, t = nil, t2 = nil)
  log({'b' => b, 't' => t})
  unless b.nil?
    wanted = b.to_s
    l = l.select { |pair| pair[0][0] == wanted }
  end
  unless t.nil?
    ranked = l.sort_by { |pair| [pair[0].length, pair[1]] }
    return ranked.reverse[0...t]
  end
  return l if t2.nil?
  l.select { |pair| pair[0].length >= t2 }
end
# For every record, finds the runs of its 'ns' string (runs), optionally narrows them
# (filter), and summarises the positions of the remaining runs (diffs).
#
# The narrowing depends on w:
#   1 -> the 3 longest runs starting with b
#   2 -> the 10 longest runs starting with b
#   3 -> the 15 longest runs starting with b
#   4 -> runs starting with b whose length is at least 3
#   anything else -> no narrowing (b is ignored)
#
# @param l [Array<Hash>] records with 'ns', 'hc', 'cm' and 'cg' entries
# @param w [Integer] narrowing mode, see above
# @param b [Object] required first character of a run (compared via to_s in filter)
# @return [Array<Hash>] per record: the diffs summary plus the record's 'hc', 'cm', 'cg'
def review3(l, w, b = 1)
  # Trailing (t, t2) arguments for filter; nil means "do not filter".
  limits = case w
           when 1 then [3, nil]
           when 2 then [10, nil]
           when 3 then [15, nil]
           when 4 then [nil, 3]
           end
  l.map do |rec|
    found = runs(rec['ns'])
    found = filter(found, b, *limits) if limits
    positions = found.map { |_run, idx| idx }
    diffs(positions).merge({'hc' => rec['hc'], 'cm' => rec['cm'], 'cg' => rec['cg']})
  end
end
# Fraction of the characters of s that are '1'.
# An empty string gives NaN (0.0 divided by 0).
#
# @param s [String]
# @return [Float]
def d(s)
  s.count('1').to_f / s.length
end
# Splits ns on the pattern p and returns the lengths of the pieces, ignoring one
# leading empty piece (which split yields when ns starts with a separator).
#
# @param ns [String]
# @param p [Regexp, String] separator pattern
# @return [Array<Integer>] piece lengths in order
def len(ns, p)
  pieces = ns.split(p) || []
  pieces = pieces.drop(1) if pieces.first == ''
  pieces.map { |piece| piece.length }
end
# Lengths of the pieces of ns between runs of '0' (see len); for a 0/1 string these
# are the lengths of its runs of '1'.
#
# @param ns [String]
# @return [Array<Integer>]
def len1(ns)
  zero_runs = /0+/
  len(ns, zero_runs)
end
# Lengths of the pieces of ns between runs of '1' (see len); for a 0/1 string these
# are the lengths of its runs of '0'.
#
# @param ns [String]
# @return [Array<Integer>]
def len0(ns)
  one_runs = /1+/
  len(ns, one_runs)
end
# Both run-length lists of ns as a two-element array: [len1(ns), len0(ns)].
#
# @param ns [String]
# @return [Array<Array<Integer>>]
def len01(ns)
  [len1(ns), len0(ns)]
end
# Number of runs reported by len01 (both kinds together) divided by the length of ns.
#
# @param ns [String]
# @return [Float]
def e(ns)
  run_lengths = len01(ns).flatten
  run_lengths.size.to_f / ns.length
end
# Base-2 logarithm, computed as the ratio of natural logarithms.
#
# @param x [Numeric]
# @return [Float]
def log2(x)
  natural = Math.log(x)
  natural / Math.log(2.0)
end
# Splits ns around its middle index m = length / 2 and returns two substrings:
# first the part from m to the end, then the part from the start through m.
# Both parts include the character at index m.
#
# @param ns [String]
# @return [Array<String>] [ns[m..-1], ns[0..m]]
def lh(ns)
  mid = ns.length / 2
  tail = ns[mid..-1]
  head = ns[0..mid]
  [tail, head]
end
# Densities of '1' (see d) for ns and for the two parts returned by lh, each offset
# by -0.5, plus the difference between the two part densities.
#
# @param ns [String]
# @return [Array<Float>] [d(ns) - 0.5, d(first part) - 0.5, d(second part) - 0.5,
#   d(first part) - d(second part)]
def dlh(ns)
  first_part, second_part = lh(ns)
  first_density = d(first_part)
  second_density = d(second_part)
  [d(ns) - 0.5, first_density - 0.5, second_density - 0.5, first_density - second_density]
end
# Density of '1' (see d) in the leading slice ns[0..length/4], i.e. the first
# length/4 + 1 characters.
#
# @param ns [String]
# @return [Float]
def d4(ns)
  quarter_end = ns.length / 4
  d(ns[0..quarter_end])
end
# Run ratio (see e) of the leading slice ns[0..length/4], i.e. the first
# length/4 + 1 characters.
#
# @param ns [String]
# @return [Float]
def e4(ns)
  quarter_end = ns.length / 4
  e(ns[0..quarter_end])
end
# Walks ns from the left, adding up the digit values, and stops once the running
# total reaches length/4 (or the string ends). Returns the number of characters
# consumed as a fraction of the string length.
#
# @param ns [String] digit string
# @return [Float] consumed characters / length (NaN for an empty string)
def midpt(ns)
  target = ns.length / 4
  total = 0
  consumed = 0
  ns.each_char do |ch|
    break unless total < target
    total += ch.to_i
    consumed += 1
  end
  consumed.to_f / ns.length
end
# midpt applied to the reversed string, i.e. the same walk started from the right.
#
# @param ns [String]
# @return [Float]
def midpt1(ns)
  flipped = ns.reverse
  midpt(flipped)
end
# Number of '1' characters in s.
#
# @param s [String]
# @return [Integer]
def d1(s)
  s.count('1')
end
# Like midpt, but the target is half the number of '1' characters in ns (d1(ns) / 2)
# instead of a quarter of the length: walks ns from the left until the running
# digit total reaches that target and returns the consumed fraction of the string.
#
# @param ns [String] digit string
# @return [Float] consumed characters / length (NaN for an empty string)
def midpt2(ns)
  target = d1(ns) / 2
  total = 0
  consumed = 0
  ns.each_char do |ch|
    break unless total < target
    total += ch.to_i
    consumed += 1
  end
  consumed.to_f / ns.length
end
# Returns x unless it equals 0, in which case the Integer 1 is returned.
# Used to keep divisors away from zero.
#
# @param x [Numeric]
# @return [Numeric]
def not0(x)
  return 1 if x == 0
  x
end
# Replaces non-finite values (NaN, +/-Infinity) in place: for every key of the first
# row, any row whose value is not finite gets the avg of that column's finite values.
# Columns without non-finite values are left alone.
#
# @param l1 [Array<Hash>] rows of numeric values (mutated)
# @return [Array] the keys of the first row
def fix(l1)
  l1[0].keys.each do |key|
    finite_values = l1.map { |row| row[key] }.select { |v| v.finite? }
    # Every value finite: nothing to repair in this column.
    next if finite_values.size == l1.size
    replacement = avg(finite_values)
    l1.each do |row|
      row[key] = replacement unless row[key].finite?
    end
  end
end
# Computes a feature row for every record from its 'ns' string and returns the rows
# together with the record's 'hc', 'cm' and 'cg' values.
#
# Features are collected in four groups, appended to l1..l4:
#   l1: s1..s5 / t1..t5 - combinations of the midpoint offsets (midpt / midpt2 minus
#       0.5) with the density difference between the two parts of the string (dlh)
#   l2: a0, a1, a01, a01t, mx0, mx1, mx01 - average and maximum run lengths
#   l3: e4, d4, d12, d1, d1a, d2, d2a, d, da - density figures and their absolute values
#   l4: a01d, mp, mpa, mp2, mp2a, a0m, a1m - midpoint offsets and scaled averages
# Non-finite values in the l1 group are replaced by column averages (fix); the other
# groups are not repaired.
#
# @param l [Array<Hash>] records with 'ns', 'hc', 'cm', 'cg'
# @param l1 [Array<Hash>] accumulator for the first feature group
# @param l2 [Array<Hash>] accumulator for the second feature group
# @param l3 [Array<Hash>] accumulator for the third feature group
# @param l4 [Array<Hash>] accumulator for the fourth feature group
# @return [Array<Hash>] one merged row per record
def data(l, l1 = [], l2 = [], l3 = [], l4 = [])
  l.each do |rec|
    ns = rec['ns']
    d, d1, d2, d12 = dlh(ns)
    mp = midpt(ns) - 0.5
    mp2 = midpt2(ns) - 0.5
    s1 = mp.abs / not0(d12.abs)
    s3 = mp.abs * d12.abs
    s5 = -mp * d12
    t1 = mp2.abs / not0(d12.abs)
    t3 = mp2.abs * d12.abs
    t5 = -mp2 * d12
    zero_runs = len0(ns)
    one_runs = len1(ns)
    # A string without any '0' has no zero runs; treat its longest zero run as 0.
    mx0 = zero_runs.max || 0
    mx1 = one_runs.max
    mx01 = [mx0, mx1].max
    # ns is read as a base-2 integer; mx is log2(log2(value)).
    mx = log2(log2(ns.to_i(2).to_f))
    a0 = avg(zero_runs)
    a1 = avg(one_runs)
    a01 = avg(len01(ns).flatten)
    l1 << {
      's1' => s1,
      's2' => 1 / s1,
      's4' => Math.log(1 / s3),
      's3' => s3,
      's5' => -s5,
      't1' => t1,
      't2' => 1 / t1,
      't4' => Math.log(1 / t3),
      't3' => t3,
      't5' => -t5,
    }
    l2 << {
      'a0' => a0,
      'a1' => a1,
      'a01' => a01,
      'a01t' => (a0 + a1) / 2,
      'mx0' => mx0,
      'mx1' => mx1,
      'mx01' => mx01,
    }
    l3 << {
      'e4' => e4(ns) - 0.5,
      'd4' => d4(ns) - 0.5,
      'd12' => d12,
      'd1' => d1,
      'd1a' => d1.abs,
      'd2' => d2,
      'd2a' => d2.abs,
      'd' => d,
      'da' => d.abs
    }
    l4 << {
      'a01d' => a0 + a1 - 2 * a01,
      'mp' => mp,
      'mpa' => mp.abs,
      'mp2' => mp2,
      'mp2a' => mp2.abs,
      'a0m' => a0 / mx,
      'a1m' => a1 / mx,
    }
  end
  fix(l1)
  l.each_index.map do |i|
    l1[i].merge(l2[i]).merge(l3[i]).merge(l4[i])
         .merge({'hc' => l[i]['hc'], 'cm' => l[i]['cm'], 'cg' => l[i]['cg']})
  end
end
# Builds the option hash that assigns each given column the gnuplot option
# string 'axes x1y2'.
#
# @param x2 [Array] column names
# @return [Hash] column name => 'axes x1y2'
def axes2(x2)
  x2.each_with_object({}) do |name, options|
    options[name] = 'axes x1y2'
  end
end
# Evaluates a linear model: the constant z['c'] plus, for every other key of z, the
# coefficient z[key] times the value x[key].
#
# @param x [Hash] variable name => value
# @param z [Hash] variable name => coefficient, with the constant under 'c'
# @return [Numeric]
def dot(x, z)
  weights = z.reject { |name, _coef| name == 'c' }
  weights.inject(z['c']) do |total, (name, coef)|
    total + coef * x[name]
  end
end
# Stores the model output dot(row, z) in every row under the key "<vy>_y".
#
# @param l [Array<Hash>] rows (mutated)
# @param vy [String] name of the predicted variable
# @param z [Hash] model coefficients as expected by dot
# @return [Array<Hash>] l itself
def predict(l, vy, z)
  target = "#{vy}_y"
  l.each do |row|
    row[target] = dot(row, z)
  end
end
# Adds up the elements of l starting from 0.0, so the result is a Float for numeric
# input and 0.0 for an empty list.
#
# NOTE: this redefines the `sum` defined earlier in this file.
#
# @param l [Array<Numeric>]
# @return [Float]
def sum(l)
  l.inject(0.0) { |total, value| total + value }
end
# Mean of l via sum, or nil when l is empty.
#
# @param l [Array<Numeric>]
# @return [Numeric, nil]
def av(l)
  l.empty? ? nil : sum(l) / l.size
end
# Correlation coefficient between two columns of the rows in l.
#
# When yp is omitted, the second column is "<y1>_y" and, as a side effect, every row
# also receives the absolute difference of the two values under "<y1>_e". When yp is
# given, no rows are modified.
#
# The denominator sqrt(sum dx^2) * sqrt(sum dy^2) is replaced by 1e-4 when it is
# smaller than that, which avoids dividing by zero for constant columns. The
# threshold is stored in the global $z on every call.
#
# @param l [Array<Hash>] rows
# @param y1 [String] first column
# @param yp [String, nil] second column; nil selects "<y1>_y" and enables the
#   "<y1>_e" side effect
# @return [Float]
def corr(l, y1, yp = nil)
  ye = nil
  if yp.nil?
    yp = "#{y1}_y"
    ye = "#{y1}_e"
  end
  mean_x = av(l.map { |row| row[y1] })
  mean_y = av(l.map { |row| row[yp] })
  sxx = syy = sxy = 0.0
  l.each do |row|
    x = row[y1]
    y = row[yp]
    row[ye] = (x - y).abs unless ye.nil?
    sxy += (x - mean_x) * (y - mean_y)
    sxx += (x - mean_x) ** 2
    syy += (y - mean_y) ** 2
  end
  denom = Math.sqrt(sxx) * Math.sqrt(syy)
  $z = 1e-4
  sxy / (denom.abs < $z ? $z : denom)
end
# Fits a multiple linear regression of vy on the variables vx with Statsample,
# writes the model's predictions into the rows (predict) and measures how well they
# correlate with the actual values (corr, which also stores the per-row error).
#
# @param l [Array<Hash>] rows (mutated: "<vy>_y" and "<vy>_e" are added)
# @param vx [Array<String>] predictor column names
# @param vy [String] dependent column name
# @return [Array] [correlation, coefficients]; the coefficient hash holds the
#   regression coefficients plus the constant under 'c'
def fit(l, vx, vy)
  columns = (vx + [vy]).each_with_object({}) do |name, acc|
    acc[name] = l.map { |row| row[name] }.to_vector()
  end
  # A Statsample::Regression::LinearDependency error is not rescued here.
  model = Statsample::Regression.multiple(columns.to_dataset(), vy)
  # $stderr.puts(model.summary)
  weights = model.coeffs.merge({'c' => model.constant})
  predict(l, vy, weights)
  [corr(l, vy), weights]
end
# Copies column k from the rows of l1 into the rows of l at the same index.
# The destination key is k itself, or "<k>_<n>" when n is given. Missing
# destination rows are created as empty hashes.
#
# @param l [Array<Hash>] destination rows (mutated)
# @param l1 [Array<Hash>] source rows
# @param k [Object] source key
# @param n [Object, nil] optional suffix for the destination key
# @return [Integer] number of source rows
def copy(l, l1, k, n = nil)
  target = n.nil? ? k : "#{k}_#{n}"
  l1.each_with_index do |source, i|
    l[i] = {} if l[i].nil?
    l[i][target] = source[k]
  end
  l1.size
end
# Projects every row onto the given keys, in the given order. Keys a row does not
# have map to the row's default value (nil for a plain Hash).
#
# @param l [Array<Hash>] rows
# @param ks [Array] keys to keep
# @return [Array<Hash>] new rows containing only ks
def keys(l, ks)
  l.map do |row|
    ks.each_with_object({}) { |key, picked| picked[key] = row[key] }
  end
end
# Selects features that correlate with column k1 and fits a linear model on them.
#
# Steps:
#   1. For every feature set in l1 and every feature in it (except 'hc', 'cm', 'cg'),
#      compute the correlation with k1 (corr).
#   2. Raise if any correlation is not finite.
#   3. Visit the features by decreasing absolute correlation, ignoring those whose
#      absolute correlation is not above mn1. A feature is skipped when its
#      correlation is within mn2 of one already chosen; otherwise it is printed and
#      copied into the working rows under "<feature>_<set>".
#   4. Fit k1 on the chosen features (fit), print the correlation and the
#      coefficients sorted by magnitude.
#
# @param l [Array<Hash>] records providing the k1 values
# @param l1 [Hash] feature set name => rows of feature values
# @param k1 [String] target column
# @param mn2 [Float] minimum distance between the correlations of chosen features
# @param mn1 [Float] minimum absolute correlation for a feature to be considered
# @return [Array<Hash>] rows holding k1, "<k1>_y" (prediction) and "<k1>_e" (error)
# @raise [RuntimeError] if a correlation is NaN or infinite
def fit3(l, l1, k1, mn2 = 0.015, mn1 = 0.025)
  ranked = []
  l1.keys.each do |w|
    features = l1[w][0].keys - ['hc', 'cm', 'cg']
    features.each do |k|
      ranked << [k, w, corr(l1[w], k, k1)]
    end
    # $stderr.puts([w, features.size].inspect)
  end
  broken = ranked.reject { |entry| entry[2].finite? }
  raise broken.inspect unless broken.empty?
  l = l.map { |row| Hash[k1, row[k1]] }
  chosen = []
  candidates = ranked.sort_by { |entry| -entry[2].abs }.select { |entry| entry[2].abs > mn1 }
  candidates.each do |k, w, r|
    nearest = chosen.map { |other| (other - r).abs }.min
    # Too close to an already chosen correlation: skip this feature.
    next if !nearest.nil? && nearest < mn2
    p([k, w, r])
    copy(l, l1[w], k, w)
    chosen << r
  end
  $stderr.puts("#{chosen.size} / #{ranked.size}")
  r, z = fit(l, l[0].keys - [k1], k1)
  p([r, Hash[z.sort_by { |pair| -pair[1].abs }]])
  keys(l, [k1, "#{k1}_y", "#{k1}_e"])
end
# Runs fit3 for the targets 'hc', 'cm' and 'cg', merges the three result rows per
# record, adds the record's 'c' column and writes six plot files: for each target k
# (numbered 1..3) one sorted by k ("gnuplot<n>.cmd") and one sorted by the error
# column "<k>_e" ("gnuplot<n>b.cmd").
#
# In every plot the columns 'hc', 'hc_y' and 'hc_e' get the 'axes x1y2' option,
# regardless of which target the plot is sorted by.
#
# @param l [Array<Hash>] records
# @param l1 [Hash] feature sets as expected by fit3
# @return [Array<String>] ['hc', 'cm', 'cg']
def var2(l, l1)
  hc_rows = fit3(l, l1, 'hc', 0.025)
  cm_rows = fit3(l, l1, 'cm', 0.03)
  cg_rows = fit3(l, l1, 'cg', 0.025)
  merged = hc_rows.each_index.map do |i|
    hc_rows[i].merge(cm_rows[i]).merge(cg_rows[i])
  end
  copy(merged, l, 'c')
  ['hc', 'cm', 'cg'].each_with_index do |k, i|
    number = i + 1
    error_key = "#{k}_e"
    outafn(merged.sort_by { |row| row[k] }, axes2(['hc', 'hc_y', 'hc_e']), number, "by #{k}")
    outafn(merged.sort_by { |row| row[error_key] }, axes2(['hc', 'hc_y', 'hc_e']), "#{number}b", "by #{k}_e")
  end
end
# One step of the map n -> n / 2 for even n, (3n + 1) / 2 for odd n
# (integer division).
#
# @param n [Integer]
# @return [Integer]
def f2(n)
  return n / 2 unless n.odd?
  (n * 3 + 1) / 2
end
# Advances every record by one f2 step: its 'ns' string is read as a base-2 integer,
# passed through f2 and written back as a base-2 string.
#
# NOTE: the records are modified in place; callers keep seeing the advanced values.
#
# @param l [Array<Hash>] records with an 'ns' entry (mutated)
# @return [Array<Hash>] l itself
def adv(l)
  l.each do |rec|
    value = rec['ns'].to_i(2)
    rec['ns'] = f2(value).to_s(2)
    # rec['hc'] = rec['c'].to_f / rec['ns'].length
  end
  l
end
# Averages several result tables element-wise. `l` is a list of tables, each table a
# list of row hashes; the result has one row per row of the first table, holding for
# each key of the first table's first row the avg of that cell across all tables.
#
# @param l [Array<Array<Hash>>] tables of identical shape
# @return [Array<Hash>] the averaged table
def avgs(l)
  names = l[0][0].keys
  (0...l[0].size).map do |i|
    names.each_with_object({}) do |name, row|
      row[name] = avg(l.map { |table| table[i][name] })
    end
  end
end
# Evaluates fn on the records as they are and then 19 more times, advancing the
# records with adv before each of those evaluations, and returns the element-wise
# average of the 20 results (avgs).
#
# NOTE: adv changes the records in place, so `l` is left 19 steps further on.
#
# @param l [Array<Hash>] records (mutated through adv)
# @param fn [#call] maps the records to a table of row hashes
# @return [Array<Hash>] averaged table
def smooth(l, fn)
  samples = [fn.call(l)]
  19.times do
    samples << fn.call(adv(l))
  end
  avgs(samples)
end
# Builds all feature sets and hands them to var2: one smoothed review3 table for every
# combination of w in 1..4 and b in 0..1 (stored under "<w>_<b>"), plus the smoothed
# data table (stored under the nil key).
#
# NOTE: smooth advances the records in place through adv, so each feature set is
# computed from the state the previous smooth call left behind.
#
# @param l [Array<Hash>] records
# @return whatever var2 returns
def fit3b(l)
  feature_sets = {}
  (1..4).to_a.product([0, 1]).each do |w, b|
    feature_sets["#{w}_#{b}"] = smooth(l, lambda { |rows| review3(rows, w, b) })
  end
  feature_sets[nil] = smooth(l, lambda { |rows| data(rows) })
  var2(l, feature_sets)
end
# Number of differing bits between the trailing characters of two base-2 strings:
# both are cut to the length of the shorter one (keeping the end of the string),
# read as base-2 integers and XORed; the 1 bits of the result are counted.
#
# NOTE: when either string is empty the common length is 0 and the slice -0..-1
# selects each string in full, so the result is the number of 1 bits of the
# non-empty string.
#
# @param ns1 [String]
# @param ns2 [String]
# @return [Integer]
def bitdiff(ns1, ns2)
  width = [ns1.length, ns2.length].min
  low1 = ns1[-width..-1].to_i(2)
  low2 = ns2[-width..-1].to_i(2)
  (low1 ^ low2).to_s(2).count('1')
end
# Stores in x["pd<n>"] how far x[k] is from the nearer of two neighbour strings: the
# smaller of the two bitdiff values, divided by the larger of the two neighbour
# lengths plus one.
#
# @param x [Hash] record (mutated)
# @param ns1 [String] first neighbour
# @param ns2 [String] second neighbour
# @param k [Object] key of the record's own string
# @param n [Object] suffix of the result key
# @return [Float] the stored value
def prefixdiff(x, ns1, ns2, k, n = '')
  nearest = [bitdiff(x[k], ns1), bitdiff(x[k], ns2)].min
  longest = [ns1.length + 1, ns2.length + 1].max
  x["pd#{n}"] = nearest.to_f / longest
end
# Orders the records by their k value and calls prefixdiff on each one with the k
# values of its predecessor and successor in that order ('' where there is none).
# The records are updated in place; the caller's array order is not changed.
#
# @param l [Array<Hash>] records
# @param k [Object] key of the string to compare
# @param n [Object] suffix of the result key, passed to prefixdiff
# @return [Integer] number of records
def prefixdiffs(l, k = 'ns', n = '')
  ordered = l.sort_by { |rec| rec[k] }
  last = ordered.size - 1
  ordered.each_with_index do |rec, i|
    before = i == 0 ? '' : ordered[i - 1][k]
    after = i == last ? '' : ordered[i + 1][k]
    prefixdiff(rec, before, after, k, n)
  end
  ordered.size
end
# Runs prefixdiffs on the 'ns' column (result in 'pd1') and on the 'ns2' column
# (result in 'pd2'), then stores the smaller of the two in 'pd' for every record.
#
# @param l [Array<Hash>] records with 'ns' and 'ns2' (mutated)
# @return [Array<Hash>] l itself
def prefixdiffs2(l)
  [['ns', 1], ['ns2', 2]].each do |column, tag|
    prefixdiffs(l, column, tag)
  end
  l.each do |rec|
    rec['pd'] = [rec['pd1'], rec['pd2']].min
  end
end
# Picks n + 1 evenly spaced elements from l, always starting with the first element
# and ending with the last. Elements repeat when l has fewer than n + 1 entries.
#
# Indices are computed with integer arithmetic (i * (size - 1) / n): a floating
# point step can round just below a whole index and miss the last element.
#
# @param l [Array] source list
# @param n [Integer] number of intervals; the result has n + 1 elements
# @return [Array] the sampled elements; [] when l is empty or n is negative
# @raise [ZeroDivisionError] if n is 0 and l is not empty
def sample2(l, n)
  return [] if l.empty?
  span = l.size - 1
  (0..n).map { |i| l[i * span / n] }
end
# Returns the working set of records, building and caching it on first use.
#
# If 'gnuplotx.cmd' exists in the current directory its rows are loaded and returned.
# Otherwise the files gnuplot1-1.cmd .. gnuplot5-1.cmd are loaded and every record is
# prepared:
#   * 'ns' is advanced by one f2 step (read and written as a base-2 string)
#   * 'ns2' is the reversed 'ns'
#   * 'hc' is 'c' divided by the length of the new 'ns'
# Records whose 'pd' value (prefixdiffs) is not above 0.40 are dropped, the rest is
# sampled with sample2(.., 1000), sorted by 'hc' and written to 'gnuplotx.cmd'.
#
# @return [Array<Hash>] the records
def merge()
  fn = 'gnuplotx.cmd'
  # File.exist? replaces File.exists?, which no longer exists in Ruby 3.2+.
  return load(['x']) if File.exist?(fn)
  l = load((1..5).map { |x| "#{x}-1" })
  l.each do |x|
    ns = f2(x['ns'].to_i(2)).to_s(2)
    x['ns'] = ns
    x['ns2'] = ns.reverse
    x['hc'] = x['c'].to_f / ns.length
  end
  prefixdiffs(l)
  l = sample2(l.select { |x| x['pd'] > 0.40 }, 1000)
  l.sort_by! { |x| x['hc'] }
  # Block form closes the file, so the cache is fully written before returning.
  File.open(fn, 'w') { |f| outd(f, l) }
  l
end
# Script entry point: obtain the records (cached or freshly built) and run the
# feature fitting and plotting pipeline on them.
records = merge
fit3b(records)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment