# Gist vznvzn/559c31047010d6266ccd61aa8aa07d15, created January 4, 2021 05:59.
# GitHub notice: this file contains bidirectional Unicode text that may be
# interpreted or compiled differently than what appears below. To review, open
# the file in an editor that reveals hidden Unicode characters.
require 'statsample' | |
# Parses a gnuplot command file laid out as inline data: a "$dat << eof"
# marker line, a whitespace-separated header line, data rows, and an "eof"
# terminator line. Everything after the terminator is ignored.
#
# fn - path of the file to read.
#
# Returns an Array with one Hash per data row, keyed by the header names; the
# 'ns' value of every row is converted to a String. Also writes
# [fn, row count] to $stderr.
# Raises RuntimeError when the marker, the header line or the terminator is
# missing.
def outin(fn)
  # File.readlines opens and closes the file itself, so no handle is left open
  # when one of the format checks below raises.
  lines = File.readlines(fn)
  marker = lines.shift
  raise "#{fn}: missing '$dat << eof' marker line" if marker.nil? || marker.chomp != '$dat << eof'
  header = lines.shift
  raise "#{fn}: missing column header line" if header.nil?
  names = header.split
  terminator = lines.index { |line| line.chomp == 'eof' }
  raise "#{fn}: missing 'eof' terminator line" if terminator.nil?
  rows = lines.first(terminator).map do |line|
    # NOTE(review): every field is evaluated as Ruby code, so only trusted
    # files should be passed in. A bare NaN field is not valid Ruby and raises.
    values = line.split.map { |field| Kernel.eval(field) }
    Hash[[names, values].transpose]
  end
  rows.each { |row| row['ns'] = row['ns'].to_s }
  $stderr.puts([fn, rows.size].inspect)
  rows
end
# Reads "gnuplot<suffix>-1.cmd" through outin for every suffix in l1 and
# returns all parsed rows as one flat Array, in suffix order.
#
# NOTE(review): being defined at top level, this shadows Kernel#load for bare
# `load` calls made in this script.
def load(l1 = [''])
  l1.flat_map { |suffix| outin("gnuplot#{suffix}-1.cmd") }
end
# Writes the rows in l to f as a gnuplot inline data block: the "$dat << eof"
# marker, a tab-separated header taken from the first row's keys, one
# tab-separated line of values per row, and the closing "eof".
#
# f - writable IO-like object (must respond to puts).
# l - non-empty Array of Hashes.
#
# Returns the Array of column names (the keys of the first row).
def outd(f, l)
  f.puts('$dat << eof')
  columns = l.first.keys
  f.puts(columns.join("\t"))
  l.each do |row|
    f.puts(row.values.join("\t"))
  end
  f.puts('eof')
  columns
end
# Writes a complete gnuplot script to f: the inline data block for l (via
# outd) followed by one `plot` command that draws every column.
#
# f - writable IO-like object.
# l - Array of row Hashes.
# a - per-column gnuplot options keyed by column name; a column whose entry is
#     present but nil is left out of the plot.
# t - plot title.
#
# When the rows have a 't' column it is used as the x value for every series
# instead of being plotted itself.
def outa(f, l, a = {}, t = '')
  columns = outd(f, l)
  f.puts("set colors classic; set title '#{t}'; ")
  f.puts("set ytics nomirror; set y2tics;")
  f.puts("plot \\")
  x_spec = nil
  if columns.member?('t')
    columns = columns - ['t']
    x_spec = "(column('t')):"
  end
  columns.each do |name|
    next if a.member?(name) && a[name].nil?
    style = a.fetch(name, '')
    # Fill in a line width and a drawing style unless the caller chose them.
    style += ' lw 2 ' unless style.include?('lw')
    style += ' with line ' unless style.include?('with') || style.include?('pt')
    f.puts("'$dat' using #{x_spec}(column('#{name}')) #{style} title '#{name}',\\")
  end
  # The blank line ends the backslash-continued plot command.
  f.puts
  # Disabled in the original: f.puts("reset; pause -1;")
end
# Writes the gnuplot script for l (see outa) to the file "gnuplot<fno>.cmd"
# and reports [file name, title, row count] on $stderr.
#
# l   - Array of row Hashes.
# a   - per-column gnuplot options, passed through to outa.
# fno - file-name suffix inserted between "gnuplot" and ".cmd".
# t   - plot title.
def outafn(l, a = {}, fno = nil, t = '')
  fn = "gnuplot#{fno}.cmd"
  # Block form closes the file even when outa raises part-way through.
  File.open(fn, 'w') { |f| outa(f, l, a, t) }
  $stderr.puts([fn, t, l.size].inspect)
end
# Splits the reversed string into maximal runs of one repeated character.
#
# ns - String to analyse.
#
# Returns an Array of [run_text, index] pairs in reversed-string order, where
# index is the position (in the reversed string) of the run's last character.
# An empty string yields [].
def runs(ns)
  last = -1
  # One regex pass instead of rebuilding a string and an array per character.
  ns.reverse.scan(/((.)\2*)/m).map do |run, _char|
    last += run.length
    [run, last]
  end
end
# Writes a gnuplot script to the file fn that plots every column of l as
# palette-coloured points, then reports [fn, row count] on $stderr.
#
# l  - non-empty Array of row Hashes.
# fn - path of the script to create.
def plotcolor(l, fn)
  # Block form closes the file even when writing fails.
  File.open(fn, 'w') do |f|
    outd(f, l)
    cs = l[0].keys.map { |k| "(column('#{k}'))" }.join(':')
    f.puts("plot $dat using #{cs} with point linecolor palette pt 5 ps 0.5")
  end
  $stderr.puts([fn, l.size].inspect)
end
# Prints x.inspect to $stderr the first time a given value is seen; repeated
# values are ignored. Seen values are remembered in the global $x.
# Always returns nil.
def log(x)
  $x = [] if $x.nil?
  unless $x.include?(x)
    $x.push(x)
    $stderr.puts(x.inspect)
  end
end
# Computes the mean and the population standard deviation of l.
#
# k - suffix for the result keys (nil gives plain 'a' and 's').
# l - Array of numbers.
#
# Returns {"a<k>" => mean, "s<k>" => standard deviation}.
# Raises RuntimeError when the deviation is NaN, which is what happens for an
# empty list or NaN input.
def stat(k, l)
  count = l.size
  total = l.inject { |acc, x| acc + x }
  squares = l.inject(0) { |acc, x| acc + (x ** 2) }
  mean = total.to_f / count
  variance = squares.to_f / count - mean ** 2
  # Rounding can push a zero variance slightly below zero; clamp it.
  sd = Math.sqrt(variance < 0 ? 0 : variance)
  raise "stat: standard deviation is NaN (size=#{count}, key=#{k.inspect})" if sd.nan?
  { "a#{k}" => mean, "s#{k}" => sd }
end
# Adds up the elements of l with `+`; returns nil for an empty list.
#
# NOTE(review): a second `sum` is defined further down in this file. Ruby
# keeps the last definition, so once the whole file is loaded callers get that
# one, not this.
def sum(l)
  l.inject(:+)
end
# Arithmetic mean of l as a Float (sum(l) divided by the element count).
def avg(l)
  total = sum(l)
  total.to_f / l.size
end
# Summarises a list of numbers and the gaps between its sorted values.
#
# l - Array of numbers (left unmodified; a sorted copy is used internally).
#
# Returns a Hash with:
#   'm'  - element at index size/2 of the sorted list
#   'ma' - mean of the elements greater than 'm'
#   'am' - mean divided by 'm'
#   'mn', 'mx', 'c' - minimum, maximum and element count
#   'a', 's'   - mean and standard deviation of l (from stat)
#   'a2', 's2' - mean and standard deviation of consecutive sorted differences
# Raises RuntimeError (from stat) when l has fewer than two elements.
def diffs(l)
  overall = stat(nil, l)
  # Sort a copy so the caller's array keeps its order.
  sorted = l.sort
  m = sorted[sorted.size / 2]
  ma = avg(sorted.select { |x| x > m })
  am = overall['a'] / m
  gaps = (1...sorted.size).map { |i| sorted[i] - sorted[i - 1] }
  gap_stats = stat('2', gaps)
  {
    'm' => m,
    'ma' => ma,
    'am' => am,
    'mn' => sorted.min,
    'mx' => sorted.max,
    'c' => sorted.size,
  }.merge(overall).merge(gap_stats)
end
# Selects runs from l, a list of [run_text, index] pairs as produced by runs.
#
# b  - when given, keep only runs whose first character equals b.to_s.
# t  - when given, return the t largest runs ordered by [length, index],
#      descending.
# t2 - when given (and t is nil), return the runs of length >= t2.
#
# With neither t nor t2 the (possibly b-filtered) list is returned as is.
# The {'b', 't'} setting is reported once through log.
def filter(l, b = nil, t = nil, t2 = nil)
  log({'b' => b, 't' => t})
  l = l.select { |run| run[0][0] == b.to_s } unless b.nil?
  if !t.nil?
    l.sort_by { |run| [run[0].length, run[1]] }.reverse[0...t]
  elsif !t2.nil?
    l.select { |run| run[0].length >= t2 }
  else
    l
  end
end
# Builds run-position statistics for every row of l.
#
# l - Array of row Hashes with an 'ns' string plus 'hc', 'cm' and 'cg' values.
# w - run selection mode: 1, 2 and 3 keep the 3, 10 and 15 largest runs,
#     4 keeps runs of length >= 3, anything else keeps every run.
# b - character (compared via to_s) the kept runs must start with.
#
# Returns one Hash per row: the diffs summary of the selected runs' indices
# merged with the row's 'hc', 'cm' and 'cg'.
def review3(l, w, b = 1)
  # The original also collected per-run {'j', 'i', 'w'} records for a
  # plotcolor call that is commented out; that unused work is dropped here.
  l.map do |x|
    l1 = runs(x['ns'])
    case w
    when 1
      l1 = filter(l1, b, 3)
    when 2
      l1 = filter(l1, b, 10)
    when 3
      l1 = filter(l1, b, 15)
    when 4
      l1 = filter(l1, b, nil, 3)
    end
    diffs(l1.map { |_s, i| i }).merge({'hc' => x['hc'], 'cm' => x['cm'], 'cg' => x['cg']})
  end
end
# Fraction of the characters of s that are '1' (NaN for an empty string).
def d(s)
  s.count('1').to_f / s.length
end
# Splits ns on the pattern p and returns the lengths of the resulting pieces,
# ignoring the empty leading piece produced when ns starts with a match.
def len(ns, p)
  pieces = ns.split(p) || []
  pieces = pieces.drop(1) if pieces.first == ''
  pieces.map(&:length)
end
# Lengths of the pieces of ns left after splitting on runs of '0'.
def len1(ns)
  len(ns, /0+/)
end
# Lengths of the pieces of ns left after splitting on runs of '1'.
def len0(ns)
  len(ns, /1+/)
end
# Returns the pair [len1(ns), len0(ns)].
def len01(ns)
  [len1(ns), len0(ns)]
end
# Number of pieces reported by len01 (both lists together) divided by the
# length of ns.
def e(ns)
  split_on_zeros, split_on_ones = len01(ns)
  (split_on_zeros.size + split_on_ones.size).to_f / ns.length
end
# Base-2 logarithm of x, computed as a ratio of natural logarithms.
def log2(x)
  natural = Math.log(x)
  natural / Math.log(2.0)
end
# Cuts ns around its middle index (length / 2).
#
# Returns [tail, head]: the part from the middle index to the end, and the
# part from the start up to and including the middle index.
# NOTE(review): because the head range is inclusive, the two parts share the
# character at the middle index - confirm this overlap is intended.
def lh(ns)
  mid = ns.length / 2
  [ns[mid..-1], ns[0..mid]]
end
# Density figures for ns and for the two parts returned by lh.
#
# Returns [d(ns) - 0.5, d(first part) - 0.5, d(second part) - 0.5,
#          d(first part) - d(second part)].
def dlh(ns)
  first_part, second_part = lh(ns)
  first_density = d(first_part)
  second_density = d(second_part)
  [
    d(ns) - 0.5,
    first_density - 0.5,
    second_density - 0.5,
    first_density - second_density,
  ]
end
# Density (see d) of the prefix of ns that runs from index 0 through index
# length / 4, inclusive.
def d4(ns)
  prefix = ns[0..(ns.length / 4)]
  d(prefix)
end
# The e measure of the prefix of ns that runs from index 0 through index
# length / 4, inclusive.
def e4(ns)
  prefix = ns[0..(ns.length / 4)]
  e(prefix)
end
# Scans ns from the left, adding up the integer value of each character, and
# stops once the running total reaches length / 4 (or the string ends).
#
# Returns the number of characters consumed divided by the length of ns.
def midpt(ns)
  target = ns.length / 4
  total = 0
  consumed = 0
  ns.each_char do |ch|
    break unless total < target
    total += ch.to_i
    consumed += 1
  end
  consumed.to_f / ns.length
end
# midpt measured on the reversed string, i.e. scanning from the right.
def midpt1(ns)
  reversed = ns.reverse
  midpt(reversed)
end
# Number of '1' characters in s.
def d1(s)
  s.count('1')
end
# Like midpt, but the running total has to reach half of the number of '1'
# characters in ns (d1(ns) / 2) rather than a quarter of the length.
#
# Returns the number of characters consumed divided by the length of ns.
def midpt2(ns)
  target = d1(ns) / 2
  total = 0
  consumed = 0
  ns.each_char do |ch|
    break unless total < target
    total += ch.to_i
    consumed += 1
  end
  consumed.to_f / ns.length
end
# Returns x, or 1 when x equals zero (used to keep divisors non-zero).
def not0(x)
  return 1 if x == 0
  x
end
# Derives four groups of features from the 'ns' string of every row of l.
#
# l              - Array of row Hashes with 'ns', 'hc', 'cm' and 'cg'.
# l1, l2, l3, l4 - output Arrays; one feature Hash per row is appended to
#                  each (midpoint/density products, piece-length figures,
#                  density figures, remaining figures).
#
# Non-finite values in the first group are replaced through fix.
# Returns one Hash per row merging the four feature groups and the row's
# 'hc', 'cm' and 'cg'.
def data(l, l1 = [], l2 = [], l3 = [], l4 = [])
  l.each do |row|
    ns = row['ns']
    dens, dens1, dens2, dens12 = dlh(ns)
    mp = midpt(ns) - 0.5
    mp2 = midpt2(ns) - 0.5
    s1 = mp.abs / not0(dens12.abs)
    s3 = mp.abs * dens12.abs
    s5 = -mp * dens12
    t1 = mp2.abs / not0(dens12.abs)
    t3 = mp2.abs * dens12.abs
    t5 = -mp2 * dens12
    mx0 = len0(ns).max
    mx0 = 0 if mx0.nil?
    mx1 = len1(ns).max
    mx01 = [mx0, mx1].max
    # Double base-2 logarithm of ns read as a binary number.
    mx = log2(log2(ns.to_i(2).to_f))
    a0 = avg(len0(ns))
    a1 = avg(len1(ns))
    a01 = avg(len01(ns).flatten)

    l1 << {
      's1' => s1,
      's2' => 1 / s1,
      's4' => Math.log(1 / s3),
      's3' => s3,
      's5' => -s5,
      't1' => t1,
      't2' => 1 / t1,
      't4' => Math.log(1 / t3),
      't3' => t3,
      't5' => -t5,
    }
    l2 << {
      'a0' => a0,
      'a1' => a1,
      'a01' => a01,
      'a01t' => (a0 + a1) / 2,
      'mx0' => mx0,
      'mx1' => mx1,
      'mx01' => mx01,
    }
    l3 << {
      'e4' => e4(ns) - 0.5,
      'd4' => d4(ns) - 0.5,
      'd12' => dens12,
      'd1' => dens1,
      'd1a' => dens1.abs,
      'd2' => dens2,
      'd2a' => dens2.abs,
      'd' => dens,
      'da' => dens.abs
    }
    l4 << {
      'a01d' => a0 + a1 - 2 * a01,
      'mp' => mp,
      'mpa' => mp.abs,
      'mp2' => mp2,
      'mp2a' => mp2.abs,
      'a0m' => a0 / mx,
      'a1m' => a1 / mx,
    }
  end
  fix(l1)
  l.each_with_index.map do |row, i|
    features = [l1[i], l2[i], l3[i], l4[i]].inject({}) { |merged, part| merged.merge(part) }
    features.merge({'hc' => row['hc'], 'cm' => row['cm'], 'cg' => row['cg']})
  end
end
# Builds a gnuplot option Hash that assigns 'axes x1y2' to every column name
# listed in x2.
def axes2(x2)
  x2.each_with_object({}) { |name, opts| opts[name] = 'axes x1y2' }
end
# Computes a running average of column k over a window of c rows and stores
# it, under the same key k, in the row Hashes of l3.
#
# l  - Array of row Hashes (source data).
# k  - column name.
# c  - window size.
# l3 - Array of Hashes to fill; missing entries are created.
#
# The first c - 1 rows get the placeholder '-' since no full window exists
# for them. Returns l.
def runavg(l, k, c, l3)
  series = l.map { |row| row[k] }
  total = sum(series[0...c])
  averages = (['-'] * (c - 1)) + [total.to_f / c]
  # Slide the window by index rather than consuming the series with shift.
  (0...(series.size - c)).each do |i|
    total -= series[i]
    total += series[i + c]
    averages << (total.to_f / c)
  end
  l.each_with_index { |_row, i| l3[i] = l3.fetch(i, {}).merge({k => averages[i]}) }
end
# Replaces, in place, every non-finite value (NaN or infinity) in the rows of
# l1 with the average of the finite values of the same column.
#
# l1 - Array of Hashes sharing the keys of the first row; values must respond
#      to finite?.
#
# Columns with only finite values are left untouched. An empty list is a
# no-op and returns [].
def fix(l1)
  # Without this guard an empty list fails on l1[0].keys.
  return [] if l1.empty?
  l1[0].keys.each do |k|
    finite = l1.map { |row| row[k] }.select(&:finite?)
    next if finite.size == l1.size
    m = avg(finite)
    l1.each { |row| row[k] = m unless row[k].finite? }
  end
end
# Writes two gnuplot scripts through outafn: one for the rows in l and one
# for their 100-row running averages (see runavg).
#
# l  - non-empty Array of row Hashes.
# a  - per-column gnuplot options.
# fn - base name handed to outafn as "<fn>.cmd" and "<fn>b.cmd".
#
# NOTE(review): outafn itself wraps its suffix as "gnuplot<suffix>.cmd", so
# the files end up named like "gnuplot<fn>.cmd.cmd" - confirm that is wanted.
def out(l, a, fn)
  outafn(l, a, "#{fn}.cmd")
  smoothed = []
  l.first.keys.each do |column|
    runavg(l, column, 100, smoothed)
  end
  outafn(smoothed, a, "#{fn}b.cmd")
end
# Computes the four feature groups for l (see data) and writes each group,
# raw and smoothed, to its own pair of gnuplot scripts through out.
def review4(l)
  group1 = []
  group2 = []
  group3 = []
  group4 = []
  data(l, group1, group2, group3, group4)
  # Columns listed in axes2 are drawn against the secondary y axis.
  out(group1, axes2(['s3', 's5', 't3', 't5']), 'gnuplot7')
  out(group2, axes2(['mx0', 'mx1', 'mx01']), 'gnuplot8')
  out(group3, {}, 'gnuplot9')
  out(group4, axes2(['a01d']), 'gnuplot10')
end
# Evaluates a linear model: z['c'] plus, for every other key of z, the
# coefficient z[key] times the row value x[key].
def dot(x, z)
  (z.keys - ['c']).inject(z['c']) { |acc, name| acc + z[name] * x[name] }
end
# Stores the model prediction dot(row, z) under the key "<vy>_y" in every row
# of l. Returns l.
def predict(l, vy, z)
  target = "#{vy}_y"
  l.each do |row|
    row[target] = dot(row, z)
  end
end
# Adds up the elements of l starting from 0.0, so the result is a Float and
# an empty list gives 0.0.
#
# NOTE(review): this redefines the `sum` declared earlier in this file; being
# the later definition, it is the one in effect after the file is loaded.
def sum(l)
  l.inject(0.0) { |acc, x| acc + x }
end
# Mean of l (sum(l) divided by its size), or nil for an empty list.
def av(l)
  l.empty? ? nil : sum(l) / l.size
end
# Correlation coefficient between two columns of l: the summed products of
# the deviations from the means, divided by the product of the square roots
# of the summed squared deviations.
#
# l  - Array of row Hashes.
# y1 - name of the first column.
# yp - name of the second column. When nil, "<y1>_y" is used and the absolute
#      difference between the two values is stored in each row as "<y1>_e".
#
# A denominator smaller in magnitude than 1e-4 is replaced by 1e-4 so the
# division stays defined. Returns the coefficient as a Float.
def corr(l, y1, yp = nil)
  yp, ye = ["#{y1}_y", "#{y1}_e"] if yp.nil?
  xav = av(l.map { |row| row[y1] })
  yav = av(l.map { |row| row[yp] })
  tx = ty = txy = 0.0
  l.each do |row|
    x = row[y1]
    y = row[yp]
    row[ye] = (x - y).abs unless ye.nil?
    txy += (x - xav) * (y - yav)
    tx += (x - xav) ** 2
    ty += (y - yav) ** 2
  end
  d = Math.sqrt(tx) * Math.sqrt(ty)
  # The floor is still published in the global $z, as before, in case other
  # code reads it.
  $z = 1e-4
  txy / (d.abs < $z ? $z : d)
end
# Fits a multiple linear regression of column vy on the columns vx with
# Statsample, writes the predictions into the rows (see predict) and measures
# how well they match (see corr).
#
# l  - Array of row Hashes.
# vx - Array of predictor column names.
# vy - name of the column to predict.
#
# Returns [correlation, coefficients], where coefficients maps every
# predictor to its weight and 'c' to the constant term.
def fit(l, vx, vy)
  columns = {}
  (vx + [vy]).each do |name|
    columns[name] = l.map { |row| row[name] }.to_vector()
  end
  # The original kept a commented-out rescue of
  # Statsample::Regression::LinearDependency here; errors still propagate.
  regression = Statsample::Regression.multiple(columns.to_dataset(), vy)
  # Disabled in the original: $stderr.puts(regression.summary)
  coefficients = regression.coeffs.merge({'c' => regression.constant})
  predict(l, vy, coefficients)
  [corr(l, vy), coefficients]
end
# Copies column k from the rows of l1 into the rows of l, position by
# position, creating missing rows in l as empty Hashes.
#
# n - optional suffix; when given the value is stored under "<k>_<n>" instead
#     of k.
#
# Returns the number of rows in l1.
def copy(l, l1, k, n = nil)
  target = n.nil? ? k : "#{k}_#{n}"
  l1.each_with_index do |source, i|
    l[i] = {} if l[i].nil?
    l[i][target] = source[k]
  end
  l1.size
end
# Projects every row of l onto the columns ks: returns new Hashes holding
# only those keys, in the order given (a missing key maps to nil).
def keys(l, ks)
  l.map { |row| Hash[ks.zip(row.values_at(*ks))] }
end
# Selects feature columns that correlate with column k1 and fits a linear
# model of k1 on them.
#
# l   - Array of row Hashes containing column k1.
# l1  - Hash mapping a feature-set name to an Array of feature rows (aligned
#       with l); 'hc', 'cm' and 'cg' in those rows are not candidates.
# k1  - name of the column to model.
# mn2 - a candidate is skipped when its correlation lies within mn2 of an
#       already accepted one.
# mn1 - minimum absolute correlation for a candidate.
#
# Accepted candidates and the final [correlation, coefficients] are printed
# with p; counts go to $stderr.
# Returns one Hash per row with k1, "<k1>_y" and "<k1>_e".
def fit3(l, l1, k1, mn2 = 0.015, mn1 = 0.025)
  scored = []
  l1.keys.each do |w|
    candidates = l1[w][0].keys - ['hc', 'cm', 'cg']
    candidates.each do |k|
      scored << [k, w, corr(l1[w], k, k1)]
    end
    $stderr.puts([w, candidates.size].inspect)
  end
  l = l.map { |row| {k1 => row[k1]} }
  accepted = []
  ranked = scored.sort_by { |entry| -entry[2].abs }.select { |entry| entry[2].abs > mn1 }
  ranked.each do |k, w, r|
    # Distance to the closest correlation accepted so far (nil when none).
    nearest = accepted.map { |prev| (prev - r).abs }.min
    next if !nearest.nil? && nearest < mn2
    p([k, w, r])
    copy(l, l1[w], k, w)
    accepted << r
  end
  $stderr.puts("#{accepted.size} / #{scored.size}")
  r, z = fit(l, l[0].keys - [k1], k1)
  p([r, Hash[z.sort_by { |pair| -pair[1].abs }]])
  keys(l, [k1, "#{k1}_y", "#{k1}_e"])
end
# Runs fit3 for 'hc', 'cm' and 'cg', merges the three result sets row by row,
# adds column 'c' from l, and writes six gnuplot scripts: for each of the
# three columns one sorted by the column and one sorted by its "<column>_e".
#
# l  - Array of row Hashes.
# l1 - Hash of feature sets, as expected by fit3.
def var2(l, l1)
  by_hc = fit3(l, l1, 'hc')
  by_cm = fit3(l, l1, 'cm')
  by_cg = fit3(l, l1, 'cg', 0.02)
  merged = by_hc.each_index.map { |i| by_hc[i].merge(by_cm[i]).merge(by_cg[i]) }
  copy(merged, l, 'c')
  ['hc', 'cm', 'cg'].each_with_index do |k, i|
    error_key = "#{k}_e"
    outafn(merged.sort_by { |row| row[k] }, axes2(['hc', 'hc_y', 'hc_e']), i + 1, "by #{k}")
    outafn(merged.sort_by { |row| row[error_key] }, axes2(['hc', 'hc_y', 'hc_e']), "#{i + 1}b", "by #{k}_e")
  end
end
# Builds every feature set for l - review3 for each mode 1..4 combined with
# b = 0 and b = 1 (keyed "<mode>_<b>"), plus data(l) under the nil key - and
# passes them to var2.
def fit3b(l)
  feature_sets = {}
  (1..4).to_a.product([0, 1]).each do |w, b|
    feature_sets["#{w}_#{b}"] = review3(l, w, b)
  end
  feature_sets[nil] = data(l)
  var2(l, feature_sets)
end
# Script entry point: read the default input file and run the full analysis.
fit3b(load)
# GitHub page footer: sign up for free to join this conversation on GitHub.
# Already have an account? Sign in to comment.