Created
January 8, 2021 02:33
-
-
Save vznvzn/72b1221fa4c92437975be8d6ff74a698 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'statsample' | |
# Parses a gnuplot command file back into a list of row hashes.
#
# Layout checked below: first line `$dat << eof`, then a header line of
# whitespace-separated column names, then data lines, then a line `eof`.
# Everything from the `eof` line onwards is ignored.
#
# @param fn [String] path of the file to read
# @return [Array<Hash>] one hash per data line keyed by column name; the
#   'ns' value of every row is converted to a String
# @raise [RuntimeError] if the header line or the `eof` terminator is missing
def outin(fn)
  lines = File.readlines(fn) # opens and closes the file in one call
  raise "#{fn}: expected '$dat << eof' on the first line" if lines.shift.to_s.chomp != '$dat << eof'
  names = lines.shift.to_s.split
  stop = lines.index("eof\n")
  raise "#{fn}: missing 'eof' terminator line" if stop.nil?
  rows = lines[0...stop].map do |line|
    # NOTE(review): every field goes through Kernel.eval, so the file must be
    # trusted input. A field that is not a valid Ruby expression raises here.
    values = line.split.map { |token| Kernel.eval(token) }
    Hash[[names, values].transpose]
  end
  rows.each { |row| row['ns'] = row['ns'].to_s }
  $stderr.puts([fn, rows.size].inspect)
  rows
end
# Reads several gnuplot command files and concatenates their rows.
# Note: being defined at top level, this takes precedence over Kernel#load
# for unqualified calls in this script.
#
# @param l1 [Array] suffixes; each is read from "gnuplot<suffix>.cmd"
# @return [Array<Hash>] rows of all files, in the order given
def load(l1 = [])
  l1.flat_map { |suffix| outin("gnuplot#{suffix}.cmd") }
end
# Writes rows as a gnuplot inline data block: a `$dat << eof` line, a
# tab-separated header, one tab-separated line per row, then `eof`.
#
# @param f [IO] any object responding to #puts
# @param l [Array<Hash>] rows; column names are taken from the first row
# @return [Array] the column names ([] when there are no rows, which
#   previously raised on `l[0].keys`)
def outd(f, l)
  names = l.empty? ? [] : l[0].keys
  f.puts('$dat << eof')
  f.puts(names.join("\t"))
  l.each { |row| f.puts(row.values.join("\t")) }
  f.puts('eof')
  names
end
# Writes a complete gnuplot script: the data block (via outd), some fixed
# settings, and a `plot` command with one continuation line per column.
#
# @param f [IO] output stream
# @param l [Array<Hash>] rows to plot
# @param a [Hash] per-column plot options; a column mapped to nil is skipped
# @param t [String] plot title
# @return [nil]
def outa(f, l, a = {}, t = '')
  columns = outd(f, l)
  f.puts("set colors classic; set title '#{t}'; ")
  f.puts("set ytics nomirror; set y2tics;")
  f.puts("plot \\")
  # A column named 't', when present, becomes the x coordinate of every
  # series instead of being plotted itself.
  x_prefix = nil
  if columns.member?('t')
    columns -= ['t']
    x_prefix = "(column('t')):"
  end
  columns.each do |col|
    next if a.member?(col) && a[col].nil?
    style = a.fetch(col, '')
    # Defaults are appended only when the caller did not set them.
    style += ' lw 2 ' unless style.include?('lw')
    style += ' with line ' unless style.include?('with') || style.include?('pt')
    f.puts("'$dat' using #{x_prefix}(column('#{col}')) #{style} title '#{col}',\\")
  end
  # The blank line ends the run of continuation lines.
  f.puts
  # f.puts("reset; pause -1;")
end
# Writes a gnuplot script for the rows to "gnuplot<fno>.cmd" and logs the
# file name, title and row count to $stderr.
#
# @param l [Array<Hash>] rows to plot
# @param a [Hash] per-column plot options, passed through to outa
# @param fno [Object, nil] suffix interpolated into the file name
# @param t [String] plot title
# @return [nil]
def outafn(l, a = {}, fno = nil, t = '')
  fn = "gnuplot#{fno}.cmd"
  # Block form closes the file even when outa raises.
  File.open(fn, 'w') { |f| outa(f, l, a, t) }
  $stderr.puts([fn, t, l.size].inspect)
end
# Splits the reversed string into maximal runs of equal characters.
#
# @param ns [String]
# @return [Array<Array>] one [run, index] pair per run, where index is the
#   position (in the reversed string) of the run's last character
def runs(ns)
  reversed = ns.reverse
  found = []
  current = ''
  reversed.each_char.with_index do |ch, idx|
    current += ch
    next if reversed[idx + 1] == ch
    # Appending in place avoids rebuilding the result array on every run.
    found << [current, idx]
    current = ''
  end
  found
end
# Debug helper that is currently switched off: it does nothing and returns
# nil. The original body sat after a bare `return` and could never run; it
# is kept below as a comment so it can be re-enabled deliberately.
#
# @param x [Object] value that would be logged
# @return [nil]
def log(x)
  # Disabled logic (printed each distinct value once to $stderr):
  #   $x = [] if $x.nil?
  #   return if $x.member?(x)
  #   $x << x
  #   $stderr.puts(x.inspect)
  nil
end
# Computes the mean and the population standard deviation of a list.
#
# @param k [Object] suffix appended to the result keys
# @param l [Array<Numeric>] values
# @return [Hash] {"a<k>" => mean, "s<k>" => standard deviation}
# @raise [RuntimeError] if the standard deviation is NaN (e.g. empty list)
def stat(k, l)
  count = l.size
  total = l.inject { |acc, v| acc + v }
  total_sq = l.inject(0) { |acc, v| acc + (v ** 2) }
  mean = total.to_f / count
  variance = total_sq.to_f / count - mean ** 2
  # Rounding can push a tiny variance below zero; clamp before the root.
  variance = 0 if variance < 0
  sd = Math.sqrt(variance)
  raise [l, count, variance].inspect if sd.nan?
  {"a#{k}" => mean, "s#{k}" => sd}
end
# Adds up the elements of a list.
#
# This file defines `sum` a second time further down, and that later
# definition is the one in effect once the whole script has loaded. This
# copy now behaves the same way (Float accumulator, 0.0 for an empty list)
# so the result no longer depends on which definition is active.
#
# @param l [Array<Numeric>]
# @return [Float]
def sum(l)
  l.inject(0.0) { |total, v| total + v }
end
# Arithmetic mean of a list, computed as sum(l) divided by the list size.
#
# @param l [Array<Numeric>]
# @return [Float] NaN for an empty list (0.0 / 0)
def avg(l)
  total = sum(l).to_f
  total / l.size
end
# Mean of a list, with 0 substituted for an empty list.
#
# @param l [Array<Numeric>]
# @return [Numeric] 0 when l is empty, otherwise avg(l)
def avg2(l)
  return 0 if l.empty?
  avg(l)
end
# Mean/standard-deviation hash as produced by stat, with zeros substituted
# for an empty list.
#
# @param k [Object] suffix appended to the result keys
# @param l [Array<Numeric>]
# @return [Hash] {"a<k>" => mean, "s<k>" => standard deviation}
def stat2(k, l)
  return {"a#{k}" => 0, "s#{k}" => 0} if l.empty?
  stat(k, l)
end
# Summary statistics of a list of numbers and of the gaps between its
# sorted values.
#
# The input is no longer sorted in place; a sorted copy is used instead.
#
# @param l [Array<Numeric>]
# @return [Hash] keys 'm' (element at index size / 2 of the sorted list),
#   'ma' (mean of the values above 'm'), 'am' (mean divided by 'm'),
#   'mn', 'mx', 'c' (count), plus 'a'/'s' from stat2 on the values and
#   'a2'/'s2' from stat2 on the consecutive differences
def diffs(l)
  moments = stat2(nil, l)
  sorted = l.sort
  middle = sorted[sorted.size / 2]
  above = avg2(sorted.select { |v| v > middle })
  ratio = moments['a'] / middle
  gaps = sorted.each_cons(2).map { |lo, hi| hi - lo }
  gap_moments = stat2('2', gaps)
  {
    'm' => middle,
    'ma' => above,
    'am' => ratio,
    'mn' => sorted.first,
    'mx' => sorted.last,
    'c' => sorted.size,
  }.merge(moments).merge(gap_moments)
end
# Selects runs from a list of [run_string, index] pairs.
#
# @param l [Array<Array>] pairs as returned by runs
# @param b [Object, nil] when given, keep only runs whose first character
#   equals b.to_s
# @param t [Integer, nil] when given, return the t entries that rank highest
#   by [run length, index]
# @param t2 [Integer, nil] used only when t is nil: keep runs of length >= t2
# @return [Array<Array>]
def filter(l, b = nil, t = nil, t2 = nil)
  log({'b' => b, 't' => t})
  l = l.select { |run| run[0][0] == b.to_s } unless b.nil?
  if !t.nil?
    l.sort_by { |run| [run[0].length, run[1]] }.reverse[0...t]
  elsif !t2.nil?
    l.select { |run| run[0].length >= t2 }
  else
    l
  end
end
# Builds run-position statistics for every row.
#
# For each row the runs of row['ns'] are computed, narrowed by a filter
# chosen through w, and the run indices are summarised with diffs. The
# original also accumulated a second list that was never read; that dead
# work has been dropped.
#
# @param l [Array<Hash>] rows with 'ns', 'hc', 'cm' and 'cg' entries
# @param w [Integer] filter variant: 1, 2, 3 keep the top 3, 10, 15 runs;
#   4 keeps runs of length >= 3; any other value applies no filter
# @param b [Object] bit value the runs must start with (see filter)
# @return [Array<Hash>] one diffs hash per row, merged with 'hc', 'cm', 'cg'
def review3(l, w, b = 1)
  l.map do |row|
    found = runs(row['ns'])
    found =
      case w
      when 1 then filter(found, b, 3)
      when 2 then filter(found, b, 10)
      when 3 then filter(found, b, 15)
      when 4 then filter(found, b, nil, 3)
      else found
      end
    positions = found.map { |_run, idx| idx }
    diffs(positions).merge({'hc' => row['hc'], 'cm' => row['cm'], 'cg' => row['cg']})
  end
end
# Fraction of characters in the string that are '1'.
#
# @param s [String]
# @return [Float] NaN for an empty string (0.0 / 0)
def d(s)
  s.count('1').to_f / s.length
end
# Lengths of the pieces left after splitting a string on a pattern.
# A leading empty piece (string starts with a match) is discarded.
#
# @param ns [String]
# @param p [Regexp, String] separator passed to String#split
# @return [Array<Integer>]
def len(ns, p)
  pieces = ns.split(p)
  pieces.shift if pieces.first == ''
  pieces.map(&:length)
end
# Lengths of the pieces of ns between runs of '0' characters.
#
# @param ns [String]
# @return [Array<Integer>]
def len1(ns)
  len(ns, /0+/)
end
# Lengths of the pieces of ns between runs of '1' characters.
#
# @param ns [String]
# @return [Array<Integer>]
def len0(ns)
  len(ns, /1+/)
end
# Both piece-length lists for a string.
#
# @param ns [String]
# @return [Array<Array<Integer>>] [len1(ns), len0(ns)]
def len01(ns)
  [len1(ns), len0(ns)]
end
# Number of pieces reported by len01 divided by the string length.
#
# @param ns [String]
# @return [Float]
def e(ns)
  piece_count = len01(ns).flatten.size
  piece_count.to_f / ns.length
end
# Base-2 logarithm.
#
# Uses the standard library's Math.log2 rather than a quotient of natural
# logarithms, which avoids the rounding error the division can introduce.
#
# @param x [Numeric]
# @return [Float]
# @raise [Math::DomainError] for negative x
def log2(x)
  Math.log2(x)
end
# Splits a string around its middle index (length / 2).
#
# NOTE(review): both slices include the character at the middle index, so
# the two parts overlap by one character — confirm this is intended.
#
# @param ns [String]
# @return [Array<String>] [ns from the middle index to the end,
#   ns from the start through the middle index]
def lh(ns)
  mid = ns.length / 2
  [ns[mid..-1], ns[0..mid]]
end
# Density of '1' characters for the whole string and for the two parts
# returned by lh, each offset by 0.5, plus the difference between the parts.
#
# @param ns [String]
# @return [Array<Float>] [d(ns) - 0.5, d(tail) - 0.5, d(head) - 0.5,
#   d(tail) - d(head)] where tail, head = lh(ns)
def dlh(ns)
  tail, head = lh(ns)
  tail_density = d(tail)
  head_density = d(head)
  [d(ns) - 0.5, tail_density - 0.5, head_density - 0.5, tail_density - head_density]
end
# Density of '1' characters (see d) over the leading slice of the string,
# indices 0 through length / 4 inclusive.
#
# @param ns [String]
# @return [Float]
def d4(ns)
  leading = ns[0..(ns.length / 4)]
  d(leading)
end
# The e measure applied to the leading slice of the string, indices 0
# through length / 4 inclusive.
#
# @param ns [String]
# @return [Float]
def e4(ns)
  leading = ns[0..(ns.length / 4)]
  e(leading)
end
# Scans the string from the start, adding up the integer value of each
# character, until the running total reaches length / 4 (integer division)
# or the string ends.
#
# @param ns [String] expected to consist of '0' and '1' characters
# @return [Float] number of characters consumed divided by the length
#   (NaN for an empty string)
def midpt(ns)
  target = ns.length / 4
  ones = 0
  consumed = 0
  ns.each_char do |bit|
    break unless ones < target
    ones += bit.to_i
    consumed += 1
  end
  consumed.to_f / ns.length
end
# midpt applied to the reversed string, i.e. scanning from the end.
#
# @param ns [String]
# @return [Float]
def midpt1(ns)
  backwards = ns.reverse
  midpt(backwards)
end
# Number of '1' characters in the string.
#
# @param s [String]
# @return [Integer]
def d1(s)
  s.count('1')
end
# Like midpt, but the scan stops once the running total reaches half of the
# number of '1' characters in the string (d1(ns) / 2, integer division).
#
# @param ns [String] expected to consist of '0' and '1' characters
# @return [Float] number of characters consumed divided by the length
def midpt2(ns)
  target = d1(ns) / 2
  ones = 0
  consumed = 0
  ns.each_char do |bit|
    break unless ones < target
    ones += bit.to_i
    consumed += 1
  end
  consumed.to_f / ns.length
end
# Replaces zero by 1 so the result is safe to divide by.
#
# @param x [Numeric]
# @return [Numeric] 1 when x == 0, otherwise x unchanged
def not0(x)
  return 1 if x == 0
  x
end
# Replaces non-finite values (NaN, +/-Infinity) in every column by the mean
# of that column's finite values. Rows are modified in place.
#
# @param l1 [Array<Hash>] rows; the columns are taken from the first row
# @return [Array] the column names ([] for an empty list, which previously
#   raised on `l1[0].keys`)
def fix(l1)
  return [] if l1.empty?
  l1[0].keys.each do |k|
    finite = l1.map { |row| row[k] }.select(&:finite?)
    next if finite.size == l1.size # nothing to repair in this column
    fill = avg(finite)
    # p("#{k} #{fill}")
    l1.each { |row| row[k] = fill unless row[k].finite? }
  end
end
# Derives a feature hash for every row from its 'ns' bit string.
#
# Four groups of features are built per row and appended to l1..l4; after
# non-finite values in the first group have been repaired with fix, the
# groups are merged into one hash per row together with the row's 'hc',
# 'cm' and 'cg' values.
#
# @param l [Array<Hash>] rows with 'ns', 'hc', 'cm', 'cg'
# @param l1 [Array] receives the s1..s5 / t1..t5 features
# @param l2 [Array] receives the run-length averages and maxima
# @param l3 [Array] receives the density features
# @param l4 [Array] receives the midpoint and scaled-average features
# @return [Array<Hash>] one merged feature hash per row
def data(l, l1 = [], l2 = [], l3 = [], l4 = [])
  l.each do |row|
    ns = row['ns']
    dens, dens1, dens2, dens12 = dlh(ns)
    mp = midpt(ns) - 0.5
    mp2 = midpt2(ns) - 0.5
    s1 = mp.abs / not0(dens12.abs)
    s3 = mp.abs * dens12.abs
    s5 = -mp * dens12
    t1 = mp2.abs / not0(dens12.abs)
    t3 = mp2.abs * dens12.abs
    t5 = -mp2 * dens12
    mx0 = len0(ns).max
    mx0 = 0 if mx0.nil? # len0 is empty when there is nothing to measure
    mx1 = len1(ns).max
    mx01 = [mx0, mx1].max
    mx = log2(log2(ns.to_i(2).to_f))
    a0 = avg(len0(ns))
    a1 = avg(len1(ns))
    a01 = avg(len01(ns).flatten)
    # Divisions and logs below may yield Infinity/NaN; fix repairs this group.
    l1 << {
      's1' => s1,
      's2' => 1 / s1,
      's4' => Math.log(1 / s3),
      's3' => s3,
      's5' => -s5,
      't1' => t1,
      't2' => 1 / t1,
      't4' => Math.log(1 / t3),
      't3' => t3,
      't5' => -t5,
    }
    l2 << {
      'a0' => a0,
      'a1' => a1,
      'a01' => a01,
      'a01t' => (a0 + a1) / 2,
      'mx0' => mx0,
      'mx1' => mx1,
      'mx01' => mx01,
    }
    l3 << {
      'e4' => e4(ns) - 0.5,
      'd4' => d4(ns) - 0.5,
      'd12' => dens12,
      'd1' => dens1,
      'd1a' => dens1.abs,
      'd2' => dens2,
      'd2a' => dens2.abs,
      'd' => dens,
      'da' => dens.abs
    }
    l4 << {
      'a01d' => a0 + a1 - 2 * a01,
      'mp' => mp,
      'mpa' => mp.abs,
      'mp2' => mp2,
      'mp2a' => mp2.abs,
      'a0m' => a0 / mx,
      'a1m' => a1 / mx,
    }
  end
  fix(l1)
  (0...l.size).map do |i|
    features = [l1[i], l2[i], l3[i], l4[i]].inject({}) { |acc, part| acc.merge(part) }
    features.merge({'hc' => l[i]['hc'], 'cm' => l[i]['cm'], 'cg' => l[i]['cg']})
  end
end
# Builds a plot-option hash that assigns each named column the gnuplot
# option string 'axes x1y2'.
#
# @param x2 [Array] column names
# @return [Hash] {name => 'axes x1y2'} for every name
def axes2(x2)
  # Filling one hash avoids copying the accumulator on every element.
  x2.each_with_object({}) { |name, opts| opts[name] = 'axes x1y2' }
end
# Evaluates a linear model: the constant z['c'] plus, for every other key
# of z, the coefficient times the matching value of x.
#
# @param x [Hash] values keyed by variable name
# @param z [Hash] coefficients keyed by variable name, plus 'c'
# @return [Numeric]
def dot(x, z)
  (z.keys - ['c']).inject(z['c']) { |acc, name| acc + z[name] * x[name] }
end
# Stores the model prediction of every row under the key "<vy>_y".
#
# @param l [Array<Hash>] rows, modified in place
# @param vy [Object] name of the predicted variable
# @param z [Hash] coefficients as expected by dot
# @return [Array<Hash>] l
def predict(l, vy, z)
  target = "#{vy}_y"
  l.each { |row| row[target] = dot(row, z) }
end
# Adds up the elements of a list using a Float accumulator.
# This is the second definition of `sum` in the file and therefore the one
# in effect after the script has loaded.
#
# @param l [Array<Numeric>]
# @return [Float] 0.0 for an empty list
def sum(l)
  l.inject(0.0) { |total, v| total + v }
end
# Mean of a list, or nil when the list is empty.
#
# @param l [Array<Numeric>]
# @return [Float, nil]
def av(l)
  l.empty? ? nil : sum(l) / l.size
end
# Correlation coefficient between two columns of the rows: the centred
# cross product divided by the product of the roots of the centred squares.
#
# When yp is omitted the second column is "<y1>_y" and, as a side effect,
# every row gets "<y1>_e" set to the absolute difference of the two values.
#
# @param l [Array<Hash>] rows
# @param y1 [Object] name of the first column
# @param yp [Object, nil] name of the second column
# @return [Float]
def corr(l, y1, yp = nil)
  # err_key stays nil when the caller names the second column explicitly.
  yp, err_key = ["#{y1}_y", "#{y1}_e"] if yp.nil?
  x_mean = av(l.map { |row| row[y1] })
  y_mean = av(l.map { |row| row[yp] })
  sxx = syy = sxy = 0.0
  l.each do |row|
    x = row[y1]
    y = row[yp]
    row[err_key] = (x - y).abs unless err_key.nil?
    sxy += (x - x_mean) * (y - y_mean)
    sxx += (x - x_mean) ** 2
    syy += (y - y_mean) ** 2
  end
  denom = Math.sqrt(sxx) * Math.sqrt(syy)
  # Lower bound for the denominator so constant columns do not divide by
  # zero. Kept as a global because the original assigned $z here.
  $z = 1e-4
  sxy / (denom.abs < $z ? $z : denom)
end
# Fits a multiple linear regression of vy on the vx columns with
# Statsample, writes the predictions into the rows (via predict) and
# scores them with corr.
#
# @param l [Array<Hash>] rows; gains "<vy>_y" and "<vy>_e" entries
# @param vx [Array] names of the predictor columns
# @param vy [Object] name of the dependent column
# @return [Array] [correlation of actual vs predicted, coefficient hash
#   including the constant under 'c']
def fit(l, vx, vy)
  columns = {}
  (vx + [vy]).each { |name| columns[name] = l.map { |row| row[name] }.to_vector }
  # A rescue of Statsample::Regression::LinearDependency that returned nil
  # was commented out in the original, so such errors propagate.
  model = Statsample::Regression.multiple(columns.to_dataset, vy)
  # $stderr.puts(model.summary)
  z = model.coeffs.merge({'c' => model.constant})
  predict(l, vy, z)
  [corr(l, vy), z]
end
# Copies column k of l1 into the rows of l at matching positions, creating
# a row hash where l has none yet.
#
# @param l [Array<Hash>] destination rows, modified in place
# @param l1 [Array<Hash>] source rows
# @param k [Object] source column name
# @param n [Object, nil] when given, the destination column is "<k>_<n>"
# @return [Integer] number of source rows
def copy(l, l1, k, n = nil)
  dest_key = n.nil? ? k : "#{k}_#{n}"
  l1.each_with_index do |src, i|
    l[i] = {} if l[i].nil?
    l[i][dest_key] = src[k]
  end
  l1.size
end
# Projects every row onto the given columns.
#
# @param l [Array<Hash>] rows
# @param ks [Array] column names to keep, in output order
# @return [Array<Hash>] new hashes holding only ks (nil for missing columns)
def keys(l, ks)
  l.map { |row| ks.zip(row.values_at(*ks)).to_h }
end
# Selects features by their correlation with the target column and fits a
# regression on the selection.
#
# Every feature of every feature set is correlated with k1. Features are
# then visited in order of decreasing absolute correlation; one is taken
# when its absolute correlation exceeds mn1 and its correlation differs by
# at least mn2 from every correlation already taken. Each pick is printed.
#
# @param l [Array<Hash>] rows holding the target column
# @param l1 [Hash] feature sets: key => Array<Hash> of per-row features
# @param k1 [String] target column
# @param mn2 [Float] minimum distance to already selected correlations
# @param mn1 [Float] minimum absolute correlation
# @return [Array<Hash>] per row: k1, "<k1>_y" and "<k1>_e"
# @raise [RuntimeError] if any correlation is not finite
def fit3(l, l1, k1, mn2 = 0.015, mn1 = 0.025)
  scored = []
  l1.keys.each do |w|
    feature_names = l1[w][0].keys - ['hc', 'cm', 'cg']
    feature_names.each { |k| scored << [k, w, corr(l1[w], k, k1)] }
    # $stderr.puts([w, feature_names.size].inspect)
  end
  broken = scored.reject { |entry| entry[2].finite? }
  raise broken.inspect unless broken.empty?
  l = l.map { |row| Hash[k1, row[k1]] }
  taken = []
  ranked = scored.sort_by { |entry| -entry[2].abs }.select { |entry| entry[2].abs > mn1 }
  ranked.each do |k, w, r|
    nearest = taken.map { |prev| (prev - r).abs }.min
    # Skip features whose correlation nearly duplicates one already taken.
    next if !nearest.nil? && nearest < mn2
    p([k, w, r])
    copy(l, l1[w], k, w)
    taken << r
  end
  $stderr.puts("#{taken.size} / #{scored.size}")
  r, z = fit(l, l[0].keys - [k1], k1)
  p([r, Hash[z.sort_by { |pair| -pair[1].abs }]])
  keys(l, [k1, "#{k1}_y", "#{k1}_e"])
end
# Runs fit3 for the targets 'hc', 'cm' and 'cg', merges the three result
# sets row by row, copies column 'c' from l, and writes six gnuplot files:
# for each target one sorted by the target and one sorted by its error.
#
# NOTE(review): the plot options always name the 'hc' columns, whichever
# target is being plotted — confirm this is intended.
#
# @param l [Array<Hash>] rows with 'hc', 'cm', 'cg' and 'c'
# @param l1 [Hash] feature sets as expected by fit3
# @return [Array<String>] ['hc', 'cm', 'cg']
def var2(l, l1)
  hc_rows = fit3(l, l1, 'hc', 0.025)
  cm_rows = fit3(l, l1, 'cm', 0.03)
  cg_rows = fit3(l, l1, 'cg', 0.025)
  merged = hc_rows.each_index.map { |i| hc_rows[i].merge(cm_rows[i]).merge(cg_rows[i]) }
  copy(merged, l, 'c')
  ['hc', 'cm', 'cg'].each_with_index do |k, i|
    by_value = merged.sort_by { |row| row[k] }
    outafn(by_value, axes2(['hc', 'hc_y', 'hc_e']), i + 1, "by #{k}")
    by_error = merged.sort_by { |row| row["#{k}_e"] }
    outafn(by_error, axes2(['hc', 'hc_y', 'hc_e']), "#{i + 1}b", "by #{k}_e")
  end
end
# One step of the map n -> n / 2 for even n, (3n + 1) / 2 for odd n.
#
# @param n [Integer]
# @return [Integer]
def f2(n)
  return n / 2 if n.even?
  (n * 3 + 1) / 2
end
# Advances every row by one f2 step: 'ns' is read as a binary number, passed
# through f2 and written back as a binary string. Rows are modified in place.
#
# @param l [Array<Hash>] rows with a binary-string 'ns'
# @return [Array<Hash>] l
def adv(l)
  l.each do |row|
    row['ns'] = f2(row['ns'].to_i(2)).to_s(2)
    # row['hc'] = row['c'].to_f / row['ns'].length
  end
end
# Averages several equally shaped lists of row hashes element-wise.
#
# @param l [Array<Array<Hash>>] snapshots; each holds one hash per row
# @return [Array<Hash>] for every row position, a hash giving for each key
#   of the first snapshot's first row the mean over all snapshots
def avgs(l)
  names = l[0][0].keys
  (0...l[0].size).map do |i|
    means = names.map { |name| avg(l.map { |snapshot| snapshot[i][name] }) }
    Hash[[names, means].transpose]
  end
end
# Evaluates fn on the rows, then 19 more times after advancing the rows
# with adv each time, and averages the 20 results with avgs.
#
# Note that adv rewrites row['ns'] in place, so the rows stay advanced
# after this method returns.
#
# @param l [Array<Hash>] rows, modified in place
# @param fn [#call] maps the rows to an Array<Hash> of features
# @return [Array<Hash>] averaged features per row
def smooth(l, fn)
  snapshots = [fn.call(l)]
  19.times { snapshots << fn.call(adv(l)) }
  avgs(snapshots)
end
# Builds all smoothed feature sets and hands them to var2.
#
# Feature sets are keyed "<w>_<b>" for review3 with w in 1..4 and b in
# 0..1, plus the key nil for the features produced by data.
#
# @param l [Array<Hash>] rows
# @return [Object] whatever var2 returns
def fit3b(l)
  feature_sets = {}
  (1..4).each do |w|
    [0, 1].each do |b|
      feature_sets["#{w}_#{b}"] = smooth(l, lambda { |rows| review3(rows, w, b) })
    end
  end
  feature_sets[nil] = smooth(l, lambda { |rows| data(rows) })
  var2(l, feature_sets)
end
# Number of differing bits between the trailing parts of two binary
# strings, compared over the length of the shorter one.
#
# When either string is empty the common length is 0 and the slice
# `s[-0..-1]` is the whole string, so the result is the number of '1' bits
# of the other string. That original behaviour is preserved.
#
# @param ns1 [String] binary digits
# @param ns2 [String] binary digits
# @return [Integer]
def bitdiff(ns1, ns2)
  width = [ns1.length, ns2.length].min
  left = ns1[-width..-1].to_i(2)
  right = ns2[-width..-1].to_i(2)
  (left ^ right).to_s(2).count('1')
end
# Stores in x["pd<n>"] the smaller of the bitdiff distances from x[k] to
# the two neighbour strings, divided by the larger neighbour length plus 1.
#
# @param x [Hash] row, modified in place
# @param ns1 [String] first neighbour ('' when there is none)
# @param ns2 [String] second neighbour ('' when there is none)
# @param k [Object] key of the row's own string
# @param n [Object] suffix of the result key
# @return [Float] the stored value
def prefixdiff(x, ns1, ns2, k, n = '')
  nearest = [bitdiff(x[k], ns1), bitdiff(x[k], ns2)].min
  scale = [ns1.length + 1, ns2.length + 1].max
  x["pd#{n}"] = nearest.to_f / scale
end
# Orders the rows by column k and computes prefixdiff for every row against
# its predecessor and successor in that order ('' at the two ends). The
# input list keeps its order; the rows themselves gain a "pd<n>" entry.
#
# @param l [Array<Hash>] rows
# @param k [Object] key of the string to compare
# @param n [Object] suffix of the result key
# @return [Integer] number of rows
def prefixdiffs(l, k = 'ns', n = '')
  ordered = l.sort_by { |row| row[k] }
  last = ordered.size - 1
  ordered.size.times do |i|
    before = i == 0 ? '' : ordered[i - 1][k]
    after = i == last ? '' : ordered[i + 1][k]
    prefixdiff(ordered[i], before, after, k, n)
  end
end
# Computes prefixdiffs on 'ns' (stored as 'pd1') and on 'ns2' (stored as
# 'pd2'), then sets 'pd' to the smaller of the two for every row.
#
# @param l [Array<Hash>] rows with 'ns' and 'ns2', modified in place
# @return [Array<Hash>] l
def prefixdiffs2(l)
  prefixdiffs(l, 'ns', 1)
  prefixdiffs(l, 'ns2', 2)
  l.each { |row| row['pd'] = [row['pd1'], row['pd2']].min }
end
# Picks n + 1 elements at evenly spaced positions, from the first element
# to the last.
#
# An empty list now yields [] instead of n + 1 nil entries.
# NOTE(review): when the list has fewer than n + 1 elements, positions
# repeat and the result contains duplicates — confirm callers expect that.
#
# @param l [Array] source list
# @param n [Integer] number of intervals; n + 1 elements are returned
# @return [Array]
def sample2(l, n)
  return [] if l.empty?
  step = (l.size - 1).to_f / n
  (0..n).map { |i| l[(i * step).to_i] }
end
# Returns the working data set, building and caching it on first use.
#
# If 'gnuplotx.cmd' exists its rows are loaded and returned. Otherwise the
# files gnuplot1-1.cmd .. gnuplot5-1.cmd are loaded, every row is advanced
# one f2 step ('ns'), given its reversed string ('ns2') and 'hc' = c /
# length of ns, rows with 'pd' <= 0.40 are dropped, the remainder is
# sampled at 1001 positions with sample2, sorted by 'hc' and written to
# 'gnuplotx.cmd'.
#
# @return [Array<Hash>] the rows
def merge()
  fn = 'gnuplotx.cmd'
  # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
  return load(['x']) if File.exist?(fn)
  rows = load((1..5).map { |i| "#{i}-1" })
  rows.each do |row|
    ns = f2(row['ns'].to_i(2)).to_s(2)
    row['ns'] = ns
    row['ns2'] = ns.reverse
    row['hc'] = row['c'].to_f / ns.length
  end
  prefixdiffs(rows)
  rows = sample2(rows.select { |row| row['pd'] > 0.40 }, 1000)
  rows.sort_by! { |row| row['hc'] }
  # Block form flushes and closes the cache file before returning.
  File.open(fn, 'w') { |f| outd(f, rows) }
  rows
end
# Script entry point: build (or load) the data set and run the full analysis.
fit3b(merge)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment