require 'statsample' | |
# Parses a gnuplot command file of the shape written by `outd` back into rows.
#
# Expected layout: a first line `$dat << eof`, a whitespace-separated header
# line, data rows, then a line `eof`. Everything after `eof` is ignored.
# Writes `[fn, row_count]` to $stderr.
#
# SECURITY: every cell is passed to Kernel.eval, so only read files produced
# by this program; a crafted file can execute arbitrary Ruby.
#
# @param fn [String] path of the file to read
# @return [Array<Hash>] one hash per data row keyed by column name; the 'ns'
#   entry of every row is converted to a String
# @raise [RuntimeError] if the marker line, header line or `eof` line is missing
# @raise [IndexError] if a row has a different number of cells than the header
def outin(fn)
  lines = File.readlines(fn)
  raise "#{fn}: missing '$dat << eof' header" if lines.empty? || lines.shift.chomp != '$dat << eof'
  raise "#{fn}: missing column header line" if lines.empty?
  cols = lines.shift.split
  last = lines.index { |line| line.chomp == 'eof' }
  raise "#{fn}: missing 'eof' terminator" if last.nil?
  rows = lines[0...last].map do |line|
    vals = line.split.map { |cell| Kernel.eval(cell) }
    Hash[[cols, vals].transpose]
  end
  # eval turns the bit string into an Integer; restore it to a String.
  rows.each { |row| row['ns'] = row['ns'].to_s }
  $stderr.puts([fn, rows.size].inspect)
  rows
end
# Reads the files "gnuplot<x>.cmd" for every x in l1 via `outin` and returns
# all their rows concatenated in order.
#
# NOTE: defined at top level, so it takes the place of Kernel#load for
# receiver-less calls in this script.
#
# @param l1 [Array] file-name suffixes
# @return [Array<Hash>] concatenated rows
def load(l1 = [])
  l1.flat_map { |x| outin("gnuplot#{x}.cmd") }
end
# Writes the rows in l to f as a gnuplot inline data block: the marker line
# `$dat << eof`, a tab-separated header taken from the first row's keys, one
# tab-separated line per row, and a closing `eof`.
#
# An empty l produces an empty header line and no data rows instead of failing.
#
# @param f [IO] anything responding to #puts
# @param l [Array<Hash>] rows to write
# @return [Array] the column names written in the header
def outd(f, l)
  f.puts('$dat << eof')
  cols = l.empty? ? [] : l[0].keys
  f.puts(cols.join("\t"))
  l.each { |row| f.puts(row.values.join("\t")) }
  f.puts('eof')
  cols
end
# Writes a complete gnuplot script to f: the data block (via `outd`), the
# style/title settings, and a `plot` command with one series per column.
#
# If a column named 't' exists it is used as the x coordinate of every series
# and is not plotted itself.
#
# @param f [IO] anything responding to #puts
# @param l [Array<Hash>] rows to plot
# @param a [Hash] per-column plot options; a column mapped to nil is skipped.
#   ' lw 2 ' is appended unless the option contains 'lw', and ' with line '
#   unless it contains 'with' or 'pt'
# @param t [String] plot title
# @return [nil]
def outa(f, l, a = {}, t = '')
  cols = outd(f, l)
  f.puts("set colors classic; set title '#{t}'; ")
  f.puts("set ytics nomirror; set y2tics;")
  f.puts("plot \\")
  xexpr = nil
  if cols.include?('t')
    cols -= ['t']
    xexpr = "(column('t')):"
  end
  cols.each do |col|
    next if a.key?(col) && a[col].nil?
    opt = a.fetch(col, '')
    # `+=` builds a new string, so the caller's option hash is not modified.
    opt += ' lw 2 ' unless opt.include?('lw')
    opt += ' with line ' unless opt.include?('with') || opt.include?('pt')
    f.puts("'$dat' using #{xexpr}(column('#{col}')) #{opt} title '#{col}',\\")
  end
  # The blank line ends the backslash-continued plot command.
  f.puts
  # f.puts("reset; pause -1;")
end
# Writes the plot script for l (see `outa`) to the file "gnuplot<fno>.cmd" and
# logs `[file_name, title, row_count]` to $stderr.
#
# The block form of File.open closes the file even if `outa` raises.
#
# @param l [Array<Hash>] rows to plot
# @param a [Hash] per-column plot options, passed to `outa`
# @param fno [Object] file-name suffix (interpolated into the name)
# @param t [String] plot title
# @return [nil]
def outafn(l, a = {}, fno = nil, t = '')
  fn = "gnuplot#{fno}.cmd"
  File.open(fn, 'w') { |f| outa(f, l, a, t) }
  $stderr.puts([fn, t, l.size].inspect)
end
# Splits the reversed string into maximal runs of identical characters.
#
# @param ns [String] input string (treated as a bit string by the callers)
# @return [Array<Array(String, Integer)>] one `[run, index]` pair per run, in
#   order of appearance in `ns.reverse`; `index` is the position of the run's
#   last character within the reversed string
def runs(ns)
  pos = -1
  ns.reverse.each_char.chunk { |ch| ch }.map do |_, chars|
    pos += chars.size
    [chars.join, pos]
  end
end
# Debug helper: prints x.inspect to $stderr the first time each distinct value
# is seen (values already seen are remembered in the global $x).
#
# Logging was hard-disabled by an unconditional `return` that left the rest of
# the body unreachable. It is now gated on Ruby's built-in $DEBUG flag
# (`ruby -d`), so the default behaviour is still a no-op.
#
# @param x [Object] value to log
# @return [nil]
def log(x)
  return unless $DEBUG
  $x ||= []
  return if $x.include?(x)
  $x << x
  $stderr.puts(x.inspect)
end
# Mean and population standard deviation of l.
#
# The deviation is computed in two passes (sum of squared distances from the
# mean) rather than as E[x^2] - E[x]^2, which loses precision when the values
# are large compared with their spread.
#
# @param k [Object] suffix appended to the result keys (nil gives 'a' and 's')
# @param l [Array<Numeric>] values
# @return [Hash] `{"a<k>" => mean, "s<k>" => standard deviation}`
# @raise [RuntimeError] if the deviation is NaN, e.g. for an empty list
def stat(k, l)
  n = l.size
  mean = l.inject(0) { |acc, x| acc + x }.to_f / n
  var = l.inject(0.0) { |acc, x| acc + (x - mean)**2 } / n
  sd = Math.sqrt(var)
  raise [l, n, var].inspect if sd.nan?
  { "a#{k}" => mean, "s#{k}" => sd }
end
# Sum of the values in l as a Float (0.0 for an empty list).
#
# NOTE: `sum` is defined a second time further down in this file and that
# later definition is the one in effect at run time; this one is kept
# consistent with it.
#
# @param l [Array<Numeric>] values
# @return [Float]
def sum(l)
  l.inject(0.0) { |t, x| t + x }
end
# Arithmetic mean of l. An empty list yields NaN (0.0 / 0); use `avg2` when
# empty input must map to 0.
#
# @param l [Array<Numeric>] values
# @return [Float]
def avg(l)
  sum(l).to_f / l.size
end
# Arithmetic mean of l, or 0 when l is empty.
#
# @param l [Array<Numeric>] values
# @return [Numeric]
def avg2(l)
  l.empty? ? 0 : avg(l)
end
# Like `stat`, but returns zeros for an empty list instead of raising.
#
# @param k [Object] suffix appended to the result keys
# @param l [Array<Numeric>] values
# @return [Hash] `{"a<k>" => mean, "s<k>" => standard deviation}`
def stat2(k, l)
  l.empty? ? { "a#{k}" => 0, "s#{k}" => 0 } : stat(k, l)
end
# Summary statistics of a list of numbers.
#
# Result keys, in order:
#   'm'  element at index size/2 of the sorted list (upper median)
#   'ma' mean of the elements greater than 'm' (0 if there are none)
#   'am' mean divided by 'm'
#   'mn', 'mx' minimum and maximum
#   'c'  number of elements
#   'a', 's'   mean and standard deviation of l (via `stat2`)
#   'a2', 's2' mean and standard deviation of the gaps between consecutive
#              sorted elements (via `stat2`)
#
# The argument is no longer sorted in place, and an empty list yields zeros
# for every key instead of failing on `0 / nil`.
# NOTE(review): 'am' is still non-finite when the median is 0.
#
# @param l [Array<Numeric>] values
# @return [Hash]
def diffs(l)
  moments = stat2(nil, l)
  sorted = l.sort
  gaps = sorted.each_cons(2).map { |lo, hi| hi - lo }
  gap_moments = stat2('2', gaps)
  if sorted.empty?
    summary = { 'm' => 0, 'ma' => 0, 'am' => 0, 'mn' => 0, 'mx' => 0, 'c' => 0 }
  else
    median = sorted[sorted.size / 2]
    summary = {
      'm' => median,
      'ma' => avg2(sorted.select { |v| v > median }),
      'am' => moments['a'] / median,
      'mn' => sorted.first,
      'mx' => sorted.last,
      'c' => sorted.size,
    }
  end
  summary.merge(moments).merge(gap_moments)
end
# Selects a subset of `[run, index]` pairs as produced by `runs`.
#
# @param l [Array<Array(String, Integer)>] run/index pairs
# @param b [Object, nil] when given, keep only runs whose first character
#   equals b.to_s
# @param t [Integer, nil] when given, return the first t pairs after ordering
#   by [run length, index] descending (t2 is then ignored)
# @param t2 [Integer, nil] when given (and t is nil), keep runs whose length
#   is at least t2
# @return [Array<Array(String, Integer)>]
def filter(l, b = nil, t = nil, t2 = nil)
  log({'b' => b, 't' => t})
  l = l.select { |run, _| run[0] == b.to_s } unless b.nil?
  return l.sort_by { |run, idx| [run.length, idx] }.reverse[0...t] unless t.nil?
  return l.select { |run, _| run.length >= t2 } unless t2.nil?
  l
end
# For every row, computes `diffs` over the indices of the row's bit runs
# (see `runs`), optionally filtered, and attaches the row's 'hc', 'cm' and
# 'cg' values.
#
# The previous version also built a list `l3` that was never read; that dead
# work has been removed.
#
# @param l [Array<Hash>] rows with at least the keys 'ns', 'hc', 'cm', 'cg'
# @param w [Integer] filter mode: 1, 2, 3 keep the 3, 10, 15 top-ranked runs
#   (see `filter`); 4 keeps runs of length >= 3; any other value keeps all runs
# @param b [Object, nil] passed to `filter` as the required leading character
#   of a run (only used for w in 1..4)
# @return [Array<Hash>] one statistics hash per input row
def review3(l, w, b = 1)
  l.map do |x|
    selected = runs(x['ns'])
    args = case w
           when 1 then [3]
           when 2 then [10]
           when 3 then [15]
           when 4 then [nil, 3]
           end
    selected = filter(selected, b, *args) if args
    diffs(selected.map { |_run, idx| idx })
      .merge({'hc' => x['hc'], 'cm' => x['cm'], 'cg' => x['cg']})
  end
end
# Fraction of characters in s that are '1'. NaN for an empty string.
#
# @param s [String]
# @return [Float]
def d(s)
  s.count('1').to_f / s.length
end
# Lengths of the pieces of ns left after splitting on pattern p, ignoring a
# leading empty piece (which String#split yields when ns starts with a match).
#
# @param ns [String]
# @param p [Regexp, String] separator pattern
# @return [Array<Integer>]
def len(ns, p)
  pieces = ns.split(p)
  pieces.shift if pieces.first == ''
  pieces.map(&:length)
end
# Lengths of the runs of '1' in ns (the pieces between runs of '0').
#
# @param ns [String]
# @return [Array<Integer>]
def len1(ns)
  len(ns, /0+/)
end
# Lengths of the runs of '0' in ns (the pieces between runs of '1').
#
# @param ns [String]
# @return [Array<Integer>]
def len0(ns)
  len(ns, /1+/)
end
# Run lengths of ns as a pair: `[lengths of '1' runs, lengths of '0' runs]`.
#
# @param ns [String]
# @return [Array(Array<Integer>, Array<Integer>)]
def len01(ns)
  [len1(ns), len0(ns)]
end
# Number of runs in ns (as counted by `len01`) divided by the length of ns.
# NaN for an empty string.
#
# @param ns [String]
# @return [Float]
def e(ns)
  len01(ns).flatten.size.to_f / ns.length
end
# Base-2 logarithm of x.
#
# Uses Math.log2 rather than dividing two natural logarithms, so exact powers
# of two give exact results.
#
# @param x [Numeric]
# @return [Float]
# @raise [Math::DomainError] if x is negative
def log2(x)
  Math.log2(x)
end
# Splits ns around its middle index.
#
# Both slices include the character at index length/2, so they overlap by one
# character; this also keeps both slices non-empty for a non-empty ns.
#
# @param ns [String]
# @return [Array(String, String)] `[ns from the middle index to the end,
#   ns from the start through the middle index]`
def lh(ns)
  mid = ns.length / 2
  [ns[mid..-1], ns[0..mid]]
end
# Density features of ns, using `d` (fraction of '1') and `lh` (the two
# overlapping halves).
#
# @param ns [String]
# @return [Array(Float, Float, Float, Float)]
#   `[d(ns) - 0.5, d(tail) - 0.5, d(head) - 0.5, d(tail) - d(head)]` where
#   tail and head are the first and second values returned by `lh`
def dlh(ns)
  tail, head = lh(ns)
  tail_density = d(tail)
  head_density = d(head)
  [d(ns) - 0.5, tail_density - 0.5, head_density - 0.5, tail_density - head_density]
end
# Density of '1' (see `d`) in the slice of ns from index 0 through index
# length/4 inclusive.
#
# @param ns [String]
# @return [Float]
def d4(ns)
  d(ns[0..(ns.length / 4)])
end
# Run density (see `e`) of the slice of ns from index 0 through index
# length/4 inclusive.
#
# @param ns [String]
# @return [Float]
def e4(ns)
  e(ns[0..(ns.length / 4)])
end
# Scans ns from the left, counting '1' characters, and stops once the count
# reaches length/4 (integer division) or the string ends. Returns the number
# of characters consumed divided by the length of ns. NaN for an empty string.
#
# @param ns [String] string of '0' and '1'
# @return [Float]
def midpt(ns)
  target = ns.length / 4
  ones = 0
  consumed = 0
  ns.each_char do |ch|
    break if ones >= target
    ones += ch.to_i
    consumed += 1
  end
  consumed.to_f / ns.length
end
# `midpt` applied to the reversed string, i.e. scanning ns from the right.
#
# @param ns [String]
# @return [Float]
def midpt1(ns)
  midpt(ns.reverse)
end
# Number of '1' characters in s.
#
# @param s [String]
# @return [Integer]
def d1(s)
  s.count('1')
end
# Scans ns from the left, counting '1' characters, and stops once the count
# reaches half the total number of '1's (integer division, via `d1`) or the
# string ends. Returns the number of characters consumed divided by the length
# of ns. NaN for an empty string.
#
# @param ns [String] string of '0' and '1'
# @return [Float]
def midpt2(ns)
  target = d1(ns) / 2
  ones = 0
  consumed = 0
  ns.each_char do |ch|
    break if ones >= target
    ones += ch.to_i
    consumed += 1
  end
  consumed.to_f / ns.length
end
# Returns 1 when x equals 0, otherwise x itself. Used as a divisor guard.
#
# @param x [Numeric]
# @return [Numeric]
def not0(x)
  x == 0 ? 1 : x
end
# Replaces, in place, every non-finite value (NaN or +/-Infinity) in each
# column of l1 with the mean of that column's finite values.
#
# An empty list is accepted, and a column with no finite value at all is
# filled with 0.0; previously it was filled with NaN, leaving it non-finite.
#
# @param l1 [Array<Hash>] rows sharing the keys of the first row; values must
#   respond to #finite?
# @return [Array] the column names processed
def fix(l1)
  return [] if l1.empty?
  l1[0].keys.each do |k|
    finite = l1.map { |row| row[k] }.select { |v| v.finite? }
    next if finite.size == l1.size
    fill = finite.empty? ? 0.0 : avg(finite)
    l1.each { |row| row[k] = fill unless row[k].finite? }
  end
end
# Computes a feature hash for every row from its bit string x['ns'].
#
# Four groups of features are appended to l1..l4 (one hash per row each):
#   l1: 's1'..'s5' / 't1'..'t5', combinations of the midpoint offsets
#       (`midpt`, `midpt2`, each minus 0.5) with the density gap from `dlh`
#   l2: run-length averages and maxima ('a0', 'a1', 'a01', 'a01t', 'mx0',
#       'mx1', 'mx01')
#   l3: densities from `dlh`, `d4`, `e4` and their absolute values
#   l4: midpoint offsets and run-length averages scaled by
#       log2(log2(value of ns))
# Non-finite values in the l1 group are then replaced via `fix`.
#
# Averages and maxima of an empty run list are taken as 0; previously 'mx1'
# could be nil (which made the 'mx01' computation raise) and the averages
# could be NaN. Results for strings containing both digits are unchanged.
#
# NOTE(review): the result is assembled from index 0 of l1..l4, so the
# optional accumulators are expected to be empty on entry.
#
# @param l [Array<Hash>] rows with keys 'ns', 'hc', 'cm', 'cg'
# @param l1 [Array<Hash>] accumulator for the first feature group
# @param l2 [Array<Hash>] accumulator for the second feature group
# @param l3 [Array<Hash>] accumulator for the third feature group
# @param l4 [Array<Hash>] accumulator for the fourth feature group
# @return [Array<Hash>] per row: the four groups merged, plus 'hc', 'cm', 'cg'
def data(l, l1 = [], l2 = [], l3 = [], l4 = [])
  l.each do |x|
    ns = x['ns']
    # Locals are named dens_* so they do not shadow the methods `d` and `d1`.
    dens, dens_tail, dens_head, dens_gap = dlh(ns)
    mp = midpt(ns) - 0.5
    mp2 = midpt2(ns) - 0.5
    s1 = mp.abs / not0(dens_gap.abs)
    s3 = mp.abs * dens_gap.abs
    s5 = -mp * dens_gap
    t1 = mp2.abs / not0(dens_gap.abs)
    t3 = mp2.abs * dens_gap.abs
    t5 = -mp2 * dens_gap
    zero_runs = len0(ns)
    one_runs = len1(ns)
    mx0 = zero_runs.max || 0
    mx1 = one_runs.max || 0
    mx01 = [mx0, mx1].max
    mx = log2(log2(ns.to_i(2).to_f))
    a0 = avg2(zero_runs)
    a1 = avg2(one_runs)
    a01 = avg2(one_runs + zero_runs)
    l1 << {
      's1' => s1,
      's2' => 1 / s1,
      's4' => Math.log(1 / s3),
      's3' => s3,
      's5' => -s5,
      't1' => t1,
      't2' => 1 / t1,
      't4' => Math.log(1 / t3),
      't3' => t3,
      't5' => -t5,
    }
    l2 << {
      'a0' => a0,
      'a1' => a1,
      'a01' => a01,
      'a01t' => (a0 + a1) / 2,
      'mx0' => mx0,
      'mx1' => mx1,
      'mx01' => mx01,
    }
    l3 << {
      'e4' => e4(ns) - 0.5,
      'd4' => d4(ns) - 0.5,
      'd12' => dens_gap,
      'd1' => dens_tail,
      'd1a' => dens_tail.abs,
      'd2' => dens_head,
      'd2a' => dens_head.abs,
      'd' => dens,
      'da' => dens.abs
    }
    l4 << {
      'a01d' => a0 + a1 - 2 * a01,
      'mp' => mp,
      'mpa' => mp.abs,
      'mp2' => mp2,
      'mp2a' => mp2.abs,
      'a0m' => a0 / mx,
      'a1m' => a1 / mx,
    }
  end
  fix(l1)
  (0...l.size).map do |i|
    l1[i].merge(l2[i]).merge(l3[i]).merge(l4[i])
         .merge({'hc' => l[i]['hc'], 'cm' => l[i]['cm'], 'cg' => l[i]['cg']})
  end
end
# Builds the plot-option hash (see `outa`) that maps every given column name
# to the option string 'axes x1y2'.
#
# @param x2 [Array<String>] column names
# @return [Hash{String => String}]
def axes2(x2)
  Hash[x2.map { |name| [name, 'axes x1y2'] }]
end
# Evaluates a linear model: z['c'] plus the sum of z[v] * x[v] over every
# other key v of z.
#
# @param x [Hash] variable values by name
# @param z [Hash] coefficients by name, with the constant term under 'c'
# @return [Numeric]
def dot(x, z)
  z.inject(z['c']) do |acc, (name, coef)|
    name == 'c' ? acc : acc + coef * x[name]
  end
end
# Stores the model prediction `dot(row, z)` in every row under "<vy>_y".
#
# @param l [Array<Hash>] rows, modified in place
# @param vy [String] name of the predicted variable
# @param z [Hash] model coefficients as expected by `dot`
# @return [Array<Hash>] l
def predict(l, vy, z)
  l.each { |row| row["#{vy}_y"] = dot(row, z) }
end
# Sum of the values in l as a Float (0.0 for an empty list).
#
# NOTE: this is the second definition of `sum` in this file; being the later
# one, it replaces the earlier definition.
#
# @param l [Array<Numeric>] values
# @return [Float]
def sum(l)
  l.inject(0.0) { |total, x| total + x }
end
# Arithmetic mean of l, or nil when l is empty.
#
# @param l [Array<Numeric>] values
# @return [Float, nil]
def av(l)
  return nil if l.empty?
  sum(l) / l.size
end
# Pearson correlation between the columns y1 and yp of the rows in l.
#
# When yp is omitted, y1 is correlated with "<y1>_y" and, as a side effect,
# the absolute difference between the two is stored in every row under
# "<y1>_e". When yp is given, the rows are not modified.
#
# The denominator is clamped to at least 1e-4 in magnitude, so constant
# columns (and an empty l) give 0.0 rather than NaN.
#
# @param l [Array<Hash>] rows
# @param y1 [String] first column name
# @param yp [String, nil] second column name
# @return [Float]
def corr(l, y1, yp = nil)
  ye = nil
  yp, ye = ["#{y1}_y", "#{y1}_e"] if yp.nil?
  xav = av(l.map { |row| row[y1] })
  yav = av(l.map { |row| row[yp] })
  tx = ty = txy = 0.0
  l.each do |row|
    x = row[y1]
    y = row[yp]
    row[ye] = (x - y).abs unless ye.nil?
    txy += (x - xav) * (y - yav)
    tx += (x - xav)**2
    ty += (y - yav)**2
  end
  denom = Math.sqrt(tx) * Math.sqrt(ty)
  # The threshold stays in the global $z because that assignment is visible
  # outside this method.
  $z = 1e-4
  txy / (denom.abs < $z ? $z : denom)
end
# Fits vy as a linear function of the columns vx using
# Statsample::Regression.multiple, stores the predictions in the rows
# (see `predict`, which `corr` then extends with the "<vy>_e" errors) and
# returns the resulting correlation and coefficients.
#
# Errors raised by Statsample propagate to the caller.
#
# @param l [Array<Hash>] rows, modified in place
# @param vx [Array<String>] predictor column names
# @param vy [String] target column name
# @return [Array(Float, Hash)] `[correlation between vy and its prediction,
#   coefficients by column name plus the constant under 'c']`
def fit(l, vx, vy)
  vectors = {}
  (vx + [vy]).each { |v| vectors[v] = l.map { |row| row[v] }.to_vector() }
  model = Statsample::Regression.multiple(vectors.to_dataset(), vy)
  # $stderr.puts(model.summary)
  z = model.coeffs.merge({'c' => model.constant})
  predict(l, vy, z)
  [corr(l, vy), z]
end
# Copies column k of l1 into the rows of l, position by position, creating
# missing rows in l as empty hashes.
#
# @param l [Array<Hash>] destination rows, modified in place
# @param l1 [Array<Hash>] source rows
# @param k [String] source column name
# @param n [Object, nil] when given, the destination column is "<k>_<n>"
#   instead of k
# @return [Integer] number of source rows
def copy(l, l1, k, n = nil)
  dest = n.nil? ? k : "#{k}_#{n}"
  l1.size.times do |i|
    l[i] ||= {}
    l[i][dest] = l1[i][k]
  end
end
# Projects every row onto the given keys, in the given order. Keys missing
# from a row map to nil.
#
# @param l [Array<Hash>] rows
# @param ks [Array] keys to keep
# @return [Array<Hash>] new hashes; the input rows are not modified
def keys(l, ks)
  l.map { |row| Hash[ks.zip(row.values_at(*ks))] }
end
# Selects features by their correlation with the target k1 and fits a linear
# model on the selection.
#
# Steps:
# 1. For every feature group w in l1 and every feature k in it (excluding
#    'hc', 'cm', 'cg'), compute `corr` between k and k1 within that group.
# 2. Walk the features in order of decreasing |correlation|, considering only
#    those with |correlation| > mn1, and skip any whose correlation is within
#    mn2 of one already chosen. Chosen features are printed and copied next to
#    the target as "<k>_<w>" (or k when w is nil).
# 3. Fit k1 on the chosen features (`fit`) and print the resulting correlation
#    and the coefficients ordered by magnitude.
#
# @param l [Array<Hash>] rows providing the target column k1
# @param l1 [Hash{Object => Array<Hash>}] feature rows per group
# @param k1 [String] target column name
# @param mn2 [Float] minimum distance between the correlations of two chosen
#   features
# @param mn1 [Float] minimum |correlation| for a feature to be considered
# @return [Array<Hash>] per row: k1, "<k1>_y" and "<k1>_e"
# @raise [RuntimeError] if any correlation is not finite
def fit3(l, l1, k1, mn2 = 0.015, mn1 = 0.025)
  scored = []
  l1.keys.each do |w|
    (l1[w][0].keys - ['hc', 'cm', 'cg']).each do |k|
      scored << [k, w, corr(l1[w], k, k1)]
    end
  end
  broken = scored.select { |entry| !entry[2].finite? }
  raise broken.inspect unless broken.empty?
  l = l.map { |row| { k1 => row[k1] } }
  chosen = []
  scored.sort_by { |entry| -entry[2].abs }.select { |entry| entry[2].abs > mn1 }.each do |k, w, r|
    next if chosen.any? { |prev| (prev - r).abs < mn2 }
    p([k, w, r])
    copy(l, l1[w], k, w)
    chosen << r
  end
  $stderr.puts("#{chosen.size} / #{scored.size}")
  r, z = fit(l, l[0].keys - [k1], k1)
  p([r, Hash[z.sort_by { |pair| -pair[1].abs }]])
  keys(l, [k1, "#{k1}_y", "#{k1}_e"])
end
# Fits the three targets 'hc', 'cm' and 'cg' with `fit3`, merges the results
# row by row, adds column 'c' from l, and writes six gnuplot files via
# `outafn`: for each target, one sorted by the target (suffixes 1, 2, 3) and
# one sorted by its prediction error (suffixes 1b, 2b, 3b).
#
# In every plot the columns 'hc', 'hc_y' and 'hc_e' are given the option
# 'axes x1y2'.
#
# @param l [Array<Hash>] original rows
# @param l1 [Hash{Object => Array<Hash>}] feature rows per group
# @return [Array<String>] `['hc', 'cm', 'cg']`
def var2(l, l1)
  hc_fit = fit3(l, l1, 'hc', 0.025)
  cm_fit = fit3(l, l1, 'cm', 0.03)
  cg_fit = fit3(l, l1, 'cg', 0.025)
  merged = (0...hc_fit.size).map { |i| hc_fit[i].merge(cm_fit[i]).merge(cg_fit[i]) }
  copy(merged, l, 'c')
  y2_opts = axes2(['hc', 'hc_y', 'hc_e'])
  ['hc', 'cm', 'cg'].each_with_index do |k, i|
    outafn(merged.sort_by { |row| row[k] }, y2_opts, i + 1, "by #{k}")
    outafn(merged.sort_by { |row| row["#{k}_e"] }, y2_opts, "#{i + 1}b", "by #{k}_e")
  end
end
# One step of the map n -> (3n + 1) / 2 for odd n and n -> n / 2 for even n.
#
# @param n [Integer]
# @return [Integer]
def f2(n)
  n.odd? ? (n * 3 + 1) / 2 : n / 2
end
# Advances every row by one `f2` step: x['ns'] is read as a binary number,
# passed through `f2`, and written back as a binary string.
#
# @param l [Array<Hash>] rows with a binary string under 'ns'; modified in place
# @return [Array<Hash>] l
def adv(l)
  l.each do |x|
    x['ns'] = f2(x['ns'].to_i(2)).to_s(2)
    # x['hc'] = x['c'].to_f / x['ns'].length
  end
end
# Element-wise average of several equally shaped lists of row hashes: row i of
# the result maps every key to the mean (see `avg`) of that key in row i of
# each list.
#
# Returns [] when there are no lists or the first list has no rows, instead of
# failing on nil.
#
# @param l [Array<Array<Hash>>] lists of rows; keys and row count are taken
#   from the first list
# @return [Array<Hash>]
def avgs(l)
  return [] if l.empty? || l[0].empty?
  cols = l[0][0].keys
  (0...l[0].size).map do |i|
    Hash[cols.map { |k| [k, avg(l.map { |rows| rows[i][k] })] }]
  end
end
# Evaluates fn on l and on `steps` successive advances of l (see `adv`), and
# returns the element-wise average of all results (see `avgs`).
#
# NOTE(review): `adv` modifies the rows of l in place, so after this call the
# caller's rows have been advanced `steps` times and a following call starts
# from that state. Confirm that this is intended.
#
# @param l [Array<Hash>] rows; modified in place
# @param fn [#call] maps the rows to a list of feature hashes
# @param steps [Integer] number of advances after the initial evaluation
# @return [Array<Hash>] averaged feature rows
def smooth(l, fn, steps = 19)
  samples = [fn.call(l)]
  steps.times { samples << fn.call(adv(l)) }
  avgs(samples)
end
# Builds all feature groups for l and runs the fits and plots (`var2`).
#
# Groups: "<w>_<b>" for w in 1..4 and b in 0..1 hold the smoothed `review3`
# statistics; the group under the key nil holds the smoothed `data` features.
#
# @param l [Array<Hash>] rows; advanced in place by `smooth`
# @return [Object] whatever `var2` returns
def fit3b(l)
  groups = {}
  (1..4).each do |w|
    [0, 1].each do |b|
      groups["#{w}_#{b}"] = smooth(l, lambda { |rows| review3(rows, w, b) })
    end
  end
  groups[nil] = smooth(l, lambda { |rows| data(rows) })
  var2(l, groups)
end
# Number of differing bits between the trailing parts of two binary strings,
# compared over the length of the shorter one.
#
# NOTE(review): when one string is empty the width is 0 and the slice
# `[-0..-1]` selects the whole of the other string, so the result is the
# number of '1' bits in the non-empty string rather than 0. `prefixdiffs`
# passes '' for a missing neighbour, so this is kept as is.
#
# @param ns1 [String] binary string
# @param ns2 [String] binary string
# @return [Integer]
def bitdiff(ns1, ns2)
  width = [ns1.length, ns2.length].min
  tail1 = ns1[-width..-1].to_i(2)
  tail2 = ns2[-width..-1].to_i(2)
  (tail1 ^ tail2).to_s(2).count('1')
end
# Stores in x, under "pd<n>", the smaller of the `bitdiff` distances from
# x[k] to ns1 and to ns2, divided by the longer neighbour's length plus one.
#
# @param x [Hash] row, modified in place
# @param ns1 [String] first neighbour's binary string
# @param ns2 [String] second neighbour's binary string
# @param k [String] key of the row's own binary string
# @param n [Object] suffix for the result key
# @return [Float] the stored value
def prefixdiff(x, ns1, ns2, k, n = '')
  nearest = [bitdiff(x[k], ns1), bitdiff(x[k], ns2)].min
  scale = [ns1.length + 1, ns2.length + 1].max
  x["pd#{n}"] = nearest.to_f / scale
end
# Sorts the rows by x[k] and calls `prefixdiff` on each row with the values of
# its predecessor and successor in that order ('' where there is none). The
# result is stored in the row objects themselves, so the caller's rows gain
# the "pd<n>" key even though l itself is not reordered.
#
# @param l [Array<Hash>] rows
# @param k [String] key to sort and compare by
# @param n [Object] suffix for the result key, passed to `prefixdiff`
# @return [Integer] number of rows
def prefixdiffs(l, k = 'ns', n = '')
  sorted = l.sort_by { |x| x[k] }
  last = sorted.size - 1
  sorted.each_with_index do |row, i|
    before = i == 0 ? '' : sorted[i - 1][k]
    after = i == last ? '' : sorted[i + 1][k]
    prefixdiff(row, before, after, k, n)
  end
  sorted.size
end
# Runs `prefixdiffs` on the keys 'ns' (result in 'pd1') and 'ns2' (result in
# 'pd2') and stores the smaller of the two in every row under 'pd'.
#
# @param l [Array<Hash>] rows with 'ns' and 'ns2'; modified in place
# @return [Array<Hash>] l
def prefixdiffs2(l)
  prefixdiffs(l, 'ns', 1)
  prefixdiffs(l, 'ns2', 2)
  l.each { |x| x['pd'] = [x['pd1'], x['pd2']].min }
end
# Picks n + 1 evenly spaced elements of l, always including the first and the
# last.
#
# When l has n + 1 elements or fewer, a copy of l is returned; previously the
# result contained repeated elements (or nils for an empty l). Indices are
# computed with integer arithmetic so that the last index is exactly
# l.size - 1.
#
# @param l [Array] source elements
# @param n [Integer] number of intervals
# @return [Array] at most n + 1 elements of l, in their original order
def sample2(l, n)
  return l.dup if l.size <= n + 1
  span = l.size - 1
  (0..n).map { |i| l[i * span / n] }
end
# Produces the working data set.
#
# If the cache file 'gnuplotx.cmd' exists, its rows are returned. Otherwise
# the files gnuplot1-1.cmd .. gnuplot5-1.cmd are loaded; every row is advanced
# by one `f2` step ('ns'), given its reversed string ('ns2') and 'hc'
# (x['c'] divided by the new string length); rows whose 'pd' (see
# `prefixdiffs`) is above 0.40 are kept and thinned with `sample2(.., 1000)`;
# the result is sorted by 'hc', written to the cache file with `outd` and
# returned.
#
# Uses File.exist? (File.exists? is gone from current Ruby versions) and the
# block form of File.open so the cache file is closed after writing.
#
# @return [Array<Hash>] rows
def merge()
  fn = 'gnuplotx.cmd'
  return load(['x']) if File.exist?(fn)
  l = load((1..5).map { |i| "#{i}-1" })
  l.each do |x|
    ns = f2(x['ns'].to_i(2)).to_s(2)
    x['ns'] = ns
    x['ns2'] = ns.reverse
    x['hc'] = x['c'].to_f / ns.length
  end
  prefixdiffs(l)
  l = sample2(l.select { |x| x['pd'] > 0.40 }, 1000)
  l.sort_by! { |x| x['hc'] }
  File.open(fn, 'w') { |f| outd(f, l) }
  l
end
# Script entry point: build (or load) the data set, then fit and plot.
fit3b(merge())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment