# Created March 25, 2017 00:57
# Gist: vznvzn/b2b39c54f73fa452f2424cd6afdbbd80
# Note: the hosting page warned that this file may contain bidirectional Unicode
# text that can be interpreted or compiled differently from how it is displayed;
# review it in an editor that reveals hidden Unicode characters.
require 'statsample' | |
# Applies one "macro step" of the shortcut Collatz map to +n+:
# first repeats n -> (3n + 1) / 2 while n is odd, then repeats n -> n / 2
# while n is even, and returns the resulting (odd) integer.
#
# n - a positive Integer.
#
# Returns the Integer reached after the odd run and the even run.
# Raises ArgumentError for n <= 0: zero stays even forever and negative
# values such as -1 map to themselves, so the loops below would never end.
def f2(n)
  raise ArgumentError, "n must be positive, got #{n.inspect}" unless n > 0

  n = (n * 3 + 1) / 2 while n.odd?
  n /= 2 while n.even?
  n
end
# Fills in the derived fields of record +x+ from its binary string x['nb'],
# appends the record to +l2+ and returns it.
#
# Fields written, in this order:
#   'n'  - x['nb'] parsed as a base-2 integer
#   'ls' - number of values visited (start included) while iterating f2
#          until the value drops below the start or reaches 1
#   'ns' - number of binary digits of 'n'
#   'h2' - ('ls' / 'ns' * 50) truncated to an integer
#   'd'  - d(x['nb'])
#
# x  - Hash with at least the key 'nb' (String of '0'/'1'); mutated in place.
# l2 - Array that collects every processed record; mutated in place.
def adv(x, l2)
  start = x['nb'].to_i(2)
  x['n'] = start

  current = start
  visited = 1 # the starting value itself counts as one entry
  until current < start || current == 1
    current = f2(current)
    visited += 1
  end

  x['ls'] = visited
  x['ns'] = start.to_s(2).length
  x['h2'] = (visited.to_f / x['ns'] * 50).to_i
  x['d'] = d(x['nb'])
  l2 << x
  x
end
# Summary statistics of a numeric list.
#
# l - Array of numbers; an empty list is treated as [0].
#
# Returns [mean, standard deviation, max, min]. The deviation is the
# population form sqrt(E[x^2] - E[x]^2); a slightly negative variance caused
# by rounding is clamped to 0 before the square root. Max and min are
# converted to Float.
def stat(l)
  values = l.empty? ? [0] : l
  count = values.size
  total = values.inject(0) { |acc, v| acc + v }
  total_sq = values.inject(0) { |acc, v| acc + v**2 }
  mean = total.to_f / count
  variance = total_sq.to_f / count - mean**2
  clamped = variance < 0 ? 0 : variance
  [mean, Math.sqrt(clamped), values.max.to_f, values.min.to_f]
end
# Builds a random bit string.
#
# w - number of characters to generate.
# f - when truthy, the first character is overwritten with '1'.
#
# Returns a String of '0'/'1' characters drawn one at a time with rand(2).
def init(w, f)
  bits = w.times.map { rand(2).to_s }.join
  bits[0, 1] = '1' if f
  bits
end
# Generates records by evolving a population of w-character bit strings,
# favouring strings with a large 'ls' value (as computed by adv).
#
# w - width (number of characters) of every bit string.
# f - three flags [f0, f1, f2]:
#       f0 - force the first bit of initial strings to '1' and never flip
#            bit 0 during point mutation;
#       f1 - pick parents from the top min(size, 400) records instead of the
#            top tenth of the population;
#       f2 - reject candidate strings that were already accepted.
#
# The population starts with 20 random strings and is capped at 500 records
# (the record with the smallest 'ls' is dropped before a new one is added).
# Iterations cycle through three operators: single-bit flip, uniform
# crossover and one-point crossover. The loop ends after 2000 accepted
# candidates.
#
# Returns the Array of every record passed through adv, including the
# initial 20.
#
# NOTE(review): rejected candidates do not advance the acceptance counter, so
# with f2 set and a small +w+ the loop may never finish once the reachable
# strings are exhausted — confirm the widths callers use.
def dist(w, f)
  l = []
  l2 = []
  lead_one, wide_pool, unique = f
  20.times { l << adv({ 'nb' => init(w, lead_one) }, l2) }
  c = 2e3.to_i
  seen = {}
  i = n = 0
  while n < c
    i += 1
    l.sort_by! { |x| -x['ls'] }
    t = wide_pool ? [l.size, 400].min : l.size / 10
    case i % 3
    when 0
      # Point mutation. Work on a copy: flipping the bit directly in the
      # parent's string would change the parent's 'nb' without updating its
      # 'n'/'ls', and parent and child would share one String object.
      x = l[rand(t)]
      s = x['nb'].dup
      r = lead_one ? (rand(s.length - 1) + 1) : rand(s.length)
      s[r, 1] = (s[r, 1].to_i ^ 1).to_s
    when 1
      # Uniform crossover: each position is taken from either parent.
      x = l[rand(t)]
      y = l[rand(t)]
      sx = x['nb']
      sy = y['nb']
      s = (0...w).map { |j| rand(2) == 0 ? sx[j, 1] : sy[j, 1] }.join
    when 2
      # One-point crossover: prefix from the first parent, rest from the second.
      x = l[rand(t)]
      y = l[rand(t)]
      sx = x['nb']
      sy = y['nb']
      r = rand(w + 1)
      s = (0...w).map { |j| j < r ? sx[j, 1] : sy[j, 1] }.join
    end
    # An all-zero string has no usable starting value; skip it.
    next unless s.include?('1')

    if unique
      next if seen.member?(s)

      seen[s] = nil
    end
    l.pop if l.size >= 500
    l << adv({ 'nb' => s }, l2)
    n += 1
  end
  # $stderr.puts("#{i - n} dups")
  l2
end
# Frequency table of the elements of +l+.
#
# Returns an Array of [value, count] pairs sorted in ascending order.
def hist(l)
  counts = l.each_with_object(Hash.new(0)) { |item, table| table[item] += 1 }
  counts.sort
end
# Summarises field +k+ across the records in +l+.
#
# l - Array of Hashes.
# k - key whose values are summarised.
#
# Returns a Hash with the keys "<k>_a" (mean), "<k>_sd" (standard deviation),
# "<k>_mx" (max), "<k>_mn" (min) as computed by stat, and "<k>_md", the most
# frequent value (the smallest one on ties, since hist returns sorted pairs).
# For an empty +l+ the "<k>_md" entry is nil instead of raising, matching the
# way stat already tolerates empty input.
def range(l, k)
  values = l.map { |x| x[k] }
  a, sd, mx, mn = stat(values)
  mode = hist(values).max_by { |_value, count| count }
  {
    "#{k}_a" => a, "#{k}_sd" => sd,
    "#{k}_mx" => mx, "#{k}_mn" => mn,
    "#{k}_md" => (mode && mode[0])
  }
end
# Mean, standard deviation and max of +l+ (from stat), each divided by +t+.
#
# l - Array of numbers.
# t - divisor applied to all three statistics.
# n - suffix appended to the result keys.
#
# Returns a Hash with the keys "a<n>", "sd<n>" and "mx<n>".
def stat2(l, t, n)
  labels = %w[a sd mx].map { |prefix| "#{prefix}#{n}" }
  scaled = stat(l).first(3).map { |value| value / t }
  Hash[labels.zip(scaled)]
end
# Density of '1' characters in +s+: count of '1' divided by the length.
#
# Returns a Float; for an empty string the result is NaN (0.0 / 0).
def d(s)
  s.count('1').to_f / s.length
end
# Builds the feature row used for regression from record +x+.
#
# x - Hash with the keys 'n', 'ns' and 'h2'.
#
# Returns a new Hash holding 'n', 'ns', 'h2' copied from +x+, the densities
# 'd' (whole binary expansion of 'n'), 'dh' (upper part) and 'dl' (lower
# part), plus 'a1'/'sd1'/'mx1' (statistics of the lengths of runs of ones)
# and 'a0'/'sd0'/'mx0' (runs of zeros), each scaled by the bit length.
def data(x)
  bits = x['n'].to_s(2)
  width = bits.length
  mid = width / 2
  # NOTE(review): both slices include the bit at index mid — confirm the
  # one-bit overlap between the halves is intended.
  upper = bits[0..mid]
  lower = bits[mid..-1]

  one_runs = bits.split(/0+/).map(&:length)
  zero_runs = bits.split(/1+/)
  zero_runs.shift # drop the first piece produced by the split
  zero_lengths = zero_runs.map(&:length)

  row = {
    'n' => x['n'], 'ns' => x['ns'], 'h2' => x['h2'],
    'd' => d(bits), 'dh' => d(upper), 'dl' => d(lower)
  }
  row.merge(stat2(one_runs, width, 1)).merge(stat2(zero_lengths, width, 0))
end
# Runs a multiple regression of column +y+ on the columns +lx+ using
# Statsample.
#
# l  - Array of Hashes (one per observation).
# y  - name of the dependent column.
# lx - Array of names of the predictor columns.
#
# Returns the coefficient Hash reported by Statsample with the regression
# constant added under the key 'c'.
def fit(l, y, lx)
  columns = (lx + [y]).each_with_object({}) do |name, acc|
    acc[name] = l.map { |row| row[name] }.to_vector
  end
  regression = Statsample::Regression.multiple(columns.to_dataset, y)
  # $stderr.puts(regression.summary)
  regression.coeffs.merge('c' => regression.constant)
end
# Stores the linear-model prediction in every record of +l+.
#
# l - Array of Hashes; each gets a new key "<y>_p".
# y - base name of the output key.
# z - Hash of coefficients; 'c' is the intercept, every other key names a
#     field of the record that is multiplied by its coefficient.
#
# Returns +l+.
def predict(l, y, z)
  target = "#{y}_p"
  features = z.keys - ['c']
  l.each do |row|
    row[target] = features.inject(z['c']) { |acc, name| acc + z[name] * row[name] }
  end
end
# Fits column +y+ against the fixed feature set below and writes the fitted
# value of every record into "<y>_p".
#
# l - Array of feature Hashes containing +y+ and all listed predictors.
# y - name of the dependent column. It was previously ignored in favour of a
#     hard-coded 'h2'; callers passing 'h2' get the same result as before.
#
# Returns whatever predict returns (the list +l+).
def solve(l, y)
  z = fit(l, y, %w[d dh dl a0 sd0 mx0 a1 sd1 mx1])
  predict(l, y, z)
end
# Writes the records in +l+ to +fn+ as tab-separated text, replacing any
# existing file.
#
# fn - path of the output file.
# l  - Array of Hashes; the keys of the first record form the header line and
#      every record contributes one line of its values.
#
# An empty +l+ produces an empty file. The block form of File.open closes the
# handle even if writing raises.
#
# Returns nil.
def out1(fn, l)
  File.open(fn, 'w') do |f|
    next if l.empty?

    f.puts(l[0].keys.join("\t"))
    l.each { |x| f.puts(x.values.join("\t")) }
  end
  nil
end
# Picks up to 10 evenly spaced records per 'h2' value.
#
# l  - Array of record Hashes with the keys 'h2', 'n' and 'd'.
# f1 - when truthy, sample each group along its 'n' ordering.
# f2 - when truthy, sample each group along its 'd' ordering.
#
# Only groups with at least 10 records are used, visited in ascending 'h2'
# order. For every enabled key the group is sorted by that key and 10
# positions spread from the first to the last element are taken; a record
# already taken is not added again.
#
# Already-taken records are tracked in an identity-keyed Hash local to this
# call. Earlier this was done by writing a '.' key into the records, which
# altered the caller's data and made a second call skip everything picked
# before.
#
# Returns the Array of selected records.
def balance(l, f1, f2)
  groups = l.group_by { |x| x['h2'] }
  c = 10
  enabled = { 'n' => f1, 'd' => f2 }
  taken = {}.compare_by_identity
  l2 = []
  groups.sort.select { |_h2, v| v.size >= c }.each do |_h2, v|
    %w[n d].each do |key|
      next unless enabled[key]

      v.sort_by! { |x| x[key] }
      c.times do |i|
        # Map i in 0..c-1 onto an index spread across the whole group.
        j = ((i.to_f / (c - 1)) * (v.size - 1)).to_i
        record = v[j]
        next if taken.key?(record)

        taken[record] = true
        l2 << record
      end
      # c.times { break if (v.empty?); l2 << v.delete_at(rand(v.size)) }
    end
  end
  l2
end
# Keeps exactly 10 records for every 'h2' value that has at least 10.
#
# l - Array of record Hashes with the key 'h2'.
#
# For each 'h2' value the first 10 records in input order are retained;
# values with fewer than 10 records are dropped entirely.
#
# Returns a new Array with the retained records, grouped by ascending 'h2'.
def balance2(l)
  c = 10
  buckets = {}
  l.each do |x|
    bucket = (buckets[x['h2']] ||= [])
    bucket << x if bucket.size < c
  end
  full = buckets.sort.select { |_h2, members| members.size == c }
  full.flat_map { |_h2, members| members }
end
# Adds up the elements of +l+ from left to right, starting at 0.0.
#
# Returns a Float (0.0 for an empty list).
def sum(l)
  l.inject(0.0) { |acc, x| acc + x }
end
# Arithmetic mean of +l+.
#
# Returns nil for an empty list, otherwise sum(l) divided by the size.
def av(l)
  l.empty? ? nil : sum(l) / l.size
end
# Measures how well the regression in solve reproduces 'h2'.
#
# l - Array of records accepted by data.
#
# Every record is converted with data, the model is fitted and applied with
# solve, and the observed 'h2' values are compared with the fitted 'h2_p'.
#
# Returns [r, e] where r is the correlation coefficient between observed and
# fitted values and e is the mean squared error, or nil when Statsample
# reports a linear dependency among the columns. r is NaN when either series
# has zero variance (0.0 / 0.0).
def coef(l)
  rows = l.map { |x| data(x) }
  begin
    solve(rows, 'h2')
  rescue Statsample::Regression::LinearDependency
    return nil
  end

  observed = rows.map { |row| row['h2'] }
  fitted = rows.map { |row| row['h2_p'] }
  mean_obs = av(observed)
  mean_fit = av(fitted)

  cross = var_obs = var_fit = sq_err = 0.0
  observed.zip(fitted) do |obs, est|
    cross += (obs - mean_obs) * (est - mean_fit)
    var_obs += (obs - mean_obs)**2
    var_fit += (est - mean_fit)**2
    sq_err += (obs - est)**2
  end

  r = cross / (Math.sqrt(var_obs) * Math.sqrt(var_fit))
  [r, sq_err / rows.size]
end
# Converts the first three characters of +x+ into booleans.
#
# Returns an Array of three values; each is true when the character at that
# position is '1' and false otherwise (including when the position is
# missing).
def tobool(x)
  [0, 1, 2].map { |pos| x[pos, 1] == '1' }
end
# Appends one tab-separated row to +fn+.
#
# fn - path of the output file (created if missing).
# a  - Hash; its values form the row. When the file is still empty its keys
#      are written first as a header line.
#
# The block form of File.open closes the handle even if writing raises.
#
# Returns nil.
def out(fn, a)
  File.open(fn, 'a') do |f|
    f.puts(a.keys.join("\t")) if f.size == 0
    f.puts(a.values.join("\t"))
  end
  nil
end
# Updates the running average stored under key +k+ of accumulator +a+.
#
# a - Hash mapping keys to {'t' => total, 'c' => count, 'a' => average};
#     mutated in place, the entry is created on first use.
# k - accumulator key.
# x - value to add; nil is ignored.
#
# Returns the new average, or nil when +x+ is nil.
def avg(a, k, x)
  return if x.nil?

  a[k] = { 't' => 0.0, 'c' => 0.0 } unless a.key?(k)
  slot = a[k]
  slot['t'] += x
  slot['c'] += 1
  slot['a'] = slot['t'] / slot['c']
end
# Driver: starts with an empty out.txt and then, for widths 20, 21, 22, ...
# without end, generates records with dist, balances them with balance2,
# scores the regression with coef and appends one result row (correlation,
# error, width, sample count) whenever coef produced a result.
fn = 'out.txt'
File.write(fn, '') # create or truncate the result file
w = 20
a = {}
loop do
  l = balance2(dist(w, [true, true, true]))
  r, e = coef(l)
  out(fn, { 'r' => r, 'e' => e, 'w' => w, 'c' => l.size }) unless r.nil?
  w += 1
end
# (Hosting-page footer: sign up or sign in on GitHub to comment on this gist.)