Created
May 3, 2017 03:28
-
-
Save vznvzn/2bfba225a8cd9a121085571eab6ff927 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'statsample' | |
def f2(n) | |
n = (n * 3 + 1) / 2 while (n.odd?) | |
n /= 2 while (n.even?) | |
return n | |
end | |
def setx(x, mn, mx) | |
x['x'] = (x['r'] - mn) / (mx - mn) * 9.99 if (!mn.nil? && mn != mx) | |
end | |
def adv(x, seen, l1, c, mn = nil, mx = nil) | |
x['n'] = x['nb'].to_i(2) | |
n1 = n = x['n'] | |
return if (seen.member?(n)) | |
seen[n] = x | |
l = [n] | |
while (n >= n1 && n != 1 && l.size < c) | |
n = f2(n) | |
l << n | |
end | |
x['n_2'] = n | |
x['ls'] = l.size | |
x['ns'] = x['n'].to_s(2).length | |
x['r'] = nil | |
if (l.size == c) then | |
x['r'] = n.to_s(2).length.to_f / x['ns'] | |
setx(x, mn, mx) if (!mn.nil?) | |
end | |
x['.'] = x['x'].nil? ? '?' : x['x'].to_i.to_s | |
l1 << x | |
end | |
def stat(l) | |
l = [0] if (l.empty?) | |
t = t2 = 0 | |
l.each \ | |
{ | |
|x| | |
t += x | |
t2 += x ** 2 | |
} | |
c = l.size | |
a = t.to_f / c | |
z = t2.to_f / c - a ** 2 | |
sd = Math.sqrt(z < 0 ? 0 : z) | |
return a, sd, l.max.to_f, l.min.to_f | |
end | |
def init(w, f) | |
s = (0...(w - 1)).map { rand(2).to_s }.join + '1' | |
s[0, 1] = '1' if (f) | |
return s | |
end | |
def freq(l, h = {}) | |
l1 = l.map { |x| x['x'].nil? ? nil : x['x'].to_i }.compact | |
h.replace(Hash[hist(l1)]) | |
# $stderr.puts(h.inspect) | |
l1 = h.sort_by { |k, v| v }.reverse.map { |x| x[0] } | |
h2 = Hash[[l1, (0...l1.size).to_a].transpose] | |
$stderr.puts(h.sort.map { |k, v| [k, v, h2.fetch(k, 0) ] }.inspect) | |
return h2 | |
end | |
def minmax(l) | |
l = l.map { |x| x['r'] }.compact | |
return l.min, l.max | |
end | |
def rescale(l, mn, mx) | |
l.each \ | |
{ | |
|x| | |
setx(x, mn, mx) if (!x['r'].nil?) | |
} | |
end | |
def hist(l) | |
h = {} | |
l.each \ | |
{ | |
|x| | |
h[x] = h.fetch(x, 0) + 1 | |
} | |
return h.sort | |
end | |
def dist(w, c, c1) | |
l1 = [] | |
f0 = f1 = true | |
seen = {} | |
20.times { adv({'nb' => init(w, f0)}, seen, l1, c1) } | |
i = n = z = 0 | |
mn = mx = nil | |
h1 = {} | |
h = {} | |
while (n < c) | |
i += 1 | |
if (i % 100 == 0) then | |
mn, mx = minmax(seen.values) | |
rescale(seen.values.select { |x| x.member?('x') }, mn, mx) | |
h1 = freq(l1.select { |x| !x.member?('*') && x.member?('x') }, h) | |
h2 = freq(seen.values.select { |x| x.member?('*') }) | |
$stderr.puts([h1.size, h2.size].inspect) | |
$stderr.puts | |
end | |
l1 = l1.sort_by { |x| [x['ls'], x.member?('x') ? h2.fetch(x['x'].to_i, 9) : 0] }.reverse | |
t = f1 ? l1.size / 4 : [l1.size, 400].min | |
case i % 3 | |
when 0 | |
x = l1[rand(t)] | |
s = x['nb'].dup | |
r = f0 ? (rand(s.length - 2) + 1) : rand(s.length) | |
s[r, 1] = (s[r, 1].to_i ^ 1).to_s | |
a = x['.'] | |
when 1 | |
x = l1[rand(t)] | |
y = l1[rand(t)] | |
sx = x['nb'] | |
sy = y['nb'] | |
s = '' | |
w.times \ | |
{ | |
|j| | |
s[j, 1] = (rand(2) == 0) ? sx[j, 1] : sy[j, 1] | |
} | |
a = x['.'] + ' x ' + y['.'] | |
when 2 | |
x = l1[rand(t)] | |
y = l1[rand(t)] | |
sx = x['nb'] | |
sy = y['nb'] | |
s = '' | |
r = rand(w + 1) | |
w.times \ | |
{ | |
|j| | |
s[j, 1] = (j < r) ? sx[j, 1] : sy[j, 1] | |
} | |
a = x['.'] + ' y ' + y['.'] | |
end | |
# p(a) | |
next if (s.to_i == 0) | |
l1.pop if (l1.size >= 500) | |
if (h1.size >= 5) then | |
x = l1.select { |x| x.member?('x') && !x.member?('*') && h.fetch(x['x'].to_i, 0) >= 3 }.max_by{ |x| h2.fetch(x['x'].to_i, 9) } | |
if (!x.nil?) then | |
x['*'] = nil | |
h[x['x'].to_i] -= 1 | |
end | |
end | |
adv({'nb' => s}, seen, l1, c1, mn, mx) | |
n += 1 | |
end | |
mn, mx = minmax(l = seen.values.select { |x| x.member?('x') }) | |
rescale(l, mn, mx) | |
return l | |
end | |
def stat2(l, t, n) | |
return Hash[[["a#{n}", "sd#{n}", "mx#{n}"], stat(l)[0..2].map { |x| x / t }].transpose] | |
end | |
def d(s) | |
c = s.split('').select { |x| x == '1' }.size | |
d = c.to_f / s.length | |
return d | |
end | |
def data(n) | |
ns = n.to_s(2) | |
nl = ns.length | |
m = nl / 2 | |
nsh = ns[0..m] | |
nsl = ns[m..-1] | |
asdm1 = stat2(ns.split(/0+/).map { |x| x.length }, nl, 1) | |
l1 = ns.split(/1+/) | |
l1.shift | |
asdm0 = stat2(l1.map { |x| x.length }, nl, 0) | |
return {'d' => d(ns), 'dh' => d(nsh), 'dl' => d(nsl)}.merge(asdm1).merge(asdm0) | |
end | |
def fit(l, y, lx) | |
a = {} | |
(lx + [y]).each { |x| a[x] = l.map { |b| b[x] }.to_vector() } | |
ds = a.to_dataset() | |
r = Statsample::Regression.multiple(ds, y) | |
# $stderr.puts(r.summary) | |
return r.coeffs.merge({'c' => r.constant}) | |
end | |
def predict(l, y, z) | |
l.each \ | |
{ | |
|x| | |
t = z['c'] | |
(z.keys - ['c']).each { |k| t += z[k] * x[k] } | |
x["#{y}_p"] = t | |
} | |
end | |
def solve(l, y, x) | |
z = fit(l, y, x) | |
predict(l, y, z) | |
return z | |
end | |
def sum(l) | |
t = 0.0 | |
l.each { |x| t += x } | |
return t | |
end | |
def av(l) | |
return nil if (l.empty?) | |
return sum(l) / l.size | |
end | |
def corr(l, y1, yp) | |
xav = av(l.map { |x| x[y1] }) | |
yav = av(l.map { |x| x[yp] }) | |
tx = ty = txy = e = 0.0 | |
l.each \ | |
{ | |
|z| | |
x = z[y1] | |
y = z[yp] | |
txy += (x - xav) * (y - yav) | |
tx += (x - xav) ** 2 | |
ty += (y - yav) ** 2 | |
e += (x - y) ** 2 | |
} | |
r = txy / (Math.sqrt(tx) * Math.sqrt(ty)) | |
e /= l.size | |
return r | |
end | |
def center(l, y) | |
a, sd = stat(l.map { |x| x[y] }) | |
l.each \ | |
{ | |
|x| | |
x["#{y}z"] = (x[y] - a) / sd | |
} | |
end | |
def balance1(l) | |
c = 50 | |
m = 10.to_f | |
d = m / c | |
x0 = 0.0 | |
l2 = [] | |
seen = {} | |
while (x0 <= m) | |
l.sort_by! { |x| (x['x'] - x0).abs } | |
x0 += d | |
l[0...10].each \ | |
{ | |
|x| | |
next if (seen.member?(x['n'])) | |
seen[x['n']] = nil | |
l2 << x | |
} | |
end | |
return l2 | |
end | |
def coef(l, y0, x0) | |
r = nil | |
begin | |
z = solve(l, y0, x0) | |
r = corr(l, y0, "#{y0}_p") | |
rescue Statsample::Regression::LinearDependency | |
end | |
return {y0 => z.merge({'r' => r})} | |
end | |
def expand(x, l) | |
n = x['n'] | |
while (n != x['n_2']) | |
n1 = n | |
n = n.even? ? n / 2 : n * 3 + 1 | |
l << {'n' => n1, 'n_2' => n, 'x' => n.to_s(2).length.to_f / n1.to_s(2).length } if (n.odd?) | |
end | |
end | |
def reduce(l) | |
l2 = [] | |
l.each_with_index { |x, i| l2 << x if (rand(10) == 0) } | |
return l2 | |
end | |
def parity(l) | |
p = [0, 0] | |
l.each \ | |
{ | |
|x| | |
p[x % 2] += 1 | |
} | |
p(p) | |
end | |
def test(x0, l1) | |
a = {} | |
l = dist(80, 10e3.to_i, 20) | |
a['c1'] = l.size | |
l = balance1(l) | |
a['c2'] = l.size | |
# parity(l.map { |x| x['n']}) | |
l2 = [] | |
l.each { |x| expand(x, l2) } | |
a['c3'] = l2.size | |
# parity(l2.map { |x| x['n']}) | |
# parity(l2.map { |x| x['n_2']}) | |
l1.replace(reduce(l2)) | |
a['c4'] = l1.size | |
$stderr.puts(a.select{ |k| ['c1', 'c2', 'c3', 'c4'].member?(k) }.inspect) | |
l1.each \ | |
{ | |
|x| | |
x.merge!(data(x['n'])) | |
x.merge!(Hash[data(x['n_2']).to_a.map { |k, v| ["#{k}_2", v] }]) | |
} | |
a.merge!(coef(l1, 'x', x0)) | |
f = false | |
x0.each \ | |
{ | |
|x| | |
a.merge!(coef(l1, "#{x}_2", x0)) | |
f = true if (a["#{x}_2"].nil?) | |
} | |
return nil if f | |
(['x'] + x0.map { |x| "#{x}_2"}).each { |x| $stderr.puts([x, a[x]['r']].inspect) } | |
return a | |
end | |
def out(fn, a) | |
return if (a.nil?) | |
f = File.open(fn, 'a') | |
f.puts(a.keys.join("\t")) if (f.size == 0) | |
f.puts(a.values.join("\t")) | |
f.close | |
end | |
def plot(x0, r = '') | |
f = File.open('plot.cmd', 'w') | |
f.printf("plot #{r} ") | |
x0.each \ | |
{ | |
|x| | |
f.print("'out.txt' using (column('#{x}')) with line,") | |
} | |
f.puts | |
f.close | |
end | |
def est(n, x0, a1) | |
x2 = x0.map { |x| "#{x}_2" } + ['x'] | |
a = data(n) | |
c2 = c3 = 0 | |
x = n.to_s(2).length.to_f | |
c = 1e3.to_i | |
c.times \ | |
{ | |
x2.each \ | |
{ | |
|x| | |
predict([a], x, a1[x].reject { |k| k == 'r' }) | |
} | |
a2 = {} | |
x0.each \ | |
{ | |
|x| | |
a2[x] = a["#{x}_2_p"] | |
} | |
a2['x'] = a['x_p'] | |
a = a2 | |
if (x > 1.0) then | |
x *= a['x'] | |
c2 += 1 | |
end | |
break if (a['x'] < 0) | |
c3 += 1 | |
} | |
return {'c2' => (c2 == c ? 0 : c2), 'c3' => (c3 == c ? 0 : c3)} | |
end | |
def out2(fn, l) | |
File.open(fn, 'w').close | |
l.each { |x| out(fn, x) } | |
end | |
def seq(n) | |
l = [] | |
while (n != 1) | |
n = n.even? ? n / 2 : n * 3 + 1 | |
l << n | |
end | |
return l | |
end | |
def dense(w, d) | |
a = (0...(w - 1)).to_a | |
s = '0' * (w - 1) | |
l = [] | |
(d * w - 1).to_i.times { l << a.delete_at(rand(a.size)) } | |
l.each { |x| s[x, 1] = '1' } | |
return ('1' + s).to_i(2) | |
end | |
def sample(w, c) | |
l = [] | |
c.times { |i| l << dense(w, i.to_f / (c - 1)) } | |
l = l.map { |x| seq(x) } | |
# $stderr.puts(l.map { |x| x.size }.inspect) | |
return l | |
end | |
def est1(x) | |
return { 'c1' => -(d(x.to_s(2)) - 0.5).abs } | |
end | |
def model() | |
x0 = ['d', 'dh', 'dl', 'a0', 'sd0', 'mx0', 'a1', 'sd1', 'mx1'] | |
a1 = test(x0, []) | |
File.open(fn = 'out.txt', 'w').close | |
w = 10 | |
loop \ | |
{ | |
|i| | |
l = sample(w, 400) | |
l2 = [] | |
l.each \ | |
{ | |
|l1| | |
l2 << {'c' => l1.size}.merge(est1(l1[0])).merge(est(l1[0] * 2, x0, a1)) | |
} | |
out2('out1.txt', l2) | |
out(fn, {'w' => w, | |
'r1' => corr(l2, 'c', 'c1'), | |
'r2' => corr(l2, 'c', 'c2'), | |
'r3' => corr(l2, 'c', 'c3'), | |
'z2' => l2.select { |x| x['c2'] == 0 }.size, | |
'z3' => l2.select { |x| x['c3'] == 0 }.size, | |
'a' => av(l.map { |x| x.size })}) | |
w += 1 | |
} | |
end | |
model() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment