Created
March 24, 2017 01:09
-
-
Save vznvzn/23a52c94be1f8eedda8662aefc1ce0f8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'statsample' | |
# Applies one compressed Collatz-style step to +n+: while the value is odd it
# is replaced by (3n + 1) / 2, then it is halved until it becomes odd again.
#
# n       - positive Integer.
# Returns the resulting odd Integer.
# Raises ArgumentError for n < 1: for 0 the halving loop never ends, and for
# -1 the odd loop never ends.
def f2(n)
  raise ArgumentError, "n must be >= 1, got #{n.inspect}" if n < 1

  n = (n * 3 + 1) / 2 while n.odd?
  n /= 2 while n.even?
  n
end
# Evaluates one candidate and files it into the global histogram.
#
# x - Hash with key 'nb' holding a binary digit string. The hash is mutated:
#     'n'  <- integer value of 'nb'
#     'ls' <- number of values visited (start included) while iterating f2
#             until the value drops below the start or reaches 1
#     'ns' <- bit length of 'n'
#     'h2' <- ls / ns * 50, truncated to an Integer
#
# Side effect: appends x to $h[h2] as long as that bucket holds fewer than
# $c entries. Both globals must be set by the caller.
#
# Returns the same (mutated) hash.
def adv(x)
  start = x['n'] = x['nb'].to_i(2)
  n = start
  visited = 1
  while n >= start && n != 1
    n = f2(n)
    visited += 1
  end
  x['ls'] = visited
  x['ns'] = start.to_s(2).length
  x['h2'] = (x['ls'].to_f / x['ns'] * 50).to_i
  bucket = ($h[x['h2']] ||= [])
  bucket << x if bucket.size < $c
  x
end
# Summary statistics of a numeric list.
#
# l - Array of numbers; an empty list is treated as [0].
#
# Returns [mean, standard deviation, max, min], all as Floats. The deviation
# is the population form sqrt(E[x^2] - E[x]^2); a slightly negative variance
# caused by rounding is clamped to 0 before the square root.
def stat(l)
  l = [0] if l.empty?
  total = l.inject(0) { |acc, v| acc + v }
  total_sq = l.inject(0) { |acc, v| acc + v**2 }
  count = l.size
  mean = total.to_f / count
  variance = total_sq.to_f / count - mean**2
  [mean, Math.sqrt(variance < 0 ? 0 : variance), l.max.to_f, l.min.to_f]
end
# Builds a random binary digit string.
#
# w - number of digits to generate.
# f - when truthy, the first digit is forced to '1'.
#
# Returns the String of '0'/'1' characters.
def init(w, f)
  bits = Array.new(w) { rand(2).to_s }.join
  bits[0, 1] = '1' if f
  bits
end
# Evolves a population of w-digit binary strings, ranking candidates by the
# 'ls' value that adv computes, and returns the final population.
#
# w - digit width of every candidate.
# f - three flags [f0, f1, f2]:
#       f0: keep the leading digit fixed at '1' (passed to init, and the
#           mutation never touches index 0)
#       f1: choose parents from the top min(size, 400) instead of the top
#           size / 10
#       f2: reject candidate strings that were already produced
#
# The loop cycles through three operators (by iteration count mod 3): a
# single-digit flip, a per-digit random mix of two parents, and a one-point
# crossover. It stops after 2000 accepted candidates; the population is
# capped at 500 by dropping the last entry of the 'ls'-sorted list.
#
# Returns the Array of candidate hashes (as produced by adv).
def dist(w, f)
  # Local names instead of f0/f1/f2: the original local `f2` shadowed the
  # method of the same name.
  keep_msb, wide_pool, dedupe = f
  pop = Array.new(20) { adv({'nb' => init(w, keep_msb)}) }
  target = 2e3.to_i
  seen = {}
  i = n = 0
  while n < target
    i += 1
    pop.sort_by! { |x| -x['ls'] }
    t = wide_pool ? [pop.size, 400].min : pop.size / 10
    case i % 3
    when 0
      # dup is essential: editing the parent's string in place would leave
      # its 'nb' out of sync with its 'n'/'ls' and make the child share it.
      s = pop[rand(t)]['nb'].dup
      r = keep_msb ? (rand(s.length - 1) + 1) : rand(s.length)
      s[r, 1] = (s[r, 1].to_i ^ 1).to_s
    when 1
      sx = pop[rand(t)]['nb']
      sy = pop[rand(t)]['nb']
      s = (0...w).map { |j| rand(2) == 0 ? sx[j, 1] : sy[j, 1] }.join
    when 2
      sx = pop[rand(t)]['nb']
      sy = pop[rand(t)]['nb']
      r = rand(w + 1)
      s = (0...w).map { |j| j < r ? sx[j, 1] : sy[j, 1] }.join
    end
    # An all-zero string has no usable value; try again.
    next unless s.include?('1')
    if dedupe
      next if seen.key?(s)
      seen[s] = true
    end
    pop.pop if pop.size >= 500
    pop << adv({'nb' => s})
    n += 1
  end
  # $stderr.puts("#{i - n} dups")
  pop
end
# Counts occurrences of each distinct value in l.
#
# Returns an Array of [value, count] pairs sorted in ascending order.
def hist(l)
  counts = Hash.new(0)
  l.each { |x| counts[x] += 1 }
  counts.sort
end
# Summarises the values stored under key k across a list of hashes.
#
# l - Array of Hashes.
# k - key to extract from every hash.
#
# Returns a Hash with "<k>_a" (mean), "<k>_sd" (deviation), "<k>_mx" (max),
# "<k>_mn" (min) and "<k>_md" (most frequent value; the smallest one on
# ties, nil when l is empty).
def range(l, k)
  values = l.map { |x| x[k] }
  a, sd, mx, mn = stat(values)
  # hist is sorted by value, so max_by returns the smallest most-frequent one.
  mode = hist(values).max_by { |_value, count| count }
  {
    "#{k}_a" => a, "#{k}_sd" => sd,
    "#{k}_mx" => mx, "#{k}_mn" => mn,
    "#{k}_md" => (mode ? mode[0] : nil)
  }
end
# Mean, deviation and max of l (via stat), each divided by t.
#
# l - Array of numbers.
# t - divisor applied to every statistic.
# n - suffix appended to the result keys.
#
# Returns {"a<n>" => mean / t, "sd<n>" => sd / t, "mx<n>" => max / t}.
def stat2(l, t, n)
  mean, sd, mx = stat(l)
  { "a#{n}" => mean / t, "sd#{n}" => sd / t, "mx#{n}" => mx / t }
end
# Fraction of characters in s that are '1'.
#
# Returns a Float; NaN for the empty string (0.0 / 0).
def d(s)
  s.count('1').to_f / s.length
end
# Builds the feature row for one candidate from the binary form of x['n'].
#
# x - Hash with keys 'n', 'ns' and 'h2'.
#
# Returns a new Hash with:
#   'n', 'ns', 'h2' - copied from x
#   'd'             - density of '1' digits in the whole binary string
#   'dh'            - density in the digits from index 0 through len / 2
#   'dl'            - density in the digits from index len / 2 to the end
#                     (both halves include the digit at len / 2)
#   'a1','sd1','mx1' - mean / deviation / max length of the runs of '1',
#                      each divided by the total digit count
#
# The original also computed the same statistics for runs of '0' but never
# merged them into the result; that dead computation is omitted.
def data(x)
  bits = x['n'].to_s(2)
  len = bits.length
  mid = len / 2
  upper = bits[0..mid]
  lower = bits[mid..-1]
  one_runs = bits.split(/0+/).map { |run| run.length }
  {
    'n' => x['n'], 'ns' => x['ns'], 'h2' => x['h2'],
    'd' => d(bits), 'dh' => d(upper), 'dl' => d(lower)
  }.merge(stat2(one_runs, len, 1))
end
# Fits a multiple linear regression of column y on the columns in lx using
# Statsample::Regression.multiple.
#
# l  - Array of Hashes (one per observation).
# y  - key of the dependent variable.
# lx - Array of keys of the independent variables.
#
# Returns the coefficient Hash reported by the regression, with the constant
# term added under the key 'c'.
def fit(l, y, lx)
  columns = {}
  (lx + [y]).each { |name| columns[name] = l.map { |row| row[name] }.to_vector }
  ds = columns.to_dataset
  r = Statsample::Regression.multiple(ds, y)
  # $stderr.puts(r.summary)
  r.coeffs.merge('c' => r.constant)
end
# Evaluates a linear model on every row and stores the result in the row.
#
# l - Array of Hashes; each is mutated to gain the key "<y>_p".
# y - name of the predicted variable (only used to build the output key).
# z - coefficient Hash: 'c' is the constant term, every other key names a
#     row field whose value is multiplied by that coefficient.
#
# Returns l.
def predict(l, y, z)
  terms = z.keys - ['c'] # invariant across rows, so computed once
  l.each do |x|
    x["#{y}_p"] = terms.inject(z['c']) { |t, k| t + z[k] * x[k] }
  end
end
# Fits the linear model for column y on the fixed feature set and writes the
# prediction back into every row under "<y>_p".
#
# l - Array of feature Hashes (mutated by predict).
# y - key of the dependent variable. The original ignored this argument and
#     always used 'h2'; every caller in this file passes 'h2', so honouring
#     it does not change their results.
#
# Returns whatever predict returns.
def solve(l, y)
  features = ['d', 'dh', 'dl', # 'a0', 'sd0', 'mx0',
              'a1', 'sd1', 'mx1']
  z = fit(l, y, features)
  predict(l, y, z)
end
# Writes a list of hashes to fn as a tab-separated table, replacing any
# existing content: one header line taken from the first row's keys, then
# one line of values per row. An empty list produces an empty file.
#
# NOTE(review): a second `out(fn, a)` is defined further down in this file;
# Ruby keeps the last definition, so this one is not the method that runs
# once the whole file has been loaded.
#
# fn - output path.
# l  - Array of Hashes.
def out(fn, l)
  # Block form closes the handle even if a write raises.
  File.open(fn, 'w') do |f|
    f.puts(l[0].keys.join("\t")) unless l.empty?
    l.each { |x| f.puts(x.values.join("\t")) }
  end
end
# Draws an equal-sized random sample from every sufficiently large group.
#
# l - Array of Hashes.
# k - key whose value defines the group.
# c - sample size per group (default 10, the value the original hard-coded).
#
# Rows are grouped by x[k]; groups with fewer than c rows are dropped; from
# each remaining group c rows are drawn at random without replacement.
# Groups are emitted in ascending key order. l itself is not modified.
#
# Returns the Array of sampled rows.
def balance1(l, k, c = 10)
  groups = {}
  l.each { |x| (groups[x[k]] ||= []) << x }
  picked = []
  groups.sort_by { |key, _| key }.each do |_, members|
    next if members.size < c
    c.times { picked << members.delete_at(rand(members.size)) }
  end
  picked
end
# Keeps, for every group, only its first c rows in input order, drops the
# groups that never reach c rows, and returns the kept rows in random order
# within each group.
#
# l - Array of Hashes.
# k - key whose value defines the group.
# c - rows kept per group (default 10, the value the original hard-coded).
#
# Groups are emitted in ascending key order. l itself is not modified.
#
# Returns the Array of selected rows.
def balance2(l, k, c = 10)
  groups = {}
  l.each do |x|
    bucket = (groups[x[k]] ||= [])
    bucket << x if bucket.size < c
  end
  picked = []
  groups.sort_by { |key, _| key }.each do |_, members|
    next unless members.size == c
    c.times { picked << members.delete_at(rand(members.size)) }
  end
  picked
end
# Collects the rows of every bucket in the global $h that holds exactly $c
# entries, in ascending bucket-key order.
#
# Returns a new Array; $h and its buckets are not modified.
def balance3
  # flat_map builds the result in one pass; repeated `l += v` copies the
  # accumulated list on every step.
  $h.sort.select { |_, v| v.size == $c }.flat_map { |_, v| v }
end
# Builds feature rows for l, fits and applies the 'h2' model, and hands the
# 'h2', 'h2_p' and 'ns' columns (sorted by 'h2') to out for writing to fn.
#
# NOTE(review): this file defines `out` twice and Ruby keeps the later one;
# confirm the effective definition accepts an Array of row hashes before
# relying on this method.
#
# fn - output path.
# l  - Array of candidate Hashes accepted by data.
def graph(fn, l)
  rows = l.map { |x| data(x) }
  solve(rows, 'h2')
  wanted = ['h2', 'h2_p', 'ns']
  table = rows.map { |row| row.select { |key, _| wanted.member?(key) } }
  out(fn, table.sort_by { |row| row['h2'] })
end
# Adds up the elements of l, left to right, starting from 0.0.
#
# Returns a Float (0.0 for an empty list).
def sum(l)
  l.inject(0.0) { |total, x| total + x }
end
# Arithmetic mean of l.
#
# Returns a Float, or nil when l is empty.
def av(l)
  return nil if l.empty?

  sum(l) / l.size
end
# Correlation between the observed 'h2' values and the 'h2_p' values that
# the fitted linear model predicts for the same rows.
#
# l - Array of candidate Hashes accepted by data.
#
# Returns the Pearson correlation coefficient as a Float, or nil when
#   * l is empty,
#   * the regression reports a linear dependency, or
#   * either series has zero variance (the ratio would be NaN, and a NaN
#     fed into a running average makes every later average NaN).
def coef(l)
  rows = l.map { |x| data(x) }
  return nil if rows.empty?

  begin
    solve(rows, 'h2')
  rescue Statsample::Regression::LinearDependency
    return nil
  end
  xs = rows.map { |row| row['h2'] }
  ys = rows.map { |row| row['h2_p'] }
  xav = av(xs)
  yav = av(ys)
  tx = ty = txy = 0.0
  xs.zip(ys) do |x, y|
    txy += (x - xav) * (y - yav)
    tx += (x - xav)**2
    ty += (y - yav)**2
  end
  denom = Math.sqrt(tx) * Math.sqrt(ty)
  return nil if denom.zero? || denom.nan?

  txy / denom
end
# Converts the first three characters of x into booleans.
#
# Returns a three-element Array; an entry is true when the corresponding
# character is '1', false otherwise (including when x is shorter).
def tobool(x)
  Array.new(3) { |i| x[i, 1] == '1' }
end
# Lists every three-digit binary string in ascending order,
# '000' through '111'.
def flags
  (0...8).map { |v| v.to_s(2).rjust(3, '0') }
end
# Writes tab-separated data to fn.
#
# fn - output path.
# a  - either
#      * a Hash: its values are appended as one line; the keys are written
#        first as a header line when the file is still empty, or
#      * an Array of Hashes: the file is replaced by a header line (keys of
#        the first row) followed by one line per row.
#
# The Array form exists because this definition replaces an earlier
# `out(fn, l)` in this file that wrote such tables, and `graph` still calls
# it that way; without it that call would fail on `Array#keys`.
def out(fn, a)
  if a.is_a?(Array)
    File.open(fn, 'w') do |f|
      f.puts(a.first.keys.join("\t")) unless a.empty?
      a.each { |row| f.puts(row.values.join("\t")) }
    end
    return
  end
  # Block form closes the handle even if a write raises.
  File.open(fn, 'a') do |f|
    f.puts(a.keys.join("\t")) if f.size.zero?
    f.puts(a.values.join("\t"))
  end
end
# Writes three command files, plotx.cmd, ploty.cmd and plotz.cmd, into the
# current directory. Each holds a single `plot [][0:1] ...` line with one
# clause per entry of l that selects column "r<entry><suffix>" of fn, where
# the suffix is x, y or z respectively.
#
# fn - data file name embedded in the plot clauses.
# l  - Array of series name fragments.
def plot(fn, l)
  %w[x y z].each do |xyz|
    # Block form closes the handle even if a write raises.
    File.open("plot#{xyz}.cmd", 'w') do |f1|
      f1.print("plot [][0:1] ")
      l.each do |f|
        f1.print("'#{fn}' using (column('r#{f}#{xyz}')) with line title '#{f}#{xyz}',")
      end
    end
  end
end
# Folds one sample into the running average stored at a[k].
#
# a - Hash of accumulators; a[k] is created on first use and holds
#     't' (running total), 'c' (sample count) and 'a' (current mean).
# k - accumulator key.
# x - sample value; nil is ignored so that missing results do not count.
#
# Returns the updated mean, or nil when x is nil.
def avg(a, k, x)
  return if x.nil?

  slot = (a[k] ||= {'t' => 0.0, 'c' => 0.0})
  slot['t'] += x
  slot['c'] += 1
  slot['a'] = slot['t'] / slot['c']
end
# Driver: for each of the eight flag combinations, repeatedly evolve a
# population, measure how well the linear model predicts 'h2' on three
# differently balanced samples, and append the running average of each
# correlation to out.txt. Runs until interrupted.
l1 = flags
fn = 'out.txt'
plot(fn, l1)
File.open(fn, 'w').close # start from an empty results file
$c = 10 # bucket capacity used by adv and balance3
w = ARGV[0].nil? ? 50 : ARGV[0].to_i # candidate width in binary digits
a = {}
loop do
  l1.each do |f|
    $h = {} # adv refills the buckets during dist
    l = dist(w, tobool(f))
    avg(a, "r#{f}x", coef(balance1(l, 'h2')))
    avg(a, "r#{f}y", coef(balance2(l, 'h2')))
    avg(a, "r#{f}z", coef(balance3))
  end
  # With no averages yet, out would write a blank header line and no later
  # call would add the real one; wait until there is something to report.
  out(fn, Hash[a.map { |k, v| [k, v['a']] }]) unless a.empty?
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment