Skip to content

Instantly share code, notes, and snippets.

@vznvzn
Created March 31, 2017 01:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vznvzn/b792f05e72136175d196b0e242d4ff81 to your computer and use it in GitHub Desktop.
Save vznvzn/b792f05e72136175d196b0e242d4ff81 to your computer and use it in GitHub Desktop.
require 'statsample'
# Advances +n+ through one "odd run, then even run" leg of the 3n+1 map.
#
# While +n+ is odd it is replaced by (3n + 1) / 2; once it is even it is
# halved until it becomes odd again. The resulting odd integer is returned.
#
# @param n [Integer] starting value
# @return [Integer] the odd value reached after the two phases
# @raise [ArgumentError] for 0 and -1, the only integers for which the loops
#   below would never terminate
def f2(n)
  # 0 stays 0 under halving and -1 is a fixed point of (3n + 1) / 2, so either
  # input would spin forever; fail fast instead of hanging the caller.
  raise ArgumentError, "f2 does not terminate for n = #{n}" if n.zero? || n == -1

  n = (n * 3 + 1) / 2 while n.odd?
  n /= 2 while n.even?
  n
end
# Stores in x['x'] the position of x['r'] inside [mn, mx], scaled to 0..9.99.
# Leaves the record untouched (and returns nil) when mn is nil or the range
# is degenerate (mn == mx).
def setx(x, mn, mx)
  return if mn.nil? || mn == mx

  x['x'] = (x['r'] - mn) / (mx - mn) * 9.99
end
# Evaluates one candidate record and appends it to the pool.
#
# x    - record hash; x['nb'] holds the candidate as a binary string.
# seen - hash keyed by integer value; candidates already present are skipped.
# l1   - pool array the evaluated record is appended to.
# mn, mx - current range of 'r'; when mn is given the record is scaled via setx.
#
# The record gains 'n' (integer value), 'ls' (trajectory length, capped at 10),
# 'ns' (bit length of n), 'r' (bit-length ratio of the last value to n, only
# when the trajectory reached the cap, otherwise nil) and '.' (integer part of
# 'x' as a string, or '?' when 'x' is absent).
def adv(x, seen, l1, mn = nil, mx = nil)
  start = x['nb'].to_i(2)
  x['n'] = start
  return if seen.key?(start)

  seen[start] = x
  limit = 10
  cur = start
  traj = [cur]
  # Iterate f2 until the value drops below the start, hits 1, or the cap is reached.
  until cur < start || cur == 1 || traj.size >= limit
    cur = f2(cur)
    traj << cur
  end
  full = traj.size == limit
  x['ls'] = traj.size
  x['ns'] = start.to_s(2).length
  x['r'] = full ? cur.to_s(2).length.to_f / x['ns'] : nil
  setx(x, mn, mx) if full && !mn.nil?
  x['.'] = x['x'].nil? ? '?' : x['x'].to_i.to_s
  l1 << x
end
# Returns [mean, population standard deviation, max, min] of a numeric list.
# An empty list is treated as [0]. Max and min are returned as floats.
def stat(l)
  values = l.empty? ? [0] : l
  count = values.size
  total = values.inject(0) { |acc, v| acc + v }
  total_sq = values.inject(0) { |acc, v| acc + v ** 2 }
  mean = total.to_f / count
  variance = total_sq.to_f / count - mean ** 2
  # Rounding can push the variance slightly below zero; clamp before the root.
  spread = variance < 0 ? Math.sqrt(0) : Math.sqrt(variance)
  [mean, spread, values.max.to_f, values.min.to_f]
end
# Builds a random string of w binary digits; when f is truthy the first
# character is forced to '1'.
def init(w, f)
  bits = w.times.map { rand(2).to_s }.join
  bits[0, 1] = '1' if f
  bits
end
# Tallies the integer part of 'x' over the records in l.
#
# h is overwritten in place with bucket => count. The return value maps each
# bucket to its rank after ordering by count (rank 0 is the first entry of the
# reversed sort_by-count ordering). A [bucket, count, rank] table is written
# to $stderr.
def freq(l, h = {})
  buckets = l.map { |rec| rec['x'] }.compact.map(&:to_i)
  h.replace(Hash[hist(buckets)])
  # $stderr.puts(h.inspect)
  by_count = h.sort_by { |_bucket, count| count }.reverse
  ranks = {}
  by_count.each_with_index { |(bucket, _count), pos| ranks[bucket] = pos }
  report = h.sort.map { |bucket, count| [bucket, count, ranks.fetch(bucket, 0)] }
  $stderr.puts(report.inspect)
  ranks
end
# Returns [smallest, largest] non-nil 'r' value among the records in l;
# both are nil when no record carries an 'r'.
def minmax(l)
  ratios = l.map { |rec| rec['r'] }.compact
  [ratios.min, ratios.max]
end
# Re-applies setx with the range [mn, mx] to every record in l that has a
# non-nil 'r'. Returns l.
def rescale(l, mn, mx)
  l.each do |rec|
    next if rec['r'].nil?

    setx(rec, mn, mx)
  end
end
# Counts occurrences of each distinct element of l and returns the
# [value, count] pairs in sorted order.
def hist(l)
  counts = l.each_with_object(Hash.new(0)) { |value, acc| acc[value] += 1 }
  counts.sort
end
# Runs a randomised search over w-bit binary strings and returns the evaluated
# records that ended up with an 'x' score.
#
# The pool (l1) is seeded with 20 random strings evaluated by adv. The main
# loop then builds new candidates from the best-ranked part of the pool by
# single-bit flip, per-bit crossover, or prefix/suffix crossover (chosen by
# i % 3), evaluates them with adv, and stops after 10e3 candidates were handed
# to adv. Every 100th iteration the 'r' range and frequency tables are
# refreshed. Progress information is written to $stderr by this method and by
# freq.
#
# w - width in bits of the generated strings.
#
# Returns the array of records from `seen` that contain an 'x' key, rescaled
# against their own final 'r' range.
def dist(w)
l1 = []
# f0: force a leading '1' / never mutate bit 0; f1: sample parents from the top quarter of the pool.
f0 = f1 = true
seen = {}
20.times { adv({'nb' => init(w, f0)}, seen, l1) }
c = 10e3.to_i
# i counts loop iterations, n counts candidates passed to adv; z is never read.
i = n = z = 0
mn = mx = nil
# h1: rank table returned by freq for unmarked pool records; h: the matching counts (freq overwrites it in place).
h1 = {}
h = {}
while (n < c)
i += 1
if (i % 100 == 0) then
# Periodic refresh: recompute the 'r' range over everything seen and rescale
# the records that already carry an 'x'.
mn, mx = minmax(seen.values)
rescale(seen.values.select { |x| x.member?('x') }, mn, mx)
h1 = freq(l1.select { |x| !x.member?('*') && x.member?('x') }, h)
# h2 ranks the 'x' buckets of records that have been marked with '*'.
h2 = freq(seen.values.select { |x| x.member?('*') })
$stderr.puts([h1.size, h2.size].inspect)
$stderr.puts
end
# Order the pool descending by trajectory length, then by the h2 rank of the
# record's bucket (9 when the bucket is absent from h2, 0 when the record has no 'x').
# NOTE(review): h2 is nil until the first refresh above; this looks safe because
# 'x' is only assigned once mn is non-nil, which first happens in that same
# refresh -- confirm.
l1 = l1.sort_by { |x| [x['ls'], x.member?('x') ? h2.fetch(x['x'].to_i, 9) : 0] }.reverse
# Parents are drawn from the first t entries of the sorted pool.
t = f1 ? l1.size / 4 : [l1.size, 400].min
# `a` in each branch is a description used only by the commented-out debug print below.
case i % 3
when 0
# Mutation: copy one parent and flip a single bit (never bit 0 while f0 is set).
x = l1[rand(t)]
s = x['nb'].dup
r = f0 ? (rand(s.length - 1) + 1) : rand(s.length)
s[r, 1] = (s[r, 1].to_i ^ 1).to_s
a = x['.']
when 1
# Per-bit crossover: each position is taken from one of two parents at random.
x = l1[rand(t)]
y = l1[rand(t)]
sx = x['nb']
sy = y['nb']
s = ''
w.times \
{
|j|
s[j, 1] = (rand(2) == 0) ? sx[j, 1] : sy[j, 1]
}
a = x['.'] + ' x ' + y['.']
when 2
# Prefix/suffix crossover: positions below the random cut r come from x, the rest from y.
x = l1[rand(t)]
y = l1[rand(t)]
sx = x['nb']
sy = y['nb']
s = ''
r = rand(w + 1)
w.times \
{
|j|
s[j, 1] = (j < r) ? sx[j, 1] : sy[j, 1]
}
a = x['.'] + ' y ' + y['.']
end
# p(a)
# Discard candidates whose digits are all zero; this skips the n increment, so
# the attempt is not counted against c.
next if (s.to_i == 0)
# Keep the pool below 500 entries by dropping the last element of the sorted pool.
l1.pop if (l1.size >= 500)
if (h1.size >= 5) then
# Among unmarked pool records whose bucket count in h is at least 3, pick the
# one with the largest h2 rank (9 if absent), mark it with a '*' key and
# decrement its bucket count.
x = l1.select { |x| x.member?('x') && !x.member?('*') && h.fetch(x['x'].to_i, 0) >= 3 }.max_by{ |x| h2.fetch(x['x'].to_i, 9) }
if (!x.nil?) then
x['*'] = nil
h[x['x'].to_i] -= 1
end
end
adv({'nb' => s}, seen, l1, mn, mx)
# Counted even when adv returns early because the value was already seen.
n += 1
end
# Final pass: rescale the scored records against their own 'r' range.
mn, mx = minmax(l = seen.values.select { |x| x.member?('x') })
rescale(l, mn, mx)
return l
end
# Returns {"a<n>" => mean, "sd<n>" => deviation, "mx<n>" => max} for the list
# l, each value divided by t. The statistics come from stat.
def stat2(l, t, n)
  labels = %w[a sd mx].map { |prefix| "#{prefix}#{n}" }
  scaled = stat(l).first(3).map { |value| value / t }
  Hash[labels.zip(scaled)]
end
# Fraction of the characters in s that are '1' (NaN for an empty string).
def d(s)
  s.count('1').to_f / s.length
end
# Builds the feature row for one record from the binary expansion of x['n'].
#
# Copies 'n', 'ns', 'x' and 'ls' from the record and adds the density of ones
# over the whole string ('d'), over indices 0..m ('dh') and over m..end ('dl')
# where m is half the length, plus stat2 summaries of the run lengths of ones
# (suffix 1) and of zeros (suffix 0), each scaled by the string length.
def data(x)
  bits = x['n'].to_s(2)
  width = bits.length
  mid = width / 2
  one_runs = bits.split(/0+/).map(&:length)
  # Splitting on runs of ones leaves a leading fragment that is dropped.
  zero_runs = bits.split(/1+/).drop(1).map(&:length)
  features = {
    'n' => x['n'], 'ns' => x['ns'], 'x' => x['x'], 'ls' => x['ls'],
    'd' => d(bits), 'dh' => d(bits[0..mid]), 'dl' => d(bits[mid..-1])
  }
  features.merge(stat2(one_runs, width, 1)).merge(stat2(zero_runs, width, 0))
end
# Fits a multiple linear regression of column y on the columns named in lx,
# using the rows in l, via Statsample::Regression.multiple.
# Returns the coefficient hash with the intercept added under 'c'.
def fit(l, y, lx)
  columns = {}
  (lx + [y]).each do |name|
    columns[name] = l.map { |row| row[name] }.to_vector
  end
  model = Statsample::Regression.multiple(columns.to_dataset, y)
  # $stderr.puts(r.summary)
  model.coeffs.merge('c' => model.constant)
end
# Applies the linear model z to every row of l and stores the result under
# "<y>_p". z['c'] is the intercept; every other key of z is a column name
# mapped to its coefficient. Returns l.
def predict(l, y, z)
  terms = z.keys - ['c']
  target = "#{y}_p"
  l.each do |row|
    row[target] = terms.inject(z['c']) { |acc, name| acc + z[name] * row[name] }
  end
end
# Fits y against the fixed set of feature columns and writes the fitted
# values back onto the rows of l (see fit and predict).
def solve(l, y)
  predictors = %w[d dh dl a0 sd0 mx0 a1 sd1 mx1]
  weights = fit(l, y, predictors)
  predict(l, y, weights)
end
# Adds up the elements of l left to right, starting from 0.0.
def sum(l)
  l.inject(0.0) { |acc, value| acc + value }
end
# Arithmetic mean of l, or nil when l is empty.
def av(l)
  l.empty? ? nil : sum(l) / l.size
end
# Compares two columns of the rows in l.
# Returns [r, e]: r is the Pearson correlation between columns y1 and yp,
# e is the mean squared difference between them.
def corr(l, y1, yp)
  xs = l.map { |row| row[y1] }
  ys = l.map { |row| row[yp] }
  x_mean = av(xs)
  y_mean = av(ys)
  pairs = xs.zip(ys)
  cov = pairs.inject(0.0) { |acc, (x, y)| acc + (x - x_mean) * (y - y_mean) }
  x_var = xs.inject(0.0) { |acc, x| acc + (x - x_mean) ** 2 }
  y_var = ys.inject(0.0) { |acc, y| acc + (y - y_mean) ** 2 }
  sq_err = pairs.inject(0.0) { |acc, (x, y)| acc + (x - y) ** 2 }
  [cov / (Math.sqrt(x_var) * Math.sqrt(y_var)), sq_err / l.size]
end
# Replaces the records in l (in place) with their feature rows, fits and
# predicts column y0, and returns [correlation, mean squared error] between
# y0 and its prediction "<y0>_p".
def coef(l, y0)
  rows = l.map { |rec| data(rec) }
  l.replace(rows)
  solve(l, y0)
  # Disabled in the original as well:
  # rescue Statsample::Regression::LinearDependency
  # return nil
  corr(l, y0, "#{y0}_p")
end
# Standardises column y of the rows in l: stores (value - mean) / deviation
# under "<y>z", with mean and deviation taken from stat. Returns l.
def center(l, y)
  mean, spread = stat(l.map { |row| row[y] })
  target = "#{y}z"
  l.each { |row| row[target] = (row[y] - mean) / spread }
end
# Sorts l in place by 'x' and returns every fifth record (indices 0, 5, 10, ...).
def balance(l)
  l.sort_by! { |rec| rec['x'] }
  l.each_slice(5).map(&:first)
end
# Picks records spread across the 'x' scale: for each target 0.0, 0.5, ...,
# 10.0 the list is re-sorted in place by distance of 'x' to the target and the
# ten nearest records are collected, skipping any whose 'n' was already taken.
# Returns the collected records in the order they were picked.
def balance2(l)
  steps = 20
  step = 10.0 / steps
  target = 0.0
  picked = []
  taken = {}
  while target <= 10
    l.sort_by! { |rec| (rec['x'] - target).abs }
    target += step
    l.first(10).each do |rec|
      key = rec['n']
      next if taken.key?(key)

      taken[key] = nil
      picked << rec
    end
  end
  picked
end
# Driver: search 50-bit strings, thin the result with balance2, fit the
# regression on 'x', standardise actual and predicted values, and print them
# as tab-separated pairs ordered by the standardised actual value.
# Sizes and fit quality go to $stderr.
pool = dist(50)
$stderr.puts(pool.size)
pool = balance2(pool)
$stderr.puts(pool.size)
$stderr.puts(coef(pool, 'x').inspect)
%w[x x_p].each { |column| center(pool, column) }
$stderr.puts(corr(pool, 'xz', 'x_pz').inspect)
pool.sort_by { |rec| rec['xz'] }.each do |rec|
  puts("#{rec['xz']}\t#{rec['x_pz']}")
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment