# vznvzn/fit24c.rb

## fit24c.rb
require 'statsample'


# Summarise a list of numbers.
#
# An empty list is treated as [0], so every statistic comes back as 0.0.
#
# @param l [Array<Numeric>] values to summarise
# @return [Array(Float, Float, Float)] mean, standard deviation computed as
#   sqrt(mean of squares - squared mean), and the maximum
def stat(l)
	values = l.empty? ? [0] : l
	count = values.size
	total = values.inject(0) { |acc, v| acc + v }
	total_sq = values.inject(0) { |acc, v| acc + v ** 2 }

	mean = total.to_f / count
	variance = total_sq.to_f / count - mean ** 2
	# Math.sqrt raises on a negative argument, so clamp to 0 first.
	spread = Math.sqrt(variance < 0 ? 0 : variance)

	[mean, spread, values.max.to_f]
end

# Label and scale the three statistics returned by stat(l).
#
# @param l [Array<Numeric>] values passed straight to stat
# @param t [Numeric] divisor applied to each statistic
# @param n [Object] suffix interpolated into each key
# @return [Hash{String => Float}] keys "a<n>", "sd<n>", "mx<n>", in that order
def stat2(l, t, n)
	labels = %w[a sd mx].map { |prefix| "#{prefix}#{n}" }
	scaled = stat(l).map { |value| value / t }
	labels.zip(scaled).to_h
end

# Fraction of the characters of s that are '1'.
#
# @param s [String] string to inspect (callers in this file pass binary digits)
# @return [Float] number of '1' characters divided by s.length; NaN for an
#   empty string, because that evaluates 0.0 / 0
def d(s)
	s.count('1').to_f / s.length
end

# Build the feature row for the integer stored in x['n'].
#
# Features are taken from the binary representation of n:
#   'd'          density of 1 bits over the whole string
#   'dh' / 'dl'  density over the leading / trailing part; both slices include
#                the character at index length / 2
#   a0/sd0/mx0   stat2 of the lengths of the 0-runs, divided by the bit length
#   a1/sd1/mx1   stat2 of the lengths of the 1-runs, divided by the bit length
#
# @param x [Hash] must hold an Integer under 'n'; 'y' is copied through as-is
# @return [Hash] 'n', 'y', 'd', 'dh', 'dl', then the 0-run and 1-run statistics
def data(x)
	bits = x['n'].to_s(2)
	width = bits.length
	mid = width / 2

	one_runs = bits.split(/0+/).map { |run| run.length }
	# The first piece of the split on 1-runs is dropped unconditionally, as in
	# the original shift; it is the empty string whenever bits starts with '1'.
	zero_runs = bits.split(/1+/).drop(1).map { |run| run.length }

	features = {
		'n' => x['n'],
		'y' => x['y'],
		'd' => d(bits),
		'dh' => d(bits[0..mid]),
		'dl' => d(bits[mid..-1])
	}
	features.merge(stat2(zero_runs, width, 0)).merge(stat2(one_runs, width, 1))
end

# Advance n by one combined step: apply n -> (3n + 1) / 2 for as long as n is
# odd, then halve for as long as n is even.
#
# @param n [Integer] starting value; 0 never terminates because it stays even
# @return [Integer] the odd value reached
def f2(n)
	n = (3 * n + 1) / 2 until n.even?
	n /= 2 until n.odd?
	n
end

# Apply f2 repeatedly, recording every intermediate result.
#
# @param n [Integer] starting value (not included in the result)
# @param c [Integer] number of f2 applications
# @return [Array<Integer>] the c successive values, in order
def adv(n, c)
	current = n
	c.times.map { current = f2(current) }
end


# Run Statsample::Regression.multiple on the chosen columns of l.
#
# Each column named in lx, plus y, is pulled out of the row hashes and turned
# into a vector; the vectors are combined into a dataset and handed to
# Statsample. The regression summary is written to stderr.
#
# @param l [Array<Hash>] rows, each keyed by column name
# @param y [String] name of the column passed to the regression as the
#   second argument
# @param lx [Array<String>] names of the remaining columns to include
# @return [Hash] the regression's coeffs merged with 'c' => its constant
def fit(l, y, lx)
	columns = {}
	(lx + [y]).each do |name|
		columns[name] = l.map { |row| row[name] }.to_vector()
	end

	model = Statsample::Regression.multiple(columns.to_dataset(), y)
	$stderr.puts(model.summary)

	model.coeffs.merge({'c' => model.constant})
end

# Evaluate a linear model: z['c'] plus the sum of z[k] * x[k] over every
# other key k of z.
#
# @param x [Hash] values, looked up by the keys of z
# @param z [Hash] weights, with the constant term stored under 'c'
# @return [Numeric] the accumulated total
def dot(x, z)
	weights = z.reject { |name, _| name == 'c' }
	weights.inject(z['c']) { |acc, (name, weight)| acc + weight * x[name] }
end

# Add up the elements of l left to right, starting from 0.
#
# @param l [Enumerable<Numeric>] values to add
# @return [Numeric] the total (0 for an empty list)
def sum(l)
	l.inject(0) { |total, value| total + value }
end

# Arithmetic mean of l.
#
# The total is converted to Float before dividing so that integer inputs are
# not truncated by integer division (stat does the same for its mean).
#
# @param l [Array<Numeric>] values to average
# @return [Float, nil] the mean, or nil when l is empty
def av(l)
	return nil if l.empty?

	sum(l).to_f / l.size
end

# Attach a prediction to every row of l1 under the key 'y_p'.
#
# For each row the model z is evaluated on the row itself and on the feature
# rows of the next 10 values produced by adv from the row's 'n'. Every
# evaluation is raised to at least 100 and multiplied by the bit length of
# the number it belongs to. The average of those 11 products, divided by the
# bit length of the row's own 'n', becomes 'y_p' (again raised to at least 100).
#
# @param z [Hash] model weights as consumed by dot
# @param l1 [Array<Hash>] rows; each is mutated in place
# @return [Array<Hash>] l1 itself
def predict(z, l1)
	l1.each do |row|
		score = dot(row, z)

		start = row['n'].to_i
		start_bits = start.to_s(2).length

		score = 100 if score < 100
		first = score.to_f * start_bits

		trail = adv(start, 10).map do |step|
			est = dot(data({'n' => step}), z)
			est = 100 if est < 100
			est * step.to_s(2).length.to_f
		end

		mean = av([first] + trail) / start_bits
		row['y_p'] = mean < 100 ? 100 : mean
	end
end

# Write rows to "<fn>.txt" as tab-separated text.
#
# The first line holds the keys of the first row; every following line holds
# one row's values in that row's own key order. The block form of File.open
# closes the file even if writing raises. An empty list produces an empty
# file instead of failing on l[0].
#
# @param fn [String] path without the ".txt" extension
# @param l [Array<Hash>] rows to write
# @return [nil]
def out(fn, l)
	File.open("#{fn}.txt", 'w') do |f|
		f.puts(l[0].keys.join("\t")) unless l.empty?
		l.each { |row| f.puts(row.values.join("\t")) }
	end
	nil
end

# Load a whitespace-separated table from the file at fn.
#
# The first line supplies the column names; every later non-blank line
# becomes a Hash of column name => Float (fields are converted with to_f).
# File.readlines opens and closes the file itself, so no handle is left open.
#
# @param fn [String] path of the file to read
# @return [Array<Hash{String => Float}>] one hash per data line; [] when the
#   file is empty
# @raise [IndexError] when a data line has a different number of fields than
#   the header
def read(fn)
	lines = File.readlines(fn)
	return [] if lines.empty?

	header = lines.shift.split
	lines.reject { |line| line.strip.empty? }.map do |line|
		Hash[[header, line.split.map { |field| field.to_f }].transpose]
	end
end


# Driver: load the points, fit 'y' against every column except 'y' and 'n',
# add the 'y_p' predictions, and write the rows sorted by 'y' to out1.txt.
points = read('data.txt')

$stderr.puts("#{points.size} pts")
# out('out', points)

feature_names = points[0].keys - ['y', 'n']
model = fit(points, 'y', feature_names)

predict(model, points)

out('out1', points.sort_by { |row| row['y'] })
	require 'statsample'


	# Summarise a list of numbers.
	#
	# An empty list is treated as [0], so every statistic comes back as 0.0.
	#
	# @param l [Array<Numeric>] values to summarise
	# @return [Array(Float, Float, Float)] mean, standard deviation computed as
	#   sqrt(mean of squares - squared mean), and the maximum
	def stat(l)
		values = l.empty? ? [0] : l
		count = values.size
		total = values.inject(0) { |acc, v| acc + v }
		total_sq = values.inject(0) { |acc, v| acc + v ** 2 }

		mean = total.to_f / count
		variance = total_sq.to_f / count - mean ** 2
		# Math.sqrt raises on a negative argument, so clamp to 0 first.
		spread = Math.sqrt(variance < 0 ? 0 : variance)

		[mean, spread, values.max.to_f]
	end

	# Label and scale the three statistics returned by stat(l).
	#
	# @param l [Array<Numeric>] values passed straight to stat
	# @param t [Numeric] divisor applied to each statistic
	# @param n [Object] suffix interpolated into each key
	# @return [Hash{String => Float}] keys "a<n>", "sd<n>", "mx<n>", in that order
	def stat2(l, t, n)
		labels = %w[a sd mx].map { |prefix| "#{prefix}#{n}" }
		scaled = stat(l).map { |value| value / t }
		labels.zip(scaled).to_h
	end

	# Fraction of the characters of s that are '1'.
	#
	# @param s [String] string to inspect (callers in this file pass binary digits)
	# @return [Float] number of '1' characters divided by s.length; NaN for an
	#   empty string, because that evaluates 0.0 / 0
	def d(s)
		s.count('1').to_f / s.length
	end

	# Build the feature row for the integer stored in x['n'].
	#
	# Features are taken from the binary representation of n:
	#   'd'          density of 1 bits over the whole string
	#   'dh' / 'dl'  density over the leading / trailing part; both slices include
	#                the character at index length / 2
	#   a0/sd0/mx0   stat2 of the lengths of the 0-runs, divided by the bit length
	#   a1/sd1/mx1   stat2 of the lengths of the 1-runs, divided by the bit length
	#
	# @param x [Hash] must hold an Integer under 'n'; 'y' is copied through as-is
	# @return [Hash] 'n', 'y', 'd', 'dh', 'dl', then the 0-run and 1-run statistics
	def data(x)
		bits = x['n'].to_s(2)
		width = bits.length
		mid = width / 2

		one_runs = bits.split(/0+/).map { |run| run.length }
		# The first piece of the split on 1-runs is always dropped; it is the
		# empty string whenever bits starts with '1'.
		zero_runs = bits.split(/1+/).drop(1).map { |run| run.length }

		features = {
			'n' => x['n'],
			'y' => x['y'],
			'd' => d(bits),
			'dh' => d(bits[0..mid]),
			'dl' => d(bits[mid..-1])
		}
		features.merge(stat2(zero_runs, width, 0)).merge(stat2(one_runs, width, 1))
	end

	# Advance n by one combined step: apply n -> (3n + 1) / 2 for as long as n is
	# odd, then halve for as long as n is even.
	#
	# @param n [Integer] starting value; 0 never terminates because it stays even
	# @return [Integer] the odd value reached
	def f2(n)
		n = (3 * n + 1) / 2 until n.even?
		n /= 2 until n.odd?
		n
	end

	# Apply f2 repeatedly, recording every intermediate result.
	#
	# @param n [Integer] starting value (not included in the result)
	# @param c [Integer] number of f2 applications
	# @return [Array<Integer>] the c successive values, in order
	def adv(n, c)
		current = n
		c.times.map { current = f2(current) }
	end


	# Run Statsample::Regression.multiple on the chosen columns of l.
	#
	# Each column named in lx, plus y, is pulled out of the row hashes and turned
	# into a vector; the vectors are combined into a dataset and handed to
	# Statsample. The regression summary is written to stderr.
	#
	# @param l [Array<Hash>] rows, each keyed by column name
	# @param y [String] name of the column passed to the regression as the
	#   second argument
	# @param lx [Array<String>] names of the remaining columns to include
	# @return [Hash] the regression's coeffs merged with 'c' => its constant
	def fit(l, y, lx)
		columns = {}
		(lx + [y]).each do |name|
			columns[name] = l.map { |row| row[name] }.to_vector()
		end

		model = Statsample::Regression.multiple(columns.to_dataset(), y)
		$stderr.puts(model.summary)

		model.coeffs.merge({'c' => model.constant})
	end

	# Evaluate a linear model: z['c'] plus the sum of z[k] * x[k] over every
	# other key k of z.
	#
	# @param x [Hash] values, looked up by the keys of z
	# @param z [Hash] weights, with the constant term stored under 'c'
	# @return [Numeric] the accumulated total
	def dot(x, z)
		weights = z.reject { |name, _| name == 'c' }
		weights.inject(z['c']) { |acc, (name, weight)| acc + weight * x[name] }
	end

	# Add up the elements of l left to right, starting from 0.
	#
	# @param l [Enumerable<Numeric>] values to add
	# @return [Numeric] the total (0 for an empty list)
	def sum(l)
		l.inject(0) { |total, value| total + value }
	end

	# Arithmetic mean of l.
	#
	# The total is converted to Float before dividing so that integer inputs are
	# not truncated by integer division.
	#
	# @param l [Array<Numeric>] values to average
	# @return [Float, nil] the mean, or nil when l is empty
	def av(l)
		return nil if l.empty?

		sum(l).to_f / l.size
	end

	# Attach a prediction to every row of l1 under the key 'y_p'.
	#
	# For each row the model z is evaluated on the row itself and on the feature
	# rows of the next 10 values produced by adv from the row's 'n'. Every
	# evaluation is raised to at least 100 and multiplied by the bit length of
	# the number it belongs to. The average of those 11 products, divided by the
	# bit length of the row's own 'n', becomes 'y_p' (again raised to at least 100).
	#
	# @param z [Hash] model weights as consumed by dot
	# @param l1 [Array<Hash>] rows; each is mutated in place
	# @return [Array<Hash>] l1 itself
	def predict(z, l1)
		l1.each do |row|
			score = dot(row, z)

			start = row['n'].to_i
			start_bits = start.to_s(2).length

			score = 100 if score < 100
			first = score.to_f * start_bits

			trail = adv(start, 10).map do |step|
				est = dot(data({'n' => step}), z)
				est = 100 if est < 100
				est * step.to_s(2).length.to_f
			end

			mean = av([first] + trail) / start_bits
			row['y_p'] = mean < 100 ? 100 : mean
		end
	end

	# Write rows to "<fn>.txt" as tab-separated text.
	#
	# The first line holds the keys of the first row; every following line holds
	# one row's values in that row's own key order. The block form of File.open
	# closes the file even if writing raises. An empty list produces an empty
	# file instead of failing on l[0].
	#
	# @param fn [String] path without the ".txt" extension
	# @param l [Array<Hash>] rows to write
	# @return [nil]
	def out(fn, l)
		File.open("#{fn}.txt", 'w') do |f|
			f.puts(l[0].keys.join("\t")) unless l.empty?
			l.each { |row| f.puts(row.values.join("\t")) }
		end
		nil
	end

	# Load a whitespace-separated table from the file at fn.
	#
	# The first line supplies the column names; every later non-blank line
	# becomes a Hash of column name => Float (fields are converted with to_f).
	# File.readlines opens and closes the file itself, so no handle is left open.
	#
	# @param fn [String] path of the file to read
	# @return [Array<Hash{String => Float}>] one hash per data line; [] when the
	#   file is empty
	# @raise [IndexError] when a data line has a different number of fields than
	#   the header
	def read(fn)
		lines = File.readlines(fn)
		return [] if lines.empty?

		header = lines.shift.split
		lines.reject { |line| line.strip.empty? }.map do |line|
			Hash[[header, line.split.map { |field| field.to_f }].transpose]
		end
	end



	# Driver: load the points, fit 'y' against every column except 'y' and 'n',
	# add the 'y_p' predictions, and write the rows sorted by 'y' to out1.txt.
	l2 = read('data.txt')

	$stderr.puts("#{l2.size} pts")
	# out('out', l2)

	z = fit(l2, 'y', l2[0].keys - ['y', 'n'])

	predict(z, l2)

	out('out1', l2.sort_by { |x| x['y'] })