Skip to content

Instantly share code, notes, and snippets.

@timm
Last active October 4, 2019 20:35
Show Gist options
  • Save timm/17ecaac48307ccc6c6f88bd229701ec0 to your computer and use it in GitHub Desktop.
Save timm/17ecaac48307ccc6c6f88bd229701ec0 to your computer and use it in GitHub Desktop.
some random project code (note: not a fully working system)
#!/usr/bin/env python3
# vim: sta:et:sw=2:ts=2:sts=2
from lib import *
class Distance(Pretty):
def __init__(i, lst=[],
some=None,
key=id,
fun= lambda x,y: abs(x-y)**2):
"Use just 'some' of the data."
if some is None:
i.lst = lst
else:
i.lst = [any(lst) for _ in range(some)]
i.fun = fun
i.key = key
i.called=0
i.cache = {}
def dist(i,one,two):
"If we've done this before, return a cached value."
i.called += 1
k1,k2 = i.key(one), i.key(two)
if k1 > k2:
one,two = two,one
k1,k2 = k2,k1
k = (k1,k2)
if not k in i.cache:
i.cache[k] = i.fun(one,two)
return i.cache[k]
def neighbors(i,j):
"return the distance of lst rows k from j."
return sorted([( i.dist(j,k), k) for k in i.lst if
i.key(j) != i.key(k)])
def closest( i,j): return i.neighbors(j)[0]
def furthest(i,j): return i.neighbors(j)[-1]
def distant(i,j, far=THE.dist.far):
"Ignore outliers by avoiding the most distant thing."
tmp = i.neighbors(j)
m = min(len(tmp)-1, int( len(tmp) * far ))
return tmp[m]
def fastmap(i,w=None):
"""The Fastmap trick:
"Find two distant points in 2N comparisons."""
w = w or any(i.lst)
_,x = i.distant(w)
c,y = i.distant(x)
return x,y,c
def pivots(i,tries=THE.dist.tries):
"""Seek distant points that nearly divide data in 2
half (and 'nearly' means within, say, 10%)"""
x,y,c = None,None,None
left,right = 0,0
best = 10**32
for _ in range(tries):
x1,y1,c1 = i.fastmap()
left,right = 0,1
for z in i.lst:
if i.dist(z,x1) < i.dist(z,y1):
left += 1
else:
right += 1
nearly = abs(1 - left/right) # the rui fraction
if nearly < best:
best, x,y,c = nearly, x1,y1,c1
if nearly < THE.dist.balanced:
break
#print(left, right)
return x,y,c
class RowDistance(Distance):
def __init__(i,t,some=THE.dist.some,fun=None):
super().__init__(lst=t.rows, some=some,
key=lambda z: z.id,
fun=fun or (lambda i,j: i.dist(j, t.cols.indep)))
#!/usr/bin/env python3
# vim: sta:et:sw=2:ts=2:sts=2
"""
Config options
"""
from boot import *
THE= o(
dist = o(some = 256,
tries = 30,
balanced= 0.1,
far = 0.8),
gen = o(cap = True,
retries = 10,
steps = 10,
cf = 0.3,
f = 0.3),
tree = o(rpMin = 0.5,
minObs=4,
rnd =2),
char = o( sep = ",",
num = "$",
less = "<",
more = ">",
skip = "?",
klass= "!",
doomed = r'([\n\t\r ]|#.*)'),
row = o( p = 2),
div = o( trivial = 1.025,
cohen = 0.3,
min = 0.5)
)
#!/usr/bin/env python3
# vim: nospell:sta:et:sw=2:ts=2:sts=2
from lib import *
from limits import Limit, Limits
from pom3 import pom3
from tbl import Tbl
from cols import Cols
from dist import RowDistance
class RpTree(Pretty):
"""
If there are too few thigns, then make a leaf.
Else, find some good pivots and divide things equally between them.
"""
def __init__(i, t,lst, up=None, lvl=0,d=None):
print('|. ' * lvl + str(len(lst))) # important debugging tool
i.leaf = None
i.kids = []
i._up = up
i.lvl = lvl
i.n = len(lst)
d = d or RowDistance(t)
if len(lst) < 2*(len(t.rows) ** THE.tree.rpMin):
i.leaf = t.clone()
for row in lst:
i.leaf + row.cells
else:
x,y,c = d.pivots()
pos = lambda z: ((d.dist(x,z)**2 + c**2 - d.dist(y,z)**2)/(2*c),z)
xs = sorted(pos( one ) for one in lst)
median = xs[ int(len(xs)/2) ][0]
#median = sum([x[0] for x in xs])/len(lst)
left,right = [],[]
for pos,one in xs:
what = left if pos <= median else right
what += [one]
i.kids = [ RpTree(t, lst= left, up= i, lvl= lvl+1,d=d),
RpTree(t, lst= right, up= i, lvl= lvl+1,d=d) ]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment