Skip to content

Instantly share code, notes, and snippets.

@abutcher
Created June 9, 2010 20:15
Show Gist options
  • Save abutcher/432104 to your computer and use it in GitHub Desktop.
Save abutcher/432104 to your computer and use it in GitHub Desktop.
import csv
import random
import sys
class Node:
    """Container pairing a data set with the spread computed from it.

    ``left`` and ``right`` are child slots; nothing in this class fills
    them in, so they stay ``None`` until some other code assigns them.
    """

    # Child links (unset by default).
    left = None
    right = None
    # Payload and its cached spread; both are overwritten per instance.
    data = None
    variance = None

    def __init__(self, data):
        """Keep *data* and cache the result of ``variance(data)``."""
        # Inside a method the bare name resolves to the module-level
        # ``variance`` function, not to the class attribute above.
        spread = variance(data)
        self.data = data
        self.variance = spread
def compass(path):
    """Load the data set stored at *path* and print its root node's variance."""
    root = Node(extract(path))
    # A single parenthesised argument prints the same text under Python 2's
    # print statement as it does with the print function.
    print(root.variance)
def variance(data):
    """Return the spread of the last column of *data*.

    Despite the name, the value handed back is the standard deviation
    reported by ``meanstdv``, not its square.
    """
    last_column = transpose(data)[-1]
    return meanstdv(last_column)[1]
def meanstdv(x):
    """Return ``(mean, standard deviation)`` of the numbers in *x*.

    The deviation is the sample form, i.e. the squared deviations are
    divided by ``n - 1``.  A single value gives no spread to estimate, so
    its deviation is reported as ``0.0`` rather than dividing by zero.

    Args:
        x: sized collection of numbers.

    Returns:
        A ``(mean, std)`` tuple of floats.

    Raises:
        ValueError: if *x* is empty.
    """
    from math import sqrt

    n = len(x)
    if n == 0:
        raise ValueError("meanstdv() requires at least one value")
    mean = sum(x) / float(n)
    if n == 1:
        # n - 1 would be zero here; one observation has no spread.
        return mean, 0.0
    squared_deviations = sum((a - mean) ** 2 for a in x)
    return mean, sqrt(squared_deviations / float(n - 1))
def transpose(lists):
    """Return the columns of *lists* as a list of lists.

    Rows of unequal length are padded with ``None``, which is what the
    multi-sequence form of ``map`` did on Python 2.  The result is always
    a real list, so callers may index it (e.g. ``transpose(data)[-1]``)
    regardless of the interpreter version.

    Args:
        lists: sequence of rows (each row an iterable).

    Returns:
        ``[]`` when *lists* is empty, otherwise one list per column.
    """
    if not lists:
        return []
    try:
        from itertools import zip_longest
    except ImportError:  # Python 2 names it izip_longest
        from itertools import izip_longest as zip_longest
    return [list(column) for column in zip_longest(*lists)]
def separate(these):
    """Split *these* into two groups around two pivot items.

    A random seed item is drawn, ``farthestfrom`` picks the first pivot
    relative to the seed, and ``farthestfrom`` then picks the second pivot
    relative to the first (the seed is eligible again).  Every remaining
    item joins the group of the pivot it has the smaller ``distance`` to;
    ties go to the first group.

    Args:
        these: list of items; it is not modified.

    Returns:
        ``(thisgroup, thatgroup)``, each starting with its own pivot.  With
        fewer than two items no pivots can be chosen, so all items are
        returned in the first group and the second is empty.
    """
    pool = list(these)  # work on a copy so the caller's list stays intact
    if len(pool) < 2:
        return pool, []

    seed = randomelement(pool)
    pool.remove(seed)
    that = farthestfrom(seed, pool)
    pool.remove(that)
    pool.append(seed)  # the seed competes again for the second pivot
    this = farthestfrom(that, pool)
    pool.remove(this)

    thisgroup = [this]
    thatgroup = [that]
    for instance in pool:
        if distance(instance, this) > distance(instance, that):
            thatgroup.append(instance)
        else:
            thisgroup.append(instance)
    return thisgroup, thatgroup
def randomelement(l):
    """Return one element of the sequence *l*, chosen uniformly at random.

    ``random.randint`` includes its upper bound, so indexing with
    ``randint(0, len(l))`` could step one past the end; ``random.choice``
    stays inside the sequence.

    Raises:
        IndexError: if *l* is empty.
    """
    return random.choice(l)
def closestto(this, these, d=None):
    """Return the item of *these* with the smallest ``distance`` to *this*.

    Args:
        this: reference item.
        these: iterable of candidate items.
        d: optional upper bound; only candidates strictly closer than it
            qualify.  ``None`` (the default) means no bound.

    Returns:
        The closest qualifying candidate (the first one on ties), or
        ``None`` when *these* is empty or nothing beats the bound.
    """
    that = None
    for instance in these:
        gap = distance(this, instance)  # computed once per candidate
        if d is None or gap < d:
            that = instance
            d = gap
    return that
def farthestfrom(this, these, d=None):
    """Return the item of *these* with the largest ``distance`` to *this*.

    Args:
        this: reference item.
        these: iterable of candidate items.
        d: optional lower bound; only candidates strictly farther than it
            qualify.  ``None`` (the default) means no bound, which also
            avoids relying on ``sys.maxint`` (absent on Python 3).

    Returns:
        The farthest qualifying candidate (the first one on ties), or
        ``None`` when *these* is empty or nothing beats the bound.
    """
    that = None
    for instance in these:
        gap = distance(this, instance)  # computed once per candidate
        if d is None or gap > d:
            that = instance
            d = gap
    return that
def distance(vecone, vectwo, d=0.0):
    """Return a dissimilarity score between two equally long vectors.

    Components of *vecone* that ``isnumeric`` accepts contribute their
    squared difference; every other component contributes 1 when the two
    values differ and 0 when they match.  No square root is taken, so the
    result is a squared-style distance.

    Args:
        vecone: first vector; its length drives the comparison.
        vectwo: second vector, indexed in step with *vecone*.
        d: starting value of the running total.

    Returns:
        *d* plus the sum of all per-component contributions.

    Raises:
        IndexError: if *vectwo* is shorter than *vecone*.
    """
    for i, left in enumerate(vecone):
        right = vectwo[i]
        if isnumeric(left):
            d += (right - left) ** 2
        elif left != right:
            # Mismatching nominal values are what push two items apart.
            d += 1
    return d
def extract(path):
    """Read the data rows of the comma-separated file at *path*.

    Rows with fewer than two fields, and rows whose first field contains
    ``'@'``, are skipped.  In the rows that are kept, every field that
    ``isnumeric`` accepts is converted to ``float``; other fields stay
    strings.

    Args:
        path: location of the file to read.

    Returns:
        A list of rows, each a list of floats and/or strings.
    """
    data = []
    # The with-block closes the file even if parsing fails part-way.
    with open(path, "r") as handle:
        for row in csv.reader(handle):
            if len(row) > 1 and '@' not in row[0]:
                data.append(
                    [float(cell) if isnumeric(cell) else cell for cell in row]
                )
    return data
def isnumeric(s):
    """Return True when ``float(s)`` succeeds, False otherwise.

    Values that ``float`` rejects by type (``None``, lists, ...) count as
    non-numeric instead of letting the ``TypeError`` escape.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        return False
    return True
# Script entry point: there is no ``__main__`` guard, so this call also runs
# whenever the module is imported.
compass("arff/telecom1.arff")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment