Skip to content

Instantly share code, notes, and snippets.

@pearcemc
Created October 22, 2010 18:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pearcemc/641072 to your computer and use it in GitHub Desktop.
Save pearcemc/641072 to your computer and use it in GitHub Desktop.
class MultiClassDS:
    """Generate a dataset of roughly ``length`` points drawn from several classes.

    Every object in ``classes`` must provide a zero-argument ``generate()``
    method that returns a single data point.  The dataset annotates each
    class object in place with ``ratio``, ``count`` and ``data`` attributes,
    and with ``vectors`` once ``get_vectors`` has been called.

    Per-class counts are truncated to whole numbers, so the total number of
    generated points can be slightly smaller than ``length``.
    """

    def __init__(self, classes, length=1000, ratio=None):
        """Store the parameters, assign per-class counts and generate data.

        classes -- sized, re-iterable collection of objects with ``generate()``
        length  -- total number of points requested
        ratio   -- optional sequence of per-class fractions, indexed in step
                   with ``classes``; a falsy value means an even split
        """
        self.classes, self.length, self.ratio = classes, length, ratio
        if not ratio:
            # no ratio supplied: split the points evenly between the classes
            self._normalise_ratio()
        else:
            self._set_ratio(ratio)
        self.generate()

    def _count_for(self, share):
        """Return the whole number of points for a class given its share."""
        # Products such as 0.29 * 100 evaluate to 28.999999999999996, which a
        # bare int() would truncate to 28.  Rounding to 9 decimals first
        # removes that representation error while still truncating genuine
        # fractional parts (333.33... stays 333).
        return int(round(share * self.length, 9))

    def _normalise_ratio(self):
        """Assign an even split when no ratio information was supplied."""
        num_classes = len(self.classes)
        if num_classes == 0:
            # nothing to annotate, and avoids dividing by zero below
            return
        even_share = 1.0 / num_classes
        for clas in self.classes:
            clas.ratio = even_share
            clas.count = self._count_for(even_share)

    def _set_ratio(self, ratio):
        """Set the supplied ratio on the classes, matched by position.

        Raises IndexError if ``ratio`` has fewer entries than there are
        classes; surplus entries are ignored.
        """
        for i, clas in enumerate(self.classes):
            clas.ratio = ratio[i]
            clas.count = self._count_for(ratio[i])

    def generate(self):
        """Fill ``data`` on each class with ``count`` freshly generated points."""
        for clas in self.classes:
            clas.data = [clas.generate() for _ in range(clas.count)]

    def get_vectors(self):
        """Vectorise the data points class by class, e.g. for visualisation.

        For each class, builds one list per coordinate (the i-th list holds
        the i-th component of every point), stores the result on the class
        as ``vectors`` and on the dataset as ``self.vectors``, and returns
        the latter.  The number of coordinates is taken from the first point
        of the class; a class with no points gets an empty list.
        """
        self.vectors = []
        for clas in self.classes:
            if clas.data:
                width = len(clas.data[0])
                clas.vectors = [[p[i] for p in clas.data] for i in range(width)]
            else:
                # no points to read a dimensionality from
                clas.vectors = []
            self.vectors.append(clas.vectors)
        return self.vectors
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment