Skip to content

Instantly share code, notes, and snippets.

@fgregg
Created September 21, 2012 13:42
Show Gist options
  • Save fgregg/3761519 to your computer and use it in GitHub Desktop.
Save fgregg/3761519 to your computer and use it in GitHub Desktop.
Assignment one, consolidate these functions
def recordDistances(candidates, data_d, data_model):
    """Build a structured array of field distances for candidate id pairs.

    Parameters:
        candidates: sized sequence of pairs; ``pair[0]`` and ``pair[1]``
            are used both as keys into ``data_d`` and as the ids stored
            in the result (as 32-bit integers).
        data_d: mapping from record id to the record handed to
            ``calculateDistance``.
        data_model: mapping whose ``'fields'`` entry is passed through to
            ``calculateDistance``; its length sets how many names/values
            each row holds.

    Returns:
        A structured array with one row per candidate.  Each row has two
        elements, in this order:

        * ``'pairs'`` -- the two ids of the compared records
          (``'pair1'``, ``'pair2'``);
        * ``'field_distances'`` -- per-field ``'names'`` (20-byte
          strings) and ``'values'`` (32-bit floats), each of length
          ``len(fields)``.
    """
    fields = data_model['fields']
    num_fields = len(fields)

    # 'S20' is the canonical spelling of the fixed-width byte-string type;
    # the equivalent 'a20' alias is deprecated as of NumPy 2.0.
    field_dtype = [('names', 'S20', num_fields),
                   ('values', 'f4', num_fields)]
    record_dtype = [('pairs', [('pair1', 'i4'), ('pair2', 'i4')]),
                    ('field_distances', field_dtype)]

    # One-row scratch buffer handed to calculateDistance on every call.
    # NOTE(review): presumably calculateDistance fills and returns this
    # buffer -- confirm against its definition.
    distances = numpy.zeros(1, dtype=field_dtype)
    record_distances = numpy.zeros(len(candidates), dtype=record_dtype)

    for (i, pair) in enumerate(candidates):
        c_distances = calculateDistance(data_d[pair[0]],
                                        data_d[pair[1]],
                                        fields,
                                        distances)
        # Assigning into the row copies the values, so reusing the
        # scratch buffer on the next iteration is safe.
        record_distances[i] = ((pair[0], pair[1]),
                               (c_distances['names'],
                                c_distances['values']))

    return record_distances
def recordDistancesII(candidates, data_model):
    """Build a structured array of field distances for candidate pairs.

    Unlike ``recordDistances``, each candidate carries its records
    inline instead of ids to look up in a separate mapping.

    Parameters:
        candidates: sized sequence of pairs; each pair unpacks into two
            ``(key, record)`` instances.  The keys are stored in the
            result as 32-bit integers.
        data_model: mapping whose ``'fields'`` entry is passed through to
            ``calculateDistance``; its length sets how many names/values
            each row holds.

    Returns:
        A structured array with one row per candidate.  Each row has two
        elements, in this order:

        * ``'pairs'`` -- the two keys of the compared records
          (``'pair1'``, ``'pair2'``);
        * ``'field_distances'`` -- per-field ``'names'`` (20-byte
          strings) and ``'values'`` (32-bit floats), each of length
          ``len(fields)``.
    """
    fields = data_model['fields']
    num_fields = len(fields)

    # 'S20' is the canonical spelling of the fixed-width byte-string type;
    # the equivalent 'a20' alias is deprecated as of NumPy 2.0.
    field_dtype = [('names', 'S20', num_fields),
                   ('values', 'f4', num_fields)]
    record_dtype = [('pairs', [('pair1', 'i4'), ('pair2', 'i4')]),
                    ('field_distances', field_dtype)]

    # One-row scratch buffer handed to calculateDistance on every call.
    # NOTE(review): presumably calculateDistance fills and returns this
    # buffer -- confirm against its definition.
    distances = numpy.zeros(1, dtype=field_dtype)
    record_distances = numpy.zeros(len(candidates), dtype=record_dtype)

    for (i, pair) in enumerate(candidates):
        instance_1, instance_2 = pair
        key_1, record_1 = instance_1
        key_2, record_2 = instance_2

        c_distances = calculateDistance(record_1,
                                        record_2,
                                        fields,
                                        distances)
        # Assigning into the row copies the values, so reusing the
        # scratch buffer on the next iteration is safe.
        record_distances[i] = ((key_1, key_2),
                               (c_distances['names'],
                                c_distances['values']))

    return record_distances
# appends training data to the training data collection
def addTrainingData(labeled_pairs, data_model, training_data=[]):
    """Return ``training_data`` extended with rows for newly labeled pairs.

    Parameters:
        labeled_pairs: mapping from label to a sized sequence of record
            pairs; ``pair[0]`` and ``pair[1]`` are the two records handed
            to ``core.calculateDistance``.
        data_model: mapping whose ``'fields'`` entry is passed through to
            ``core.calculateDistance``.
        training_data: existing structured array.  Its dtype defines the
            layout of the new rows, and its second field defines the
            layout of a single distance record.  The ``[]`` default is
            kept only for signature compatibility: a plain list has no
            ``dtype``, so calling without an array raises AttributeError.

    Returns:
        A new array holding the rows of ``training_data`` followed by one
        ``(label, distances)`` row per labeled pair.  The input array is
        not modified.
    """
    fields = data_model['fields']

    # The second field of the training dtype is the per-pair distance
    # record; reuse it for the scratch buffer given to calculateDistance.
    field_dtype = training_data.dtype[1]
    distances = numpy.zeros(1, dtype=field_dtype)

    # Size the output from the same groups the fill loop walks, so a
    # missing label or an extra label cannot desynchronise the count
    # from the number of rows actually written.
    num_training_pairs = sum(len(examples)
                             for examples in labeled_pairs.values())
    new_training_data = numpy.zeros(num_training_pairs,
                                    dtype=training_data.dtype)

    labeled_examples = ((label, pair)
                        for (label, examples) in labeled_pairs.items()
                        for pair in examples)
    for (i, (label, pair)) in enumerate(labeled_examples):
        c_distances = core.calculateDistance(pair[0],
                                             pair[1],
                                             fields,
                                             distances)
        # Assigning into the row copies the values, so reusing the
        # scratch buffer on the next iteration is safe.
        new_training_data[i] = (label, c_distances)

    return numpy.append(training_data, new_training_data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment