Created
September 21, 2012 13:42
-
-
Save fgregg/3761519 to your computer and use it in GitHub Desktop.
Assignment one, consolidate these functions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def recordDistances(candidates, data_d, data_model):
    """Compute per-field distances for candidate pairs given as record ids.

    Args:
        candidates: sized sequence of pairs; ``pair[0]`` and ``pair[1]``
            are used as keys into ``data_d``.
        data_d: mapping from record id to the record itself.
        data_model: mapping whose ``'fields'`` entry is the sized
            collection of fields handed to ``calculateDistance``.

    Returns:
        A numpy structured array with one row per candidate pair. Each row
        has two elements: ``'pairs'`` (two ``i4`` ids, ``pair1`` and
        ``pair2``) followed by ``'field_distances'`` (``'names'`` as
        20-byte strings and ``'values'`` as ``f4``, each of length
        ``len(fields)``).
    """
    fields = data_model['fields']
    num_fields = len(fields)

    # 'S20' is the same 20-byte string dtype as the legacy 'a20' alias,
    # which NumPy 2.0 deprecates.
    field_dtype = [('names', 'S20', num_fields),
                   ('values', 'f4', num_fields)]
    record_dtype = [('pairs', [('pair1', 'i4'), ('pair2', 'i4')]),
                    ('field_distances', field_dtype)]

    # One-row buffer passed to every calculateDistance call; presumably it
    # is filled in place and returned -- confirm against calculateDistance.
    distances = numpy.zeros(1, dtype=field_dtype)
    record_distances = numpy.zeros(len(candidates), dtype=record_dtype)

    for i, pair in enumerate(candidates):
        key_1, key_2 = pair[0], pair[1]
        c_distances = calculateDistance(data_d[key_1],
                                        data_d[key_2],
                                        fields,
                                        distances)
        record_distances[i] = ((key_1, key_2),
                               (c_distances['names'],
                                c_distances['values']))

    return record_distances
def recordDistancesII(candidates, data_model):
    """Compute per-field distances for candidate pairs that carry their records.

    Args:
        candidates: sized sequence of pairs; each pair unpacks into two
            ``(key, record)`` instances.
        data_model: mapping whose ``'fields'`` entry is the sized
            collection of fields handed to ``calculateDistance``.

    Returns:
        A numpy structured array with one row per candidate pair. Each row
        has two elements: ``'pairs'`` (two ``i4`` keys, ``pair1`` and
        ``pair2``) followed by ``'field_distances'`` (``'names'`` as
        20-byte strings and ``'values'`` as ``f4``, each of length
        ``len(fields)``).
    """
    fields = data_model['fields']
    num_fields = len(fields)

    # 'S20' is the same 20-byte string dtype as the legacy 'a20' alias,
    # which NumPy 2.0 deprecates.
    field_dtype = [('names', 'S20', num_fields),
                   ('values', 'f4', num_fields)]
    record_dtype = [('pairs', [('pair1', 'i4'), ('pair2', 'i4')]),
                    ('field_distances', field_dtype)]

    # One-row buffer passed to every calculateDistance call; presumably it
    # is filled in place and returned -- confirm against calculateDistance.
    distances = numpy.zeros(1, dtype=field_dtype)
    record_distances = numpy.zeros(len(candidates), dtype=record_dtype)

    for i, pair in enumerate(candidates):
        instance_1, instance_2 = pair
        key_1, record_1 = instance_1
        key_2, record_2 = instance_2
        c_distances = calculateDistance(record_1,
                                        record_2,
                                        fields,
                                        distances)
        record_distances[i] = ((key_1, key_2),
                               (c_distances['names'],
                                c_distances['values']))

    return record_distances
# appends training data to the training data collection
def addTrainingData(labeled_pairs, data_model, training_data=[]):
    """Append distance rows for newly labeled pairs to ``training_data``.

    Args:
        labeled_pairs: mapping from label to a sized collection of record
            pairs; ``pair[0]`` and ``pair[1]`` are passed to
            ``core.calculateDistance``.
        data_model: mapping whose ``'fields'`` entry is handed to
            ``core.calculateDistance``.
        training_data: numpy structured array. Its dtype is reused for the
            new rows and the dtype of its second field for the distance
            buffer. The ``[]`` default has no ``.dtype``, so in practice an
            array must always be supplied.

    Returns:
        The array produced by ``numpy.append``: the rows of
        ``training_data`` followed by one ``(label, distances)`` row per
        labeled pair.
    """
    fields = data_model['fields']
    field_dtype = training_data.dtype[1]
    distances = numpy.zeros(1, dtype=field_dtype)

    # Count examples across every label the loop below visits, rather than
    # assuming exactly the keys 0 and 1 exist.
    num_training_pairs = sum(len(examples)
                             for examples in labeled_pairs.values())
    new_training_data = numpy.zeros(num_training_pairs,
                                    dtype=training_data.dtype)

    i = 0
    for label, examples in labeled_pairs.items():
        for pair in examples:
            c_distances = core.calculateDistance(pair[0],
                                                 pair[1],
                                                 fields,
                                                 distances)
            new_training_data[i] = (label, c_distances)
            i += 1

    return numpy.append(training_data, new_training_data)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment