Skip to content

Instantly share code, notes, and snippets.

@orico
Created April 18, 2018 14:56
Show Gist options
  • Save orico/e5ffb0915970ddd60d36ec82a339f0ec to your computer and use it in GitHub Desktop.
AL-TheAlgorithm
class TheAlgorithm(object):
    """Pool-based active-learning driver.

    Repeatedly trains a classifier on a growing labeled training set,
    using a pluggable selection function to pick the next batch of
    samples to 'label' from the unlabeled validation pool.

    NOTE(review): relies on names defined elsewhere in this file:
    ``get_k_random_samples``, ``Normalize``, ``TrainModel``,
    ``max_queried`` and ``np`` (numpy) — confirm they are in scope.
    """

    # NOTE(review): mutable class-level attribute shared by all instances.
    # It is never written inside this class (accuracies are tracked on
    # self.clf_model instead) — looks vestigial; confirm before removing.
    accuracies = []

    def __init__(self, initial_labeled_samples, model_object, selection_function):
        # Batch size: number of samples in the initial seed set AND the
        # number of new samples queried per active-learning round.
        self.initial_labeled_samples = initial_labeled_samples
        self.model_object = model_object
        self.sample_selection_function = selection_function

    def run(self, X_train_full, y_train_full, X_test, y_test):
        """Run the active-learning loop until ``max_queried`` labels are used.

        Parameters
        ----------
        X_train_full, y_train_full : full labeled training data; a random
            seed subset becomes the initial train set, the rest is treated
            as the unlabeled pool ('val' set).
        X_test, y_test : held-out test data used only to report accuracy
            after each training round.
        """
        # Seed: draw k random samples to train the initial classifier.
        (permutation, X_train, y_train) = \
            get_k_random_samples(self.initial_labeled_samples,
                                 X_train_full, y_train_full)
        self.queried = self.initial_labeled_samples
        self.samplecount = [self.initial_labeled_samples]

        # The rest of the training data acts as the 'unlabeled' pool.
        X_val = np.delete(np.copy(X_train_full), permutation, axis=0)
        y_val = np.delete(np.copy(y_train_full), permutation, axis=0)
        print('val set:', X_val.shape, y_val.shape, permutation.shape)
        print()

        # Normalize on the current split; statistics must be recomputed
        # whenever samples move between pools (see loop below).
        normalizer = Normalize()
        X_train, X_val, X_test = normalizer.normalize(X_train, X_val, X_test)

        self.clf_model = TrainModel(self.model_object)
        (X_train, X_val, X_test) = self.clf_model.train(
            X_train, y_train, X_val, X_test, 'balanced')
        active_iteration = 1
        self.clf_model.get_test_accuracy(1, y_test)

        while self.queried < max_queried:
            active_iteration += 1

            # Class probabilities on the unlabeled pool drive the
            # sample-selection strategy.
            probas_val = \
                self.clf_model.model_object.classifier.predict_proba(X_val)
            print('val predicted:',
                  self.clf_model.val_y_predicted.shape,
                  self.clf_model.val_y_predicted)
            print('probabilities:', probas_val.shape, '\n',
                  np.argmax(probas_val, axis=1))

            # Select the next batch of samples to 'label'.
            uncertain_samples = \
                self.sample_selection_function.select(
                    probas_val, self.initial_labeled_samples)

            # Undo normalization before moving raw samples between pools;
            # it is recalculated on the new train/pool split below.
            X_train, X_val, X_test = normalizer.inverse(X_train, X_val, X_test)

            # Move the selected samples from the pool into the train set.
            print('trainset before', X_train.shape, y_train.shape)
            X_train = np.concatenate((X_train, X_val[uncertain_samples]))
            y_train = np.concatenate((y_train, y_val[uncertain_samples]))
            print('trainset after', X_train.shape, y_train.shape)
            self.samplecount.append(X_train.shape[0])

            bin_count = np.bincount(y_train.astype('int64'))
            unique = np.unique(y_train.astype('int64'))
            print(
                'updated train set:',
                X_train.shape,
                y_train.shape,
                'unique(labels):',
                bin_count,
                unique,
            )

            X_val = np.delete(X_val, uncertain_samples, axis=0)
            y_val = np.delete(y_val, uncertain_samples, axis=0)
            print('val set:', X_val.shape, y_val.shape)
            print()

            # Re-normalize on the updated split, then retrain and score.
            normalizer = Normalize()
            X_train, X_val, X_test = normalizer.normalize(X_train, X_val, X_test)

            self.queried += self.initial_labeled_samples
            (X_train, X_val, X_test) = self.clf_model.train(
                X_train, y_train, X_val, X_test, 'balanced')
            self.clf_model.get_test_accuracy(active_iteration, y_test)

        print('final active learning accuracies',
              self.clf_model.accuracies)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment