Skip to content

Instantly share code, notes, and snippets.

@HanaanY
Created August 21, 2018 21:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save HanaanY/c80b3be2cffffc184d942557d50fa10b to your computer and use it in GitHub Desktop.
Save HanaanY/c80b3be2cffffc184d942557d50fa10b to your computer and use it in GitHub Desktop.
from keras.utils.io_utils import HDF5Matrix
import random
import numpy as np
# Fraction of each split that is sampled when estimating the label balance.
sub_sample = 0.01
data_path = '/home/naan/SnapshotSerengeti/data/SnapshotSerengeti224.h5'

# Handles onto the label datasets of the three splits inside the HDF5 file.
train_l = HDF5Matrix(data_path, 'train_labels')
dev_l = HDF5Matrix(data_path, 'dev_labels')
test_l = HDF5Matrix(data_path, 'test_labels')

# Seed exactly once, immediately before any index sampling, so the drawn
# subsets are reproducible from run to run. (The script used to seed twice
# with the same value and no random draw in between; one call is enough.)
random.seed(52)
def get_sample_indices(total, frac):
    """Draw a random subset of indices from ``range(total)`` without replacement.

    Args:
        total: Number of items available; indices are drawn from
            ``0 .. total - 1``.
        frac: Fraction of ``total`` to draw. The sample size is
            ``round(total * frac)``.

    Returns:
        A list of distinct indices in random order. The module-level
        ``random`` generator is used, so the result depends on
        ``random.seed``.

    Raises:
        ValueError: If the computed sample size is negative or larger than
            ``total`` (raised by ``random.sample``).
    """
    # round() can hand back a float instead of an int (Python 2, or NumPy
    # float scalars on older NumPy releases), and random.sample requires a
    # genuine int sample size -- so coerce explicitly.
    n_samples = int(round(total * frac))
    return random.sample(range(total), n_samples)
def label_sampler(labels, list_IDs):
    """Gather the labels at the requested indices into a 1-D float array.

    Args:
        labels: Indexable container of labels; ``labels[idx]`` is read once
            for every entry of ``list_IDs``.
        list_IDs: Sized sequence of indices into ``labels``.

    Returns:
        A ``float64`` NumPy array of length ``len(list_IDs)`` whose k-th
        element is the label at ``list_IDs[k]``.
    """
    sampled = np.zeros(len(list_IDs), dtype=np.float64)
    for slot, label_id in enumerate(list_IDs):
        # Labels are fetched one element at a time, in the order given.
        sampled[slot] = labels[label_id]
    return sampled
# Draw a random subset of indices from every split (train, then dev, then
# test -- the order matters because all three share one seeded generator).
train_IDs = get_sample_indices(train_l.size, sub_sample)
dev_IDs = get_sample_indices(dev_l.size, sub_sample)
test_IDs = get_sample_indices(test_l.size, sub_sample)

all_labels = [train_l, dev_l, test_l]
splits = [train_IDs, dev_IDs, test_IDs]

# For each split, print the sum of the sampled labels next to the sample
# size (the sum is the positive count, assuming labels are 0/1 -- confirm
# against the dataset).
for split_labels, IDs in zip(all_labels, splits):
    positive = label_sampler(split_labels, IDs).sum()
    n = len(IDs)
    print('{} out of {}'.format(positive, n))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment