# HanaanY/sensecheck.py

## sensecheck.py
from keras.utils.io_utils import HDF5Matrix
import random
import numpy as np

# Seed the RNG so the sampled indices are the same on every run.
random.seed(52)

# Fraction of each split that is drawn for the check.
sub_sample = 0.01

# HDF5 file that holds the label datasets read below.
data_path = '/home/naan/SnapshotSerengeti/data/SnapshotSerengeti224.h5'

# Open the label dataset of every split from the same file.
train_l, dev_l, test_l = (
    HDF5Matrix(data_path, dataset)
    for dataset in ('train_labels', 'dev_labels', 'test_labels')
)

# Seed again with the same value; no random draw happens in between,
# so the RNG state is unchanged by this second call.
random.seed(52)

def get_sample_indices(total, frac):
    """Draw a random sample of distinct indices from ``range(total)``.

    Args:
        total: Size of the index range; indices come from ``0 .. total - 1``.
        frac: Fraction of ``total`` to draw. The sample size is
            ``total * frac`` rounded with the built-in ``round()``.

    Returns:
        A list of distinct integer indices, in the order produced by
        ``random.sample``.

    Raises:
        ValueError: Propagated from ``random.sample`` when the rounded
            sample size is negative or larger than ``total``.
    """
    # round() is not guaranteed to return a plain int: Python 2 returns a
    # float, and a NumPy floating scalar (e.g. when ``total`` arrives as a
    # NumPy integer) may round to a NumPy float on some NumPy versions.
    # random.sample cannot use a non-integer sample size, so coerce here.
    sample_size = int(round(total * frac))
    return random.sample(range(total), sample_size)

def label_sampler(labels, list_IDs):
    """Collect the labels stored at the given indices into a NumPy vector.

    Args:
        labels: Container indexed with each entry of ``list_IDs``
            (``labels[idx]``).
        list_IDs: Sequence of indices to look up, in output order.

    Returns:
        A 1-D float array of length ``len(list_IDs)`` whose ``i``-th entry
        is ``labels[list_IDs[i]]``.
    """
    picked = np.zeros(len(list_IDs))
    for slot, label_id in enumerate(list_IDs):
        # One lookup per requested index, written to the matching slot.
        picked[slot] = labels[label_id]
    return picked

# Draw the index sample for each split. The draws run in train, dev, test
# order; keep that order so a seeded run yields the same indices.
# NOTE(review): `.size` is used as the number of labels, which assumes one
# label value per example -- confirm against the HDF5 layout.
train_IDs = get_sample_indices(train_l.size, sub_sample)
dev_IDs = get_sample_indices(dev_l.size, sub_sample)
test_IDs = get_sample_indices(test_l.size, sub_sample)

# Parallel lists: label dataset and its sampled indices, per split.
all_labels = [train_l, dev_l, test_l]
splits = [train_IDs, dev_IDs, test_IDs]

# Print, for each split, the sum of the sampled labels next to the sample
# size (presumably labels are 0/1 so the sum counts positives -- confirm).
for labels, IDs in zip(all_labels, splits):
    positive = np.sum(label_sampler(labels, IDs))
    n = len(IDs)
    print('{} out of {}'.format(positive, n))
	from keras.utils.io_utils import HDF5Matrix
	import random
	import numpy as np

	# Seed the RNG so the sampled indices are the same on every run.
	random.seed(52)

	# Fraction of each split that is drawn for the check.
	sub_sample = 0.01

	# HDF5 file that holds the label datasets read below.
	data_path = '/home/naan/SnapshotSerengeti/data/SnapshotSerengeti224.h5'

	# Open the label dataset of every split from the same file.
	train_l, dev_l, test_l = (
	    HDF5Matrix(data_path, dataset)
	    for dataset in ('train_labels', 'dev_labels', 'test_labels')
	)

	# Seed again with the same value; no random draw happens in between,
	# so the RNG state is unchanged by this second call.
	random.seed(52)

	def get_sample_indices(total, frac):
	    """Draw a random sample of distinct indices from ``range(total)``.

	    Args:
	        total: Size of the index range; indices come from
	            ``0 .. total - 1``.
	        frac: Fraction of ``total`` to draw. The sample size is
	            ``total * frac`` rounded with the built-in ``round()``.

	    Returns:
	        A list of distinct integer indices, in the order produced by
	        ``random.sample``.

	    Raises:
	        ValueError: Propagated from ``random.sample`` when the rounded
	            sample size is negative or larger than ``total``.
	    """
	    # round() is not guaranteed to return a plain int: Python 2 returns
	    # a float, and a NumPy floating scalar (e.g. when ``total`` arrives
	    # as a NumPy integer) may round to a NumPy float on some NumPy
	    # versions. random.sample cannot use a non-integer sample size.
	    sample_size = int(round(total * frac))
	    return random.sample(range(total), sample_size)

	def label_sampler(labels, list_IDs):
	    """Collect the labels stored at the given indices into a NumPy vector.

	    Args:
	        labels: Container indexed with each entry of ``list_IDs``
	            (``labels[idx]``).
	        list_IDs: Sequence of indices to look up, in output order.

	    Returns:
	        A 1-D float array of length ``len(list_IDs)`` whose ``i``-th
	        entry is ``labels[list_IDs[i]]``.
	    """
	    picked = np.zeros(len(list_IDs))
	    for slot, label_id in enumerate(list_IDs):
	        # One lookup per requested index, written to the matching slot.
	        picked[slot] = labels[label_id]
	    return picked

	# Draw the index sample for each split. The draws run in train, dev,
	# test order; keep that order so a seeded run yields the same indices.
	# NOTE(review): `.size` is used as the number of labels, which assumes
	# one label value per example -- confirm against the HDF5 layout.
	train_IDs = get_sample_indices(train_l.size, sub_sample)
	dev_IDs = get_sample_indices(dev_l.size, sub_sample)
	test_IDs = get_sample_indices(test_l.size, sub_sample)

	# Parallel lists: label dataset and its sampled indices, per split.
	all_labels = [train_l, dev_l, test_l]
	splits = [train_IDs, dev_IDs, test_IDs]

	# Print, for each split, the sum of the sampled labels next to the
	# sample size (presumably labels are 0/1 so the sum counts positives
	# -- confirm).
	for labels, IDs in zip(all_labels, splits):
	    positive = np.sum(label_sampler(labels, IDs))
	    n = len(IDs)
	    print('{} out of {}'.format(positive, n))