# gautham20/equal_group_sampling.py

## equal_group_sampling.py
# Stratified sampling of data in such a way that the distribution of the grouped column in the sample
# is almost the same as in the original data

def group_sampler(group_data, total_df_len, n_samples, random_state=None):
    """Draw a proportional random sample from a single group.

    The group receives a share of ``n_samples`` equal to its share of the
    whole data set, rounded up. Because of the rounding, the per-group sizes
    can add up to slightly more than ``n_samples``.

    Args:
        group_data: The rows of one group; must support ``len()`` and a
            pandas-style ``sample(n=..., random_state=...)`` method.
        total_df_len: Number of rows in the full data set the group came from.
        n_samples: Target number of rows to draw across all groups.
        random_state: Optional seed forwarded to ``sample`` for reproducible
            draws. Defaults to ``None`` (non-deterministic).

    Returns:
        The result of ``group_data.sample`` with at most ``len(group_data)``
        rows.

    Raises:
        ZeroDivisionError: If ``total_df_len`` is 0.
    """
    group_len = len(group_data)
    # Multiply before dividing: ``(group_len / total_df_len) * n_samples`` can
    # land just above an integer (e.g. 7 / 100 * 100 -> 7.000000000000001),
    # and ``ceil`` would then request one row too many.
    quota = int(np.ceil(group_len * n_samples / total_df_len))
    # ``sample`` draws without replacement by default, so never request more
    # rows than the group actually has.
    return group_data.sample(n=min(quota, group_len), random_state=random_state)

# Pre-bind the size of the full data set and the overall sample budget so the
# sampler can be handed to ``groupby(...).apply`` as a one-argument callable.
group_sampler_200 = partial(group_sampler, total_df_len=len(filtered_cells), n_samples=200)

# Sample each group in proportion to its size. The callable passed to
# ``apply`` must be the partial defined above.
filtered_200_cells = filtered_cells.groupby('group_column', as_index=False).apply(group_sampler_200)
	# Stratified sampling of data in such way that the distribution of the grouped column in the sample
	# is almost same as in original data

	def group_sampler(group_data, total_df_len, n_samples):
	return group_data.sample(n=int(np.ceil((len(group_data)/ total_df_len)*n_samples)))

	group_sampler_200 = partial(group_sampler, total_df_len=len(filtered_cells), n_samples=200)

	filtered_200_cells = filtered_cells.groupby('group_column', as_index=False).apply(cell_group_sampler_200)