Created
January 20, 2016 21:02
-
-
Save aphearin/1a125a7ee6cd370740ef to your computer and use it in GitHub Desktop.
Demonstration of a group aggregation calculation using Numpy and Astropy Tables
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from astropy.table import Table | |
############################################ | |
# First we create some fake data | |
# We'll make a galaxy group catalog | |
# using a million galaxies so that the calculation | |
# would be very expensive to do naively | |
Ngals = 1e6 | |
Ngroups = 3e5 | |
galid = np.arange(Ngals, dtype=int) | |
np.random.shuffle(galid) | |
mstar = np.random.random(Ngals) | |
sfr = np.random.random(Ngals) | |
# The groupid is just the galid for the first Ngroups galaxies | |
# Those will be our central galaxies | |
# The remaining galaxies are assigned to a random group | |
groupid = np.zeros(Ngals, dtype=int) | |
groupid[0:Ngroups] = galid[0:Ngroups] | |
groupid[Ngroups:] = np.random.choice(galid[0:Ngroups], size=len(groupid[Ngroups:])) | |
# Bundle the result into a table | |
galtable = Table({'galid': galid, 'groupid': groupid, 'mstar': mstar, 'sfr': sfr}) | |
############################################ | |
# At this point, we now have our fake data. The aggregation algorithm now proceeds. | |
galtable.sort('groupid') | |
group_ids, group_indices, group_richness = np.unique( | |
galtable['groupid'].data, return_index = True, return_counts = True) | |
# Using .data in the above line is important since looping over an Astropy Column is | |
# orders of magnitude slower than looping over the data in the memory buffer | |
temp_arr = np.zeros(len(galtable)) | |
mstar_buffer = galtable['mstar'].data | |
sfr_buffer = galtable['sfr'].data | |
for igroup, groupid in enumerate(group_ids): | |
first_igroup_idx = group_indices[igroup] | |
last_igroup_idx = first_igroup_idx + group_richness[igroup] | |
mstar_igroup = mstar_buffer[first_igroup_idx:last_igroup_idx] | |
sfr_igroup = sfr_buffer[first_igroup_idx:last_igroup_idx] | |
igroup_result = np.mean(mstar_igroup*sfr_igroup) | |
temp_arr[first_igroup_idx:last_igroup_idx] = igroup_result | |
galtable['mstar_weighted_sfr'] = temp_arr | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment