Skip to content

Instantly share code, notes, and snippets.

@aphearin
Created January 20, 2016 21:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aphearin/1a125a7ee6cd370740ef to your computer and use it in GitHub Desktop.
Save aphearin/1a125a7ee6cd370740ef to your computer and use it in GitHub Desktop.
Demonstration of a group aggregation calculation using Numpy and Astropy Tables
import numpy as np
from astropy.table import Table
############################################
# First we create some fake data
# We'll make a galaxy group catalog
# using a million galaxies so that the calculation
# would be very expensive to do naively
Ngals = 1e6
Ngroups = 3e5
galid = np.arange(Ngals, dtype=int)
np.random.shuffle(galid)
mstar = np.random.random(Ngals)
sfr = np.random.random(Ngals)
# The groupid is just the galid for the first Ngroups galaxies
# Those will be our central galaxies
# The remaining galaxies are assigned to a random group
groupid = np.zeros(Ngals, dtype=int)
groupid[0:Ngroups] = galid[0:Ngroups]
groupid[Ngroups:] = np.random.choice(galid[0:Ngroups], size=len(groupid[Ngroups:]))
# Bundle the result into a table
galtable = Table({'galid': galid, 'groupid': groupid, 'mstar': mstar, 'sfr': sfr})
############################################
# At this point, we now have our fake data. The aggregation algorithm now proceeds.
galtable.sort('groupid')
group_ids, group_indices, group_richness = np.unique(
galtable['groupid'].data, return_index = True, return_counts = True)
# Using .data in the above line is important since looping over an Astropy Column is
# orders of magnitude slower than looping over the data in the memory buffer
temp_arr = np.zeros(len(galtable))
mstar_buffer = galtable['mstar'].data
sfr_buffer = galtable['sfr'].data
for igroup, groupid in enumerate(group_ids):
first_igroup_idx = group_indices[igroup]
last_igroup_idx = first_igroup_idx + group_richness[igroup]
mstar_igroup = mstar_buffer[first_igroup_idx:last_igroup_idx]
sfr_igroup = sfr_buffer[first_igroup_idx:last_igroup_idx]
igroup_result = np.mean(mstar_igroup*sfr_igroup)
temp_arr[first_igroup_idx:last_igroup_idx] = igroup_result
galtable['mstar_weighted_sfr'] = temp_arr
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment