aphearin/numpy_table_agg.py

## numpy_table_agg.py
import numpy as np
from astropy.table import Table

############################################
# First we create some fake data
# We'll make a galaxy group catalog
# using a million galaxies so that the calculation
# would be very expensive to do naively
Ngals = 1e6
Ngroups = 3e5

galid = np.arange(Ngals, dtype=int)
np.random.shuffle(galid)
mstar = np.random.random(Ngals)
sfr = np.random.random(Ngals)

# The groupid is just the galid for the first Ngroups galaxies
# Those will be our central galaxies
# The remaining galaxies are assigned to a random group
groupid = np.zeros(Ngals, dtype=int)
groupid[0:Ngroups] = galid[0:Ngroups]
groupid[Ngroups:] = np.random.choice(galid[0:Ngroups], size=len(groupid[Ngroups:]))

# Bundle the result into a table
galtable = Table({'galid': galid, 'groupid': groupid, 'mstar': mstar, 'sfr': sfr})

############################################
# At this point, we now have our fake data. The aggregation algorithm now proceeds.
galtable.sort('groupid')

group_ids, group_indices, group_richness = np.unique(
    galtable['groupid'].data, return_index = True, return_counts = True)
# Using .data in the above line is important since looping over an Astropy Column is
# orders of magnitude slower than looping over the data in the memory buffer

temp_arr = np.zeros(len(galtable))
mstar_buffer = galtable['mstar'].data
sfr_buffer = galtable['sfr'].data
for igroup, groupid in enumerate(group_ids):
    first_igroup_idx = group_indices[igroup]
    last_igroup_idx = first_igroup_idx + group_richness[igroup]
    mstar_igroup = mstar_buffer[first_igroup_idx:last_igroup_idx]
    sfr_igroup = sfr_buffer[first_igroup_idx:last_igroup_idx]
    igroup_result = np.mean(mstar_igroup*sfr_igroup)
    temp_arr[first_igroup_idx:last_igroup_idx] = igroup_result

galtable['mstar_weighted_sfr'] = temp_arr
	import numpy as np
	from astropy.table import Table

	############################################
	# First we create some fake data
	# We'll make a galaxy group catalog
	# using a million galaxies so that the calculation
	# would be very expensive to do naively
	Ngals = 1e6
	Ngroups = 3e5

	galid = np.arange(Ngals, dtype=int)
	np.random.shuffle(galid)
	mstar = np.random.random(Ngals)
	sfr = np.random.random(Ngals)

	# The groupid is just the galid for the first Ngroups galaxies
	# Those will be our central galaxies
	# The remaining galaxies are assigned to a random group
	groupid = np.zeros(Ngals, dtype=int)
	groupid[0:Ngroups] = galid[0:Ngroups]
	groupid[Ngroups:] = np.random.choice(galid[0:Ngroups], size=len(groupid[Ngroups:]))

	# Bundle the result into a table
	galtable = Table({'galid': galid, 'groupid': groupid, 'mstar': mstar, 'sfr': sfr})

	############################################
	# At this point, we now have our fake data. The aggregation algorithm now proceeds.
	galtable.sort('groupid')

	group_ids, group_indices, group_richness = np.unique(
	galtable['groupid'].data, return_index = True, return_counts = True)
	# Using .data in the above line is important since looping over an Astropy Column is
	# orders of magnitude slower than looping over the data in the memory buffer

	temp_arr = np.zeros(len(galtable))
	mstar_buffer = galtable['mstar'].data
	sfr_buffer = galtable['sfr'].data
	for igroup, groupid in enumerate(group_ids):
	first_igroup_idx = group_indices[igroup]
	last_igroup_idx = first_igroup_idx + group_richness[igroup]
	mstar_igroup = mstar_buffer[first_igroup_idx:last_igroup_idx]
	sfr_igroup = sfr_buffer[first_igroup_idx:last_igroup_idx]
	igroup_result = np.mean(mstar_igroup*sfr_igroup)
	temp_arr[first_igroup_idx:last_igroup_idx] = igroup_result

	galtable['mstar_weighted_sfr'] = temp_arr