Skip to content

Instantly share code, notes, and snippets.

@bradenmacdonald
Created April 16, 2012 22:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bradenmacdonald/0b6454a28c65fe307e63 to your computer and use it in GitHub Desktop.
Save bradenmacdonald/0b6454a28c65fe307e63 to your computer and use it in GitHub Desktop.
Quantified comparisons between two or more data cubes, using astrodendro
#!/usr/bin/env python
# coding: utf8
"""
Quantified comparisons between two or more data cubes, using dendrograms.
Copyright 2012 Braden MacDonald.
Re-use for any purpose but leave my name in this source file. No warranty.
"""
from __future__ import division
import numpy as np
import sys
import os
import math
import subprocess
from astrodendro import Dendrogram
from astrodendro.components import Branch, Leaf
from astrocube import DataCube
from scipy.stats.stats import nanmean
############# Set up parameters ############
sys.setrecursionlimit(20000)
min_flux = 0.5
min_npix = 40
min_delta = 0.20
STAT_RESOLUTION = 0.1 # Flux increment per step out of total flux range of 0 to 10
NORM_FMAX = 10.0 # Normalize maximum flux to this value
def get_cube(name, add_noise=0):
" Load and normalize a data cube. Use add_noise=0.6 to make it noisy."
cube_path = name
# Normalize all cubes t have peak flux value of 10
use_noise = False
data = DataCube(cube_path, calc_noise_dev=False).data
data = data * (NORM_FMAX/np.nanmax(data)) # Normalize
if add_noise > 0:
data = data + np.random.normal(scale=add_noise, size=data.shape)
data = data * (NORM_FMAX/np.nanmax(data)) # Re-normalize, since noise will have changed the max
return data
def get_dendrogram(cube_name, add_noise=0):
cube_data = get_cube(cube_name, add_noise)
d = Dendrogram(cube_data, minimum_flux=min_flux, minimum_npix=min_npix, minimum_delta=min_delta, verbose=True)
return d
def get_stats(d):
" Generate various statistics of a dendrogram "
root = d.main_branch
if type(root) is not Branch:
return None # That dendrogram is not complex enough; all useful structure has been filtered out.
def nans(length, dtype=float):
"Create an array of nans with length length"
a = np.empty((length,), dtype)
a.fill(np.nan)
return a
res = NORM_FMAX//STAT_RESOLUTION+1 # number of entries we'll need in our statistics arrays
stats = {'NVF': nans(res), 'EVF': nans(res), 'HVF': nans(res)} # n(items) vs. flux, eccentricity vs. flux, mean Height vs. flux
all_items = root.descendants
nitems = len(all_items)
max_eccentricity = max([item.eccentricity for item in all_items])
f = STAT_RESOLUTION # Start just above zero
idx = 0 # Integer flux coordinate, which we will use to index the stats arrays, since 'f' is a float and thus cannot be used as an index
while f <= NORM_FMAX:
matching_items = [item for item in all_items if item.fmax > f and (item.parent is None or item.parent.merge_level < f)]
if matching_items:
# Num items vs. flux:
stats['NVF'][idx] = len(matching_items) # note: this needs to be normalized!
# Eccentricity vs. flux:
eccentricities = np.array([item.eccentricity for item in matching_items], dtype=np.int)
stats['EVF'][idx] = (np.mean(eccentricities) / max_eccentricity)
# mean Height vs. flux:
heights = np.array([ item.height for item in matching_items])
stats['HVF'][idx] = np.mean(heights)/NORM_FMAX
f += STAT_RESOLUTION
idx += 1
# Normalize NVF:
stats['NVF'] = stats['NVF'] / np.nanmax(stats['NVF'])
return stats # stats is a dictionary, where each value is a list of numbers between 0.0 and 1.0, indexed by flux
if __name__ == '__main__':
if len(sys.argv) < 3:
exit("Usage: base_fits_file fits_file2 [fits_file3...]")
cube_names = sys.argv[2:]
base_cube = sys.argv[1]
print("Generating dendrogram for {}:".format(base_cube))
d1 = get_dendrogram(base_cube)
stats1 = get_stats(d1)
if stats1 is None:
sys.exit("That dendrogram is not complex enough; all useful structure has been filtered out.")
summaries = [] # list of (cube name str, summary str, total difference value)
for cube_name in cube_names:
print("Creating dendrogram for {}".format(cube_name))
d2 = get_dendrogram(cube_name)
stats2 = get_stats(d2)
if stats2 is None:
print("That dendrogram is not complex enough; all useful structure has been filtered out.")
continue
# Print some comparisons
def diff(list1, list2):
" Returns the mean difference between two statistics, given two lists, one for each cube "
tuples = zip(list1, list2) # Each tuple in the result is e.g. (e1, e2), i.e. contains the mean eccentricities from each dendrogram at a given flux level
diffs = [abs(e2-e1) for e1,e2 in tuples if (not np.isnan(e1) and not np.isnan(e2))]
return np.mean(diffs)*100
nvf_diff = diff(stats1['NVF'], stats2['NVF'])
evf_diff = diff(stats1['EVF'], stats2['EVF'])
hvf_diff = diff(stats1['HVF'], stats2['HVF'])
total = nvf_diff+evf_diff+hvf_diff
summary = "NVF {: >5.2f} EVF {: >5.2f} HVF {: >5.2f} Total {: >4.1f}".format(nvf_diff, evf_diff, hvf_diff, total)
summaries.append( (cube_name, summary, total) )
print("Comparing {} with {}:".format(base_cube, cube_name))
print(summary)
print(" ")
print("Summary: Comparing with {}".format(base_cube))
summaries.sort(key=lambda t: t[2])
for (cube_name, summary, total) in summaries:
print("{: >16}: {}".format(cube_name[:16],summary))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment