-
-
Save bradenmacdonald/0b6454a28c65fe307e63 to your computer and use it in GitHub Desktop.
Quantified comparisons between two or more data cubes, using astrodendro
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf8 | |
""" | |
Quantified comparisons between two or more data cubes, using dendrograms. | |
Copyright 2012 Braden MacDonald. | |
Re-use for any purpose but leave my name in this source file. No warranty. | |
""" | |
from __future__ import division | |
import numpy as np | |
import sys | |
import os | |
import math | |
import subprocess | |
from astrodendro import Dendrogram | |
from astrodendro.components import Branch, Leaf | |
from astrocube import DataCube | |
from scipy.stats.stats import nanmean | |
############# Set up parameters ############ | |
sys.setrecursionlimit(20000) | |
min_flux = 0.5 | |
min_npix = 40 | |
min_delta = 0.20 | |
STAT_RESOLUTION = 0.1 # Flux increment per step out of total flux range of 0 to 10 | |
NORM_FMAX = 10.0 # Normalize maximum flux to this value | |
def get_cube(name, add_noise=0): | |
" Load and normalize a data cube. Use add_noise=0.6 to make it noisy." | |
cube_path = name | |
# Normalize all cubes t have peak flux value of 10 | |
use_noise = False | |
data = DataCube(cube_path, calc_noise_dev=False).data | |
data = data * (NORM_FMAX/np.nanmax(data)) # Normalize | |
if add_noise > 0: | |
data = data + np.random.normal(scale=add_noise, size=data.shape) | |
data = data * (NORM_FMAX/np.nanmax(data)) # Re-normalize, since noise will have changed the max | |
return data | |
def get_dendrogram(cube_name, add_noise=0): | |
cube_data = get_cube(cube_name, add_noise) | |
d = Dendrogram(cube_data, minimum_flux=min_flux, minimum_npix=min_npix, minimum_delta=min_delta, verbose=True) | |
return d | |
def get_stats(d): | |
" Generate various statistics of a dendrogram " | |
root = d.main_branch | |
if type(root) is not Branch: | |
return None # That dendrogram is not complex enough; all useful structure has been filtered out. | |
def nans(length, dtype=float): | |
"Create an array of nans with length length" | |
a = np.empty((length,), dtype) | |
a.fill(np.nan) | |
return a | |
res = NORM_FMAX//STAT_RESOLUTION+1 # number of entries we'll need in our statistics arrays | |
stats = {'NVF': nans(res), 'EVF': nans(res), 'HVF': nans(res)} # n(items) vs. flux, eccentricity vs. flux, mean Height vs. flux | |
all_items = root.descendants | |
nitems = len(all_items) | |
max_eccentricity = max([item.eccentricity for item in all_items]) | |
f = STAT_RESOLUTION # Start just above zero | |
idx = 0 # Integer flux coordinate, which we will use to index the stats arrays, since 'f' is a float and thus cannot be used as an index | |
while f <= NORM_FMAX: | |
matching_items = [item for item in all_items if item.fmax > f and (item.parent is None or item.parent.merge_level < f)] | |
if matching_items: | |
# Num items vs. flux: | |
stats['NVF'][idx] = len(matching_items) # note: this needs to be normalized! | |
# Eccentricity vs. flux: | |
eccentricities = np.array([item.eccentricity for item in matching_items], dtype=np.int) | |
stats['EVF'][idx] = (np.mean(eccentricities) / max_eccentricity) | |
# mean Height vs. flux: | |
heights = np.array([ item.height for item in matching_items]) | |
stats['HVF'][idx] = np.mean(heights)/NORM_FMAX | |
f += STAT_RESOLUTION | |
idx += 1 | |
# Normalize NVF: | |
stats['NVF'] = stats['NVF'] / np.nanmax(stats['NVF']) | |
return stats # stats is a dictionary, where each value is a list of numbers between 0.0 and 1.0, indexed by flux | |
if __name__ == '__main__': | |
if len(sys.argv) < 3: | |
exit("Usage: base_fits_file fits_file2 [fits_file3...]") | |
cube_names = sys.argv[2:] | |
base_cube = sys.argv[1] | |
print("Generating dendrogram for {}:".format(base_cube)) | |
d1 = get_dendrogram(base_cube) | |
stats1 = get_stats(d1) | |
if stats1 is None: | |
sys.exit("That dendrogram is not complex enough; all useful structure has been filtered out.") | |
summaries = [] # list of (cube name str, summary str, total difference value) | |
for cube_name in cube_names: | |
print("Creating dendrogram for {}".format(cube_name)) | |
d2 = get_dendrogram(cube_name) | |
stats2 = get_stats(d2) | |
if stats2 is None: | |
print("That dendrogram is not complex enough; all useful structure has been filtered out.") | |
continue | |
# Print some comparisons | |
def diff(list1, list2): | |
" Returns the mean difference between two statistics, given two lists, one for each cube " | |
tuples = zip(list1, list2) # Each tuple in the result is e.g. (e1, e2), i.e. contains the mean eccentricities from each dendrogram at a given flux level | |
diffs = [abs(e2-e1) for e1,e2 in tuples if (not np.isnan(e1) and not np.isnan(e2))] | |
return np.mean(diffs)*100 | |
nvf_diff = diff(stats1['NVF'], stats2['NVF']) | |
evf_diff = diff(stats1['EVF'], stats2['EVF']) | |
hvf_diff = diff(stats1['HVF'], stats2['HVF']) | |
total = nvf_diff+evf_diff+hvf_diff | |
summary = "NVF {: >5.2f} EVF {: >5.2f} HVF {: >5.2f} Total {: >4.1f}".format(nvf_diff, evf_diff, hvf_diff, total) | |
summaries.append( (cube_name, summary, total) ) | |
print("Comparing {} with {}:".format(base_cube, cube_name)) | |
print(summary) | |
print(" ") | |
print("Summary: Comparing with {}".format(base_cube)) | |
summaries.sort(key=lambda t: t[2]) | |
for (cube_name, summary, total) in summaries: | |
print("{: >16}: {}".format(cube_name[:16],summary)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment