Created
September 11, 2013 00:26
-
-
Save willettk/6517775 to your computer and use it in GitHub Desktop.
Quick Python code to collate the GZ: Quench data. Takes ~15-20 minutes to run on MacBookPro laptop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import csv | |
import numpy as np | |
import operator | |
from astropy.io import fits as pyfits | |
'''
To create the full collated data for GZ: Quench:

>>> import quench_collate as qc
>>> listcoll = qc.collate_answers()
>>> qc.write_json(listcoll)
>>> qc.write_fits(listcoll)

- Kyle Willett (willett@physics.umn.edu)

29 Aug 2013 - corrected the decision tree, moving question 8 outside the question 2 requirement.
'''
# Directory holding the classification export; output files are written here
# too.  It is joined to file names by plain string concatenation, so it must
# end with a path separator.
quenchdir = '/Users/willettk/Astronomy/Research/GalaxyZoo/quench/'

# Name of the classification CSV that load_data() and data_reader() open.
filename = '2013-08-29_galaxy_zoo_starburst_classifications.csv'
def load_data():
    """Read the classification export and return it as a list of lines.

    Returns
    -------
    list of str
        Every line of ``quenchdir + filename`` (header line first), each one
        still carrying its line terminator.

    Raises
    ------
    IOError / OSError
        If the file cannot be opened or read.
    """
    # Text mode keeps the lines as ``str`` on Python 3 as well, so callers can
    # split them on ','.  The context manager closes the handle even when the
    # read itself fails.
    with open(quenchdir + filename, 'r') as f:
        return f.readlines()
def data_reader():
    """Load the classification export as a dictionary of columns.

    The first CSV record is taken as the header; every later record is
    distributed over the columns named there.

    Returns
    -------
    dict
        Maps each header name to the list of string values found in that
        column, in file order.  A record shorter than the header contributes
        only to the leading columns (``zip`` stops at the shorter sequence).

    Raises
    ------
    StopIteration
        If the file is empty (there is no header record to read).
    IOError / OSError
        If the file cannot be opened.
    """
    # The handle is closed as soon as parsing is done; the original left it
    # open for the lifetime of the process.
    with open(quenchdir + filename, 'r') as f:
        reader = csv.reader(f)
        # next(reader) works on both Python 2.6+ and Python 3, unlike the
        # Python-2-only reader.next().
        headers = next(reader)
        column = {}
        for h in headers:
            column[h] = []
        for row in reader:
            for h, v in zip(headers, row):
                column[h].append(v)
    return column
def unique_subjects():
    """Return the unique subject ids and one SDSS id for each of them.

    Returns
    -------
    subjects : set of str
        The distinct values of the ``subject_id`` column.
    sdss : list of str
        For each subject, in the iteration order of ``subjects``, the
        ``sdss_id`` of the first row in which that subject appears.
    """
    column = data_reader()

    # One pass records the sdss_id of the first row of every subject.  This
    # replaces a list.index() scan per subject, which made the function
    # quadratic in the number of rows.
    first_sdss = {}
    for subject_id, sdss_id in zip(column['subject_id'], column['sdss_id']):
        first_sdss.setdefault(subject_id, sdss_id)

    subjects = set(column['subject_id'])
    sdss = [first_sdss[sub] for sub in subjects]
    return subjects, sdss
def unique_sdss():
    """Return one subject id for each unique SDSS id, plus the SDSS ids.

    Returns
    -------
    subjects : list of str
        For each SDSS id, in the iteration order of ``sdss``, the
        ``subject_id`` of the first row in which that SDSS id appears.
    sdss : set of str
        The distinct values of the ``sdss_id`` column.
    """
    column = data_reader()

    # One pass records the subject_id of the first row of every SDSS id.  This
    # replaces a list.index() scan per id, which made the function quadratic
    # in the number of rows.
    first_subject = {}
    for subject_id, sdss_id in zip(column['subject_id'], column['sdss_id']):
        first_subject.setdefault(sdss_id, subject_id)

    sdss = set(column['sdss_id'])
    subjects = [first_subject[s] for s in sdss]
    return subjects, sdss
def colldict():
    """Return a fresh, zero-filled vote tally.

    The result maps ``'mini_project-0'`` ... ``'mini_project-11'`` to a dict
    of answer counters ``'a-0'``, ``'a-1'``, ... all set to 0.  A new set of
    dictionaries is built on every call, so callers can increment the
    counters without affecting other tallies.
    """
    # Number of possible answers for mini_project-0 ... mini_project-11.
    answers_per_question = (3, 3, 2, 2, 2, 2, 3, 3, 3, 4, 2, 2)

    collated = {}
    for question, n_answers in enumerate(answers_per_question):
        counters = {}
        for answer in range(n_answers):
            counters['a-%i' % answer] = 0
        collated['mini_project-%i' % question] = counters
    return collated
def collate_answers(bysdss=False):
    """Tally the votes for every unique subject (or SDSS object).

    Parameters
    ----------
    bysdss : bool, optional
        If True, rows are grouped on the SDSS id (column index 2 of the
        export); otherwise on the subject id (column index 1).

    Returns
    -------
    list of tuple
        One ``(subject_id, sdss_id, collated)`` tuple per unique id, in the
        order produced by ``unique_sdss()`` / ``unique_subjects()``.
        ``collated`` has the layout returned by ``colldict()`` and holds the
        number of votes cast for each answer.

    Raises
    ------
    KeyError
        If a row contains an answer that is not present in the ``colldict()``
        template for that question.
    """
    if bysdss:
        subjects, sdss = unique_sdss()
        keyind = 2
    else:
        subjects, sdss = unique_subjects()
        keyind = 1

    rows = load_data()
    keys = ['mini_project-%i' % strno for strno in range(12)]

    # Create every tally up front and index it by the grouping id.  The data
    # rows can then be walked once instead of once per subject.
    listcoll = []
    tallies = {}
    for subject_id, sdss_id in zip(subjects, sdss):
        collated = colldict()
        # The original compared the key column against subject_id even when
        # bysdss was set, so grouping by SDSS id never matched the right rows.
        keyid = sdss_id if bysdss else subject_id
        tallies[keyid] = collated
        listcoll.append((subject_id, sdss_id, collated))

    # load_data() may hand back bytes on Python 3 if the file was opened in
    # binary mode; the csv module needs text.
    text_rows = (row if isinstance(row, str) else row.decode('utf-8')
                 for row in rows[1:])

    # csv.reader parses the rows the same way data_reader() did when the id
    # lists were built, and it drops the line terminator from the last field.
    for fields in csv.reader(text_rows):
        if len(fields) <= keyind:
            # Blank or truncated line: nothing to attribute.
            continue
        collated = tallies.get(fields[keyind])
        if collated is None:
            continue
        # Answers to mini_project-0 ... mini_project-11 start at column 5.
        for k, a in zip(keys, fields[5:]):
            if a != '':
                collated[k][a] += 1

    return listcoll
def dict_to_str(gzq_dict):
    """Serialise a vote tally as a compact JSON object string.

    Parameters
    ----------
    gzq_dict : dict
        Tally with the keys ``'mini_project-0'`` ... ``'mini_project-11'``,
        each mapping to a dict of answer counts (the layout produced by
        ``colldict()``).

    Returns
    -------
    str
        ``{"mini_project-0":{...},...,"mini_project-11":{...}}`` with the
        questions in numerical order, the answers sorted by key, and no
        whitespace.

    Raises
    ------
    KeyError
        If any of the twelve ``mini_project-N`` keys is missing.
    """
    import json

    parts = []
    for question in range(12):
        key = 'mini_project-%i' % question
        # json.dumps gives double-quoted keys in a stable order.  The original
        # interpolated the dict repr (single quotes, arbitrary order), which
        # is not valid JSON, and then stripped spaces with the Python-2-only
        # str.translate(None, ' ').
        answers = json.dumps(gzq_dict[key], separators=(',', ':'), sort_keys=True)
        parts.append('"%s":%s' % (key, answers))
    return '{' + ','.join(parts) + '}'
def write_json(listcoll, bysdss=False):
    """Write the collated votes to a tab-delimited text file.

    The output is ``quenchdir + 'gzquench_consensus<stub>.csv'``, where
    ``<stub>`` is ``_bysdss`` or ``_bysubject``.  Each line holds the subject
    id, SDSS id, number of votes on ``mini_project-0``, the most common path
    from ``quench_tree()`` and the tally from ``dict_to_str()``.  An existing
    file is overwritten.

    Parameters
    ----------
    listcoll : list of tuple
        ``(subject_id, sdss_id, collated)`` entries as returned by
        ``collate_answers()``.
    bysdss : bool, optional
        Only selects the file-name stub; it does not regroup ``listcoll``.

    Returns
    -------
    None
    """
    filestub = '_bysdss' if bysdss else '_bysubject'
    writefilename = 'gzquench_consensus%s.csv' % filestub

    header = ('subject_id', 'sdss_id', 'vote_total', 'most_common_path', 'votes')

    # Text mode so that str can be written on Python 3 too; the context
    # manager closes the file even if a row fails to serialise.
    with open(quenchdir + writefilename, 'w') as f:
        # The data rows are tab-delimited because the votes field itself
        # contains commas, so the header has to use the same delimiter.
        f.write('\t'.join(header) + '\n')
        for entry in listcoll:
            votes = entry[2]
            # Built-in sum works on a dict view; np.sum does not on Python 3.
            votetotal = sum(votes['mini_project-0'].values())
            fields = (str(entry[0]), str(entry[1]), str(votetotal),
                      quench_tree(votes), dict_to_str(votes))
            f.write('\t'.join(fields) + '\n')
    return None
def _fits_column_data(listcoll):
    """Return ``(name, format, values)`` triples for the FITS table columns, in output order."""
    # The question/answer layout comes from the tally template, so the columns
    # always agree with what collate_answers() produces.
    schema = colldict()
    questions = []
    for t in range(len(schema)):
        key = 'mini_project-%i' % t
        answers = ['a-%i' % a for a in range(len(schema[key]))]
        questions.append((t, key, answers))

    subject_id, sdss_id, total_votes, mostcommonpath = [], [], [], []
    counts = {}
    fractions = {}
    for t, key, answers in questions:
        for ans in answers:
            counts[(key, ans)] = []
            fractions[(key, ans)] = []

    for entry in listcoll:
        votes = entry[2]
        subject_id.append(entry[0])
        sdss_id.append(entry[1])
        total_votes.append(sum(votes['mini_project-0'].values()))
        mostcommonpath.append(quench_tree(votes))
        for t, key, answers in questions:
            # Float so the division below is a true division on Python 2 too.
            question_total = float(sum(votes[key].values()))
            for ans in answers:
                n = votes[key][ans]
                counts[(key, ans)].append(n)
                # A question nobody answered gets fraction 0 rather than NaN.
                fractions[(key, ans)].append(
                    n / question_total if question_total > 0. else 0.)

    columns = [
        ('subject_id', 'A24', subject_id),
        ('sdss_id', 'K', sdss_id),
        ('total_votes', 'I4', total_votes),
        ('most_common_path', 'A100', mostcommonpath),
    ]
    # For each question: all count columns first, then all fraction columns.
    for t, key, answers in questions:
        for a, ans in enumerate(answers):
            columns.append(('t%02i_a%02i_count' % (t, a), 'I4', counts[(key, ans)]))
        for a, ans in enumerate(answers):
            columns.append(('t%02i_a%02i_fraction' % (t, a), 'E5.3', fractions[(key, ans)]))
    return columns


def write_fits(listcoll, bysdss=False):
    """Write the collated votes to a FITS binary table.

    The output is ``quenchdir + 'gzquench_consensus<stub>.fits'``, where
    ``<stub>`` is ``_bysdss`` or ``_bysubject``; an existing file is
    overwritten.  The table extension is named ``GZQUENCH`` and contains
    ``subject_id``, ``sdss_id``, ``total_votes``, ``most_common_path`` and,
    for every question ``tNN`` and answer ``aMM``, a ``tNN_aMM_count`` and a
    ``tNN_aMM_fraction`` column (the fraction is 0 when the question received
    no votes).

    Parameters
    ----------
    listcoll : list of tuple
        ``(subject_id, sdss_id, collated)`` entries as returned by
        ``collate_answers()``.
    bysdss : bool, optional
        Only selects the file-name stub; it does not regroup ``listcoll``.

    Returns
    -------
    None
    """
    filestub = '_bysdss' if bysdss else '_bysubject'
    writefilename = 'gzquench_consensus%s.fits' % filestub

    columns = [pyfits.Column(name=name, format=fmt, array=values)
               for name, fmt, values in _fits_column_data(listcoll)]

    primary_hdu = pyfits.PrimaryHDU()
    hdulist = pyfits.HDUList([primary_hdu])

    # BinTableHDU.from_columns replaces the deprecated pyfits.new_table.
    tb1_hdu = pyfits.BinTableHDU.from_columns(columns)
    tb1_hdu.name = 'GZQUENCH'
    hdulist.append(tb1_hdu)

    # 'overwrite' is the current spelling of the former 'clobber' keyword.
    hdulist.writeto(quenchdir + writefilename, overwrite=True)
    return None
def max_item(jdict):
    """Return the key of ``jdict`` that has the largest value.

    Parameters
    ----------
    jdict : dict
        Mapping of answer key to vote count.

    Returns
    -------
    The key whose value is largest.  When several keys share the largest
    value, the one that comes first in the dictionary's iteration order wins.

    Raises
    ------
    ValueError
        If ``jdict`` is empty.
    """
    # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    mi = max(jdict.items(), key=operator.itemgetter(1))[0]
    return mi
def quench_tree(gal):
    """Return the most common path through the decision tree for one galaxy.

    Starting at ``mini_project-0``, the answer with the most votes (as chosen
    by ``max_item``) is taken at each step and decides which questions are
    followed next.

    Parameters
    ----------
    gal : dict
        Vote tally with the layout produced by ``colldict()``.

    Returns
    -------
    str
        Concatenation of ``s<step><answer>;`` tokens, e.g. ``'s0a0;s1a1;'``,
        where ``<answer>`` is the winning answer key with the '-' removed.

    Raises
    ------
    AssertionError
        If ``gal`` has no ``'mini_project-0'`` entry.
    """
    assert 'mini_project-0' in gal.keys(), \
        'Cannot find mini_project-0 in keys'

    path = []

    def follow(label, question):
        # Record the winning answer of `question` under `label`; return it so
        # the caller can branch on it.
        winner = max_item(gal[question])
        path.append('%s%s;' % (label, re.sub('-', '', str(winner))))
        return winner

    # First answer
    first = follow('s0', 'mini_project-0')

    # Everything except star/artifact
    if first != 'a-2':
        # Smooth galaxies
        if first == 'a-0':
            follow('s1', 'mini_project-1')
        # Features/disk
        if first == 'a-1':
            if follow('s2', 'mini_project-2') == 'a-0':
                # Edge-on disk: edge-on bulge
                # NOTE(review): this step is labelled s3 but reads
                # mini_project-8, the same tally as the s8 step below, while
                # mini_project-3 is never read -- confirm which is intended.
                follow('s3', 'mini_project-8')
            else:
                # Not edge-on disk: bar, then spiral
                follow('s4', 'mini_project-4')
                if follow('s5', 'mini_project-5') == 'a-0':
                    # Arm tightness
                    follow('s6', 'mini_project-6')
                # Bulge prominence
                follow('s7', 'mini_project-7')
        # Off-center bright clumps?
        follow('s8', 'mini_project-8')
        # Merging or tidal debris?
        follow('s9', 'mini_project-9')
        # Symmetrical?
        follow('s10', 'mini_project-10')

    # Discuss object?
    follow('s11', 'mini_project-11')

    return ''.join(path)
def find_duplicates():
    """Print every subject id that is associated with more than one SDSS id.

    For each such subject, one line is printed with the subject id followed
    by the set of distinct ``sdss_id`` values found on its rows.

    Returns
    -------
    None
    """
    cols = data_reader()

    # Group the SDSS ids by subject in a single pass.  The original re-read
    # the file through unique_subjects() and rescanned every row per subject.
    sdss_by_subject = {}
    for subject_id, sdss_id in zip(cols['subject_id'], cols['sdss_id']):
        sdss_by_subject.setdefault(subject_id, set()).add(sdss_id)

    for s in set(cols['subject_id']):
        sdss_ids = sdss_by_subject.get(s, set())
        if len(sdss_ids) > 1:
            # A single pre-formatted argument prints identically whether
            # print is a statement (Python 2) or a function (Python 3).
            print('%s %s' % (s, sdss_ids))
    return None
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment