Skip to content

Instantly share code, notes, and snippets.

@willettk
Created September 11, 2013 00:26
Show Gist options
  • Save willettk/6517775 to your computer and use it in GitHub Desktop.
Save willettk/6517775 to your computer and use it in GitHub Desktop.
Quick Python code to collate the GZ: Quench data. Takes ~15-20 minutes to run on a MacBook Pro laptop.
import re
import csv
import numpy as np
import operator
from astropy.io import fits as pyfits
'''
To create the full collated data for GZ: Quench:
>>> import quench_collate as qc
>>> listcoll = qc.collate_answers()
>>> qc.write_json(listcoll)
>>> qc.write_fits(listcoll)
- Kyle Willett (willett@physics.umn.edu)
29 Aug 2013 - corrected the decision tree, moving question 8 outside question 2 requirement.
'''
# Directory that holds the raw classification export; the writer functions
# in this file also place their output files here.
quenchdir = '/Users/willettk/Astronomy/Research/GalaxyZoo/quench/'
# Name (inside quenchdir) of the classification CSV opened by load_data()
# and data_reader().
filename = '2013-08-29_galaxy_zoo_starburst_classifications.csv'
def load_data(path=None):
    """Return every line of the raw classification file as a list.

    Parameters
    ----------
    path : str, optional
        File to read.  Defaults to ``quenchdir + filename``.

    Returns
    -------
    list of str
        The lines of the file, header line first, each still carrying its
        line terminator.
    """
    if path is None:
        path = quenchdir + filename
    # Text mode keeps the rows as ``str`` on Python 3 as well (binary mode
    # would hand back bytes, which cannot be split on ','), and the context
    # manager closes the file even when the read fails.
    with open(path, 'r') as f:
        return f.readlines()
def data_reader(path=None):
    """Read the classification CSV into a dictionary of column lists.

    Parameters
    ----------
    path : str, optional
        File to read.  Defaults to ``quenchdir + filename``.

    Returns
    -------
    dict
        Maps each header name to the list of (string) values found in that
        column, in file order.  An empty file yields an empty dictionary.
    """
    if path is None:
        path = quenchdir + filename
    column = {}
    # The context manager guarantees the handle is released; text mode keeps
    # the csv module working on Python 3 as well as Python 2.
    with open(path, 'r') as f:
        reader = csv.reader(f)
        # The next() builtin replaces the Python-2-only reader.next(); the
        # default avoids StopIteration on a file with no header line.
        headers = next(reader, [])
        for h in headers:
            column[h] = []
        for row in reader:
            # zip() stops at the shorter of the two, so a short row only
            # fills the leading columns.
            for h, v in zip(headers, row):
                column[h].append(v)
    return column
def unique_subjects(column=None):
    """Return the distinct subject ids and one SDSS id for each of them.

    Parameters
    ----------
    column : dict, optional
        Column dictionary as returned by ``data_reader()``; it must contain
        ``'subject_id'`` and ``'sdss_id'`` lists.  When omitted the file is
        read with ``data_reader()``.

    Returns
    -------
    (set, list)
        The set of subject ids, and a list holding, for every subject in the
        iteration order of that set, the SDSS id from the first row in which
        the subject appears.
    """
    if column is None:
        column = data_reader()
    # One pass to remember the first SDSS id seen for each subject; this
    # replaces a list.index() scan per subject, which was quadratic.
    first_sdss = {}
    for sub, sdss_id in zip(column['subject_id'], column['sdss_id']):
        first_sdss.setdefault(sub, sdss_id)
    subjects = set(column['subject_id'])
    sdss = [first_sdss[sub] for sub in subjects]
    return subjects, sdss
def unique_sdss(column=None):
    """Return one subject id for each distinct SDSS id, and the SDSS ids.

    Parameters
    ----------
    column : dict, optional
        Column dictionary as returned by ``data_reader()``; it must contain
        ``'subject_id'`` and ``'sdss_id'`` lists.  When omitted the file is
        read with ``data_reader()``.

    Returns
    -------
    (list, set)
        A list holding, for every SDSS id in the iteration order of the
        returned set, the subject id from the first row in which that SDSS id
        appears; and the set of SDSS ids itself.
    """
    if column is None:
        column = data_reader()
    # One pass to remember the first subject seen for each SDSS id; this
    # replaces a list.index() scan per id, which was quadratic.
    first_subject = {}
    for sub, sdss_id in zip(column['subject_id'], column['sdss_id']):
        first_subject.setdefault(sdss_id, sub)
    sdss = set(column['sdss_id'])
    subjects = [first_subject[s] for s in sdss]
    return subjects, sdss
def colldict():
    """Build a fresh, zeroed vote tally for one object.

    Returns
    -------
    dict
        Keys ``"mini_project-0"`` through ``"mini_project-11"``; each value is
        a new dictionary mapping the answer codes of that task (``"a-0"``,
        ``"a-1"``, ...) to a count of 0.
    """
    # Number of answer codes offered by tasks 0 through 11, in task order.
    answer_counts = (3, 3, 2, 2, 2, 2, 3, 3, 3, 4, 2, 2)
    tally = {}
    for task, n_answers in enumerate(answer_counts):
        zeroed = dict(("a-%d" % answer, 0) for answer in range(n_answers))
        tally["mini_project-%d" % task] = zeroed
    return tally
def collate_answers(bysdss=False):
    """Tally the votes for every object in the classification file.

    Parameters
    ----------
    bysdss : bool, optional
        If True, group classifications by SDSS id (column 2 of each row);
        otherwise group them by subject id (column 1).

    Returns
    -------
    list of tuple
        One ``(subject_id, sdss_id, collated)`` entry per object, where
        ``collated`` is a ``colldict()`` tally with the votes added in.

    Raises
    ------
    KeyError
        If a row contains an answer code that ``colldict()`` does not define
        for the corresponding task.
    """
    if bysdss:
        subjects, sdss = unique_sdss()
        keyind = 2
    else:
        subjects, sdss = unique_subjects()
        keyind = 1

    keys = ['mini_project-%i' % taskno for taskno in range(12)]

    # Create every tally up front and index it by the grouping id, so the
    # file only has to be scanned once instead of once per object.  The
    # grouping id is the SDSS id in bysdss mode; comparing the SDSS column
    # against the subject id would never match.
    listcoll = []
    tally_for = {}
    for subject_id, sdss_id in zip(subjects, sdss):
        collated = colldict()
        tally_for[sdss_id if bysdss else subject_id] = collated
        listcoll.append((subject_id, sdss_id, collated))

    for row in load_data()[1:]:
        # Strip the line terminator before splitting so that an empty final
        # field is recognised as "no answer" rather than as the key '\n'.
        splitrow = row.rstrip('\r\n').split(',')
        if len(splitrow) <= keyind:
            # Blank or truncated line: nothing to attribute.
            continue
        collated = tally_for.get(splitrow[keyind])
        if collated is None:
            continue
        # Fields from index 5 onward are paired, in order, with tasks 0-11.
        for k, a in zip(keys, splitrow[5:]):
            if a != '':
                collated[k][a] += 1
    return listcoll
def dict_to_str(gzq_dict):
    """Serialise a vote tally to a single space-free string.

    Parameters
    ----------
    gzq_dict : dict
        Tally with keys ``'mini_project-0'`` ... ``'mini_project-11'``.

    Returns
    -------
    str
        ``{"mini_project-0":<v0>,...,"mini_project-11":<v11>}`` where each
        ``<v>`` is ``str()`` of the corresponding value, with every space
        character removed.

    Raises
    ------
    KeyError
        If one of the twelve task keys is missing.
    """
    pieces = []
    for task in range(12):
        key = 'mini_project-%i' % task
        pieces.append('"%s":%s' % (key, str(gzq_dict[key])))
    dictstr = '{' + ','.join(pieces) + '}'
    # str.replace works on both Python 2 and 3; the two-argument
    # str.translate(None, ' ') form only exists on Python 2.
    return dictstr.replace(' ', '')
def write_json(listcoll, bysdss=False):
    """Write the collated votes to a tab-separated text file in quenchdir.

    The output is ``gzquench_consensus_bysdss.csv`` or
    ``gzquench_consensus_bysubject.csv``.  Each row holds the subject id,
    the SDSS id, the number of votes on task 0, the most common path from
    ``quench_tree()`` and the tally serialised by ``dict_to_str()``.

    Parameters
    ----------
    listcoll : list of tuple
        ``(subject_id, sdss_id, collated)`` entries as produced by
        ``collate_answers()``.
    bysdss : bool, optional
        Only selects the suffix of the output file name.

    Returns
    -------
    None
    """
    filestub = '_bysdss' if bysdss else '_bysubject'
    writefilename = 'gzquench_consensus%s.csv' % filestub
    columns = ('subject_id', 'sdss_id', 'vote_total', 'most_common_path', 'votes')
    # Text mode lets the str rows be written on Python 3 too, and the
    # context manager closes the file even if a row fails to serialise.
    with open(quenchdir + writefilename, 'w') as f:
        # The serialised tally contains commas, so rows are tab-separated;
        # the header uses the same separator so that it lines up with them.
        f.write('\t'.join(columns) + '\n')
        for entry in listcoll:
            votes = entry[2]
            # The built-in sum() accepts a dict view on Python 3.
            votetotal = sum(votes['mini_project-0'].values())
            mcp = quench_tree(votes)
            fields = (str(entry[0]), str(entry[1]), str(votetotal), mcp,
                      dict_to_str(votes))
            f.write('\t'.join(fields) + '\n')
    return None
def write_fits(listcoll, bysdss=False):
    """Write the collated votes to a FITS binary table in quenchdir.

    The output is ``gzquench_consensus_bysdss.fits`` or
    ``gzquench_consensus_bysubject.fits`` and contains one table extension
    named ``GZQUENCH``.  Its columns are, in order: ``subject_id``,
    ``sdss_id``, ``total_votes`` (votes on task 0), ``most_common_path``
    (from ``quench_tree()``), and then for every task ``tTT`` the columns
    ``tTT_aAA_count`` for each answer followed by ``tTT_aAA_fraction`` for
    each answer.  A fraction is the answer's count divided by the total
    number of votes on that task, or 0 when the task received no votes.

    Parameters
    ----------
    listcoll : list of tuple
        ``(subject_id, sdss_id, collated)`` entries as produced by
        ``collate_answers()``.
    bysdss : bool, optional
        Only selects the suffix of the output file name.

    Returns
    -------
    None
    """
    filestub = '_bysdss' if bysdss else '_bysubject'
    writefilename = 'gzquench_consensus%s.fits' % filestub

    # Number of answers offered by tasks 0-11; this has to agree with the
    # layout produced by colldict().
    answers_per_task = (3, 3, 2, 2, 2, 2, 3, 3, 3, 4, 2, 2)

    subject_id, sdss_id, total_votes, mostcommonpath = [], [], [], []
    # counts[task][answer] and fractions[task][answer] are per-object lists.
    counts = [[[] for _ in range(n)] for n in answers_per_task]
    fractions = [[[] for _ in range(n)] for n in answers_per_task]

    for entry in listcoll:
        votes = entry[2]
        subject_id.append(entry[0])
        sdss_id.append(entry[1])
        # The built-in sum() accepts a dict view on Python 3.
        total_votes.append(sum(votes['mini_project-0'].values()))
        mostcommonpath.append(quench_tree(votes))
        for task, n_answers in enumerate(answers_per_task):
            task_votes = votes['mini_project-%i' % task]
            # Float total so the division below is never integer division.
            task_total = float(sum(task_votes.values()))
            for answer in range(n_answers):
                n_votes = task_votes['a-%i' % answer]
                counts[task][answer].append(n_votes)
                fractions[task][answer].append(
                    n_votes / task_total if task_total > 0. else 0.)

    columns = [
        pyfits.Column(name='subject_id', format='A24', array=subject_id),
        pyfits.Column(name='sdss_id', format='K', array=sdss_id),
        pyfits.Column(name='total_votes', format='I4', array=total_votes),
        pyfits.Column(name='most_common_path', format='A100',
                      array=mostcommonpath),
    ]
    for task, n_answers in enumerate(answers_per_task):
        # All counts of a task come first, then all of its fractions.
        for answer in range(n_answers):
            columns.append(pyfits.Column(
                name='t%02i_a%02i_count' % (task, answer),
                format='I4', array=counts[task][answer]))
        for answer in range(n_answers):
            columns.append(pyfits.Column(
                name='t%02i_a%02i_fraction' % (task, answer),
                format='E5.3', array=fractions[task][answer]))

    hdulist = pyfits.HDUList([pyfits.PrimaryHDU()])
    # BinTableHDU.from_columns and overwrite= are the replacements for the
    # deprecated new_table() function and clobber= argument.
    # NOTE(review): these need a reasonably recent astropy; confirm the
    # minimum version against the astropy.io.fits documentation.
    tb1_hdu = pyfits.BinTableHDU.from_columns(columns)
    tb1_hdu.name = 'GZQUENCH'
    hdulist.append(tb1_hdu)
    hdulist.writeto(quenchdir + writefilename, overwrite=True)
    return None
def max_item(jdict):
    """Return the key of ``jdict`` that has the largest value.

    When several keys share the largest value, the one encountered first
    while iterating over the dictionary is returned.

    Raises
    ------
    ValueError
        If ``jdict`` is empty.
    """
    # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    mi = max(jdict.items(), key=operator.itemgetter(1))[0]
    return mi
def quench_tree(gal):
    """Return the most common path through the decision tree for one object.

    At each step the answer with the most votes (see ``max_item()``) is
    taken, and the steps visited are concatenated as ``s<task><answer>;``
    with the hyphen removed from the answer code, e.g. ``s0a1;s2a0;...``.

    Parameters
    ----------
    gal : dict
        Vote tally in the layout produced by ``colldict()``.

    Returns
    -------
    str
        The concatenated path.

    Raises
    ------
    AssertionError
        If ``gal`` has no ``'mini_project-0'`` entry.

    Notes
    -----
    NOTE(review): the indentation of this function was not available in the
    copy under review, so the nesting below was reconstructed from the step
    comments.  In particular, tasks 8-10 are placed directly under the
    "not star/artifact" branch and task 11 at the top level; confirm this
    against the project's decision tree.
    """
    assert 'mini_project-0' in gal, \
        'Cannot find mini_project-0 in keys'

    steps = []

    def ask(task):
        # Record the winning answer of one task and hand it back so the
        # caller can branch on it.
        winner = max_item(gal['mini_project-%i' % task])
        steps.append('s%i%s;' % (task, str(winner).replace('-', '')))
        return winner

    # First answer
    s0_max = ask(0)

    # Anything but star/artifact
    if s0_max != 'a-2':

        # Smooth galaxies
        if s0_max == 'a-0':
            ask(1)

        # Features/disk
        if s0_max == 'a-1':
            if ask(2) == 'a-0':
                # Edge-on disk: edge-on bulge.  This step is labelled s3,
                # so it reads task 3 (it previously read task 8, which is
                # the off-center clumps question asked further down).
                ask(3)
            else:
                # Not edge-on disk: bar, then spiral
                ask(4)
                if ask(5) == 'a-0':
                    # Arm tightness
                    ask(6)
                # Bulge prominence
                ask(7)

        # Off-center bright clumps?
        ask(8)
        # Merging or tidal debris?
        ask(9)
        # Symmetrical?
        ask(10)

    # Discuss object?
    ask(11)

    return ''.join(steps)
def find_duplicates():
    """Print every subject id that is associated with more than one SDSS id.

    The classification file is read with ``data_reader()``; one line of the
    form ``<subject_id> <set of sdss ids>`` is printed per offending subject.

    Returns
    -------
    None
    """
    cols = data_reader()
    # Group the SDSS ids by subject in a single pass instead of rescanning
    # the whole subject column once per subject.
    ids_by_subject = {}
    for subject, sdss_id in zip(cols['subject_id'], cols['sdss_id']):
        ids_by_subject.setdefault(subject, set()).add(sdss_id)
    for subject, sdss_ids in ids_by_subject.items():
        if len(sdss_ids) > 1:
            # A single pre-formatted argument prints identically whether
            # print is a statement (Python 2) or a function (Python 3).
            print('%s %s' % (subject, sdss_ids))
    return None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment