Skip to content

Instantly share code, notes, and snippets.

@fjaviersanchez
Created September 22, 2017 06:00
Show Gist options
  • Save fjaviersanchez/37ce1887e526fabe5758a08a142b3ac2 to your computer and use it in GitHub Desktop.
Save fjaviersanchez/37ce1887e526fabe5758a08a142b3ac2 to your computer and use it in GitHub Desktop.
import numpy as np
import dask.dataframe as dd
import desc.pserv
import desc.pserv.utils as pserv_utils
import lsst.daf.persistence
def asPandas(cat, cls=None, copy=False, unviewable="copy"):
    """!
    Return a pandas DataFrame holding the columns of an LSST catalog.

    Columns appear in the order reported by the catalog schema. Field units
    and catalog metadata are not carried over to the DataFrame.

    @param[in] cat         Input SourceCatalog object.
    @param[in] cls         Callable used to build the result; it is invoked as
                           cls(columns, copy=False) with an ordered mapping of
                           column name to array. Defaults to pandas.DataFrame.
    @param[in] copy        Whether to copy data out of the LSST catalog.
                           Not copying is usually faster, but can keep memory
                           from being freed while the result is alive.
    @param[in] unviewable  One of the following options, indicating how to
                           handle field types (String and Flag) for which
                           views cannot be constructed:
                            - 'copy' (default): copy only the unviewable fields.
                            - 'raise': raise ValueError if unviewable fields
                              are present.
                            - 'skip': do not include unviewable fields in the
                              result.
                           This option is ignored if copy=True.

    @return The object produced by cls (a pandas.DataFrame by default).

    @throws ValueError if 'unviewable' is not a recognised option, or if it is
            'raise', copy is False and the schema has a String or Flag field.
    """
    # Function-scope imports, like the original, so importing this module does
    # not require pandas.
    from collections import OrderedDict

    import pandas as pd

    if cls is None:
        cls = pd.DataFrame
    if unviewable not in ("copy", "raise", "skip"):
        raise ValueError("'unviewable' must be one of 'copy', 'raise', or 'skip'")

    # OrderedDict keeps the schema order on interpreters where a plain dict
    # does not preserve insertion order.
    columns = OrderedDict()
    for name, item in cat.schema.extract("*", ordered=True).items():
        key = item.key
        type_string = key.getTypeString()

        if type_string == "String":
            if not copy:
                if unviewable == "raise":
                    raise ValueError("Cannot extract string unless copy=True or unviewable='copy' or 'skip'.")
                if unviewable == "skip":
                    continue
            # Strings have no array view: build a fixed-width array record by
            # record. (The original referenced the undefined name `numpy`
            # here; the module is imported as `np`.)
            data = np.array(
                [record.get(key) for record in cat],
                dtype=np.dtype((str, key.getSize())),
            )
        elif type_string == "Flag":
            if not copy:
                if unviewable == "raise":
                    raise ValueError(
                        "Cannot extract packed bit columns unless copy=True or unviewable='copy' or 'skip'."
                    )
                if unviewable == "skip":
                    continue
            # Flags are stored as packed bits; this always yields a new array.
            data = cat.columns.get_bool_array(key)
        else:
            # Every other field type (including Angle) is fetched directly
            # from the catalog's column accessor.
            data = cat.columns.get(key)
            if copy:
                data = data.copy()

        columns[name] = data

    return cls(columns, copy=False)
# Build one dask DataFrame (`df_dask`) from the r-band deepCoadd measurement
# catalogs of up to 100 patches per tract, adding magnitude columns derived
# from the CModel and PSF fluxes.

# Calib is used below; importing lsst.daf.persistence alone does not
# guarantee that lsst.afw.image has been loaded.
import lsst.afw.image

REPO_PATH = '/global/cscratch1/sd/descdm/DC1/DC1-phoSim-3a/'
MAX_PATCHES_PER_TRACT = 100

repo_info = desc.pserv.RepositoryInfo(REPO_PATH)
# NOTE(review): the original used `butler` without ever defining it (the
# resulting NameError was hidden by a bare `except: pass`). Assuming a Butler
# opened on the same repository is what was intended -- confirm.
butler = lsst.daf.persistence.Butler(REPO_PATH)
patches = repo_info.get_patches()

n = 0           # number of non-empty patches merged so far
df_dask = None  # stays None when no patch yields any rows
for tract, patch_list in patches.items():
    for patch in patch_list[:MAX_PATCHES_PER_TRACT]:
        if n % 10 == 1:
            print('Patch %s' % (patch,))
        data_id = dict(filter='r', tract=tract, patch=patch)
        try:
            df = asPandas(butler.get('deepCoadd_meas', **data_id))
            calib = lsst.afw.image.Calib(butler.get('deepCoadd_md', **data_id))
            # Do not raise on negative fluxes when converting to magnitudes.
            calib.setThrowOnNegativeFlux(False)
            # NOTE(review): the *_magSigma columns are the magnitude of a flux
            # equal to fluxSigma, not a propagated magnitude uncertainty.
            # Kept as in the original -- confirm this is intended.
            for prefix in ('modelfit_CModel', 'base_PsfFlux'):
                for flux_suffix, mag_suffix in (('flux', 'mag'), ('fluxSigma', 'magSigma')):
                    flux_values = df['%s_%s' % (prefix, flux_suffix)].values
                    df['%s_%s' % (prefix, mag_suffix)] = calib.getMagnitude(flux_values)
        except Exception as err:
            # Best effort, as in the original: a patch that cannot be loaded
            # or calibrated is skipped -- but the reason is now reported.
            print('Skipping tract %s patch %s: %r' % (tract, patch, err))
            continue

        if len(df) == 0:
            continue
        n += 1
        if df_dask is None:
            df_dask = dd.from_pandas(df, npartitions=100)
        else:
            # Convert explicitly instead of handing a pandas frame to
            # dd.concat; every patch frame has its own 0..N-1 index, hence
            # interleave_partitions.
            df_dask = dd.concat(
                [df_dask, dd.from_pandas(df, npartitions=1)],
                interleave_partitions=True,
            )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment