# fjaviersanchez/dataframe.py

## dataframe.py
import dask.dataframe as dd
import desc.pserv
import desc.pserv.utils as pserv_utils
import lsst.afw.image
import lsst.daf.persistence
import numpy as np


def asPandas(cat, cls=None, copy=False, unviewable="copy"):
    """!
    Return a pandas DataFrame built from the columns of an LSST catalog.

    @param[in]  cat         Input SourceCatalog object.

    @param[in]  cls         Callable used to build the result.  It is called as
                            cls(columns, copy=False), where columns is an ordered
                            mapping of field name to array.  Defaults to
                            pandas.DataFrame.

    @param[in]  copy        Whether to copy data from the LSST catalog into the
                            returned table.  Not copying is usually faster, but can
                            keep the catalog memory from being freed while the
                            returned columns are still in use.

    @param[in]  unviewable  One of the following options, indicating how to handle field types
                            (string and Flag) for which views cannot be constructed:
                              - 'copy' (default): copy only the unviewable fields.
                              - 'raise': raise ValueError if unviewable fields are present.
                              - 'skip': do not include unviewable fields in the result.
                            This option is ignored if copy=True.

    @return  cls(columns, copy=False); one column per extracted schema field, in
             schema order.

    @throws  ValueError  if unviewable is not one of the accepted options, or if
                         unviewable='raise', copy is False and the schema holds a
                         String or Flag field.
    """
    import collections
    import pandas as pd

    if cls is None:
        cls = pd.DataFrame
    if unviewable not in ("copy", "raise", "skip"):
        raise ValueError("'unviewable' must be one of 'copy', 'raise', or 'skip'")

    # An OrderedDict keeps the columns in schema order on every Python version.
    columns = collections.OrderedDict()
    for name, item in cat.schema.extract("*", ordered=True).items():
        key = item.key
        type_string = key.getTypeString()
        if type_string == "String":
            if not copy:
                if unviewable == "raise":
                    raise ValueError("Cannot extract string unless copy=True or unviewable='copy' or 'skip'.")
                if unviewable == "skip":
                    continue
            # Strings are gathered record by record into a fixed-width array.
            data = np.array([record.get(key) for record in cat],
                            dtype=np.dtype((str, key.getSize())))
        elif type_string == "Flag":
            if not copy:
                if unviewable == "raise":
                    raise ValueError(
                        "Cannot extract packed bit columns unless copy=True or unviewable='copy' or 'skip'."
                    )
                if unviewable == "skip":
                    continue
            data = cat.columns.get_bool_array(key)
        else:
            # Every other field type (including Angle) is read straight from the
            # catalog's column accessor and copied only on request.
            data = cat.columns.get(key)
            if copy:
                data = data.copy()
        columns[name] = data
    return cls(columns, copy=False)

# Build one dask dataframe (df_dask) out of the filter-'r' deepCoadd_meas
# catalogs of at most 100 patches per tract, adding magnitude columns to each
# per-patch table before it is merged.
repo_path = '/global/cscratch1/sd/descdm/DC1/DC1-phoSim-3a/'
repo_info = desc.pserv.RepositoryInfo(repo_path)
# NOTE(review): the original loop used `butler` without ever defining it, so
# every patch failed with a NameError that the bare `except` silently hid.
# Assuming the butler is meant to read the same repository -- confirm.
butler = lsst.daf.persistence.Butler(repo_path)
patches = repo_info.get_patches()
n = 0  # number of non-empty patch catalogs merged so far
df_dask = None  # stays None when no patch could be loaded
for tract, patch_list in patches.items():
    for patch in patch_list[:100]:
        if n % 10 == 1:
            # Single-argument print() behaves the same on Python 2 and 3.
            print('Patch %s' % (patch,))
        try:
            df = asPandas(butler.get('deepCoadd_meas', filter='r', tract=tract, patch=patch))
            calib = lsst.afw.image.Calib(butler.get('deepCoadd_md', filter='r', tract=tract, patch=patch))
            calib.setThrowOnNegativeFlux(False)
            # NOTE(review): the *_magSigma columns are getMagnitude() applied to
            # the flux uncertainty, exactly as in the original; whether that is
            # the intended magnitude error has not been verified.
            df['modelfit_CModel_mag'] = calib.getMagnitude(df['modelfit_CModel_flux'].values)
            df['modelfit_CModel_magSigma'] = calib.getMagnitude(df['modelfit_CModel_fluxSigma'].values)
            df['base_PsfFlux_mag'] = calib.getMagnitude(df['base_PsfFlux_flux'].values)
            df['base_PsfFlux_magSigma'] = calib.getMagnitude(df['base_PsfFlux_fluxSigma'].values)
            if len(df) == 0:
                # An empty catalog must not replace (or be appended to) the
                # frame accumulated so far.
                continue
            if df_dask is None:
                df_dask = dd.from_pandas(df, npartitions=100)
            else:
                df_dask = dd.concat([df_dask, df], interleave_partitions=True)
            n += 1
        except Exception as exc:
            # Still best effort (a bad patch is skipped), but the failure is
            # reported, and KeyboardInterrupt/SystemExit are no longer swallowed.
            print('Skipping tract %s patch %s: %r' % (tract, patch, exc))
	import numpy as np
	import dask.dataframe as dd
	import desc.pserv
	import desc.pserv.utils as pserv_utils
	import lsst.daf.persistence


	def asPandas(cat, cls=None, copy=False, unviewable="copy"):
	    """!
	    Return a pandas DataFrame built from the columns of an LSST catalog.

	    @param[in]  cat         Input SourceCatalog object.

	    @param[in]  cls         Callable used to build the result.  It is called as
	                            cls(columns, copy=False), where columns is an ordered
	                            mapping of field name to array.  Defaults to
	                            pandas.DataFrame.

	    @param[in]  copy        Whether to copy data from the LSST catalog into the
	                            returned table.  Not copying is usually faster, but can
	                            keep the catalog memory from being freed while the
	                            returned columns are still in use.

	    @param[in]  unviewable  One of the following options, indicating how to handle field types
	                            (string and Flag) for which views cannot be constructed:
	                              - 'copy' (default): copy only the unviewable fields.
	                              - 'raise': raise ValueError if unviewable fields are present.
	                              - 'skip': do not include unviewable fields in the result.
	                            This option is ignored if copy=True.

	    @return  cls(columns, copy=False); one column per extracted schema field, in
	             schema order.

	    @throws  ValueError  if unviewable is not one of the accepted options, or if
	                         unviewable='raise', copy is False and the schema holds a
	                         String or Flag field.
	    """
	    import collections
	    import pandas as pd

	    if cls is None:
	        cls = pd.DataFrame
	    if unviewable not in ("copy", "raise", "skip"):
	        raise ValueError("'unviewable' must be one of 'copy', 'raise', or 'skip'")

	    # An OrderedDict keeps the columns in schema order on every Python version.
	    columns = collections.OrderedDict()
	    for name, item in cat.schema.extract("*", ordered=True).items():
	        key = item.key
	        type_string = key.getTypeString()
	        if type_string == "String":
	            if not copy:
	                if unviewable == "raise":
	                    raise ValueError("Cannot extract string unless copy=True or unviewable='copy' or 'skip'.")
	                if unviewable == "skip":
	                    continue
	            # Strings are gathered record by record into a fixed-width array.
	            data = np.array([record.get(key) for record in cat],
	                            dtype=np.dtype((str, key.getSize())))
	        elif type_string == "Flag":
	            if not copy:
	                if unviewable == "raise":
	                    raise ValueError(
	                        "Cannot extract packed bit columns unless copy=True or unviewable='copy' or 'skip'."
	                    )
	                if unviewable == "skip":
	                    continue
	            data = cat.columns.get_bool_array(key)
	        else:
	            # Every other field type (including Angle) is read straight from the
	            # catalog's column accessor and copied only on request.
	            data = cat.columns.get(key)
	            if copy:
	                data = data.copy()
	        columns[name] = data
	    return cls(columns, copy=False)

	# Build one dask dataframe (df_dask) out of the filter-'r' deepCoadd_meas
	# catalogs of at most 100 patches per tract, adding magnitude columns to each
	# per-patch table before it is merged.
	repo_path = '/global/cscratch1/sd/descdm/DC1/DC1-phoSim-3a/'
	repo_info = desc.pserv.RepositoryInfo(repo_path)
	# NOTE(review): the original loop used `butler` without ever defining it, so
	# every patch failed with a NameError that the bare `except` silently hid.
	# Assuming the butler is meant to read the same repository -- confirm.
	butler = lsst.daf.persistence.Butler(repo_path)
	patches = repo_info.get_patches()
	n = 0  # number of non-empty patch catalogs merged so far
	df_dask = None  # stays None when no patch could be loaded
	for tract, patch_list in patches.items():
	    for patch in patch_list[:100]:
	        if n % 10 == 1:
	            # Single-argument print() behaves the same on Python 2 and 3.
	            print('Patch %s' % (patch,))
	        try:
	            df = asPandas(butler.get('deepCoadd_meas', filter='r', tract=tract, patch=patch))
	            calib = lsst.afw.image.Calib(butler.get('deepCoadd_md', filter='r', tract=tract, patch=patch))
	            calib.setThrowOnNegativeFlux(False)
	            # NOTE(review): the *_magSigma columns are getMagnitude() applied to
	            # the flux uncertainty, exactly as in the original; whether that is
	            # the intended magnitude error has not been verified.
	            df['modelfit_CModel_mag'] = calib.getMagnitude(df['modelfit_CModel_flux'].values)
	            df['modelfit_CModel_magSigma'] = calib.getMagnitude(df['modelfit_CModel_fluxSigma'].values)
	            df['base_PsfFlux_mag'] = calib.getMagnitude(df['base_PsfFlux_flux'].values)
	            df['base_PsfFlux_magSigma'] = calib.getMagnitude(df['base_PsfFlux_fluxSigma'].values)
	            if len(df) == 0:
	                # An empty catalog must not replace (or be appended to) the
	                # frame accumulated so far.
	                continue
	            if df_dask is None:
	                df_dask = dd.from_pandas(df, npartitions=100)
	            else:
	                df_dask = dd.concat([df_dask, df], interleave_partitions=True)
	            n += 1
	        except Exception as exc:
	            # Still best effort (a bad patch is skipped), but the failure is
	            # reported, and KeyboardInterrupt/SystemExit are no longer swallowed.
	            print('Skipping tract %s patch %s: %r' % (tract, patch, exc))