Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Convert ZTF Parquet lightcurve files to zort format
#!/usr/bin/env python3
import os
import pyarrow.parquet as pq
import pandas as pd
import numpy as np
def parquet_to_asciilc(inpath, outdir):
    """
    Convert ZTF lightcurve Parquet dataset for one field to the pre-DR5 ascii format

    Every Parquet file under `inpath` is read into a single table, sorted by
    filterid (descending), then rcid and objdec (ascending), and written to
    one text file in `outdir`. The file name is built from the field id and
    the min/max of objra and objdec over the whole table. Each object is
    written as a '#'-prefixed header line followed by one line per epoch,
    with epochs ordered by increasing hmjd.

    Parameters:
    -----------
    inpath: string
        path to field-level directory of Parquet files
        example: './data/ZTF/lc_dr5/0/field0350/'
    outdir: string
        path to directory for output of ascii text file for zort;
        it is created if it does not exist yet

    Raises:
    -------
    AssertionError
        if the dataset does not contain exactly one distinct fieldid
    """
    ds = pq.ParquetDataset(inpath, use_legacy_dataset=False)
    df = ds.read().to_pandas()
    df = df.sort_values(['filterid', 'rcid', 'objdec'],
                        ascending=[False, True, True])

    fields = df.fieldid.unique()
    # Raised explicitly instead of via `assert` so the check still runs under
    # `python -O`; the exception type is the same one `assert` would raise.
    if len(fields) != 1:
        raise AssertionError(
            'expected exactly one fieldid in {!r}, found {}: {}'.format(
                inpath, len(fields), list(fields)))
    field = fields[0]

    ramin = df.objra.min()
    ramax = df.objra.max()
    decmin = df.objdec.min()
    decmax = df.objdec.max()
    outname = os.path.join(
        outdir,
        'field{:06d}_ra{:.5f}to{:.5f}_dec{:.5f}to{:.5f}.txt'.format(
            field, ramin, ramax, decmin, decmax))

    # Create the output directory up front so a missing directory does not
    # abort the run after the dataset has already been loaded and sorted.
    # An empty `outdir` means the current directory; makedirs('') would fail.
    if outdir:
        os.makedirs(outdir, exist_ok=True)

    header_fmt = "# %17d %3d %1d %4d %2d %9.5f %9.5f\n"
    epoch_fmt = " %13.5f %6.3f %5.3f %6.3f %5d\n"

    # Both formats emit only ASCII characters, so pin the encoding.
    with open(outname, 'w', encoding='ascii') as fh:
        for row in df.itertuples():
            hmjd = row.hmjd
            mag = row.mag
            magerr = row.magerr
            clrcoeff = row.clrcoeff
            catflags = row.catflags

            # Build the whole object (header + epochs) and write it in one
            # call instead of issuing one tiny write per epoch.
            lines = [header_fmt % (row.objectid, row.nepochs, row.filterid,
                                   row.fieldid, row.rcid, row.objra,
                                   row.objdec)]
            # Epochs are emitted in order of increasing hmjd.
            lines.extend(
                epoch_fmt % (hmjd[i], mag[i], magerr[i],
                             clrcoeff[i], catflags[i])
                for i in np.argsort(hmjd))
            fh.writelines(lines)
if __name__ == '__main__':
    # Command-line entry point: two positional paths are parsed and handed
    # straight to parquet_to_asciilc.
    import argparse

    cli = argparse.ArgumentParser(description="""
Convert ZTF Parquet-format lightcurves to Zort ascii tarball
""")
    # Positional arguments, registered in the order they must be given.
    positionals = (
        ('inpath', 'path to field-level directory of Parquet files'),
        ('outdir', 'directory for output file'),
    )
    for arg_name, arg_help in positionals:
        cli.add_argument(arg_name, help=arg_help)

    options = cli.parse_args()
    parquet_to_asciilc(options.inpath, options.outdir)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment