Skip to content

Instantly share code, notes, and snippets.

@Cadair
Created March 19, 2024 16:15
Show Gist options
  • Save Cadair/f327ecfd9236daf488ee9cc1ad631a99 to your computer and use it in GitHub Desktop.
import tarfile
from pathlib import Path
import numpy as np
from sunpy.net import Fido, attrs as a
import dkist
import dkist.net
# Set this to be the parent directory of both datasets
# (i.e. the directory that contains the BKPLX and AJQWW dataset folders).
WORKING_DIR = Path("/data/dkist/prod/pid_2_114")
# Uncomment this to run a globus transfer which will download the two datasets to the above dir
# (Assumes your Globus paths and filesystem paths are the same)
# dkist.net.transfer_complete_datasets(["BKPLX", "AJQWW"], path=WORKING_DIR/"{dataset_id}")
def make_tar(dataset, archive_dir=None, tarfilename=None, base_path=None, tile_slice=np.s_[...]):
    """Bundle a DKIST dataset's files (plus its ASDF metadata file) into a tar archive.

    Parameters
    ----------
    dataset : dkist.Dataset or dkist.TiledDataset
        The dataset (or tiled mosaic) whose files should be archived.
    archive_dir : pathlib.Path, optional
        Directory in which to write the tar file. Defaults to the current
        working directory.
    tarfilename : str, optional
        Filename template for the archive; formatted with the dataset
        inventory, e.g. ``"{datasetId}_stokesI.tar"``. Defaults to
        ``"<datasetId>.tar"``.
    base_path : pathlib.Path, optional
        Directory containing the dataset's files. Defaults to the dataset's
        own ``files.basepath``.
    tile_slice : slice, optional
        For tiled datasets, the slice applied to each tile before collecting
        its filenames. Defaults to ``np.s_[...]`` (all files).
    """
    ds = dataset
    if isinstance(dataset, dkist.TiledDataset):
        # Gather filenames from (a slice of) every tile in the mosaic.
        filenames = []
        for tile in dataset.flat:
            filenames += tile[tile_slice].files.filenames
        # Pick one tile to use to get the base path
        ds = tile
    else:
        filenames = dataset.files.filenames

    archive_dir = archive_dir or Path(".")
    # Fix: the original did `tarfilename.format(...) or f"..."`, which raised
    # AttributeError when tarfilename was None (its default), making the
    # fallback unreachable. Branch explicitly instead.
    if tarfilename is None:
        tarfilename = f"{dataset.inventory['datasetId']}.tar"
    else:
        tarfilename = tarfilename.format(**dataset.inventory)
    base_path = base_path or ds.files.basepath

    with tarfile.open(archive_dir / tarfilename, "w") as tf:
        for fname in filenames:
            tf.add(base_path / fname, arcname=fname)
        # Also include the dataset's ASDF metadata file, if one is present.
        if asdf_file := list(base_path.glob("*.asdf")):
            tf.add(asdf_file[0], arcname=asdf_file[0].name)
# VISP
# Load the VISP dataset and archive a single sub-dataset; bkplx[0] is
# presumably the Stokes-I slice (per the tar filename) — confirm against the data.
bkplx = dkist.load_dataset(WORKING_DIR / "BKPLX")
make_tar(bkplx[0], tarfilename="{datasetId}_stokesI.tar", archive_dir=Path("~").expanduser())
# VBI
# Load the VBI tiled dataset and archive only the first file of each tile
# (tile_slice=np.s_[0]), writing the tar to the user's home directory.
ajqww = dkist.load_dataset(WORKING_DIR / "AJQWW")
make_tar(ajqww, tile_slice=np.s_[0], tarfilename="{datasetId}_single_mosaic.tar", archive_dir=Path("~").expanduser())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment