Last active
April 7, 2020 18:29
-
-
Save xbrianh/04635e34c005c9a867b47dda2f5c9e84 to your computer and use it in GitHub Desktop.
Output the header lines (the leading lines starting with "#") of a block gzip file stored locally or in a Google Cloud Storage bucket.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Print the leading "#" header lines of a block-gzipped file to stdout.

The input may be a local file or an object in a Google Cloud Storage bucket
(given as ``gs://bucket/key``). Reading stops at the first line that does not
start with "#", so only the beginning of the file is decompressed.
"""
import io
import argparse
from contextlib import closing

import bgzip

GS_SCHEME = "gs://"


def _open_raw(cloudpath):
    """Return a binary file object for a local path or a ``gs://`` URL.

    Raises:
        ValueError: the ``gs://`` URL has no bucket name or no object key.
        FileNotFoundError: the local file or the bucket object does not exist.
    """
    if not cloudpath.startswith(GS_SCHEME):
        return open(cloudpath, "rb")

    # Imported lazily so that local files can be read without the Google
    # Cloud packages being installed.
    from google.cloud.storage import Client
    import gs_chunked_io as gscio

    bucket_name, _, key = cloudpath[len(GS_SCHEME):].partition("/")
    if not bucket_name or not key:
        raise ValueError(
            "expected a path of the form gs://bucket/key, got {!r}".format(cloudpath)
        )
    blob = Client().bucket(bucket_name).get_blob(key)
    if blob is None:
        # get_blob() signals a missing object with None instead of raising.
        raise FileNotFoundError("no such object: {}".format(cloudpath))
    return gscio.Reader(blob)


def main():
    """Parse the command line and print the header lines of the given file."""
    parser = argparse.ArgumentParser()
    parser.add_argument("cloudpath", help="location of file. Can be local or GCP bucket path (e.g. gs://key)")
    args = parser.parse_args()

    try:
        raw = _open_raw(args.cloudpath)
    except (ValueError, OSError) as err:
        # Report bad or missing paths as a usage error, not a traceback.
        parser.error(str(err))

    with closing(raw):
        with bgzip.BGZipReader(raw) as bgreader:
            with io.BufferedReader(bgreader) as reader:
                for line in reader:
                    if not line.startswith(b"#"):
                        break
                    # UTF-8 is a superset of ASCII; undecodable bytes are
                    # replaced so one stray byte cannot abort the output.
                    print(line.decode("utf-8", errors="replace").strip())


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Print the VCF header to stdout. This works for vcf.gz files stored either locally or on Google Cloud Storage.