Skip to content

Instantly share code, notes, and snippets.

@sminot
Created April 4, 2022 19:48
Show Gist options
  • Save sminot/1294d4f93c3b4e45f1de56ab95d9a8ce to your computer and use it in GitHub Desktop.
Save sminot/1294d4f93c3b4e45f1de56ab95d9a8ce to your computer and use it in GitHub Desktop.
Extract the gene annotation table from the geneshot results HDF5
#!/usr/bin/env python3
import click
import os
import pandas as pd
# Set up the command line processor
@click.command()
@click.option(
    "--input_fp",
    required=True,
    type=click.Path(exists=True, dir_okay=False),
    help="Provide the path to the geneshot output file containing all gene abundances (*.results.hdf5)",
)
def geneshot_extract_gene_annot(input_fp):
    """Export the gene annotation table from a geneshot results file to CSV.

    Reads the table stored under ``/annot/gene/all`` in ``input_fp`` and
    writes it, without the index column, to ``<prefix>.gene_annot.csv``,
    where ``<prefix>`` is ``input_fp`` with its trailing ``.results.hdf5``
    removed.

    Args:
        input_fp: Path to an existing ``*.results.hdf5`` file. Presence and
            existence are enforced by the click option declaration.

    Raises:
        click.BadParameter: If ``input_fp`` does not end in ``.results.hdf5``.
        click.ClickException: If the file contains no ``/annot/gene/all`` key.
    """
    results_suffix = ".results.hdf5"
    annot_key = "/annot/gene/all"

    # Validate with click errors rather than `assert`: assertions are removed
    # under `python -O` and would otherwise surface as raw tracebacks.
    if not input_fp.endswith(results_suffix):
        raise click.BadParameter(
            "Input file must be named *.results.hdf5",
            param_hint="--input_fp",
        )

    # Strip only the trailing suffix; str.replace() would also rewrite any
    # earlier occurrence of the suffix (e.g. inside a directory name).
    output_fp = input_fp[: -len(results_suffix)] + ".gene_annot.csv"

    print(f"Reading {input_fp}")

    # Open the file read-only and pull the annotation table into memory
    with pd.HDFStore(input_fp, "r") as store:
        try:
            gene_annot = pd.read_hdf(store, annot_key)
        except KeyError as err:
            raise click.ClickException(
                f"Cannot find {annot_key} in file: {input_fp}"
            ) from err

    # Write out the gene annotation table
    gene_annot.to_csv(output_fp, index=False)
# Invoke the click command only when this file is executed as a script,
# so importing the module does not trigger argument parsing.
if __name__ == "__main__":
    geneshot_extract_gene_annot()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment