Last active
March 1, 2024 14:43
-
-
Save LouisFaure/5493bbfc6f037179b0dfe59133b6571e to your computer and use it in GitHub Desktop.
Convert dropEst velocyto output to anndata
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
import warnings | |
warnings.filterwarnings("ignore") | |
import logging | |
import rpy2.robjects as robjects | |
from rpy2.robjects.packages import importr | |
from anndata2ri.scipy2ri._r2py import rmat_to_spmat | |
import anndata as ann | |
import pandas as pd | |
# Include standard modules | |
import argparse | |
help = 'Convert mtx derived from dropEst to anndata objects.' | |
parser = argparse.ArgumentParser(description=help) | |
parser.add_argument("--file", "-f", help="dropEst rds containing matrices for spliced, unspliced and spanning.") | |
parser.add_argument("--output", "-o", help="output path and file name (wihout extension).") | |
parser.add_argument("--h5ad", help="save the anndata file as h5ad (default is loom).", action='store_true') | |
logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO) | |
def dropEst2adata(rds): | |
readRDS = robjects.r['readRDS'] | |
logging.info("loading rds file") | |
ccm=readRDS(rds) | |
colnames = robjects.r("colnames") | |
rownames = robjects.r("rownames") | |
names = robjects.r("names") | |
def make_adata(mat): | |
return ann.AnnData(rmat_to_spmat(ccm.rx2(mat)).T, | |
obs=pd.DataFrame(index=list(colnames(ccm.rx2(mat)))), | |
var=pd.DataFrame(index=list(rownames(ccm.rx2(mat))))) | |
logging.info("generate anndata for each matrices") | |
adatas=[make_adata(mat) for mat in list(names(ccm))] | |
logging.info("concatenating") | |
adata=ann.AnnData.concatenate(*adatas,join="outer") | |
adata=ann.AnnData(adata[adatas[0].obs_names+"-0"].X, | |
obs=adatas[0].obs, | |
var=adata.var, | |
layers={"spliced":adata[adatas[0].obs_names+"-0"].X, | |
"unspliced":adata[adatas[0].obs_names+"-1"].X, | |
"ambiguous":adata[adatas[0].obs_names+"-2"].X}) | |
return adata | |
if __name__ == "__main__": | |
args = parser.parse_args() | |
if args.file is None: | |
parser.print_help(sys.stderr) | |
sys.exit(1) | |
if args.output is not None: | |
out = args.output | |
else: | |
out = "dropest_anndata" | |
adata = dropEst2adata(args.file) | |
if args.h5ad: | |
logging.info("writing to %s.h5ad"%out) | |
adata.write_h5ad(out+".h5ad") | |
else: | |
logging.info("writing %s.loom"%out) | |
adata.write_loom(out+".loom") | |
print(adata) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment