Skip to content

Instantly share code, notes, and snippets.

@wflynny
Created May 12, 2021 14:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wflynny/5cb18436e83ab455ccb3d82647f3cc3a to your computer and use it in GitHub Desktop.
Save wflynny/5cb18436e83ab455ccb3d82647f3cc3a to your computer and use it in GitHub Desktop.
Convert a GSE microarray to AnnData
import scanpy as sc
import pandas as pd
import GEOparse
def gse_to_adata(gse):
X = gse.pivot_samples("VALUE")
key = list(gse.gpls.keys())[0]
annotated = X.index.to_frame(index=False).merge(
gse.gpls[key].table[["ID", "Gene Symbol"]],
left_on="ID_REF", right_on="ID"
)
annotated.set_index("ID_REF", inplace=True)
annotated.dropna(subset=["Gene Symbol"], inplace=True)
annotated = annotated[~annotated["Gene Symbol"].str.contains("///")]
X = X.loc[annotated.index]
X.index = annotated["Gene Symbol"]
avg = X.median(axis=1)
expressed_probes = avg >= avg.quantile(0.25)
X = X.loc[expressed_probes]
X = X.groupby("Gene Symbol").median()
adata = sc.AnnData(
X.T.values,
obs=gse.phenotype_data,
var=X.index.to_frame()
)
return adata
if __name__ == "__main__":
g = GEOparser.get_GEO(geo="GSE51981", silent=True)
adata = gse_to_adata(g)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment