Created
May 10, 2024 19:27
-
-
Save aofarrel/ac8d7bb0c24bbdd25d61aca73b94a62a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Notes:
# * Works for SAMN, SAME, and SAMD BioSamples (should also work for SRS/ERS format)
# * Grabs date and location of sample isolation, host, sample source, and strain
# * edirect tools need to be on $PATH, or you can use my Docker image for a pre-installed version: https://hub.docker.com/r/ashedpotatoes/sranwrp/tags
# * elink is known to randomly fail so this code doesn't use it -- however, without elink, you can't get the SRA reads (SRR/ERR/DRR) that make up a BioSample
import subprocess

# BioSample accessions to look up, one esearch | efetch | xtract pipeline each.
biosamples = ["SAMEA6451356","SAMEA6451357","SAMEA6451358","SAMEA6451360","SAMEA6451361","SAMEA6451362","SAMEA6451363","SAMEA6451366","SAMEA6451367","SAMEA6451368","SAMEA6451369","SAMEA6451370","SAMEA6451371","SAMEA6451372","SAMEA6451373","SAMEA6451374","SAMEA6451375","SAMEA6451377"]

# (harmonized_name, xtract variable) for each attribute column, in the order
# the columns are emitted after the SRA column.
ATTRIBUTE_COLUMNS = (
    ("collection_date", "DATE"),
    ("geo_loc_name", "LOC"),
    ("host", "HOST"),
    ("isolation_source", "SOURCE"),
    ("strain", "STRAIN"),
)


def build_xtract_args():
    """Return the xtract command line as an argv list.

    The tokens are the same ones a shell would produce from the original
    one-string command, so xtract can be run without ``shell=True``.
    Each column follows one template: the variable is first given the
    literal "(NA)", then overwritten when a matching element is found, then
    printed -- presumably so that a missing field still yields a placeholder
    column (confirm against the EDirect xtract documentation).
    """
    args = [
        "xtract", "-pattern", "BioSample", "-element",
        "-SRA", "(NA)",
        "-block", "Id", "-if", "Id@db", "-equals", "SRA", "-SRA", "Id",
        "-block", "Ids", "-first", "Id", "-element", "&SRA",
    ]
    for harmonized_name, variable in ATTRIBUTE_COLUMNS:
        args += [
            f"-{variable}", "(NA)",
            "-block", "Attribute", "-if", "Attribute@harmonized_name",
            "-equals", harmonized_name, f"-{variable}", "Attribute",
            "-block", "Attributes", "-element", f"&{variable}",
        ]
    return args


def fetch_metadata(accession):
    """Run esearch | efetch | xtract for one accession and return xtract's stdout.

    Args:
        accession: BioSample accession passed to ``esearch -query``.

    Returns:
        The text xtract printed for this accession.

    Raises:
        subprocess.CalledProcessError: if xtract exits with a non-zero status.
        FileNotFoundError: if one of the EDirect tools is not on $PATH.
    """
    search_cmd = ["esearch", "-db", "biosample", "-query", accession]
    fetch_cmd = ["efetch", "-format", "docsum"]
    # The Popen context managers close the pipes and wait for the children,
    # so no file descriptors or zombie processes are left behind per accession.
    with subprocess.Popen(search_cmd, stdout=subprocess.PIPE) as esearch:
        with subprocess.Popen(fetch_cmd, stdin=esearch.stdout, stdout=subprocess.PIPE) as efetch:
            # Drop the parent's copy of the pipe so esearch gets SIGPIPE if
            # efetch exits early.
            esearch.stdout.close()
            return subprocess.check_output(build_xtract_args(), stdin=efetch.stdout, text=True)


def write_metadata(rows, path='metadata.tsv'):
    """Write the collected xtract outputs to *path*, concatenated as-is."""
    with open(path, 'w') as f:
        f.writelines(rows)


def main():
    """Look up every accession in ``biosamples``, echo each result, then save them all."""
    outs = []
    for accession in biosamples:
        row = fetch_metadata(accession)
        outs.append(row)
        print(row)
    # Written only after every lookup succeeded, matching the original flow.
    write_metadata(outs)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment