Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save avrilcoghlan/e52689b72c1d773ed73f6e479335a553 to your computer and use it in GitHub Desktop.
Save avrilcoghlan/e52689b72c1d773ed73f6e479335a553 to your computer and use it in GitHub Desktop.
Script that uses the WormBase REST API to retrieve phenotypes (from RNAi experiments and mutants) for an input list of C. elegans genes.
import os
import sys
import requests # this is used to access json files
#====================================================================#
# use the wormbase REST API to retrieve the phenotypes (from mutants, RNAi) for a particular gene:
def retrieve_phenotypes_from_wormbase(gene, timeout=60):
    """Retrieve the phenotypes (from mutants, RNAi) that WormBase lists for a gene.

    Sends a GET request to the WormBase REST API field
    ``/rest/field/gene/<gene>/phenotype`` and collects the identifier and
    label of every phenotype entry in the JSON response.

    Args:
        gene: WormBase gene identifier, e.g. "WBGene00000079".
        timeout: Number of seconds to wait for the server, passed straight to
            ``requests.get``. Without it a stalled connection would block
            the script forever.

    Returns:
        A list of ``(phenotype_id, label)`` tuples, in the order the server
        returned them. The list is empty when the response carries no
        phenotype data.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.exceptions.Timeout: if the server does not answer within
            ``timeout`` seconds.
        KeyError: if the response lacks the expected "phenotype"/"data" keys.
    """
    server = "http://rest.wormbase.org"
    ext = "/rest/field/gene/%s/phenotype" % gene
    # Headers are kept exactly as before.
    # NOTE(review): presumably the Content-Type header is what makes the
    # server answer in JSON -- confirm against the WormBase REST docs.
    r = requests.get(
        server + ext,
        headers={"Content-Type": "application/json", "Accept": ""},
        timeout=timeout,
    )
    # raise_for_status() is a no-op on success and raises on 4xx/5xx, so no
    # separate `r.ok` check (or sys.exit() after it) is needed.
    r.raise_for_status()
    decoded = r.json()
    # Layout based on the example
    # http://rest.wormbase.org/rest/field/gene/WBGene00000079/phenotype
    # NOTE(review): "data" appears to be null for a gene without phenotype
    # annotations; treat that as "no phenotypes" instead of crashing.
    entries = decoded["phenotype"]["data"] or []
    results = []
    for entry in entries:
        phenotype = entry["phenotype"]
        results.append((phenotype["id"], phenotype["label"]))
    return results
#====================================================================#
# read in the input list of C.elegans genes of interest:
def read_input_list_of_genes(input_genelist_file, output_file):
    """Look up the phenotypes of every gene in a list and write them to a file.

    Each non-blank line of the input file must hold at least two
    whitespace-separated columns, with the gene identifier in the second
    one, e.g. ``1 WBGene00000079``. For every gene one output line per
    phenotype is written, in the tab-separated form
    ``gene<TAB>phenotype_id<TAB>label``. A gene for which the lookup returns
    no phenotypes contributes no output lines.

    Args:
        input_genelist_file: Path of the gene list to read.
        output_file: Path of the file to (over)write with the results.

    Raises:
        ValueError: if a non-blank input line has fewer than two columns.
        OSError: if either file cannot be opened.
    """
    cnt = 0  # number of genes processed so far, used for progress output
    # Open the input first so that a missing/unreadable gene list does not
    # truncate an existing output file; `with` closes both files even when
    # a lookup or write raises part-way through.
    with open(input_genelist_file, "r") as genes_in, \
            open(output_file, "w") as out:
        for line_number, line in enumerate(genes_in, start=1):
            fields = line.split()
            if not fields:
                # Blank line (e.g. a trailing newline at the end of the file).
                continue
            if len(fields) < 2:
                raise ValueError(
                    "%s, line %d: expected at least 2 columns, got %r"
                    % (input_genelist_file, line_number, line.rstrip())
                )
            gene = fields[1]  # e.g. WBGene00000079
            cnt += 1
            # get the phenotypes for this gene:
            print(cnt,"Finding the phenotypes for gene",gene)
            results = retrieve_phenotypes_from_wormbase(gene)
            # write the phenotypes to the output file:
            for phenotype_id, label in results:
                out.write("%s\t%s\t%s\n" % (gene, phenotype_id, label))
#====================================================================#
def main():
    """Command-line entry point: validate the arguments and run the lookup.

    Expects exactly two arguments after the script name: the path of an
    existing gene-list file and the path of the output file. If the argument
    count is wrong or the gene-list file does not exist, a usage message is
    printed and the process exits with status 1.
    """
    argv = sys.argv
    # The existence test is only evaluated once the argument count is right,
    # so argv[1] is never indexed when it is absent.
    arguments_valid = len(argv) == 3 and os.path.exists(argv[1])
    if not arguments_valid:
        print("Usage: %s input_genelist_file output_file" % argv[0])
        sys.exit(1)

    # argv[1]: input file with a list of C.elegans genes of interest
    # argv[2]: file the phenotypes are written to
    input_genelist_file, output_file = argv[1], argv[2]

    print("Reading in gene list...")
    read_input_list_of_genes(input_genelist_file, output_file)
    print("FINISHED\n")
#====================================================================#
# Run the command-line entry point only when executed as a script,
# not when this file is imported as a module.
if __name__ == "__main__":
    main()
#====================================================================#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment