Created
June 21, 2017 13:39
-
-
Save ptgolden/e35add577b0b018bb3b0bda883438009 to your computer and use it in GitHub Desktop.
Filter genes for whitelist on http://tintori.bio.unc.edu
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Filter a CSV gene list down to the genes present in the gene whitelist.

Usage: pass the path of the gene file as the first command-line argument.
Each line of that file is kept when its first comma-separated field is an
entry of ``gene_whitelist.json``; the kept lines are printed to stdout.

If ``gene_whitelist.json`` is not in the current directory it is fetched
with ``wget`` from ``whitelist_url``.
"""
import os
import json
import sys
import subprocess

whitelist_url = 'https://raw.githubusercontent.com/ptgolden/ma-browser/master/src/gene_whitelist.json'

# Local file name wget produces for whitelist_url (the URL's last segment).
whitelist_file = 'gene_whitelist.json'


def filter_genes(lines, whitelist):
    """Return the lines whose first comma-separated field is in *whitelist*.

    lines     -- iterable of text lines (e.g. an open file); line endings
                 are preserved in the returned lines.
    whitelist -- container of gene names supporting ``in``.

    The line ending is removed before the first field is extracted, so a
    plain one-gene-per-line file (no commas) is matched correctly instead
    of comparing ``"gene\\n"`` against the whitelist.
    """
    return [
        line for line in lines
        if line.rstrip('\r\n').split(',', 1)[0] in whitelist
    ]


def main():
    """Load the whitelist, filter the file named in argv[1], print the result.

    Raises EnvironmentError when no input file argument is given, and
    subprocess.CalledProcessError when the whitelist download fails.
    """
    # Validate the argument first so a usage error does not trigger a download.
    try:
        csv_file = sys.argv[1]
    except IndexError:
        raise EnvironmentError('First argument must be a list of genes to filter')

    if not os.path.exists(whitelist_file):
        # check_call surfaces a failed download here rather than as a
        # confusing open()/JSON error further down.
        subprocess.check_call(['wget', '-nc', whitelist_url])

    with open(whitelist_file, 'r') as fp:
        whitelist = set(json.load(fp))

    # Context manager guarantees the input file is closed after filtering.
    with open(csv_file, 'r') as fp:
        genes = filter_genes(fp, whitelist)

    print(''.join(genes).strip())


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment