Created
June 19, 2009 17:32
-
-
Save xwu/132744 to your computer and use it in GitHub Desktop.
Download necessary data for Trait-o-matic core
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download the reference data sets used by Trait-o-matic core and load the
# ones that go straight into the database.
#
# Tools invoked below: sudo, wget, gunzip, sed, awk, python, the mysql client,
# and the Trait-o-matic helper scripts under /usr/share/trait.
#
# Optional environment overrides:
#   TRAIT_DB_USER      MySQL account used for the snp129 load (default: updater)
#   TRAIT_DB_PASSWORD  password for that account (default: shakespeare)

# Stop at the first failed step: every later step consumes the output of an
# earlier download, so continuing after a failure would only process missing
# or truncated files. `set -eu` is used (not pipefail) to stay sh-portable.
set -eu

trait_dir=/var/trait
script_dir=/usr/share/trait
db_user=${TRAIT_DB_USER:-updater}
db_password=${TRAIT_DB_PASSWORD:-shakespeare}

ucsc_url=http://hgdownload.cse.ucsc.edu/goldenPath/hg18
dbsnp_url=ftp://ftp.ncbi.nih.gov:21/snp/organisms/human_9606/database/organism_data
omim_url=ftp://ftp.ncbi.nih.gov/repository/OMIM
# wget -r mirrors into a local directory tree named after the host and path,
# so the same string serves as URL suffix and as local directory.
hapmap_dir=ftp.hapmap.org/frequencies/2009-02_phaseII+III/forward/non-redundant

# Reference genome (2bit) goes to the shared data directory.
# -p makes the script re-runnable when the directory already exists.
sudo mkdir -p "${trait_dir}"
sudo wget "${ucsc_url}/bigZips/hg18.2bit" -O "${trait_dir}/hg18.2bit"

# Everything else is downloaded into the invoking user's home directory.
cd "${HOME}"

# dbSNP (only two tables)
# Explicit -O keeps the file names stable on re-runs (plain wget would save
# a second copy as *.1 and gunzip would then process the stale file).
wget "${dbsnp_url}/OmimVarLocusIdSNP.bcp.gz" -O OmimVarLocusIdSNP.bcp.gz
wget "${dbsnp_url}/b129/b129_SNPChrPosOnRef_36_3.bcp.gz" -O b129_SNPChrPosOnRef_36_3.bcp.gz
# -f overwrites output left behind by a previous run instead of stopping.
gunzip -f OmimVarLocusIdSNP.bcp.gz
gunzip -f b129_SNPChrPosOnRef_36_3.bcp.gz

# HapMap
wget -r -l1 --no-parent "http://${hapmap_dir}/"
# Drop the directory index pages that the recursive fetch saves alongside the data.
rm -f "${hapmap_dir}"/index.*
gunzip -f "${hapmap_dir}"/allele_*
# -- load HapMap data immediately (this may take several hours in the background)
# The job is intentionally not waited for; its exit status is therefore not
# checked by this script.
sudo python "${script_dir}/hapmap_load_database.py" "${hapmap_dir}"/allele_* &

# morbidmap/OMIM
wget "${omim_url}/morbidmap" -O morbidmap.txt

# OMIM
wget "${omim_url}/omim.txt.Z" -O omim.txt.Z
gunzip -f omim.txt.Z
python "${script_dir}/omim_print_variants.py" omim.txt > omim.tsv

# refFlat/UCSC
wget "${ucsc_url}/database/refFlat.txt.gz" -O refFlat.txt.gz
gunzip -f refFlat.txt.gz

# snp/UCSC
wget "${ucsc_url}/database/snp129.txt.gz" -O snp129.txt.gz
gunzip -f snp129.txt.gz
# -- load these data immediately (note the use of password)
# The input path is spelled out with ${HOME}: the shell does not expand "~"
# inside a quoted string, so the original '~/snp129.txt' depended on the
# client doing that expansion itself.
# NOTE(review): the password is passed on the command line and can be seen by
# other local users while mysql starts; consider a protected option file.
mysql -u"${db_user}" -p"${db_password}" -e "USE caliban; LOAD DATA LOCAL INFILE '${HOME}/snp129.txt' INTO TABLE snp129 FIELDS TERMINATED BY '\t' LINES TERMINATED BY '\n';"

# SNPedia (retrieval may take up to 30 min with no visual response)
python "${script_dir}/snpedia.py" > snpedia.txt
# -- clean up some descriptive text
# Removes the first " (None)" on each line; the unedited file is kept as snpedia.txt.bak.
sed -i'.bak' 's/ (None)//' snpedia.txt
# Keep a row unless its 5th tab-separated field starts with "normal" or "?";
# rows whose 5th field contains a ";" are always kept.
awk 'BEGIN { FS = "\t" }; ($5 !~ /(^normal)|(^\?)/ || $5 ~ /;/)' snpedia.txt > snpedia.filtered.txt
python "${script_dir}/snpedia_print_genotypes.py" snpedia.filtered.txt > snpedia.tsv
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment