Skip to content

Instantly share code, notes, and snippets.

@smsaladi
Created April 13, 2019 19:31
Show Gist options
  • Save smsaladi/0a8d298dfd7baa9d0cc1171695592db7 to your computer and use it in GitHub Desktop.
Save smsaladi/0a8d298dfd7baa9d0cc1171695592db7 to your computer and use it in GitHub Desktop.
Prepare domain databases for HH-suite
#!/bin/bash
# Prepare the NCBI CDD alignments as an HH-suite database.
#
# Produces cdd_fas, cdd_hhm and cdd_cs219 ffindex databases in cdd/hhsuite.
# Needs ffindex_build, ffindex_apply, ffindex_order, hhmake and cstranslate
# on PATH.
#
# Manual steps before running (the CDD version is in `cdd.info`; 3.17 at the
# time of writing):
#   wget ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/cdd.info
#   mkdir cdd
#   wget ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/fasta.tar.gz
# NOTE(review): nothing in this script unpacks fasta.tar.gz; the alignments
# presumably have to be extracted into cdd/fas by hand first -- confirm.

# Stop at the first failing step instead of building a database from
# partial output.
set -euo pipefail

cd cdd
mkdir -p fas

# remove overlapping datasets
rm -f -- fas/pfam* fas/cdd*

# Drop the last file extension of every alignment (fas/NAME.ext -> fas/NAME),
# presumably so the ffindex entry names carry no extension.
for aln in fas/*; do
  [[ -f "$aln" ]] || continue
  stem=${aln%.*}
  [[ "$stem" != "$aln" ]] || continue # no extension: nothing to rename
  mv -- "$aln" "$stem"
done

mkdir -p hhsuite
cd hhsuite

# Pack all alignments into one ffindex database.
ffindex_build -s cdd_fas.ff{data,index} ../fas/

# Run hhmake over every entry in the background while cstranslate works on
# the same input database in the foreground.
ffindex_apply cdd_fas.ff{data,index} \
  -i cdd_hhm.ffindex -d cdd_hhm.ffdata -- \
  hhmake -add_cons -i stdin -o stdout -v 0 > build_hhm.txt &
hhm_pid=$!

cstranslate -x 0.3 -c 4 -b -I fas -f -i cdd_fas -o cdd_cs219 > build_cstranslate.txt

# cdd_hhm must be complete before it is reordered below.
wait "$hhm_pid" || {
  printf 'hhmake run failed; see build_hhm.txt\n' >&2
  exit 1
}

# Entry names ordered by the third column of the cs219 index (presumably the
# entry length); the other two databases are rewritten in that same order.
sort -k3 -n cdd_cs219.ffindex | cut -f1 > sorting.dat

ffindex_order sorting.dat cdd_hhm.ff{data,index} cdd_hhm_ordered.ff{data,index}
mv cdd_hhm_ordered.ffindex cdd_hhm.ffindex
mv cdd_hhm_ordered.ffdata cdd_hhm.ffdata

ffindex_order sorting.dat cdd_fas.ff{data,index} cdd_fas_ordered.ff{data,index}
mv cdd_fas_ordered.ffindex cdd_fas.ffindex
mv cdd_fas_ordered.ffdata cdd_fas.ffdata

# Optional packaging, run by hand:
# cd ..
# mv hhsuite cdd3_17
# tar cf - cdd3_17 | pigz -9 -p 40 > cdd3_17_hhsuite.tar.gz
# mv cdd3_17 cdd3_17_hhsuite.tar.gz ~/dbs/hhsuite
#!/bin/bash
# Prepare the PANTHER 14.1 family alignments as an HH-suite database.
#
# Produces panther_a2m, panther_hhm and panther_cs219 ffindex databases in
# hmmscoring/PANTHER14.1/hhsuite. Needs ffindex_build, ffindex_apply,
# ffindex_order, hhmake and cstranslate on PATH.
#
# Manual steps before running:
#   wget ftp://ftp.pantherdb.org/panther_library/current_release/PANTHER14.1_hmmscoring.tgz
#   tar -I pigz -xf PANTHER14.1_hmmscoring.tgz

# Stop at the first failing step instead of building a database from
# partial output.
set -euo pipefail

cd hmmscoring/PANTHER14.1
mkdir -p aln

# One symlink per entry of books/, named after the entry and pointing at its
# cluster.pir. The target is relative to aln/, hence the leading "../".
for book in books/*; do
  [[ -e "$book" ]] || continue
  family=${book##*/}
  ln -sfn "../books/${family}/cluster.pir" "aln/${family}"
done

mkdir -p hhsuite
cd hhsuite

# Pack all alignments into one ffindex database.
ffindex_build -s panther_a2m.ff{data,index} ../aln/

# Run hhmake over every entry in the background while cstranslate works on
# the same input database in the foreground.
ffindex_apply panther_a2m.ff{data,index} \
  -i panther_hhm.ffindex -d panther_hhm.ffdata -- \
  hhmake -add_cons -i stdin -o stdout -v 0 > build_hhm.txt &
hhm_pid=$!

cstranslate -x 0.3 -c 4 -b -I a2m -f -i panther_a2m -o panther_cs219 > build_cstranslate.txt

# panther_hhm must be complete before it is reordered below.
wait "$hhm_pid" || {
  printf 'hhmake run failed; see build_hhm.txt\n' >&2
  exit 1
}

# Entry names ordered by the third column of the cs219 index (presumably the
# entry length); the other two databases are rewritten in that same order.
sort -k3 -n panther_cs219.ffindex | cut -f1 > sorting.dat

# The *_ordered files only exist after ffindex_order has run, so each pair of
# mv commands has to follow its ffindex_order call.
ffindex_order sorting.dat panther_hhm.ff{data,index} panther_hhm_ordered.ff{data,index}
mv panther_hhm_ordered.ffindex panther_hhm.ffindex
mv panther_hhm_ordered.ffdata panther_hhm.ffdata

ffindex_order sorting.dat panther_a2m.ff{data,index} panther_a2m_ordered.ff{data,index}
mv panther_a2m_ordered.ffindex panther_a2m.ffindex
mv panther_a2m_ordered.ffdata panther_a2m.ffdata

# Optional packaging, run by hand:
# cd ..
# mv hhsuite panther14_1
# tar cf - panther14_1 | pigz -9 -p 40 > panther14_1_hhsuite.tar.gz
# mv panther14_1 panther14_1_hhsuite.tar.gz ~/dbs/hhsuite
#!/bin/bash
# Prepare the SMART domain alignments as an HH-suite database.
#
# Produces smart_a2m, smart_hhm and smart_cs219 ffindex databases in
# smart_01_06_2016/hhsuite. Needs GNU parallel, reformat.pl, ffindex_build,
# ffindex_apply, ffindex_order, hhmake and cstranslate on PATH.
#
# Manual steps before running:
#   download through EMBLEM Tech Transfer Website
#   https://software.embl-em.de/software/18
#   tar -I pigz -xf 574f0475e39ff_smart_01_06_2016.tar.gz

# Stop at the first failing step instead of building a database from
# partial output. Steps that are expected to fail per file are guarded below.
set -euo pipefail
shopt -s nullglob

cd smart_01_06_2016
mkdir -p fas

inputs=(aln/*)
if ((${#inputs[@]} == 0)); then
  printf 'no alignments found in aln/\n' >&2
  exit 1
fi

# Convert each alignment from clu to fas format, writing fas/NAME without the
# extension. Per-file failures are expected and collected in retry.out, so a
# non-zero exit from parallel must not abort the script.
printf '%s\0' "${inputs[@]}" |
  parallel -0 "reformat.pl clu fas {} fas/{/.}" > reformat.out 2> retry.out || true
sed -i '/^$/d' retry.out

# check for unrelated errors: lines without '>' are not the failure handled
# by the retry below and need a manual look. grep exits 1 when there are
# none, which is the good case.
grep -v '>' retry.out || true

# Retry: field 7 of each error line (minus the trailing comma) is taken as
# the path of the file that failed; copy it into fas/ unchanged, again without
# the extension.
cut -f7 -d' ' retry.out | tr -d ',' | parallel "cp {} fas/{/.}" ||
  printf 'warning: some files listed in retry.out could not be copied\n' >&2

mkdir -p hhsuite
cd hhsuite

# Pack all alignments into one ffindex database.
ffindex_build -s smart_a2m.ff{data,index} ../fas/

# Run hhmake over every entry in the background while cstranslate works on
# the same input database in the foreground.
ffindex_apply smart_a2m.ff{data,index} \
  -i smart_hhm.ffindex -d smart_hhm.ffdata -- \
  hhmake -add_cons -i stdin -o stdout -v 0 > build_hhm.txt &
hhm_pid=$!

cstranslate -x 0.3 -c 4 -b -I a2m -f -i smart_a2m -o smart_cs219 > build_cstranslate.txt

# smart_hhm must be complete before it is reordered below.
wait "$hhm_pid" || {
  printf 'hhmake run failed; see build_hhm.txt\n' >&2
  exit 1
}

# Entry names ordered by the third column of the cs219 index (presumably the
# entry length); the other two databases are rewritten in that same order.
sort -k3 -n smart_cs219.ffindex | cut -f1 > sorting.dat

ffindex_order sorting.dat smart_hhm.ff{data,index} smart_hhm_ordered.ff{data,index}
mv smart_hhm_ordered.ffindex smart_hhm.ffindex
mv smart_hhm_ordered.ffdata smart_hhm.ffdata

ffindex_order sorting.dat smart_a2m.ff{data,index} smart_a2m_ordered.ff{data,index}
mv smart_a2m_ordered.ffindex smart_a2m.ffindex
mv smart_a2m_ordered.ffdata smart_a2m.ffdata

# Optional packaging, run by hand:
# cd ..
# mv hhsuite smart_01_06_2016
# tar cf - smart_01_06_2016 | pigz -9 -p 40 > smart_01_06_2016_hhsuite.tar.gz
# mv smart_01_06_2016 smart_01_06_2016_hhsuite.tar.gz ~/dbs/hhsuite
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment