Created
December 12, 2022 14:50
-
-
Save neftlon/8b5b7b65e4f62b2c5fa1c2d0deafce9e to your computer and use it in GitHub Desktop.
extract Neff scores from mmseqs2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/bash | |
# This is a short script that uses a NON-STANDARD version of mmseqs2 | |
# to extract Neff scores from a profile. | |
# | |
# Please set the MMSEQS and REFORMAT enviroment variables to point to the | |
# respective executables! Otherwise, this script will try to use default | |
# parameters and fail ungracefully. | |
# | |
# This script takes MSAs in .a3m format as input and produces a file | |
# containing the Neff scores. The following programs must be available | |
# to make this script work: | |
# * mmseqs: At the point of writing, I am not aware of a way of | |
# extracting Neff scores from an MSA using plain mmseqs. Therefore I | |
# extended mmseqs such that there is an additional command called | |
# `profile2neff` which is capable of outputing the `neffM` fieled of a | |
# `Sequence`. | |
# * reformat.pl: AFAIK, mmseqs's preferred format for MSAs is .sto. The | |
# `reformat.pl` utility script written by Johannes Soeding, available | |
# as part of hh-suite is capable of transforming .a3m MSAs to .sto. | |
USAGE="usage: $0 <i:infile.a3m> <o:outneffs>\n\nExtract Neff scores from an MSA in .a3m format." | |
MMSEQS=${MMSEQS:=mmseqs} | |
REFORMAT=${REFORMAT:=reformat.pl} | |
if [ "$#" -ne 2 ]; then | |
echo "invalid number of arguments: $#" | |
echo -e $USAGE | |
exit 2 | |
fi | |
if ! command -v $MMSEQS &> /dev/null; then | |
echo "unable to find a mmseqs executable. | |
please provide a path to this executable by specifying the path in the MMSEQS enviroment variable." | |
exit | |
fi | |
if ! command -v $REFORMAT &> /dev/null; then | |
echo "unable to find the reformat.pl script. | |
please provide a path to this executable by specifying the path in the REFORMAT enviroment variable." | |
exit | |
fi | |
WORKDIR=${WORKDIR:="/tmp"} | |
# conversion .a3m -> .sto | |
$REFORMAT a3m sto $1 ${WORKDIR}/interm.sto | |
# create an MSA database | |
$MMSEQS convertmsa ${WORKDIR}/interm.sto ${WORKDIR}/msaDb | |
# generate profile from MSA | |
$MMSEQS msa2profile ${WORKDIR}/msaDb ${WORKDIR}/profileDb | |
# extract Neff scores using custom command | |
$MMSEQS profile2neff ${WORKDIR}/profileDb $2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment