Skip to content

Instantly share code, notes, and snippets.

@standage
Last active December 16, 2015 07:09
Show Gist options
  • Save standage/5396470 to your computer and use it in GitHub Desktop.
Save standage/5396470 to your computer and use it in GitHub Desktop.
Script for loading TSAs into xGDBvm
#!/usr/bin/env bash
# Copyright (c) 2013, Daniel S. Standage <daniel.standage@gmail.com>
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
# Usage statement
print_usage()
{
cat << EOF
Usage: xgdbvm-add-tsa.sh [options] tsa.fa tsa.gsq
Options:
-d MySQL database corresponding to the GDB; default is 'GDB001'
-h print this help message and exit
-l label for naming the MySQL tables to which TSA sequence and alignments
will be loaded; default is 'tsa', which creates tables 'tsa',
'gseg_tsa_good_pgs', 'gseg_tsa_good_pgs_exons', and
'gseg_tsa_good_pgs_introns'
-o output directory to which intermediate .sql files will be written;
default is current directory
-p MySQL password, if different from system default
-s directory containing xGDBvm scripts; default is '/xGDBvm/scripts'
-u MySQL username; default is 'gdbuser'
EOF
}
# Parse options
DB="GDB001"
LABEL="tsa"
OUTPATH="."
PASSWORD="xgdb"
SCRIPTDIR="/xGDBvm/scripts"
USERNAME="gdbuser"
while getopts "d:hl:o:p:s:u:" OPTION
do
case $OPTION in
d)
DB=$OPTARG
;;
h)
print_usage
exit 0
;;
l)
LABEL=$OPTARG
;;
o)
OUTPATH=$OPTARG
;;
p)
PASSWORD=$OPTARG
;;
s)
SCRIPTDIR=$OPTARG
;;
u)
USERNAME=$OPTARG
;;
esac
done
shift $((OPTIND-1))
if [[ $# != 2 ]]; then
echo -e "error: please provide 2 input files (sequence file in Fasta format and GeneSeqer alignment file)\n"
print_usage
exit 1
fi
FASTA=$1
GSQ=$2
if [ ! -r $FASTA ]; then
echo -e "error: sequence file $FASTA not readable\n"
exit 1
fi
if [ ! -r $GSQ ]; then
echo -e "error: alignment file $GSQ not readable\n"
exit 1
fi
# SQL filenames
TSASQL="$OUTPATH/${LABEL}.sql"
ALGNSQL="$OUTPATH/gseg_${LABEL}_good_pgs.sql"
# Parse TSA sequences
$SCRIPTDIR/xGDBload_SeqFromFasta.pl $LABEL $FASTA > $TSASQL
if [ ! -s $TSASQL ]; then
echo -e "error: error creating file '$TSASQL'"
exit 1
fi
# Parse TSA alignments
$SCRIPTDIR/xGDBload_PgsFromGSQ.pl -t "gseg_${LABEL}_good_pgs" \
$GSQ \
> $ALGNSQL
if [ ! -s $ALGNSQL ]; then
echo -e "error: error creating file '$ALGNSQL'"
exit 1
fi
# Load data into MySQL
read -d '' TABLESQL <<EOF
CREATE TABLE $LABEL
(
gi varchar(250) NOT NULL DEFAULT '',
acc varchar(32) NOT NULL DEFAULT '',
clone varchar(32) DEFAULT NULL,
locus varchar(32) DEFAULT NULL,
version tinyint(4) NOT NULL DEFAULT '0',
description text,
seq text NOT NULL,
type enum('F','T','U') DEFAULT 'U',
PRIMARY KEY (gi),
KEY est_accIND (acc),
KEY putINDclone (clone),
KEY putINDlocus (locus),
FULLTEXT KEY putFT_Desc (description)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
CREATE TABLE gseg_${LABEL}_good_pgs
(
uid int(10) unsigned NOT NULL AUTO_INCREMENT,
gi varchar(250) DEFAULT NULL,
E_O enum('+','-','?') NOT NULL DEFAULT '+',
sim float NOT NULL DEFAULT '0',
mlength int(10) unsigned NOT NULL DEFAULT '0',
cov float NOT NULL DEFAULT '0',
gseg_gi varchar(32) NOT NULL DEFAULT '',
G_O enum('+','-','?') NOT NULL DEFAULT '+',
l_pos int(10) unsigned NOT NULL DEFAULT '0',
r_pos int(10) unsigned NOT NULL DEFAULT '0',
pgs text NOT NULL,
pgs_lpos int(10) unsigned NOT NULL DEFAULT '0',
pgs_rpos int(10) unsigned NOT NULL DEFAULT '0',
gseg_gaps blob NOT NULL,
pgs_gaps blob NOT NULL,
isCognate enum('True','False') NOT NULL DEFAULT 'True',
pairUID varchar(50) NOT NULL DEFAULT '',
mergeNOTE text,
PRIMARY KEY (uid),
KEY gpiC (gseg_gi),
KEY giIND (gi),
KEY gputgpINDlpos (l_pos),
KEY gputgpINDrpos (r_pos)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
CREATE TABLE gseg_${LABEL}_good_pgs_exons
(
pgs_uid int(10) unsigned NOT NULL DEFAULT '0',
num int(10) unsigned NOT NULL DEFAULT '0',
gseg_start bigint(20) unsigned NOT NULL DEFAULT '0',
gseg_stop bigint(20) unsigned NOT NULL DEFAULT '0',
pgs_start bigint(20) unsigned NOT NULL DEFAULT '0',
pgs_stop bigint(20) unsigned NOT NULL DEFAULT '0',
score float NOT NULL DEFAULT '0',
KEY gputgpeINDpn (pgs_uid,num)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
CREATE TABLE gseg_${LABEL}_good_pgs_introns
(
pgs_uid int(10) unsigned NOT NULL DEFAULT '0',
num int(10) unsigned NOT NULL DEFAULT '0',
gseg_start bigint(20) unsigned NOT NULL DEFAULT '0',
gseg_stop bigint(20) unsigned NOT NULL DEFAULT '0',
Dscore float NOT NULL DEFAULT '0',
Dsim float NOT NULL DEFAULT '-1',
Ascore float NOT NULL DEFAULT '0',
Asim float NOT NULL DEFAULT '-1',
PRIMARY KEY (pgs_uid,num)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
EOF
# Create tables
echo "$TABLESQL" | mysql -u $USERNAME -p$PASSWORD $DB
# Populate TSA table
mysql -u $USERNAME -p$PASSWORD $DB < $TSASQL
TSACOUNT=$(echo "SELECT COUNT(*) AS 'Transcripts uploaded:' from $LABEL" | mysql -u $USERNAME -p$PASSWORD $DB)
echo $TSACOUNT
# Populate alignment tables
mysql -u $USERNAME -p$PASSWORD $DB < $ALGNSQL
ALGNCOUNT=$(echo "SELECT COUNT(*) AS 'TSA alignments uploaded:' from gseg_${LABEL}_good_pgs" | mysql -u $USERNAME -p$PASSWORD $DB)
echo $ALGNCOUNT
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment