Last active
December 16, 2015 07:09
-
-
Save standage/5396470 to your computer and use it in GitHub Desktop.
Script for loading TSAs into xGDBvm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
# Copyright (c) 2013, Daniel S. Standage <daniel.standage@gmail.com> | |
# | |
# Permission to use, copy, modify, and/or distribute this software for any | |
# purpose with or without fee is hereby granted, provided that the above | |
# copyright notice and this permission notice appear in all copies. | |
# | |
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
# Usage statement | |
print_usage() | |
{ | |
cat << EOF | |
Usage: xgdbvm-add-tsa.sh [options] tsa.fa tsa.gsq | |
Options: | |
-d MySQL database corresponding to the GDB; default is 'GDB001' | |
-h print this help message and exit | |
-l label for naming the MySQL tables to which TSA sequence and alignments | |
will be loaded; default is 'tsa', which creates tables 'tsa', | |
'gseg_tsa_good_pgs', 'gseg_tsa_good_pgs_exons', and | |
'gseg_tsa_good_pgs_introns' | |
-o output directory to which intermediate .sql files will be written; | |
default is current directory | |
-p MySQL password, if different from system default | |
-s directory containing xGDBvm scripts; default is '/xGDBvm/scripts' | |
-u MySQL username; default is 'gdbuser' | |
EOF | |
} | |
# Parse options | |
DB="GDB001" | |
LABEL="tsa" | |
OUTPATH="." | |
PASSWORD="xgdb" | |
SCRIPTDIR="/xGDBvm/scripts" | |
USERNAME="gdbuser" | |
while getopts "d:hl:o:p:s:u:" OPTION | |
do | |
case $OPTION in | |
d) | |
DB=$OPTARG | |
;; | |
h) | |
print_usage | |
exit 0 | |
;; | |
l) | |
LABEL=$OPTARG | |
;; | |
o) | |
OUTPATH=$OPTARG | |
;; | |
p) | |
PASSWORD=$OPTARG | |
;; | |
s) | |
SCRIPTDIR=$OPTARG | |
;; | |
u) | |
USERNAME=$OPTARG | |
;; | |
esac | |
done | |
shift $((OPTIND-1)) | |
if [[ $# != 2 ]]; then | |
echo -e "error: please provide 2 input files (sequence file in Fasta format and GeneSeqer alignment file)\n" | |
print_usage | |
exit 1 | |
fi | |
FASTA=$1 | |
GSQ=$2 | |
if [ ! -r $FASTA ]; then | |
echo -e "error: sequence file $FASTA not readable\n" | |
exit 1 | |
fi | |
if [ ! -r $GSQ ]; then | |
echo -e "error: alignment file $GSQ not readable\n" | |
exit 1 | |
fi | |
# SQL filenames | |
TSASQL="$OUTPATH/${LABEL}.sql" | |
ALGNSQL="$OUTPATH/gseg_${LABEL}_good_pgs.sql" | |
# Parse TSA sequences | |
$SCRIPTDIR/xGDBload_SeqFromFasta.pl $LABEL $FASTA > $TSASQL | |
if [ ! -s $TSASQL ]; then | |
echo -e "error: error creating file '$TSASQL'" | |
exit 1 | |
fi | |
# Parse TSA alignments | |
$SCRIPTDIR/xGDBload_PgsFromGSQ.pl -t "gseg_${LABEL}_good_pgs" \ | |
$GSQ \ | |
> $ALGNSQL | |
if [ ! -s $ALGNSQL ]; then | |
echo -e "error: error creating file '$ALGNSQL'" | |
exit 1 | |
fi | |
# Load data into MySQL | |
read -d '' TABLESQL <<EOF | |
CREATE TABLE $LABEL | |
( | |
gi varchar(250) NOT NULL DEFAULT '', | |
acc varchar(32) NOT NULL DEFAULT '', | |
clone varchar(32) DEFAULT NULL, | |
locus varchar(32) DEFAULT NULL, | |
version tinyint(4) NOT NULL DEFAULT '0', | |
description text, | |
seq text NOT NULL, | |
type enum('F','T','U') DEFAULT 'U', | |
PRIMARY KEY (gi), | |
KEY est_accIND (acc), | |
KEY putINDclone (clone), | |
KEY putINDlocus (locus), | |
FULLTEXT KEY putFT_Desc (description) | |
) ENGINE=MyISAM DEFAULT CHARSET=latin1; | |
CREATE TABLE gseg_${LABEL}_good_pgs | |
( | |
uid int(10) unsigned NOT NULL AUTO_INCREMENT, | |
gi varchar(250) DEFAULT NULL, | |
E_O enum('+','-','?') NOT NULL DEFAULT '+', | |
sim float NOT NULL DEFAULT '0', | |
mlength int(10) unsigned NOT NULL DEFAULT '0', | |
cov float NOT NULL DEFAULT '0', | |
gseg_gi varchar(32) NOT NULL DEFAULT '', | |
G_O enum('+','-','?') NOT NULL DEFAULT '+', | |
l_pos int(10) unsigned NOT NULL DEFAULT '0', | |
r_pos int(10) unsigned NOT NULL DEFAULT '0', | |
pgs text NOT NULL, | |
pgs_lpos int(10) unsigned NOT NULL DEFAULT '0', | |
pgs_rpos int(10) unsigned NOT NULL DEFAULT '0', | |
gseg_gaps blob NOT NULL, | |
pgs_gaps blob NOT NULL, | |
isCognate enum('True','False') NOT NULL DEFAULT 'True', | |
pairUID varchar(50) NOT NULL DEFAULT '', | |
mergeNOTE text, | |
PRIMARY KEY (uid), | |
KEY gpiC (gseg_gi), | |
KEY giIND (gi), | |
KEY gputgpINDlpos (l_pos), | |
KEY gputgpINDrpos (r_pos) | |
) ENGINE=MyISAM DEFAULT CHARSET=latin1; | |
CREATE TABLE gseg_${LABEL}_good_pgs_exons | |
( | |
pgs_uid int(10) unsigned NOT NULL DEFAULT '0', | |
num int(10) unsigned NOT NULL DEFAULT '0', | |
gseg_start bigint(20) unsigned NOT NULL DEFAULT '0', | |
gseg_stop bigint(20) unsigned NOT NULL DEFAULT '0', | |
pgs_start bigint(20) unsigned NOT NULL DEFAULT '0', | |
pgs_stop bigint(20) unsigned NOT NULL DEFAULT '0', | |
score float NOT NULL DEFAULT '0', | |
KEY gputgpeINDpn (pgs_uid,num) | |
) ENGINE=MyISAM DEFAULT CHARSET=latin1; | |
CREATE TABLE gseg_${LABEL}_good_pgs_introns | |
( | |
pgs_uid int(10) unsigned NOT NULL DEFAULT '0', | |
num int(10) unsigned NOT NULL DEFAULT '0', | |
gseg_start bigint(20) unsigned NOT NULL DEFAULT '0', | |
gseg_stop bigint(20) unsigned NOT NULL DEFAULT '0', | |
Dscore float NOT NULL DEFAULT '0', | |
Dsim float NOT NULL DEFAULT '-1', | |
Ascore float NOT NULL DEFAULT '0', | |
Asim float NOT NULL DEFAULT '-1', | |
PRIMARY KEY (pgs_uid,num) | |
) ENGINE=MyISAM DEFAULT CHARSET=latin1; | |
EOF | |
# Create tables | |
echo "$TABLESQL" | mysql -u $USERNAME -p$PASSWORD $DB | |
# Populate TSA table | |
mysql -u $USERNAME -p$PASSWORD $DB < $TSASQL | |
TSACOUNT=$(echo "SELECT COUNT(*) AS 'Transcripts uploaded:' from $LABEL" | mysql -u $USERNAME -p$PASSWORD $DB) | |
echo $TSACOUNT | |
# Populate alignment tables | |
mysql -u $USERNAME -p$PASSWORD $DB < $ALGNSQL | |
ALGNCOUNT=$(echo "SELECT COUNT(*) AS 'TSA alignments uploaded:' from gseg_${LABEL}_good_pgs" | mysql -u $USERNAME -p$PASSWORD $DB) | |
echo $ALGNCOUNT |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment