Skip to content

Instantly share code, notes, and snippets.

View hyphaltip's full-sized avatar

Jason Stajich hyphaltip

View GitHub Profile
@hyphaltip
hyphaltip / parser_hmmer3_domtbl.pl
Created April 8, 2014 05:04
HMMER3 simple parsing
#Here is code for parsing
use strict;
use warnings;
# The table to parse is named by the first command-line argument.
my $hmmertable = shift @ARGV;
die "usage: $0 hmmer_table_file\n" unless defined $hmmertable;
# Three-argument open pins the mode to read-only, so a file name that
# begins with '>' or ends with '|' is treated as a plain path. The bareword
# handle is kept because the read loop that follows iterates <HMMERTABLE>.
open(HMMERTABLE, '<', $hmmertable) || die "$hmmertable: $!";
while(<HMMERTABLE>){
chomp;
next if /^\#/ || /^\s+$/;
my ($domain,$domacc,$tlen,$qname,$qacc,$qlen, $fullevalue,$fullscore,$fullbias,
@hyphaltip
hyphaltip / get_nearest_tree_neighbor.pl
Last active August 29, 2015 14:01
Get Nearest neighbor on tree
#!/usr/bin/perl
use strict;
use warnings;
use Bio::TreeIO;
my ($treefile,$taxon_name) = @ARGV;
my $in = Bio::TreeIO->new(-format => 'newick', -file => $treefile);
my $tree = $in->next_tree;
if( ! $tree ) {
die "cannot parse treefile $treefile and find a valid tree";
@hyphaltip
hyphaltip / get_isolate_info.pl
Last active August 29, 2015 14:06
Extract isolate metadata from accession names
#!/usr/bin/env perl
# The #! line needs an absolute interpreter path: the kernel resolves a bare
# "env" relative to the current directory, not through PATH.
use strict;
use warnings;
use Bio::DB::GenBank;
use Bio::SeqIO;
use Getopt::Long;
# remote retrieval of sequences from GenBank
my $db = Bio::DB::GenBank->new;
@hyphaltip
hyphaltip / cegma_format.pl
Last active August 29, 2015 14:07
Prepare sequences for CEGMA processing
#!/usr/bin/env perl
# The #! line needs an absolute interpreter path: the kernel resolves a bare
# "env" relative to the current directory, not through PATH.
use strict;
use warnings;
# Input and output directories; each can be overridden by the first and
# second command-line argument respectively.
my $dir = shift || "marker_files";
my $odir = shift || "marker_hmm";
# Create the output directory when it is missing, and stop right away if
# that fails instead of carrying on without it.
unless ( -d $odir ) {
    mkdir($odir) || die "cannot create $odir: $!";
}
# The bareword DIR handle is kept because the loop that follows calls
# readdir(DIR).
opendir(DIR,$dir)|| die "cannot open $dir: $!";
# Counter starting at 1. NOTE(review): its use lies past the visible code.
my $locusct =1;
for my $file ( readdir(DIR) ) {
@hyphaltip
hyphaltip / vividMutant
Created December 17, 2014 06:26
vividMutant
>vividMutant
ATGAGCCATACCGTGAACTCGAGCACCATGAACCCATGGGAGGTTGAGGC
GTAACAGCAATACCACTATGACCCTCGAACCGCGCCCACGGCCAACCCTC
TCTTCTTCCATACGCTCTACGCTCCCGGCGGTTATGACATTATGGGCTAT
CTGATTCAGATTATGAACAGGCCAAACCCCCAAGTAGAACTGGGACCTGT
TGACACGTCATGCGCTCTGATTCTGTGCGACCTGAAGCAAAAAGACACGC
CAATTGTGTACGCCTCGGAAGCTTTTCTCTATATGACAGGATACAGCAAT
GCGGAGGTCTTGGGGAGAAACTGCCGTTTTCTTCAGTCACCCGACGGAAT
GGTCAAGCCGAAATCGACAAGGAAGTACGTCGACTCCAACACGATCAATA
CGATGAGGAAAGCGATTGATAGGAACGCCGAGGTGCAGGTTGAGGTGGTC
@hyphaltip
hyphaltip / Nc20H.expr.tab
Last active August 29, 2015 14:17
UNIX simple commands and sorting
gene_id bundle_id chr left right FPKM FPKM_conf_lo FPKM_conf_hi status
NCU10129 18585 supercont10.1 1166 2603 50.9314 36.6581 65.2046 OK
NCU09901 18586 supercont10.1 3197 4838 15.5736 7.68094 23.4663 OK
NCU11134 18588 supercont10.1 15929 16647 0 0 0 OK
NCU09904 18589 supercont10.1 17889 19368 67.7417 51.2807 84.2028 OK
NCU09903 18587 supercont10.1 9603 13334 31.5378 20.3061 42.7695 OK
NCU09908 18592 supercont10.1 43708 44551 0 0 0 OK
NCU09907 18591 supercont10.1 40949 42632 0 0 0 OK
NCU09906 18590 supercont10.1 35915 39245 139.627 115.994 163.26 OK
NCU09910 18594 supercont10.1 49271 51866 22.6875 13.1612 32.2138 OK
This file has been truncated, but you can view the full file.
>MLOC_42952.1 cdna:novel contig:220312v1:morex_contig_2669042:2:251:-1 gene:MLOC_42952 transcript:MLOC_42952.1
GAGCTACTCGGGACTCCGCCGCCGCAGTCTTTCTCGATGGCCTTCTGTTTTTTCATACCC
TTGACCTTGAATATCATGATTGCATAGCTGCGGTGGACACCAAGGGAGAGACNNGCCTGA
TATTCAGGGTCCCTGGTGGTCAGGTTTATGATTTTGGCCAGGCTGATGACTTTGTTCAAC
ACGTCTTATTCAAGGTTCCTGGTGGTCGGTTTCATTATTTTGGTGTGGCTCATGGTTAAG
GTAAAGGAAG
>MLOC_42947.1 cdna:novel contig:220312v1:morex_contig_2668822:118:275:1 gene:MLOC_42947 transcript:MLOC_42947.1
CCCTTGGGCTCGCCGGGTTTCAGCACCGACCGGGTGCCTCCCAACACCACCACCAGCCGC
GGCGCCACCGACCCATCCTCCTACTCGGACGACGATGGCGAGGCGGAGGTCGACCCCAAT
GTGCACCCCGAGGACGACGGCACCACCGTTATCCTCGA
# would be additional tests in t/LocalDB/DBFasta.t
# test out writing the Bio::PrimarySeq::Fasta objects with SeqIO
$db = Bio::DB::Fasta->new($test_dbdir, -reindex => 1);
my $out = Bio::SeqIO->new(-format => 'genbank');
# works
$primary_seq = Bio::Seq->new(-primary_seq => $db->get_Seq_by_acc('AW057119'));
# fails
#$primary_seq = $db->get_Seq_by_acc('AW057119');
# concat multiple nexus formatted files into one nexus format file
# assuming sequence IDs are the same across all files
# and data is all aligned and all seqs are present in all files
use Bio::AlignIO;
use Bio::SimpleAlign;
use strict;
my %seqs;
for my $file ( @ARGV ) {
my $in = Bio::AlignIO->new(-format=> 'nexus', -file => $file);
@hyphaltip
hyphaltip / GenPept retrieval
Created November 30, 2010 02:11
Retrieve the full FASTA header or the full GenPept record
#!/usr/bin/perl -w
# Jason Stajich jason<at>bioperl.org
use strict;
use Bio::DB::GenPept;
use Bio::DB::GenBank;
use Bio::SeqIO;
# get the FASTA formatted header for BLAST database
my $db = Bio::DB::GenBank->new(-format => 'fasta');
my $out = Bio::SeqIO->new(-format => 'fasta');