Jason Stajich hyphaltip

## Bio::DB::Fasta and writing out rich format
# would be additional tests in t/LocalDB/DBFasta.t
# test out writing the Bio::PrimarySeq::Fasta objects with SeqIO

# Open the FASTA database under $test_dbdir; -reindex => 1 is passed so the
# index is presumably rebuilt rather than reused (confirm against Bio::DB::Fasta docs).
$db = Bio::DB::Fasta->new($test_dbdir, -reindex => 1);
# GenBank-format SeqIO object; no -file/-fh is given in this snippet.
my $out = Bio::SeqIO->new(-format => 'genbank');
# works: wrap the object returned by get_Seq_by_acc in a Bio::Seq via -primary_seq
$primary_seq = Bio::Seq->new(-primary_seq => $db->get_Seq_by_acc('AW057119'));
# fails: using the object returned by get_Seq_by_acc directly
# (NOTE(review): presumably the failure occurs when writing it with $out -- the write call is not shown here)
#$primary_seq = $db->get_Seq_by_acc('AW057119');

## gist:202528
# concat multiple nexus formatted files into one nexus format file
# assuming sequence IDs are the same across all files
# and data is all aligned and all seqs are present in all files

use Bio::AlignIO;
use Bio::SimpleAlign;
use strict;
my %seqs;
for my $file ( @ARGV ) {
 my $in = Bio::AlignIO->new(-format=> 'nexus', -file => $file);

## GenPept retrieval
#!/usr/bin/perl -w
# Jason Stajich jason<at>bioperl.org
use strict;
use Bio::DB::GenPept;
use Bio::DB::GenBank;
use Bio::SeqIO;

# Get the FASTA-formatted header for a BLAST database.
# $db is a GenBank retrieval handle constructed with -format => 'fasta'.
my $db = Bio::DB::GenBank->new(-format => 'fasta');
# FASTA-format SeqIO object; no -file/-fh is given here
# (NOTE(review): presumably writes to STDOUT -- confirm against Bio::SeqIO docs).
my $out = Bio::SeqIO->new(-format => 'fasta');

## README
The .blast file is tblastn output from BLAST+ run with -max_intron_length 300; the text or -outfmt 6 output is shown.

The wublast output is from tblastn run with -links and hspsepsmax - you can see the two hits grouped as one HSP group (hits 3 and 4 out of the set).

## Description of problem.
Hi all,

  Clueless newbie here (first time touching Perl 48 hours ago...), for which apologies.

I'm trying to take a genbank file (.gb), and create a FASTA file with a specific identifier line for each sequence. Specifically, I want the "host" tag as the identifier. With the help of the Bioperl beginner readme and the HOWTO's (which are great!) I've worked out how to loop through my sequences and get the 'host' tag for each one. For some reason, I get two identifier lines for each sequence. I guess the problem is in the 'for' loop--it's running the stuff below it twice, once with the actual 'host' tag data and once with...nothing? Not sure.
I think I can work out how to use s/ and a regex just to delete the second identifier line, but that feels like I'm avoiding the problem instead of fixing it.  Any help appreciated!

Many thanks,

haywardjeremya@gmail.com

## sequence_ORF_finding.pl
#!/usr/bin/perl
use warnings;
use strict;
my $seq ="AGACAAGTCGGACGTTTCATCTGAGGGTTCTTCTGCCTCCGCACTTGGTGCACATCAGACAAGGCAATCA
TGGGGGACGCTCAGATGGCAGAGTTTGGAGCAGCAGCTTCTTACCTGCGAAAGTCAGATCGAGAGCGTCT
GGAAGCACAAACCCGTCCCTTTGATATGAAAAAGGAGTGTTTTGTGCCTGATCCAGATGAAGAGTATGTA
AAAGCTTCAATCGTCAGTCGTGAAGGTGACAAAGTCACTGTACAGACTGAGAAAAGAAAGACTGTAACTG
TAAAGGAAGCTGACATTCACCCCCAGAACCCTCCAAAGTTTGATAAAATTGAAGACATGGCAATGTTCAC
CTTCCTTCATGAGCCAGCCGTGCTGTTCAACCTCAAAGAGCGCTATGCAGCATGGATGATCTATACCTAC
TCAGGACTGTTTTGTGTCACTGTCAACCCCTACAAGTGGCTGCCGGTGTACAATCAGGAGGTGGTTGTAG

## mRNASeq.pl
#!/usr/bin/perl
use warnings;
use strict;

my $seq ="AGACAAGTCGGACGTTTCATCTGAGGGTTCTTCTGCCTCCGCACTTGGTGCACATCAGACAAGGCAATCA
TGGGGGACGCTCAGATGGCAGAGTTTGGAGCAGCAGCTTCTTACCTGCGAAAGTCAGATCGAGAGCGTCT
GGAAGCACAAACCCGTCCCTTTGATATGAAAAAGGAGTGTTTTGTGCCTGATCCAGATGAAGAGTATGTA
AAAGCTTCAATCGTCAGTCGTGAAGGTGACAAAGTCACTGTACAGACTGAGAAAAGAAAGACTGTAACTG
TAAAGGAAGCTGACATTCACCCCCAGAACCCTCCAAAGTTTGATAAAATTGAAGACATGGCAATGTTCAC
CTTCCTTCATGAGCCAGCCGTGCTGTTCAACCTCAAAGAGCGCTATGCAGCATGGATGATCTATACCTAC

## problem1_1.pl
#!/usr/bin/perl -w
use strict;
use warnings;

use Bio::SeqIO;
use Bio::Seq;
use Bio::AlignIO;

my $sequence;
my $seq_obj;

## Week2 #5-7
#!/usr/bin/perl
use strict;
use warnings;

my @seqnames = ("AAC35278", "AnCSMA", "AfCHSF", "AAF19257", "P30573-1");
my @seqs = ("LLIAITYYNEDKVLTARTLHGVMQNPAWQKIVVCLVFDGIDPVLATIGV-VMKKDVDGKE","AMCLVTCYSEGEEGIRTTLDSIALTPN-SHKSIVVICDGIIKVLRMMRD-TGSKRHNMAK", "ALCLVTCYSEGEEGIRTTLDSIAMTPN$

for ( my $i = 0; $i <= 4 ; $i++) {
print "Sequence name is $seqnames[$i]\n";
my @residues = split('-',$seqs[$i]);

## barcode.pl
#!/bin/perl -w
#sort sequences into two files according to 5' barcode

use strict;
use warnings;
use Bio::SeqIO;


my $file = 'trimmed_seq.fa';
my $in = Bio::SeqIO->new(-format => 'Fasta',
	# would be additional tests in t/LocalDB/DBFasta.t
	# test out writing the Bio::PrimarySeq::Fasta objects with SeqIO

	$db = Bio::DB::Fasta->new($test_dbdir, -reindex => 1);
	my $out = Bio::SeqIO->new(-format => 'genbank');
	# works
	$primary_seq = Bio::Seq->new(-primary_seq => $db->get_Seq_by_acc('AW057119'));
	# fails
	#$primary_seq = $db->get_Seq_by_acc('AW057119');
	# concat multiple nexus formatted files into one nexus format file
	# assuming sequence IDs are the same across all files
	# and data is all aligned and all seqs are present in all files

	use Bio::AlignIO;
	use Bio::SimpleAlign;
	use strict;
	my %seqs;
	for my $file ( @ARGV ) {
	my $in = Bio::AlignIO->new(-format=> 'nexus', -file => $file);
	#!/usr/bin/perl -w
	# Jason Stajich jason<at>bioperl.org
	use strict;
	use Bio::DB::GenPept;
	use Bio::DB::GenBank;
	use Bio::SeqIO;

	# get the FASTA formatted header for BLAST database
	my $db = Bio::DB::GenBank->new(-format => 'fasta');
	my $out = Bio::SeqIO->new(-format => 'fasta');
	The .blast file is tblastn output from BLAST+ run with -max_intron_length 300; the text or -outfmt 6 output is shown.

	The wublast output is from tblastn run with -links and hspsepsmax - you can see the two hits grouped as one HSP group (hits 3 and 4 out of the set).
	Hi all,

	Clueless newbie here (first time touching Perl 48 hours ago...), for which apologies.

	I'm trying to take a genbank file (.gb), and create a FASTA file with a specific identifier line for each sequence. Specifically, I want the "host" tag as the identifier. With the help of the Bioperl beginner readme and the HOWTO's (which are great!) I've worked out how to loop through my sequences and get the 'host' tag for each one. For some reason, I get two identifier lines for each sequence. I guess the problem is in the 'for' loop--it's running the stuff below it twice, once with the actual 'host' tag data and once with...nothing? Not sure.
	I think I can work out how to use s/ and a regex just to delete the second identifier line, but that feels like I'm avoiding the problem instead of fixing it. Any help appreciated!

	Many thanks,

	haywardjeremya@gmail.com
	#!/usr/bin/perl
	use warnings;
	use strict;
	my $seq ="AGACAAGTCGGACGTTTCATCTGAGGGTTCTTCTGCCTCCGCACTTGGTGCACATCAGACAAGGCAATCA
	TGGGGGACGCTCAGATGGCAGAGTTTGGAGCAGCAGCTTCTTACCTGCGAAAGTCAGATCGAGAGCGTCT
	GGAAGCACAAACCCGTCCCTTTGATATGAAAAAGGAGTGTTTTGTGCCTGATCCAGATGAAGAGTATGTA
	AAAGCTTCAATCGTCAGTCGTGAAGGTGACAAAGTCACTGTACAGACTGAGAAAAGAAAGACTGTAACTG
	TAAAGGAAGCTGACATTCACCCCCAGAACCCTCCAAAGTTTGATAAAATTGAAGACATGGCAATGTTCAC
	CTTCCTTCATGAGCCAGCCGTGCTGTTCAACCTCAAAGAGCGCTATGCAGCATGGATGATCTATACCTAC
	TCAGGACTGTTTTGTGTCACTGTCAACCCCTACAAGTGGCTGCCGGTGTACAATCAGGAGGTGGTTGTAG
	#!/bin/perl -w
	#sort sequences into two files according to 5' barcode

	use strict;
	use warnings;
	use Bio::SeqIO;


	my $file = 'trimmed_seq.fa';
	my $in = Bio::SeqIO->new(-format => 'Fasta',