Skip to content

Instantly share code, notes, and snippets.

@cybersiddhu
Created March 12, 2012 18:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cybersiddhu/2023749 to your computer and use it in GitHub Desktop.
Save cybersiddhu/2023749 to your computer and use it in GitHub Desktop.
Fasta header of dictyBase proteins in NCBI format
#!/usr/bin/perl -w
use strict;
use Pod::Usage;
use Getopt::Long;
use Bio::SeqIO;
use Bio::PrimarySeq;
use IO::File;
# Rewrite the headers of a protein FASTA file into the
# 'gnl|dictyBase|<protein_id>' form, attaching a product name (looked up by
# gene ID in a tab-separated map file) as the description when one is known.
# See the POD at the end of this block for command-line usage.

my $mapfile;

# Default keeps the historical output location; -o/--output overrides it.
my $outfile = 'discoideum_polypeptide.fa';

# Exit with usage on unknown or malformed options instead of carrying on
# with a partially parsed command line.
GetOptions(
    'h|help'     => sub { pod2usage(1); },
    'm|map=s'    => \$mapfile,
    'o|output=s' => \$outfile
) or pod2usage(2);

die "no map file given\n"   if !$mapfile;
die "no fasta file given\n" if !$ARGV[0];

# Load the ID -> product lookup table (column 1 -> column 2 of the map file).
my $input = IO::File->new( $mapfile, 'r' )
    or die "cannot open file $mapfile: $!";
my %id2product;

# Test definedness explicitly: a method call in the loop condition does not
# get the implicit defined() that <$fh> gets, so a last line of "0" without a
# trailing newline would otherwise end the loop early.
while ( defined( my $line = $input->getline ) ) {
    chomp $line;
    my ( $id, $product ) = split /\t/, $line;

    # Blank lines and rows without an ID would create a bogus '' key.
    next if !defined $id or $id eq '';
    $id2product{$id} = $product;
}
$input->close;

my $seqio = Bio::SeqIO->new( -format => 'fasta', -file => $ARGV[0] );
my $seqout = Bio::SeqIO->new(
    -format => 'fasta',
    -file   => '>' . $outfile
);

while ( my $seq = $seqio->next_seq ) {
    my $seqid = $seq->id;
    my ( $protein_id, $gene_id );
    if ( $seqid =~ /\|/ ) {

        # ID looks like '<protein_id>|<gene_id>'.
        ( $protein_id, $gene_id ) = split /\|/, $seqid;
    }
    else {

        # No pipe in the ID: the gene ID is the first pipe-delimited field
        # of the description, if the record has a description at all.
        $protein_id = $seqid;
        my $desc = $seq->desc;
        ($gene_id) = split /\|/, $desc if defined $desc;
    }

    # An ID consisting only of pipes splits to nothing; fall back to an empty
    # string so the header is built without an uninitialized-value warning.
    $protein_id = '' if !defined $protein_id;

    # Only look up a product when a gene ID was actually found.
    my $product = defined $gene_id ? $id2product{$gene_id} : undef;

    my @seq_args = ( -seq => $seq->seq );
    if ( defined $product ) {

        # Known product: the ID gets a trailing '|' and the product becomes
        # the description.
        push @seq_args,
            -id          => 'gnl|dictyBase|' . $protein_id . '|',
            -description => $product;
    }
    else {
        push @seq_args, -id => 'gnl|dictyBase|' . $protein_id;
    }
    push @seq_args, -alphabet => 'protein';

    $seqout->write_seq( Bio::PrimarySeq->new(@seq_args) );
}

# Close both streams explicitly so buffered output is flushed before exit.
$seqio->close;
$seqout->close;

=head1 NAME

Rewrite dictyBase protein FASTA headers into the gnl|dictyBase|ID form

=head1 SYNOPSIS

    perl script.pl -m <mapfile> [-o <outfile>] <fasta_file>

=head1 OPTIONS

=over 4

=item B<-m>, B<--map> I<file>

Required. Tab-separated file; the first column is an ID and the second
column is the product name to attach to sequences carrying that ID.

=item B<-o>, B<--output> I<file>

Output FASTA file. Defaults to F<discoideum_polypeptide.fa>.

=item B<-h>, B<--help>

Print this usage message and exit.

=back

=head1 DESCRIPTION

Each sequence in the input FASTA file is written to the output file with
its ID replaced by C<gnl|dictyBase|E<lt>protein_idE<gt>>. The gene ID is
taken from the second pipe-delimited field of the sequence ID or, when the
ID contains no pipe, from the first pipe-delimited field of the
description. If the map file has a product for that gene ID, a trailing
C<|> is appended to the new ID and the product is used as the description.

=cut
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment