Created
November 27, 2012 05:45
-
-
Save ChemicalJames/4152580 to your computer and use it in GitHub Desktop.
HW7
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/perl -w | |
use strict; | |
use warnings; | |
use Bio::SeqIO; | |
my $seqio = Bio::SeqIO->new(-format=>'fasta',-file=>'ath_sRNA1.fa'); | |
my $adapter='CTGTAGGCACCATCAAT'; | |
open OUT,'>trimmed_seq.fa' or die "Cannot write to a file\n"; | |
my ($i,$j,$k) = 0; | |
while (my $seq = $seqio->next_seq) | |
{ | |
my $adapter_6nt = substr($adapter,0,6); | |
if ($seq->seq =~ "$adapter_6nt") | |
{ | |
my $pos = index($seq->seq, $adapter_6nt, -1); | |
my $str = substr($seq->seq, 0, $pos); | |
if($str =~ "N"||length($str)<18) #remove sequence less than 18nt | |
{ | |
$i++; #count sequences greater than 18nt | |
} | |
else | |
{ | |
print OUT ">",$seq->id,"\n"; | |
print OUT "$str\n"; | |
} | |
$j++; #count total number of sequences | |
} | |
$k++; #count number of sequences with the adapter | |
} | |
print "The total number of sequences in the file is $j.\nThe number of sequences with the 3'end adapter is $k.\nThe number of trimmed sequences greater than 18nt is $i.\n"; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/perl -w | |
use warnings; | |
use strict; | |
use Bio::SeqIO; | |
my $seq; | |
my $seqn; | |
my $id; | |
my $id_strt; | |
my $rptm; | |
open OUT, '>normalized.txt' or die "Cannot write to a file\n"; | |
my $file = 'trimmed_seq.fa'; | |
my $file2 = 'ath_miRNA.fa'; | |
my $in = Bio::SeqIO->new(-format => 'fasta', | |
-file => $file); | |
my %unique = (); | |
while ( my $seq_obj = $in->next_seq) { | |
$seq= $seq_obj->seq; | |
if (exists $unique{$seq}) | |
{ | |
$unique{$seq} = $unique{$seq}+1; | |
} | |
else | |
{ | |
$unique{$seq}=1; | |
} | |
} | |
for my $name (sort keys %unique) { | |
} | |
my $in2 = Bio::SeqIO->new(-format => 'fasta', | |
-file => $file2); | |
while ( my $seq_objct = $in2->next_seq) { | |
$seqn= $seq_objct->seq; | |
$id= $seq_objct->display_id; | |
$id_strt= substr ($id, 0, 3); | |
for my $name (sort keys %unique) { | |
$seqn =~ tr/U/T/; | |
if (($name eq $seqn) && ($id_strt eq "ath")) | |
{ | |
$rptm = (($unique{$name})*10000000)/4979605; | |
print OUT "ID" , "\t", "Norm_exp","\n"; | |
print OUT $id, "\t", $rptm, "\n"; | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/perl -w | |
#I copied parts of this script from a Perl website. | |
use warnings; | |
use strict; | |
use Bio::SeqIO; | |
use Statistics::Descriptive; | |
my$stat = Statistics::Descriptive::Full->new(); | |
my (%distrib); | |
my @bins = qw/18 19 20 21 22 23 24 25 26 27 28/; | |
my $fastaFile = 'trimmed_seq.fa'; | |
open (FASTA, "<$fastaFile"); | |
$/ = ">"; | |
my $junkFirstOne = <FASTA>; | |
while (<FASTA>) { | |
chomp; | |
my ($def,@seqlines) = split /\n/, $_; | |
my $seq = join '', @seqlines; | |
$stat->add_data(length($seq)); | |
} | |
%distrib = $stat->frequency_distribution(\@bins); | |
print "Total reads:\t" . $stat->count() . "\n"; | |
print "Total nt:\t" . $stat->sum() . "\n"; | |
print "Mean length:\t" . $stat->mean() . "\n"; | |
print "Median length:\t" . $stat->median() . "\n"; | |
print "Mode length:\t" . $stat->mode() . "\n"; | |
print "Max length:\t" . $stat->max() . "\n"; | |
print "Min length:\t" . $stat->min() . "\n"; | |
print "Length\t# Seqs\n"; | |
foreach (sort {$a <=> $b} keys %distrib) { | |
print "$_\t$distrib{$_}\n"; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/perl -w | |
use warnings; | |
use strict; | |
use Bio::SeqIO; | |
my $file = 'collapse_seq.fa'; | |
my $seq_in = Bio::SeqIO->new( -format => 'fasta', -file => $file); | |
while( my $seq1 = $seq_in->next_seq) { | |
my $seq = $seq1->seq; | |
my $unique = 0; $unique = $seq->length(); $unique++; | |
} | |
print "$The number of unique sequences is $unique.\n"; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment