Skip to content

Instantly share code, notes, and snippets.

@ChemicalJames
Created November 27, 2012 05:45
Show Gist options
  • Save ChemicalJames/4152580 to your computer and use it in GitHub Desktop.
Save ChemicalJames/4152580 to your computer and use it in GitHub Desktop.
HW7
#!/bin/perl -w
use strict;
use warnings;
use Bio::SeqIO;
my $seqio = Bio::SeqIO->new(-format=>'fasta',-file=>'ath_sRNA1.fa');
my $adapter='CTGTAGGCACCATCAAT';
open OUT,'>trimmed_seq.fa' or die "Cannot write to a file\n";
my ($i,$j,$k) = 0;
while (my $seq = $seqio->next_seq)
{
my $adapter_6nt = substr($adapter,0,6);
if ($seq->seq =~ "$adapter_6nt")
{
my $pos = index($seq->seq, $adapter_6nt, -1);
my $str = substr($seq->seq, 0, $pos);
if($str =~ "N"||length($str)<18) #remove sequence less than 18nt
{
$i++; #count sequences greater than 18nt
}
else
{
print OUT ">",$seq->id,"\n";
print OUT "$str\n";
}
$j++; #count total number of sequences
}
$k++; #count number of sequences with the adapter
}
print "The total number of sequences in the file is $j.\nThe number of sequences with the 3'end adapter is $k.\nThe number of trimmed sequences greater than 18nt is $i.\n";
#!/bin/perl -w
use warnings;
use strict;
use Bio::SeqIO;
my $seq;
my $seqn;
my $id;
my $id_strt;
my $rptm;
open OUT, '>normalized.txt' or die "Cannot write to a file\n";
my $file = 'trimmed_seq.fa';
my $file2 = 'ath_miRNA.fa';
my $in = Bio::SeqIO->new(-format => 'fasta',
-file => $file);
my %unique = ();
while ( my $seq_obj = $in->next_seq) {
$seq= $seq_obj->seq;
if (exists $unique{$seq})
{
$unique{$seq} = $unique{$seq}+1;
}
else
{
$unique{$seq}=1;
}
}
for my $name (sort keys %unique) {
}
my $in2 = Bio::SeqIO->new(-format => 'fasta',
-file => $file2);
while ( my $seq_objct = $in2->next_seq) {
$seqn= $seq_objct->seq;
$id= $seq_objct->display_id;
$id_strt= substr ($id, 0, 3);
for my $name (sort keys %unique) {
$seqn =~ tr/U/T/;
if (($name eq $seqn) && ($id_strt eq "ath"))
{
$rptm = (($unique{$name})*10000000)/4979605;
print OUT "ID" , "\t", "Norm_exp","\n";
print OUT $id, "\t", $rptm, "\n";
}
}
}
#!/bin/perl -w
#I copied parts of this script from a Perl website.
use warnings;
use strict;
use Bio::SeqIO;
use Statistics::Descriptive;
my$stat = Statistics::Descriptive::Full->new();
my (%distrib);
my @bins = qw/18 19 20 21 22 23 24 25 26 27 28/;
my $fastaFile = 'trimmed_seq.fa';
open (FASTA, "<$fastaFile");
$/ = ">";
my $junkFirstOne = <FASTA>;
while (<FASTA>) {
chomp;
my ($def,@seqlines) = split /\n/, $_;
my $seq = join '', @seqlines;
$stat->add_data(length($seq));
}
%distrib = $stat->frequency_distribution(\@bins);
print "Total reads:\t" . $stat->count() . "\n";
print "Total nt:\t" . $stat->sum() . "\n";
print "Mean length:\t" . $stat->mean() . "\n";
print "Median length:\t" . $stat->median() . "\n";
print "Mode length:\t" . $stat->mode() . "\n";
print "Max length:\t" . $stat->max() . "\n";
print "Min length:\t" . $stat->min() . "\n";
print "Length\t# Seqs\n";
foreach (sort {$a <=> $b} keys %distrib) {
print "$_\t$distrib{$_}\n";
}
#!/bin/perl -w
use warnings;
use strict;
use Bio::SeqIO;
my $file = 'collapse_seq.fa';
my $seq_in = Bio::SeqIO->new( -format => 'fasta', -file => $file);
while( my $seq1 = $seq_in->next_seq) {
my $seq = $seq1->seq;
my $unique = 0; $unique = $seq->length(); $unique++;
}
print "$The number of unique sequences is $unique.\n";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment