ChemicalJames/adapter_trim.pl

## adapter_trim.pl
#!/bin/perl -w

use strict;
use warnings;
use Bio::SeqIO;

my $seqio = Bio::SeqIO->new(-format=>'fasta',-file=>'ath_sRNA1.fa');
my $adapter='CTGTAGGCACCATCAAT';

open OUT,'>trimmed_seq.fa' or die "Cannot write to a file\n";

my ($i,$j,$k) = 0;
while (my $seq = $seqio->next_seq)
{
my $adapter_6nt = substr($adapter,0,6);
if ($seq->seq =~ "$adapter_6nt")
{
my $pos = index($seq->seq, $adapter_6nt, -1);
my $str = substr($seq->seq, 0, $pos);
if($str =~ "N"||length($str)<18) #remove sequence less than 18nt
{
$i++; #count sequences greater than 18nt
}
else
{
print OUT ">",$seq->id,"\n";
print OUT "$str\n";
}
$j++; #count total number of sequences
}
$k++; #count number of sequences with the adapter
}
print "The total number of sequences in the file is $j.\nThe number of sequences with the 3'end adapter is $k.\nThe number of trimmed sequences greater than 18nt is $i.\n";

## normalized_reads.pl
#!/bin/perl -w

use warnings;
use strict;
use Bio::SeqIO;

my $seq;
my $seqn;
my $id;
my $id_strt;
my $rptm;

open OUT, '>normalized.txt' or die "Cannot write to a file\n";

my $file = 'trimmed_seq.fa';
my $file2 = 'ath_miRNA.fa';
my $in = Bio::SeqIO->new(-format => 'fasta',
                         -file => $file);
my %unique = ();
while ( my $seq_obj = $in->next_seq) {
$seq= $seq_obj->seq;

if (exists $unique{$seq})
{
$unique{$seq} = $unique{$seq}+1;
}
else
{
$unique{$seq}=1;
}
}

for my $name (sort keys %unique) {
}

my $in2 = Bio::SeqIO->new(-format => 'fasta',
                         -file => $file2);
while ( my $seq_objct = $in2->next_seq) {

$seqn= $seq_objct->seq;
$id= $seq_objct->display_id;
$id_strt= substr ($id, 0, 3);
for my $name (sort keys %unique) {
$seqn =~ tr/U/T/;

if (($name eq $seqn) && ($id_strt eq "ath"))
{
$rptm = (($unique{$name})*10000000)/4979605;
print OUT "ID" , "\t", "Norm_exp","\n";
print OUT $id, "\t", $rptm, "\n";
}
}
}

## sRNA_length.pl
#!/bin/perl -w
#I copied parts of this script from a Perl website.

use warnings;
use strict;
use Bio::SeqIO;
use Statistics::Descriptive;

my$stat = Statistics::Descriptive::Full->new();
my (%distrib);
my @bins = qw/18 19 20 21 22 23 24 25 26 27 28/;

my $fastaFile = 'trimmed_seq.fa';
open (FASTA, "<$fastaFile");
$/ = ">";

my $junkFirstOne = <FASTA>;

while (<FASTA>) {
        chomp;
        my ($def,@seqlines) = split /\n/, $_;
        my $seq = join '', @seqlines;
        $stat->add_data(length($seq));
}


%distrib = $stat->frequency_distribution(\@bins);

print "Total reads:\t" . $stat->count() . "\n";
print "Total nt:\t" . $stat->sum() . "\n";
print "Mean length:\t" . $stat->mean() . "\n";
print "Median length:\t" . $stat->median() . "\n";
print "Mode length:\t" . $stat->mode() . "\n";
print "Max length:\t" . $stat->max() . "\n";
print "Min length:\t" . $stat->min() . "\n";
print "Length\t# Seqs\n";
foreach (sort {$a <=> $b} keys %distrib) {
        print "$_\t$distrib{$_}\n";
}

## unique.pl
#!/bin/perl -w

use warnings;
use strict;
use Bio::SeqIO;

my $file = 'collapse_seq.fa';
my $seq_in  = Bio::SeqIO->new( -format => 'fasta', -file => $file);

while( my $seq1 = $seq_in->next_seq) {
 my $seq = $seq1->seq;

 my $unique = 0; $unique = $seq->length(); $unique++;
}
print "$The number of unique sequences is $unique.\n";
	#!/bin/perl -w

	use strict;
	use warnings;
	use Bio::SeqIO;

	my $seqio = Bio::SeqIO->new(-format=>'fasta',-file=>'ath_sRNA1.fa');
	my $adapter='CTGTAGGCACCATCAAT';

	open OUT,'>trimmed_seq.fa' or die "Cannot write to a file\n";

	my ($i,$j,$k) = 0;
	while (my $seq = $seqio->next_seq)
	{
	my $adapter_6nt = substr($adapter,0,6);
	if ($seq->seq =~ "$adapter_6nt")
	{
	my $pos = index($seq->seq, $adapter_6nt, -1);
	my $str = substr($seq->seq, 0, $pos);
	if($str =~ "N"\|\|length($str)<18) #remove sequence less than 18nt
	{
	$i++; #count sequences greater than 18nt
	}
	else
	{
	print OUT ">",$seq->id,"\n";
	print OUT "$str\n";
	}
	$j++; #count total number of sequences
	}
	$k++; #count number of sequences with the adapter
	}
	print "The total number of sequences in the file is $j.\nThe number of sequences with the 3'end adapter is $k.\nThe number of trimmed sequences greater than 18nt is $i.\n";
	#!/bin/perl -w
	#I copied parts of this script from a Perl website.

	use warnings;
	use strict;
	use Bio::SeqIO;
	use Statistics::Descriptive;

	my$stat = Statistics::Descriptive::Full->new();
	my (%distrib);
	my @bins = qw/18 19 20 21 22 23 24 25 26 27 28/;

	my $fastaFile = 'trimmed_seq.fa';
	open (FASTA, "<$fastaFile");
	$/ = ">";

	my $junkFirstOne = <FASTA>;

	while (<FASTA>) {
	chomp;
	my ($def,@seqlines) = split /\n/, $_;
	my $seq = join '', @seqlines;
	$stat->add_data(length($seq));
	}


	%distrib = $stat->frequency_distribution(\@bins);

	print "Total reads:\t" . $stat->count() . "\n";
	print "Total nt:\t" . $stat->sum() . "\n";
	print "Mean length:\t" . $stat->mean() . "\n";
	print "Median length:\t" . $stat->median() . "\n";
	print "Mode length:\t" . $stat->mode() . "\n";
	print "Max length:\t" . $stat->max() . "\n";
	print "Min length:\t" . $stat->min() . "\n";
	print "Length\t# Seqs\n";
	foreach (sort {$a <=> $b} keys %distrib) {
	print "$_\t$distrib{$_}\n";
	}