slava ilnytskyy slavailn

## bowtie2annotation.pl
#! /usr/bin/perl
# This script will take a standard bowtie alignment output file
# containing alignments of small RNAs to various datasets, like
# miRNA, piRNA, repeats, genes etc. Alignment was performed to retain
# multimatches in order to have information about various features this
# particular small RNA could be assigned to.
# Our goal is to turn alignment file into a table with the following columns:
# 1. sRNA id
# 2. sRNA sequence
# 3. miRNA

## extract_fasta_by_pattern.pl
#! /usr/bin/perl
# This script will extract fasta entries whose ids match
# a user-specified regex.
# The list of regexes to be matched to fasta ids is
# stored in a different file.
use strict; use warnings;

# Both positional arguments are required: the first is taken as the fasta
# file, the second as the file with patterns. A missing (or false) argument
# aborts the script with a usage message.
my $fasta_file = shift or die "Please provide fasta file\n USAGE: $0 fastaFile queryFile\n";
my $query_file = shift or die "Please provide file with patterns to be matched\n USAGE: $0 fastaFile queryFile\n";

## build_fasta_hash.pl
#! /usr/bin/perl
use strict; use warnings;

# This script will parse a fasta file and create
# a hash where keys are fasta ids (everything after '>')
# and values are fasta sequences of arbitrary length (could be
# DNA, RNA, protein). I'm not checking the symbols in the sequence

# The fasta file name is the first command-line argument.
# No system call has failed when the argument is missing, so $! carries no
# relevant information here; a usage line is printed instead.
my $fasta_file = shift or die "Please, provide a fasta file\n USAGE: $0 fastaFile\n";

## filter_by_length.pl
#! /usr/bin/perl

# This script will extract a subset of fasta or fastq reads
# in a specified size range and save them to file

use strict; use warnings;

use Bio::SeqIO;
use Number::Range;
use Getopt::Long;

## find_identical_fasta.pl
#! /usr/bin/perl
use strict; use warnings;

# This script will find identical fasta sequences with different
# identifiers and output them into a new file in the form of
# (id1|id2). All of the other fasta entries will output as is
# Example:
# >id1
# ATTCGGTCC
# >id2

## format_converter.pl
#! /usr/bin/perl

# Universal? bioperl format converter

use strict; use warnings;
use Bio::SeqIO;
use Getopt::Long;

# Usage text naming the four long options: --in-file, --in-format,
# --out-file and --out-format.
my $usage = "format_converter.pl --in-file <input_file> --in-format <input_file_format>  --out-file <output_file>  --out-format <output_file_format>\n";
# Declared without a value; presumably filled from --in-file further down
# (not visible in this excerpt -- verify).
my $in_file;

## length_distribution.pl
#! /usr/bin/perl
use strict; use warnings;

# Get length distribution of sequences for any
# bioperl compatible format. Prints to STDOUT

use Bio::SeqIO;
use Number::Range;
use Getopt::Long;

## library_composition.pl
#! /usr/bin/perl
use strict; use warnings;

# The configuration file name is the first command-line argument.
# It must be checked before open(): a three-argument open() with an undef
# file name does not fail, it opens an anonymous temporary file, so a
# missing argument would otherwise be read as an empty configuration.
my $config = shift or die "Please provide configuration file\n USAGE: $0 configFile\n";
my $trimmed_reads; # Store file name with trimmed reads in fastq format
my @paths; # Store full path to each bowtie index
my @unaligned_files; # Store files with reads that could not be aligned by bowtie
my @aligned_files; # Store files with aligned sequences in fastq format, only needed to count reads
# Name the file in the error so a wrong path is easy to spot.
open ( my $config_in, "<", $config ) or die "Cannot open configuration file '$config': $!\n";

## match_fasta_files.pl
#! /usr/bin/perl

# Match 2 fasta files by id or by sequence
# print out common and unique entries

use strict; use warnings;
use Getopt::Long;
use List::MoreUtils qw(any);

# Variables available via command line options

## methyl_caller.pl
#!/usr/bin/perl

#####################################################################
# This script is used to obtain methylation data in more            #
# concise and usable form from Bismark methylation_extractor        #
# output files. Basically it will take a methylation extractor      #
# file, analyze it and retrieve only those C positions covered by   #
# at least 10 reads. Furthermore it will count number of methylated #
# and unmethylated cytosines at that position and calculate percent #
# methylation.                                                      #
	#! /usr/bin/perl
	# This script will take a standard bowtie alignment output file
	# containing alignments of small RNAs to various datasets, like
	# miRNA, piRNA, repeats, genes etc. Alignment was performed to retain
	# multimatches in order to have information about various features this
	# particular small RNA could be assigned to.
	# Our goal is to turn alignment file into a table with the following columns:
	# 1. sRNA id
	# 2. sRNA sequence
	# 3. miRNA
	#! /usr/bin/perl
	# This script will extract fasta entries whose ids match
	# a user-specified regex.
	# The list of regexes to be matched to fasta ids is
	# stored in a different file.
	use strict; use warnings;

	# Both positional arguments are required: the first is taken as the fasta
	# file, the second as the file with patterns. A missing (or false) argument
	# aborts the script with a usage message.
	my $fasta_file = shift or die "Please provide fasta file\n USAGE: $0 fastaFile queryFile\n";
	my $query_file = shift or die "Please provide file with patterns to be matched\n USAGE: $0 fastaFile queryFile\n";
	#! /usr/bin/perl
	use strict; use warnings;

	# This script will parse a fasta file and create
	# a hash where keys are fasta ids (everything after '>')
	# and values are fasta sequences of arbitrary length (could be
	# DNA, RNA, protein). I'm not checking the symbols in the sequence

	# The fasta file name is the first command-line argument.
	# No system call has failed when the argument is missing, so $! carries no
	# relevant information here; a usage line is printed instead.
	my $fasta_file = shift or die "Please, provide a fasta file\n USAGE: $0 fastaFile\n";
	#! /usr/bin/perl

	# This script will extract a subset of fasta or fastq reads
	# in a specified size range and save them to file

	use strict; use warnings;

	use Bio::SeqIO;
	use Number::Range;
	use Getopt::Long;
	#! /usr/bin/perl
	use strict; use warnings;

	# This script will find identical fasta sequences with different
	# identifiers and output them into a new file in the form of
	# (id1\|id2). All of the other fasta entries will output as is
	# Example:
	# >id1
	# ATTCGGTCC
	# >id2
	#! /usr/bin/perl

	# Universal? bioperl format converter

	use strict; use warnings;
	use Bio::SeqIO;
	use Getopt::Long;

	# Usage text naming the four long options: --in-file, --in-format,
	# --out-file and --out-format.
	my $usage = "format_converter.pl --in-file <input_file> --in-format <input_file_format> --out-file <output_file> --out-format <output_file_format>\n";
	# Declared without a value; presumably filled from --in-file further down
	# (not visible in this excerpt -- verify).
	my $in_file;
	#! /usr/bin/perl
	use strict; use warnings;

	# Get length distribution of sequences for any
	# bioperl compatible format. Prints to STDOUT

	use Bio::SeqIO;
	use Number::Range;
	use Getopt::Long;
	#! /usr/bin/perl
	use strict; use warnings;

	# The configuration file name is the first command-line argument.
	# It must be checked before open(): a three-argument open() with an undef
	# file name does not fail, it opens an anonymous temporary file, so a
	# missing argument would otherwise be read as an empty configuration.
	my $config = shift or die "Please provide configuration file\n USAGE: $0 configFile\n";
	my $trimmed_reads; # Store file name with trimmed reads in fastq format
	my @paths; # Store full path to each bowtie index
	my @unaligned_files; # Store files with reads that could not be aligned by bowtie
	my @aligned_files; # Store files with aligned sequences in fastq format, only needed to count reads
	# Name the file in the error so a wrong path is easy to spot.
	open ( my $config_in, "<", $config ) or die "Cannot open configuration file '$config': $!\n";
	#! /usr/bin/perl

	# Match 2 fasta files by id or by sequence
	# print out common and unique entries

	use strict; use warnings;
	use Getopt::Long;
	use List::MoreUtils qw(any);

	# Variables available via command line options
	#!/usr/bin/perl

	#####################################################################
	# This script is used to obtain methylation data in more #
	# concise and usable form from Bismark methylation_extractor #
	# output files. Basically it will take a methylation extractor #
	# file, analyze it and retrieve only those C positions covered by #
	# at least 10 reads. Furthermore it will count number of methylated #
	# and unmethylated cytosines at that position and calculate percent #
	# methylation. #