Skip to content

Instantly share code, notes, and snippets.

View slavailn's full-sized avatar

slava ilnytskyy slavailn

View GitHub Profile
#! /usr/bin/perl
# This script will take a standard bowtie alignment output file
# containing alignments of small RNAs to various datasets, like
# miRNA, piRNA, repeats, genes etc. Alignment was performed to retain
# multimatches in order to have information about various features this
# particular small RNA could be assigned to.
# Our goal is to turn alignment file into a table with the following columns:
# 1. sRNA id
# 2. sRNA sequence
# 3. miRNA
#! /usr/bin/perl
# This script will extract the ids of fasta entries that match
# user-specified regexes.
# The list of regexes to be matched to fasta ids is
# stored in a different file.
use strict; use warnings;
# Both positional arguments are mandatory. They are consumed in order
# (fasta file first, then the pattern file), and the script aborts with
# a usage hint as soon as one of them is missing or false.
my $fasta_file = shift @ARGV;
die "Please provide fasta file\n USAGE: $0 fastaFile queryFile\n"
    unless $fasta_file;

my $query_file = shift @ARGV;
die "Please provide file with patterns to be matched\n USAGE: $0 fastaFile queryFile\n"
    unless $query_file;
#! /usr/bin/perl
use strict; use warnings;
# This script will parse a fasta file and create
# a hash where keys are fasta ids (everything after '>')
# and values are fasta sequences of arbitrary length (could be
# DNA, RNA, protein). I'm not checking the symbols in the sequence
# Take the fasta file name from the first command-line argument.
# The error message deliberately omits $!: a missing argument is not a
# failed system call, so errno would only add stale, unrelated text.
my $fasta_file = shift or die "Please, provide a fasta file\n USAGE: $0 fastaFile\n";
#! /usr/bin/perl
# This script will extract a subset of fasta or fastq reads
# in a specified size range and save them to file
use strict; use warnings;
use Bio::SeqIO;
use Number::Range;
use Getopt::Long;
#! /usr/bin/perl
use strict; use warnings;
# This script will find identical fasta sequences with different
# identifiers and output them into a new file in the form of
# (id1|id2). All of the other fasta entries will output as is
# Example:
# >id1
# ATTCGGTCC
# >id2
#! /usr/bin/perl
# Universal? bioperl format converter
use strict; use warnings;
use Bio::SeqIO;
use Getopt::Long;
# Usage message, assembled from one entry per option so each flag is
# easy to spot; the pieces are joined with single spaces.
my $usage = join(
    ' ',
    'format_converter.pl',
    '--in-file <input_file>',
    '--in-format <input_file_format>',
    '--out-file <output_file>',
    '--out-format <output_file_format>',
) . "\n";

# Input file name; declared here without a value.
my $in_file;
#! /usr/bin/perl
use strict; use warnings;
# Get length distribution of sequences for any
# bioperl compatible format. Prints to STDOUT
use Bio::SeqIO;
use Number::Range;
use Getopt::Long;
#! /usr/bin/perl
use strict; use warnings;
# Configuration file name, taken from the first command-line argument.
my $config = shift;

# Fail early with a usage hint. Without this check a missing argument
# reaches open() as undef, which yields an "uninitialized value" warning
# and a misleading "Cannot open configuration file" error.
die "Please provide a configuration file\n USAGE: $0 configFile\n"
    unless defined $config && length $config;

my $trimmed_reads; # Store file name with trimmed reads in fastq format
my @paths; # Store full path to each bowtie index
my @unaligned_files; # Store files with reads that could not be aligned by bowtie
my @aligned_files; # Store files with aligned sequences in fastq format, only needed to count reads

# Open the configuration file for reading; abort with the OS error on failure.
open ( my $config_in, "<", $config ) or die "Cannot open configuration file: $!\n";
#! /usr/bin/perl
# Match 2 fasta files by id or by sequence
# print out common and unique entries
use strict; use warnings;
use Getopt::Long;
use List::MoreUtils qw(any);
# Variables available via command line options
#!/usr/bin/perl
#####################################################################
# This script is used to obtain methylation data in more #
# concise and usable form from Bismark methylation_extractor #
# output files. Basically it will take a methylation extractor #
# file, analyze it and retrieve only those C positions covered by #
# at least 10 reads. Furthermore it will count number of methylated #
# and unmethylated cytosines at that position and calculate percent #
# methylation. #