Skip to content

Instantly share code, notes, and snippets.

@slavailn
Created April 26, 2016 06:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save slavailn/780375904356b92faa62a3cdbc6385ee to your computer and use it in GitHub Desktop.
Save slavailn/780375904356b92faa62a3cdbc6385ee to your computer and use it in GitHub Desktop.
#!/usr/bin/perl
#####################################################################
# This script is used to obtain methylation data in more #
# concise and usable form from Bismark methylation_extractor #
# output files. Basically it will take a methylation extractor #
# file, analyze it and retrieve only those C positions covered by #
# at least 10 reads. Furthermore it will count number of methylated #
# and unmethylated cytosines at that position and calculate percent #
# methylation. #
#####################################################################
use strict; use warnings;
use List::MoreUtils ':all';
# Open methylation extractor file specified in command line
# Specify global variables:
my $command_line_parameter = shift;
my $file;
my @positions;
my @unique_positions;
my %meth_cytosine;
my @meth_array;
my @methylated;
my @unmethylated;
# Subroutines:
#sub_help;
if ( $command_line_parameter eq "--help" || $command_line_parameter eq "-h" )
{
&sub_help;
}
else
{
$file = $command_line_parameter;
}
# Open methyl extractor file for reading;
open ( METH_EXT, "<$file" ) or die "Cannot open file: $!\n";
###################################################################
# This loop will will build an array of unique genomic positions
# where cytosine methylation could be detected;
while (<METH_EXT>)
{
chomp;
if ( $_ !~ m/^Bis/ )
{
my ($col1, $col2, $col3, $col4, $col5) = split( "\t", $_ );
push ( @positions, join ("\t", $col3, $col4) );
}
}
@unique_positions = uniq @positions;
close METH_EXT;
#####################################################################
#####################################################################
# This loop will populate the hash of arrays, where key corressponds
# the unique position in the genome and the value is an array of
# cutysine methylation calls, shown either as Z (methylated) or z
# (unmethylated).
open ( METH_EXT, "<$file" ) or die "Cannot open file: $!\n";
my @meth_ext_lines = <METH_EXT>;
foreach my $position ( @unique_positions )
{
my ( $uniq_chr, $uniq_pos ) = split( "\t", $position );
foreach my $meth_ext_line ( @meth_ext_lines )
{
chomp $meth_ext_line;
my ( $orig_col1, $orig_col2, $orig_col3, $orig_col4, $orig_col5 ) = split( "\t", $meth_ext_line );
if ( $uniq_chr eq $orig_col3 && $uniq_pos == $orig_col4 )
{
push( @meth_array, $orig_col5 );
}
}
$meth_cytosine{$position} = [@meth_array];
@meth_array = ();
}
########################################################################
########################################################################
# This loop will select only those cytosine positions covered by more or
# equal to 10 reads, than output the position coordinates, numbers of methylated, unmethylated
# cytosines and percent methylation associated with these particular Cs.
open( OUT_FILE, ">methyl_call.txt" ) or die "Cannot write to file: $!";
for my $key ( keys %meth_cytosine )
{
if ( scalar(@{ $meth_cytosine{$key} }) >= 10 )
{
my @cytosines = @{ $meth_cytosine{$key} };
foreach my $meth_call ( @cytosines )
{
if ( $meth_call eq "Z" )
{
push( @methylated, $meth_call );
}
elsif ( $meth_call eq "z" )
{
push( @unmethylated, $meth_call );
}
}
my $number_methylated = scalar( @methylated );
my $number_unmethylated = scalar( @unmethylated );
my $percent_methylated = $number_methylated/($number_methylated + $number_unmethylated)*100;
@methylated = ();
@unmethylated = ();
print OUT_FILE "$key\t$number_methylated\t$number_unmethylated\t$percent_methylated\n";
}
}
close OUT_FILE;
sub sub_help
{
# This subroutine print help message when
# --help of -h is specified in command line
print "\n";
print "****************************************************\n";
print "Usage: perl methyl_caller.pl [options] <file_name>\n";
print "Options --help or -h will cause the program to output this\nhelp message\n";
print "The output file will be saved in working directory as methyl_call.txt\n";
print "****************************************************\n\n";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment