Created
April 26, 2016 06:32
-
-
Save slavailn/780375904356b92faa62a3cdbc6385ee to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
##################################################################### | |
# This script is used to obtain methylation data in more # | |
# concise and usable form from Bismark methylation_extractor # | |
# output files. Basically it will take a methylation extractor # | |
# file, analyze it and retrieve only those C positions covered by # | |
# at least 10 reads. Furthermore it will count number of methylated # | |
# and unmethylated cytosines at that position and calculate percent # | |
# methylation. # | |
##################################################################### | |
use strict; use warnings; | |
use List::MoreUtils ':all'; | |
# Open methylation extractor file specified in command line | |
# Specify global variables: | |
my $command_line_parameter = shift; | |
my $file; | |
my @positions; | |
my @unique_positions; | |
my %meth_cytosine; | |
my @meth_array; | |
my @methylated; | |
my @unmethylated; | |
# Subroutines: | |
#sub_help; | |
if ( $command_line_parameter eq "--help" || $command_line_parameter eq "-h" ) | |
{ | |
&sub_help; | |
} | |
else | |
{ | |
$file = $command_line_parameter; | |
} | |
# Open methyl extractor file for reading; | |
open ( METH_EXT, "<$file" ) or die "Cannot open file: $!\n"; | |
################################################################### | |
# This loop will will build an array of unique genomic positions | |
# where cytosine methylation could be detected; | |
while (<METH_EXT>) | |
{ | |
chomp; | |
if ( $_ !~ m/^Bis/ ) | |
{ | |
my ($col1, $col2, $col3, $col4, $col5) = split( "\t", $_ ); | |
push ( @positions, join ("\t", $col3, $col4) ); | |
} | |
} | |
@unique_positions = uniq @positions; | |
close METH_EXT; | |
##################################################################### | |
##################################################################### | |
# This loop will populate the hash of arrays, where key corressponds | |
# the unique position in the genome and the value is an array of | |
# cutysine methylation calls, shown either as Z (methylated) or z | |
# (unmethylated). | |
open ( METH_EXT, "<$file" ) or die "Cannot open file: $!\n"; | |
my @meth_ext_lines = <METH_EXT>; | |
foreach my $position ( @unique_positions ) | |
{ | |
my ( $uniq_chr, $uniq_pos ) = split( "\t", $position ); | |
foreach my $meth_ext_line ( @meth_ext_lines ) | |
{ | |
chomp $meth_ext_line; | |
my ( $orig_col1, $orig_col2, $orig_col3, $orig_col4, $orig_col5 ) = split( "\t", $meth_ext_line ); | |
if ( $uniq_chr eq $orig_col3 && $uniq_pos == $orig_col4 ) | |
{ | |
push( @meth_array, $orig_col5 ); | |
} | |
} | |
$meth_cytosine{$position} = [@meth_array]; | |
@meth_array = (); | |
} | |
######################################################################## | |
######################################################################## | |
# This loop will select only those cytosine positions covered by more or | |
# equal to 10 reads, than output the position coordinates, numbers of methylated, unmethylated | |
# cytosines and percent methylation associated with these particular Cs. | |
open( OUT_FILE, ">methyl_call.txt" ) or die "Cannot write to file: $!"; | |
for my $key ( keys %meth_cytosine ) | |
{ | |
if ( scalar(@{ $meth_cytosine{$key} }) >= 10 ) | |
{ | |
my @cytosines = @{ $meth_cytosine{$key} }; | |
foreach my $meth_call ( @cytosines ) | |
{ | |
if ( $meth_call eq "Z" ) | |
{ | |
push( @methylated, $meth_call ); | |
} | |
elsif ( $meth_call eq "z" ) | |
{ | |
push( @unmethylated, $meth_call ); | |
} | |
} | |
my $number_methylated = scalar( @methylated ); | |
my $number_unmethylated = scalar( @unmethylated ); | |
my $percent_methylated = $number_methylated/($number_methylated + $number_unmethylated)*100; | |
@methylated = (); | |
@unmethylated = (); | |
print OUT_FILE "$key\t$number_methylated\t$number_unmethylated\t$percent_methylated\n"; | |
} | |
} | |
close OUT_FILE; | |
sub sub_help | |
{ | |
# This subroutine print help message when | |
# --help of -h is specified in command line | |
print "\n"; | |
print "****************************************************\n"; | |
print "Usage: perl methyl_caller.pl [options] <file_name>\n"; | |
print "Options --help or -h will cause the program to output this\nhelp message\n"; | |
print "The output file will be saved in working directory as methyl_call.txt\n"; | |
print "****************************************************\n\n"; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment