Skip to content

Instantly share code, notes, and snippets.

@ShujiaHuang
Created July 22, 2014 21:17
Show Gist options
  • Save ShujiaHuang/fbee0a58554075ed5bf0 to your computer and use it in GitHub Desktop.
Save ShujiaHuang/fbee0a58554075ed5bf0 to your computer and use it in GitHub Desktop.
Find the Overlap region between in the two file
# Author : Shujia Huang
# Date : 2013-01-10
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;
my ( $posFile, $regionFile );
GetOptions (
"i=s" => \$posFile,
"r=s" => \$regionFile,
);
Usage() unless $posFile && $regionFile;
my %region;
my %index;
GetRegion ( $regionFile, \%region, \%index );
open IN, $posFile or die "Cannot open file $posFile: $!\n";
while ( <IN> ){
#chr10 105186 105186 T C chr10 105186 T C ......
chomp;
my @tmp = split;
my $refId = $tmp[0];
next if ( !exists $index{$refId} );
my $flag = 1;
my $isOverlap = 0;
for( my $i = $index{$refId}; $i < @{$region{$refId}}; ++$i ) {
next if ( $tmp[1] > $region{$refId}->[$i][1] );
last if ( $tmp[2] < $region{$refId}->[$i][0] );
if ( $flag ) {
$flag = 0;
$index{$refId} = $i;
}
$isOverlap = 1;
last;
#$length=OverlapLength($tmp[1],$tmp[2],$region{$refId}->[$i][0],$region{$refId}->[$i][1]);
}
if ( $isOverlap ) {
print join "\t", @tmp ; print "\n";
}
}
close IN;
###################################################################
###################################################################
sub GetRegion {
my ( $file, $region, $index ) = @_;
open I, $file or die "Cannot open file $file : $!\n";
while ( <I> ) {
# chr1 mRNA 65298905 65432187 - NM_002227 JAK1
chomp;
my @tmp = split;
push ( @{ $$region{$tmp[0]} }, [ @tmp[2,3], $_ ] );
}
close I;
for my $refId ( keys %$region ) {
@{ $$region{$refId} } = sort {$a->[0] <=> $b->[0]} @{ $$region{$refId} };
$$index{ $refId } = 0;
}
}
sub OverlapLength {
my ($reg1,$reg2,$tar1,$tar2)= @_;
my $length = 0;
if($tar1>=$reg1 && $tar1<=$reg2 && $tar2>=$reg2)
{
$length=$reg2-$tar1+1;
}elsif($tar1>=$reg1 && $tar1<=$reg2 && $tar2<=$reg2)
{
$length=$tar2-$tar1+1;
}elsif($tar2>=$reg1 && $tar2<=$reg2 && $tar1<=$reg1)
{
$length=$tar2-$reg1+1;
}else
{
$length=$reg2-$reg1+1;
}
return $length;
}
################################################################
sub IsOvlap {
#
my ( $start1, $end1, $start2, $end2 ) = @_;
my $isOvlp = 0;
$isOvlp = 1 if ( $end1 >= $start2 and $start1 <= $end2 );
return $isOvlp;
}
sub OvlapReg {
# Reture the overlap region.
my ( $start1, $end1, $start2, $end2 ) = @_;
my ( $start, $end ) = ( 0, 0 );
if ( $end1 >= $start2 and $end1 <= $end2 and $start1 <= $start2 ) {
$start = $start2;
$end = $end1;
}
elsif ( $end1 >= $start2 and $end1 <= $end2 and $start1 > $start2 ) {
die "The end position is bigger than start position : $end1 > $start1\n" if ( $start1 > $end1 );
$start = $start1;
$end = $end1;
}
elsif ( $start1 <= $start2 and $end1 > $end2 ) {
die "The end position is bigger than start position : $end2 > $start2\n" if ( $start2 > $end2 );
$start = $start2;
$end = $end2;
}
elsif ( $start1 <= $end2 and $end1 > $end2 ) {
$start = $start1;
$end = $end2;
}
return ( $start, $end );
}
sub Usage {
print STDERR <<U;
Version : 0.0.1 ( 2012-01-10 )
Author : Shujia Huang
Created : 2013/01/10
Last Modify :
Usage : perl $0 [Options] -i [cout infile] > output
Options :
-i [str] Position file. Should be sorted! ( Find the positions which in the Region file "-r". )
-r [str] Region file. Not required sorting.
U
exit(0);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment