Last active
March 22, 2022 14:36
-
-
Save mdondrup/6dc0ba31a034342dae0e164e4b8ce477 to your computer and use it in GitHub Desktop.
Answer to https://www.biostars.org/p/9515563/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl
use strict;
use warnings;

# Genes grouped by species prefix (the part of the identifier before "_peg_").
# Each value is an array ref of { index => <peg number>, line => <input line> }.
my %h = ();
my $range = 5; # configures the maximum distance
# Assigning to keys() pre-allocates hash buckets; it does not create entries.
keys(%h) = 72553; # pre-size hash to number of expected entries, should not be too large
# Read the whole input into the hash first, because we don't trust the
# sorting order of the file. If that uses too much memory, the file would
# need to be sorted by species name and the code adapted accordingly.
<>; # skip header
while (my $line = <>) {
chomp $line;
next if $line =~ /^\s*$/; # skip empty
# Only the first whitespace-separated column (the protein id) is parsed;
# the complete line is kept verbatim for output.
my ($id) = split ' ', $line;
my ($prefix, $index) = split "_peg_", $id;
# split yields no second field when "_peg_" is absent or nothing follows it;
# report that explicitly instead of tripping over an undefined value.
die "invalid identifier '$id': expected <species>_peg_<number>"
    unless defined $index;
die "invalid index $index" unless $index =~ /^\d+$/; # only digits
push @{ $h{$prefix} }, { index => $index, line => $line };
}
## For every species, in sorted name order, report each pair of genes
## whose peg indices differ by at most $range.
for my $species (sort keys %h) {
my $genes = $h{$species};
# Take genes off the end one at a time and compare each one against
# everything still left, so every unordered pair is visited exactly once.
# Note: this empties the per-species list as it goes.
while (@{$genes}) {
my $current = pop @{$genes};
for my $other (@{$genes}) {
my $distance = abs($current->{index} - $other->{index});
next unless $distance <= $range;
print "$current->{line}\n$other->{line}\n";
}
}
}
__END__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Protein Ids domain | |
Abiotrophia_defectiva_peg_0144 wzz | |
Abiotrophia_defectiva_peg_0198 wxy | |
Abiotrophia_defectiva_peg_0200 wzz | |
Abiotrophia_defectiva_peg_0215 wca | |
Abyssicoccus_albus_peg_1185 wzz | |
Abyssicoccus_albus_peg_1189 wzx | |
Abyssicoccus_albus_peg_1200 wza | |
Abyssicoccus_albus_peg_1322 wca | |
Abyssicoccus_albus_peg_1324 wbb | |
Bradyrhizobium_elkanii_peg_6717 wac | |
Bradyrhizobium_elkanii_peg_6718 wzx | |
Bradyrhizobium_elkanii_peg_6721 waa | |
Bradyrhizobium_elkanii_peg_6752 wca | |
Bradyrhizobium_elkanii_peg_6780 wvx | |
Abyssicoccus_albus_123_peg_1185 wzz | |
Abyssicoccus_albus_123_peg_1189 wzx | |
Abyssicoccus_albus_123_peg_1200 wza | |
Abyssicoccus_albus_123_peg_1322 wca | |
Abyssicoccus_albus_123_peg_1324 wbb |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
usage:
perl biostars_p_9515563.pl biostars_p_9515563_test.txt