-
-
Save anonymous/db7f9dcb00f4dd817ea1 to your computer and use it in GitHub Desktop.
My first Perl6 program, AKA how-to-work-around-things-u-dont-understand
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use v6; | |
# This is taken from Rosettacode, it translate Int to romans strings | |
# It is buggy, but I don't know why | |
my %symbols = | |
1 => "I", 5 => "V", 10 => "X", 50 => "L", 100 => "C", | |
500 => "D", 1_000 => "M"; | |
my @subtractors = | |
1_000, 100, 500, 100, 100, 10, 50, 10, 10, 1, 5, 1, 1, 0; | |
multi sub roman (0) { '' } | |
multi sub roman (Int $n) { | |
for @subtractors -> $cut, $minus { | |
$n >= $cut | |
and return %symbols{$cut} ~ roman($n - $cut); | |
$n >= $cut - $minus | |
and return %symbols{$minus} ~ roman($n + $minus); | |
} | |
} | |
sub find-from-roman (Str:D $roman, $in-list) returns Int { | |
# 1st WORKAROUND: Rosettacode is buggy, we need this or we get erratic behaviour | |
chomp($roman); | |
my $n = 0; | |
my Int $retval; | |
for |$in-list -> $cmp { | |
if (($cmp ~~ m/ <-[IVX]> <$roman> [ <-[IVX]> || $$ ] /) || ($cmp ~~ m/ <-[IVX]> <$roman> [ <-[IVX]> || $$ ] /)) { | |
if (!defined($retval)) { | |
$retval = $n; | |
} else { | |
die "Cannot decode titles for ambiguity"; | |
} | |
} | |
$n++; | |
} | |
return $retval; | |
} | |
#| Annotate the synthetic chromosome according to reference annotation: | |
#| annotations input and outputs should be passed through stdin and stdout | |
sub MAIN ($header, $binsize, $comparisondir) { | |
# We want comparisons to be chosen intelligently using information from $header | |
my $head-array = do { | |
my $head-file = $header.IO.open(:r) | |
or die "Could not open $header"; | |
eager do for $head-file.lines -> $line { | |
my $info = $line.split("\t")[0]; | |
$info.split('~').List; | |
} | |
} | |
# Get the list of all chromosomes, why eager? | |
my $all-chromosomes = eager gather { | |
my Str $last-chr = ""; | |
# Why flat ? | |
for |$head-array -> $arr ($chr, $bin) { | |
if ($chr ne $last-chr) { | |
take $chr; | |
$last-chr = $chr; | |
} | |
} | |
} | |
$*ERR.say($all-chromosomes.perl); | |
# Get the lists of synthetic chromosomes | |
# Why eager? | |
my $syn-chromosomes = eager gather { | |
my Str $last-chr = ""; | |
# Why flat? | |
for |$head-array -> $arr ($chr, $bin) { | |
if (($chr ~~ m:i/Syn<[IVXM]>+/) && ($chr ne $last-chr)) { | |
take $chr; | |
$last-chr = $chr; | |
} | |
} | |
} | |
$*ERR.say($syn-chromosomes.perl); | |
# WORKAROUND: To avoid erratic behaviour | |
my $syn-chromosomes-index-built-from-roman = eager (0..16).map( { $_ ?? find-from-roman(roman($_), $syn-chromosomes) !! Int } ); | |
my $all-chromosomes-index-built-from-roman = eager (0..16).map( { $_ ?? find-from-roman(roman($_), $all-chromosomes) !! Int } ); | |
$*ERR.say($syn-chromosomes-index-built-from-roman.perl); | |
$*ERR.say($all-chromosomes-index-built-from-roman.perl); | |
# Why eager? | |
my $all-compfiles = eager $comparisondir.IO.dir(test => /:i '.' txt $/)>>.basename; | |
# Why flat? | |
my $compfiles = eager gather for |$syn-chromosomes -> $name { | |
# Find the most similar name in compfile, to do so | |
# extract the roman numeral | |
my $num = ($name ~~ m/(<[IVX]>+)/)[0].Str or die "Could not extract roman number"; | |
my $element = find-from-roman($num, $all-compfiles); | |
if (defined($element)) { | |
take $all-compfiles[$element]; | |
} else { | |
warn "$name chromosome not found"; | |
} | |
} | |
$*ERR.say($compfiles.perl); | |
my Int $bins = $binsize.Int; | |
# Higly parallel chromosome loader (still super-slow) | |
# Why eager and flat? | |
my $comp-prom = eager do for |$compfiles -> $comparison { start { | |
my $compa-file = ("$comparisondir/" ~ $comparison).IO.open(:r) | |
or die "Could not open $comparison"; | |
$compa-file.get xx 2; | |
my $syn-str = $compa-file.get.chomp; | |
my $syn-index = $syn-str.index("\t"); | |
my $syn-title = $syn-str.substr(0, $syn-index); | |
# This is slow as hell | |
my $syn-prom = start { eager $syn-str.substr($syn-index+1).split("\t")>>.Int }; | |
$compa-file.get xx 3; | |
my $ref-str = $compa-file.get.chomp; | |
my $ref-index = $ref-str.index("\t"); | |
my $ref-title = $ref-str.substr(0, $ref-index); | |
# This is slow as hell | |
my $ref-prom = start { eager $ref-str.substr($ref-index+1).split("\t")>>.Int }; | |
my ($ref, $syn) = await $ref-prom, $syn-prom; | |
my $resarray = Array.new(); | |
$resarray[0] = 0; | |
loop (my $i = 0; $i < $ref.elems; $i++) { | |
$resarray[$ref[$i]] = $syn[$i]; | |
} | |
($syn-title, $ref-title, $resarray); | |
} } | |
my $annot-array = do for $*IN.lines -> $line { | |
# Why .List? | |
$line.split("\t").List; | |
} | |
my ($syn-titles, $ref-titles, $comp-arrays); | |
# This is a global mess for fixing [Z] | |
if ($comp-prom.elems > 1) { | |
my @comp-results = await |$comp-prom; | |
($syn-titles, $ref-titles, $comp-arrays) = [Z] @comp-results; | |
} else { | |
my @comp-results = await |$comp-prom; | |
my $comp-list; | |
($syn-titles, $ref-titles, $comp-list) = @comp-results; | |
$syn-titles = ($syn-titles).List; $ref-titles = ($ref-titles).List; $comp-arrays = ($comp-list,).List; | |
} | |
$*ERR.say($syn-titles.perl); | |
# To avoid erratic behaviour | |
my $syn-titles-index-built-from-roman = eager (0..16).map( { $_ ?? find-from-roman(roman($_), $syn-titles) !! Int } ); | |
$*ERR.say($syn-titles-index-built-from-roman.perl); | |
# Why eager and flat? | |
my $news = eager gather for |$annot-array -> $arr ($chr, $type, $st, $en, $mean) { | |
my $titnum; | |
if (my $num = ($chr ~~ m/chr(\d+)/)[0]) { | |
$titnum = $syn-titles-index-built-from-roman[$num.Str.Int]; # find-from-roman(roman($num.Str.Int), $syn-titles); | |
$num = $num.Str.Int; | |
} | |
if (defined($titnum)) { | |
my Int $new-st = $comp-arrays[$titnum][$st]; | |
my Int $new-en = $comp-arrays[$titnum][$en]; | |
my Int $c-length = $new-en - $new-st; | |
my Int $o-length = $en - $st; | |
warn "Lenght is different: ($new-en - $new-st) = $c-length vs ($en - $st) = $o-length" if ($c-length != $o-length); | |
my Int $new-mean = Int(($new-en + $new-st) / 2); | |
my Int $headnum = $syn-chromosomes-index-built-from-roman[$num]; #find-from-roman(roman($num), $syn-chromosomes); | |
die "Lost annotation $chr $mean" unless defined($headnum); | |
take ($syn-chromosomes[$headnum], $new-mean).List; | |
$*ERR.say(($syn-chromosomes[$headnum], $new-mean).List); | |
} else { | |
if ($chr eq "chrM") { | |
take ("ChrM", $mean).List; | |
$*ERR.say(("ChrM", $mean).List); | |
} else { | |
my $headnum = $all-chromosomes-index-built-from-roman[$num]; # find-from-roman(roman($num), $all-chromosomes); | |
if (defined($headnum)) { | |
take ($all-chromosomes[$headnum], $mean).List; | |
$*ERR.say(($all-chromosomes[$headnum], $mean).List); | |
} else { | |
warn "Lost annotation $chr $num {roman($num)}"; | |
} | |
} | |
} | |
} | |
die "Bins is not a multiple of 2, not yet implemented" unless ($bins/2 == Int($bins/2)); | |
my $diff = Int($bins/2); | |
# Why flat? | |
for |$head-array -> $arr ($chr, $bin) { | |
say "$chr~$bin", "\t", $news.grep( { (.[0] eq $chr) && ($bin - $diff <= .[1] < $bin + $diff) } ).elems; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment