Skip to content

Instantly share code, notes, and snippets.

@holli-holzer
Created May 13, 2020 21:00
Show Gist options
  • Save holli-holzer/7958fcbe6b14ee9a44bd589d42568672 to your computer and use it in GitHub Desktop.
Save holli-holzer/7958fcbe6b14ee9a44bd589d42568672 to your computer and use it in GitHub Desktop.
Fasta::Parser draft
# A Str holding at least one character.
subset NonEmptyStr of Str where { .chars > 0 };

# A Str consisting solely of the base letters A, C, G and T (one or more).
subset FastaStr of Str where { $_ ~~ / ^ <[ ACGT ]>+ $/ };

# Any value whose .IO path exists, is a regular file and is readable.
# Existence is tested first so the later file tests are only run on
# paths that are actually present.
subset ReadableFile where {
    my $path = .IO;
    $path.e && $path.f && $path.r
};
# One FASTA record: the description taken from a header line plus the
# sequence data that belongs to it.
#
# Construction:
#   Fasta::Sequence.new( :$data, :$description, :$validate )
#
#   data         non-empty string holding the sequence (required)
#   description  non-empty string holding the header text (required)
#   validate     when true, the data must match FastaStr (default: False)
#
# Dies with "Bad data at sequence <description>" when validation is
# requested and the data does not match FastaStr.
class Fasta::Sequence
{
    has NonEmptyStr $.data        is required;
    has NonEmptyStr $.description is required;
    has Bool        $.validate = False;

    # The check lives in TWEAK instead of a hand-written BUILD so that the
    # standard construction logic still initialises the attributes: the
    # `is required` traits and the `= False` default are honoured, and only
    # afterwards is the optional validation applied.
    submethod TWEAK()
    {
        die "Bad data at sequence <$!description>"
            if $!validate and $!data !~~ FastaStr;
    }

    # Returns a Bag that maps each character of the data to its number of
    # occurrences.
    method base-count { $!data.comb.Bag }
}
# Turns FASTA-formatted lines into Fasta::Sequence objects.
#
# A line starting with '>' opens a new record; the text after the '>' is the
# description. Every other line is appended to the data of the current
# record. Records are produced lazily by .sequences.
class Fasta::Parser does Iterable
{
    # Passed on to every Fasta::Sequence created by this parser.
    has Bool $.validate;

    # The lines to parse.
    # NOTE(review): both factory methods store the result of a `.lines`
    # call here; if that is a one-shot Seq, a parser can presumably be
    # walked only once -- confirm before calling .sequences/.Hash/.Array
    # repeatedly on the same instance.
    has Iterable $.lines;

    # Exported convenience constructor for a readable file.
    multi sub fasta( ReadableFile $file, $validate = False --> Fasta::Parser ) is export {
        Fasta::Parser.from-file( $file, $validate )
    }

    # Exported convenience constructor for FASTA text held in a string.
    multi sub fasta( Str $fasta, $validate = False --> Fasta::Parser ) is export {
        # from-str declares $validate as a positional parameter. Passing it
        # by name would be absorbed by the method's implicit slurpy hash
        # and the flag would silently fall back to False, so it is passed
        # positionally, exactly like the file variant above does.
        Fasta::Parser.from-str( $fasta, $validate )
    }

    # Builds a parser that reads the chomped lines of $file.
    method from-file( ReadableFile $file, $validate = False --> Fasta::Parser ) {
        Fasta::Parser.new( lines => $file.IO.lines( :chomp ), :$validate )
    }

    # Builds a parser over the lines of the string $fasta.
    method from-str( Str $fasta, $validate = False --> Fasta::Parser ) {
        Fasta::Parser.new( lines => $fasta.lines, :$validate )
    }

    # Returns a lazy Seq of Fasta::Sequence objects, one per record.
    method sequences( --> Seq )
    {
        # Emits one record into the surrounding gather. Nothing is emitted
        # when both description and data are still empty, which is the
        # state before the first header line and for empty input.
        sub new-sequence( $description, $data ) {
            take Fasta::Sequence.new( :$description, :$data, :$.validate )
                unless $description eq $data eq ""
        }

        my $description   = "";
        my $sequence-data = "";

        lazy gather
        {
            with $!lines
            {
                for @$!lines -> $line
                {
                    if $line.starts-with( '>' )
                    {
                        # A header closes the record collected so far and
                        # starts a fresh one.
                        new-sequence( $description, $sequence-data );
                        $description   = $line.substr( 1, * );
                        $sequence-data = "";
                    }
                    else
                    {
                        $sequence-data ~= $line;
                    }
                }

                # Flush the record that was still open at end of input.
                new-sequence( $description, $sequence-data );
            }
        }
    }

    # Iterable support: iterating the parser iterates its sequences.
    method iterator( --> Iterator ) {
        self.sequences().iterator
    }

    # Returns a Hash mapping each description to its sequence data.
    method Hash( --> Hash ) {
        % = self.sequences.map({ .description => .data })
    }

    # Returns all sequences, eagerly evaluated, as an Array.
    method Array( --> Array ) {
        self.sequences.eager.Array;
    }
}
=begin pod
=head1 NAME
Fasta::File - lazy parser for FASTA sequence data
=head1 SYNOPSIS
=begin code :lang<perl6>
use Fasta::File;
=end code
=head1 DESCRIPTION
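Fasta::File provides the exported C<fasta> sub and the Fasta::Parser class, which lazily turn FASTA text or a readable file into Fasta::Sequence objects.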
=head1 AUTHOR
Markus "Holli" Holzer <holli.holzer@gmail.com>
=head1 COPYRIGHT AND LICENSE
Copyright 2020 Markus "Holli" Holzer
This library is free software; you can redistribute it and/or modify it under the Artistic License 2.0.
=end pod
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment