eemp/custom_fasta_proc.pl

## custom_fasta_proc.pl
#!/usr/bin/perl

use strict;
use warnings;

use Data::Dumper;

# Usage check: exactly one thing is required, the path of the FASTA file.
# Tested with defined/eq rather than plain truthiness so that a file that is
# literally named "0" is still accepted.
die "Usage: $0 FASTA_FILE" if !defined $ARGV[0] || $ARGV[0] eq '';

my $fasta_path = $ARGV[0];

# Three-argument open with a lexical handle: the open mode cannot be injected
# through the file name, and a failure to open is reported instead of the
# script silently printing nothing.
open(my $fasta_fh, '<', $fasta_path)
    or die "Cannot open '$fasta_path' for reading: $!\n";

# State of the record currently being assembled.
my $curr_id;          # latest id (non-sequence) line; undef until one is seen
my $curr_seq = '';    # sequence lines concatenated since $curr_id was set

# Print the pending record as "<id>\n<sequence>\n".
# Nothing is printed when no id has been seen yet or when the id has no
# sequence. The checks use defined/eq so that an id of "0" is not dropped.
my $emit_record = sub {
    return if !defined $curr_id || $curr_seq eq '';
    print "$curr_id\n$curr_seq\n";
    return;
};

# Read line by line so the whole file never has to be held in memory.
while (my $line = <$fasta_fh>) {
    # Strip LF and CRLF line endings; a leftover "\r" would stop every
    # sequence line from matching the pattern below.
    $line =~ s/[\r\n]+\z//;

    # Blank lines carry no data; skipping them keeps a stray empty line from
    # being taken as an id and swallowing the sequence lines that follow it.
    next if $line !~ /\S/;

    # The regex below can be updated along with the logic: the list of
    # possible codes in a sequence can be expanded based on
    # https://en.wikipedia.org/wiki/FASTA_format, or the test can match ids
    # instead if they always follow a particular format.
    if ($line =~ m{^[ACGTU]+$}) {
        # Another piece of the current sequence: join it onto one line.
        $curr_seq .= $line;
    }
    else {
        # Any other line is an id: the previous record (if any) is complete.
        $emit_record->();
        $curr_id  = $line;
        $curr_seq = '';
    }
}

close($fasta_fh)
    or die "Cannot close '$fasta_path': $!\n";

# The last record has no following id line to trigger its output.
$emit_record->();
	#!/usr/bin/perl

	use strict;
	use warnings;

	use Data::Dumper;

	# Usage check: exactly one thing is required, the path of the FASTA file.
	# Tested with defined/eq rather than plain truthiness so that a file that is
	# literally named "0" is still accepted.
	die "Usage: $0 FASTA_FILE" if !defined $ARGV[0] || $ARGV[0] eq '';

	my $fasta_path = $ARGV[0];

	# Three-argument open with a lexical handle: the open mode cannot be injected
	# through the file name, and a failure to open is reported instead of the
	# script silently printing nothing.
	open(my $fasta_fh, '<', $fasta_path)
	    or die "Cannot open '$fasta_path' for reading: $!\n";

	# State of the record currently being assembled.
	my $curr_id;          # latest id (non-sequence) line; undef until one is seen
	my $curr_seq = '';    # sequence lines concatenated since $curr_id was set

	# Print the pending record as "<id>\n<sequence>\n".
	# Nothing is printed when no id has been seen yet or when the id has no
	# sequence. The checks use defined/eq so that an id of "0" is not dropped.
	my $emit_record = sub {
	    return if !defined $curr_id || $curr_seq eq '';
	    print "$curr_id\n$curr_seq\n";
	    return;
	};

	# Read line by line so the whole file never has to be held in memory.
	while (my $line = <$fasta_fh>) {
	    # Strip LF and CRLF line endings; a leftover "\r" would stop every
	    # sequence line from matching the pattern below.
	    $line =~ s/[\r\n]+\z//;

	    # Blank lines carry no data; skipping them keeps a stray empty line from
	    # being taken as an id and swallowing the sequence lines that follow it.
	    next if $line !~ /\S/;

	    # The regex below can be updated along with the logic: the list of
	    # possible codes in a sequence can be expanded based on
	    # https://en.wikipedia.org/wiki/FASTA_format, or the test can match ids
	    # instead if they always follow a particular format.
	    if ($line =~ m{^[ACGTU]+$}) {
	        # Another piece of the current sequence: join it onto one line.
	        $curr_seq .= $line;
	    }
	    else {
	        # Any other line is an id: the previous record (if any) is complete.
	        $emit_record->();
	        $curr_id  = $line;
	        $curr_seq = '';
	    }
	}

	close($fasta_fh)
	    or die "Cannot close '$fasta_path': $!\n";

	# The last record has no following id line to trigger its output.
	$emit_record->();