Instantly share code, notes, and snippets.
iansealy/rewrite_tc_tags_randomly.pl
Created Jan 9, 2015
#!/usr/bin/env perl | |
# PODNAME: rewrite_tc_tags_randomly.pl | |
# ABSTRACT: Rewrite transcript counting read tags (in random fashion) | |
## Author : is1 | |
## Maintainer : is1 | |
## Created : 2014-09-19 | |
use warnings; | |
use strict; | |
use autodie; | |
use Getopt::Long; | |
use Pod::Usage; | |
use Carp; | |
use version; our $VERSION = qv('v0.1.0'); | |
use Readonly; | |
# Constants | |
Readonly our @TAGS => qw( | |
NAAAA NAACC NAAGG NAATT | |
NCCAA NCCCC NCCGG NCCTT | |
NGGAA NGGCC NGGGG NGGTT | |
NTTAA NTTCC NTTGG NTTTT | |
); | |
# Default options | |
my $tag_count = 1; | |
my ( $debug, $help, $man ); | |
# Get and check command line options | |
get_and_check_options(); | |
# Read SAM from STDIN and output edited SAM to STDOUT | |
my %tag_index_for; | |
while ( my $line = <> ) { | |
my @fields = split /\t/xms, $line; | |
my $read_name = $fields[0]; | |
my $replacement_tag; | |
if ( exists $tag_index_for{$read_name} ) { | |
$replacement_tag = $TAGS[ $tag_index_for{$read_name} ]; | |
delete $tag_index_for{$read_name}; | |
} | |
else { | |
my $tag_index = int rand $tag_count; | |
$tag_index_for{$read_name} = $tag_index; | |
$replacement_tag = $TAGS[$tag_index]; | |
} | |
$line =~ s/\A (\S+?) \#\S+/$1\#$replacement_tag/xms; | |
print $line or croak "Failed to print SAM\n"; | |
} | |
# Get and check command line options | |
sub get_and_check_options { | |
# Get options | |
GetOptions( | |
'tag_count=i' => \$tag_count, | |
'debug' => \$debug, | |
'help' => \$help, | |
'man' => \$man, | |
) or pod2usage(2); | |
# Documentation | |
if ($help) { | |
pod2usage(1); | |
} | |
elsif ($man) { | |
pod2usage( -verbose => 2 ); | |
} | |
if ( $tag_count < 1 || $tag_count > scalar @TAGS ) { | |
pod2usage( sprintf "--tag_count must be integer between 1 and %d\n", | |
scalar @TAGS ); | |
} | |
return; | |
} | |
__END__ | |
=pod | |
=encoding UTF-8 | |
=head1 NAME | |
rewrite_tc_tags_randomly.pl | |
Rewrite transcript counting read tags (in random fashion) | |
=head1 VERSION | |
version 0.1.0 | |
=head1 DESCRIPTION | |
This script rewrites transcript counting read tags randomly (from a preassigned | |
set of tags). Up to 16 tags (i.e. 16 pseudosamples) can be assigned. The script | |
expects SAM format on STDIN and also outputs SAM format. | |
=head1 EXAMPLES | |
samtools view -h input.bam \ | |
| perl rewrite_tc_tags_randomly.pl --tag_count 10 \ | |
| samtools view -Sb - \ | |
> output.bam | |
=head1 USAGE | |
rewrite_tc_tags_randomly.pl | |
[--tag_count int] | |
[--debug] | |
[--help] | |
[--man] | |
=head1 OPTIONS | |
=over 8 | |
=item B<--tag_count INT> | |
Number of tags to use randomly. | |
=item B<--debug> | |
Print debugging information. | |
=item B<--help> | |
Print a brief help message and exit. | |
=item B<--man> | |
Print this script's manual page and exit. | |
=back | |
=head1 DEPENDENCIES | |
None | |
=head1 AUTHOR | |
=over 4 | |
=item * | |
Ian Sealy <ian.sealy@sanger.ac.uk> | |
=back | |
=head1 COPYRIGHT AND LICENSE | |
This software is Copyright (c) 2014 by Genome Research Ltd. | |
This is free software, licensed under: | |
The GNU General Public License, Version 3, June 2007 | |
=cut |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment