Instantly share code, notes, and snippets.
Created
January 9, 2015 11:46
-
Star
(0)
0
You must be signed in to star a gist -
Fork
(0)
0
You must be signed in to fork a gist
-
Save iansealy/b7eddc355e34ec3f71e9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# PODNAME: rewrite_tc_tags_randomly.pl | |
# ABSTRACT: Rewrite transcript counting read tags (in random fashion) | |
## Author : is1 | |
## Maintainer : is1 | |
## Created : 2014-09-19 | |
use warnings; | |
use strict; | |
use autodie; | |
use Getopt::Long; | |
use Pod::Usage; | |
use Carp; | |
use version; our $VERSION = qv('v0.1.0'); | |
use Readonly; | |
# Constants | |
Readonly our @TAGS => qw( | |
NAAAA NAACC NAAGG NAATT | |
NCCAA NCCCC NCCGG NCCTT | |
NGGAA NGGCC NGGGG NGGTT | |
NTTAA NTTCC NTTGG NTTTT | |
); | |
# Default options | |
my $tag_count = 1; | |
my ( $debug, $help, $man ); | |
# Get and check command line options | |
get_and_check_options(); | |
# Read SAM from STDIN and output edited SAM to STDOUT | |
my %tag_index_for; | |
while ( my $line = <> ) { | |
my @fields = split /\t/xms, $line; | |
my $read_name = $fields[0]; | |
my $replacement_tag; | |
if ( exists $tag_index_for{$read_name} ) { | |
$replacement_tag = $TAGS[ $tag_index_for{$read_name} ]; | |
delete $tag_index_for{$read_name}; | |
} | |
else { | |
my $tag_index = int rand $tag_count; | |
$tag_index_for{$read_name} = $tag_index; | |
$replacement_tag = $TAGS[$tag_index]; | |
} | |
$line =~ s/\A (\S+?) \#\S+/$1\#$replacement_tag/xms; | |
print $line or croak "Failed to print SAM\n"; | |
} | |
# Get and check command line options | |
sub get_and_check_options { | |
# Get options | |
GetOptions( | |
'tag_count=i' => \$tag_count, | |
'debug' => \$debug, | |
'help' => \$help, | |
'man' => \$man, | |
) or pod2usage(2); | |
# Documentation | |
if ($help) { | |
pod2usage(1); | |
} | |
elsif ($man) { | |
pod2usage( -verbose => 2 ); | |
} | |
if ( $tag_count < 1 || $tag_count > scalar @TAGS ) { | |
pod2usage( sprintf "--tag_count must be integer between 1 and %d\n", | |
scalar @TAGS ); | |
} | |
return; | |
} | |
__END__ | |
=pod | |
=encoding UTF-8 | |
=head1 NAME | |
rewrite_tc_tags_randomly.pl | |
Rewrite transcript counting read tags (in random fashion) | |
=head1 VERSION | |
version 0.1.0 | |
=head1 DESCRIPTION | |
This script rewrites transcript counting read tags randomly (from a preassigned | |
set of tags). Up to 16 tags (i.e. 16 pseudosamples) can be assigned. The script | |
expects SAM format on STDIN and also outputs SAM format. | |
=head1 EXAMPLES | |
samtools view -h input.bam \ | |
| perl rewrite_tc_tags_randomly.pl --tag_count 10 \ | |
| samtools view -Sb - \ | |
> output.bam | |
=head1 USAGE | |
rewrite_tc_tags_randomly.pl | |
[--tag_count int] | |
[--debug] | |
[--help] | |
[--man] | |
=head1 OPTIONS | |
=over 8 | |
=item B<--tag_count INT> | |
Number of tags to use randomly. | |
=item B<--debug> | |
Print debugging information. | |
=item B<--help> | |
Print a brief help message and exit. | |
=item B<--man> | |
Print this script's manual page and exit. | |
=back | |
=head1 DEPENDENCIES | |
None | |
=head1 AUTHOR | |
=over 4 | |
=item * | |
Ian Sealy <ian.sealy@sanger.ac.uk> | |
=back | |
=head1 COPYRIGHT AND LICENSE | |
This software is Copyright (c) 2014 by Genome Research Ltd. | |
This is free software, licensed under: | |
The GNU General Public License, Version 3, June 2007 | |
=cut |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment