Skip to content

Instantly share code, notes, and snippets.

@iansealy
Created January 9, 2015 11:46
Show Gist options
  • Save iansealy/b7eddc355e34ec3f71e9 to your computer and use it in GitHub Desktop.
Save iansealy/b7eddc355e34ec3f71e9 to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl
# PODNAME: rewrite_tc_tags_randomly.pl
# ABSTRACT: Rewrite transcript counting read tags (in random fashion)
## Author : is1
## Maintainer : is1
## Created : 2014-09-19
use warnings;
use strict;
use autodie;
use Getopt::Long;
use Pod::Usage;
use Carp;
use version; our $VERSION = qv('v0.1.0');
use Readonly;
# Constants
Readonly our @TAGS => qw(
NAAAA NAACC NAAGG NAATT
NCCAA NCCCC NCCGG NCCTT
NGGAA NGGCC NGGGG NGGTT
NTTAA NTTCC NTTGG NTTTT
);
# Default options
my $tag_count = 1;
my ( $debug, $help, $man );
# Get and check command line options
get_and_check_options();
# Read SAM from STDIN and output edited SAM to STDOUT
my %tag_index_for;
while ( my $line = <> ) {
my @fields = split /\t/xms, $line;
my $read_name = $fields[0];
my $replacement_tag;
if ( exists $tag_index_for{$read_name} ) {
$replacement_tag = $TAGS[ $tag_index_for{$read_name} ];
delete $tag_index_for{$read_name};
}
else {
my $tag_index = int rand $tag_count;
$tag_index_for{$read_name} = $tag_index;
$replacement_tag = $TAGS[$tag_index];
}
$line =~ s/\A (\S+?) \#\S+/$1\#$replacement_tag/xms;
print $line or croak "Failed to print SAM\n";
}
# Get and check command line options
sub get_and_check_options {
# Get options
GetOptions(
'tag_count=i' => \$tag_count,
'debug' => \$debug,
'help' => \$help,
'man' => \$man,
) or pod2usage(2);
# Documentation
if ($help) {
pod2usage(1);
}
elsif ($man) {
pod2usage( -verbose => 2 );
}
if ( $tag_count < 1 || $tag_count > scalar @TAGS ) {
pod2usage( sprintf "--tag_count must be integer between 1 and %d\n",
scalar @TAGS );
}
return;
}
__END__
=pod
=encoding UTF-8
=head1 NAME
rewrite_tc_tags_randomly.pl
Rewrite transcript counting read tags (in random fashion)
=head1 VERSION
version 0.1.0
=head1 DESCRIPTION
This script rewrites transcript counting read tags randomly (from a preassigned
set of tags). Up to 16 tags (i.e. 16 pseudosamples) can be assigned. The script
expects SAM format on STDIN and also outputs SAM format.
=head1 EXAMPLES
samtools view -h input.bam \
| perl rewrite_tc_tags_randomly.pl --tag_count 10 \
| samtools view -Sb - \
> output.bam
=head1 USAGE
rewrite_tc_tags_randomly.pl
[--tag_count int]
[--debug]
[--help]
[--man]
=head1 OPTIONS
=over 8
=item B<--tag_count INT>
Number of tags to use randomly.
=item B<--debug>
Print debugging information.
=item B<--help>
Print a brief help message and exit.
=item B<--man>
Print this script's manual page and exit.
=back
=head1 DEPENDENCIES
None
=head1 AUTHOR
=over 4
=item *
Ian Sealy <ian.sealy@sanger.ac.uk>
=back
=head1 COPYRIGHT AND LICENSE
This software is Copyright (c) 2014 by Genome Research Ltd.
This is free software, licensed under:
The GNU General Public License, Version 3, June 2007
=cut
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment