Skip to content

Instantly share code, notes, and snippets.

@soh-i
Created November 6, 2012 06:24
Show Gist options
  • Save soh-i/4022964 to your computer and use it in GitHub Desktop.
Save soh-i/4022964 to your computer and use it in GitHub Desktop.
pileup formatからmultiple alignment形式を得る
  • pileupは、1行が1ポジションのデータであり、Genome viewer的な表示(マルチプルアラインメント形式っぽく)には、base string columnの行列を入れ替えて表示する必要がある。

pileup形式

chr1	10258	G	4	..^M.^M.	XBEE	54,49,1,1
chr1	10259	T	4	....	DBRR	55,50,2,2
chr1	10260	G	4	....	RBUU	56,51,3,3
chr1	10261	A	4	G...	QB^^	57,52,4,4
chr1	10262	A	4	gg..	WB\\	58,53,5,5
chr1	10263	A	4	....	^B^^	59,54,6,6
chr1	10264	C	4	....	[Baa	60,55,7,7
chr1	10265	C	8	....^M.^M.^M.^M.	`BaaEEEE	61,56,8,8,1,1,1,1
chr1	10266	A	10	........^M.^M.	RB[\UUUU??	62,57,9,9,2,2,2,2,1,1
chr1	10267	A	10	..........	NBXZ[[[[AA	63,58,10,10,3,3,3,3,2,2
chr1	10268	A	10	..........	FBXZ[[[[BB	64,59,11,11,4,4,4,4,3,3
chr1	10269	A	10	..N.......	XBD\]]]]EE	65,60,12,12,5,5,5,5,4,4
chr1	10270	T	11	..........^M.	QBZaaaaa`aB	66,61,13,13,6,6,6,6,5,5,1
chr1	10271	T	11	...........	OB\aaaaaaaE	67,62,14,14,7,7,7,7,6,6,2
chr1	10272	G	11	...........	OBaba`aa`Wa	68,63,15,15,8,8,8,8,7,7,3
chr1	10273	C	11	...........	[Baaaaabaab	69,64,16,16,9,9,9,9,8,8,4
chr1	10274	G	11	...........	YBaba`bbaab	70,65,17,17,10,10,10,10,9,9,5
chr1	10275	A	11	...........	BBaba__b`^a	71,66,18,18,11,11,11,11,10,10,6
chr1	10276	T	11	...........	BBabaaaa`aa	72,67,19,19,12,12,12,12,11,11,7
chr1	10277	A	11	...........	BBaa]Waaa_\	73,68,20,20,13,13,13,13,12,12,8
chr1	10297	C	20	................,..^M.	`aaZ_a__`b`aaaaW\a^E	40,40,33,33,33,33,32,32,28,18,14,14,14,14,12,8,4,3,3,1
chr1	10298	T	20	................,...	aaa:aa``aaaaaaa_X__U	41,41,34,34,34,34,33,33,29,19,15,15,15,15,13,9,5,4,4,2
chr1	10299	T	21	................,...^M,	_aa7aa``aa^aaaaaR``aE	42,42,35,35,35,35,34,34,30,20,16,16,16,16,14,10,6,5,5,3,1%              

Genome viewer的な感じで表示する(samtools tview)

1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
  .  .  .  G  g  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
  .  .  .  .  g  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
  ^  .  .  .  .  .  .  .  .  .  .  N  .  .  .  .  .  .  .  .  .  .  .
  M  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
  .                    ^  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
  ^                    M  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
  M                    .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
  .                    ^  .  .  .  .  .  .  .  .  .  .  .  .  .  .  .
                       M  ^  .  .  .  .  .  .  .  .  .  .  .  .  .  .
                       .  M  .  .  .  .  .  .  .  .  .  .  .  .  .  .
                       ^  .           ^  .  .  .  .  .  .  .  .  .  .
                       M  ^           M                       .  .  .
                       .  M           .                       .  .  .
                       ^  .                                   .  .  .
                       M                                      .  .  .
                       .                                      .  .  .
                                                              ,  ,  ,
                                                              .  .  .
                                                              .  .  .
                                                              ^  .  .
                                                              M     ^
                                                              .     M
                                                                    ,

コード

#!/usr/bin/env perl

use strict;
use warnings;
use Data::Dumper;

my $max_height = 0;
my $max_width  = 0;

my $in_pileup = shift || die "Usage: perl $0 in.pileup";
open my $FH, '<', $in_pileup or die;
my @matrix;
while ( <$FH> ) {
    chomp;
    next unless $_;
    
    my @entries = split /\t/, $_;
    
    my @row = split //, $entries[4];
    push @matrix,  [ @row ];

    $max_height++;
    if ( $max_width < $#row + 1 ) {
        $max_width = $#row + 1;
    }
}
for ( 1..$max_height ) {
    printf "%3d", $_;
}
print "\n";
for my $j ( 0 .. $max_width-1 ) {
    for my $i ( 0 .. $max_height-1 ) {
        if ( $matrix[$i][$j] ) {
            printf "%3s", $matrix[$i][$j];
        } else {
            printf "%3s", ' ';
        }
    }
    print "\n";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment