Skip to content

Instantly share code, notes, and snippets.

@cybersiddhu
Created August 21, 2012 15:33
Show Gist options
  • Save cybersiddhu/3416632 to your computer and use it in GitHub Desktop.
Save cybersiddhu/3416632 to your computer and use it in GitHub Desktop.
Prepare reference sequences for JBrowse part2
# $seq_rel is the path relative to $outDir
my $seq_rel = "seq";
my $compress;
my $chunk_size = 20000;
my $opt = optargs;
if ( !-e $opt->{conf} ) {
die usage "JBrowse config file $opt->{conf} do not exist";
}
my $seqdir = catfile( $opt->{out}, $seq_rel );
make_path $seqdir;
my $json_str = do { local ( @ARGV, $/ ) = $opt->{conf}; <> };
my $perl_str = JSON->new->decode($json_str);
my $db = Bio::DB::SeqFeature::Store->new( %{ $perl_str->{db_args} } );
my @features = $db->features( -type => $opt->{reftype} );
die "cannot fetch any reference features\n" if !@features;
my $refseqs;
for my $seqfeature (@features) {
my $refinfo = {
name => $seqfeature->name,
id => $seqfeature->seq_id,
start => $seqfeature->start - 1,
end => $seqfeature->end,
length => $seqfeature->length
};
my $refdir = catdir( $seqdir, $refinfo->{name} );
export_seq_chunks( $refdir, $compress, $chunk_size, $db,
$seqfeature->seq_id, $seqfeature->start, $seqfeature->end );
$refinfo->{seqDir} = $refdir;
$refinfo->{SeqChunkSize} = $chunk_size;
push @$refseqs, $refinfo;
}
die "No reference sequence found !!!!\n" if !@$refseqs;
sub export_seq_chunks {
my ( $refdir, $compress, $chunk_size, $db, $seq_id, $start, $end ) = @_;
make_path $refdir;
$start = 1 if $start < 1;
my $chunk_start = $start;
while ( $chunk_start <= $end ) {
my $chunk_end = $chunk_start + $chunk_size - 1;
my $chunk_num = floor( ( $chunk_start - 1 ) / $chunk_size );
my $seg = $db->segment( $seq_id, $chunk_start, $chunk_end );
if ( !$seg ) {
die
"Seq export query failed, please inform the developers of this error\n";
}
if ( $seg->start != $chunk_start ) {
die "requested $chunk_start .. $chunk_end; got "
. $seg->start . " .. "
. $seg->end;
}
$chunk_start = $chunk_end + 1;
if ( my $sequence = $seg->seq->seq ) { #we have the sequence to write
my $output
= IO::File->new( catfile( $refdir, $chunk_num . '.txt' ),
'w' );
$output->print($sequence);
$output->close;
}
else {
warn "$seq_id has no sequence attached !!!!!\n";
}
}
}
my $refseq_json = catfile( $seqdir, 'refSeqs.json' );
JsonGenerator::modifyJsonFile( $refseq_json, \&modify_refseq_json );
my $seq_track_name = 'DNA';
my $track_json = catfile( $opt->{out}, 'trackList.json' );
JsonGenerator::modifyJsonFile( $track_json, \&modify_track_json );
sub modify_refseq_json {
my $old = shift;
my %refs = map { $_->{name}, $_ } @$refseqs;
for my $i ( 0 .. $#$old ) {
if ( defined $refs{ $old->[$i]->{name} } ) {
$old->[$i] = delete $refs{ $old->[$i]->{name} };
}
}
for my $newref (@$refseqs) {
push @$old, $newref if defined $refs{ $newref->{name} };
}
return $old;
}
sub modify_track_json {
my $track_list = shift;
if ( !$track_list ) {
$track_list = { formatVersion => 1, tracks => [] };
}
my $tracks = $track_list->{tracks};
my $end = @$tracks ? $#$tracks : 0;
TRACKS:
for my $i ( 0 .. $end ) {
if ( $tracks->[$i]->{label} eq $seq_track_name ) {
last TRACKS;
}
}
$tracks->[$end] = {
'label' => $seq_track_name,
'key' => $seq_track_name,
'type' => "SequenceTrack",
'chunkSize' => $chunk_size,
'urlTemplate' => "$seq_rel/{refseq}/"
};
return $track_list;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment