Skip to content

Instantly share code, notes, and snippets.

@ypandit
Created August 31, 2012 20:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ypandit/3558631 to your computer and use it in GitHub Desktop.
Save ypandit/3558631 to your computer and use it in GitHub Desktop.
Script to get gene information (DDB-ID, Gene products and Synonyms)
DB1:
dsn: dbi:Oracle:host=<hostname>;port=1521;sid=<sid>
user: <username>
password: <password>
DB2:
dsn: dbi:Oracle:host=<hostname>;port=1521;sid=<sid>
user: <username>
password: <password>
use strict;
use warnings;
use Bio::Chado::Schema;
use Bio::DB::SeqFeature::Store;
use Bio::DB::SeqFeature::Store::GFF3Loader;
use YAML::Tiny;
use lib 'lib';
use MOD::SGD;
my $conf = YAML::Tiny->read('conf.yaml');
my $verbose = 1;
my $tempFile = File::Temp->new();
my $data_dir = './data';
my $out_file = IO::File->new( 'gene_information.txt', 'w' );
my $gp_schema = MOD::SGD->connect(
$conf->[0]->{DB1}->{dsn},
$conf->[0]->{DB1}->{user},
$conf->[0]->{DB1}->{password}
);
my $syn_schema = Bio::Chado::Schema->connect(
$conf->[0]->{DB2}->{dsn},
$conf->[0]->{DB2}->{user},
$conf->[0]->{DB2}->{password}
);
$syn_schema->source('Sequence::Synonym')->name('synonym_');
my $gene_rs = $syn_schema->resultset('Sequence::Feature')->search(
{ 'type.name' => 'gene',
'me.is_deleted' => 0,
'organism.common_name' => 'dicty'
},
{ join => [qw/organism type/],
select => [qw/feature_id uniquename/],
prefetch => 'dbxref'
}
);
opendir( DIR, $data_dir ) or die $!;
$tempFile = File::Temp->new();
my $gff3_db = Bio::DB::SeqFeature::Store->new(
-adaptor => 'DBI::SQLite',
-dsn => "dbi:SQLite:dbname=$tempFile",
-create => 1
);
my $loader = Bio::DB::SeqFeature::Store::GFF3Loader->new(
-store => $gff3_db,
-verbose => 1,
-fast => 1
);
while ( my $file = readdir(DIR) ) {
$loader->load( $data_dir . $file );
}
my @genes = $gff3_db->get_features_by_type('gene');
for my $gene (@genes) {
my $ddb_id = $gene->seq_id;
my @attributes = $gene->attributes;
my $gene_id = $attributes[1][0];
GENE:
while ( my $row = $gene_rs->next ) {
if ( $gene_id = $row->dbxref->accession ) {
my $gene_name = $row->uniquename;
my $gp_row = $gp_schema->resultset('LocusGp')->search(
{ locus_no => $row->feature_id, },
{ rows => 1,
prefetch => 'locus_gene_product'
}
)->single;
if ( !$gp_row ) {
#print STDERR "NO gene product name for $gene_id\n" if $verbose;
next;
}
my $gene_product = $gp_row->locus_gene_product->gene_product;
if ( $gene_product eq 'unknown'
or $gene_product eq 'pseudogene' )
{
#print STDERR "Skipping gene product $gene_product\n" if $verbose;
next;
}
my $syn_rs
= $syn_schema->resultset('Sequence::Synonym')
->search(
{ 'feature_synonyms.feature_id' => $row->feature_id, },
{ join => 'feature_synonyms' } );
my $syn_name = "";
if ( !$syn_rs ) {
print STDERR "NO synonym for $gene_id\n";
next;
}
else {
while ( my $syn_row = $syn_rs->next ) {
$syn_name = $syn_name . $syn_row->name . ",";
}
}
$syn_name =~ s/(,)+$//;
$out_file->write(
"$ddb_id\t$gene_id\t$gene_name\t$gene_product\t$syn_name\n");
print STDOUT "$gene_id\t$gene_name\n";
}
else {
next GENE;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment