Skip to content

Instantly share code, notes, and snippets.

@cybersiddhu
Created March 12, 2012 18:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cybersiddhu/2023747 to your computer and use it in GitHub Desktop.
Save cybersiddhu/2023747 to your computer and use it in GitHub Desktop.
Generate a tab-delimited file mapping each dictyBase gene ID to its gene product name.
#!/usr/bin/perl -w
use strict;
use Pod::Usage;
use IO::File;
use Getopt::Long;
use Bio::Chado::Schema;
use YAML qw/LoadFile/;
use Log::Log4perl qw/:easy/;
use Log::Log4perl::Appender;
use Log::Log4perl::Layout::PatternLayout;
use Path::Class::File;
use Cwd;
use Time::Piece;
use lib '../../lib';
use MOD::SGD;
# ---------------------------------------------------------------------------
# Main script body.
#
# Writes a tab delimited file with one line per dicty gene:
#     <dbxref accession of the gene>\t<gene product name>
#
# The output file name is the first positional argument and defaults to
# 'geneid2product.txt'. Connection settings are read from the command line;
# the 'database' and 'legacy' sections of a YAML file given with -c/--config
# take precedence over them when present.
# ---------------------------------------------------------------------------
my ( $dsn, $user, $pass, $mdsn, $muser, $mpass, $verbose, $config );
my ( $luser, $lpass, $ldsn );

# GetOptions returns false when it has detected (and warned about) a bad
# option; stop with a usage message instead of running half-configured.
GetOptions(
    'h|help'                => sub { pod2usage(1); },
    'dsn=s'                 => \$dsn,
    'u|user=s'              => \$user,
    'p|pass|password=s'     => \$pass,
    'mdsn=s'                => \$mdsn,
    'ldsn=s'                => \$ldsn,
    'luser=s'               => \$luser,
    'lpass=s'               => \$lpass,
    'mu|muser=s'            => \$muser,
    'mp|mpass|mpassword=s'  => \$mpass,
    'verbose'               => \$verbose,
    'c|config:s'            => \$config,
) or pod2usage(2);

# Command line fallbacks: mdsn falls back to dsn, and the legacy (l*)
# settings fall back to the corresponding m* settings.
$mdsn  = $dsn   if !$mdsn;
$ldsn  = $mdsn  if !$ldsn;
$luser = $muser if !$luser;
$lpass = $mpass if !$lpass;

# Values found in the config file win over whatever was set above.
if ($config) {
    my $str = LoadFile($config);

    my $db = $str->{database};
    if ($db) {
        $dsn  = $db->{dsn}      || $dsn;
        $user = $db->{user}     || $user;
        $pass = $db->{password} || $pass;
    }

    my $legacy = $str->{legacy};
    if ($legacy) {
        $ldsn  = $legacy->{dsn}      || $ldsn;
        $luser = $legacy->{user}     || $luser;
        $lpass = $legacy->{password} || $lpass;
    }
}
pod2usage "dsn not given" if !$dsn;

my $file = $ARGV[0] || 'geneid2product.txt';
my $output = IO::File->new( $file, 'w' ) or die "cannot open file:$!";

# Declare first, then assign conditionally: the behaviour of
# "my $x = ... if COND" is undefined in Perl.
my $logger;
$logger = setup_logger() if $verbose;

#database connection
my $schema
    = Bio::Chado::Schema->connect( $dsn, $user, $pass, { LongTruncOk => 1 } );
my $sgd_schema = MOD::SGD->connect( $ldsn, $luser, $lpass );

# Swap the 'is_obsolete' column of the feature source for an 'is_deleted'
# column so that the search below can filter on me.is_deleted.
my $source = $schema->source('Sequence::Feature');
$source->remove_column('is_obsolete');
$source->add_column(
    is_deleted => {
        data_type     => 'boolean',
        default_value => 'false',
        is_nullable   => 0,
        size          => 1
    }
);

# All non-deleted gene features of the organism with common name 'dicty'.
my $gene_rs = $schema->resultset('Sequence::Feature')->search(
    {   'type.name'            => 'gene',
        'me.is_deleted'        => 0,
        'organism.common_name' => 'dicty'
    },
    {   join     => [qw/organism type/],
        select   => [qw/feature_id uniquename/],
        prefetch => 'dbxref'
    }
);

GENE:
while ( my $row = $gene_rs->next ) {

    # Guard against a feature without a linked dbxref: calling ->accession
    # on an undefined value would abort the whole export.
    my $dbxref = $row->dbxref;
    if ( !$dbxref ) {
        $logger->warn( "no dbxref for feature " . $row->feature_id )
            if $verbose;
        next GENE;
    }
    my $gene_id = $dbxref->accession;

    # The feature_id is used as locus_no in the legacy schema; only the
    # first matching LocusGp row is considered.
    my $gp_row = $sgd_schema->resultset('LocusGp')->search(
        { locus_no => $row->feature_id, },
        {   rows     => 1,
            prefetch => 'locus_gene_product'
        }
    )->single;
    if ( !$gp_row ) {
        $logger->warn("no gene product name for $gene_id")
            if $verbose;
        next GENE;
    }

    # Placeholder product names are left out of the map.
    my $gene_product = $gp_row->locus_gene_product->gene_product;
    if ( $gene_product eq 'unknown' or $gene_product eq 'pseudogene' ) {
        $logger->warn("skipping gene product $gene_product") if $verbose;
        next GENE;
    }
    $output->print("$gene_id\t$gene_product\n");
}

# Buffered write errors only surface at close, so check it before
# reporting success.
$output->close or die "cannot close file:$!";
$logger->info("Done writing the map file") if $verbose;
# Build a Log4perl logger for the 'file' category that writes to a file.
#
# Takes the path of the log file as its only argument. The file appender is
# created in 'clobber' mode and the logger level is set to $DEBUG.
# Returns the configured logger.
sub setup_file_logger {
    my ($log_path) = @_;

    my $file_appender = Log::Log4perl::Appender->new(
        'Log::Log4perl::Appender::File',
        filename => $log_path,
        mode     => 'clobber',
    );

    # Timestamp, priority, source file and line, then the message itself.
    my $pattern = Log::Log4perl::Layout::PatternLayout->new(
        "[%d{MM-dd-yyyy hh:mm}] %p > %F{1}:%L - %m%n");
    $file_appender->layout($pattern);

    my $file_logger = Log::Log4perl->get_logger('file');
    $file_logger->add_appender($file_appender);
    $file_logger->level($DEBUG);

    return $file_logger;
}
# Build a Log4perl logger for the 'screen' category.
#
# Attaches a ScreenColoredLevels appender configured with stderr => 1 and
# sets the logger level to $DEBUG. Takes no arguments.
# Returns the configured logger.
sub setup_logger {
    my $screen_appender = Log::Log4perl::Appender->new(
        'Log::Log4perl::Appender::ScreenColoredLevels',
        stderr => 1,
    );

    # Timestamp, priority, source file and line, then the message itself.
    my $pattern = Log::Log4perl::Layout::PatternLayout->new(
        "[%d{MM-dd-yyyy hh:mm}] %p > %F{1}:%L - %m%n");
    $screen_appender->layout($pattern);

    my $screen_logger = Log::Log4perl->get_logger('screen');
    $screen_logger->add_appender($screen_appender);
    $screen_logger->level($DEBUG);

    return $screen_logger;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment