Created
March 12, 2012 18:16
-
-
Save cybersiddhu/2023747 to your computer and use it in GitHub Desktop.
Generate a tab-delimited file mapping each dictyBase gene ID to its gene product name.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
use strict; | |
use Pod::Usage; | |
use IO::File; | |
use Getopt::Long; | |
use Bio::Chado::Schema; | |
use YAML qw/LoadFile/; | |
use Log::Log4perl qw/:easy/; | |
use Log::Log4perl::Appender; | |
use Log::Log4perl::Layout::PatternLayout; | |
use Path::Class::File; | |
use Cwd; | |
use Time::Piece; | |
use lib '../../lib'; | |
use MOD::SGD; | |
# Connection settings. $dsn/$user/$pass are later handed to the
# Bio::Chado::Schema connection and $ldsn/$luser/$lpass to the MOD::SGD
# (legacy) connection; the m-prefixed values only serve as fallbacks for the
# legacy ones.
my ( $dsn, $user, $pass, $mdsn, $muser, $mpass, $verbose, $config, $logging );
my ( $luser, $lpass, $ldsn );
my $count = 0;
my $t     = localtime;

# Abort with a usage message on malformed or unknown options instead of
# carrying on with a partially parsed command line.
GetOptions(
    'h|help'               => sub { pod2usage(1); },
    'dsn=s'                => \$dsn,
    'u|user=s'             => \$user,
    'p|pass|password=s'    => \$pass,
    'mdsn=s'               => \$mdsn,
    'ldsn=s'               => \$ldsn,
    'luser=s'              => \$luser,
    'lpass=s'              => \$lpass,
    'mu|muser=s'           => \$muser,
    'mp|mpass|mpassword=s' => \$mpass,
    'verbose'              => \$verbose,
    'c|config:s'           => \$config,
) or pod2usage(2);

# Fallback cascade for values given on the command line:
# legacy settings default to the m-settings, and the m-DSN to the main DSN.
$mdsn  ||= $dsn;
$ldsn  ||= $mdsn;
$luser ||= $muser;
$lpass ||= $mpass;

# Settings found in the YAML config file take precedence over the
# command-line values; anything the file leaves out keeps its current value.
if ($config) {
    my $str = LoadFile($config);

    my $db = $str->{database};
    if ($db) {
        $dsn  = $db->{dsn}      || $dsn;
        $user = $db->{user}     || $user;
        $pass = $db->{password} || $pass;
    }

    my $legacy = $str->{legacy};
    if ($legacy) {
        $ldsn  = $legacy->{dsn}      || $ldsn;
        $luser = $legacy->{user}     || $luser;
        $lpass = $legacy->{password} || $lpass;
    }
}

# Re-run the DSN cascade: when the main DSN was supplied only through the
# config file, the first pass above had nothing to copy and the legacy DSN
# would otherwise stay undefined. Values that are already set are untouched.
$mdsn ||= $dsn;
$ldsn ||= $mdsn;
# A DSN for the main database is mandatory.
pod2usage "dsn not given" if !$dsn;

# Output path: first positional argument, otherwise geneid2product.txt.
my $file   = $ARGV[0] || 'geneid2product.txt';
my $output = IO::File->new( $file, 'w' )
    or die "cannot open file $file: $!";

# Declare first, assign second: `my $x = ... if COND;` has undefined
# behaviour according to perlsyn, so the two steps are kept separate.
# $logger stays undef unless --verbose was given.
my $logger;
$logger = setup_logger() if $verbose;
# Database connections: one Bio::Chado::Schema handle for the main database
# and one MOD::SGD handle for the legacy database.
my %chado_attrs = ( LongTruncOk => 1 );
my $schema
    = Bio::Chado::Schema->connect( $dsn, $user, $pass, \%chado_attrs );
my $sgd_schema = MOD::SGD->connect( $ldsn, $luser, $lpass );

# On the Sequence::Feature source, drop the is_obsolete column and register
# an is_deleted column in its place.
# NOTE(review): presumably the target database names this column is_deleted
# rather than is_obsolete -- confirm against the actual table definition.
my %is_deleted_spec = (
    data_type     => 'boolean',
    default_value => 'false',
    is_nullable   => 0,
    size          => 1,
);
my $feature_source = $schema->source('Sequence::Feature');
$feature_source->remove_column('is_obsolete');
$feature_source->add_column( is_deleted => \%is_deleted_spec );

# Resultset of all non-deleted features of type 'gene' for organism 'dicty',
# with the related dbxref row prefetched.
my %gene_filter = (
    'type.name'            => 'gene',
    'me.is_deleted'        => 0,
    'organism.common_name' => 'dicty',
);
my %gene_attrs = (
    join     => [qw/organism type/],
    select   => [qw/feature_id uniquename/],
    prefetch => 'dbxref',
);
my $gene_rs = $schema->resultset('Sequence::Feature')
    ->search( \%gene_filter, \%gene_attrs );
# Walk every gene in $gene_rs and write one "<gene id>\t<gene product>" line
# per gene to the output file. Genes are skipped (with a warning when
# --verbose is on) if they have no dbxref, no gene product name, or a product
# name of 'unknown' or 'pseudogene'.
GENE:
while ( my $row = $gene_rs->next ) {

    # Without a dbxref row there is no accession to use as the gene ID;
    # skip the gene instead of dying on an undefined value.
    my $dbxref = $row->dbxref;
    if ( !$dbxref ) {
        $logger->warn( "no dbxref for feature " . $row->feature_id )
            if $verbose;
        next GENE;
    }
    my $gene_id = $dbxref->accession;

    # Look up one gene product entry for this gene in the legacy schema,
    # keyed by the feature_id.
    my $gp_row = $sgd_schema->resultset('LocusGp')->search(
        { locus_no => $row->feature_id, },
        {   rows     => 1,
            prefetch => 'locus_gene_product'
        }
    )->single;

    # Treat a missing row, a missing related row and an undefined name
    # alike: there is nothing to write for this gene.
    my $product_row  = $gp_row      ? $gp_row->locus_gene_product : undef;
    my $gene_product = $product_row ? $product_row->gene_product  : undef;
    if ( !defined $gene_product ) {
        $logger->warn("no gene product name for $gene_id")
            if $verbose;
        next GENE;
    }

    if ( $gene_product eq 'unknown' or $gene_product eq 'pseudogene' ) {
        $logger->warn("skipping gene product $gene_product") if $verbose;
        next GENE;
    }

    $output->print("$gene_id\t$gene_product\n")
        or die "cannot write to $file: $!";
}

# Buffered write errors only surface at close time, so check it before
# reporting success.
$output->close or die "cannot close $file: $!";
$logger->info("Done writing the map file") if $verbose;
# Build the Log::Log4perl logger for category 'file'.
#
# Argument: the path passed to the file appender as its filename (the
#           appender is created with mode 'clobber').
# Returns:  the logger, with the appender attached and its level set to
#           $DEBUG.
sub setup_file_logger {
    my ($logfile) = @_;

    my $pattern = Log::Log4perl::Layout::PatternLayout->new(
        "[%d{MM-dd-yyyy hh:mm}] %p > %F{1}:%L - %m%n");

    my $file_appender = Log::Log4perl::Appender->new(
        'Log::Log4perl::Appender::File',
        filename => $logfile,
        mode     => 'clobber',
    );
    $file_appender->layout($pattern);

    my $file_log = Log::Log4perl->get_logger('file');
    $file_log->add_appender($file_appender);
    $file_log->level($DEBUG);

    return $file_log;
}
# Build the Log::Log4perl logger for category 'screen'.
#
# Takes no arguments. Attaches a ScreenColoredLevels appender (created with
# stderr => 1) using the script's pattern layout, sets the level to $DEBUG
# and returns the logger.
sub setup_logger {
    my $pattern = Log::Log4perl::Layout::PatternLayout->new(
        "[%d{MM-dd-yyyy hh:mm}] %p > %F{1}:%L - %m%n");

    my $screen_appender = Log::Log4perl::Appender->new(
        'Log::Log4perl::Appender::ScreenColoredLevels',
        stderr => 1,
    );
    $screen_appender->layout($pattern);

    my $screen_log = Log::Log4perl->get_logger('screen');
    $screen_log->add_appender($screen_appender);
    $screen_log->level($DEBUG);

    return $screen_log;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment