Skip to content

Instantly share code, notes, and snippets.

@ypandit
Created October 26, 2012 21:25
Show Gist options
  • Save ypandit/3961651 to your computer and use it in GitHub Desktop.
Save ypandit/3961651 to your computer and use it in GitHub Desktop.
Script to check if GO ID exists in association with Gene ID
database:
dsn: "dbi:Oracle:"
user: ''
passwd: ''
package GAFLoader;
use strict;
use warnings;
use Bio::Chado::Schema;
use IO::String;
use LWP::UserAgent;
use Moose;
use MooseX::Attribute::Dependent;
use YAML::Tiny;
with 'MooseX::Getopt';
has '_ebi_base_url' => (
is => 'ro',
isa => 'Str',
default => 'http://www.ebi.ac.uk/QuickGO/GAnnotation?format=gaf&protein='
);
has '_ua' => (
is => 'ro',
isa => 'LWP::UserAgent',
default => sub { LWP::UserAgent->new },
lazy => 1
);
has 'schema' => (
is => 'rw',
isa => 'Bio::Chado::Schema',
dependency => All [ '_dsn', '_user', '_passwd' ]
);
has 'config_file' => (
is => 'rw',
isa => 'Str',
trigger => sub {
my ($self) = @_;
my $conf = YAML::Tiny->read( $self->config_file );
$self->_dsn( $conf->[0]->{database}->{dsn} );
$self->_user( $conf->[0]->{database}->{user} );
$self->_passwd( $conf->[0]->{database}->{passwd} );
$self->schema(
Bio::Chado::Schema->connect(
$self->_dsn, $self->_user,
$self->_passwd, { LongReadLen => 2**25 }
)
);
},
required => 1
);
has [qw/_dsn _user _passwd/] => (
is => 'rw',
isa => 'Str',
default => "",
lazy => 1,
dependency => All ['config_file']
);
sub parse_go_from_gaf {
my ( $self, $gaf ) = @_;
my @go_new;
my $io = IO::String->new();
$io->open($gaf);
while ( my $line = $io->getline ) {
chomp($line);
next if $line =~ /^!/;
my @row_vals = split( "\t", $line );
print $row_vals[4] . "\n";
push( @go_new, $row_vals[4] );
}
return @go_new;
}
sub get_go_for_gene {
my ( $self, $gene_id ) = @_;
my $go_rs = $self->schema->resultset('Sequence::Feature')->search(
);
}
sub get_gaf_from_ebi {
my ( $self, $gene_id ) = @_;
my $response
= $self->_ua->get( $self->_ebi_base_url . $gene_id )->decoded_content;
return $response;
}
sub compare_go {
}
sub run {
my ($self) = @_;
my $gene_rs = $self->schema->resultset('Sequence::Feature')->search(
{ 'type.name' => 'gene',
'organism.common_name' => 'dicty'
},
{ join => [qw/type organism/],
select => [qw/feature_id uniquename/],
prefetch => 'dbxref'
}
);
while ( my $gene = $gene_rs->next ) {
my $gaf = $self->get_gaf_from_ebi( $gene->dbxref->accession ); # Get GAF from EBI for given Gene ID
my @go_new = $self->parse_go_from_gaf($gaf); # Get GO IDs in an array for a given GAF of a Gene ID
my @go_exist = $self->get_go_for_gene( $gene->dbxref->accession ); # Query using BCS to check what GO IDs are associated with given Gene ID
$self->compare_go( @go_new, @go_exist );
}
}
1;
package main;
GAFLoader->new_with_options->run;
SELECT GENE_ID.accession gene_id, GENE.uniquename gene, 'GO' || ':' || GO_ID.accession go_id
FROM feature_cvterm fcvt
JOIN feature GENE ON GENE.feature_id = fcvt.feature_id
JOIN dbxref GENE_ID ON GENE_ID.dbxref_id = GENE.dbxref_id
JOIN cvterm type ON type.cvterm_id = GENE.type_id
JOIN cvterm GO ON GO.cvterm_id = fcvt.cvterm_id
JOIN cv ON cv.cv_id = GO.cv_id
JOIN dbxref GO_ID ON GO_ID.dbxref_id = GO.dbxref_id
JOIN db ON db.db_id = GO_ID.db_id
JOIN organism ON organism.organism_id = GENE.organism_id
WHERE cv.name IN('molecular_function', 'biological_process', 'cellular_component')
AND organism.common_name = 'dicty'
AND db.name = 'GO'
AND GO.is_obsolete = 0
AND GENE.uniquename = 'mhcA'
/* GENE_ID.accession = '' */
ORDER BY GO_ID.accession, GENE_ID.accession ASC
;
@ypandit
Copy link
Author

ypandit commented Oct 26, 2012

To run the script, fill up the required valies in config.yaml; then

perl GAFLoader.pl --config_file config.yaml

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment