Skip to content

Instantly share code, notes, and snippets.

@j1n3l0
Created May 20, 2010 16:01
Show Gist options
  • Save j1n3l0/407735 to your computer and use it in GitHub Desktop.
Save j1n3l0/407735 to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl
#
# Helper functions for interacting with the IKMC Targeting Repository
#
# Authors:: Darren Oakley (mailto:do2@sanger.ac.uk)
# Nelo Onyiah (mailto:io1@sanger.ac.uk)
#
use strict;
use warnings FATAL => 'all';
use JSON;
use REST::Client;
#
# Create a data object of the alleles and products we need to load
# - in your script this should come from your database.
#
# For NorCOMM products, use pipeline_id = 3.
#
# Connection settings: request() combines these into the REST client's host
# URL as http://$USER:$PASS@$DOMAIN.
# NOTE(review): the values below look like placeholders - confirm and replace
# before running against a real server.
my $DOMAIN = 'localhost:3000';
my $USER = 'user';
my $PASS = 'pass';
# Example input for process_alleles_and_products(): an array ref of allele
# hashes. Each allele carries its own fields plus a nested 'targeting_vectors'
# list, and each targeting vector carries a nested 'es_cells' list.
my $alleles_and_products = [
# First allele: loxp_start/loxp_end are set
{
pipeline_id => 3,
mgi_accession_id => "MGI:123456",
project_design_id => 2,
cassette => "L1L2_gt2",
backbone => "L3L4_pZero_kan",
assembly => "NCBIM37",
chromosome => "1",
strand => "+",
design_type => "Knock Out",
design_subtype => "Frameshift",
homology_arm_start => 10,
homology_arm_end => 10000,
cassette_start => 50,
cassette_end => 500,
loxp_start => 1000,
loxp_end => 1500,
# Vectors for this allele, each with the ES cell clones made from it
targeting_vectors => [
{
name => 'PRPGD001',
intermediate_vector => 'PGS001',
ikmc_project_id => 1,
es_cells => [
{ name => 'EPD00001', allele_symbol_superscript => 'tm1a' },
{ name => 'EPD00002', allele_symbol_superscript => 'tm1a' },
{ name => 'EPD00003', allele_symbol_superscript => 'tm1a' },
],
},
{
name => 'PRPGD002',
intermediate_vector => 'PGS001',
ikmc_project_id => 1,
es_cells => [
{ name => 'EPD00004', allele_symbol_superscript => 'tm1a' },
{ name => 'EPD00005', allele_symbol_superscript => 'tm1a' },
{ name => 'EPD00006', allele_symbol_superscript => 'tm1a' },
],
}
],
},
# Second allele: same mgi_accession_id and project_design_id as the first,
# but loxp_start/loxp_end are undef (find_allele searches those as 'null')
{
pipeline_id => 3,
mgi_accession_id => "MGI:123456",
project_design_id => 2,
cassette => "L1L2_gt2",
backbone => "L3L4_pZero_kan",
assembly => "NCBIM37",
chromosome => "1",
strand => "+",
design_type => "Knock Out",
design_subtype => "Frameshift",
homology_arm_start => 10,
homology_arm_end => 10000,
cassette_start => 50,
cassette_end => 500,
loxp_start => undef,
loxp_end => undef,
targeting_vectors => [
{
# NOTE(review): this vector name also appears under the first allele, and
# find_vector looks vectors up by name alone, so it will match the record
# created there - confirm this is intended.
name => 'PRPGD001',
intermediate_vector => 'PGS001',
ikmc_project_id => 1,
es_cells => [
{ name => 'EPD00007', allele_symbol_superscript => 'tm1a' },
{ name => 'EPD00008', allele_symbol_superscript => 'tm1a' },
],
},
],
}
];
#
# Now iterate over the alleles/products and load them
#
# Run the load. Named subs are compiled before the script starts executing,
# so calling one that is defined further down the file works.
process_alleles_and_products( $alleles_and_products );
# Finish with a success status once the whole load has completed.
exit 0;
#
# Wrapper function to handle the whole data loading process
#
# Load every allele in the given list, together with its optional genbank
# file, its targeting vectors and their ES cell clones.
#
# Params:
#   $alleles_and_products - array ref of allele hashes. Each hash may carry
#       a 'genbank_file' hash and a 'targeting_vectors' array ref, whose
#       entries may in turn carry an 'es_cells' array ref. These nested
#       keys are removed from the caller's hashes as they are processed.
#
# Returns nothing. Dies if any of the underlying repository requests die.
sub process_alleles_and_products {
    my ($alleles_and_products) = @_;

    foreach my $allele_data ( @{$alleles_and_products} ) {

        # Detach the nested records (delete returns the removed value) so
        # that only the allele's own fields are sent for the allele itself.
        my $genbank_file_data      = delete $allele_data->{genbank_file};
        my $targeting_vectors_data = delete $allele_data->{targeting_vectors};

        # Find,Update,Create the allele/molecular_structure
        my $allele = find_create_update_allele($allele_data);

        # Test the detached copy: the key has just been removed from
        # $allele_data, so checking the hash here could never be true and
        # the genbank file would never be loaded.
        if ($genbank_file_data) {

            # Find,Update,Create the genbank_files
            $genbank_file_data->{molecular_structure_id} = $allele->{id};
            find_create_update_genbank($genbank_file_data);
        }

        # An allele without targeting vectors is treated as an empty list
        foreach my $vector_data ( @{ $targeting_vectors_data || [] } ) {

            # Detach the es cell data before the vector is sent
            my $es_cell_clones_data = delete $vector_data->{es_cells};

            # Find,Update,Create the vector
            my $vector =
                find_create_update_vector( $vector_data, $allele->{id} );

            foreach my $clone_data ( @{ $es_cell_clones_data || [] } ) {

                # Find,Update,Create the clone, linked to both parents
                $clone_data->{molecular_structure_id} = $allele->{id};
                $clone_data->{targeting_vector_id}    = $vector->{id};
                find_create_update_clone($clone_data);
            }
        }

        # One second pause between alleles (presumably to avoid flooding
        # the server - TODO confirm)
        sleep(1);
    }

    return;
}
#
# Generic helper functions
#
# Check whether every field of $original has the same value in $new.
#
# Only the keys of $original are examined; extra keys in $new are ignored.
# Values are compared as strings, and an undefined value only matches
# another undefined (or missing) value.
#
# Returns 1 when all fields agree, 0 at the first difference.
sub compare {
    my ( $original, $new ) = @_;

    for my $key ( keys %{$original} ) {
        my $want = $original->{$key};
        my $have = $new->{$key};

        # Defined on exactly one side: cannot be equal
        return 0 if ( defined($want) xor defined($have) );

        # Undefined on both sides counts as equal
        next unless defined $want;

        return 0 if $want ne $have;
    }

    return 1;
}
# Send one HTTP request to the repository and return the raw response body.
#
# Params:
#   $method - HTTP verb; must be exactly one of DELETE, GET, POST or PUT
#   $url    - path (plus any query string) relative to the configured host
#   $data   - optional request body (the callers in this file pass JSON)
#
# Returns the response content.
# Dies on an unknown method or on any response code other than 200/201.
sub request {
    my ( $method, $url, $data ) = @_;

    # Anchored so that only the four exact verbs pass; an unanchored match
    # would also accept strings such as "FORGET" or "GETX".
    die "Method $method unknown when requesting URL $url"
        unless $method =~ m/\A(?:DELETE|GET|POST|PUT)\z/;

    # Build the optional body argument with a ternary: a "my" declaration
    # guarded by a statement modifier has undefined behaviour in Perl.
    my @args = $data ? ($data) : ();

    my $client = REST::Client->new( { host => "http://$USER:$PASS\@$DOMAIN" } );

    # Set the Content-Type and call the method with @args
    $client->addHeader( content_type => "application/json" );
    $client->$method( $url, @args );

    my $code = $client->responseCode;

    # A small update message
    warn join( " ", $method, $url, '-', $code ), "\n";

    # Handle failures here -- only code 200 | 201 are OK
    die "Bad HTTP response " . $code . " " . $client->responseContent
        unless $code =~ m/\A20[01]\z/;

    return $client->responseContent;
}
# Run a GET search and return the single matching record.
#
# Params:
#   $search_url   - search path and query string, passed to request()
#   $error_string - message to die with when the search is ambiguous
#
# The decoded response is used as an array ref of records.
# Returns the only record, or undef when nothing matched.
# Dies with "$error_string\n" when more than one record comes back.
sub find {
    my ( $search_url, $error_string ) = @_;

    my $body    = request( "GET", $search_url );
    my $matches = from_json($body);
    my $count   = scalar @{$matches};

    die $error_string . "\n" if $count > 1;

    # Explicit undef kept so the result is always exactly one value
    return undef if $count == 0;

    return $matches->[0];
}
# Create a new record with a POST to "/<controller>.json".
#
# Params:
#   $data       - hash ref of the record's fields
#   $type       - key under which $data is wrapped in the JSON body
#   $controller - resource name used in the URL
#
# Returns the decoded JSON response.
sub create {
    my ( $data, $type, $controller ) = @_;

    my %wrapper = ( $type => $data );
    my $body =
        request( "POST", "/" . $controller . ".json", to_json( \%wrapper ) );
    my $created = from_json($body);

    return $created;
}
# Update an existing record with a PUT to "/<controller>/<id>.json".
#
# Params:
#   $data       - hash ref of the record's fields; $data->{id} selects the
#                 record to update
#   $type       - key under which $data is wrapped in the JSON body
#   $controller - resource name used in the URL
#
# Returns the decoded JSON response.
sub update {
    my ( $data, $type, $controller ) = @_;

    my %wrapper    = ( $type => $data );
    my $record_url = sprintf( "/%s/%s.json", $controller, $data->{id} );
    my $body       = request( "PUT", $record_url, to_json( \%wrapper ) );
    my $updated    = from_json($body);

    return $updated;
}
#
# Allele / Molecular Structure Methods
#
# Search for an allele by MGI accession id, project design id and loxP
# coordinates.
#
# Params:
#   $allele_data - hash ref with mgi_accession_id, project_design_id and
#                  optionally loxp_start / loxp_end
#
# Returns the matching allele record or undef (see find()); dies when the
# search matches more than one allele.
sub find_allele {
    my ($allele_data) = @_;

    # Any false coordinate (undef, but also 0 or '') is searched as 'null'
    my ( $loxp_start, $loxp_end ) =
        map { $allele_data->{$_} || 'null' } qw( loxp_start loxp_end );

    my $mgi_id    = $allele_data->{mgi_accession_id};
    my $design_id = $allele_data->{project_design_id};

    # NOTE: values go into the query string as they are, without escaping
    my $search_url =
          "/alleles.json?mgi_accession_id=${mgi_id}"
        . "&project_design_id=${design_id}"
        . "&loxp_start=${loxp_start}"
        . "&loxp_end=${loxp_end}";

    my $error_string =
          "Error: found more than one allele for ${mgi_id}"
        . ", design_id: ${design_id}"
        . ", loxp: ${loxp_start}-${loxp_end}";

    return find( $search_url, $error_string );
}
# Make sure the repository holds an allele matching $allele_data.
#
# Creates the allele when find_allele() returns nothing, updates it when the
# stored record differs from $allele_data (see compare()), and otherwise
# leaves it untouched. On update, $allele_data->{id} is set to the stored id.
#
# Returns the allele record: created, updated, or as found.
sub find_create_update_allele {
    my ($allele_data) = @_;

    my $existing = find_allele($allele_data);

    # We need to create our allele entry
    return create( $allele_data, 'molecular_structure', 'alleles' )
        unless defined $existing;

    # Stored entry already agrees with our data: no update required
    return $existing if compare( $allele_data, $existing );

    $allele_data->{id} = $existing->{id};
    return update( $allele_data, 'molecular_structure', 'alleles' );
}
#
# Genbank File Methods
#
# Search for the genbank file belonging to a molecular structure.
#
# Params:
#   $genbank_data - hash ref; only molecular_structure_id is used here
#
# Returns the matching record or undef (see find()); dies when the search
# matches more than one genbank file.
sub find_genbank {
    my ($genbank_data) = @_;

    my $structure_id = $genbank_data->{molecular_structure_id};

    return find(
        "/genbank_files.json?molecular_structure_id=${structure_id}",
        "Error: found more than one genbank_file for ${structure_id}",
    );
}
# Make sure the repository holds a genbank file matching $genbank_data.
#
# Creates the record when find_genbank() returns nothing, updates it when
# the stored record differs from $genbank_data (see compare()), and
# otherwise leaves it untouched. On update, $genbank_data->{id} is set to
# the stored id.
#
# Returns the genbank file record: created, updated, or as found.
sub find_create_update_genbank {
    my ($genbank_data) = @_;

    my $existing = find_genbank($genbank_data);

    # We need to create our genbank entry
    return create( $genbank_data, 'genbank_file', 'genbank_files' )
        unless defined $existing;

    # Stored entry already agrees with our data: no update required
    return $existing if compare( $genbank_data, $existing );

    $genbank_data->{id} = $existing->{id};
    return update( $genbank_data, 'genbank_file', 'genbank_files' );
}
#
# Targeting Vector Methods
#
# Search for a targeting vector by name.
#
# Params:
#   $vector_data - hash ref; only the name is used here
#
# Returns the matching record or undef (see find()); dies when the search
# matches more than one vector.
sub find_vector {
    my ($vector_data) = @_;

    # NOTE: the name goes into the query string as it is, without escaping
    my $name = $vector_data->{name};

    return find(
        "/targeting_vectors.json?name=${name}",
        "Error: found more than one vector called ${name}",
    );
}
# Make sure the repository holds a targeting vector matching $vector_data.
#
# Params:
#   $vector_data            - hash ref of the vector's fields
#   $molecular_structure_id - id of the parent allele; only applied when
#                             the vector has to be created
#
# Creates the vector when find_vector() returns nothing, updates it when the
# stored record differs from $vector_data (see compare()), and otherwise
# leaves it untouched.
#
# Returns the vector record: created, updated, or as found.
sub find_create_update_vector {
    my ( $vector_data, $molecular_structure_id ) = @_;

    my $existing = find_vector($vector_data);

    # We need to create our vector entry, attached to the given allele
    if ( !defined $existing ) {
        $vector_data->{molecular_structure_id} = $molecular_structure_id;
        return create( $vector_data, 'targeting_vector',
            'targeting_vectors' );
    }

    # Stored entry already agrees with our data: no update required
    return $existing if compare( $vector_data, $existing );

    # An update keeps the id and the parent allele of the stored vector
    $vector_data->{id} = $existing->{id};
    $vector_data->{molecular_structure_id} =
        $existing->{molecular_structure_id};
    return update( $vector_data, 'targeting_vector', 'targeting_vectors' );
}
#
# ES Cell Clone Methods
#
# Search for an ES cell clone by name.
#
# Params:
#   $clone_data - hash ref; only the name is used here
#
# Returns the matching record or undef (see find()); dies when the search
# matches more than one es cell.
sub find_clone {
    my ($clone_data) = @_;

    # NOTE: the name goes into the query string as it is, without escaping
    my $name = $clone_data->{name};

    return find(
        "/es_cells.json?name=${name}",
        "Error: found more than one es cell called ${name}",
    );
}
# Make sure the repository holds an ES cell clone matching $clone_data.
#
# Creates the clone when find_clone() returns nothing, updates it when the
# stored record differs from $clone_data (see compare()), and otherwise
# leaves it untouched. On update, $clone_data->{id} is set to the stored id.
#
# Returns the clone record: created, updated, or as found.
sub find_create_update_clone {
    my ($clone_data) = @_;

    my $existing = find_clone($clone_data);

    # We need to create our clone entry
    return create( $clone_data, 'es_cell', 'es_cells' )
        unless defined $existing;

    # Stored entry already agrees with our data: no update required
    return $existing if compare( $clone_data, $existing );

    $clone_data->{id} = $existing->{id};
    return update( $clone_data, 'es_cell', 'es_cells' );
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment