Created
May 20, 2010 16:01
-
-
Save j1n3l0/407735 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
#
# Helper functions for interacting with the IKMC Targeting Repository
#
# Authors:: Darren Oakley (mailto:do2@sanger.ac.uk)
#           Nelo Onyiah (mailto:io1@sanger.ac.uk)
#
use strict; | |
use warnings FATAL => 'all'; | |
use JSON; | |
use REST::Client; | |
#
# Connection settings for the targeting repository.
#
# Each value may be overridden from the environment (TARG_REP_DOMAIN,
# TARG_REP_USER, TARG_REP_PASS) so that real credentials do not have to be
# written into this file.  The literals below are the fallback defaults.
#
my $DOMAIN = defined $ENV{TARG_REP_DOMAIN} ? $ENV{TARG_REP_DOMAIN} : 'localhost:3000';
my $USER   = defined $ENV{TARG_REP_USER}   ? $ENV{TARG_REP_USER}   : 'user';
my $PASS   = defined $ENV{TARG_REP_PASS}   ? $ENV{TARG_REP_PASS}   : 'pass';

#
# Create a data object of the alleles and products we need to load
#  - in your script this should come from your database.
#
# For NorCOMM products, use pipeline_id = 3.
#
# Shape: an array of allele hashes.  Each allele carries its own fields plus
# a 'targeting_vectors' list; each vector carries its own fields plus an
# 'es_cells' list.  The loader removes those nested lists from the hashes
# while it works, so this structure is modified in place.
#
my $alleles_and_products = [
    {
        pipeline_id        => 3,
        mgi_accession_id   => "MGI:123456",
        project_design_id  => 2,
        cassette           => "L1L2_gt2",
        backbone           => "L3L4_pZero_kan",
        assembly           => "NCBIM37",
        chromosome         => "1",
        strand             => "+",
        design_type        => "Knock Out",
        design_subtype     => "Frameshift",
        homology_arm_start => 10,
        homology_arm_end   => 10000,
        cassette_start     => 50,
        cassette_end       => 500,
        loxp_start         => 1000,
        loxp_end           => 1500,
        targeting_vectors  => [
            {
                name                => 'PRPGD001',
                intermediate_vector => 'PGS001',
                ikmc_project_id     => 1,
                es_cells            => [
                    { name => 'EPD00001', allele_symbol_superscript => 'tm1a' },
                    { name => 'EPD00002', allele_symbol_superscript => 'tm1a' },
                    { name => 'EPD00003', allele_symbol_superscript => 'tm1a' },
                ],
            },
            {
                name                => 'PRPGD002',
                intermediate_vector => 'PGS001',
                ikmc_project_id     => 1,
                es_cells            => [
                    { name => 'EPD00004', allele_symbol_superscript => 'tm1a' },
                    { name => 'EPD00005', allele_symbol_superscript => 'tm1a' },
                    { name => 'EPD00006', allele_symbol_superscript => 'tm1a' },
                ],
            }
        ],
    },
    {
        # Same gene and design as above, but with no loxP coordinates.
        pipeline_id        => 3,
        mgi_accession_id   => "MGI:123456",
        project_design_id  => 2,
        cassette           => "L1L2_gt2",
        backbone           => "L3L4_pZero_kan",
        assembly           => "NCBIM37",
        chromosome         => "1",
        strand             => "+",
        design_type        => "Knock Out",
        design_subtype     => "Frameshift",
        homology_arm_start => 10,
        homology_arm_end   => 10000,
        cassette_start     => 50,
        cassette_end       => 500,
        loxp_start         => undef,
        loxp_end           => undef,
        targeting_vectors  => [
            {
                name                => 'PRPGD001',
                intermediate_vector => 'PGS001',
                ikmc_project_id     => 1,
                es_cells            => [
                    { name => 'EPD00007', allele_symbol_superscript => 'tm1a' },
                    { name => 'EPD00008', allele_symbol_superscript => 'tm1a' },
                ],
            },
        ],
    }
];

#
# Now iterate over the alleles/products and load them
#
process_alleles_and_products($alleles_and_products);

exit 0;
#
# Wrapper function to handle the whole data loading process
#
sub process_alleles_and_products {
    # Loads every allele in the given list, together with its optional
    # genbank file, its targeting vectors and their ES cell clones.
    #
    # Argument: an array reference of allele hashes.  Each hash may hold a
    # 'genbank_file' hash and a 'targeting_vectors' array; each vector may
    # hold an 'es_cells' array.
    #
    # Side effects: the input is modified in place.  The nested
    # 'genbank_file', 'targeting_vectors' and 'es_cells' entries are removed
    # from their parent hashes, and id fields are added to the child hashes.
    # Dies if any of the underlying requests fails.
    my ($alleles_and_products) = @_;

    foreach my $allele_data ( @{$alleles_and_products} ) {

        # Detach the nested genbank file and product data so that only the
        # allele's own fields are sent when the allele is created/updated.
        # delete() hands back the removed value (undef if the key is absent).
        my $genbank_file_data      = delete $allele_data->{genbank_file};
        my $targeting_vectors_data = delete $allele_data->{targeting_vectors};

        # Find,Update,Create the allele/molecular_structure
        my $allele = find_create_update_allele($allele_data);

        # Test the detached copy: the key has already been removed from
        # $allele_data, so checking it there would never be true.
        if ($genbank_file_data) {

            # Find,Update,Create the genbank_files
            $genbank_file_data->{molecular_structure_id} = $allele->{id};
            find_create_update_genbank($genbank_file_data);
        }

        foreach my $vector_data ( @{ $targeting_vectors_data || [] } ) {

            # Detach the es cell data before the vector itself is sent
            my $es_cell_clones_data = delete $vector_data->{es_cells};

            # Find,Update,Create the vector
            my $vector =
                find_create_update_vector( $vector_data, $allele->{id} );

            foreach my $clone_data ( @{ $es_cell_clones_data || [] } ) {

                # Find,Update,Create the clone, linked to its allele and vector
                $clone_data->{molecular_structure_id} = $allele->{id};
                $clone_data->{targeting_vector_id}    = $vector->{id};
                find_create_update_clone($clone_data);
            }
        }

        # Pause between alleles (presumably to go easy on the server)
        sleep(1);
    }
}
#
# Generic helper functions
#
sub compare {
    # Reports whether every key of $original holds the same value in $new.
    # Returns 1 when they all match and 0 at the first difference.
    #
    # Only the keys of $original are inspected; extra keys in $new are
    # ignored.  Defined values are compared as strings, and an undefined
    # value only matches another undefined (or missing) value.
    my ( $original, $new ) = @_;

    for my $key ( keys %{$original} ) {
        my $expected = $original->{$key};

        # Nothing expected here: $new must not hold a value either.
        if ( !defined $expected ) {
            return 0 if defined $new->{$key};
            next;
        }

        return 0 if !defined $new->{$key};
        return 0 if !( $expected eq $new->{$key} );
    }

    return 1;
}
sub request {
    # Sends one HTTP request to the repository and returns the response body.
    #
    # Arguments:
    #   $method - one of DELETE, GET, POST or PUT (exact, upper case)
    #   $url    - path and query string, relative to the configured host
    #   $data   - optional request body (the callers pass a JSON string)
    #
    # Dies on an unknown method or on any response code other than 200/201.
    my ( $method, $url, $data ) = @_;

    # Anchored so that only the four exact method names are accepted.
    die "Method $method unknown when requesting URL $url"
        unless $method =~ m/\A(?:DELETE|GET|POST|PUT)\z/;

    # Only pass a body argument when one was supplied.  Written as a plain
    # conditional expression: "my ... if ..." has undefined behaviour.
    my @args = $data ? ($data) : ();

    my $client = REST::Client->new( { host => "http://$USER:$PASS\@$DOMAIN" } );

    # Set the Content-Type and call the method with @args
    $client->addHeader( content_type => "application/json" );
    $client->$method( $url, @args );

    my $code = $client->responseCode;

    # A small update message
    warn join( " ", $method, $url, '-', $code ), "\n";

    # Handle failures here -- only code 200 | 201 are OK
    die "Bad HTTP response " . $code . " " . $client->responseContent
        unless $code =~ m/\A20[01]\z/;

    return $client->responseContent;
}
sub find {
    # Runs a GET search and expects at most one hit.
    #
    # The response is decoded from JSON and treated as an array reference.
    # Returns its single element, or undef when the array is empty.
    # Dies with $error_string (plus a newline) when more than one element
    # comes back.
    my ( $search_url, $error_string ) = @_;

    my $matches = from_json( request( "GET", $search_url ) );
    my $count   = scalar @{$matches};

    die $error_string . "\n" if $count > 1;
    return $matches->[0] if $count == 1;
    return undef;
}
sub create {
    # POSTs a new record to "/$controller.json" and returns the decoded
    # JSON response.  The record is sent wrapped as { $type => $data }.
    my ( $data, $type, $controller ) = @_;

    my $payload = to_json( { $type => $data } );
    my $created = from_json( request( "POST", "/$controller.json", $payload ) );

    return $created;
}
sub update {
    # PUTs an existing record to "/$controller/<id>.json" and returns the
    # decoded JSON response.  The id is read from $data->{id} and the record
    # is sent wrapped as { $type => $data }.
    my ( $data, $type, $controller ) = @_;

    my $payload    = to_json( { $type => $data } );
    my $record_url = "/$controller/$data->{id}.json";
    my $updated    = from_json( request( "PUT", $record_url, $payload ) );

    return $updated;
}
#
# Allele / Molecular Structure Methods
#
sub find_allele {
    # Searches for the allele matching the given MGI accession id, project
    # design id and loxP coordinates.
    #
    # Returns the matching allele, or undef when there is none.  Dies when
    # the search finds more than one allele.
    my ($allele_data) = @_;

    # A missing (undef or empty) coordinate is searched for as the literal
    # 'null'.  Test for definedness rather than truth so that a coordinate
    # of 0 is still sent as 0.
    my ( $loxp_start, $loxp_end ) =
        map { ( defined $_ && length $_ ) ? $_ : 'null' }
        @{$allele_data}{qw(loxp_start loxp_end)};

    my $search_url =
          "/alleles.json"
        . "?mgi_accession_id="
        . $allele_data->{mgi_accession_id}
        . "&project_design_id="
        . $allele_data->{project_design_id}
        . "&loxp_start="
        . $loxp_start
        . "&loxp_end="
        . $loxp_end;

    my $error_string =
          "Error: found more than one allele for "
        . $allele_data->{mgi_accession_id}
        . ", design_id: "
        . $allele_data->{project_design_id}
        . ", loxp: "
        . $loxp_start . "-"
        . $loxp_end;

    return find( $search_url, $error_string );
}
sub find_create_update_allele {
    # Makes sure the repository holds an allele matching $allele_data and
    # returns it: an allele that is not found is created, one that differs
    # is updated, and one that already matches is returned as found.
    my ($allele_data) = @_;

    my $existing = find_allele($allele_data);

    # We need to create our allele entry
    if ( !defined $existing ) {
        my $created = create( $allele_data, 'molecular_structure', 'alleles' );
        return $created;
    }

    # compare() is true when every field we hold already matches the stored
    # allele, in which case no update is required.
    my $is_up_to_date = compare( $allele_data, $existing );
    return $existing if $is_up_to_date;

    # The update needs the stored record's id.
    $allele_data->{id} = $existing->{id};
    my $updated = update( $allele_data, 'molecular_structure', 'alleles' );
    return $updated;
}
#
# Genbank File Methods
#
sub find_genbank {
    # Searches for the genbank file belonging to the molecular structure id
    # held in $genbank_data.  Returns it, or undef when there is none; dies
    # when more than one is found.
    my ($genbank_data) = @_;

    my $structure_id = $genbank_data->{molecular_structure_id};

    return find(
        "/genbank_files.json?molecular_structure_id=" . $structure_id,
        "Error: found more than one genbank_file for " . $structure_id,
    );
}
sub find_create_update_genbank {
    # Makes sure the repository holds a genbank file matching $genbank_data
    # and returns it: a file that is not found is created, one that differs
    # is updated, and one that already matches is returned as found.
    my ($genbank_data) = @_;

    my $existing = find_genbank($genbank_data);

    # We need to create our genbank entry
    if ( !defined $existing ) {
        my $created = create( $genbank_data, 'genbank_file', 'genbank_files' );
        return $created;
    }

    # compare() is true when the stored file already matches our data,
    # in which case no update is required.
    my $is_up_to_date = compare( $genbank_data, $existing );
    return $existing if $is_up_to_date;

    # The update needs the stored record's id.
    $genbank_data->{id} = $existing->{id};
    my $updated = update( $genbank_data, 'genbank_file', 'genbank_files' );
    return $updated;
}
#
# Targeting Vector Methods
#
sub find_vector {
    # Searches for a targeting vector by name.  Returns it, or undef when
    # there is none; dies when more than one vector has that name.
    my ($vector_data) = @_;

    my $vector_name = $vector_data->{name};

    return find(
        "/targeting_vectors.json?name=" . $vector_name,
        "Error: found more than one vector called " . $vector_name,
    );
}
sub find_create_update_vector {
    # Makes sure the repository holds a targeting vector matching
    # $vector_data and returns it.
    #
    # A vector that is not found is created and linked to
    # $molecular_structure_id.  A vector that is found but differs is
    # updated, keeping the molecular_structure_id it already has.
    my ( $vector_data, $molecular_structure_id ) = @_;

    my $existing = find_vector($vector_data);

    # We need to create our vector entry
    if ( !defined $existing ) {
        $vector_data->{molecular_structure_id} = $molecular_structure_id;
        my $created =
            create( $vector_data, 'targeting_vector', 'targeting_vectors' );
        return $created;
    }

    # compare() is true when the stored vector already matches our data,
    # in which case no update is required.
    my $is_up_to_date = compare( $vector_data, $existing );
    return $existing if $is_up_to_date;

    # Carry over the stored id and the stored link to its molecular structure.
    $vector_data->{id}                     = $existing->{id};
    $vector_data->{molecular_structure_id} = $existing->{molecular_structure_id};

    my $updated =
        update( $vector_data, 'targeting_vector', 'targeting_vectors' );
    return $updated;
}
#
# ES Cell Clone Methods
#
sub find_clone {
    # Searches for an ES cell clone by name.  Returns it, or undef when
    # there is none; dies when more than one clone has that name.
    my ($clone_data) = @_;

    my $clone_name = $clone_data->{name};

    return find(
        "/es_cells.json?name=" . $clone_name,
        "Error: found more than one es cell called " . $clone_name,
    );
}
sub find_create_update_clone {
    # Makes sure the repository holds an ES cell clone matching $clone_data
    # and returns it: a clone that is not found is created, one that differs
    # is updated, and one that already matches is returned as found.
    my ($clone_data) = @_;

    my $existing = find_clone($clone_data);

    # We need to create our clone entry
    if ( !defined $existing ) {
        my $created = create( $clone_data, 'es_cell', 'es_cells' );
        return $created;
    }

    # compare() is true when the stored clone already matches our data,
    # in which case no update is required.
    my $is_up_to_date = compare( $clone_data, $existing );
    return $existing if $is_up_to_date;

    # The update needs the stored record's id.
    $clone_data->{id} = $existing->{id};
    my $updated = update( $clone_data, 'es_cell', 'es_cells' );
    return $updated;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment