Skip to content

Instantly share code, notes, and snippets.

@Ovid
Last active October 2, 2023 11:05
Show Gist options
  • Save Ovid/a2e828965ce3586a83bfedd7e86a8d20 to your computer and use it in GitHub Desktop.
Save Ovid/a2e828965ce3586a83bfedd7e86a8d20 to your computer and use it in GitHub Desktop.
Molecular Assembly Number In Pure Perl
#!/usr/bin/env perl
use v5.14.0;
use warnings;
use JSON::PP qw(decode_json);
use Data::Dumper;
use Getopt::Long;
# Script entry point.
#
# Parses the --perl and --verbose switches, joins the remaining command-line
# words into a single molecule name, resolves that name to an InChI string,
# fetches the molecular assembly data for it, and prints the result either
# verbatim or as a Data::Dumper rendering of the decoded JSON (--perl).
my ( $perl, $verbose );
GetOptions(
    'perl'    => \$perl,
    'verbose' => \$verbose,
) or die "Bad options";

# Multi-word names such as "iso-propyl cyanide" arrive as several arguments.
# $molecule stays file-scoped because get_ma_data() reads it in its warning.
my $molecule = join ' ', @ARGV;
die "Usage: $0 molecule" unless $molecule;

my $inchi   = get_inchi( $molecule, $verbose );
my $ma_data = get_ma_data( $inchi, $verbose );

if ( !$perl ) {
    print $ma_data;
}
else {
    # Same settings the procedural interface would get from
    # $Data::Dumper::Indent / Sortkeys / Terse, applied to one dumper object.
    my $dumper = Data::Dumper->new( [ decode_json($ma_data) ] );
    print $dumper->Indent(1)->Sortkeys(1)->Terse(1)->Dump;
}
# Percent-encode a string for use inside a URL path segment.
#
# Every byte is written as %XX (uppercase hex), including letters and digits.
# Percent-encoding unreserved characters is permitted, and it leaves nothing
# in the result that needs further quoting.
#
# Argument: the text to encode.
# Returns:  the encoded string ('' for an empty input).
#
# Strings containing characters above U+00FF are converted to UTF-8 first so
# that each of their bytes is encoded. Without that step such characters have
# no single-byte representation and would be lost. Strings made only of
# characters 0..255 are treated as raw bytes.
sub uri_encode {
    my $text = shift;

    # Only wide-character strings are re-encoded; byte strings (the normal
    # case for @ARGV) must pass through untouched to avoid double encoding.
    utf8::encode($text) if $text =~ /[^\x00-\xFF]/;

    return join '', map { sprintf( "%%%02X", ord($_) ) } split //, $text;
}
# Resolve a molecule name to an InChI string via the cactus.nci.nih.gov
# chemical structure service.
#
# Arguments:
#   $molecule - molecule name as typed by the user
#   $verbose  - when true, the curl command and the InChI found are
#               reported through warn
#
# Returns the response with its trailing newline removed. The script exits
# with status 1 when the response is empty, and with status 2 when the
# response does not begin with "InChI=".
sub get_inchi {
    my ( $molecule, $verbose ) = @_;

    my $url = 'https://cactus.nci.nih.gov/chemical/structure/'
        . uri_encode($molecule)
        . '/stdinchi';
    my $command = "curl -s $url";
    warn $command if $verbose;

    my $inchi = `$command`;
    chomp $inchi;

    # Nothing came back at all.
    unless ($inchi) {
        warn "Could not determine InChI for $molecule";
        exit 1;
    }

    # Something came back, but it is not an InChI string.
    unless ( $inchi =~ /\AInChI=/ ) {
        warn "Could not determine InChI for '$molecule'";
        exit 2;
    }

    warn "InChI: $inchi" if $verbose;
    return $inchi;
}
# Fetch the molecular assembly data for an InChI string from the
# croninburgh.chem.gla.ac.uk batch_lookup endpoint.
#
# Arguments:
#   $inchi   - InChI string, sent as the "i0" query parameter
#   $verbose - when true, the equivalent curl command line is reported
#              through warn
#
# Returns the raw response body. If curl cannot be started or the body is
# empty, a warning is issued and the script exits with status 2.
#
# curl is started with a list-form pipe open, so no shell is involved.
# $inchi originates from a remote service and is only checked for its
# "InChI=" prefix, so it must never be interpolated into a shell command:
# quotes, backticks or $(...) in it would otherwise be interpreted.
sub get_ma_data {
    my ( $inchi, $verbose ) = @_;

    my $endpoint = 'https://croninburgh.chem.gla.ac.uk/batch_lookup';
    my @command  = (
        'curl', '-s', '-G', $endpoint,
        '--data-urlencode', "i0=$inchi",
        '--data-urlencode', 'n=1',
    );

    if ($verbose) {
        # Shell-like rendering for the reader only; this string is never run.
        warn qq{curl -s -G $endpoint --data-urlencode i0="$inchi" --data-urlencode n=1};
    }

    my $response;
    if ( open my $curl, '-|', @command ) {
        local $/;    # read the whole body in one go
        $response = <$curl>;

        # curl's exit status is not inspected; an empty body is handled below.
        close $curl;
    }
    else {
        warn "Could not run curl: $!";
    }

    if ( !$response ) {
        # $molecule is the file-scoped lexical set by the main script, not a
        # parameter of this sub.
        warn "Could not determine molecular assembly index for $molecule";
        exit 2;
    }
    return $response;
}
__END__
=head1 NAME
ma.pl - Given a molecule name, print the molecular assembly number
=head1 USAGE
$ perl ma.pl tryptophan
[{"MA":11,"inchi":"InChI=1S/C11H12N2O2/c12-9(11(14)15)5-7-6-13-10-4-2-1-3-8(7)10/h1-4,6,9,13H,5,12H2,(H,14,15)/t9-/m0/s1","method":"exact"}]
$ perl ma.pl iso-propyl cyanide
[{"MA":3,"inchi":"InChI=1S/C4H7N/c1-4(2)3-5/h4H,1-2H3","method":"exact"}]
Given a molecule name, this script attempts to print out the molecular assembly data. (The C<MA>
number is the assembly index.)
This data is currently being used, amongst other things, for searching for extraterrestrial
life.
http://molecular-assembly.com/
=head1 OPTIONS
=over 4
=item B<--perl,-p>
Print the data as Perl code.
=item B<--verbose,-v>
Print out the commands being run, along with the InChI code found (if any).
=back
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment