Skip to content

Instantly share code, notes, and snippets.

Last active October 2, 2023 11:05
Show Gist options
  • Save Ovid/a2e828965ce3586a83bfedd7e86a8d20 to your computer and use it in GitHub Desktop.
Save Ovid/a2e828965ce3586a83bfedd7e86a8d20 to your computer and use it in GitHub Desktop.
Molecular Assembly Number In Pure Perl
#!/usr/bin/env perl
use v5.14.0;
use warnings;
use JSON::PP qw(decode_json);
use Data::Dumper;
use Getopt::Long;
# Parse the command-line switches. --perl selects Data::Dumper output of the
# decoded JSON; --verbose is handed to the lookup helpers below.
GetOptions(
    'perl'    => \my $perl,
    'verbose' => \my $verbose,
) or die "Bad options";

# Everything left on the command line is treated as one molecule name, so a
# multi-word name does not need shell quoting. An empty name is a usage error.
my $molecule = join ' ', @ARGV or die "Usage: $0 molecule";
my $inchi    = get_inchi( $molecule, $verbose );
my $ma_data  = get_ma_data( $inchi, $verbose );

if ($perl) {

    # Compact, key-sorted dump without the leading "$VAR1 =".
    local $Data::Dumper::Indent   = 1;
    local $Data::Dumper::Sortkeys = 1;
    local $Data::Dumper::Terse    = 1;
    print Dumper( decode_json($ma_data) );
}
else {

    # Default: emit the service response exactly as received.
    print $ma_data;
}

# Percent-encode a string for use in a URL.
#
# Arguments:
#   $molecule - the text to encode
#
# Returns a string in which every byte of the input is written as %XX
# (upper-case hex); nothing is passed through unescaped.
#
# Characters above 0xFF are first converted to their UTF-8 bytes. Strings
# that contain only characters 0x00-0xFF are encoded one character per byte,
# as before.
sub uri_encode {
    my $molecule = shift;

    # Only wide characters need converting; touching pure byte strings here
    # would change the encoding of characters 0x80-0xFF.
    utf8::encode($molecule) if $molecule =~ /[^\x00-\xFF]/;

    return join '', map { sprintf( "%%%02X", ord($_) ) } split //, $molecule;
}

# Look up the InChI identifier for a molecule name by running curl.
#
# Arguments:
#   $molecule - molecule name as given by the user
#   $verbose  - when true, the curl command and the InChI found are
#               reported with warn
#
# Returns the chomped output of the curl command.
#
# Does not return on failure: exits with status 1 when the command produced
# no output, and with status 2 when the output does not start with "InChI=".
sub get_inchi {
    my ( $molecule, $verbose ) = @_;
    my $encoded_molecule = uri_encode($molecule);

    # NOTE(review): no scheme or host is visible in front of the encoded
    # name -- confirm the base URL of the lookup service.
    my $url     = "$encoded_molecule/stdinchi";
    my $command = qq{curl -s $url};
    if ($verbose) {
        warn $command;
    }

    chomp( my $inchi = `$command` );
    if ( !$inchi ) {
        warn "Could not determine InChI for $molecule";
        exit 1;
    }
    elsif ( $inchi !~ /\AInChI=/ ) {

        # Something came back, but it is not an InChI string.
        warn "Could not determine InChI for '$molecule'";
        exit 2;
    }

    if ($verbose) {
        warn "InChI: $inchi";
    }
    return $inchi;
}

# Fetch the molecular assembly data for an InChI string by running curl.
#
# Arguments:
#   $inchi   - InChI identifier, sent as the "i0" query parameter
#   $verbose - when true, the curl command line is reported with warn
#
# Returns the raw output of the curl command.
#
# Does not return on failure: exits with status 2 when curl could not be
# started or produced no output.
sub get_ma_data {
    my ( $inchi, $verbose ) = @_;

    # $inchi originates from a remote response, so curl is started with an
    # argument list instead of a shell command line: nothing in the value
    # can be interpreted by a shell.
    # NOTE(review): no endpoint URL is visible among the curl arguments --
    # confirm the service URL that belongs here.
    my @curl_args = (
        '-s', '-G',
        '--data-urlencode', "i0=$inchi",
        '--data-urlencode', 'n=1',
    );

    if ($verbose) {
        warn qq{curl -s -G --data-urlencode i0="$inchi" --data-urlencode n=1};
    }

    my $response = '';
    if ( open my $curl, '-|', 'curl', @curl_args ) {
        local $/;    # slurp the whole response in one read
        $response = <$curl>;
        close $curl;
    }

    if ( !$response ) {

        # Report the value this sub was actually given; the molecule name is
        # not one of its arguments.
        warn "Could not determine molecular assembly index for $inchi";
        exit 2;
    }
    return $response;
}


=head1 NAME

Given a molecule name, print the molecular assembly number


=head1 USAGE

    $ perl <script> tryptophan
    $ perl <script> iso-propyl cyanide


=head1 DESCRIPTION

Given a molecule name, this script attempts to print out the molecular assembly data. (The C<MA>
number is the assembly index.)

This data is currently being used, amongst other things, for searching for extraterrestrial
life.


=head1 OPTIONS

=over 4

=item B<--perl,-p>

Print the data as Perl code.

=item B<--verbose,-v>

Print out the commands being run, along with the InChI code found (if any).

=back

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment