terrycojones/gist:94b8c1e0401a51c9f652

## gistfile1.pl
package AC::Name;

use AC::Location;

my $thisYearFull = `date +%Y`;
# TODO: fix this on Jan 1st, 2100.
my $thisYear = $thisYearFull - 2000;

my $DEFAULT_SUBTYPE = 'H3N2';

# The values in this hash will be subject to canonicalization, so don't try
# to do that here, just get them into a form we can uniformly deal with in
# one place (i.e., below, not here).
my %globalRewrites = (
    'AX147 (A/WYOMING/3/2003)'                    => 'A/WYOMING/3/2003 AX147',
    'X-147 (A/WYOMING/3/2003)'                    => 'A/WYOMING/3/2003 AX147',
    'X147 A/WYOMONG/3/2003'                       => 'A/WYOMING/3/2003 AX147',
    'X147 A/WYOMING/3/2003'                       => 'A/WYOMING/3/2003 AX147',
    'IVR134 (A/WYOM)'                             => 'A/WYOMING/3/2003 IVR-134',
    'IVR 134 (A/WYOM)'                            => 'A/WYOMING/3/2003 IVR-134',
    'IVR-134 (A/WYOM)'                            => 'A/WYOMING/3/2003 IVR-134',
    'IVR 135 ( A/KUM/102/2002)'                   => 'A/KUMAMOTO/102/2002 IVR-135',
    'IVR135 (A/KUMA)'                             => 'A/KUMAMOTO/102/2002 IVR-135',
    'IVR 135 (A/KUMA)'                            => 'A/KUMAMOTO/102/2002 IVR-135',
    'IVR-135 (A/KUMA)'                            => 'A/KUMAMOTO/102/2002 IVR-135',
    "IVR-136  (A/C'CHURCH/28)"                    => 'A/CHRISTCHURCH/28/2003 IVR-136',
    "IVR-137 A/C'CHUURCH/28/2003"                 => 'A/CHRISTCHURCH/28/2003 IVR-137',
    "IVR-137(A/C'CHURCH/28)"                      => 'A/CHRISTCHURCH/28/2003 IVR-137',
    'IVR-137 (A/CHRISTCHURCH/28/2003)'            => 'A/CHRISTCHURCH/28/2003 IVR-137',
    'IVR-137 A/CHRISTCHURCH/28/2003'              => 'A/CHRISTCHURCH/28/2003 IVR-137',
    'IVR138 (A/WELLINGTON/1/2004)'                => 'A/WELLINGTON/1/2004 IVR-138',
    'IVR-138 (A/WELLINGTON/1/2004)'               => 'A/WELLINGTON/1/2004 IVR-138',
    'IVR 138 (A/WELLINGTON/1/2004)'               => 'A/WELLINGTON/1/2004 IVR-138',
    'IVR139 (A/WELLINGTON/1/2004)'                => 'A/WELLINGTON/1/2004 IVR-139',
    'IVR 139 (A/WELLINGTON/1/2004)'               => 'A/WELLINGTON/1/2004 IVR-139',
    'IVR-139 (A/WELLINGTON/1/2004)'               => 'A/WELLINGTON/1/2004 IVR-139',
    'A/PERTH/263/003'                             => 'A/PERTH/263/2003',
    'A/ANHUI/397/20032'                           => 'A/ANHUI/397/2003',
    'A/ANHUI/377/20032'                           => 'A/ANHUI/377/2003',
    'VN-A/DUCK/VIETNAM/NIVR-1/2003'               => 'A/DUCK/VIETNAM/NIVR-1/2003',
    '**A/CHRISTCHURCH/413/2004'                   => 'A/CHRISTCHURCH/413/2004',
    '**A/MALAYSIA/2400/2004'                      => 'A/MALAYSIA/2400/2004',
    'ALYON/1313/06'                               => 'A/LYON/1313/2006',
    'A/HONG KONGK/25572004'                       => 'A/HONG KONG/2557/2004',
    'A/HONG/KONG/1550/2002'                       => 'A/HONG-KONG/1550/2002',
    'A/BRISBANE/5/2002*'                          => 'A/BRISBANE/05/2002',
    'A/CAILIFORNIA/7/2004'                        => 'A/CALIFORNIA/7/2004',
    'A/ULAN/UDE/01/2001'                          => 'A/ULAN UDE/01/2001',
    'A.BRISBANE/332/2003'                         => 'A/BRISBANE/332/2003',
    'A/PHILIPPINES/472//2002'                     => 'A/PHILIPPINES/472/2002',
    'A/HONG KONG/136//03'                         => 'A/HONG-KONG/136/2003',
    "A/ISRAEL/3'/03"                              => 'A/ISRAEL/3/2003',
    'A/LYON/CHU/52.58/06'                         => 'A/LYON-CHU/52.58/06',
    'A/LYON/CHU/52.339/06'                        => 'A/LYON-CHU/52.339/06',
    );

sub canonicalize_str($){
    # Get rid of chars that are not allowed to appear as part of a strain/serum name.
    # Anything we don't like becomes a '-'.
    my ($s) = @_;

    # Note that in the following if I combine some subs into one line, the RHS ----
    # results in an "Ambiguous range in transliteration operator" error from perl.

    $s = uc($s);
    $s =~ s/[^0-9A-Z]/-/g;
    $s =~ s/-+/-/g;
    $s =~ s/^-+//;
    $s =~ s/-+$//;
    $s;
}

sub canonicalize_name($$;$){
    my ($name, $matrix, $default_subtype) = @_;
    $default_subtype = $DEFAULT_SUBTYPE unless defined $default_subtype;
    my ($letter, $species, $location, $middle, $year, $rest, $subtype);
    # print STDERR "Canonicalize '$name' -> ";

    # Look for a global rewrite.
    $name = uc($globalRewrites{$name}) if (exists $globalRewrites{uc($name)} || exists $globalRewrites{$name});

    if ($name =~ /^(.*)\(\s*(H\s*\d+\s*N\s*\d+)\s*\)\s*$/i){
        $name = $1;
        $subtype = $2;
        $name =~ s/ +$//;
        $subtype =~ s/\s//g;
        $subtype = canonicalize_str($subtype);
    }
    else {
        $subtype = $default_subtype;
    }

    if (0){
        # Nothing. This is here for indenting purposes - so all regexes below are equally indented.
    }
    elsif ($name =~ m:^([AB])          # Strain type
                       /
                       ([^/]*)         # Species
                       /
                       ([^/]*)         # Location
                       /
                       ([^/]*)         # Middle part
                       /
                       (\d\d|\d\d\d\d) # Year
                       \s*$            # No rest
                  :xi){
        ($letter, $species, $location, $middle, $year, $rest) = ($1, $2, $3, $4, $5, '');
        #print STDERR "Part 1\n";
    }
    elsif ($name =~ m:^([AB])          # Strain type
                       /
                       ([^/]*)         # Species
                       /
                       ([^/]*)         # Location
                       /
                       ([^/]*)         # Middle part
                       /
                       (\d\d|\d\d\d\d) # Year
                       \s+             # Some form of whitespace separating the rest.
                       (.*)            # Rest can be anything.
                  :xi){
        ($letter, $species, $location, $middle, $year, $rest) = ($1, $2, $3, $4, $5, $6);
        #print STDERR "Part 2\n";
    }
    elsif ($name =~ m:^([AB])          # Strain type
                       /
                       ([^/]*)         # Species
                       /
                       ([^/]*)         # Location
                       /
                       ([^/]*)         # Middle part
                       /
                       (\d\d|\d\d\d\d) # Year
                                       # No space before rest.
                       ([^\d/].*)      # Rest cannot start with a digit or slash.
                  :xi){
        ($letter, $species, $location, $middle, $year, $rest) = ($1, $2, $3, $4, $5, $6);
        #print STDERR "Part 3\n";
    }
    elsif ($name =~ m:^([AB])          # Strain type
                       /
                                       # No species part.
                       ([^/]*)         # Location
                       /
                       ([^/]*)         # Middle part
                       /
                       (\d\d|\d\d\d\d) # Year
                       \s*$
                                       # No rest.
                     :xi){
        ($letter, $species, $location, $middle, $year, $rest) = ($1, 'HUMAN', $2, $3, $4, '');
        #print STDERR "Part 4\n";
    }
    elsif ($name =~ m:^([AB])          # Strain type
                       /
                                       # No species part.
                       ([^/]*)         # Location
                       /
                       ([^/]*)         # Middle part
                       /
                       (\d\d|\d\d\d\d) # Year
                       \s+             # Some form of whitespace separating the rest.
                       (.*)            # Rest can be anything.
                     :xi){
        ($letter, $species, $location, $middle, $year, $rest) = ($1, 'HUMAN', $2, $3, $4, $5);
        #print STDERR "Part 5\n";
    }
    elsif ($name =~ m:^([AB])          # Strain type
                       /
                                       # No species part.
                       ([^/]*)         # Location
                       /
                       ([^/]*)         # Middle part
                       /
                       (\d\d|\d\d\d\d) # Year
                       ([^\d/].*)      # Rest cannot start with a digit or slash.
                     :xi){
        ($letter, $species, $location, $middle, $year, $rest) = ($1, 'HUMAN', $2, $3, $4, $5);
        #print STDERR "Part 6\n";
    }
    elsif ($name =~ m:/.*/:){
        # Anything with at least 2 slashes might have been meant to be a name.
        # Leave $letter undefined here.
        $matrix->warning("'$name' looks like it might be a strain/serum name, but it could not be parsed.");
    }

    if (defined $letter){
        #print STDERR "$name = L=$letter, S=$species, L=$location, M=$middle, Y=$year, R=$rest\n";
        ($letter, $species, $location, $middle, $year, $rest) = map {
            canonicalize_str($_) } ($letter, $species, $location, $middle, $year, $rest);


        $species = AC::Species::canonicalizeSpecies($species);
        $location = AC::Location::canonicalizeLocation($location);

        # Don't allow the middle part to start with leading zeroes.
        $middle =~ s/^0+//;
        # Don't allow the middle part to start or end with a hyphen.
        $middle =~ s/-$//;
        $middle =~ s/^-//;

        # The rest part had better not have any slashes.
        $rest =~ y|/|-|;

        # Tack anything that appears at the end (after the date) onto the middle part.
        if ($rest ne '-' && $rest ne ''){
            if (substr($rest, 0, 1) ne '-'){
                $middle .= '-';
            }
            $rest =~ s/-$//;
            $middle .= $rest;
        }

        $year = canonicalize_year($year, $matrix);

        $name = "${letter}_$subtype/$species/$location/$middle/$year";
    }
    else {
        #print STDERR "NON-CANON: '$name' -> ";
        my $orig = $name;
        $name = canonicalize_str($name);
        #print STDERR "'$name'.\n";
        $matrix->warning("Non-canonical name '$orig' -> '$name'.");
    }

    # print STDERR "canonicalized to '$name'.\n";

    return $name;
}

sub canonicalize_year($;$)
{
    my ($year, $matrix) = @_;
    # make 4-digit year
    if (length($year) == 2) {
        if ($year > $thisYear) {
            # This must be in the 20th century. Otherwise it would have to be in the future.
            $year = "19$year";
        }
        else {
            $year = "20$year";
        }
    }
    elsif (length($year) == 4 && $year =~ /^(19|20)\d\d$/) {
        if (int($year) > $thisYearFull || int($year) < 1968) {
            if (defined $matrix) {
                print STDERR "Found an implausible year: $year.\n";
                $matrix->warning("Found an implausible year: $year.");
            }
        }
    }
    else {
        if (defined $matrix) {
            print STDERR "Found an implausible or wrong-length year: $year.\n";
            $matrix->warning("Found an implausible or wrong-length year ($year).");
        }
    }
    return $year;
}

sub canonicalize_location_and_year_in_name($;$)
{
    my ($name, $matrix) = @_;
    my ($letter, $species, $location, $middle, $year, $rest, $subtype);

    if (0){
        # Nothing. This is here for indenting purposes - so all regexes below are equally indented.
    }
    elsif ($name =~ m:^([AB])          # Strain type
                                       # No Species
                       /
                       ([^/]*)         # Location
                       /
                       ([^/]*)         # Middle part
                       /
                       (\d\d|\d\d\d\d) # Year
                       (.*)$            # No rest
                  :xi){
        ($letter, $location, $middle, $year, $rest) = ($1, $2, $3, $4, $5);
    }

    if (defined $letter) {
        if (length($year) == 2) {
            if (int($year) > 68) {
                $year = "19$year";
            }
            elsif (int($year) <= $thisYear) {
                $year = "20$year";
            }
        }
        $location = AC::Location::canonicalizeLocation($location);
        $letter = uc($letter);
        $middle = uc($middle);
        $rest = uc($rest);
        if (defined $species) {
            $name = "$letter/$species/$location/$middle/$year$rest";
        }
        else {
            $name = "$letter/$location/$middle/$year$rest";
        }
    }
    return $name;
}

1;
	package AC::Name;

	use AC::Location;

	my $thisYearFull = `date +%Y`;
	# TODO: fix this on Jan 1st, 2100.
	my $thisYear = $thisYearFull - 2000;

	my $DEFAULT_SUBTYPE = 'H3N2';

	# The values in this hash will be subject to canonicalization, so don't try
	# to do that here, just get them into a form we can uniformly deal with in
	# one place (i.e., below, not here).
	my %globalRewrites = (
	'AX147 (A/WYOMING/3/2003)' => 'A/WYOMING/3/2003 AX147',
	'X-147 (A/WYOMING/3/2003)' => 'A/WYOMING/3/2003 AX147',
	'X147 A/WYOMONG/3/2003' => 'A/WYOMING/3/2003 AX147',
	'X147 A/WYOMING/3/2003' => 'A/WYOMING/3/2003 AX147',
	'IVR134 (A/WYOM)' => 'A/WYOMING/3/2003 IVR-134',
	'IVR 134 (A/WYOM)' => 'A/WYOMING/3/2003 IVR-134',
	'IVR-134 (A/WYOM)' => 'A/WYOMING/3/2003 IVR-134',
	'IVR 135 ( A/KUM/102/2002)' => 'A/KUMAMOTO/102/2002 IVR-135',
	'IVR135 (A/KUMA)' => 'A/KUMAMOTO/102/2002 IVR-135',
	'IVR 135 (A/KUMA)' => 'A/KUMAMOTO/102/2002 IVR-135',
	'IVR-135 (A/KUMA)' => 'A/KUMAMOTO/102/2002 IVR-135',
	"IVR-136 (A/C'CHURCH/28)" => 'A/CHRISTCHURCH/28/2003 IVR-136',
	"IVR-137 A/C'CHUURCH/28/2003" => 'A/CHRISTCHURCH/28/2003 IVR-137',
	"IVR-137(A/C'CHURCH/28)" => 'A/CHRISTCHURCH/28/2003 IVR-137',
	'IVR-137 (A/CHRISTCHURCH/28/2003)' => 'A/CHRISTCHURCH/28/2003 IVR-137',
	'IVR-137 A/CHRISTCHURCH/28/2003' => 'A/CHRISTCHURCH/28/2003 IVR-137',
	'IVR138 (A/WELLINGTON/1/2004)' => 'A/WELLINGTON/1/2004 IVR-138',
	'IVR-138 (A/WELLINGTON/1/2004)' => 'A/WELLINGTON/1/2004 IVR-138',
	'IVR 138 (A/WELLINGTON/1/2004)' => 'A/WELLINGTON/1/2004 IVR-138',
	'IVR139 (A/WELLINGTON/1/2004)' => 'A/WELLINGTON/1/2004 IVR-139',
	'IVR 139 (A/WELLINGTON/1/2004)' => 'A/WELLINGTON/1/2004 IVR-139',
	'IVR-139 (A/WELLINGTON/1/2004)' => 'A/WELLINGTON/1/2004 IVR-139',
	'A/PERTH/263/003' => 'A/PERTH/263/2003',
	'A/ANHUI/397/20032' => 'A/ANHUI/397/2003',
	'A/ANHUI/377/20032' => 'A/ANHUI/377/2003',
	'VN-A/DUCK/VIETNAM/NIVR-1/2003' => 'A/DUCK/VIETNAM/NIVR-1/2003',
	'**A/CHRISTCHURCH/413/2004' => 'A/CHRISTCHURCH/413/2004',
	'**A/MALAYSIA/2400/2004' => 'A/MALAYSIA/2400/2004',
	'ALYON/1313/06' => 'A/LYON/1313/2006',
	'A/HONG KONGK/25572004' => 'A/HONG KONG/2557/2004',
	'A/HONG/KONG/1550/2002' => 'A/HONG-KONG/1550/2002',
	'A/BRISBANE/5/2002*' => 'A/BRISBANE/05/2002',
	'A/CAILIFORNIA/7/2004' => 'A/CALIFORNIA/7/2004',
	'A/ULAN/UDE/01/2001' => 'A/ULAN UDE/01/2001',
	'A.BRISBANE/332/2003' => 'A/BRISBANE/332/2003',
	'A/PHILIPPINES/472//2002' => 'A/PHILIPPINES/472/2002',
	'A/HONG KONG/136//03' => 'A/HONG-KONG/136/2003',
	"A/ISRAEL/3'/03" => 'A/ISRAEL/3/2003',
	'A/LYON/CHU/52.58/06' => 'A/LYON-CHU/52.58/06',
	'A/LYON/CHU/52.339/06' => 'A/LYON-CHU/52.339/06',
	);

	sub canonicalize_str($){
	# Get rid of chars that are not allowed to appear as part of a strain/serum name.
	# Anything we don't like becomes a '-'.
	my ($s) = @_;

	# Note that in the following if I combine some subs into one line, the RHS ----
	# results in an "Ambiguous range in transliteration operator" error from perl.

	$s = uc($s);
	$s =~ s/[^0-9A-Z]/-/g;
	$s =~ s/-+/-/g;
	$s =~ s/^-+//;
	$s =~ s/-+$//;
	$s;
	}

	sub canonicalize_name($$;$){
	my ($name, $matrix, $default_subtype) = @_;
	$default_subtype = $DEFAULT_SUBTYPE unless defined $default_subtype;
	my ($letter, $species, $location, $middle, $year, $rest, $subtype);
	# print STDERR "Canonicalize '$name' -> ";

	# Look for a global rewrite.
	$name = uc($globalRewrites{$name}) if (exists $globalRewrites{uc($name)} \|\| exists $globalRewrites{$name});

	if ($name =~ /^(.)\(\s(H\s\d+\sN\s\d+)\s\)\s*$/i){
	$name = $1;
	$subtype = $2;
	$name =~ s/ +$//;
	$subtype =~ s/\s//g;
	$subtype = canonicalize_str($subtype);
	}
	else {
	$subtype = $default_subtype;
	}

	if (0){
	# Nothing. This is here for indenting purposes - so all regexes below are equally indented.
	}
	elsif ($name =~ m:^([AB]) # Strain type
	/
	([^/]*) # Species
	/
	([^/]*) # Location
	/
	([^/]*) # Middle part
	/
	(\d\d\|\d\d\d\d) # Year
	\s*$ # No rest
	:xi){
	($letter, $species, $location, $middle, $year, $rest) = ($1, $2, $3, $4, $5, '');
	#print STDERR "Part 1\n";
	}
	elsif ($name =~ m:^([AB]) # Strain type
	/
	([^/]*) # Species
	/
	([^/]*) # Location
	/
	([^/]*) # Middle part
	/
	(\d\d\|\d\d\d\d) # Year
	\s+ # Some form of whitespace separating the rest.
	(.*) # Rest can be anything.
	:xi){
	($letter, $species, $location, $middle, $year, $rest) = ($1, $2, $3, $4, $5, $6);
	#print STDERR "Part 2\n";
	}
	elsif ($name =~ m:^([AB]) # Strain type
	/
	([^/]*) # Species
	/
	([^/]*) # Location
	/
	([^/]*) # Middle part
	/
	(\d\d\|\d\d\d\d) # Year
	# No space before rest.
	([^\d/].*) # Rest cannot start with a digit or slash.
	:xi){
	($letter, $species, $location, $middle, $year, $rest) = ($1, $2, $3, $4, $5, $6);
	#print STDERR "Part 3\n";
	}
	elsif ($name =~ m:^([AB]) # Strain type
	/
	# No species part.
	([^/]*) # Location
	/
	([^/]*) # Middle part
	/
	(\d\d\|\d\d\d\d) # Year
	\s*$
	# No rest.
	:xi){
	($letter, $species, $location, $middle, $year, $rest) = ($1, 'HUMAN', $2, $3, $4, '');
	#print STDERR "Part 4\n";
	}
	elsif ($name =~ m:^([AB]) # Strain type
	/
	# No species part.
	([^/]*) # Location
	/
	([^/]*) # Middle part
	/
	(\d\d\|\d\d\d\d) # Year
	\s+ # Some form of whitespace separating the rest.
	(.*) # Rest can be anything.
	:xi){
	($letter, $species, $location, $middle, $year, $rest) = ($1, 'HUMAN', $2, $3, $4, $5);
	#print STDERR "Part 5\n";
	}
	elsif ($name =~ m:^([AB]) # Strain type
	/
	# No species part.
	([^/]*) # Location
	/
	([^/]*) # Middle part
	/
	(\d\d\|\d\d\d\d) # Year
	([^\d/].*) # Rest cannot start with a digit or slash.
	:xi){
	($letter, $species, $location, $middle, $year, $rest) = ($1, 'HUMAN', $2, $3, $4, $5);
	#print STDERR "Part 6\n";
	}
	elsif ($name =~ m:/.*/:){
	# Anything with at least 2 slashes might have been meant to be a name.
	# Leave $letter undefined here.
	$matrix->warning("'$name' looks like it might be a strain/serum name, but it could not be parsed.");
	}

	if (defined $letter){
	#print STDERR "$name = L=$letter, S=$species, L=$location, M=$middle, Y=$year, R=$rest\n";
	($letter, $species, $location, $middle, $year, $rest) = map {
	canonicalize_str($_) } ($letter, $species, $location, $middle, $year, $rest);


	$species = AC::Species::canonicalizeSpecies($species);
	$location = AC::Location::canonicalizeLocation($location);

	# Don't allow the middle part to start with leading zeroes.
	$middle =~ s/^0+//;
	# Don't allow the middle part to start or end with a hyphen.
	$middle =~ s/-$//;
	$middle =~ s/^-//;

	# The rest part had better not have any slashes.
	$rest =~ y\|/\|-\|;

	# Tack anything that appears at the end (after the date) onto the middle part.
	if ($rest ne '-' && $rest ne ''){
	if (substr($rest, 0, 1) ne '-'){
	$middle .= '-';
	}
	$rest =~ s/-$//;
	$middle .= $rest;
	}

	$year = canonicalize_year($year, $matrix);

	$name = "${letter}_$subtype/$species/$location/$middle/$year";
	}
	else {
	#print STDERR "NON-CANON: '$name' -> ";
	my $orig = $name;
	$name = canonicalize_str($name);
	#print STDERR "'$name'.\n";
	$matrix->warning("Non-canonical name '$orig' -> '$name'.");
	}

	# print STDERR "canonicalized to '$name'.\n";

	return $name;
	}

	sub canonicalize_year($;$)
	{
	my ($year, $matrix) = @_;
	# make 4-digit year
	if (length($year) == 2) {
	if ($year > $thisYear) {
	# This must be in the 20th century. Otherwise it would have to be in the future.
	$year = "19$year";
	}
	else {
	$year = "20$year";
	}
	}
	elsif (length($year) == 4 && $year =~ /^(19\|20)\d\d$/) {
	if (int($year) > $thisYearFull \|\| int($year) < 1968) {
	if (defined $matrix) {
	print STDERR "Found an implausible year: $year.\n";
	$matrix->warning("Found an implausible year: $year.");
	}
	}
	}
	else {
	if (defined $matrix) {
	print STDERR "Found an implausible or wrong-length year: $year.\n";
	$matrix->warning("Found an implausible or wrong-length year ($year).");
	}
	}
	return $year;
	}

	sub canonicalize_location_and_year_in_name($;$)
	{
	my ($name, $matrix) = @_;
	my ($letter, $species, $location, $middle, $year, $rest, $subtype);

	if (0){
	# Nothing. This is here for indenting purposes - so all regexes below are equally indented.
	}
	elsif ($name =~ m:^([AB]) # Strain type
	# No Species
	/
	([^/]*) # Location
	/
	([^/]*) # Middle part
	/
	(\d\d\|\d\d\d\d) # Year
	(.*)$ # No rest
	:xi){
	($letter, $location, $middle, $year, $rest) = ($1, $2, $3, $4, $5);
	}

	if (defined $letter) {
	if (length($year) == 2) {
	if (int($year) > 68) {
	$year = "19$year";
	}
	elsif (int($year) <= $thisYear) {
	$year = "20$year";
	}
	}
	$location = AC::Location::canonicalizeLocation($location);
	$letter = uc($letter);
	$middle = uc($middle);
	$rest = uc($rest);
	if (defined $species) {
	$name = "$letter/$species/$location/$middle/$year$rest";
	}
	else {
	$name = "$letter/$location/$middle/$year$rest";
	}
	}
	return $name;
	}

	1;