dloscutoff/nameGenerator.py

## nameGenerator.py
#!/usr/bin/python3

import re, random

# A regex that matches a syllable, with three groups for the three
# segments of the syllable: onset (initial consonants), nucleus (vowels),
# and coda (final consonants).
# The letter y counts as a vowel, except that a syllable-initial y is
# consumed on its own as the whole onset (the first alternative of the
# first group), as in "yu-go-sla-v ia" or the lone y of "lib-y a".
# The regex also matches if there is just an onset (even an empty
# onset); this case corresponds to the final partial syllable of the
# stem, which is usually the consonant before a vowel ending (for
# example, the d in "ca-na-d a").
syllableRgx = re.compile(r"(y|[^aeiouy]*)([aeiouy]+|$)([^aeiouy]*)")
# Training data file; the relative path is resolved against the current
# working directory when the module is loaded
nameFile = "names.txt"

# Dictionary that holds the frequency of each syllable count (note that these
# are the syllables *before* the ending, so "al-ba-n ia" only counts two)
syllableCounts = {}

# List of four dictionaries (for onsets, nuclei, codas, and endings):
# Each dictionary's key/value pairs are prevSegment:segmentDict, where
# segmentDict is a frequency dictionary of various onsets, nuclei, codas,
# or endings, and prevSegment is a segment that can be the last nonempty
# segment preceding them. A prevSegment of None marks segments at the
# beginnings of names.
segmentData = [{}, {}, {}, {}]
# Indices into segmentData. The first three also match the order of the
# groups in syllableRgx, so they double as indices into a match's groups().
ONSET = 0
NUCLEUS = 1
CODA = 2
ENDING = 3

# Populate syllableCounts and segmentData from the training names in nameFile
with open(nameFile) as f:
    for line in f:
        line = line.strip()
        # Blank lines and "#" comment lines carry no training data
        if not line or line.startswith("#"):
            continue
        stem, ending = line.split()
        # Endings are stored as noun/adj. When only the noun ending is
        # given, the adjective ending is the noun ending plus -n.
        if "/" not in ending:
            ending = "{}/{}n".format(ending, ending)
        # Each hyphen closes one full syllable, so the hyphen count is the
        # number of syllables before the ending
        syllableCount = stem.count("-")
        syllableCounts[syllableCount] = syllableCounts.get(syllableCount, 0) + 1

        # Record every segment of the stem under the last nonempty segment
        # that came before it (None at the start of the name)
        prevSegment = None
        for syllable in stem.split("-"):
            segments = syllableRgx.match(syllable).groups()
            if not (segments[NUCLEUS] or segments[CODA]):
                # A syllable with empty nucleus and coda comes right before
                # the ending, so only its onset is recorded
                segments = segments[:ONSET + 1]
            for segType, segment in enumerate(segments):
                segFrequencies = segmentData[segType].setdefault(prevSegment, {})
                segFrequencies[segment] = segFrequencies.get(segment, 0) + 1
                # Empty segments never become the "previous" segment
                prevSegment = segment or prevSegment
        # Finally, record which ending followed the last nonempty segment
        endFrequencies = segmentData[ENDING].setdefault(prevSegment, {})
        endFrequencies[ending] = endFrequencies.get(ending, 0) + 1


def randFromFrequencies(dictionary):
    """Return a random key of dictionary, weighted by its frequency value.

    dictionary maps each candidate key to a non-negative integer
    frequency; a key with frequency k is k times as likely to be chosen
    as a key with frequency 1, and a key with frequency 0 is never chosen.

    Raises ValueError if the frequencies do not add up to a positive
    total (for example, when the dictionary is empty).
    """

    total = sum(dictionary.values())
    if total <= 0:
        # random.randrange would raise ValueError here as well; raise it
        # ourselves so that the message names the actual problem
        raise ValueError("randFromFrequencies needs a positive total "
                         "frequency (got {})".format(total))
    # Pick a position in [0, total) and walk the keys, letting each key
    # claim a stretch of positions as wide as its frequency
    index = random.randrange(total)
    for key, freq in dictionary.items():
        if index < freq:
            # Select this one
            return key
        index -= freq
    # Weird, should have returned something
    raise ValueError("randFromFrequencies didn't pick a value "
                     "(index remainder is {})".format(index))

def markovName(syllableCount):
    """Generate a country name using a Markov-chain-like process.

    Each segment (onset, nucleus, coda) is drawn at random from the
    frequencies recorded in segmentData for the most recent nonempty
    segment. syllableCount is the number of full syllables to generate
    before the final partial syllable (an onset, possibly empty) and the
    ending.

    Returns a (stem, endings) tuple, where stem is the generated stem and
    endings is the list obtained by splitting a "noun/adj" ending string
    on "/". Returns None if the chain reaches a segment with no recorded
    successors, or if the finished name is rejected by one of the filters
    at the end of this function; the caller can simply try again.
    """

    prevSegment = None
    stem = ""
    for _ in range(syllableCount):
        for segType in (ONSET, NUCLEUS, CODA):
            try:
                segFrequencies = segmentData[segType][prevSegment]
            except KeyError:
                # In the unusual situation that the chain fails to find an
                # appropriate next segment, it's too complicated to try to
                # roll back and pick a better prevSegment; so instead,
                # return None and let the caller generate a new name
                return None
            segment = randFromFrequencies(segFrequencies)
            stem += segment
            if segment:
                prevSegment = segment

    # The onset of the final partial syllable is drawn from the same kind
    # of table, so a missing entry is the same dead end as above and must
    # not escape as a KeyError
    try:
        onsetFrequencies = segmentData[ONSET][prevSegment]
    except KeyError:
        return None
    # Try different onsets for the last syllable till we find one that's
    # legal before an ending; we also allow empty onsets. Because it's
    # possible we won't find one, the number of attempts is limited. If
    # every attempt fails, the last onset drawn is used anyway and the
    # name gets the fallback ending below.
    for _ in range(10):
        endingOnset = randFromFrequencies(onsetFrequencies)
        if endingOnset == "" or endingOnset in segmentData[ENDING]:
            break
    stem += endingOnset
    if endingOnset:
        prevSegment = endingOnset

    if prevSegment in segmentData[ENDING]:
        # Pick an ending that goes with the prevSegment
        endings = randFromFrequencies(segmentData[ENDING][prevSegment])
    else:
        # It can happen (with an empty last-syllable onset, or when no
        # legal onset was found) that the previous segment does not appear
        # before any ending in the data set. In this case, we'll just use
        # -a(n) for the ending.
        endings = "a/an"
    endings = endings.split("/")
    nounForm = stem + endings[0]

    # Filter out names that are too short (two-letter names like Mo) or
    # too long (mouthfuls like Imbadossorbia)
    if not 3 <= len(nounForm) <= 11:
        return None
    # Filter out names with weird consonant clusters at the end
    if nounForm.endswith(("bl", "tn", "sr", "sn", "sm", "shm")):
        return None
    # Filter out names that sound like anatomical references
    bannedSubstrings = ("vag", "coc", "cok", "kok", "peni")
    if any(banned in stem for banned in bannedSubstrings):
        return None
    if nounForm == "ass":
        # This isn't a problem if it's part of a larger name like Assyria,
        # so filter it out only if it's the entire name
        return None
    return stem, endings


def printCountryNames(count):
    "Print count generated names, one per line, as 'Noun (Adjective)'."

    for _ in range(count):
        syllableCount = randFromFrequencies(syllableCounts)
        # markovName returns None for dead ends and filtered-out names, so
        # keep asking (with the same syllable count) until a name comes back
        nameInfo = None
        while nameInfo is None:
            nameInfo = markovName(syllableCount)
        rawStem, endings = nameInfo
        capitalized = rawStem.capitalize()
        print("{} ({})".format(capitalized + endings[0],
                               capitalized + endings[1]))

# When run as a script (rather than imported), print a batch of 30 names
if __name__ == "__main__":
    printCountryNames(30)

## names.txt
# A few names have been respelled or omitted to avoid awkward-looking outputs

# Latinate names ending in -a/-an, -ia/-ian, or -a/-ian
a-fri-c a
sar-di-n ia
cor-si-c a
i-be-r ia
an-dor-r a
dal-ma-t ia
da-c ia
mo-e-s ia
thra-c ia
i-o-n ia
il-ly-r ia
do-r ia
a-cha- ia
spar-t a
a-s ia
ly-d ia
smyr-n a/ian
thy-a-ti-r a
la-o-di-ce- a
phry-g ia
my-s ia
bi-thi-n ia
ga-la-t ia
pi-si-d ia
pam-phi-l ia
ci-li-c ia
cap-pa-do-c ia
se-leu-c ia
ju-de- a
scy-th ia
par-th ia
me-d ia
per-s ia
me-so-po-ta-m ia
chal-de- a
mau-ri-ta-n ia
lib-y a
al-ge-r ia
tu-ni-s ia
e-thi-o-p ia
so-ma-l ia
e-ri-tre- a
u-gan-d a
ken-y a
tan-za-n ia
ru-an-d a
rho-de-s ia
zam-b ia
an-go-l a
na-mi-b ia
ni-ge-r ia
gam-b ia
li-be-r ia
gu-i-ne- a
a-ra-b ia
sy-r ia
ar-me-n ia
al-ba-n ia
bos-n ia
her-ze-go-vi-n a/ian
cro-a-t ia
ser-b ia
yu-go-sla-v ia
bul-ga-r ia
ro-ma-n ia
mol-do-v a
aus-tr ia
slo-va-k ia
slo-ve-n ia
es-to-n ia
lat-v ia
li-thu-a-n ia
scan-di-na-v ia
rus-s ia
si-be-r ia
ge-or-g ia
ab-kha-z ia
os-se-t ia
mon-go-l ia
ko-re- a
in-d ia
go- a
cam-bo-d ia
kam-pu-che- a
ma-lay-s ia
in-do-ne-s ia
ja-v a
ja-kar-t a
kra-ka-to- a
pa-pu- a
aus-tra-l ia
taz-ma-n ia
po-ly-ne-s ia
o-ce-a-n ia
me-la-ne-s ia
mi-cro-ne-s ia
sa-mo- a
ton-g a
ca-na-d a/ian
al-ber-t a
ma-ni-to-b a
a-me-ri-c a
a-las-k a
ca-li-for-n ia
ne-va-d a
a-ri-zo-n a
mon-ta-n a
da-ko-t a
ne-bras-k a
min-ne-so-t a
i-o-w a
lou-i-si-a-n a
a-la-ba-m a
flo-ri-d a/ian
ca-ro-li-n a/ian
vir-gi-n ia
pen-syl-va-n ia
do-mi-ni-c a
his-pa-ni-o-l a
ja-mai-c a
an-ti-gu- a
bar-bu-d a
ber-mu-d a
an-guil-l a/ian
gre-na-d a/ian
a-ru-b a
gua-te-ma-l a
ni-ca-ra-gu- a
ri-c a
co-lom-b ia
ve-ne-zue-l a
ar-gen-ti-n a/ian
bo-li-v ia
nar-n ia
pe-re-lan-dr a/ian
zoo-to-p ia
sur-d a
el-les-me-r a
ve-g a
ta-zen-d a

# Names with modified Latinate endings (-as/-an, -o/-an, _/-an, _/-ian, etc.)
eu-ro-pe /an
his-pa-n ia/ic
bri-tan-n ia/ic
gal-l ia/ic
pa-ri-s /ian
ger-ma-n ia/ic
ro-m e/an
i-ta-l y/ian
si-ci-l y/ian
mo-na-c o/an
cre-t e/an
hun-ga-r y/ian
be-la-ru-s /ian
u-krai-n e/ian
ma-ce-do-n /ian
a-the-n s/ian
co-rin-th /ian
pe-lo-pon-ne-s e/ian
phi-lip-pi- /an
co-los-s e/ian
sar-di s/an
troy- /an
ty-r e/ian
si-do-n /ian
su-me-r /ian
ba-by-lo-n /ian
pa-les-ti-n e/ian
mo-roc-c o/an
e-gyp-t /ian
dji-bou-ti- /an
bu-run-di- /an
ma-la-wi- /an
zim-bab-we- /an
ca-me-roo-n /ian
ver-de- /an
gha-na- /ian
cha-d /ian
ma-li- /an
jor-da-n /ian
i-ra-n /ian
ti-be-t /an
la-o-s /ian
sin-ga-po-re- /an
bru-nei- /an
fi-ji- /an
tu-va-lu- /an
na-u-ru- /an
to-ke-lau- /an
ni-u-e- /an
pa-lau- /an
on-ta-ri- o/an
la-bra-do-r /ian
ha-wai-i- /an
o-re-go-n /ian
i-da-ho- /an
co-lo-ra-d o/an
kan-s as/an
mis-sou-ri- /an
ar-kan-s as/an
tex- as/an
mis-sis-sip-pi- /an
ten-nes-see- /an
o-hi-o- /an
ri-c o/an
hai-ti- /an
mex-i-c o/an
hon-du-r as/an
sal-va-do-r /ian
e-cua-do-r /ian
bra-zi-l /ian
pa-ra-guay- /an
u-ru-guay- /an
chi-le- /an
gon-do-r /ian
mor-por-k /ian
kal-ga-n /ian
a-tu-r /an
ca-di-no-r /ian
ha-mil-to-n /ian

# Names ending in _/-i
is-ra-e-l /i
mo-a-b /i
is-mai-l /i
sau-d /i
ye-me-n /i
o-ma-n /i
ka-ta-r /i
bah-rai-n /i
ku-wai-t /i
i-ra-q /i
a-zer-bai-ja-n /i
pa-kis-ta-n /i
kash-mi-r /i
pun-ja-b /i
ra-jas-tha-n /i
gu-ja-ra-t /i
kon-ka-n /i
ben-ga-l /i
ban-gla-de-sh /i
mun-kha-sh /i

# Names ending in -a/-ese, _/-ese, etc.
mal-t a/ese
ge-no- a/ese
mi-la-n /ese
to-ri-n o/ese
bo-log-n a/ese
ba-r i/ese
mes-si-n a/ese
ve-ro-n a/ese
a-ra-go-n /ese
vi-en-n a/ese
dub-li-n /ese
fa-ro- e/ese
su-da-n /ese
ga-bo-n /ese
be-ni-n /ese
se-ne-ga-l /ese
ja-pa-n /ese
chi-n a/ese
can-to-n /ese
tai-wa-n /ese
bhu-ta-n /ese
ne-pa-l /ese
as-sa-m /ese
sin-ha-l a/ese
si-a-m /ese
bur-m a/ese
vi-et-na-m /ese
gu-ya-n a/ese
su-ri-na-m e/ese
flo-ri-n /ese
	#!/usr/bin/python3

	import re, random

	# A regex that matches a syllable, with three groups for the three
	# segments of the syllable: onset (initial consonants), nucleus (vowels),
	# and coda (final consonants).
	# The letter y counts as a vowel, except that a syllable-initial y is
	# consumed on its own as the whole onset (the first alternative of the
	# first group), as in "yu-go-sla-v ia" or the lone y of "lib-y a".
	# The regex also matches if there is just an onset (even an empty
	# onset); this case corresponds to the final partial syllable of the
	# stem, which is usually the consonant before a vowel ending (for
	# example, the d in "ca-na-d a").
	syllableRgx = re.compile(r"(y|[^aeiouy]*)([aeiouy]+|$)([^aeiouy]*)")
	# Training data file; the relative path is resolved against the current
	# working directory when the module is loaded
	nameFile = "names.txt"

	# Dictionary that holds the frequency of each syllable count (note that these
	# are the syllables *before* the ending, so "al-ba-n ia" only counts two)
	syllableCounts = {}

	# List of four dictionaries (for onsets, nuclei, codas, and endings):
	# Each dictionary's key/value pairs are prevSegment:segmentDict, where
	# segmentDict is a frequency dictionary of various onsets, nuclei, codas,
	# or endings, and prevSegment is a segment that can be the last nonempty
	# segment preceding them. A prevSegment of None marks segments at the
	# beginnings of names.
	segmentData = [{}, {}, {}, {}]
	# Indices into segmentData. The first three also match the order of the
	# groups in syllableRgx, so they double as indices into a match's groups().
	ONSET = 0
	NUCLEUS = 1
	CODA = 2
	ENDING = 3

	# Populate syllableCounts and segmentData from the training names in nameFile
	with open(nameFile) as f:
	    for line in f:
	        line = line.strip()
	        # Blank lines and "#" comment lines carry no training data
	        if not line or line.startswith("#"):
	            continue
	        stem, ending = line.split()
	        # Endings are stored as noun/adj. When only the noun ending is
	        # given, the adjective ending is the noun ending plus -n.
	        if "/" not in ending:
	            ending = "{}/{}n".format(ending, ending)
	        # Each hyphen closes one full syllable, so the hyphen count is the
	        # number of syllables before the ending
	        syllableCount = stem.count("-")
	        syllableCounts[syllableCount] = syllableCounts.get(syllableCount, 0) + 1

	        # Record every segment of the stem under the last nonempty segment
	        # that came before it (None at the start of the name)
	        prevSegment = None
	        for syllable in stem.split("-"):
	            segments = syllableRgx.match(syllable).groups()
	            if not (segments[NUCLEUS] or segments[CODA]):
	                # A syllable with empty nucleus and coda comes right before
	                # the ending, so only its onset is recorded
	                segments = segments[:ONSET + 1]
	            for segType, segment in enumerate(segments):
	                segFrequencies = segmentData[segType].setdefault(prevSegment, {})
	                segFrequencies[segment] = segFrequencies.get(segment, 0) + 1
	                # Empty segments never become the "previous" segment
	                prevSegment = segment or prevSegment
	        # Finally, record which ending followed the last nonempty segment
	        endFrequencies = segmentData[ENDING].setdefault(prevSegment, {})
	        endFrequencies[ending] = endFrequencies.get(ending, 0) + 1


	def randFromFrequencies(dictionary):
	    """Return a random key of dictionary, weighted by its frequency value.

	    dictionary maps each candidate key to a non-negative integer
	    frequency; a key with frequency k is k times as likely to be chosen
	    as a key with frequency 1, and a key with frequency 0 is never chosen.

	    Raises ValueError if the frequencies do not add up to a positive
	    total (for example, when the dictionary is empty).
	    """

	    total = sum(dictionary.values())
	    if total <= 0:
	        # random.randrange would raise ValueError here as well; raise it
	        # ourselves so that the message names the actual problem
	        raise ValueError("randFromFrequencies needs a positive total "
	                         "frequency (got {})".format(total))
	    # Pick a position in [0, total) and walk the keys, letting each key
	    # claim a stretch of positions as wide as its frequency
	    index = random.randrange(total)
	    for key, freq in dictionary.items():
	        if index < freq:
	            # Select this one
	            return key
	        index -= freq
	    # Weird, should have returned something
	    raise ValueError("randFromFrequencies didn't pick a value "
	                     "(index remainder is {})".format(index))

	def markovName(syllableCount):
	    """Generate a country name using a Markov-chain-like process.

	    Each segment (onset, nucleus, coda) is drawn at random from the
	    frequencies recorded in segmentData for the most recent nonempty
	    segment. syllableCount is the number of full syllables to generate
	    before the final partial syllable (an onset, possibly empty) and the
	    ending.

	    Returns a (stem, endings) tuple, where stem is the generated stem and
	    endings is the list obtained by splitting a "noun/adj" ending string
	    on "/". Returns None if the chain reaches a segment with no recorded
	    successors, or if the finished name is rejected by one of the filters
	    at the end of this function; the caller can simply try again.
	    """

	    prevSegment = None
	    stem = ""
	    for _ in range(syllableCount):
	        for segType in (ONSET, NUCLEUS, CODA):
	            try:
	                segFrequencies = segmentData[segType][prevSegment]
	            except KeyError:
	                # In the unusual situation that the chain fails to find an
	                # appropriate next segment, it's too complicated to try to
	                # roll back and pick a better prevSegment; so instead,
	                # return None and let the caller generate a new name
	                return None
	            segment = randFromFrequencies(segFrequencies)
	            stem += segment
	            if segment:
	                prevSegment = segment

	    # The onset of the final partial syllable is drawn from the same kind
	    # of table, so a missing entry is the same dead end as above and must
	    # not escape as a KeyError
	    try:
	        onsetFrequencies = segmentData[ONSET][prevSegment]
	    except KeyError:
	        return None
	    # Try different onsets for the last syllable till we find one that's
	    # legal before an ending; we also allow empty onsets. Because it's
	    # possible we won't find one, the number of attempts is limited. If
	    # every attempt fails, the last onset drawn is used anyway and the
	    # name gets the fallback ending below.
	    for _ in range(10):
	        endingOnset = randFromFrequencies(onsetFrequencies)
	        if endingOnset == "" or endingOnset in segmentData[ENDING]:
	            break
	    stem += endingOnset
	    if endingOnset:
	        prevSegment = endingOnset

	    if prevSegment in segmentData[ENDING]:
	        # Pick an ending that goes with the prevSegment
	        endings = randFromFrequencies(segmentData[ENDING][prevSegment])
	    else:
	        # It can happen (with an empty last-syllable onset, or when no
	        # legal onset was found) that the previous segment does not appear
	        # before any ending in the data set. In this case, we'll just use
	        # -a(n) for the ending.
	        endings = "a/an"
	    endings = endings.split("/")
	    nounForm = stem + endings[0]

	    # Filter out names that are too short (two-letter names like Mo) or
	    # too long (mouthfuls like Imbadossorbia)
	    if not 3 <= len(nounForm) <= 11:
	        return None
	    # Filter out names with weird consonant clusters at the end
	    if nounForm.endswith(("bl", "tn", "sr", "sn", "sm", "shm")):
	        return None
	    # Filter out names that sound like anatomical references
	    bannedSubstrings = ("vag", "coc", "cok", "kok", "peni")
	    if any(banned in stem for banned in bannedSubstrings):
	        return None
	    if nounForm == "ass":
	        # This isn't a problem if it's part of a larger name like Assyria,
	        # so filter it out only if it's the entire name
	        return None
	    return stem, endings


	def printCountryNames(count):
	    "Print count generated names, one per line, as 'Noun (Adjective)'."

	    for _ in range(count):
	        syllableCount = randFromFrequencies(syllableCounts)
	        # markovName returns None for dead ends and filtered-out names, so
	        # keep asking (with the same syllable count) until a name comes back
	        nameInfo = None
	        while nameInfo is None:
	            nameInfo = markovName(syllableCount)
	        rawStem, endings = nameInfo
	        capitalized = rawStem.capitalize()
	        print("{} ({})".format(capitalized + endings[0],
	                               capitalized + endings[1]))

	# When run as a script (rather than imported), print a batch of 30 names
	if __name__ == "__main__":
	    printCountryNames(30)
	# A few names have been respelled or omitted to avoid awkward-looking outputs

	# Latinate names ending in -a/-an, -ia/-ian, or -a/-ian
	a-fri-c a
	sar-di-n ia
	cor-si-c a
	i-be-r ia
	an-dor-r a
	dal-ma-t ia
	da-c ia
	mo-e-s ia
	thra-c ia
	i-o-n ia
	il-ly-r ia
	do-r ia
	a-cha- ia
	spar-t a
	a-s ia
	ly-d ia
	smyr-n a/ian
	thy-a-ti-r a
	la-o-di-ce- a
	phry-g ia
	my-s ia
	bi-thi-n ia
	ga-la-t ia
	pi-si-d ia
	pam-phi-l ia
	ci-li-c ia
	cap-pa-do-c ia
	se-leu-c ia
	ju-de- a
	scy-th ia
	par-th ia
	me-d ia
	per-s ia
	me-so-po-ta-m ia
	chal-de- a
	mau-ri-ta-n ia
	lib-y a
	al-ge-r ia
	tu-ni-s ia
	e-thi-o-p ia
	so-ma-l ia
	e-ri-tre- a
	u-gan-d a
	ken-y a
	tan-za-n ia
	ru-an-d a
	rho-de-s ia
	zam-b ia
	an-go-l a
	na-mi-b ia
	ni-ge-r ia
	gam-b ia
	li-be-r ia
	gu-i-ne- a
	a-ra-b ia
	sy-r ia
	ar-me-n ia
	al-ba-n ia
	bos-n ia
	her-ze-go-vi-n a/ian
	cro-a-t ia
	ser-b ia
	yu-go-sla-v ia
	bul-ga-r ia
	ro-ma-n ia
	mol-do-v a
	aus-tr ia
	slo-va-k ia
	slo-ve-n ia
	es-to-n ia
	lat-v ia
	li-thu-a-n ia
	scan-di-na-v ia
	rus-s ia
	si-be-r ia
	ge-or-g ia
	ab-kha-z ia
	os-se-t ia
	mon-go-l ia
	ko-re- a
	in-d ia
	go- a
	cam-bo-d ia
	kam-pu-che- a
	ma-lay-s ia
	in-do-ne-s ia
	ja-v a
	ja-kar-t a
	kra-ka-to- a
	pa-pu- a
	aus-tra-l ia
	taz-ma-n ia
	po-ly-ne-s ia
	o-ce-a-n ia
	me-la-ne-s ia
	mi-cro-ne-s ia
	sa-mo- a
	ton-g a
	ca-na-d a/ian
	al-ber-t a
	ma-ni-to-b a
	a-me-ri-c a
	a-las-k a
	ca-li-for-n ia
	ne-va-d a
	a-ri-zo-n a
	mon-ta-n a
	da-ko-t a
	ne-bras-k a
	min-ne-so-t a
	i-o-w a
	lou-i-si-a-n a
	a-la-ba-m a
	flo-ri-d a/ian
	ca-ro-li-n a/ian
	vir-gi-n ia
	pen-syl-va-n ia
	do-mi-ni-c a
	his-pa-ni-o-l a
	ja-mai-c a
	an-ti-gu- a
	bar-bu-d a
	ber-mu-d a
	an-guil-l a/ian
	gre-na-d a/ian
	a-ru-b a
	gua-te-ma-l a
	ni-ca-ra-gu- a
	ri-c a
	co-lom-b ia
	ve-ne-zue-l a
	ar-gen-ti-n a/ian
	bo-li-v ia
	nar-n ia
	pe-re-lan-dr a/ian
	zoo-to-p ia
	sur-d a
	el-les-me-r a
	ve-g a
	ta-zen-d a

	# Names with modified Latinate endings (-as/-an, -o/-an, _/-an, _/-ian, etc.)
	eu-ro-pe /an
	his-pa-n ia/ic
	bri-tan-n ia/ic
	gal-l ia/ic
	pa-ri-s /ian
	ger-ma-n ia/ic
	ro-m e/an
	i-ta-l y/ian
	si-ci-l y/ian
	mo-na-c o/an
	cre-t e/an
	hun-ga-r y/ian
	be-la-ru-s /ian
	u-krai-n e/ian
	ma-ce-do-n /ian
	a-the-n s/ian
	co-rin-th /ian
	pe-lo-pon-ne-s e/ian
	phi-lip-pi- /an
	co-los-s e/ian
	sar-di s/an
	troy- /an
	ty-r e/ian
	si-do-n /ian
	su-me-r /ian
	ba-by-lo-n /ian
	pa-les-ti-n e/ian
	mo-roc-c o/an
	e-gyp-t /ian
	dji-bou-ti- /an
	bu-run-di- /an
	ma-la-wi- /an
	zim-bab-we- /an
	ca-me-roo-n /ian
	ver-de- /an
	gha-na- /ian
	cha-d /ian
	ma-li- /an
	jor-da-n /ian
	i-ra-n /ian
	ti-be-t /an
	la-o-s /ian
	sin-ga-po-re- /an
	bru-nei- /an
	fi-ji- /an
	tu-va-lu- /an
	na-u-ru- /an
	to-ke-lau- /an
	ni-u-e- /an
	pa-lau- /an
	on-ta-ri- o/an
	la-bra-do-r /ian
	ha-wai-i- /an
	o-re-go-n /ian
	i-da-ho- /an
	co-lo-ra-d o/an
	kan-s as/an
	mis-sou-ri- /an
	ar-kan-s as/an
	tex- as/an
	mis-sis-sip-pi- /an
	ten-nes-see- /an
	o-hi-o- /an
	ri-c o/an
	hai-ti- /an
	mex-i-c o/an
	hon-du-r as/an
	sal-va-do-r /ian
	e-cua-do-r /ian
	bra-zi-l /ian
	pa-ra-guay- /an
	u-ru-guay- /an
	chi-le- /an
	gon-do-r /ian
	mor-por-k /ian
	kal-ga-n /ian
	a-tu-r /an
	ca-di-no-r /ian
	ha-mil-to-n /ian

	# Names ending in _/-i
	is-ra-e-l /i
	mo-a-b /i
	is-mai-l /i
	sau-d /i
	ye-me-n /i
	o-ma-n /i
	ka-ta-r /i
	bah-rai-n /i
	ku-wai-t /i
	i-ra-q /i
	a-zer-bai-ja-n /i
	pa-kis-ta-n /i
	kash-mi-r /i
	pun-ja-b /i
	ra-jas-tha-n /i
	gu-ja-ra-t /i
	kon-ka-n /i
	ben-ga-l /i
	ban-gla-de-sh /i
	mun-kha-sh /i

	# Names ending in -a/-ese, _/-ese, etc.
	mal-t a/ese
	ge-no- a/ese
	mi-la-n /ese
	to-ri-n o/ese
	bo-log-n a/ese
	ba-r i/ese
	mes-si-n a/ese
	ve-ro-n a/ese
	a-ra-go-n /ese
	vi-en-n a/ese
	dub-li-n /ese
	fa-ro- e/ese
	su-da-n /ese
	ga-bo-n /ese
	be-ni-n /ese
	se-ne-ga-l /ese
	ja-pa-n /ese
	chi-n a/ese
	can-to-n /ese
	tai-wa-n /ese
	bhu-ta-n /ese
	ne-pa-l /ese
	as-sa-m /ese
	sin-ha-l a/ese
	si-a-m /ese
	bur-m a/ese
	vi-et-na-m /ese
	gu-ya-n a/ese
	su-ri-na-m e/ese
	flo-ri-n /ese