Skip to content

Instantly share code, notes, and snippets.

@amake
Last active January 4, 2019 13:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amake/4c403bcca2d71310babf90fb6d07b8d7 to your computer and use it in GitHub Desktop.
Save amake/4c403bcca2d71310babf90fb6d07b8d7 to your computer and use it in GitHub Desktop.
`make test` to generate initialisms for Japanese place names, e.g. 池袋 → E. K. Bukuro
from __future__ import print_function
import romkan
# Load the place-name dictionary once at import time: every line of the CSV
# becomes one list of its comma-separated fields (the last field keeps the
# line's trailing newline, because lines are split without stripping).
with open('vendor/Noun.place.csv', encoding='utf-8') as in_file:
    data = list(map(lambda row: row.split(','), in_file))
# Katakana sequences that are treated as the spoken name of a Latin letter.
# The table is written letter-first, one row per letter with all of its
# accepted kana spellings, and flattened into a kana -> letter mapping.
kana2romaji = {
    kana: letter
    for letter, spellings in (
        ('A', ('エ',)),
        ('B', ('ビ',)),
        ('C', ('シ', 'シー')),
        ('E', ('イ', 'イー', 'イイ')),
        ('G', ('ジ',)),
        ('I', ('アイ',)),
        ('K', ('ケ',)),
        ('N', ('エン',)),
        ('O', ('オ', 'オオ', 'オウ')),
        ('P', ('ピ',)),
        ('Q', ('キュ', 'キュウ')),
        ('U', ('ユ', 'ユウ')),
    )
    for kana in spellings
}

# Small ャ/ュ/ョ, small ッ and ン: a kana match immediately followed by one of
# these is rejected by get_initialism instead of being turned into a letter.
forbidden = 'ャュョッン'
def get_initialism(reading):
    """Turn a katakana reading into letter initials plus a romanised tail.

    Leading kana sequences found in ``kana2romaji`` are peeled off one at a
    time and emitted as ``'<LETTER>. '``.  As soon as the remaining text does
    not start with any usable entry, the rest is romanised in one go with
    ``romkan.to_roma``, capitalised, and the scan stops.

    A table entry is only usable when it is the whole remaining reading or
    when the character right after it is not in ``forbidden``.

    Args:
        reading: The katakana reading to convert.

    Returns:
        A ``(text, initials)`` tuple: the assembled string and the number of
        letter initials it contains.  When the entire reading is consumed by
        initials the text ends with the ``'. '`` of the last one.
    """
    if not reading:
        return '', 0

    # The candidate order never changes while scanning, so sort once per call
    # rather than once per consumed chunk.  Longest first, so that e.g.
    # 'キュウ' is tried before its own prefix 'キュ'.
    candidates = sorted(kana2romaji, key=len, reverse=True)

    out = []
    initials = 0
    while reading:
        for kana in candidates:
            if not reading.startswith(kana):
                continue
            rest = reading[len(kana):]
            if rest and rest[0] in forbidden:
                # The next character attaches to this kana; not a letter here.
                continue
            out.append(kana2romaji[kana])
            out.append('. ')
            initials += 1
            reading = rest
            break
        else:
            # No table entry fits: romanise everything that is left and stop.
            out.append(romkan.to_roma(reading).capitalize())
            reading = ''
    return ''.join(out), initials
def main():
    """Print every place name whose reading yields more than one initial.

    For each row of ``data``, column 0 is used as the name and column 11 as
    the reading passed to ``get_initialism``.  Rows producing zero or one
    initial are skipped; the others are printed as ``name initialism``.
    """
    for row in data:
        place = row[0]
        text, letter_count = get_initialism(row[11])
        if letter_count <= 1:
            continue
        print(place, text)


# Run the report only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
# If a recipe fails part-way, remove the target file it was writing so a
# truncated download is never mistaken for an up-to-date one.
.DELETE_ON_ERROR:

# Directory that holds downloaded third-party data.
vendor:
	mkdir -p $(@)

NOUN_DATA := vendor/Noun.place.csv
NOUN_DATA_URL := https://raw.githubusercontent.com/taku910/mecab/master/mecab-ipadic/Noun.place.csv

# Fetch the place-name dictionary and convert it from EUC-JP to UTF-8.
# Download and conversion are separate steps: in a `curl | iconv` pipeline
# the exit status is iconv's, so a failed download would go unnoticed.
# `-f` makes curl fail on HTTP errors instead of saving the error page.
$(NOUN_DATA): | vendor
	curl -fL $(NOUN_DATA_URL) -o $(@).euc-jp
	iconv -f EUC-JP -t UTF-8 $(@).euc-jp > $(@)
	rm -f $(@).euc-jp

ENV := .env

# The script relies on Python 3 (open() with encoding=), so pin the
# interpreter rather than taking whatever virtualenv defaults to.
$(ENV):
	virtualenv -p python3 $(@)
	$(@)/bin/pip install romkan

# Order-only prerequisites: the data and the environment only need to exist.
.PHONY: test
test: | $(NOUN_DATA) $(ENV)
	$(ENV)/bin/python initialjp.py
@amake
Copy link
Author

amake commented Jan 4, 2019

Output (from a slightly older version) here:
https://pastebin.com/VAfDAF5u

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment