Last active
July 19, 2019 19:07
-
-
Save atsukoba/41f30be0b63bb54b5564a5677ee3e609 to your computer and use it in GitHub Desktop.
宇宙海賊ゴー☆ジャス
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Standard library.
import logging
import re
import shutil
from logging import getLogger

# Third-party: morphological analysis, Wikipedia client, edit distance,
# and kana -> romaji conversion.
import MeCab
import wikipedia
from Levenshtein import distance as D
from pykakasi import kakasi

# Configure root logging once at import time and expose a module logger.
logging.basicConfig(level=logging.INFO)
log = getLogger(__name__)
class Gorgeous:
    """Find the nation whose name sounds closest to a Japanese phrase.

    The phrase is converted to katakana with MeCab, romanized with kakasi,
    and compared by Levenshtein distance against the romanized nation names
    loaded from a CSV file.

    Example::

        gorgeous = Gorgeous()
        gorgeous.revolution("まだ助かる")   # -> "マダガスカル"
    """

    def __init__(self, **kwargs) -> None:
        """Load the nation list and pre-compute its romanized forms.

        Args:
            **kwargs: forwarded to :meth:`read_nations` (e.g. ``fname``).
        """
        converter = kakasi()
        # Legacy pykakasi API: 'K' -> 'a' requests katakana-to-ASCII output.
        converter.setMode('K', 'a')
        self.conv = converter.getConverter()
        self.tagger = MeCab.Tagger()
        self.nations = self.read_nations(**kwargs)
        # Each entry is the list returned by romanize() for one nation.
        self.nations_roman = [self.romanize(nation) for nation in self.nations]
        # Reuse the romanized forms instead of romanizing every nation twice.
        self.nations_roman_vowel = [
            self.extract_vowel(roman) for roman in self.nations_roman]
        self.recent_answer = ""

    def read_nations(self, fname="nations.csv", **kwargs) -> list:
        """Read the nation names (first CSV column) from ``fname``.

        The expected file is the list published in
        "国コード一覧CSV ISO 3166-1":
        https://qiita.com/tao_s/items/32b90a2751bfbdd585ea

        Args:
            fname: path of the CSV file.
            **kwargs: forwarded to :meth:`read_csv_data`.

        Returns:
            The list of nation names, header row excluded.
        """
        return self.read_csv_data(fname, **kwargs)

    def read_csv_data(self, fname="nations.csv", **kwargs) -> list:
        """Return the first column of every data row in ``fname``.

        The first line is treated as a header and dropped. Columns may be
        separated by ``,`` or ``|``; double quotes are stripped. Blank lines
        and rows whose first column is empty are skipped so that they do not
        show up as an empty-string candidate.

        Args:
            fname: path of the CSV file.
            **kwargs: ``encoding`` (default ``"utf-8"``) selects the text
                encoding; other keys are ignored.

        Returns:
            A list of strings, one per usable data row.
        """
        encoding = kwargs.get("encoding", "utf-8")
        with open(fname, "r", encoding=encoding) as f:
            lines = [line.rstrip("\n") for line in f]
        names = []
        for line in lines[1:]:  # lines[0] is the header row
            if not line.strip():
                continue
            name = re.split("[,|]", line)[0].replace("\"", "")
            if name:
                names.append(name)
        return names

    def clean_str(self, s: str) -> str:
        """Remove asterisks, whitespace, periods and commas from ``s``."""
        return re.sub(r'[*\s\t\n.,]', "", s)

    def katakanize(self, s: str, morph=False, **kwargs):
        """Convert ``s`` to its reading using the MeCab tagger.

        Each non-empty output line of the tagger (other than ``EOS``) is
        split on commas and whitespace. The last field is taken as the
        reading (presumably the katakana pronunciation with an IPADIC-style
        dictionary -- confirm for other dictionaries); when it is ``"*"``
        the first field, i.e. the surface form, is used instead.

        Args:
            s: text to analyse.
            morph: when true, return one reading per morpheme as a list
                instead of a single joined string.
            **kwargs: ignored; accepted so callers can pass options through.

        Returns:
            A ``str`` (default) or a ``list`` of ``str`` when ``morph`` is
            true.
        """
        readings = []
        for line in self.tagger.parse(s).split("\n"):
            # Skip the end-of-sentence marker and empty lines rather than
            # assuming exactly one of each is present.
            if line in ("", "EOS"):
                continue
            fields = re.split(r"[,\s]", line)
            readings.append(fields[-1] if fields[-1] != "*" else fields[0])
        if morph:  # morphologically analysed output
            return readings
        return "".join(readings)

    def romanize(self, s, **kwargs) -> list:
        """Convert ``s`` to romaji via katakana.

        Args:
            s: text to convert.
            **kwargs: forwarded to :meth:`katakanize` (e.g. ``morph``).

        Returns:
            A list of romanized strings: one element for the whole input,
            or one per morpheme when ``morph`` is true.
        """
        kana = self.katakanize(s, **kwargs)
        if isinstance(kana, str):
            kana = [kana]
        return [self.conv.do(w) for w in kana]

    def extract_vowel(self, word, **kwargs):
        """Keep only the letters ``a i u e o n`` of a romanized word.

        Args:
            word: a romanized string, or a list of them (handled
                element-wise).
            **kwargs: ignored.

        Returns:
            A string, or a list of strings when ``word`` is a list.
        """
        if isinstance(word, list):
            return [self.extract_vowel(w) for w in word]
        return "".join(ch for ch in word if ch in "aiueon")

    def revolution(self, sentence: str, **kwargs) -> str:
        """Print and return the nation name most similar to ``sentence``.

        Example::

            gorgeous.revolution("まだ助かる")   # -> "マダガスカル"

        Args:
            sentence: the phrase to match.
            **kwargs:
                n_result (default 3): number of ranking lines to print;
                    values larger than the nation list are clamped.
                vowel (default False): if true, the distance is computed on
                    the vowel skeletons instead of the full romaji.
                Remaining keys are forwarded to :meth:`romanize`.

        Returns:
            The best-matching nation name (also stored in
            ``self.recent_answer``).

        Raises:
            IndexError: if no nations are loaded, or the Wikipedia search
                returns no result.
        """
        # int() tolerates a numeric string such as one coming from argparse.
        n_result = int(kwargs.get('n_result', 3))
        vowel = kwargs.get('vowel', False)
        print("INPUT: ", sentence)
        # sentence -> [words] -> [katakana] -> [roman]
        word_roman = self.romanize(sentence, **kwargs)
        print("ROMAN: ", word_roman)
        if vowel:
            word_vowel = self.extract_vowel(word_roman)
            print("VOWEL: ", word_vowel)
            query, candidates = word_vowel[-1], self.nations_roman_vowel
        else:
            query, candidates = word_roman[-1], self.nations_roman
        # Only the last element of the query and the first element of each
        # candidate take part in the comparison.
        dists = [D(query, candidate[0]) for candidate in candidates]
        idx = sorted(range(len(dists)), key=dists.__getitem__)
        print("RESULT:")
        for rank, i in enumerate(idx[:max(n_result, 0)], start=1):
            print(f"\tNo.{rank} : {self.nations[i]} ({candidates[i]}) : ({dists[i]})")
        self.recent_answer = self.nations[idx[0]]
        # Answer
        print(f"ここ!({self.googlemap()})")
        print("-" * shutil.get_terminal_size()[0])  # draw line
        print(f"{self.wikipedia()[1]}!!\n")
        print(u"☆" * shutil.get_terminal_size()[0])  # draw line
        return self.recent_answer

    def googlemap(self, place=None) -> str:
        """Return a Google Maps search URL for ``place``.

        Args:
            place: name to search for; defaults to ``self.recent_answer``.
        """
        if place is None:
            place = self.recent_answer
        return f"https://www.google.com/maps/search/{place}/"

    def wikipedia(self, place=None) -> tuple:
        """Look up ``place`` with the ``wikipedia`` package, language "ja".

        Args:
            place: name to search for; defaults to ``self.recent_answer``.

        Returns:
            ``(title, summary, url)`` of the first search hit.

        Raises:
            IndexError: if the search returns no result.
        """
        if place is None:
            place = self.recent_answer
        wikipedia.set_lang("ja")
        page = wikipedia.page(wikipedia.search(place)[0])
        return (page.title, page.summary, page.url)

    def showtime(self, **kwargs) -> None:
        """Run the interactive prompt loop.

        Reads phrases from standard input and passes each one to
        :meth:`revolution` until the user types one of the exit words or
        the input stream ends.

        Args:
            **kwargs: forwarded to :meth:`revolution`
                (``n_result``, ``vowel``, ...).
        """
        print("【ゴー☆ジャスのショータイム!】")
        print(f"\n- 【お題】を入力してくれよな!\n- ランキングを{kwargs.get('n_result', 3)}件表示するぞ!\n- 地球義ではなく、GoogleMapとWikipediaの情報を出力するぞ!")
        print(u"☆" * shutil.get_terminal_size()[0])  # draw line
        while True:
            try:
                place = input("\n【お題】を入力: ")
            except EOFError:
                # stdin was closed (e.g. piped input ran out): leave quietly.
                break
            if place in ("終了", "end", "終わり"):
                break
            self.revolution(place, **kwargs)
        print("また遊んでくれよな!")
def build_parser():
    """Build the command-line parser for the script.

    Options:
        -N / --n_line: number of ranking lines to print (int, default 3).
        -F / --file:   path of the nations CSV (default ``nations.csv``).
        -V / --vowel:  flag enabling vowel-based matching.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='キミも、ゴー☆ジャスになろう!')
    # type=int: without it a value given on the command line arrives as a
    # string and breaks the ranking loop downstream.
    parser.add_argument('-N', '--n_line', help="結果表示数", type=int, default=3)
    parser.add_argument('-F', '--file', help="nations.csv ファイルパス",
                        default='nations.csv')
    parser.add_argument('-V', '--vowel', help="母音モード", action='store_true')
    return parser


if __name__ == "__main__":
    args = build_parser().parse_args()
    gorgeous = Gorgeous(fname=args.file)
    gorgeous.showtime(vowel=args.vowel, n_result=args.n_line)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Please save the list published in "国コード一覧CSV ISO 3166-1" on Qiita (https://qiita.com/tao_s/items/32b90a2751bfbdd585ea) as
./nations.csv