Last active
July 25, 2020 16:26
-
-
Save mdk-aza/980eba6b139e063da6e0b94d794cf69d to your computer and use it in GitHub Desktop.
言語処理100本ノック 準備運動 0-4
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from functools import reduce | |
| import re | |
# 00. Reversing a string
# Obtain the string formed by arranging the characters of "stressed" in
# reverse order (from the last character to the first).
# https://nlp100.github.io/ja/ch01.html#00-%E6%96%87%E5%AD%97%E5%88%97%E3%81%AE%E9%80%86%E9%A0%86
source_word = "stressed"
# A slice with a step of -1 walks the string backwards.
print(source_word[::-1])
# Variant written without slice syntax.
# https://ymgsapo.com/2019/11/11/python-reverse/
def rev_string_list(original_string):
    """Return ``original_string`` with its characters in reverse order.

    The characters are first copied into a list, then joined back together
    from last to first, so no slice syntax is involved.
    """
    characters = list(original_string)
    return "".join(reversed(characters))
reversed_word = rev_string_list("stressed")
print(reversed_word)

# 01. "パタトクカシーー"
# Take the 1st, 3rd, 5th and 7th characters of the string "パタトクカシーー"
# and concatenate them into a new string.
# https://nlp100.github.io/ja/ch01.html#01-%E3%83%91%E3%82%BF%E3%83%88%E3%82%AF%E3%82%AB%E3%82%B7%E3%83%BC%E3%83%BC
mixed_word = "パタトクカシーー"
# A step of 2 starting at index 0 selects every other character.
print(mixed_word[::2])
# 02. "パトカー" + "タクシー" = "パタトクカシーー"
# Interleave the characters of "パトカー" and "タクシー", starting from the
# first character of each, to obtain the string "パタトクカシーー".
# https://nlp100.github.io/ja/ch01.html#02-%E3%83%91%E3%83%88%E3%82%AB%E3%83%BC%E3%82%BF%E3%82%AF%E3%82%B7%E3%83%BC%E3%83%91%E3%82%BF%E3%83%88%E3%82%AF%E3%82%AB%E3%82%B7%E3%83%BC%E3%83%BC
def zip_str(str1, str2):
    """Interleave the characters of ``str1`` and ``str2``.

    The result alternates one character from ``str1`` and one from ``str2``,
    starting with ``str1``. Pairing is done with ``zip``, so characters
    beyond the length of the shorter string are dropped.

    Args:
        str1: String supplying the 1st, 3rd, 5th, ... characters.
        str2: String supplying the 2nd, 4th, 6th, ... characters.

    Returns:
        The interleaved string (empty when either input is empty).
    """
    # Build the result with a single join rather than repeated ``+=``,
    # which can be quadratic in the length of the output.
    return "".join(p + t for p, t in zip(str1, str2))
print(zip_str("パトカー", "タクシー"))

# https://www.suzu6.net/posts/11/
# Variant: join the characters of each zipped pair, then join the pairs.
print("".join("".join(chars) for chars in zip("パトカー", "タクシー")))

# Variant using map (arguably not very intuitive).
print("".join(map("".join, zip("パトカー", "タクシー"))))

# Variant using reduce: adding the zipped tuples together yields one flat
# tuple of characters, which is then joined into a string.
print("".join(reduce(lambda acc, pair: acc + pair, zip("パトカー", "タクシー"))))
# 03. Pi
# Split the sentence
# "Now I need a drink, alcoholic of course, after the heavy lectures involving quantum mechanics."
# into words and build a list of the number of (alphabetic) characters in
# each word, in order of appearance.
# https://nlp100.github.io/ja/ch01.html#03-%E5%86%86%E5%91%A8%E7%8E%87
def make_word_list(str1):
    """Return the length of every word in ``str1``, in order of appearance.

    Commas and periods are removed first, then the text is split on
    whitespace. No other punctuation is stripped, so any other character
    attached to a word is counted as part of it.

    For the exercise sentence the result is
    ``[3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5, 8, 9, 7, 9]``.

    Args:
        str1: The sentence to analyse.

    Returns:
        A list of ints, one per whitespace-separated word.
    """
    without_punctuation = re.sub(r'[,.]', '', str1)
    return [len(word) for word in without_punctuation.split()]
# Prints the word lengths of the exercise sentence:
# [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5, 8, 9, 7, 9]
print(
    make_word_list(
        "Now I need a drink, alcoholic of course, after the heavy lectures involving quantum mechanics."
    )
)
# 04. Element symbols
# Split the sentence
# "Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can."
# into words. Take the first character of the 1st, 5th, 6th, 7th, 8th, 9th,
# 15th, 16th and 19th words and the first two characters of every other word,
# then build an associative array (dict or map type) from each extracted
# string to the position of its word (counted from the start of the sentence).
# https://nlp100.github.io/ja/ch01.html#04-%E5%85%83%E7%B4%A0%E8%A8%98%E5%8F%B7
# https://upura.hatenablog.com/entry/2020/04/14/032543
def extract_chars(i, word, list):
    """Return ``(prefix, i)`` for the word at position ``i``.

    ``prefix`` is the first character of ``word`` when ``i`` is contained in
    ``list``, and the first two characters otherwise.

    Args:
        i: Position of the word, as supplied by the caller.
        word: The word to take the prefix from.
        list: Container of positions whose words contribute a single
            character. The name shadows the builtin ``list``; it is kept
            so that existing callers continue to work.

    Returns:
        A ``(prefix, i)`` tuple.
    """
    prefix_length = 1 if i in list else 2
    return (word[:prefix_length], i)
# Input sentence for exercise 04.
text = 'Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can.'
# 1-based word positions that contribute only their first character.
single_char_positions = [1, 5, 6, 7, 8, 9, 15, 16, 19]
# List comprehension producing one (prefix, position) pair per word.
ans = [
    extract_chars(position, token, single_char_positions)
    for position, token in enumerate(text.split(), 1)
]
# dict() turns the pairs into a prefix -> position mapping.
print(dict(ans))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment