Last active
October 16, 2015 02:10
-
-
Save dragstar328/0b6820b9086ac38896bc to your computer and use it in GitHub Desktop.
言語処理100本ノック
http://www.cl.ecei.tohoku.ac.jp/nlp100/
第1章: 準備運動
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
def chap1_0():
    """Print the string "stressed" with its characters in reverse order."""
    targ = "stressed"
    # A single-argument print(...) call is valid on both Python 2 and 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print(targ[::-1])
def chap1_1():
    """Print every other character of the sample string, starting at index 0."""
    targ = u"パタトクカシーー"
    # Step-2 slice keeps the characters at indices 0, 2, 4 and 6.
    # Single-argument print(...) works on both Python 2 and Python 3.
    print(targ[0::2])
def chap1_2():
    """Print the two sample words interleaved character by character."""
    str1 = u"パトカー"
    str2 = u"タクシー"
    # zip pairs the i-th characters of both words; join assembles the result
    # in one pass instead of growing a string with repeated concatenation.
    str3 = u"".join(a + b for a, b in zip(str1, str2))
    # Single-argument print(...) works on both Python 2 and Python 3.
    print(str3)
def chap1_3():
    """Print the list of word lengths of the sample sentence.

    Commas and periods are removed first so they do not count toward the
    length of the word they are attached to.
    """
    targ = "Now I need a drink, alcoholic of course, after the heavy lectures involving quantum mechanics."
    for mark in (",", "."):
        targ = targ.replace(mark, "")
    ls = [len(s) for s in targ.split()]
    # Single-argument print(...) works on both Python 2 and Python 3.
    print(ls)
def chap1_4():
    """Print a word-position -> leading-letters table for the sample sentence.

    Words at the 1-based positions 1, 5, 6, 7, 8, 9, 15, 16 and 19 contribute
    their first character; every other word contributes its first two
    characters. One "position symbol" pair is printed per line.
    """
    targ = "Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can."
    targ = targ.replace(",", "")
    targ = targ.replace(".", "")
    single_letter = (1, 5, 6, 7, 8, 9, 15, 16, 19)
    mp = {}
    # enumerate(..., 1) supplies the 1-based word position directly.
    for i, s in enumerate(targ.split(), 1):
        mp[i] = s[0] if i in single_letter else s[0:2]
    # Iterate in sorted key order so the output does not depend on the
    # dictionary's iteration order (unspecified on Python 2).
    for k, v in sorted(mp.items()):
        # One pre-formatted argument keeps the output identical on
        # Python 2 and Python 3.
        print("{0} {1}".format(k, v))
def chap1_5():
    """Print the character bi-grams and the word bi-grams of a sample sentence.

    Word bi-grams are printed as the two adjacent words joined with "-".
    """
    sentence = "I am an NLPer"
    # Character bi-grams: every pair of adjacent characters (spaces included)
    charGram = [sentence[i:i+2] for i in range(len(sentence)-1)]
    # Word bi-grams: every pair of adjacent words, stripped of "." and ","
    words = [word.strip(".,") for word in sentence.split()]
    wordGram = ["-".join(words[i:i+2]) for i in range(len(words)-1)]
    # Single-argument print(...) works on both Python 2 and Python 3.
    print(charGram)
    print(wordGram)
def chap1_6():
    """Print set operations on the character bi-grams of two sample words.

    Builds the bi-gram sets of "paraparaparadise" (setX) and "paragraph"
    (setY), prints both sets, their union, the difference setX - setY and
    their intersection, then reports whether each set contains "se".
    """
    word1 = "paraparaparadise"
    word2 = "paragraph"
    setX = {word1[i:i+2] for i in range(len(word1)-1)}
    setY = {word2[i:i+2] for i in range(len(word2)-1)}
    # Each line is formatted into a single string first so that print(...)
    # produces the same "label value" output on Python 2 and Python 3.
    print("setX {0}".format(setX))
    print("setY {0}".format(setY))
    print("和集合 {0}".format(setX | setY))
    print("差集合 {0}".format(setX - setY))
    print("積集合 {0}".format(setX & setY))
    # The messages name the set as the container: the membership test is
    # '"se" in set', not the other way round.
    if "se" in setX:
        print("setX contains 'se'")
    else:
        print("setX does not contain 'se'")
    if "se" in setY:
        print("setY contains 'se'")
    else:
        print("setY does not contain 'se'")
def chap1_7():
    """Print the sentence that makeStr builds from the sample values."""
    ret = makeStr("12", "気温", "22.4")
    # Single-argument print(...) works on both Python 2 and Python 3.
    print(ret)
def makeStr(x, y, z):
    """Return the template sentence with x, y and z substituted in order.

    The three arguments fill the positional fields {0}, {1} and {2} of the
    template via str.format.
    """
    template = "{0}の{1}は{2}"
    sentence = template.format(x, y, z)
    return sentence
def chap1_8():
    """Print ciph() applied to sample messages.

    Covers lowercase, mixed-case and uppercase input, plus a message passed
    through ciph() twice.
    """
    # Single-argument print(...) works on both Python 2 and Python 3.
    print(ciph("hello world"))
    print(ciph("Hello World"))
    print(ciph("HELLO WORLD"))
    print(ciph(ciph("hello world")))
import re | |
def ciph(word):
    """Return word with every lowercase ASCII letter mirrored.

    A character matching [a-z] is replaced by chr(219 - ord(character)),
    which maps 'a' to 'z', 'b' to 'y', and so on; all other characters are
    copied unchanged. Applying the function twice restores the input.
    """
    converted = []
    for ch in word:
        if re.match(r"[a-z]", ch):
            converted.append(chr(219 - ord(ch)))
        else:
            converted.append(ch)
    return "".join(converted)
def chap1_9():
    """Print the sample sentence with the inner letters of long words shuffled.

    Words longer than four characters are passed through typo(); shorter
    words are printed as they are. Words are separated by single spaces.
    """
    targ = "I couldn't believe that I could actually understand what I was reading : the phenomenal power of the human mind ."
    # Single-argument print(...) works on both Python 2 and Python 3.
    # The former trailing typo(targ) call was dropped: its return value was
    # discarded, so it had no effect on the output.
    print(" ".join(typo(w) if len(w) > 4 else w for w in targ.split()))
from random import shuffle | |
def typo(word):
    """Return word with its inner characters randomly shuffled.

    The first and last characters stay in place; only the characters between
    them are reordered. Words shorter than four characters have at most one
    inner character, so they are returned unchanged. This also keeps a
    one-character word from being doubled and an empty string from raising
    IndexError.
    """
    if len(word) < 4:
        return word
    targ = list(word[1:-1])
    shuffle(targ)
    return word[0] + ''.join(targ) + word[-1]
# Run every chapter-1 exercise once, in numerical order.
for exercise in (
    chap1_0,
    chap1_1,
    chap1_2,
    chap1_3,
    chap1_4,
    chap1_5,
    chap1_6,
    chap1_7,
    chap1_8,
    chap1_9,
):
    exercise()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment