Skip to content

Instantly share code, notes, and snippets.

@nutszebra
Last active November 20, 2015 14:27
Show Gist options
  • Save nutszebra/10195847eaa28a1d7869 to your computer and use it in GitHub Desktop.
Save nutszebra/10195847eaa28a1d7869 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#Link: http://www.cl.ecei.tohoku.ac.jp/nlp100/
"""
Question 06:
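06. Sets
Find the sets of character bi-grams contained in "paraparaparadise" and
"paragraph" as X and Y respectively, then compute the union, intersection,
and difference of X and Y.
In addition, check whether the bi-gram 'se' is contained in X and in Y.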
"""
"""***********************************************************
Define the same n-gram functions as in 100_questions_NLP_005
***********************************************************"""
import re
def parseSentence(sentence):
    """Split ``sentence`` into a list of tokens using a regular expression.

    A token is either a run of word characters, commas and apostrophes, or a
    single ``,``, ``.``, ``!`` or ``:``. Characters matching none of these
    (for example whitespace) are dropped. Because the comma is part of the
    first character class, a comma directly attached to a word stays in that
    word's token.
    """
    token_pattern = re.compile(r"[\w,']+|,|\.|!|:")
    return token_pattern.findall(sentence)
def nGram(target, n, option="letter"):
    """Return the n-grams of ``target`` as a list of tuples.

    Args:
        target: The sequence to split. With ``option="letter"`` it is sliced
            directly, so a string yields character n-grams. With any other
            ``option`` it is first tokenised by ``parseSentence`` and the
            n-grams are built from the resulting tokens.
        n: Size of each n-gram; any value accepted by ``int()``.
        option: ``"letter"`` (default) for n-grams over ``target`` itself;
            any other value selects token n-grams.

    Returns:
        A list of tuples in order of occurrence. The list is empty when
        ``target`` holds fewer than ``n`` items.
    """
    # Convert once so the window count and the slice width always agree
    # (previously only the count used int(n), so n="2" raised TypeError).
    n = int(n)
    items = target if option == "letter" else parseSentence(target)
    # range() instead of xrange() keeps this working on Python 2 and 3;
    # a non-positive window count gives an empty range and thus [].
    return [tuple(items[i:i + n]) for i in range(len(items) - n + 1)]
"""*********************************************************
link: https://gist.github.com/nutszebra/5e29c345b700498bcc5b
*********************************************************"""
# Character bi-gram sets of the two words.
X = set(nGram("paraparaparadise", 2))
Y = set(nGram("paragraph", 2))

# Union, intersection and both directions of the difference.
XPlusY = X | Y
XIntersectY = X & Y
XMinusY = X - Y
YMinusX = Y - X

# The bi-gram to look up, kept as a one-element set so it can be compared
# against X and Y with the subset operator.
se = {("s", "e")}

# Sets have no defined order; sort before printing so the output is
# identical from run to run.
print(u"X集合: {0}".format(sorted(X)))
print(u"Y集合: {0}".format(sorted(Y)))
print(u"和集合: {0}".format(sorted(XPlusY)))
print(u"X-Y差集合: {0}".format(sorted(XMinusY)))
print(u"Y-X差集合: {0}".format(sorted(YMinusX)))
print(u"積集合: {0}".format(sorted(XIntersectY)))

# The exercise asks whether 'se' is in X and whether it is in Y. Testing the
# union (as before) cannot tell those apart, so report each set on its own.
for label, bigrams in (("X", X), ("Y", Y)):
    if se <= bigrams:
        print(u"seのバイグラムは{0}に含まれる".format(label))
    else:
        print(u"seのバイグラムは{0}に含まれない".format(label))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment