Last active
December 8, 2018 03:21
-
-
Save parksunwoo/144aaa3cb3f6cf957913a8827a5ba2e1 to your computer and use it in GitHub Desktop.
QA_baseline_ko
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Requires the third-party package: pip3 install textrankr
# Step 1. Find the key sentences via document summarization (3 sentences here).
from __future__ import print_function

from textrankr import TextRank

# Read the whole source document. The context manager closes the file handle
# even if reading raises.
# NOTE(review): no encoding is passed, so the platform default is used --
# confirm it matches the encoding of the input file.
with open("wiki/chosun.txt", 'r') as f:
    data = f.read()

textrank = TextRank(data)
print(textrank.summarize())
# Example output (translated from Korean):
# Politics in the late Joseon period was organized around political factions.
# Politics in the late Joseon period was organized around political factions,
# and eventually the Westerners handed power over to the Southerners in the
# mid-17th-century Yesong dispute.
# In the 1890s the Donghak Peasant Movement arose in resistance to the
# exploitation of peasants; the Qing and Japanese armies, which entered Joseon
# on the pretext of suppressing it, clashed; and the pro-Japanese Gabo Reform
# took place.
# Step 2. Generate a question from a key sentence.
#
# NOTE(review): this is a fragment. `bucket`, `line` and the tag lists
# `l1` ... `l13` are defined outside the visible code. `bucket` is keyed by
# POS tag and its values are used as indices into `line.words` -- presumably
# the position of a word carrying that tag; confirm against the setup code.
# The branches are tried in order and the first matching tag list wins. If
# none matches, `question` is not assigned here.
if all(key in bucket for key in l1):  # 'NNP', 'VBG', 'VBZ', 'IN' in sentence.
    question = ('무엇' + ' ' + line.words[bucket['VBZ']] + ' '
                + line.words[bucket['NNP']] + ' '
                + line.words[bucket['VBG']] + '?')
elif all(key in bucket for key in l2):  # 'NNP', 'VBG', 'VBZ' in sentence.
    question = ('무엇' + ' ' + line.words[bucket['VBZ']] + ' '
                + line.words[bucket['NNP']] + ' '
                + line.words[bucket['VBG']] + '?')
elif all(key in bucket for key in l3):  # 'PRP', 'VBG', 'VBZ', 'IN' in sentence.
    question = ('무엇' + ' ' + line.words[bucket['VBZ']] + ' '
                + line.words[bucket['PRP']] + ' '
                + line.words[bucket['VBG']] + '?')
elif all(key in bucket for key in l4):  # 'PRP', 'VBG', 'VBZ' in sentence.
    # Same template as the l3 branch: per the tag comments, l4 differs from
    # l3 only in not requiring 'IN', just as l2 mirrors l1.
    question = ('무엇' + ' ' + line.words[bucket['VBZ']] + ' '
                + line.words[bucket['PRP']] + ' '
                + line.words[bucket['VBG']] + '?')
elif all(key in bucket for key in l7):  # 'NN', 'VBG', 'VBZ' in sentence.
    question = ('무엇' + ' ' + line.words[bucket['VBZ']] + ' '
                + line.words[bucket['NN']] + ' '
                + line.words[bucket['VBG']] + '?')
elif all(key in bucket for key in l8):  # 'NNP', 'VBZ', 'JJ' in sentence.
    question = ('무엇' + ' ' + line.words[bucket['VBZ']] + ' '
                + line.words[bucket['NNP']] + '?')
elif all(key in bucket for key in l9):  # 'NNP', 'VBZ', 'NN' in sentence.
    question = ('무엇' + ' ' + line.words[bucket['VBZ']] + ' '
                + line.words[bucket['NNP']] + '?')
elif all(key in bucket for key in l11):  # 'PRP', 'VBZ' in sentence.
    # Only the pronouns 'she' and 'he' are handled; for any other pronoun
    # this branch leaves `question` unassigned.
    if line.words[bucket['PRP']] in ['she', 'he']:
        question = ('무엇' + ' does ' + line.words[bucket['PRP']].lower() + ' '
                    + line.words[bucket['VBZ']].singularize() + '?')
elif all(key in bucket for key in l10):  # 'NNP', 'VBZ' in sentence.
    question = ('무엇' + ' does ' + line.words[bucket['NNP']] + ' '
                + line.words[bucket['VBZ']].singularize() + '?')
elif all(key in bucket for key in l13):  # 'NN', 'VBZ' in sentence.
    question = ('무엇' + ' ' + line.words[bucket['VBZ']] + ' '
                + line.words[bucket['NN']] + '?')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment