Skip to content

Instantly share code, notes, and snippets.

@kurehajime
Created December 14, 2013 05:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kurehajime/7955959 to your computer and use it in GitHub Desktop.
Save kurehajime/7955959 to your computer and use it in GitHub Desktop.
はてブのホッテントリのタイトルを要約してWebの今を見つめる ref: http://qiita.com/kurehajime/items/f1cfd74e1ec7b45fbafa
<html>
<head>
</head>
<body style="">
<p>&nbsp;</p>
<p>
<meta charset="UTF-8">
<title>要約くん</title>
<link rel="stylesheet" href="http://code.jquery.com/mobile/1.1.0/jquery.mobile-1.1.0.min.css" />
<script type="text/javascript" src="http://code.jquery.com/jquery-1.7.1.min.js"></script>
<script type="text/javascript" src="http://code.jquery.com/mobile/1.1.0/jquery.mobile-1.1.0.min.js"></script>
<script type="text/javascript" src="jscss/tiny_segmenter-0.1.js" charset="UTF-8">
</script> <script type="text/javascript">
// Shared TinySegmenter instance used by makeDic(); it is created once the
// DOM is ready, so it stays undefined until then.
var segmenter;
$(document).ready(function () {
  segmenter = new TinySegmenter();
});
/**
 * Click handler: builds a Markov dictionary from the text in #txtIN,
 * generates several candidate sentences and writes the one whose length is
 * closest to the target length into #txtOUT.
 *
 * Writes an empty string when the input yields no sentence-start words.
 */
function doAction() {
  var TARGET_LENGTH = 40; // preferred length of the generated sentence
  var EXTRA_TRIES = 11;   // candidates drawn in addition to the first one

  var wkIn = $("#txtIN").val(); // input text
  var dict = makeDic(wkIn);

  // Nothing to generate from (e.g. empty input): clear the output instead of
  // letting the generator fail on an empty start list.
  var starts = dict["_BOS_"];
  if (!starts || starts.length === 0) {
    $("#txtOUT").val("");
    return;
  }

  // Every candidate, including the first, is stripped of newlines so that
  // the length comparison treats them all the same way.
  var wkbest = doShuffle(dict).replace(/\n/g, "");
  for (var i = 0; i < EXTRA_TRIES; i++) {
    // Declared locally; the original leaked this as an implicit global.
    var wkOut = doShuffle(dict).replace(/\n/g, "");
    if (Math.abs(TARGET_LENGTH - wkOut.length) < Math.abs(TARGET_LENGTH - wkbest.length)) {
      wkbest = wkOut;
    }
  }
  $("#txtOUT").val(wkbest); // output text
}
/**
 * Generates one random sentence by walking the Markov dictionary.
 *
 * Starts from a random entry of wkDic["_BOS_"], then repeatedly replaces the
 * current word with a random entry of wkDic[currentWord] until "_EOS_" is
 * drawn. The words are concatenated and terminated with "。".
 *
 * @param {Object} wkDic - maps each word to an array of possible next words;
 *   "_BOS_" holds the possible first words and "_EOS_" marks a sentence end.
 * @returns {string} the generated sentence, or "" when there is no usable
 *   "_BOS_" list to start from.
 */
function doShuffle(wkDic) {
  function pick(list) {
    return list[Math.floor(Math.random() * list.length)];
  }

  var starts = wkDic["_BOS_"];
  if (!Array.isArray(starts) || starts.length === 0) {
    return ""; // nothing to start from; previously this threw a TypeError
  }

  var parts = [];
  var word = pick(starts);
  while (word !== "_EOS_") {
    parts.push(word);
    var nexts = wkDic[word];
    // A word without a successor list ends the sentence instead of crashing.
    // Array.isArray also rejects inherited members such as "constructor".
    if (!Array.isArray(nexts) || nexts.length === 0) {
      break;
    }
    word = pick(nexts);
  }
  return parts.join("") + "。";
}
/**
 * Builds the Markov dictionary for a text.
 *
 * The text is cleaned with nonoise(), split into lines on "。", and each line
 * is split into words with the shared `segmenter`. For every word the
 * following word (or "_EOS_" after the last word of a line) is recorded.
 *
 * @param {string} wkStr - raw input text.
 * @returns {Object} prototype-less object mapping each word to the array of
 *   words seen after it; "_BOS_" lists the words a sentence may start with
 *   (the first word of each line and every word following "、").
 */
function makeDic(wkStr) {
  var wkLines = nonoise(wkStr).split("。");
  // No prototype: words such as "constructor" or "__proto__" must be plain
  // keys instead of colliding with members inherited from Object.prototype.
  var wkDict = Object.create(null);
  wkDict["_BOS_"] = [];

  for (var i = 0; i < wkLines.length; i++) {
    var wkWords = segmenter.segment(wkLines[i]);
    if (wkWords[0]) {
      wkDict["_BOS_"].push(wkWords[0]); // first word of the line
    }
    for (var w = 0; w < wkWords.length; w++) {
      var wkNowWord = wkWords[w];
      var isLast = w + 1 >= wkWords.length;
      var wkNextWord = isLast ? "_EOS_" : wkWords[w + 1];

      if (!wkDict[wkNowWord]) {
        wkDict[wkNowWord] = [];
      }
      wkDict[wkNowWord].push(wkNextWord);

      // A word right after "、" may also start a sentence. The end marker is
      // skipped so that a sentence never starts with "_EOS_".
      if (wkNowWord === "、" && !isLast) {
        wkDict["_BOS_"].push(wkNextWord);
      }
    }
  }
  return wkDict;
}
/**
 * Normalizes raw text before it is split into sentences.
 *
 * Newlines, question/exclamation marks and separator characters
 * (- | : ・) are replaced with the sentence delimiter "。"; brackets are
 * replaced with a space. Both the ASCII and the full-width forms of
 * ? ! | : ( ) are handled; the full-width forms are written as \u escapes
 * so they cannot be folded into their ASCII look-alikes.
 *
 * @param {string} wkStr - raw text; null/undefined is treated as "".
 * @returns {string} the cleaned text.
 */
function nonoise(wkStr) {
  var text = wkStr == null ? "" : String(wkStr);
  return text
    .replace(/\n/g, "。")
    // ? ! plus full-width U+FF1F, U+FF01
    .replace(/[?!\uFF1F\uFF01]/g, "。")
    // - | : ・ plus full-width U+FF5C, U+FF1A
    .replace(/[-|:・\uFF5C\uFF1A]/g, "。")
    // 「」 ( ) [ ] 【】 plus full-width U+FF08, U+FF09
    .replace(/[「」()\[\]【】\uFF08\uFF09]/g, " ");
}
</script> </meta>
<div data-role="page" id="first">
<div data-role="content">
<p>今ネット上で話題の記事を一行で要約すると・・・</p>
<p><textarea cols="60" rows="8" name="txtIN" id="txtIN" style="max-height:200px;">{{ mes }}</textarea></p>
<input type="button" name="" value="生成" onClick=" doAction()"><br>
<textarea cols="60" rows="8" name="txtIN" id="txtOUT"></textarea>
<p></p>
</div>
</div>
</body>
</html>
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import webapp2
import os
from google.appengine.ext.webapp import template
from xml.etree.ElementTree import *
import re
import urllib
class Markov(webapp2.RequestHandler):
    """Serves the summarizer page, pre-filled with freshly fetched titles."""

    def get(self):
        """Render html/markov.html with the titles selected by ``mode``.

        ``mode=2ch`` uses get_2ch(); any other value uses
        get_hotentry_title(). The text is passed to the template as ``mes``.
        """
        if self.request.get('mode') == "2ch":
            mes = self.get_2ch()
        else:
            mes = self.get_hotentry_title()
        template_values = {
            'mes': mes
        }
        path = os.path.join(os.path.dirname(__file__), 'html/markov.html')
        self.response.out.write(template.render(path, template_values))

    def get_hotentry_title(self):
        """Return the hot-entry feed titles, one per line.

        A trailing separator character followed by 1-30 characters is
        removed from each title (presumably the site name -- TODO confirm).
        Items without title text are skipped.
        """
        response = urllib.urlopen('http://feeds.feedburner.com/hatena/b/hotentry')
        try:
            tree = parse(response)
        finally:
            # Release the connection even when parsing fails.
            response.close()

        titles = []
        for item in tree.findall('./{http://purl.org/rss/1.0/}item'):
            title = item.find('{http://purl.org/rss/1.0/}title')
            if title is None or not title.text:
                continue  # no usable title; re.sub would raise on None
            # NOTE(review): the class repeats "|" and ":"; full-width variants
            # may have been intended -- confirm against the original gist.
            titles.append(re.sub("[-:|/|:].{1,30}$", "", title.text) + "\n")
        return "".join(titles)

    def get_2ch(self):
        """Return the thread titles from the board's subject.txt, one per line.

        Each line is split on "<>" and the second field is used as the
        title, with a trailing parenthesized part removed. Blank lines and
        lines without a "<>" separator are skipped.
        """
        response = urllib.urlopen('http://engawa.2ch.net/poverty/subject.txt')
        try:
            raw = response.read()
        finally:
            response.close()
        html = unicode(raw, "cp932", 'ignore').encode("utf-8")

        titles = []
        for line in html.split("\n"):
            fields = line.split("<>", 2)
            if len(fields) < 2:
                continue  # blank or malformed line; [1] would raise IndexError
            titles.append(re.sub(r"\(.*?\)$", "", fields[1]) + "\n")
        return "".join(titles)
# WSGI application object: routes /markov.html to the Markov handler.
app = webapp2.WSGIApplication([('/markov.html', Markov)], debug=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment