Created
August 10, 2016 15:18
-
-
Save oimou/b869ab0af6ee789634e914d78a0234b6 to your computer and use it in GitHub Desktop.
Qiitaのタイトルから頻出パターンを導く
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const fs = require("fs");
const _ = require("underscore");
const MeCab = require("mecab-async");

// MeCab client shared by every parseTitle() call.
const mecab = new MeCab();

// One entry per line of ./title.txt; accepts both LF and CRLF line endings.
const titles = fs.readFileSync("./title.txt", "utf8").split(/\r?\n/);

// Maps each masked pattern to the list of original titles that produced it.
const dict = {};
/**
 * Replaces the nouns of a sentence with placeholder symbols.
 * Example: "私はペンです。" -> "◯◯は△△です。"
 *
 * Each distinct word is assigned the next symbol of `mozaikChars`, and a word
 * that was already seen by this instance always gets the same symbol again.
 */
class Mozaik {
  constructor() {
    // Symbol palette. Once more distinct words than symbols have been seen,
    // the palette wraps around and symbols are reused.
    this.mozaikChars = "◯△□✗◎▲●■";
    // Distinct words seen so far, in first-seen order.
    this.mozaikedWords = [];
  }

  /**
   * Returns the placeholder for `word`: its palette symbol written twice.
   *
   * @param {string} word - Word to mask.
   * @returns {string} Two copies of the symbol assigned to `word`.
   */
  generate(word) {
    let index = this.mozaikedWords.indexOf(word);
    if (index === -1) {
      // Array#push returns the new length, so the new word sits at length - 1.
      index = this.mozaikedWords.push(word) - 1;
    }
    // Split by code point rather than UTF-16 unit so that a palette holding
    // symbols outside the BMP still yields whole characters.
    const palette = Array.from(this.mozaikChars);
    const mozaikChar = palette[index % palette.length];
    return mozaikChar + mozaikChar;
  }
}
/**
 * Runs morphological analysis on one title, records the title under its
 * masked pattern in the shared `dict`, and prints the current top-20 ranking
 * of patterns.
 *
 * @param {string} title - Title text handed to `mecab.parse`.
 * @returns {Promise<undefined>} Resolves with no value after the title has
 *   been recorded and the ranking printed. Rejects with the error reported by
 *   `mecab.parse`, or with any error thrown while processing its result.
 */
function parseTitle(title) {
  return new Promise((resolve, reject) => {
    mecab.parse(title, (err, result) => {
      if (err) {
        return reject(err);
      }
      try {
        // Convert the title into the "◯◯を△△する" form.
        const mozaik = new Mozaik();
        let prevPos = null;
        let pattern = "";
        result.forEach(([surface, pos]) => {
          const isNoun = pos === "名詞";
          if (isNoun && prevPos === "名詞") {
            // A run of nouns counts as one compound noun, so only the first
            // noun of the run emits a placeholder. Other parts of speech are
            // never merged: two consecutive particles must both be kept.
          } else if (isNoun) {
            pattern += mozaik.generate(surface);
          } else {
            pattern += surface;
          }
          prevPos = pos;
        });

        // Group the title under its pattern. The own-property check keeps a
        // pattern such as "constructor" from picking up a member inherited
        // from Object.prototype.
        if (!Object.prototype.hasOwnProperty.call(dict, pattern)) {
          dict[pattern] = [];
        }
        dict[pattern].push(title);

        // Print the ranking: the 20 patterns with the most titles, each with
        // one randomly sampled example title.
        _(dict).chain()
          .pairs()
          .sortBy(([, examples]) => -examples.length)
          .head(20)
          .each(([rankedPattern, examples]) => {
            console.log(`${examples.length}: ${rankedPattern} (例: ${_.sample(examples)})`);
          });
        console.log();
        resolve();
      } catch (processingError) {
        // Without this, an exception thrown here would escape the promise
        // because it happens inside the asynchronous mecab callback.
        reject(processingError);
      }
    });
  });
}
/**
 * Feeds every non-blank entry of `titles` to `parseTitle`, strictly one after
 * another: the next title starts only after the previous promise resolves.
 *
 * `parseTitle` resolves with no value, so nothing is printed on completion;
 * the ranking output comes from `parseTitle` itself.
 *
 * @returns {Promise<undefined>} Settles once all titles have been processed.
 *   A failure is logged and turned into a non-zero exit code rather than a
 *   rejection; titles after the failing one are not processed.
 */
function main() {
  // Blank lines (for example the one left by a trailing newline) are not titles.
  const tasks = titles
    .filter(title => title.trim() !== "")
    .map(title => () => parseTitle(title));

  // Chain the tasks so that they run sequentially rather than all at once.
  return tasks
    .reduce((seq, task) => seq.then(task), Promise.resolve())
    .catch(err => {
      console.error(err);
      process.exitCode = 1;
    });
}
// Entry point: start processing the titles when the script is run.
main();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment