Skip to content

Instantly share code, notes, and snippets.

@oimou
Created August 10, 2016 15:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save oimou/b869ab0af6ee789634e914d78a0234b6 to your computer and use it in GitHub Desktop.
Save oimou/b869ab0af6ee789634e914d78a0234b6 to your computer and use it in GitHub Desktop.
Qiitaのタイトルから頻出パターンを導く
// Dependencies. `require` is an ordinary function, so it is called directly
// instead of through `new`.
const fs = require("fs");
const _ = require("underscore");
const MeCab = require("mecab-async");

// Morphological analyzer instance shared by every analysis.
const mecab = new MeCab();

// One title per line. Both LF and CRLF line endings are accepted, and blank
// lines (such as the one produced by a trailing newline) are dropped so that
// they are never analyzed or ranked.
const titles = fs.readFileSync("./title.txt", "utf8")
  .split(/\r?\n/)
  .filter(line => line.trim() !== "");

// Maps each mosaicked pattern to the list of original titles that produced it.
const dict = {};
/**
 * Hides words behind placeholder symbols.
 * Example: "私はペンです。" becomes "◯◯は△△です。" when its nouns are replaced.
 *
 * Each distinct word is tied to one symbol for the lifetime of the instance,
 * so asking for the same word again yields the same placeholder.
 */
class Mozaik {
  constructor() {
    // Pool of placeholder symbols; reused cyclically once all are taken.
    this.mozaikChars = "◯△□✗◎▲●■";
    // Words seen so far; a word's position in this list selects its symbol.
    this.mozaikedWords = [];
  }

  /**
   * Returns the placeholder for `word`, registering the word first when this
   * instance has not seen it yet.
   *
   * @param {string} word - The word to hide.
   * @returns {string} The symbol assigned to the word, written twice.
   */
  generate(word) {
    const seenWords = this.mozaikedWords;
    let slot = seenWords.indexOf(word);
    if (slot < 0) {
      // push() returns the new length, so the word just added sits one below it.
      slot = seenWords.push(word) - 1;
    }
    const symbol = this.mozaikChars[slot % this.mozaikChars.length];
    return symbol + symbol;
  }
}
/**
 * Morphologically analyzes one title, files it in `dict` under its mosaicked
 * pattern, and prints the current top-20 pattern ranking.
 *
 * @param {string} title - The title to analyze.
 * @returns {Promise<undefined>} Resolves once the title has been recorded and
 *   the ranking printed. Rejects with the error reported by MeCab, or with
 *   any error thrown while the analysis result is processed.
 */
function parseTitle(title) {
  return new Promise((resolve, reject) => {
    mecab.parse(title, (err, result) => {
      if (err) {
        reject(err);
        return;
      }
      // An exception thrown from this callback is not guaranteed to reach the
      // promise (it does not when the callback runs after the executor has
      // returned), so it is routed to reject() explicitly.
      try {
        const sentence = buildTitlePattern(result);
        // Group the title under its pattern. The own-property check keeps a
        // pattern equal to an inherited key (e.g. "constructor") from picking
        // up an Object.prototype member instead of a fresh list.
        if (!Object.prototype.hasOwnProperty.call(dict, sentence)) {
          dict[sentence] = [];
        }
        dict[sentence].push(title);
        printPatternRanking();
        resolve();
      } catch (error) {
        reject(error);
      }
    });
  });
}

/**
 * Turns analyzed tokens into a pattern of the form "◯◯を△△する": nouns become
 * placeholders, every other token is kept verbatim.
 *
 * @param {Array<Array<string>>} tokens - Analysis result; each token is
 *   destructured as [surface form, part of speech, ...].
 * @returns {string} The mosaicked pattern.
 */
function buildTitlePattern(tokens) {
  const NOUN = "名詞";
  const mozaik = new Mozaik();
  let prevPos = null;
  let sentence = "";
  tokens.forEach(([phenotype, pos]) => {
    if (pos === NOUN) {
      // A run of consecutive nouns counts as one compound noun, so only the
      // first token of the run emits a placeholder. Runs of any other part of
      // speech are NOT collapsed.
      if (prevPos !== NOUN) {
        sentence += mozaik.generate(phenotype);
      }
    } else {
      sentence += phenotype;
    }
    prevPos = pos;
  });
  return sentence;
}

/**
 * Prints the 20 patterns with the most titles in `dict`, most frequent first,
 * each with one randomly sampled example title, followed by a blank line.
 */
function printPatternRanking() {
  _(dict).chain()
    .pairs()
    // Negated count: sortBy orders ascending, the ranking is descending.
    .sortBy(([, groupedTitles]) => -groupedTitles.length)
    .head(20)
    .each(([pattern, groupedTitles]) => {
      console.log(`${groupedTitles.length}: ${pattern} (例: ${_.sample(groupedTitles)})`);
    });
  console.log();
}
/**
 * Analyzes every entry of `titles` strictly one after another: a title is
 * handed to parseTitle() only after the previous one has settled, and the
 * first failure stops the run.
 *
 * @returns {Promise<undefined>} Settles when the run is over. It never
 *   rejects: a failure is written to stderr and recorded in the process exit
 *   code instead.
 */
function main() {
  return titles
    .reduce(
      // Chain each analysis onto the previous one to enforce sequential order.
      (seq, title) => seq.then(() => parseTitle(title)),
      Promise.resolve()
    )
    .catch(err => {
      console.error(err);
      // Let shells and CI notice that the run did not complete.
      process.exitCode = 1;
    });
}
// Script entry point: kick off the analysis when this file is executed.
main();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment