Skip to content

Instantly share code, notes, and snippets.

@sasamijp
Last active August 29, 2015 13:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sasamijp/9913369 to your computer and use it in GitHub Desktop.
Save sasamijp/9913369 to your computer and use it in GitHub Desktop.
ミキ的にはこのコード、エロいものかそうでないかのスコアを出すには全然合ってないって思うな
# -*- encoding: utf-8 -*-
require 'natto'
# Shared MeCab tagger (via the natto gem); extractnouns parses every sentence through it.
@natto = Natto::MeCab.new
# Reads ./ss.txt (UTF-8) and returns the quoted speech found on its lines.
#
# Only lines containing both 「 and 」 are considered. For each such line the
# result is the text following the first 「 (up to the next 「, if any) with
# its final character removed -- normally the closing 」.
#
# Lines where nothing follows the 「 (for example "x」「") are skipped:
# String#split drops the trailing empty field, so there is no text to take
# and calling chop on the missing piece would raise NoMethodError.
#
# Returns an Array of Strings, in file order.
def getSS
  lines = File.read("./ss.txt", :encoding => Encoding::UTF_8).split("\n")
  quoted = lines.select { |line| line.include?("「") && line.include?("」") }
  quoted.map { |line| line.split("「")[1] }.compact.map(&:chop)
end
# Reads ./ss_notero.txt (UTF-8) and returns the quoted speech found on its lines.
#
# Only lines containing both 「 and 」 are considered. For each such line the
# result is the text following the first 「 (up to the next 「, if any) with
# its final character removed -- normally the closing 」.
#
# Lines where nothing follows the 「 (for example "x」「") are skipped:
# String#split drops the trailing empty field, so there is no text to take
# and calling chop on the missing piece would raise NoMethodError.
#
# Returns an Array of Strings, in file order.
def getSSnoEro
  lines = File.read("./ss_notero.txt", :encoding => Encoding::UTF_8).split("\n")
  quoted = lines.select { |line| line.include?("「") && line.include?("」") }
  quoted.map { |line| line.split("「")[1] }.compact.map(&:chop)
end
# Returns the surface forms of every token in +str+ whose first feature
# field (the part before the first comma) is '名詞' (noun), in the order
# the tagger stored in @natto yields them.
def extractnouns(str)
  collected = []
  @natto.parse(str) do |node|
    part_of_speech = node.feature.split(",").first
    collected << node.surface if part_of_speech == '名詞'
  end
  collected
end
# Per-sentence noun lists for each corpus: one inner Array of nouns per
# quoted sentence. Each corpus file is read and tagged exactly once.
allnouns_ero = getSS.compact.map { |sentense| extractnouns(sentense) }
allnouns_notero = getSSnoEro.compact.map { |sentense| extractnouns(sentense) }
# Combined corpus: the ero sentences first, then the not-ero sentences.
allnouns = allnouns_ero + allnouns_notero
# Scores how strongly +word+ is associated with the "ero" corpus.
#
# word            - the word to score.
# allnouns        - the combined corpus; only its length is used.
# allnouns_ero    - entries of the ero corpus; each must respond to include?.
# allnouns_notero - entries of the not-ero corpus; each must respond to include?.
#
# The score is prior * P(word | ero) / P(word), where
#   prior         = allnouns_ero.length / allnouns.length
#   P(word | ero) = ero entries containing word / allnouns_ero.length
#   P(word)       = entries containing word (both corpora) / allnouns.length
#
# Returns [score, confidence]. confidence is 0.2 per occurrence, capped at
# 1.0 from five occurrences on. Returns nil when no score can be computed:
# the word occurs in neither corpus, or allnouns is empty.
def getWordscore(word, allnouns, allnouns_ero, allnouns_notero)
  found = allnouns_ero.count { |nouns| nouns.include?(word) }
  occurance_in_negatives = allnouns_notero.count { |nouns| nouns.include?(word) }
  occurance_in_all = found + occurance_in_negatives
  total = allnouns.length

  # An empty combined corpus would turn every ratio below into NaN/Infinity,
  # which a float comparison against 0 does not catch.
  return nil if occurance_in_all.zero? || total.zero?

  confidence = occurance_in_all >= 5 ? 1.0 : occurance_in_all * 0.2

  # Never seen in the ero corpus: the score is 0. Returning here also avoids
  # 0.0 / 0 (NaN) when allnouns_ero is empty.
  return [0.0, confidence] if found.zero?

  word_appear_in_ero = found.to_f / allnouns_ero.length
  prior = allnouns_ero.length.to_f / total
  word_appear_in_documents = occurance_in_all.to_f / total
  [prior * word_appear_in_ero / word_appear_in_documents, confidence]
end
# Prints one "noun,score,confidence" line to standard output for every
# element of +allnouns+ that getWordscore can score; elements for which it
# returns nil are skipped. All three arguments are forwarded to getWordscore
# unchanged, with the element converted via to_s as the word.
def dumpAllScore(allnouns, allnouns_ero, allnouns_notero)
  allnouns.each do |noun|
    result = getWordscore(noun.to_s, allnouns, allnouns_ero, allnouns_notero)
    next if result.nil?

    value = result[0]
    confidence = result[1]
    puts "#{noun},#{value},#{confidence}\n"
  end
end
# Flatten the per-sentence noun lists into one list of noun occurrences;
# dumpAllScore walks that list element by element.
every_noun = allnouns.flatten
dumpAllScore(every_noun, allnouns_ero, allnouns_notero)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment