Skip to content

Instantly share code, notes, and snippets.

@ayemos
Created February 9, 2016 10:28
Show Gist options
  • Save ayemos/3c453d2bc90b23be673b to your computer and use it in GitHub Desktop.
Save ayemos/3c453d2bc90b23be673b to your computer and use it in GitHub Desktop.
Count hinshi-s
# Usage: ruby analyze.rb CSV_FILE
require 'csv'
require 'awesome_print'
# Part-of-speech (hinshi) categories. Each category is a list of patterns; a
# hinshi value belongs to the category when ANY of its patterns matches.
# The source of a category's FIRST pattern is used as its label in the report.
#
# NOTE(review): /[^助感]動詞/ excludes 助動詞 and 感動詞 but still matches values
# containing 形容動詞, so those are counted here as well as in
# ADJECTIVAL_VERB_KEYS -- confirm whether that overlap is intended.
VERB_KEYS = [/[^助感]動詞/, /^動詞/].freeze
AUX_VERB_KEYS = [/助動詞/].freeze
CONJUNCTION_KEYS = [/接続詞/].freeze
NOUN_KEYS = [/名詞/].freeze
ADJECTIVAL_VERB_KEYS = [/形容動詞/, /形状詞/].freeze
ADJECTIVE_KEYS = [/形容詞/].freeze
SYMBOL_KEYS = [/記号/].freeze
INTERJECTION_KEYS = [/感動詞/].freeze
PARTICLE_KEYS = [/助詞/].freeze
PREFIX_KEYS = [/接頭辞/].freeze
SUFFIX_KEYS = [/接尾辞/].freeze
ADVERB = [/副詞/].freeze
# Matches every hinshi value, so its total equals the number of counted rows.
ANY = [/.*/].freeze
# Categories are not mutually exclusive: one row may be counted in several.
ALL = [VERB_KEYS, AUX_VERB_KEYS, CONJUNCTION_KEYS, NOUN_KEYS, ADJECTIVAL_VERB_KEYS, ADJECTIVE_KEYS,
       SYMBOL_KEYS, INTERJECTION_KEYS, PARTICLE_KEYS, ADVERB, PREFIX_KEYS, SUFFIX_KEYS, ANY].freeze

# Returns the entries of ALL that have at least one pattern matching +hinshi+,
# in the order they appear in ALL.
#
# @param hinshi [String] value taken from the sixth CSV column
# @return [Array<Array<Regexp>>]
def matching_categories(hinshi)
  ALL.select { |keys| keys.any? { |key| hinshi =~ key } }
end

# Tallies hinshi values per category.
#
# Rows whose sixth field (index 5) is nil -- including rows with fewer than
# six fields -- are skipped. Categories appear in the result in the order in
# which they were first matched, which determines the report order.
#
# @param rows [Enumerable<Array>] parsed CSV rows
# @return [Hash{String => Hash{String => Integer}}] category label =>
#   (hinshi value => number of occurrences)
def count_hinshi(rows)
  counts = Hash.new { |hash, label| hash[label] = Hash.new(0) }
  rows.each do |row|
    hinshi = row[5]
    next if hinshi.nil?

    matching_categories(hinshi).each do |keys|
      counts[keys.first.source][hinshi] += 1
    end
  end
  counts
end

# Builds one "label: total" line per category, where total is the sum of the
# occurrences of every hinshi value recorded for that category.
#
# @param counts [Hash{String => Hash{String => Integer}}] result of count_hinshi
# @return [Array<String>]
def report_lines(counts)
  counts.map do |label, by_hinshi|
    "#{label}: #{by_hinshi.values.inject(0, :+)}"
  end
end

if $PROGRAM_NAME == __FILE__
  if ARGV.empty?
    # Without a file name CSV.foreach would raise a TypeError; explain instead.
    warn 'Usage: ruby analyze.rb CSV_FILE'
  else
    # CSV.foreach without a block returns an enumerator over the rows.
    report_lines(count_hinshi(CSV.foreach(ARGV[0]))).each { |line| puts line }
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment