Last active
August 29, 2015 13:56
-
-
Save sambaiz/9021589 to your computer and use it in GitHub Desktop.
英文テキストファイルの各アルファベットの数を出力して割合をグラフで表示する
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "gnuplot" | |
# (gem install gnuplot) | |
# (brew install gnuplot) | |
# Command-line configuration:
#   ARGV[0] - path of the text file to analyse (required; raises when missing)
#   ARGV[1] - path of the count report to write (default "output.txt")
#   ARGV[2] - path of the graph file to write (default "output.eps")
INPUT_DATA = ARGV[0] || raise("input file name is not given")
OUTPUT_DATA = ARGV[1] || "output.txt"
OUTPUT_GRAPH = ARGV[2] || "output.eps"
# Tally how often each letter a-z occurs in INPUT_DATA, ignoring case.
# Missing keys read as 0, so letters that never occur need no special casing.
hash = Hash.new(0)
File.open(INPUT_DATA) do |file|
  file.each_line do |line|
    line.downcase.each_char do |char|
      # Accumulate across lines: plain assignment here would keep only the
      # count from the last line that contains the letter.
      hash[char] += 1 if ('a'..'z').cover?(char)
    end
  end
end
# Total number of counted letters. The 0 seed keeps this an Integer even when
# no letter was found (reduce without a seed returns nil for an empty list).
letters_count = hash.values.reduce(0) { |sum, count| sum + count }
# Order letters by descending count. sort yields [[letter, count], ...] pairs,
# which Hash[] turns back into a hash that keeps that order.
hash_sorted = Hash[hash.sort { |(_, v1), (_, v2)| v2 <=> v1 }]
# Report format: the total on the first line, then one "letter,count" line
# per letter that occurred.
File.open(OUTPUT_DATA, "w") do |file|
  file.puts(letters_count)
  hash_sorted.each { |key, value| file.puts("#{key},#{value}") }
end
Gnuplot.open do |gp| | |
Gnuplot::Plot.new( gp ) do |plot| | |
# set image parameters
plot.terminal 'postscript eps enhanced color'
letters = ('a'..'z').to_a
# Label x positions 1..26 with their letters: "('a' 1, 'b' 2, ...)".
xtics = letters.each_with_index.map { |letter, i| "'#{letter}' #{i + 1}" }
plot.set 'xtics', "(#{xtics.join(', ')})"
plot.set 'output', OUTPUT_GRAPH
# Leave one empty slot on each side of the 26 boxes.
plot.set 'xrange', "[0:#{letters.length + 1}]"
plot.set 'yrange', "[0:0.15]"
plot.set 'key', 'outside' # draw the legend outside the plot area
# set title and labels
# The plotted values are fractions of the total (count / letters_count), not
# percentages, so the y axis is labelled accordingly.
plot.ylabel 'relative frequency'
plot.xlabel 'letter'
plot.title 'letter frequency'
# Build the x/y series for the box plot.
#
# hash          - letter => occurrence count; letters without an entry count
#                 as 0.
# letters_count - total number of counted letters, used as the denominator.
#
# Returns [x, y]: x is 1..26 (one slot per letter a..z) and y holds each
# letter's count divided by letters_count as a Float. When letters_count is
# zero or nil every value is 0.0 rather than NaN.
def formatData(hash, letters_count)
  letters = ('a'..'z').to_a
  total = letters_count.to_f
  x = (1..letters.length).to_a
  y = letters.map { |letter| total.zero? ? 0.0 : hash.fetch(letter, 0) / total }
  [x, y]
end
# Wrap a data series in a Gnuplot::DataSet that is drawn with boxes.
#
# data  - the series passed straight to Gnuplot::DataSet.new (this script
#         passes the [x, y] pair built by formatData).
# title - legend text for the series.
#
# Returns whatever Gnuplot::DataSet.new returns (presumably the configured
# data set; confirm against the gnuplot gem).
def makeBoxGraph(data, title)
  Gnuplot::DataSet.new(data) do |ds|
    ds.title = title
    ds.with = "boxes"
  end
end
# plot graph: a single box series, titled with the input file name
plot.data << makeBoxGraph(formatData(hash, letters_count), INPUT_DATA)
end | |
end | |
# Hand the generated graph to the `open` command. Passing the path as a
# separate argument bypasses the shell, so file names containing spaces or
# shell metacharacters are opened literally instead of being re-parsed.
system("open", OUTPUT_GRAPH)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment