Last active
August 30, 2015 05:05
-
-
Save melborne/93900 to your computer and use it in GitHub Desktop.
frequently used words picker
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# encoding: UTF-8
require "open-uri"
require "uri"
# Adds a "first N after sorting by a key" helper to every Enumerable.
module Enumerable
  # Sorts the receiver in ascending order of the key returned by the block
  # and keeps the leading +nth+ elements. Without a block the elements are
  # ordered by their own value.
  #
  # @param nth [Integer] how many leading elements of the sorted list to keep
  # @yield [elem] each element; the block returns its sort key
  # @return [Array] at most +nth+ elements in ascending key order
  def take_by(nth)
    return sort.slice(0...nth) unless block_given?

    sort_by { |elem| yield elem }.slice(0...nth)
  end
end
# Word-frequency dictionary built from a text string, a local file, an
# http(s) URL or ARGF.
#
# The input is lower-cased and split into words made of the characters
# a-z and the apostrophe; each distinct word is mapped to its number of
# occurrences. Iterating (via Enumerable) yields [word, frequency] pairs.
class WordDictionary
  include Enumerable

  # An input that consists of exactly one http(s) URL (surrounding blanks
  # allowed). Anchored with \A/\z so that multi-line text which merely
  # contains a line starting with "http" is never treated as a URL.
  URL_PATTERN = %r{\A\s*https?://\S+\s*\z}

  attr_reader :name, :words

  # @param input [String, ARGF] text, file path, URL or ARGF
  # @param name [Object] label stored in #name
  # @param inner_call [Boolean] true when +input+ is text produced by this
  #   class itself; such text is used verbatim and never probed as a file
  #   path or URL
  # @raise [ArgumentError] when a URL cannot be fetched
  # @raise [RuntimeError] when +input+ is neither a String nor ARGF
  def initialize(input, name = 'none', inner_call = false)
    text = input_to_string(input, inner_call)
    @words = text.downcase.scan(/[a-z']+/)
    @freq_dic = @words.each_with_object(Hash.new(0)) { |word, dic| dic[word] += 1 }
    @name = name
  end

  # Yields each [word, frequency] pair; returns an Enumerator without a block.
  def each
    return enum_for(:each) unless block_given?

    @freq_dic.each { |elem| yield elem }
  end

  # The +nth+ most frequent words as [word, frequency] pairs.
  # An optional block receives a frequency and filters the candidates.
  def top_by_frequency(nth, &blk)
    take_by_value(nth, lambda { |v| -v }, &blk)
  end

  # The +nth+ least frequent words as [word, frequency] pairs.
  def bottom_by_frequency(nth, &blk)
    take_by_value(nth, lambda { |v| v }, &blk)
  end

  # The +nth+ longest words as [word, frequency, length] triples.
  def top_by_length(nth, &blk)
    list = take_by_key(nth, lambda { |word| -word.length }, &blk)
    list.map { |word, freq| [word, freq, word.length] }
  end

  # Builds a new dictionary from the words matching +regexp+. An optional
  # block receives each frequency and keeps the word when it returns true.
  # NOTE: this intentionally replaces Enumerable#select with another signature.
  def select(regexp)
    text = @freq_dic.select { |word, _freq| word =~ regexp }
                    .select { |_word, freq| block_given? ? yield(freq) : freq }
                    .map { |word, freq| "#{word} " * freq }
                    .join(" ")
    WordDictionary.new(text, @name, true)
  end

  def to_s
    @freq_dic.to_s
  end

  # Number of distinct words.
  def size
    @freq_dic.length
  end

  # The four operators apply the Array operator of the same name to the
  # two word lists and wrap the result in a new (unnamed) dictionary.
  def +(other)
    arithmetics(:+, other)
  end

  def -(other)
    arithmetics(:-, other)
  end

  def &(other)
    arithmetics(:&, other)
  end

  def |(other)
    arithmetics(:|, other)
  end

  # The +nth+ most frequent words left after removing, for every dictionary
  # in +base+, the words that occur at least 10 times in it.
  def uniq_words(nth, *base)
    base.inject(self) { |dict, b| dict - b.select(/./) { |freq| freq >= 10 } }
        .top_by_frequency(nth)
  end

  protected :words

  private

  # Resolves +input+ to the text that should be analysed.
  def input_to_string(input, inner_call)
    case input
    when String
      # Text generated by select/arithmetics must not be re-interpreted:
      # it could start with "http" or coincide with an existing file name.
      return input if inner_call
      return fetch_url(input.strip) if input =~ URL_PATTERN

      read_file_or_text(input)
    when ARGF.class
      input.read
    else
      raise "Wrong argument. ARGF, file or string are acceptable."
    end
  end

  # Downloads +url+ through open-uri. URI#open is used instead of
  # Kernel#open so that the string can never reach the "|command" pipe
  # form, and because Kernel#open stopped handling URLs in Ruby 3.0.
  def fetch_url(url)
    URI.parse(url).open { |f| f.read }
  rescue StandardError => e
    # Report the failure to the caller instead of terminating the process.
    raise ArgumentError, "Cannot read #{url}: #{e.message}"
  end

  # Reads +input+ as a UTF-8 file; when that fails the string itself is
  # taken as the text (best effort, announced on STDERR).
  # NOTE(review): callers passing untrusted strings expose local files here.
  def read_file_or_text(input)
    File.open(input, "r:utf-8") { |f| f.read }
  rescue StandardError
    STDERR.puts "Argument has assumed as a text string"
    input
  end

  def take_by_value(nth, sort_opt, &blk)
    take_by_key_or_val(nth, sort_opt, lambda { |_key, val| val }, &blk)
  end

  def take_by_key(nth, sort_opt, &blk)
    take_by_key_or_val(nth, sort_opt, lambda { |key, _val| key }, &blk)
  end

  # Filters the pairs by frequency (optional block), then takes the first
  # +nth+ pairs ordered by sort_opt applied to the key or value picked by +by+.
  def take_by_key_or_val(nth, sort_opt, by)
    @freq_dic.select { |_key, val| block_given? ? yield(val) : val }
             .take_by(nth) { |key, val| sort_opt[by[key, val]] }
  end

  def arithmetics(op, other)
    result = @words.public_send(op, other.words).join(" ")
    WordDictionary.new(result, '', true)
  end
end
# Prints a horizontal bar chart of word frequencies to standard output.
# The most frequent entry gets 60 stars; the others are scaled relative
# to it and rounded up.
#
# @param data [Enumerable] collection of [word, frequency] pairs
# @return [Enumerable] +data+ itself; nothing is printed when it is empty
def pretty_print(data)
  max_stars = 60
  _top_word, max_value = data.max_by { |_word, freq| freq }
  # An empty collection has no maximum, so there is nothing to scale against.
  return data if max_value.nil?

  data.each do |word, freq|
    stars = "*" * (max_stars * (freq / max_value.to_f)).ceil
    printf "%5d:%-5s %s\n", freq, word, stars
  end
end
# Demo, executed only when this file is run directly: prints the 40 most
# frequent words of each sample book that are not common words (used at
# least 10 times) in the base text.
if __FILE__ == $PROGRAM_NAME
  base = WordDictionary.new('./public/base.txt')
  books = [
    WordDictionary.new('./public/alice.txt', :alice),
    WordDictionary.new('./public/japanese_history.txt')
  ]
  books.each { |book| p book.uniq_words(40, base) }
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "topwords" | |
# Layout values and colours used by the Shoes window defined below.
# NOTE(review): the numeric values are presumably pixels - confirm against Shoes.
WIDTH = 510        # window width, also the width of every chart row
WORD_POS = 23      # left offset of the word column
WORD_WIDTH = 60    # width of the word column
BAR_POS = WORD_POS + WORD_WIDTH # the bar starts right after the word column
BAR_WIDTH = 350    # width of the progress bar
MARGIN = 2         # gap placed before the bar and before the frequency text
WORD_COLOR = "#253"
BG_COLOR = "#eed"
# Shoes GUI: 'Open' loads a text file and keeps its 20 most frequent words,
# 'Show' draws them as animated progress bars.
Shoes.app :width => WIDTH, :height => 650 do
  background BG_COLOR
  stack :height => 30 do
    background WORD_COLOR
    flow do
      caption "Frequently Used Words", :stroke => BG_COLOR
      button 'Show' do
        pretty_print
      end
      @fpara = para 'select file', :stroke => BG_COLOR
      button 'Open' do
        filename = ask_open_file
        # NOTE(review): ask_open_file presumably returns nil when the dialog
        # is cancelled; keep the previous data in that case.
        if filename
          @data = WordDictionary.new(filename).top_by_frequency(20)
          @fpara.replace File.basename(filename)
        end
      end
    end
  end
  @body = stack :margin_top => 10 do
    # Redraws the chart area from @data ([word, frequency] pairs).
    # Does nothing until a file with at least one word has been loaded.
    def pretty_print
      return if @data.nil? || @data.empty?

      max_value = @data.max_by { |_word, val| val }.last
      @body.clear do
        @data.each_with_index do |(word, val), nth|
          # Bar length relative to the most frequent word (0.0..1.0).
          rel_val = val / max_value.to_f
          flow :width => WIDTH, :height => 30 do
            para "#{nth + 1}:", :stroke => WORD_COLOR
            para strong(word), :width => WORD_WIDTH, :left => WORD_POS,
                               :align => 'left', :stroke => WORD_COLOR
            bar = progress :width => BAR_WIDTH, :left => BAR_POS + MARGIN
            freq = para ""
            anim = animate 24 do |i|
              # Grow the bar over 100 frames, then stop and show the count.
              int = i / 100.0
              bar.fraction = int * rel_val
              if int >= 1
                anim.stop
                freq.replace val, :left => BAR_POS + BAR_WIDTH + MARGIN,
                                  :stroke => WORD_COLOR
              end
            end
          end
        end
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "rubygems" | |
require "sinatra" | |
require "topwords" | |
# Sample books offered on the index page: key => [text file path, title].
# Frozen so that request handlers cannot alter the catalogue by accident.
BOOKS = {
  :alice => ['public/alice.txt', "Alice's Adventures in Wonderland"],
  :pride => ['public/pride_prejudice.txt', "Pride and Prejudice"],
  :japan => ['public/japanese_history.txt', "A History of the Japanese People"],
  :romeo => ['public/romeo_juliet.txt', "The Tragedy of Romeo and Juliet"],
  :science => ['public/outline_of_science.txt', "Outline of Science"]
}.freeze
# Reference text whose frequent words are removed when looking for the
# characteristic words of a book: [text file path, name].
BASE = ['public/base.txt', 'base'].freeze
# Index page: analyses every book listed in BOOKS and renders the results.
get '/' do
  init
  # init stores one dictionary per BOOKS key in an instance variable of the
  # same name; walking BOOKS keeps this list in step with the catalogue.
  @result = BOOKS.keys.map do |name|
    get_data(instance_variable_get("@#{name}"))
  end
  erb :index
end
# Analyses the text (or the text behind the URL) submitted through the form.
# Any failure sends the visitor back to the index page.
post '/' do
  begin
    @input = params[:input]
    candidate = @input.to_s
    # WordDictionary opens a string that names an existing file. Visitors
    # must not be able to read files of the server that way, so such input
    # is refused before the dictionary is built.
    if !candidate.include?("\0") && File.file?(candidate)
      raise ArgumentError, "local files are not accepted"
    end
    dic = WordDictionary.new(@input)
    # Drop the leading name element; the page shows the input text instead.
    @yours = get_data(dic).slice(1..-1)
    erb :show
  rescue StandardError
    redirect '/'
  end
end
helpers do
  # Builds one WordDictionary per BOOKS entry, stored in an instance variable
  # named after the key, and fills @files with title => file basename.
  def init
    @files = {}
    BOOKS.each do |name, (file, title)|
      instance_variable_set("@#{name}", WordDictionary.new(file, title))
      @files[title] = File.basename(file)
    end
  end

  # Dictionary of the base text, read and parsed once and then reused for
  # every get_data call on this helper object.
  def base_dictionary
    @base_dictionary ||= WordDictionary.new(BASE[0], BASE[1])
  end

  # @param obj [WordDictionary]
  # @return [Array] name, distinct word count, 30 most frequent words,
  #   10 longest words and 20 characteristic words (frequent words that are
  #   not common in the base text)
  def get_data(obj)
    [obj.name, obj.size, obj.top_by_frequency(30), obj.top_by_length(10),
     obj.uniq_words(20, base_dictionary)]
  end
end
__END__
@@layout
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html>
<head>
<%# Page frame shared by all views: banner, side bar with links and the input form, then the view itself. %>
<meta http-equiv="Content-type" content="text/html; charset=utf-8">
<title>WORDS in Books</title>
<style type="text/css" media="screen">
* { margin: 0; padding: 0;}
#side {float: right; padding:15px 10px 15px 10px; margin:5px 0 60em 3px; background:#471;}
#input {margin-top:20px; background:#aa7;}
#home_link {font-size:14pt; clear:left; margin:40px 0 0 20px;}
body {background:#222;}
a { text-decoration:none;}
a:link, a:visited {color:#ffc;}
a#home {color:#6b1;}
h1, #title { color:#6b1; margin:7px 0 3px 7px;}
h3.book_title { background:#606; color:#fc3; margin:5px 3px 3px 6px; padding:3px 10px 3px 10px; border-bottom:#300 solid 2px; border-right:#300 solid 2px; word-break: break-all;}
h3.book_title:hover {background:#93f;}
a:hover {color:#f6f;}
div.obj { width:350px; float:left; padding-bottom:5px;}
div.category { font-size:12pt; color:#f6f; font-weight:bolder; padding:3px 0 0 20px;}
div.item { font-size:14pt; color:#ffc; padding-left:30px;}
span#num { color:#ffc; margin-left:10px;}
</style>
</head>
<body id="main">
<div id="banner">
<h1><a href='/' id='title'>WORDS in Books</a></h1>
</div>
<div id="side">
<a href="http://www.gutenberg.org/wiki/Main_Page">Gutenberg</a><br/>
<a href="http://www.sinatrarb.com/">Sinatra</a><br/>
<a href="http://www8.plala.or.jp/abridge/ruby191class.html">Ruby 1.9.1 Methods List</a><br/>
<div class='input_form'>
<form action="/" method="POST" accept-charset="utf-8">
<textarea name="input" id="input" rows="20">英文または英文の置かれているURLを入力してください</textarea>
<p id='bottom'><input type="submit" value="Post"></p>
</form>
</div>
</div>
<%# The id "main" is already taken by <body>; ids must be unique within a page. %>
<div id="content">
<%= yield %>
</div>
</body>
</html>
@@ index
<%# One panel per book. Each obj is [name, distinct word count, most used, longest, characteristic]; the entries are consumed with shift in that order. %>
<% @result.each do |obj| %>
<div class='obj'>
<h3 class='book_title'><a href="<%= @files[obj[0]] %>"><%= obj.shift %></a></h3>
<div class='category'>Total Words in Kind : <span id='num'><%= obj.shift %></span></div>
<div class='category'>Most Used Words</div>
<div class='item'>
<% obj.shift.each do |word, freq| %>
<%= word %>
<% end %>
</div>
<div class='category'>Longest Words</div>
<div class= 'item'>
<% obj.shift.each do |word, freq, len| %>
<%= word %>
<% end %>
</div>
<div class='category'>Characteristic Words</div>
<div class='item'>
<% obj.shift.each do |word, freq| %>
<%= word %>
<% end %>
</div>
</div>
<% end %>
@@ show
<%# Result panel for submitted text. @yours is [distinct word count, most used, longest, characteristic], consumed with shift in that order. %>
<div class='obj'>
<%# @input comes straight from the form, so it is HTML-escaped before being echoed. %>
<h3 class='book_title'><%= Rack::Utils.escape_html(@input.slice(0...70)) %></h3>
<div class='category'>Total Words in Kind : <%= @yours.shift %></div>
<div class='category'>Most Used Words</div>
<div class='item'>
<% @yours.shift.each do |word, freq| %>
<%= word %>
<% end %>
</div>
<div class='category'>Longest Words</div>
<div class= 'item'>
<% @yours.shift.each do |word, freq, len| %>
<%= word %>
<% end %>
</div>
<div class='category'>Characteristic Words</div>
<div class='item'>
<% @yours.shift.each do |word, freq| %>
<%= word %>
<% end %>
</div>
</div>
<br/><br/>
<div id='home_link'><a href='/' id='home'>Home</a></div>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment