Skip to content

Instantly share code, notes, and snippets.

@melborne
Last active August 30, 2015 05:05
Show Gist options
  • Save melborne/93900 to your computer and use it in GitHub Desktop.
Save melborne/93900 to your computer and use it in GitHub Desktop.
frequently used words picker
# encoding: UTF-8
require "open-uri"
module Enumerable
  # Orders the collection ascending by the value the block returns for each
  # element and returns the leading +nth+ elements of that ordering.
  #
  # nth - number of elements to keep.
  #
  # Returns an Array.
  def take_by(nth)
    ordered = sort_by { |item| yield item }
    ordered[0...nth]
  end
end
# Word-frequency dictionary for English text.
#
# The input text is lower-cased and split into words made of the characters
# a-z and apostrophe; every distinct word is stored together with the number
# of times it occurs. Enumerating a dictionary yields [word, frequency] pairs.
#
# The top_by_* / bottom_by_* / uniq_words methods rely on the
# Enumerable#take_by extension defined earlier in this file.
class WordDictionary
  include Enumerable

  # An input that consists of nothing but one http(s) URL. Anchored to the
  # whole string so that ordinary text containing a line that merely starts
  # with "http" is not mistaken for a URL.
  URL_PATTERN = %r{\A\s*https?://\S+\s*\z}

  attr_reader :name, :words

  # input      - an http(s) URL, a file path, a literal text String, or ARGF.
  # name       - label for this dictionary.
  # inner_call - true when +input+ is text generated by this class itself
  #              (see #select and the set operators); such text is used
  #              verbatim and is never resolved as a URL or file path.
  #
  # Raises RuntimeError for unsupported input types or an unreadable URL.
  def initialize(input, name = 'none', inner_call = false)
    text = input_to_string(input, inner_call)
    @words = text.downcase.scan(/[a-z']+/)
    @freq_dic = @words.each_with_object(Hash.new(0)) { |word, dic| dic[word] += 1 }
    @name = name
  end

  # Yields each [word, frequency] pair; returns an Enumerator without a block.
  def each
    return enum_for(:each) unless block_given?

    @freq_dic.each { |pair| yield pair }
  end

  # The +nth+ most frequent [word, frequency] pairs. An optional block
  # receives each frequency and keeps the pair when it returns a true value.
  def top_by_frequency(nth, &blk)
    take_by_value(nth, lambda { |v| -v }, &blk)
  end

  # The +nth+ least frequent [word, frequency] pairs (same optional block).
  def bottom_by_frequency(nth, &blk)
    take_by_value(nth, lambda { |v| v }, &blk)
  end

  # The +nth+ longest words as [word, frequency, length] triples. The
  # optional block filters on frequency, not on length.
  def top_by_length(nth, &blk)
    list = take_by_key(nth, lambda { |word| -word.length }, &blk)
    list.map { |word, freq| [word, freq, word.length] }
  end

  # Returns a new WordDictionary (same name) restricted to the words matching
  # +regexp+. An optional block receives each frequency and keeps the word
  # when it returns a true value.
  #
  # Note: this intentionally replaces Enumerable#select for this class.
  def select(regexp)
    matched = @freq_dic.select { |word, _freq| word =~ regexp }
                       .select { |_word, freq| block_given? ? yield(freq) : freq }
    # Rebuild a text in which every kept word appears +freq+ times so that the
    # new dictionary ends up with the same counts.
    text = matched.map { |word, freq| "#{word} " * freq }.join(" ")
    WordDictionary.new(text, @name, true)
  end

  def to_s
    @freq_dic.to_s
  end

  # Number of distinct words.
  def size
    @freq_dic.length
  end

  # The four operators apply the corresponding Array operator to the two word
  # lists and return the result as a new, unnamed WordDictionary.
  def +(other)
    arithmetics(:+, other)
  end

  def -(other)
    arithmetics(:-, other)
  end

  def &(other)
    arithmetics(:&, other)
  end

  def |(other)
    arithmetics(:|, other)
  end

  # The +nth+ most frequent words of this dictionary after removing every
  # word that occurs at least 10 times in any of the +base+ dictionaries.
  def uniq_words(nth, *base)
    base.inject(self) { |dict, b| dict - b.select(/./) { |freq| freq >= 10 } }
        .top_by_frequency(nth)
  end

  # The raw word list is only needed by other dictionaries (set operators).
  protected :words

  private

  # Converts the constructor argument into the text to analyse.
  def input_to_string(input, inner_call)
    # Text generated internally must not be probed as a path or URL: it could
    # coincide with an existing file name or begin with "http".
    return input if inner_call && input.is_a?(String)

    case input
    when URL_PATTERN
      read_url(input.strip)
    when String
      begin
        File.open(input, "r:utf-8") { |f| f.read }
      rescue
        # Not a readable file: treat the argument itself as the text.
        STDERR.puts "Argument has assumed as a text string"
        input
      end
    when ARGF.class
      input.read
    else
      raise "Wrong argument. ARGF, file or string are acceptable."
    end
  end

  # Fetches the body of +url+ through open-uri. Failures are reported as a
  # RuntimeError instead of terminating the process, so callers (for example
  # a web request handler) can recover with an ordinary rescue.
  def read_url(url)
    URI.parse(url).read
  rescue StandardError => e
    raise "Cannot read #{url}: #{e.message}"
  end

  def take_by_value(nth, sort_opt, &blk)
    by_value = lambda { |_key, value| value }
    take_by_key_or_val(nth, sort_opt, by_value, &blk)
  end

  def take_by_key(nth, sort_opt, &blk)
    by_key = lambda { |key, _value| key }
    take_by_key_or_val(nth, sort_opt, by_key, &blk)
  end

  # Keeps the pairs whose frequency passes the optional block, then returns
  # the first +nth+ pairs ordered by sort_opt applied to the key or the value
  # (whichever +by+ extracts).
  def take_by_key_or_val(nth, sort_opt, by)
    @freq_dic.select { |_key, val| block_given? ? yield(val) : val }
             .take_by(nth) { |key, val| sort_opt[by[key, val]] }
  end

  def arithmetics(op, other)
    result = @words.send(op, other.words).join(" ")
    WordDictionary.new(result, '', true)
  end
end
# Prints a horizontal star chart of word frequencies to standard output,
# one "<frequency>:<word> <stars>" line per entry.
#
# data      - collection of [word, frequency, ...] entries.
# max_stars - bar length used for the highest frequency (default 60); the
#             other bars are scaled relative to it and rounded up.
#
# Returns data.
def pretty_print(data, max_stars = 60)
  top_entry = data.max_by { |_word, freq| freq }
  return data if top_entry.nil? # empty collection: nothing to draw

  max_value = top_entry[1].to_f
  data.each do |word, freq|
    stars = "*" * (max_stars * (freq / max_value)).ceil
    printf "%5d:%-5s %s\n", freq, word, stars
  end
end
# Demo run when this file is executed directly: print the 40 most frequent
# words of two sample texts that are not common in the base text.
if __FILE__ == $PROGRAM_NAME
  base = WordDictionary.new('./public/base.txt')
  samples = [
    WordDictionary.new('./public/alice.txt', :alice),
    WordDictionary.new('./public/japanese_history.txt')
  ]
  samples.each { |sample| p sample.uniq_words(40, base) }
end
require "topwords"
# Layout measurements handed to Shoes as :width / :left values, and the two
# colours used for text and background.
WIDTH = 510
WORD_POS = 23
WORD_WIDTH = 60
BAR_POS = WORD_POS + WORD_WIDTH
BAR_WIDTH = 350
MARGIN = 2
WORD_COLOR = "#253"
BG_COLOR = "#eed"

# Shoes GUI: "Open" loads a text file and keeps its 20 most frequent words,
# "Show" draws one animated progress bar per word.
Shoes.app :width => WIDTH, :height => 650 do
  background BG_COLOR
  stack :height => 30 do
    background WORD_COLOR
    flow do
      caption "Frequently Used Words", :stroke => BG_COLOR
      button 'Show' do
        pretty_print
      end
      @fpara = para 'select file', :stroke => BG_COLOR
      button 'Open' do
        filename = ask_open_file
        # NOTE(review): ask_open_file presumably yields nil when the dialog is
        # cancelled - confirm; without this guard WordDictionary.new raises.
        if filename
          @data = WordDictionary.new(filename).top_by_frequency(20)
          @fpara.replace File.basename(filename)
        end
      end
    end
  end
  @body = stack :margin_top => 10 do
    # Redraws the chart from @data ([word, frequency] pairs).
    def pretty_print
      # Nothing to draw until a file has been opened (or if it had no words).
      return if @data.nil? || @data.empty?

      max_value = @data.max { |a, b| a[1] <=> b[1] }.slice(1)
      @body.clear do
        @data.each_with_index do |(word, val), nth|
          # Bar length relative to the most frequent word.
          rel_val = val/max_value.to_f
          flow :width => WIDTH, :height => 30 do
            para "#{nth+1}:", :stroke => WORD_COLOR
            para strong(word), :width => WORD_WIDTH, :left => WORD_POS,
                 :align => 'left', :stroke => WORD_COLOR
            bar = progress :width => BAR_WIDTH, :left => BAR_POS + MARGIN
            freq = para ""
            # i is the counter supplied by animate; the bar grows until
            # i reaches 100, then the frequency is printed beside it.
            anim = animate 24 do |i|
              int = i/100.0
              bar.fraction = int * rel_val
              if int >=1
                anim.stop
                freq.replace val, :left => BAR_POS + BAR_WIDTH + MARGIN,
                             :stroke => WORD_COLOR
              end
            end
          end
        end
      end
    end
  end
end
require "rubygems"
require "sinatra"
require "topwords"
# Books presented on the index page: key => [path of the text file, title].
# The key also names the instance variable that holds the book's dictionary.
# Frozen because nothing is supposed to modify the catalogue at runtime.
BOOKS = {
  :alice => ['public/alice.txt', "Alice's Adventures in Wonderland"],
  :pride => ['public/pride_prejudice.txt', "Pride and Prejudice"],
  :japan => ['public/japanese_history.txt', "A History of the Japanese People"],
  :romeo => ['public/romeo_juliet.txt', "The Tragedy of Romeo and Juliet"],
  :science => ['public/outline_of_science.txt', "Outline of Science"]
}.freeze

# Reference text as [path, name]; its frequent words are removed when the
# characteristic words of a book are computed.
BASE = ['public/base.txt', 'base'].freeze
# Index page: loads every book and renders the statistics of each one.
get '/' do
  init
  books = [@alice, @pride, @japan, @romeo, @science]
  @result = books.map { |book| get_data(book) }
  erb :index
end
# Analyses the submitted text (or the text behind a submitted URL) and shows
# its statistics; any error sends the visitor back to the index page.
#
# NOTE(review): the raw form value is handed to WordDictionary.new, which
# also accepts file paths and URLs - confirm that letting visitors name
# server-side files or arbitrary URLs is acceptable.
post '/' do
  begin
    @input = params[:input]
    stats = get_data(WordDictionary.new(@input))
    # The view has no use for the leading name entry.
    @yours = stats.drop(1)
    erb :show
  rescue
    redirect '/'
  end
end
helpers do
  # Builds a WordDictionary for every entry of BOOKS and stores it in an
  # instance variable named after the entry's key (@alice, @pride, ...).
  # Also fills @files with title => file base name.
  def init
    @files = {}
    BOOKS.each do |name, (file, title)|
      instance_variable_set("@#{name}", WordDictionary.new(file, title))
      @files[title] = File.basename(file)
    end
  end

  # Statistics of one dictionary as
  # [name, distinct-word count, 30 most frequent words, 10 longest words,
  #  20 most frequent words that are not common in the base text].
  def get_data(obj)
    [obj.name, obj.size, obj.top_by_frequency(30), obj.top_by_length(10),
     obj.uniq_words(20, base_dictionary)]
  end

  # Dictionary of the base text. Memoised so the base file is read and parsed
  # once per request rather than once per get_data call; Sinatra evaluates
  # each request on a fresh application instance, so nothing is shared
  # between requests.
  def base_dictionary
    @base_dictionary ||= WordDictionary.new(BASE[0], BASE[1])
  end
end
__END__
@@layout
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html>
<head>
<meta http-equiv="Content-type" content="text/html; charset=utf-8">
<title>WORDS in Books</title>
<style type="text/css" media="screen">
* { margin: 0; padding: 0;}
#side {float: right; padding:15px 10px 15px 10px; margin:5px 0 60em 3px; background:#471;}
#input {margin-top:20px; background:#aa7;}
#home_link {font-size:14pt; clear:left; margin:40px 0 0 20px;}
body {background:#222;}
a { text-decoration:none;}
a:link, a:visited {color:#ffc;}
a#home {color:#6b1;}
h1, #title { color:#6b1; margin:7px 0 3px 7px;}
h3.book_title { background:#606; color:#fc3; margin:5px 3px 3px 6px; padding:3px 10px 3px 10px; border-bottom:#300 solid 2px; border-right:#300 solid 2px; word-break: break-all;}
h3.book_title:hover {background:#93f;}
a:hover {color:#f6f;}
div.obj { width:350px; float:left; padding-bottom:5px;}
div.category { font-size:12pt; color:#f6f; font-weight:bolder; padding:3px 0 0 20px;}
div.item { font-size:14pt; color:#ffc; padding-left:30px;}
span#num { color:#ffc; margin-left:10px;}
</style>
</head>
<body>
<%# The id "main" is carried by the content wrapper below only; an id must be unique within a document. %>
<div id="banner">
<h1><a href='/' id='title'>WORDS in Books</a></h1>
</div>
<div id="side">
<a href="http://www.gutenberg.org/wiki/Main_Page">Gutenberg</a><br/>
<a href="http://www.sinatrarb.com/">Sinatra</a><br/>
<a href="http://www8.plala.or.jp/abridge/ruby191class.html">Ruby 1.9.1 Methods List</a><br/>
<div class='input_form'>
<form action="/" method="POST" accept-charset="utf-8">
<textarea name="input" id="input" rows="20">英文または英文の置かれているURLを入力してください</textarea>
<p id='bottom'><input type="submit" value="Post"></p>
</form>
</div>
</div>
<%# Output of the current view (index or show) is inserted here. %>
<div id="main">
<%= yield %>
</div>
</body>
</html>
@@ index
<% @result.each do |obj| %>
<%# obj is [title, distinct-word count, most used words, longest words, characteristic words]; every shift below consumes the next field in that order. %>
<div class='obj'>
<h3 class='book_title'><a href="<%= @files[obj[0]] %>"><%= obj.shift %></a></h3>
<div class='category'>Total Words in Kind : <span id='num'><%= obj.shift %></span></div>
<div class='category'>Most Used Words</div>
<div class='item'>
<% obj.shift.each do |word, freq| %>
<%= word %>
<% end %>
</div>
<div class='category'>Longest Words</div>
<div class= 'item'>
<% obj.shift.each do |word, freq, len| %>
<%= word %>
<% end %>
</div>
<div class='category'>Characteristic Words</div>
<div class='item'>
<% obj.shift.each do |word, freq| %>
<%= word %>
<% end %>
</div>
</div>
<% end %>
@@ show
<%# @input is the raw text submitted by the visitor, so it is HTML-escaped before being echoed. @yours is [distinct-word count, most used words, longest words, characteristic words], consumed by the shifts below in that order. %>
<div class='obj'>
<h3 class='book_title'><%= Rack::Utils.escape_html(@input.slice(0...70)) %></h3>
<div class='category'>Total Words in Kind : <%= @yours.shift %></div>
<div class='category'>Most Used Words</div>
<div class='item'>
<% @yours.shift.each do |word, freq| %>
<%= word %>
<% end %>
</div>
<div class='category'>Longest Words</div>
<div class= 'item'>
<% @yours.shift.each do |word, freq, len| %>
<%= word %>
<% end %>
</div>
<div class='category'>Characteristic Words</div>
<div class='item'>
<% @yours.shift.each do |word, freq| %>
<%= word %>
<% end %>
</div>
</div>
<br/><br/>
<div id='home_link'><a href='/' id='home'>Home</a></div>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment