Skip to content

Instantly share code, notes, and snippets.

@cuixin
Created May 5, 2013 19:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cuixin/5521867 to your computer and use it in GitHub Desktop.
Save cuixin/5521867 to your computer and use it in GitHub Desktop.
写了个用 Lua 解析并统计英文单词的小工具，方便了解每篇英文文章的单词统计数据。
#!/usr/local/bin/lua
require "io"

-- Word-frequency report for a text file.
--
-- Usage: ./words.lua <path>
--
-- Reads the file named by the first command-line argument, counts every
-- maximal run of letters (Lua pattern "%a+", case-sensitive), groups the
-- words that share an occurrence count, and prints one line per group,
-- highest count first, as "<count> = <word>|<word>|...".
-- Two summary lines follow: the number of distinct words ("Total is") and
-- the number of printed groups ("Lines is").
-- Raises an error (via assert) when the file cannot be opened.

local args = {...}
if #args < 1 then
  print("Please specify the file path.")
  print("./words.lua as.txt")
  return
end

local file = assert(io.open(args[1], "r"))

-- Number of bytes requested per read call.
local CHUNK_SIZE = 8192

-- words[w] = number of times the word w was seen.
local words = {}
while true do
  -- Read a fixed-size block plus the remainder of the line it ends in, so
  -- that no word is ever cut in half at a block boundary.
  local chunk, rest = file:read(CHUNK_SIZE, "*line")
  if not chunk then break end
  -- `rest` is nil when the block ended exactly at end of file.
  if rest then chunk = chunk .. rest end
  for w in string.gmatch(chunk, "%a+") do
    words[w] = (words[w] or 0) + 1
  end
end
-- All input has been consumed; release the handle before reporting.
file:close()

-- groups[n] = { Count = n, Words = { every word seen exactly n times } }
local groups = {}
local count = 0 -- number of distinct words
for word, n in pairs(words) do
  local group = groups[n]
  if group then
    group.Words[#group.Words + 1] = word
  else
    groups[n] = { Count = n, Words = { word } }
  end
  count = count + 1
end

-- `groups` is keyed by occurrence count and is sparse, so copy it into a
-- proper sequence before sorting.
local sorted_words = {}
for _, group in pairs(groups) do
  -- pairs() order is unspecified; sort the words so output is reproducible.
  table.sort(group.Words)
  sorted_words[#sorted_words + 1] = group
end
-- Counts are unique per group, so this comparator is a strict ordering.
table.sort(sorted_words, function(a, b)
  return a.Count > b.Count
end)

for _, group in ipairs(sorted_words) do
  print(string.format("%d = %s", group.Count, table.concat(group.Words, "|")))
end
print(string.format("%s = %d", "Total is", count))
print(string.format("%s = %d", "Lines is", #sorted_words))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment