写了个用lua解析英文单词和统计的小玩意,方便了解每篇英文文章的单词统计数据。
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/lua
--- Word-frequency report for a text file.
--
-- Usage: ./words.lua <path>
--
-- Reads the file named by the first argument, counts every maximal run of
-- letters (Lua pattern "%a+", case-sensitive), groups the words that share
-- the same occurrence count, and prints one "<count> = <w1>|<w2>|..." line
-- per group in descending count order, followed by two summary lines.
require "io"

local args = {...}
if #args < 1 then
  print("Please specify the file path.")
  print("./words.lua as.txt")
  return
end

-- Number of bytes requested per read before the current line is completed.
local CHUNK_SIZE = 8192

--- Count how many times each word occurs in an open file.
-- @param file an open file handle positioned where counting should start
-- @return table mapping each word to its number of occurrences
local function count_words(file)
  local counts = {}
  while true do
    -- Read a fixed-size chunk plus the remainder of the line it ends in, so
    -- that no word is ever cut in half at a chunk boundary.
    local chunk, rest = file:read(CHUNK_SIZE, "*line")
    if not chunk then break end
    -- `rest` is nil when the chunk reached end of file; otherwise it holds
    -- the tail of the last line and must be scanned together with the chunk.
    if rest then chunk = chunk .. rest end
    for word in string.gmatch(chunk, "%a+") do
      counts[word] = (counts[word] or 0) + 1
    end
  end
  return counts
end

--- Group words by their occurrence count.
-- @param counts table mapping word -> occurrence count
-- @return array of { Count = n, Words = { ... } } sorted by Count, descending
-- @return number of distinct words seen
local function group_by_count(counts)
  local by_count = {}   -- occurrence count -> group record
  local groups = {}     -- the same records, as a sortable array
  local distinct = 0
  for word, n in pairs(counts) do
    local group = by_count[n]
    if not group then
      group = { Count = n, Words = {} }
      by_count[n] = group
      groups[#groups + 1] = group
    end
    -- Collect into a list and join once when printing; repeated ".." on a
    -- growing string is quadratic for large groups.
    group.Words[#group.Words + 1] = word
    distinct = distinct + 1
  end
  -- Every group has a unique Count, so this strict comparison is a valid
  -- ordering for table.sort.
  table.sort(groups, function(a, b) return a.Count > b.Count end)
  return groups, distinct
end

local file = assert(io.open(args[1], "r"))
local counts = count_words(file)
-- Everything needed is in memory now; release the handle before reporting.
file:close()

local groups, distinct = group_by_count(counts)
for _, group in ipairs(groups) do
  print(string.format("%d = %s", group.Count, table.concat(group.Words, "|")))
end
-- "Total is" reports the number of distinct words; "Lines is" reports the
-- number of count groups, i.e. the number of report lines printed above.
print(string.format("%s = %d", "Total is", distinct))
print(string.format("%s = %d", "Lines is", #groups))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment