Skip to content

Instantly share code, notes, and snippets.

@garborg
Last active August 29, 2015 14:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save garborg/13a360f809ee8a74d890 to your computer and use it in GitHub Desktop.
Save garborg/13a360f809ee8a74d890 to your computer and use it in GitHub Desktop.
# Fetch and unpack the text8 archive unless a file named "text8" is already
# present in the working directory. Relies on an external `unzip` command.
isfile("text8") || begin
    download("http://mattmahoney.net/dc/text8.zip", "text8.zip")
    run(`unzip text8.zip`)
end
# Build "lil8", a smaller sample holding the first twentieth of "text8",
# unless a file with that name already exists.
if !isfile("lil8")
    str = readall("text8")
    open("lil8", "w") do io
        # Size the sample in bytes, the same unit used to slice the raw data
        # below; a character count would under-count multi-byte text.
        bytes = str.data
        n = fld(length(bytes), 20)
        write(io, bytes[1:n])
    end
end
"""
    wc(path; cut = 10)

Count the whitespace-separated words in the file at `path` and print every
word that occurs more than `cut` times, most frequent first, one
tab-separated `word count` pair per line on standard output.

# Keywords
- `cut`: a word is printed only when its count is strictly greater than this
  threshold. Defaults to 10.

Returns `nothing`.
"""
function wc(path; cut = 10)
    counts = Dict{String,Int}()
    # The do-block form closes the file even if reading or counting throws.
    open(path, "r") do io
        for line in eachline(io)
            for word in split(line)
                counts[word] = get(counts, word, 0) + 1
            end
        end
    end
    # Order the (word, count) entries by descending count before printing.
    guys = collect(counts)
    sort!(guys, by = x -> x[2], rev = true)
    for (word, count) in guys
        # Entries are sorted descending, so nothing after this one can qualify.
        count > cut || break
        println(word, '\t', count)
    end
    return nothing
end
# Profile a single run of wc on the "lil8" sample and print the report.
# Run a garbage collection up front, before the profiled call starts.
gc()
# Discard any samples left over from earlier profiling.
Profile.clear()
# NOTE(review): this looks like the first call to wc in the script, so the
# samples presumably include compilation time -- confirm whether a warm-up
# call is wanted.
@profile wc("lil8")
Profile.print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment