Skip to content

Instantly share code, notes, and snippets.

@l3kn
Created July 25, 2014 16:17
Show Gist options
  • Save l3kn/7ccea4898b70c404031a to your computer and use it in GitHub Desktop.
Save l3kn/7ccea4898b70c404031a to your computer and use it in GitHub Desktop.
defmodule NLP do
  @moduledoc """
  Small text-processing helpers: n-gram extraction, naive word splitting,
  a parallel map, character-bigram extraction from files, and occurrence counting.
  """

  @doc """
  Returns every contiguous run of `n` elements of `list`, in input order.

  Returns `[]` when `n` is `0`, when `list` is empty, or when `list` holds
  fewer than `n` elements.

  ## Examples

      iex> NLP.ngrams(2, [1, 2, 3])
      [[1, 2], [2, 3]]

      iex> NLP.ngrams(3, [1, 2])
      []
  """
  def ngrams(0, _), do: []
  def ngrams(_, []), do: []

  def ngrams(n, list) do
    # ngrams/3 prepends each window to its accumulator, so the windows come
    # back newest-first; reverse once here to restore input order.
    n |> ngrams(list, []) |> Enum.reverse()
  end

  # Accumulator-passing worker behind ngrams/2. It is public only because it
  # always has been; it returns the windows in REVERSE input order, prepended
  # onto `acc`.
  @doc false
  def ngrams(n, list, acc) do
    ngram = Enum.take(list, n)

    # A window shorter than `n` means the list is exhausted.
    if length(ngram) == n do
      ngrams(n, tl(list), [ngram | acc])
    else
      acc
    end
  end

  @doc """
  Splits `string` on single space characters.

  Consecutive spaces therefore produce empty strings in the result, and other
  whitespace (tabs, newlines) is not treated as a separator.

  ## Examples

      iex> NLP.words("a b c")
      ["a", "b", "c"]
  """
  def words(string) do
    String.split(string, " ")
  end

  @doc """
  Applies `fun` to every element of `collection` concurrently, one linked
  process per element, and returns the results in the order of `collection`.

  Waits without a timeout for every result. Because the worker processes are
  linked to the caller, a crash in `fun` also takes down the calling process
  unless it traps exits.

  ## Examples

      iex> NLP.pmap([1, 2, 3], fn x -> x * 2 end)
      [2, 4, 6]
  """
  def pmap(collection, fun) do
    collection
    |> Enum.map(fn elem -> Task.async(fn -> fun.(elem) end) end)
    |> Enum.map(&Task.await(&1, :infinity))
  end

  @doc """
  Reads up to `limit` entries (default `20`) of the directory `path` in
  parallel and returns all of their character bigrams as one flat list.

  The entries are taken in whatever order `File.ls!/1` returns them. Every
  selected entry is read as a file via `file_to_bigrams/1`, so an entry that
  cannot be read makes the call fail. Raises `File.Error` if `path` cannot be
  listed.
  """
  def dir_to_bigrams(path, limit \\ 20) do
    path
    |> File.ls!()
    |> Enum.take(limit)
    |> Enum.map(&Path.join(path, &1))
    |> pmap(&file_to_bigrams/1)
    |> Enum.concat()
  end

  @doc """
  Returns the character bigrams of the file `name` as a list of two-element
  lists of code points.

  The file is split on `"\\n"` and bigrams are computed per line, so no bigram
  spans a line break. Any bigram containing a space character is dropped.
  Raises `MatchError` if the file cannot be read.
  """
  def file_to_bigrams(name) do
    {:ok, data} = File.read(name)

    data
    |> String.split("\n")
    |> Enum.flat_map(fn line -> ngrams(2, String.to_charlist(line)) end)
    |> Enum.reject(fn [a, b] -> a == ?\s or b == ?\s end)
  end

  @doc """
  Counts how many times each element occurs in `list`.

  Returns a map from element to count. `acc` is an optional map of existing
  counts to add to; it defaults to an empty map.

  ## Examples

      iex> NLP.count([:a, :b, :a])
      %{a: 2, b: 1}

      iex> NLP.count([:a], %{a: 1})
      %{a: 2}
  """
  def count(list, acc \\ %{})
  def count([], acc), do: acc

  def count([x | xs], acc) do
    # Insert 1 for a first sighting, otherwise bump the existing count.
    count(xs, Map.update(acc, x, 1, &(&1 + 1)))
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment