Skip to content

Instantly share code, notes, and snippets.

@ceolinrenato
Last active April 28, 2021 19:18
Show Gist options
  • Save ceolinrenato/ff40b85aa60e9d560741f96034cae773 to your computer and use it in GitHub Desktop.
Save ceolinrenato/ff40b85aa60e9d560741f96034cae773 to your computer and use it in GitHub Desktop.
n_grams.exs
# Renato Ceolin - 10/28/2020
# Built with elixir 1.11
# To run the code use the command `elixir n_grams.exs`
# Tests will be run after the script execution
defmodule NGrams do
  @moduledoc """
  N-grams are contiguous sequences of words in a sentence.
  """

  @doc """
  Returns a list containing all the n-grams that can be generated from `sentence`.

  Punctuation is removed and the remaining text is split on whitespace. The n-grams
  are ordered from the longest to the shortest; n-grams of the same length keep the
  order in which they appear in the sentence. A sentence without any word yields an
  empty list.

  `sentence` is expected to be a valid UTF-8 string, since punctuation is matched
  with a Unicode-aware pattern.

  ## Examples

      iex> NGrams.from_sentence("Show me the code.")
      ["Show me the code", "Show me the", "me the code", "Show me", "me the", "the code", "Show", "me", "the", "code"]

      iex> NGrams.from_sentence("“Hello”… World!")
      ["Hello World", "Hello", "World"]

  """
  @spec from_sentence(String.t()) :: [String.t()]
  def from_sentence(sentence) do
    sentence
    # The `u` modifier makes the POSIX class Unicode-aware, so punctuation such as
    # `…` or curly quotes is stripped as well as ASCII punctuation.
    |> String.replace(~r<[[:punct:]]>u, "")
    |> String.split()
    |> build_ngrams()
  end

  # No words means no n-grams. Handling this case up front also avoids building the
  # range `1..0` below, which would count downwards instead of being empty.
  defp build_ngrams([]), do: []

  # Slides a window of every size, from the whole sentence down to one word, over
  # `words` and joins each window with single spaces.
  defp build_ngrams(words) do
    1..length(words)
    |> Enum.reverse()
    |> Enum.flat_map(fn size ->
      words
      # Step 1 yields every contiguous window; `:discard` drops trailing windows
      # that are shorter than `size`.
      |> Enum.chunk_every(size, 1, :discard)
      |> Enum.map(&Enum.join(&1, " "))
    end)
  end
end
# Interactive entry point: reads one sentence from standard input and prints each of
# its n-grams on its own line.
case IO.gets("Enter a sentence: ") do
  sentence when is_binary(sentence) ->
    IO.puts("N-grams for the sentence `#{String.trim(sentence)}`:")

    sentence
    |> NGrams.from_sentence()
    |> Enum.each(&IO.puts/1)

  # `IO.gets/1` returns `:eof` or `{:error, reason}` instead of a string when stdin
  # is closed or cannot be read. Report it rather than crashing, so the tests below
  # still run.
  _no_input ->
    IO.puts(:stderr, "No sentence could be read from standard input.")
end

# ExUnit must be started before the test module below is defined; the tests then run
# when the script finishes.
ExUnit.start()
defmodule NGramsTest do
  @moduledoc false
  use ExUnit.Case, async: true

  # Expected lists are ordered from the longest n-gram to the shortest, and from left
  # to right within the same length.
  describe "from_sentence/1" do
    test "must return an empty list for an empty sentence" do
      assert [] == NGrams.from_sentence("")
    end

    test "must return an empty list when the sentence has only punctuation" do
      assert [] == NGrams.from_sentence("?!... ,;")
    end

    test "must work with a single word" do
      assert ["Hello"] == NGrams.from_sentence("Hello!")
    end

    test "must remove punctuation" do
      assert ["Hello World", "Hello", "World"] == NGrams.from_sentence("Hello... World!")
    end

    test "must ignore ascii control sequences" do
      assert ["Hello World", "Hello", "World"] ==
               NGrams.from_sentence("Hello... \n World! \r\t\n")
    end

    # The repeated word "ipsum" checks that duplicate words are kept as separate
    # n-grams.
    test "must work with 5 words sequence" do
      assert [
               "Lorem ipsum dolor amet ipsum",
               "Lorem ipsum dolor amet",
               "ipsum dolor amet ipsum",
               "Lorem ipsum dolor",
               "ipsum dolor amet",
               "dolor amet ipsum",
               "Lorem ipsum",
               "ipsum dolor",
               "dolor amet",
               "amet ipsum",
               "Lorem",
               "ipsum",
               "dolor",
               "amet",
               "ipsum"
             ] == NGrams.from_sentence("Lorem ipsum dolor amet ipsum")
    end

    test "must work with the challenge spec example" do
      assert [
               "Show me the code",
               "Show me the",
               "me the code",
               "Show me",
               "me the",
               "the code",
               "Show",
               "me",
               "the",
               "code"
             ] == NGrams.from_sentence("Show me the code.")
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment