Skip to content

Instantly share code, notes, and snippets.

@joshcrews
Forked from jorendorff/searcher.ex
Last active July 27, 2016 03:06
Show Gist options
  • Save joshcrews/ccddd07e9d6a8c7a8f3a8f9dee5201a8 to your computer and use it in GitHub Desktop.
Save joshcrews/ccddd07e9d6a8c7a8f3a8f9dee5201a8 to your computer and use it in GitHub Desktop.
# To run this code, you'll need the sample data (133 MB download, unzips to 492 MB):
# http://bit.ly/2avfASU
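# Unpack it in the directory you will start iex from (the code reads the
# relative path "sample"):
#   tar xjf sample.tar.bz2
# Then start a shell with `iex -S mix` and run:
#   Elindex.Searcher.load_files
#   Elindex.Searcher.search("Tim Cook")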
defmodule Elindex.Searcher do
  @moduledoc """
  Naive in-memory full-text search over the files in the `sample` directory.

  `load_files/0` reads every file into an `Agent` registered as
  `:files_in_memory`. `search/1` then counts the whole-word occurrences of a
  phrase in every loaded file, one task per file, and returns the ten files
  with the most hits.
  """

  # Registered name of the Agent holding the `{title, text}` list.
  @agent :files_in_memory
  # Directory (relative to the current working directory) that is indexed.
  @sample_dir "sample"

  @doc """
  Loads every file found in `sample/` into the `:files_in_memory` agent.

  Each file is stored as `{title, text}`, where `title` is the first line of
  the file. Calling this function again replaces the previously loaded files
  instead of appending duplicates.

  Returns `:ok`. Raises `File.Error` if the directory or one of its entries
  cannot be read.
  """
  @spec load_files() :: :ok
  def load_files do
    files =
      @sample_dir
      |> File.ls!()
      |> Enum.map(&read_file/1)

    case Agent.start_link(fn -> files end, name: @agent) do
      {:ok, _pid} ->
        :ok

      # The agent survives from an earlier call: swap its contents.
      {:error, {:already_started, _pid}} ->
        Agent.update(@agent, fn _old -> files end)
    end
  end

  @doc """
  Searches all loaded files for whole-word occurrences of `word`.

  `word` is matched literally (regex metacharacters have no special meaning).
  Returns at most ten `{count, title}` tuples, highest count first; files with
  no occurrence are omitted.

  `load_files/0` must have been called first, otherwise the call exits
  because the `:files_in_memory` agent is not running.
  """
  @spec search(term()) :: [{pos_integer(), String.t()}]
  def search(word) do
    # Compile once here instead of once per file.
    pattern = compile_pattern(word)

    @agent
    |> Agent.get(fn files -> files end)
    # Eager on purpose: every task must be started before the first one is
    # awaited. Chaining lazy `Stream.map` steps would spawn and await one task
    # at a time, i.e. run the whole search sequentially.
    |> Enum.map(fn {title, text} ->
      Task.async(fn -> hit_info(text, pattern, title) end)
    end)
    # All tasks compete for the schedulers, so a single task may legitimately
    # take longer than the default 5 second await timeout.
    |> Enum.map(&Task.await(&1, :infinity))
    |> Enum.filter(fn {count, _title} -> count > 0 end)
    |> Enum.sort(&>=/2)
    |> Enum.take(10)
  end

  @doc """
  Counts the whole-word occurrences of `word` in `blob`.

  `word` is either a precompiled `Regex`, which is used as is, or any value
  convertible with `to_string/1`, which is matched literally between word
  boundaries (`\\b`).

  Returns `{count, title}`.
  """
  @spec hit_info(String.t(), term(), title) :: {non_neg_integer(), title} when title: term()
  def hit_info(blob, %Regex{} = pattern, title) do
    # `return: :index` avoids copying every matched substring just to count.
    count = pattern |> Regex.scan(blob, return: :index) |> length()
    {count, title}
  end

  def hit_info(blob, word, title) do
    hit_info(blob, compile_pattern(word), title)
  end

  # Reads one file of the sample directory and returns `{title, text}`.
  defp read_file(filename) do
    text = @sample_dir |> Path.join(filename) |> File.read!()
    # `parts: 2` stops at the first newline instead of splitting the whole file.
    [title | _rest] = String.split(text, "\n", parts: 2)
    {title, text}
  end

  # Builds the whole-word regex for a literal search phrase. The phrase is
  # escaped so that input such as "a.b" or "C++" neither changes the meaning
  # of the pattern nor makes compilation fail.
  defp compile_pattern(word) do
    Regex.compile!("\\b" <> Regex.escape(to_string(word)) <> "\\b")
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment