Skip to content

Instantly share code, notes, and snippets.

@elvanja
Created September 10, 2020 13:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save elvanja/9e63ff2306481555fd194c5631cc4f95 to your computer and use it in GitHub Desktop.
Save elvanja/9e63ff2306481555fd194c5631cc4f95 to your computer and use it in GitHub Desktop.
Build Elasticsearch mapping schema from given Elixir struct's typespec
defmodule DataKiosk.Utils.BuildElasticsearchMapping do
  @moduledoc """
  ██╗ ██╗███████╗██████╗ ███████╗ ██████╗ ███████╗ ██████╗ ██████╗ █████╗ ██████╗ ██████╗ ███╗ ██╗███████╗
  ██║ ██║██╔════╝██╔══██╗██╔════╝ ██╔══██╗██╔════╝ ██╔══██╗██╔══██╗██╔══██╗██╔════╝ ██╔═══██╗████╗ ██║██╔════╝
  ███████║█████╗ ██████╔╝█████╗ ██████╔╝█████╗ ██║ ██║██████╔╝███████║██║ ███╗██║ ██║██╔██╗ ██║███████╗
  ██╔══██║██╔══╝ ██╔══██╗██╔══╝ ██╔══██╗██╔══╝ ██║ ██║██╔══██╗██╔══██║██║ ██║██║ ██║██║╚██╗██║╚════██║
  ██║ ██║███████╗██║ ██║███████╗ ██████╔╝███████╗ ██████╔╝██║ ██║██║ ██║╚██████╔╝╚██████╔╝██║ ╚████║███████║
  ╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚══════╝ ╚═════╝ ╚══════╝ ╚═════╝ ╚═╝ ╚═╝╚═╝ ╚═╝ ╚═════╝ ╚═════╝ ╚═╝ ╚═══╝╚══════╝

  Builds Elasticsearch mapping for given module.

  Inspired by ["How to get typespec information from Elixir at runtime"](https://gist.github.com/JEG2/1685a9df2274ca5cf866122fa2dbc42d) gist.
  See also [dialyxir issue 411](https://github.com/jeremyjh/dialyxir/issues/411).

  It has been tested on several typespecs and suits this project's needs just fine.
  But, it may (and most likely will) not work on all typespecs out there, simply because it is tailored for specific usage.
  Consider yourself warned.

  That being said, here is an example. Given these modules:

  ```
  defmodule Plant do
    @type type :: :fruit | :vegetable | nil
    @type t :: %__MODULE__{
            type: type(),
            name: String.t(),
            tags: list(atom())
          }
    defstruct [
      :type,
      :name,
      :tags
    ]
  end

  defmodule Basket do
    @type t :: %__MODULE__{
            plants: list(Plant.t()),
            weight: Decimal.t()
          }
    defstruct [
      :plants,
      :weight
    ]
  end
  ```

  `given(Basket)` returns this Elasticsearch mapping:

  ```
  %{
    properties: %{
      plants: %{
        properties: %{
          name: %{type: "text"},
          tags: %{type: "keyword"},
          type: %{type: "keyword"}
        },
        type: "nested"
      },
      weight: %{scaling_factor: 100, type: "scaled_float"}
    }
  }
  ```

  Notes:

  - plants list is mapped as [nested](https://www.elastic.co/guide/en/elasticsearch/reference/current/nested.html) type
    - it does not need to be explicitly defined as list/array since that is automatic for Elasticsearch
  - `Plant.type` is converted to [keyword](https://www.elastic.co/guide/en/elasticsearch/reference/current/keyword.html#keyword-field-type)
    - same thing for `Plant.tags`, both have fixed values
  - and finally `Basket.plants` is also converted as nested `Plant`
  - a remote type that is not a struct (e.g. a field typed as `Plant.type()`) is mapped
    to whatever that type resolves to, it is not wrapped as nested
  - a spec that matches none of the supported shapes is logged as an error and mapped to `:unknown`
  """

  require Logger

  @doc """
  Builds the Elasticsearch mapping for the `t` type of `module`.

  The type information is read from the abstract code of the compiled module, so the
  module's `.beam` file must be reachable through the code path and must have been
  compiled with debug info. When that is not the case the assertive match on the
  `:beam_lib.chunks/2` result fails and a `MatchError` is raised.

  Returns a map with a single `:properties` key. For a struct type its value is a map
  of field name to field mapping.
  """
  @spec given(module()) :: %{properties: term()}
  def given(module), do: %{properties: to_es(module, :t)}

  # Resolves the type named `type` of `module`.
  #
  # Returns a map of field mappings when the type is a map/struct with listed fields,
  # otherwise whatever `to_es/3` returns for the type's spec (usually `{type, mapping}`).
  defp to_es(module, type) do
    all_types = type_specs(module)

    # Every other public type of the module is pre-resolved so that fields can refer to
    # it through a `:user_type` spec. These are resolved without any user types of their
    # own, i.e. a user type referring to another user type resolves to `nil`.
    user_types =
      all_types
      |> Map.delete(type)
      |> Enum.map(fn {name, type_spec} -> to_es(name, type_spec, []) end)

    case Map.get(all_types, type) do
      # `map()` carries `:any` instead of a field list, it is left to the generic clause
      {:type, _, :map, fields} when is_list(fields) ->
        fields
        |> Enum.map(fn {:type, _, :map_field_exact, [{:atom, _, field}, spec]} ->
          to_es(field, spec, user_types)
        end)
        |> Enum.reject(&is_nil/1)
        |> Map.new()

      spec ->
        to_es(type, spec, [])
    end
  end

  # Reads the `@type` attributes of `module` from its abstract code and returns them as
  # a map of type name to type spec. Only `:type` attributes are collected.
  defp type_specs(module) do
    # Assertive on purpose: a module without a reachable beam file or without abstract
    # code cannot be mapped at all.
    {:ok, {^module, [{:abstract_code, {:raw_abstract_v1, attributes}}]}} =
      module
      |> :code.which()
      |> :beam_lib.chunks([:abstract_code])

    for {:attribute, _, :type, {name, type_spec, _}} <- attributes,
        into: %{},
        do: {name, type_spec}
  end

  # Converts a single type spec to `{field, mapping}`.
  #
  # Two special results exist: `nil` for things to be dropped (the `__struct__` field and
  # the literal `nil` atom) and a bare atom for a literal atom, so that a union of
  # literal atoms can be recognized.
  defp to_es(:__struct__, _, _), do: nil
  defp to_es(_, {:atom, _, value}, _), do: value

  # Elasticsearch fields hold arrays implicitly, so a list maps as its element type.
  defp to_es(field, {:type, _, :list, [type]}, user_types),
    do: to_es(field, type, user_types)

  defp to_es(field, {:type, _, :union, types}, user_types) do
    specs =
      types
      |> Enum.map(&to_es(field, &1, user_types))
      |> Enum.reject(&is_nil/1)

    # Literal atoms come back as bare values (see the `{:atom, _, value}` clause above),
    # everything else as `{field, mapping}`. A union of literal atoms only is a fixed set
    # of values, otherwise the first real mapping wins no matter where the atoms sit.
    case Enum.reject(specs, &is_atom/1) do
      [] -> {field, %{type: "keyword"}}
      [spec | _] -> spec
    end
  end

  defp to_es(field, {:remote_type, _, [{:atom, _, String}, _, _]}, _),
    do: {field, %{type: "text"}}

  defp to_es(field, {:remote_type, _, [{:atom, _, Decimal}, _, _]}, _),
    do: {field, %{type: "scaled_float", scaling_factor: 100}}

  defp to_es(field, {:remote_type, _, [{:atom, _, module}, {:atom, _, type}, _]}, _) do
    case to_es(module, type) do
      # not a struct: the remote type resolved to `{type, mapping}`, reuse the mapping
      {_, spec} -> {field, spec}
      properties -> {field, %{type: "nested", properties: properties}}
    end
  end

  defp to_es(field, {:type, _, :integer, _}, _), do: {field, %{type: "long"}}
  defp to_es(field, {:type, _, :boolean, _}, _), do: {field, %{type: "boolean"}}
  defp to_es(field, {:type, _, :map, _}, _), do: {field, %{type: "object"}}
  defp to_es(field, {:type, _, :atom, _}, _), do: {field, %{type: "keyword"}}
  defp to_es(field, {:user_type, _, type, _}, user_types), do: {field, user_types[type]}

  defp to_es(field, spec, _) do
    Logger.error("could not parse spec for field #{field}, spec: #{inspect(spec)}")
    {field, :unknown}
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment