@garthk · Last active July 7, 2020 23:42
Using telemetry to capture OpenCensus trace span in Elixir
defmodule TelemetryTracingExperimentTest do
use ExUnit.Case, async: true
@doc """
Dump a variable to standard output, ANSI formatted, and pretty.
"""
def dump(v) do
opts = IEx.Config.inspect_opts()
[:reset, "\n", get_in(opts, [:syntax_colors, :reset]) || :reset, inspect(v, opts), :reset]
|> IO.ANSI.format()
|> IO.puts()
end
@doc """
Dump a string back out, formatted as the function call.
"""
def explain(s) do
opts = IEx.Config.inspect_opts()
[
:reset,
"\nexplain ",
get_in(opts, [:syntax_colors, :string]) || :reset,
"\"\"\"",
for line <- s |> String.trim_trailing() |> String.split("\n") do
["\n", line]
end,
"\n\"\"\"",
:reset
]
|> IO.ANSI.format()
|> IO.puts()
end
@doc """
Dump some code to standard output, then run it, and `dump/1` its result also.
"""
defmacro loudly(do: block) do
code =
Macro.to_string(
quote generated: true do
loudly do
unquote(block)
end
end
)
quote do
IO.puts(["\n", unquote(code)])
dump(unquote(block))
end
end
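# For illustration only (not used by the test): a call like `loudly do 1 + 1 end`
# expands to roughly
#
#   IO.puts(["\n", "loudly do\n  1 + 1\nend"])
#   dump(1 + 1)
#
# i.e. each block prints its own source, runs, and pretty-prints its result.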
@tag :experiments
test "the whole thing" do
explain("""
How long is a second, in the units :timer.tc/1 reports? Let's measure a tenth of a second:
""")
loudly do
{d, :ok} = :timer.tc(fn -> 1 |> :timer.seconds() |> div(10) |> :timer.sleep() end)
d * 10
end
explain("""
1_000_000 and change, right? Looks like microseconds. Let's shift it to milliseconds.
If we get 100, we're good:
""")
loudly do
System.convert_time_unit(d, :microsecond, :millisecond)
end
explain("""
How many times can we call :telemetry.span/3 in a second if there are no handlers?
First, let's enforce our assumptions:
""")
loudly do
event_prefix = [:d597a6960262870e]
for %{id: id} <- :telemetry.list_handlers(event_prefix), do: :telemetry.detach(id)
end
explain("""
If we separate out, as much as possible:
* The code being measured (`{:ok, arg}`)
* The code instrumenting it with `:telemetry.span/3` (the rest of `measure_me`)
* The benchmark (the call to `:timer.tc/1`)
... and measure 10K results, we'll get a rough idea of the overhead:
""")
loudly do
measure_me = fn arg ->
ref = make_ref()
start_and_exception_metadata = %{arg: arg, ref: ref}
:telemetry.span(event_prefix, start_and_exception_metadata, fn ->
# THIS IS OUR ONLY LINE OF BUSINESS LOGIC CODE:
result = {:ok, arg}
# Back to the overhead:
stop_metadata = %{arg: arg, ref: ref, result: result}
{result, stop_metadata}
end)
end
count = 10_000
{d, result} = :timer.tc(fn -> for i <- 1..count, do: measure_me.(i) end)
assert length(result) == count
assert hd(result) == {:ok, 1}
assert result |> Enum.reverse() |> hd() == {:ok, count}
%{calls_per_sec: div(1_000_000 * count, d), each_µs: Float.floor(d / count, 1)}
end
explain("""
~900K per second on a server CPU? Library vendors should feel safe sprinkling `:telemetry`
about. Anyone attaching handlers should think of the extra impact, though. What if there's a
handler attached to :start?
""")
loudly do
handler_id = make_ref()
handler = fn _n, _m10s, _m6a, _c -> :... end
:telemetry.attach(handler_id, event_prefix ++ [:start], handler, nil)
{d, _} = :timer.tc(fn -> for i <- 1..count, do: measure_me.(i) end)
:telemetry.detach(handler_id)
%{calls_per_sec: div(1_000_000 * count, d), each_µs: Float.floor(d / count, 1)}
end
explain("""
~800K calls per second isn't bad, either.
How about three handlers, one of which is almost never called?
""")
loudly do
handler_id = make_ref()
handler = fn _n, _m10s, _m6a, _c -> :... end
# Each attachment needs its own id; :telemetry.attach/4 rejects duplicates.
for event <- [:start, :stop, :exception] do
:telemetry.attach({handler_id, event}, event_prefix ++ [event], handler, nil)
end
{d, _} = :timer.tc(fn -> for i <- 1..count, do: measure_me.(i) end)
for event <- [:start, :stop, :exception], do: :telemetry.detach({handler_id, event})
%{calls_per_sec: div(1_000_000 * count, d), each_µs: Float.floor(d / count, 1)}
end
explain("""
Three handlers that try to start and stop spans?
""")
loudly do
handler_id = make_ref()
handle_start = fn _n, _m10s, %{ref: _}, _c ->
:ocp.with_child_span("benchmark")
end
handle_stop = fn _n, _m10s, %{ref: _}, _c ->
:ocp.set_status(0, "OK")
:ocp.finish_span()
end
handle_exception = fn _n, _m10s, %{ref: _}, _c ->
:ocp.set_status(2, "FAIL")
:ocp.finish_span()
end
:telemetry.attach({handler_id, :start}, event_prefix ++ [:start], handle_start, nil)
:telemetry.attach({handler_id, :stop}, event_prefix ++ [:stop], handle_stop, nil)
:telemetry.attach(
{handler_id, :exception},
event_prefix ++ [:exception],
handle_exception,
nil
)
{d, _} = :timer.tc(fn -> for i <- 1..count, do: measure_me.(i) end)
for event <- [:start, :stop, :exception], do: :telemetry.detach({handler_id, event})
%{calls_per_sec: div(1_000_000 * count, d), each_µs: Float.floor(d / count, 1)}
end
explain("""
~50K? Ouch! Not bad if we're wrapping something taking 1ms (~2% overhead?) or more,
but tracing is definitely heavier than telemetry.
We got ahead of ourselves, though. How about if we did tracing directly?
""")
loudly do
self_tracing = fn arg ->
:ocp.with_child_span("benchmark")
try do
{:ok, arg}
after
:ocp.set_status(0, "OK")
:ocp.finish_span()
end
end
{d, _} = :timer.tc(fn -> for i <- 1..count, do: self_tracing.(i) end)
%{calls_per_sec: div(1_000_000 * count, d), each_µs: Float.floor(d / count, 1)}
end
explain("""
Faster than doing it via :telemetry.span/3, but not that much faster. Back to the original
version, let's get defensive. TODO explain concerns.
""")
loudly do
pdict_key = :d597a6960262870e
handler_id = make_ref()
handle_start = fn _n, _m10s, %{ref: ref}, _c ->
previous_span_ctx = :ocp.current_span_ctx()
span_ctx = :oc_trace.start_span("benchmark", previous_span_ctx)
:ocp.with_span_ctx(span_ctx)
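# Remember both contexts, keyed by ref, so handle_stop can detect push/pop mismatches.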
pdict_key
|> Process.get(%{})
|> put_in([ref], {previous_span_ctx, span_ctx})
|> Process.put(pdict_key)
end
handle_stop = fn _n, _m10s, %{ref: ref}, _c ->
state = Process.get(pdict_key, %{})
{previously, state} = Map.pop(state, ref)
Process.put(pdict_key, state)
case {previously, :ocp.current_span_ctx()} do
{nil, span_ctx} ->
# can't be defensive; started without ref?
previous_span_ctx = :oc_trace.parent_span_ctx(span_ctx)
:oc_trace.set_status(0, "OK", span_ctx)
:oc_trace.finish_span(span_ctx)
:ocp.with_span_ctx(previous_span_ctx)
{{previous_span_ctx, span_ctx}, span_ctx} ->
# nothing bad happened!
:oc_trace.set_status(0, "OK", span_ctx)
:oc_trace.finish_span(span_ctx)
:ocp.with_span_ctx(previous_span_ctx)
{{previous_span_ctx, span_ctx}, other_span_ctx} ->
IO.warn("span mismatch")
# expecting more push than pop, let's wind up the other one
:oc_trace.set_status(2, "UNKNOWN", other_span_ctx)
:oc_trace.finish_span(other_span_ctx)
# ... and then back to normal:
:oc_trace.set_status(0, "OK", span_ctx)
:oc_trace.finish_span(span_ctx)
:ocp.with_span_ctx(previous_span_ctx)
end
end
handle_exception = fn _n, _m10s, %{ref: _}, _c ->
# TODO also do all that defensive span popping:
:ocp.set_status(2, "FAIL")
:ocp.finish_span()
end
:telemetry.attach({handler_id, :start}, event_prefix ++ [:start], handle_start, nil)
:telemetry.attach({handler_id, :stop}, event_prefix ++ [:stop], handle_stop, nil)
:telemetry.attach(
{handler_id, :exception},
event_prefix ++ [:exception],
handle_exception,
nil
)
{d, _} = :timer.tc(fn -> for i <- 1..count, do: measure_me.(i) end)
for event <- [:start, :stop, :exception], do: :telemetry.detach({handler_id, event})
%{calls_per_sec: div(1_000_000 * count, d), each_µs: Float.floor(d / count, 1)}
end
end
end
defmodule ViaTelemetryTest do
@moduledoc "Tracing hooked up with Telemetry, OpenTelemetry version."
use ExUnit.Case, async: true
alias OpenTelemetry.Tracer
require OpenTelemetry.Tracer
@doc """
Dump a variable to standard output, ANSI formatted, and pretty.
"""
def dump(v) do
opts = IEx.Config.inspect_opts()
[:reset, "\n", get_in(opts, [:syntax_colors, :reset]) || :reset, inspect(v, opts), :reset]
|> IO.ANSI.format()
|> IO.puts()
end
@doc """
Dump a string back out, formatted as the function call.
"""
def explain(s) do
opts = IEx.Config.inspect_opts()
[
:reset,
"\nexplain ",
get_in(opts, [:syntax_colors, :string]) || :reset,
"\"\"\"",
for line <- s |> String.trim_trailing() |> String.split("\n") do
["\n", line]
end,
"\n\"\"\"",
:reset
]
|> IO.ANSI.format()
|> IO.puts()
end
@doc """
Dump some code to standard output, then run it, and `dump/1` its result also.
"""
defmacro loudly(do: block) do
code =
Macro.to_string(
quote generated: true do
loudly do
unquote(block)
end
end
)
quote do
IO.puts(["\n", unquote(code)])
unquote(block)
|> dump()
end
end
@tag :experiments
test "the whole thing" do
explain """
TODO explain why I'm checking this
"""
loudly do
:opentelemetry.get_tracer(__MODULE__)
end
explain """
How long is a second, in the units :timer.tc/1 reports? Let's measure a tenth of a second:
"""
loudly do
{d, :ok} = :timer.tc(fn -> 1 |> :timer.seconds() |> div(10) |> :timer.sleep() end)
d * 10
end
explain """
1_000_000 and change, right? Looks like microseconds. Let's shift it to milliseconds.
If we get 100, we're good:
"""
loudly do
System.convert_time_unit(d, :microsecond, :millisecond)
end
explain """
How many times can we call :telemetry.span/3 in a second if there are no handlers?
First, let's enforce our assumptions:
"""
loudly do
event_prefix = [:d597a6960262870e]
for %{id: id} <- :telemetry.list_handlers(event_prefix), do: :telemetry.detach(id)
end
explain """
If we separate out, as much as possible:
* The code being measured (`{:ok, arg}`)
* The code instrumenting it with `:telemetry.span/3` (the rest of `measure_me`)
* The benchmark (the call to `:timer.tc/1`)
... and measure 10K results, we'll get a rough idea of the overhead:
"""
loudly do
measure_me = fn arg ->
ref = make_ref()
start_and_exception_metadata = %{arg: arg, ref: ref}
:telemetry.span(event_prefix, start_and_exception_metadata, fn ->
# THIS IS OUR ONLY LINE OF BUSINESS LOGIC CODE:
result = {:ok, arg}
# Back to the overhead:
stop_metadata = %{arg: arg, ref: ref, result: result}
{result, stop_metadata}
end)
end
count = 10_000
{d, result} = :timer.tc(fn -> for i <- 1..count, do: measure_me.(i) end)
assert length(result) == count
assert hd(result) == {:ok, 1}
assert result |> Enum.reverse() |> hd() == {:ok, count}
%{calls_per_sec: div(1_000_000 * count, d), each_µs: Float.floor(d / count, 1)}
end
explain """
~900K per second on a server CPU? Library vendors should feel safe sprinkling `:telemetry`
about. Anyone attaching handlers should think of the extra impact, though. What if there's a
handler attached to :start?
"""
loudly do
handler_id = make_ref()
handler = fn _n, _m10s, _m6a, _c -> :... end
:telemetry.attach(handler_id, event_prefix ++ [:start], handler, nil)
{d, _} = :timer.tc(fn -> for i <- 1..count, do: measure_me.(i) end)
:telemetry.detach(handler_id)
%{calls_per_sec: div(1_000_000 * count, d), each_µs: Float.floor(d / count, 1)}
end
explain """
~800K calls per second isn't bad, either.
How about three handlers, one of which is almost never called?
"""
loudly do
handler_id = make_ref()
handler = fn _n, _m10s, _m6a, _c -> :... end
# Each attachment needs its own id; :telemetry.attach/4 rejects duplicates.
for event <- [:start, :stop, :exception] do
:telemetry.attach({handler_id, event}, event_prefix ++ [event], handler, nil)
end
{d, _} = :timer.tc(fn -> for i <- 1..count, do: measure_me.(i) end)
for event <- [:start, :stop, :exception], do: :telemetry.detach({handler_id, event})
%{calls_per_sec: div(1_000_000 * count, d), each_µs: Float.floor(d / count, 1)}
end
explain """
Three handlers that try to start and stop spans?
"""
loudly do
handler_id = make_ref()
handle_start = fn _n, _m10s, %{ref: _}, _c ->
Tracer.start_span("benchmark")
end
handle_stop = fn _n, _m10s, %{ref: _}, _c ->
Tracer.end_span()
end
handle_exception = fn _n, _m10s, %{ref: _}, _c ->
Tracer.set_status("Unknown")
Tracer.end_span()
end
:telemetry.attach({handler_id, :start}, event_prefix ++ [:start], handle_start, nil)
:telemetry.attach({handler_id, :stop}, event_prefix ++ [:stop], handle_stop, nil)
:telemetry.attach(
{handler_id, :exception},
event_prefix ++ [:exception],
handle_exception,
nil
)
{d, _} = :timer.tc(fn -> for i <- 1..count, do: measure_me.(i) end)
for event <- [:start, :stop, :exception], do: :telemetry.detach({handler_id, event})
%{calls_per_sec: div(1_000_000 * count, d), each_µs: Float.floor(d / count, 1)}
end
loudly do
{:embarrassing_trailing_span_ctx?, OpenTelemetry.Tracer.current_span_ctx()}
end
explain """
~50K? Ouch! Not bad if we're wrapping something taking 1ms (~2% overhead?) or more,
but tracing is definitely heavier than telemetry.
We got ahead of ourselves, though. How about if we did tracing directly?
"""
loudly do
self_tracing = fn arg ->
Tracer.start_span("benchmark")
try do
{:ok, arg}
after
Tracer.end_span()
end
end
{d, _} = :timer.tc(fn -> for i <- 1..count, do: self_tracing.(i) end)
%{calls_per_sec: div(1_000_000 * count, d), each_µs: Float.floor(d / count, 1)}
end
loudly do
{:embarrassing_trailing_span_ctx?, OpenTelemetry.Tracer.current_span_ctx()}
end
explain """
Faster than doing it via :telemetry.span/3, but not that much faster. Back to the original
version, let's get defensive. TODO explain concerns.
"""
loudly do
# :ok = :dbg.stop_clear()
# {:ok, _} =
# :dbg.tracer(
# :process,
# {fn message, :state ->
# dump(message)
# :state
# end, :state}
# )
for m <- [:ot_tracer, Process], do: :dbg.tp(m, [{:_, [], [{:return_trace}]}])
:dbg.p(:all, :c)
pdict_key = :d597a6960262870e
handler_id = make_ref()
tracer = :opentelemetry.get_tracer(__MODULE__)
handle_start = fn _n, _m10s, %{ref: ref}, _c ->
# get the previous span_ctx manually
# TODO find way to reach create_span to avoid repeating :ot_ctx.get_value/2
previous_span_ctx = :ot_tracer.current_span_ctx(tracer)
span_ctx = :ot_tracer.start_span(tracer, "benchmark", %{})
state = Process.get(pdict_key, %{})
state = put_in(state, [ref], {previous_span_ctx, span_ctx})
Process.put(pdict_key, state)
end
handle_stop = fn _n, _m10s, %{ref: ref}, _c ->
state = Process.get(pdict_key, %{})
{previously, state} = Map.pop(state, ref)
Process.put(pdict_key, state)
case {previously, Tracer.current_span_ctx()} do
{nil, span_ctx} ->
IO.puts("NO PREVIOUS #{__ENV__.file}:#{__ENV__.line}")
# can't be defensive; started without ref?
:ot_tracer.end_span(tracer, span_ctx)
# automatically sets parent?
# TODO remove crasher:
0 = 1
{{previous_span_ctx, span_ctx}, span_ctx} ->
# nothing bad happened! end span, implicitly taking previous_span_ctx
:ot_tracer.end_span(tracer, span_ctx)
# assert :ot_tracer.current_span_ctx(tracer) == previous_span_ctx
{{previous_span_ctx, span_ctx}, other_span_ctx} ->
IO.puts("MISMATCH #{__ENV__.file}:#{__ENV__.line}")
IO.warn("span mismatch")
# expecting more push than pop, let's wind up the other one
:ot_span.set_status(tracer, other_span_ctx, "Unknown")
:ot_tracer.end_span(tracer, other_span_ctx)
# ... and then back to normal:
:ot_tracer.end_span(tracer, span_ctx)
# :ot_tracer.set_span(tracer, previous_span_ctx)
assert :ot_tracer.current_span_ctx(tracer) == previous_span_ctx
end
end
handle_exception = fn _n, _m10s, %{ref: _}, _c ->
# TODO also do all that defensive span popping:
Tracer.set_status("Unknown")
Tracer.end_span()
end
:telemetry.attach({handler_id, :start}, event_prefix ++ [:start], handle_start, nil)
:telemetry.attach({handler_id, :stop}, event_prefix ++ [:stop], handle_stop, nil)
:telemetry.attach(
{handler_id, :exception},
event_prefix ++ [:exception],
handle_exception,
nil
)
{d, _} = :timer.tc(fn -> for i <- 1..count, do: measure_me.(i) end)
for event <- [:start, :stop, :exception], do: :telemetry.detach({handler_id, event})
:ok = :dbg.stop_clear()
%{calls_per_sec: div(1_000_000 * count, d), each_µs: Float.floor(d / count, 1)}
end
end
end
@garthk commented Jul 4, 2020

As I tweeted:

Benchmarking :opencensus tracing via :telemetry. 20µs per span isn’t bad vs events measured in ms, but it’s a trivial example so far. I want to make it robust vs code crashing, poor library use etc and tracking state in the process dictionary will slow me down.
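Stripped of the benchmark scaffolding and the defensive bookkeeping, the wiring being measured is roughly this sketch (the `[:myapp, :work]` event prefix, the `"work"` span name, and the module name are placeholders, not anything a real library defines):

defmodule MyApp.TelemetryToOpenCensus do
  @moduledoc "Sketch: turn a library's :telemetry span events into OpenCensus spans."

  @prefix [:myapp, :work]

  def attach do
    handlers = [
      {:start, &__MODULE__.handle_start/4},
      {:stop, &__MODULE__.handle_stop/4},
      {:exception, &__MODULE__.handle_exception/4}
    ]

    # One handler per event, each under its own id.
    for {suffix, fun} <- handlers do
      :telemetry.attach({__MODULE__, suffix}, @prefix ++ [suffix], fun, nil)
    end
  end

  def handle_start(_event, _measurements, _metadata, _config) do
    :ocp.with_child_span("work")
  end

  def handle_stop(_event, _measurements, _metadata, _config) do
    :ocp.set_status(0, "OK")
    :ocp.finish_span()
  end

  def handle_exception(_event, _measurements, _metadata, _config) do
    :ocp.set_status(2, "FAIL")
    :ocp.finish_span()
  end
end

The defensive version in the test swaps these `:ocp` process-dictionary helpers for explicit `:oc_trace` span contexts stashed under the `ref` carried in the telemetry metadata.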

Also, experimenting with “literate programming”, telling stories mixing text and code and output like a notebook but with just Elixir, two functions, and a macro. Here’s my code in a unit test, and its console output:

[console output]
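For reference, the shape of each step in the test is just two calls:

explain """
Some prose setting up the next measurement.
"""

loudly do
  # `loudly` prints the block's source, runs it, and pretty-prints the result
  1 |> :timer.seconds() |> div(10)
end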

@garthk commented Jul 5, 2020

Updated with defensive span popping.
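The heart of it, as a sketch (it assumes every `:telemetry.span/3` call puts a unique `ref` in its metadata, as `measure_me` does above; `@pdict_key` and the `"work"` span name are placeholders):

@pdict_key :my_span_stack

def handle_start(_event, _measurements, %{ref: ref}, _config) do
  previous_span_ctx = :ocp.current_span_ctx()
  span_ctx = :oc_trace.start_span("work", previous_span_ctx)
  :ocp.with_span_ctx(span_ctx)
  # Remember both contexts, keyed by ref, so handle_stop can verify the stack discipline.
  Process.put(@pdict_key, Map.put(Process.get(@pdict_key, %{}), ref, {previous_span_ctx, span_ctx}))
end

def handle_stop(_event, _measurements, %{ref: ref}, _config) do
  state = Process.get(@pdict_key, %{})
  {previously, state} = Map.pop(state, ref)
  Process.put(@pdict_key, state)

  case {previously, :ocp.current_span_ctx()} do
    {{previous_span_ctx, span_ctx}, span_ctx} ->
      # Common case: the span we started is still the current one.
      :oc_trace.set_status(0, "OK", span_ctx)
      :oc_trace.finish_span(span_ctx)
      :ocp.with_span_ctx(previous_span_ctx)

    {{previous_span_ctx, span_ctx}, other_span_ctx} ->
      # Someone pushed a span inside ours and never popped it: finish it first.
      :oc_trace.set_status(2, "UNKNOWN", other_span_ctx)
      :oc_trace.finish_span(other_span_ctx)
      :oc_trace.set_status(0, "OK", span_ctx)
      :oc_trace.finish_span(span_ctx)
      :ocp.with_span_ctx(previous_span_ctx)

    {nil, span_ctx} ->
      # :stop without a matching :start for this ref: best effort, pop whatever is current.
      :oc_trace.set_status(0, "OK", span_ctx)
      :oc_trace.finish_span(span_ctx)
      :ocp.with_span_ctx(:oc_trace.parent_span_ctx(span_ctx))
  end
end

The OpenTelemetry variant in the second module does the same bookkeeping against `:ot_tracer`, but still carries TODOs and a deliberate crasher in the unmatched-ref branch.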

@garthk commented Jul 7, 2020