Skip to content

Instantly share code, notes, and snippets.

@bellthoven
Created September 13, 2012 04:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bellthoven/3711976 to your computer and use it in GitHub Desktop.
Save bellthoven/3711976 to your computer and use it in GitHub Desktop.
Profiling codepoints
Instructions:
- Apply the patch
- Compile elixir
- Run the tests
$ git apply /path/to/patch.diff
$ make compile
$ ./bin/elixir native.ex > /tmp/native.ex
$ ./bin/elixir unicode.ex > /tmp/unicode.ex
On my machine, the native implementation was ~1.3x faster. Feel free to add more tests.
defmodule CodepointsProfiler do
  @moduledoc """
  Workloads used to compare `String.codepoints/1` with
  `String.codepoints_unicode/1` under a profiler.
  """

  @doc """
  Returns the fixed suffix appended to every sample string: accented
  characters, a space, a diaeresis and ASCII punctuation.
  """
  def string, do: "áéíàà ¨&$#"

  @doc """
  Splits 100 sample strings (the integers 1..100, each followed by
  `string()`) with `String.codepoints_unicode/1` and returns the results.
  """
  def unicode do
    # `string()` is called with parentheses so it cannot be read as a variable.
    Enum.map(1..100, fn(x) -> String.codepoints_unicode("#{x}#{string()}") end)
  end

  @doc """
  Splits the same 100 sample strings with `String.codepoints/1` and
  returns the results.
  """
  def native do
    Enum.map(1..100, fn(x) -> String.codepoints("#{x}#{string()}") end)
  end
end
# Profiling script for the CodepointsProfiler.native workload
# (presumably the gist's native.ex; the file name is not shown here).
# Loads the profiler module. The path is relative, so this assumes
# codepoints_profiler.ex is resolvable from where the script runs (TODO confirm).
Code.require_file "codepoints_profiler.ex"
# Runs the workload through fprof's apply; `[]` is the empty argument list for the fun.
Erlang.fprof.apply fn -> CodepointsProfiler.native end, []
# Keep this order: profile and analyse presumably consume what apply recorded above.
Erlang.fprof.profile
Erlang.fprof.analyse
diff --git a/lib/elixir/lib/string.ex b/lib/elixir/lib/string.ex
index 08868fc..5c3d2b7 100644
--- a/lib/elixir/lib/string.ex
+++ b/lib/elixir/lib/string.ex
@@ -287,6 +287,33 @@ defmodule String do
Erlang.binary.copy(subject, n)
end
+ def codepoints_unicode(string) do # Splits `string` into a list of binaries, one per character, via Erlang's :unicode module.
+ to_char = fn(char) -> :unicode.characters_to_binary([char], :utf8, :utf8) end # turns a single character back into a UTF-8 binary
+ Enum.map(:unicode.characters_to_list(string, :utf8), to_char) # NOTE(review): assumes `string` is valid UTF-8 so a list comes back; confirm behaviour on invalid input
+ end
+
+  @doc """
+  Returns a list with the UTF-8 codepoints of `string`, each as its own binary.
+  Bytes that do not start a valid UTF-8 sequence are returned as one-byte binaries.
+  """
+  def codepoints(string) do
+    codepoints(string, [])
+  end
+
+  # `buffer` holds codepoints already collected by the caller, in order;
+  # they are returned in front of the codepoints found in `string`.
+  @doc false
+  def codepoints(string, buffer) do
+    :lists.reverse(do_codepoints(string, :lists.reverse(buffer)))
+  end
+
+  # Prepends each codepoint to `acc` (O(1) per step); the caller reverses once.
+  # The `utf8` segment matches any well-formed sequence, not only lead bytes 194/195.
+  defp do_codepoints(<<char :: utf8, rest :: binary>>, acc) do
+    do_codepoints(rest, [<<char :: utf8>> | acc])
+  end
+  defp do_codepoints(<<other, rest :: binary>>, acc), do: do_codepoints(rest, [<<other>> | acc])
+  defp do_codepoints(<<>>, acc), do: acc
defp translate_replace_options([]), do: []
defp translate_replace_options(raw_options) do
diff --git a/lib/elixir/test/elixir/string_test.exs b/lib/elixir/test/elixir/string_test.exs
index c9afa9f..dc4d3b5 100644
--- a/lib/elixir/test/elixir/string_test.exs
+++ b/lib/elixir/test/elixir/string_test.exs
@@ -77,4 +77,10 @@ defmodule StringTest do
assert String.duplicate("&ã$", 2) == "&ã$&ã$"
end
+  test :codepoints do # covers both the hand-written splitter and the :unicode-based one
+    assert String.codepoints("&é%Á#") == ["&","é","%","Á","#"]
+    assert String.codepoints("Élysés") == ["É","l","y","s","é","s"]
+    assert String.codepoints_unicode("áá") == ["á", "á"]
+  end
+
end
# Profiling script for the CodepointsProfiler.unicode workload
# (presumably the gist's unicode.ex; the file name is not shown here).
# Loads the profiler module. The path is relative, so this assumes
# codepoints_profiler.ex is resolvable from where the script runs (TODO confirm).
Code.require_file "codepoints_profiler.ex"
# Runs the workload through fprof's apply; `[]` is the empty argument list for the fun.
Erlang.fprof.apply fn -> CodepointsProfiler.unicode end, []
# Keep this order: profile and analyse presumably consume what apply recorded above.
Erlang.fprof.profile
Erlang.fprof.analyse
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment