Created
September 13, 2012 04:57
-
-
Save bellthoven/3711976 to your computer and use it in GitHub Desktop.
Profiling codepoints
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Instructions: | |
- Apply the patch | |
- Compile elixir | |
- Run the tests | |
$ git apply /path/to/patch.diff | |
$ make compile | |
$ ./bin/elixir native.ex > /tmp/native.ex | |
$ ./bin/elixir unicode.ex > /tmp/unicode.ex | |
In my machine, native implementation was ~1.3x faster. Feel free to add more tests. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule CodepointsProfiler do | |
def string, do: "áéíàà ¨&$#" | |
def unicode do | |
Enum.map 1..100, fn(x) -> String.codepoints_unicode("#{x}#{string}") end | |
end | |
def native do | |
Enum.map 1..100, fn(x) -> String.codepoints("#{x}#{string}") end | |
end | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Code.require_file "codepoints_profiler.ex" | |
Erlang.fprof.apply fn -> CodepointsProfiler.native end, [] | |
Erlang.fprof.profile | |
Erlang.fprof.analyse |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/lib/elixir/lib/string.ex b/lib/elixir/lib/string.ex | |
index 08868fc..5c3d2b7 100644 | |
--- a/lib/elixir/lib/string.ex | |
+++ b/lib/elixir/lib/string.ex | |
@@ -287,6 +287,33 @@ defmodule String do | |
Erlang.binary.copy(subject, n) | |
end | |
+ def codepoints_unicode(string) do | |
+ to_char = fn(char) -> :unicode.characters_to_binary([char], :utf8, :utf8) end | |
+ Enum.map(:unicode.characters_to_list(string, :utf8), to_char) | |
+ end | |
+ | |
+ @doc """ | |
+ Returns a list with codepoints | |
+ """ | |
+ def codepoints(string) do | |
+ codepoints(string, []) | |
+ end | |
+ | |
+ def codepoints(<<194, char, rest :: binary>>, buffer) when char in 161..191 do | |
+ codepoints(rest, buffer ++ [<<194, char>>]) | |
+ end | |
+ | |
+ def codepoints(<<195, char, rest :: binary>>, buffer) when char in 128..191 do | |
+ codepoints(rest, buffer ++ [<<195, char>>]) | |
+ end | |
+ | |
+ def codepoints(<<other, rest :: binary>>, buffer) do | |
+ codepoints(rest, buffer ++ [<<other>>]) | |
+ end | |
+ | |
+ def codepoints(<<>>, buffer) do | |
+ buffer | |
+ end | |
defp translate_replace_options([]), do: [] | |
defp translate_replace_options(raw_options) do | |
diff --git a/lib/elixir/test/elixir/string_test.exs b/lib/elixir/test/elixir/string_test.exs | |
index c9afa9f..dc4d3b5 100644 | |
--- a/lib/elixir/test/elixir/string_test.exs | |
+++ b/lib/elixir/test/elixir/string_test.exs | |
@@ -77,4 +77,10 @@ defmodule StringTest do | |
assert String.duplicate("&ã$", 2) == "&ã$&ã$" | |
end | |
+ test :codepoins do | |
+ assert String.codepoints("&é%Á#") == ["&","é","%","Á","#"] | |
+ assert String.codepoints("Élysés") == ["É","l","y","s","é","s"] | |
+ assert String.codepoints_unicode("áá") == ["á", "á"] | |
+ end | |
+ | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Code.require_file "codepoints_profiler.ex" | |
Erlang.fprof.apply fn -> CodepointsProfiler.unicode end, [] | |
Erlang.fprof.profile | |
Erlang.fprof.analyse |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment