docker-compose run ruby-22
docker-compose run ruby-23
Last active
October 11, 2016 15:52
-
-
Save mtsmfm/38f46882c3d4ccde35c269594fc24ebc to your computer and use it in GitHub Desktop.
Ruby grapheme test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compose file that runs test.rb against two Ruby images.
version: '2'
services:
  # Base service definition; anchored as `ruby` so other services can reuse it.
  ruby-22: &ruby
    image: ruby:2.2
    command: ruby test.rb
    working_dir: /app
    environment:
      # UTF-8 locale — presumably so Ruby treats external data as UTF-8; confirm.
      LANG: C.UTF-8
      LC_ALL: C.UTF-8
    volumes:
      # Mount the current directory (containing test.rb) at the working dir.
      - .:/app
  # Same settings as ruby-22 (merged via the anchor), overriding only the image.
  ruby-23:
    <<: *ruby
    image: ruby:2.3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'rubygems'
require 'open-uri'
require 'test/unit'
# Unicode data version selected from the running interpreter:
# "8.0.0" on Ruby 2.3.0 or newer, "7.0.0" on anything older.
UNICODE_VERSION =
  if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("2.3.0")
    "8.0.0"
  else
    "7.0.0"
  end
# Checks that scanning a string with /\X/ splits it into the clusters listed
# in the Unicode GraphemeBreakTest.txt data file.
class TestGrapheme < Test::Unit::TestCase
  # https://github.com/rails/rails/blob/v5.0.0.1/activesupport/test/multibyte_grapheme_break_conformance_test.rb#L37
  #
  # For every test line, rebuilds the string from its codepoints and asserts
  # that /\X/ yields exactly the expected clusters. The line's trailing
  # comment is used as the assertion failure message.
  def test_breaks
    each_line_of_break_tests do |*cols|
      *clusters, comment = *cols
      string = clusters.map { |c| c.pack("U*") }.join
      assert_equal clusters, string.scan(/\X/).map(&:codepoints), comment
    end
  end

  # Downloads GraphemeBreakTest.txt for UNICODE_VERSION and yields one parsed
  # test case per data line.
  #
  # Yields the clusters as separate arguments (each an Array of Integer
  # codepoints) followed by the line's comment String as the last argument.
  # Blank lines, comment lines and cases containing U+D800 are skipped.
  def each_line_of_break_tests(&block)
    lines = 0
    max_test_lines = 0 # Don't limit below 21, because that's the header of the testfile
    URI.parse("http://www.unicode.org/Public/#{UNICODE_VERSION}/ucd/auxiliary/GraphemeBreakTest.txt").open do |f|
      until f.eof? || (max_test_lines > 21 && lines > max_test_lines)
        lines += 1
        # Use the non-bang chomp: chomp! returns nil when the line has no
        # trailing newline (e.g. the last line of the file).
        line = f.gets.chomp
        next if line.empty? || line.start_with?("#")

        # Split on the first "#" only so the comment is kept whole.
        cols, comment = line.split("#", 2)
        # Cluster breaks are represented by ÷
        clusters = cols.split("÷").map { |e| e.strip }.reject { |e| e.empty? }
        clusters = clusters.map do |cluster|
          # Codepoints within each cluster are separated by ×
          codepoints = cluster.split("×").map { |e| e.strip }.reject { |e| e.empty? }
          # codepoints are in hex in the test suite, pack wants them as integers
          codepoints.map { |codepoint| codepoint.to_i(16) }
        end
        # Nothing to check on a line that carries no clusters.
        next if clusters.empty?
        # The tests contain a solitary U+D800 <Non Private Use High
        # Surrogate, First> character, which Ruby does not allow to stand
        # alone in a UTF-8 string. So we'll just skip it.
        next if clusters.flatten.include?(0xd800)

        # comment is nil when the line has no "#" separator.
        clusters << comment.to_s.strip
        yield(*clusters)
      end
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment