Created
April 8, 2013 18:21
-
-
Save anonymous/5339185 to your computer and use it in GitHub Desktop.
Some tests for [bug 8210](http://bugs.ruby-lang.org/issues/8210)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# encoding: UTF-8
# see [s.o. question](http://stackoverflow.com/questions/15779859/does-multibyte-character-interfere-with-end-line-character-within-a-regex)

# Report which interpreter produced the results that follow.
p RUBY_VERSION
# Report the source encoding as well, except on 1.8.x.
# NOTE(review): presumably skipped because __ENCODING__ is not available on 1.8 - confirm.
p __ENCODING__ unless RUBY_VERSION.start_with?("1.8")
# Reopens the core String class to add a small helper for fetching the last
# character of a string.
class String
  # Returns the final character of the receiver as a one-character String,
  # or nil when the receiver is empty.
  #
  # Uses the (start, length) form of String#[] so the result is always a
  # String rather than a character code.
  # NOTE(review): on interpreters where String#[] indexes bytes (Ruby 1.8),
  # this would yield the last byte rather than the last character - confirm
  # if 1.8 output matters.
  #
  # @return [String, nil]
  def last
    self[-1, 1]
  end
end
# TEST 1
# Subject: a string made only of multi-byte characters. Each sub-test asks for
# "zero or one of <something>" immediately before the end-of-string anchor
# and prints the match offset returned by =~ (or "nil" when nothing matched).
#
# NOTE(review): the printed labels are double-quoted strings, where `\z` is
# not a recognised escape, so the label shows `z` instead of `\z`. The
# regexps themselves are unaffected. Labels are left untouched so the output
# stays comparable with earlier runs.
s = "んにちは"
p "TEST 1: \"#{s}\" where last character \"#{s.last}\" is #{s.last.bytes.size} bytes"

# TEST 1.1
# Optional character that occurs in the subject (at the start, not the end).
test_for_zero_or_one_of = "ん"
p "test for zero or one of #{test_for_zero_or_one_of} [#{test_for_zero_or_one_of.bytes.size} bytes]"
n = s =~ /#{test_for_zero_or_one_of}?\z/u
p "s =~ /#{test_for_zero_or_one_of}?\z/u #=> #{n.inspect}"

# TEST 1.2
# Optional character that does not occur in the subject, alone and then
# preceded by a second optional single-byte character.
test_for_zero_or_one_of = "ç"
p "test for zero or one of #{test_for_zero_or_one_of} [#{test_for_zero_or_one_of.bytes.size} bytes]"
n = s =~ /#{test_for_zero_or_one_of}?\z/u
p "s =~ /#{test_for_zero_or_one_of}?\z/u #=> #{n.inspect}"
n = s =~ /x?#{test_for_zero_or_one_of}?\z/u
p "s =~ /x?#{test_for_zero_or_one_of}?\z/u #=> #{n.inspect}"

# TEST 1.3
# Optional newline(s) before the end of the string: one, two, then three
# optional single-byte characters.
test_for_zero_or_one_of = "\n"
p "test for zero or one of \n [#{test_for_zero_or_one_of.bytes.size} bytes]"
n = s =~ /\n?\z/u
p "s =~ /\n?\z/u #=> #{n.inspect}"
n = s =~ /\n?\n?\z/u
p "s =~ /\n?\n?\z/u #=> #{n.inspect}"
n = s =~ /\n?\n?\n?\z/u
p "s =~ /\n?\n?\n?\z/u #=> #{n.inspect}"

p "NB: if the last multi-byte character of the string is 3 bytes , then the 'zero or one before' test only works when we test for at least 3 bytes (not 3 character) before"
######
# TEST 2
# Subject: a mostly-ASCII string whose only multi-byte character is the last
# one. Each sub-test asks for "zero or one of <something>" immediately before
# the end-of-string anchor and prints the match offset returned by =~ (or
# "nil" when nothing matched).
#
# NOTE(review): the printed labels are double-quoted strings, where `\z` is
# not a recognised escape, so the label shows `z` instead of `\z`. The
# regexps themselves are unaffected. Labels are left untouched so the output
# stays comparable with earlier runs.
s = "in French there is the ç"
# NOTE(review): the byte count is taken from a literal copy of the last
# character rather than from s.last - presumably to stay correct where
# String#[] is byte-indexed; confirm.
s_last_character = "ç"
p "TEST 2: \"#{s}\" where last character \"#{s.last}\" is #{s_last_character.bytes.size} bytes"

# TEST 2.1
# Optional multi-byte character that does not occur in the subject.
test_for_zero_or_one_of = "ん"
p "test for zero or one of #{test_for_zero_or_one_of} [#{test_for_zero_or_one_of.bytes.size} bytes]"
n = s =~ /#{test_for_zero_or_one_of}?\z/u
p "s =~ /#{test_for_zero_or_one_of}?\z/u #=> #{n.inspect}"

# TEST 2.2
# Another absent optional character, alone and then preceded by a second
# optional single-byte character.
test_for_zero_or_one_of = "é"
p "test for zero or one of #{test_for_zero_or_one_of} [#{test_for_zero_or_one_of.bytes.size} bytes]"
n = s =~ /#{test_for_zero_or_one_of}?\z/u
p "s =~ /#{test_for_zero_or_one_of}?\z/u #=> #{n.inspect}"
n = s =~ /x?#{test_for_zero_or_one_of}?\z/u
p "s =~ /x?#{test_for_zero_or_one_of}?\z/u #=> #{n.inspect}"

# TEST 2.3
# Optional newline(s) before the end of the string: one, two, then three
# optional single-byte characters.
test_for_zero_or_one_of = "\n"
p "test for zero or one of \n [#{test_for_zero_or_one_of.bytes.size} bytes]"
n = s =~ /\n?\z/u
p "s =~ /\n?\z/u #=> #{n.inspect}"
n = s =~ /\n?\n?\z/u
p "s =~ /\n?\n?\z/u #=> #{n.inspect}"
n = s =~ /\n?\n?\n?\z/u
p "s =~ /\n?\n?\n?\z/u #=> #{n.inspect}"

p "NB: if the last multi-byte character of the string is 2 bytes , then the 'zero or one before' test only works when we test for at least 2 bytes (not 2 character) before"
######
# TEST 3
# Control case: the multi-byte character sits in the middle of the subject
# instead of at its end. Each check puts one or two optional "x" characters
# in front of a literal and prints the match offset returned by =~.
p "TEST 3 - When the multi-byte character is not at the end of the string there is no problem"
# Show the raw bytes of the multi-byte character for reference.
p "\"ç\" bytes: #{"ç".bytes.to_a.join(",")}"

s = "abçd"
p "string is \"#{s}\""

# Literal before the multi-byte character.
n = s =~ /x?b/u
p " zero or one x preceding b, match:#{n}"

# Literal after the multi-byte character.
n = s =~ /x?d/u
p " zero or one x preceding d, match:#{n}"
n = s =~ /x?x?d/u
p " zero or two x preceding d, match:#{n}"

# The multi-byte character itself.
n = s =~ /x?ç/u
p " zero or one x preceding ç, match:#{n}"
n = s =~ /x?x?ç/u
p " zero or two x preceding ç, match:#{n}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment