public
anonymous / gist:5339185
Created

Some tests for Ruby [bug 8210](http://bugs.ruby-lang.org/issues/8210)

  • Download Gist
gistfile1.rb
Ruby
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
# encoding: UTF-8
 
# See the [Stack Overflow question](http://stackoverflow.com/questions/15779859/does-multibyte-character-interfere-with-end-line-character-within-a-regex)
 
# Print the interpreter version, then the source encoding of this file.
# The encoding line is skipped on 1.8 interpreters (the __ENCODING__ keyword
# was introduced in Ruby 1.9).
p(RUBY_VERSION)
unless RUBY_VERSION.start_with?("1.8")
  p(__ENCODING__)
end
 
# Small core extension used by the TEST banners in this script.
class String
  # Returns the last character of the receiver as a one-character String,
  # or nil when the receiver is empty.
  def last
    slice(-1, 1)
  end
end
 
# TEST 1 -- the subject string ends in a 3-byte UTF-8 character.
# Every check asks for "zero or one <something>" immediately before \z and
# prints the index returned by =~ (nil when nothing matched).
#
# Output goes through puts with escaped backslashes: inside a double-quoted
# literal a bare "\z" collapses to "z" and "\n" becomes a real newline, so the
# labels would otherwise not show the pattern that was actually matched.
s = "んにちは"
puts "TEST 1: \"#{s}\" where last character \"#{s.last}\" is #{s.last.bytesize} bytes"
# TEST 1.1 -- optional 3-byte character before the end of the string
test_for_zero_or_one_of = "ん"
puts "test for zero or one of #{test_for_zero_or_one_of} [#{test_for_zero_or_one_of.bytesize} bytes]"
n = s =~ /#{test_for_zero_or_one_of}?\z/u
puts "s =~ /#{test_for_zero_or_one_of}?\\z/u #=> #{n.inspect}"
# TEST 1.2 -- optional 2-byte character, alone and preceded by an optional 1-byte character
test_for_zero_or_one_of = "ç"
puts "test for zero or one of #{test_for_zero_or_one_of} [#{test_for_zero_or_one_of.bytesize} bytes]"
n = s =~ /#{test_for_zero_or_one_of}?\z/u
puts "s =~ /#{test_for_zero_or_one_of}?\\z/u #=> #{n.inspect}"
n = s =~ /x?#{test_for_zero_or_one_of}?\z/u
puts "s =~ /x?#{test_for_zero_or_one_of}?\\z/u #=> #{n.inspect}"
# TEST 1.3 -- one, two and three optional 1-byte characters.
# The patterns are spelled out literally; interpolating "\n" builds the same
# regexp as /\n?\z/u, so it is not matched a second time.
test_for_zero_or_one_of = "\n"
puts "test for zero or one of \\n [#{test_for_zero_or_one_of.bytesize} bytes]"
n = s =~ /\n?\z/u
puts "s =~ /\\n?\\z/u #=> #{n.inspect}"
n = s =~ /\n?\n?\z/u
puts "s =~ /\\n?\\n?\\z/u #=> #{n.inspect}"
n = s =~ /\n?\n?\n?\z/u
puts "s =~ /\\n?\\n?\\n?\\z/u #=> #{n.inspect}"
puts "NB: if the last multi-byte character of the string is 3 bytes , then the 'zero or one before' test only works when we test for at least 3 bytes (not 3 character) before"
 
######
# TEST 2 -- the subject string ends in a 2-byte UTF-8 character.
# Every check asks for "zero or one <something>" immediately before \z and
# prints the index returned by =~ (nil when nothing matched).
#
# Output goes through puts with escaped backslashes: inside a double-quoted
# literal a bare "\z" collapses to "z" and "\n" becomes a real newline, so the
# labels would otherwise not show the pattern that was actually matched.
s = "in French there is the ç"
# The banner's byte count is taken from this separate literal copy of the
# final character rather than from s.last.
s_last_character = "ç"
puts "TEST 2: \"#{s}\" where last character \"#{s.last}\" is #{s_last_character.bytesize} bytes"
# TEST 2.1 -- optional 3-byte character before the end of the string
test_for_zero_or_one_of = "ん"
puts "test for zero or one of #{test_for_zero_or_one_of} [#{test_for_zero_or_one_of.bytesize} bytes]"
n = s =~ /#{test_for_zero_or_one_of}?\z/u
puts "s =~ /#{test_for_zero_or_one_of}?\\z/u #=> #{n.inspect}"
# TEST 2.2 -- optional 2-byte character, alone and preceded by an optional 1-byte character
test_for_zero_or_one_of = "é"
puts "test for zero or one of #{test_for_zero_or_one_of} [#{test_for_zero_or_one_of.bytesize} bytes]"
n = s =~ /#{test_for_zero_or_one_of}?\z/u
puts "s =~ /#{test_for_zero_or_one_of}?\\z/u #=> #{n.inspect}"
n = s =~ /x?#{test_for_zero_or_one_of}?\z/u
puts "s =~ /x?#{test_for_zero_or_one_of}?\\z/u #=> #{n.inspect}"
# TEST 2.3 -- one, two and three optional 1-byte characters.
# The patterns are spelled out literally; interpolating "\n" builds the same
# regexp as /\n?\z/u, so it is not matched a second time.
test_for_zero_or_one_of = "\n"
puts "test for zero or one of \\n [#{test_for_zero_or_one_of.bytesize} bytes]"
n = s =~ /\n?\z/u
puts "s =~ /\\n?\\z/u #=> #{n.inspect}"
n = s =~ /\n?\n?\z/u
puts "s =~ /\\n?\\n?\\z/u #=> #{n.inspect}"
n = s =~ /\n?\n?\n?\z/u
puts "s =~ /\\n?\\n?\\n?\\z/u #=> #{n.inspect}"
puts "NB: if the last multi-byte character of the string is 2 bytes , then the 'zero or one before' test only works when we test for at least 2 bytes (not 2 character) before"
 
######
# TEST 3 -- control case: the multi-byte character sits inside the string
# rather than at its end. Each pattern puts one or two optional "x" atoms in
# front of a character of the subject and prints the index returned by =~.
p "TEST 3 - When the multi-byte character is not at the end of the string there is no problem"
cedilla_bytes = "ç".bytes.to_a.join(",")
p "\"ç\" bytes: #{cedilla_bytes}"
s = "abçd"
p "string is \"#{s}\""
# Declared outside the loop so the last match index stays visible afterwards.
n = nil
# Each row pairs a pattern with the label printed in front of its match index.
[
  [/x?b/u,   " zero or one x preceding b, match:"],
  [/x?d/u,   " zero or one x preceding d, match:"],
  [/x?x?d/u, " zero or two x preceding d, match:"],
  [/x?ç/u,   " zero or one x preceding ç, match:"],
  [/x?x?ç/u, " zero or two x preceding ç, match:"]
].each do |pattern, label|
  n = s =~ pattern
  p "#{label}#{n}"
end

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.