Skip to content

Instantly share code, notes, and snippets.

@ScottPJones
Last active August 29, 2015 14:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ScottPJones/4e6e8938f0559998f9fc to your computer and use it in GitHub Desktop.
Save ScottPJones/4e6e8938f0559998f9fc to your computer and use it in GitHub Desktop.
Tests for string conversions
# issue #11004 (#10959)
module TestConvert

# Stand-alone checks for conversions between ASCII, UTF-8, UTF-16 and UTF-32
# strings, including rejection of malformed UTF-8 input.
#
# The checks run when the module is loaded.  Each one prints a line of the form
# "Pass: <label>" or "Fail: <label>", and a summary with the totals is printed
# at the end.

# Running totals: number of checks attempted and number of checks that failed.
counttest = 0
counterr = 0
incrtest() = (global counttest += 1)
increrr() = (global counterr += 1)

# Module-level bindings for the names `T` and `byt` that the invalid-sequence
# loops below iterate over; the diagnostic in the final `catch` reads both.
T = ""
byt = 0x0

# @testn label expr
#
# Count one check, evaluate `expr` in the caller's scope, and print
# "Pass: <label>" when it is true, or count a failure and print
# "Fail: <label>" when it is false.  An exception thrown by `expr` is not
# caught here and propagates to the caller.
macro testn(nam, arg)
    quote
        incrtest()
        if $(esc(arg))
            print("Pass: ")
        else
            increrr()
            print("Fail: ")
        end
        # The label is escaped so that any interpolation inside it is resolved
        # where the macro is used rather than against this module's globals.
        println($(esc(nam)))
    end
end

# @test_throws ErrType label expr
#
# Count one check and evaluate `expr` in the caller's scope.  The check passes
# only when `expr` throws an exception that is an instance of `ErrType`.  It is
# counted as a failure when `expr` returns normally or throws anything else, so
# an unrelated error in the test expression is not reported as a pass.
macro test_throws(err, nam, arg)
    quote
        incrtest()
        try
            $(esc(arg))
            # Reaching this point means nothing was thrown.
            increrr()
            print("Fail: ")
        catch x
            if isa(x, $(esc(err)))
                print("Pass: ")
            else
                increrr()
                print("Fail: ")
            end
        end
        println($(esc(nam)))
    end
end

# Create some ASCII, UTF8, UTF16, and UTF32 strings
strAscii = "abcdefgh"
# Only ASCII characters, but sliced out of a literal that also contains \uff
strA_8 = ("abcdefgh\uff")[1:8]
# Widest character is \uff
strL_8 = "abcdef\uff\uff"
# Widest character is \u7ff
str2_8 = "abcd\uff\uff\u7ff\u7ff"
# Widest character is \u7fff
str3_8 = "abcd\uff\uff\u7fff\u7fff"
# Widest character is \U7ffff
str4_8 = "abcd\uff\u7ff\u7fff\U7ffff"
# Ends with a surrogate pair written as two 3-byte sequences (U+D800, U+DC00)
strS_8 = UTF8String(b"abcd\xc3\xbf\xdf\xbf\xe7\xbf\xbf\xed\xa0\x80\xed\xb0\x80")
# Same prefix, ending with \U10000 written directly
strC_8 = UTF8String(b"abcd\xc3\xbf\xdf\xbf\xe7\xbf\xbf\U10000")
# Same prefix, ending with the overlong two-byte form of \0 (0xc0 0x80)
strZ_8 = UTF8String(b"abcd\xc3\xbf\xdf\xbf\xe7\xbf\xbf\xc0\x80")
# Same prefix, ending with a plain \0
strz_8 = UTF8String(b"abcd\xc3\xbf\xdf\xbf\xe7\xbf\xbf\0")

strA_16 = utf16(strA_8)
strL_16 = utf16(strL_8)
str2_16 = utf16(str2_8)
str3_16 = utf16(str3_8)
str4_16 = utf16(str4_8)
strS_16 = utf16(strS_8)

strA_32 = utf32(strA_8)
strL_32 = utf32(strL_8)
str2_32 = utf32(str2_8)
str3_32 = utf32(str3_8)
str4_32 = utf32(str4_8)
strS_32 = utf32(strS_8)

@testn "utf8(strAscii) == strAscii" utf8(strAscii) == strAscii
@testn "utf16(strAscii) == strAscii" utf16(strAscii) == strAscii
@testn "utf32(strAscii) == strAscii" utf32(strAscii) == strAscii

# Every encoding of each sample string must convert to every other encoding
for (strUTF8, strUTF16, strUTF32) in ((strA_8, strA_16, strA_32),
                                      (strL_8, strL_16, strL_32),
                                      (str2_8, str2_16, str2_32),
                                      (str3_8, str3_16, str3_32),
                                      (str4_8, str4_16, str4_32))
    println("strUTF8 = $strUTF8, strUTF16 = $strUTF16, strUTF32 = $strUTF32")
    @testn "utf16(strUTF8) == strUTF16" utf16(strUTF8) == strUTF16
    @testn "utf32(strUTF8) == strUTF32" utf32(strUTF8) == strUTF32
    @testn "utf8(strUTF16) == strUTF8" utf8(strUTF16) == strUTF8
    @testn "utf32(strUTF16) == strUTF32" utf32(strUTF16) == strUTF32
    @testn "utf8(strUTF32) == strUTF8" utf8(strUTF32) == strUTF8
    @testn "utf16(strUTF32) == strUTF16" utf16(strUTF32) == strUTF16
end

# Test converting surrogate pairs
@testn "utf16(strS_8) == strC_8" utf16(strS_8) == strC_8
@testn "utf32(strS_8) == strC_8" utf32(strS_8) == strC_8
@testn "utf8(strS_16) == strC_8" utf8(strS_16) == strC_8
@testn "utf32(strS_16) == strC_8" utf32(strS_16) == strC_8
@testn "utf8(strS_32) == strC_8" utf8(strS_32) == strC_8
@testn "utf16(strS_32) == strC_8" utf16(strS_32) == strC_8

# Test converting overlong \0
# currently broken! (in utf8.jl)
@testn "utf8(strZ_8) == strz_8" utf8(strZ_8) == strz_8
@testn "utf16(strZ_8) == strz_8" utf16(strZ_8) == strz_8
@testn "utf32(strZ_8) == strz_8" utf32(strZ_8) == strz_8

# Test invalid sequences
for T in (UTF8String, UTF16String, UTF32String)
    try
        # Continuation byte not after lead
        for byt in 0x80:0xbf
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt))]))" convert(T, UTF8String(UInt8[byt]))
        end

        # Test lead bytes
        for byt in 0xc0:0xff
            # Single lead byte at end of string
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt))]))" convert(T, UTF8String(UInt8[byt]))
            # Lead followed by non-continuation character < 0x80
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0]))" convert(T, UTF8String(UInt8[byt,0]))
            # Lead followed by non-continuation character > 0xbf
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0xc0]))" convert(T, UTF8String(UInt8[byt,0xc0]))
        end

        # Test overlong 2-byte
        # (0xc0 0x80 is left out: it is the overlong \0 checked separately above)
        for byt in 0x81:0xbf
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xc0,0x$(hex(byt))]))" convert(T, UTF8String(UInt8[0xc0,byt]))
        end
        for byt in 0x80:0xbf
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xc1,0x$(hex(byt))]))" convert(T, UTF8String(UInt8[0xc1,byt]))
        end

        # Test overlong 3-byte
        for byt in 0x80:0x9f
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xe0,0x$(hex(byt)),0x80]))" convert(T, UTF8String(UInt8[0xe0,byt,0x80]))
        end

        # Test overlong 4-byte
        # The lead byte must be 0xf0: 0xef starts a 3-byte sequence, so using it
        # here would only test a valid character followed by a stray
        # continuation byte instead of an overlong 4-byte encoding.
        for byt in 0x80:0x8f
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xf0,0x$(hex(byt)),0x80,0x80]))" convert(T, UTF8String(UInt8[0xf0,byt,0x80,0x80]))
        end

        # Test 4-byte > 0x10ffff
        for byt in 0x90:0xbf
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xf4,0x$(hex(byt)),0x80,0x80]))" convert(T, UTF8String(UInt8[0xf4,byt,0x80,0x80]))
        end
        for byt in 0xf5:0xf7
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0x80,0x80]))" convert(T, UTF8String(UInt8[byt,0x80,0x80,0x80]))
        end

        # Test 5-byte
        for byt in 0xf8:0xfb
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0x80,0x80,0x80]))" convert(T, UTF8String(UInt8[byt,0x80,0x80,0x80,0x80]))
        end

        # Test 6-byte
        for byt in 0xfc:0xfd
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0x80,0x80,0x80,0x80]))" convert(T, UTF8String(UInt8[byt,0x80,0x80,0x80,0x80,0x80]))
        end

        # Test 7-byte
        @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xfe,0x80,0x80,0x80,0x80,0x80,0x80]))" convert(T, UTF8String(UInt8[0xfe,0x80,0x80,0x80,0x80,0x80,0x80]))

        # Three and above byte sequences
        for byt in 0xe0:0xef
            # Lead followed by only 1 continuation byte
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80]))" convert(T, UTF8String(UInt8[byt,0x80]))
            # Lead ended by non-continuation character < 0x80
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0]))" convert(T, UTF8String(UInt8[byt,0x80,0]))
            # Lead ended by non-continuation character > 0xbf
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0xc0]))" convert(T, UTF8String(UInt8[byt,0x80,0xc0]))
        end

        # 3-byte encoded surrogate character(s)
        # Single surrogate
        @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xed,0xa0,0x80]))" convert(T, UTF8String(UInt8[0xed,0xa0,0x80]))
        # Not followed by surrogate
        @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xed,0xa0,0x80,0xed,0x80,0x80]))" convert(T, UTF8String(UInt8[0xed,0xa0,0x80,0xed,0x80,0x80]))
        # Trailing surrogate first
        @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xed,0xb0,0x80,0xed,0xb0,0x80]))" convert(T, UTF8String(UInt8[0xed,0xb0,0x80,0xed,0xb0,0x80]))
        # Followed by lead surrogate
        @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xed,0xa0,0x80,0xed,0xa0,0x80]))" convert(T, UTF8String(UInt8[0xed,0xa0,0x80,0xed,0xa0,0x80]))

        # Four byte sequences
        for byt in 0xf0:0xf4
            # Lead followed by only 2 continuation bytes
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0x80]))" convert(T, UTF8String(UInt8[byt,0x80,0x80]))
            # Lead followed by non-continuation character < 0x80
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0x80,0]))" convert(T, UTF8String(UInt8[byt,0x80,0x80,0]))
            # Lead followed by non-continuation character > 0xbf
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0x80,0xc0]))" convert(T, UTF8String(UInt8[byt,0x80,0x80,0xc0]))
        end
    catch
        # Anything that escapes the checks above is unexpected: print the
        # current `T` and `byt` bindings for context, then re-raise the same
        # exception without binding it to a module-level name.
        println("Error checking $T: 0x$(hex(byt))")
        rethrow()
    end
end

println("Out of $(TestConvert.counttest) tests, $(TestConvert.counterr) failed")

end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment