# ScottPJones/testcvt.jl

## testcvt.jl
# issue #11004 (#10959)

module TestConvert

# Module-wide tallies updated by the test macros in this file.
global counttest = 0    # number of checks executed
global counterr  = 0    # number of checks that failed

# Bump the executed-check tally and return its new value.
function incrtest()
    global counttest
    counttest += 1
    return counttest
end

# Bump the failed-check tally and return its new value.
function increrr()
    global counterr
    counterr += 1
    return counterr
end

# Module-level placeholders named like the loop variables of the
# invalid-sequence checks.
# NOTE(review): presumably these exist so `T` and `byt` are defined when the
# diagnostic messages interpolate them -- confirm.
T = ""
byt = 0x00

# @testn label expr
#
# Count one check (incrtest), evaluate `expr` at the call site, then print
# "Pass: " or "Fail: " followed by `label`. A false result also bumps the
# failure tally (increrr).
#
# Both arguments are escaped so that names used in `expr`, and any `$`
# interpolation inside `label`, resolve where the macro is invoked rather
# than in this module. The value of `expr` is used directly; it is not fed
# through eval() a second time.
macro testn(nam, arg)
    quote
        incrtest()
        if !($(esc(arg)))
            increrr()
            print("Fail: ")
        else
            print("Pass: ")
        end
        println($(esc(nam)))
    end
end

# @test_throws err label expr
#
# Count one check (incrtest), evaluate `expr` at the call site, and print
# "Pass: " followed by `label` only when it throws an exception that is an
# instance of `err`. Returning normally, or throwing some other exception
# type, prints "Fail: " and bumps the failure tally (increrr).
#
# All three arguments are escaped so that the exception type, the names in
# `expr`, and any `$` interpolation inside `label` resolve where the macro
# is invoked. The expression is run directly instead of through eval().
macro test_throws(err, nam, arg)
    quote
        incrtest()
        try
            $(esc(arg))
            # Reaching this line means nothing was thrown.
            increrr()
            print("Fail: ")
        catch x
            if isa(x, $(esc(err)))
                print("Pass: ")
            else
                # An exception was raised, but not the expected kind.
                increrr()
                print("Fail: ")
            end
        end
        println($(esc(nam)))
    end
end

# Create some ASCII, UTF8, UTF16, and UTF32 strings
# Fixture names end in _8, _16 or _32; the _16 and _32 values below are
# produced by converting the matching _8 value with utf16() / utf32().
strAscii = "abcdefgh"
# First eight bytes of a literal that also contains U+00FF, i.e. "abcdefgh".
# NOTE(review): presumably this yields a UTF8String with ASCII-only content -- confirm.
strA_8 = ("abcdefgh\uff")[1:8]
# ASCII followed by two U+00FF characters.
strL_8 = "abcdef\uff\uff"
# Adds two U+07FF characters (the largest code point that fits in 2 UTF-8 bytes).
str2_8 = "abcd\uff\uff\u7ff\u7ff"
# Adds two U+7FFF characters (3 UTF-8 bytes each).
str3_8 = "abcd\uff\uff\u7fff\u7fff"
# One each of U+00FF, U+07FF, U+7FFF and U+7FFFF (the last needs 4 UTF-8 bytes).
str4_8 = "abcd\uff\u7ff\u7fff\U7ffff"
# Ends with ED A0 80 ED B0 80: the surrogates U+D800 and U+DC00, each written
# as its own 3-byte sequence.
strS_8 = UTF8String(b"abcd\xc3\xbf\xdf\xbf\xe7\xbf\xbf\xed\xa0\x80\xed\xb0\x80")
# Same prefix ending in U+10000; the surrogate-pair checks compare against this.
strC_8 = UTF8String(b"abcd\xc3\xbf\xdf\xbf\xe7\xbf\xbf\U10000")
# Same prefix ending in C0 80, the overlong 2-byte form of NUL.
strZ_8 = UTF8String(b"abcd\xc3\xbf\xdf\xbf\xe7\xbf\xbf\xc0\x80")
# Same prefix ending in a plain NUL byte; the overlong-NUL checks compare against this.
strz_8 = UTF8String(b"abcd\xc3\xbf\xdf\xbf\xe7\xbf\xbf\0")

# UTF-16 versions of the fixtures.
strA_16 = utf16(strA_8)
strL_16 = utf16(strL_8)
str2_16 = utf16(str2_8)
str3_16 = utf16(str3_8)
str4_16 = utf16(str4_8)
strS_16 = utf16(strS_8)
# UTF-32 versions of the fixtures.
strA_32 = utf32(strA_8)
strL_32 = utf32(strL_8)
str2_32 = utf32(str2_8)
str3_32 = utf32(str3_8)
str4_32 = utf32(str4_8)
strS_32 = utf32(strS_8)
# The ASCII fixture must compare equal to itself after conversion to each encoding.
@testn("utf8(strAscii) == strAscii", utf8(strAscii) == strAscii)
@testn("utf16(strAscii) == strAscii", utf16(strAscii) == strAscii)
@testn("utf32(strAscii) == strAscii", utf32(strAscii) == strAscii)

# Walk the fixtures as (UTF-8, UTF-16, UTF-32) triples and check every
# conversion between the three encodings.
for (strUTF8, strUTF16, strUTF32) in zip((strA_8,  strL_8,  str2_8,  str3_8,  str4_8),
                                         (strA_16, strL_16, str2_16, str3_16, str4_16),
                                         (strA_32, strL_32, str2_32, str3_32, str4_32))
    println("strUTF8 = $strUTF8, strUTF16 = $strUTF16, strUTF32 = $strUTF32")
    @testn("utf16(strUTF8) == strUTF16", utf16(strUTF8) == strUTF16)
    @testn("utf32(strUTF8) == strUTF32", utf32(strUTF8) == strUTF32)
    @testn("utf8(strUTF16) == strUTF8", utf8(strUTF16) == strUTF8)
    @testn("utf32(strUTF16) == strUTF32", utf32(strUTF16) == strUTF32)
    @testn("utf8(strUTF32)  == strUTF8", utf8(strUTF32) == strUTF8)
    @testn("utf16(strUTF32) == strUTF16", utf16(strUTF32) == strUTF16)
end

# Surrogate pairs: every conversion of the strS_* fixtures must equal strC_8.
@testn("utf16(strS_8) == strC_8", utf16(strS_8) == strC_8)
@testn("utf32(strS_8) == strC_8", utf32(strS_8) == strC_8)
@testn("utf8(strS_16) == strC_8", utf8(strS_16) == strC_8)
@testn("utf32(strS_16) == strC_8", utf32(strS_16) == strC_8)
@testn("utf8(strS_32)  == strC_8", utf8(strS_32) == strC_8)
@testn("utf16(strS_32) == strC_8", utf16(strS_32) == strC_8)

# Overlong \0: conversions of strZ_8 must equal strz_8.
# Marked by the original author as currently broken (in utf8.jl).
@testn("utf8(strZ_8)  == strz_8", utf8(strZ_8) == strz_8)
@testn("utf16(strZ_8) == strz_8", utf16(strZ_8) == strz_8)
@testn("utf32(strZ_8) == strz_8", utf32(strZ_8) == strz_8)

# Test invalid sequences
#
# Each check builds a UTF8String from a malformed byte sequence and expects
# convert() to the target type T to throw (ArgumentError is the declared
# exception type). The label given to @test_throws spells out the call.

for T in (UTF8String, UTF16String, UTF32String)
    try
        # Continuation byte not after lead
        for byt in 0x80:0xbf
            @test_throws ArgumentError "convert($T,  UTF8String(UInt8[0x$(hex(byt))]))" convert(T,  UTF8String(UInt8[byt]))
        end

        # Test lead bytes
        for byt in 0xc0:0xff
            # Single lead byte at end of string
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt))]))" convert(T, UTF8String(UInt8[byt]))
            # Lead followed by non-continuation character < 0x80
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0]))" convert(T, UTF8String(UInt8[byt,0]))
            # Lead followed by non-continuation character > 0xbf
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0xc0]))" convert(T, UTF8String(UInt8[byt,0xc0]))
        end

        # Test overlong 2-byte
        # (the 0xc0 range starts at 0x81, so C0 80 itself is not checked here)
        for byt in 0x81:0xbf
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xc0,0x$(hex(byt))]))" convert(T, UTF8String(UInt8[0xc0,byt]))
        end
        for byt in 0x80:0xbf
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xc1,0x$(hex(byt))]))" convert(T, UTF8String(UInt8[0xc1,byt]))
        end

        # Test overlong 3-byte
        for byt in 0x80:0x9f
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xe0,0x$(hex(byt)),0x80]))" convert(T, UTF8String(UInt8[0xe0,byt,0x80]))
        end

        # Test overlong 4-byte: lead 0xf0 with a second byte below 0x90 encodes
        # a code point under U+10000. (0xef is a 3-byte lead, so it cannot
        # start a 4-byte sequence.)
        for byt in 0x80:0x8f
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xf0,0x$(hex(byt)),0x80,0x80]))" convert(T, UTF8String(UInt8[0xf0,byt,0x80,0x80]))
        end

        # Test 4-byte > 0x10ffff
        for byt in 0x90:0xbf
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xf4,0x$(hex(byt)),0x80,0x80]))" convert(T, UTF8String(UInt8[0xf4,byt,0x80,0x80]))
        end
        for byt in 0xf5:0xf7
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0x80,0x80]))" convert(T, UTF8String(UInt8[byt,0x80,0x80,0x80]))
        end

        # Test 5-byte
        for byt in 0xf8:0xfb
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0x80,0x80,0x80]))" convert(T, UTF8String(UInt8[byt,0x80,0x80,0x80,0x80]))
        end

        # Test 6-byte
        for byt in 0xfc:0xfd
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0x80,0x80,0x80,0x80]))" convert(T, UTF8String(UInt8[byt,0x80,0x80,0x80,0x80,0x80]))
        end

        # Test 7-byte
        @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xfe,0x80,0x80,0x80,0x80,0x80,0x80]))" convert(T, UTF8String(UInt8[0xfe,0x80,0x80,0x80,0x80,0x80,0x80]))

        # Three and above byte sequences
        for byt in 0xe0:0xef
            # Lead followed by only 1 continuation byte
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80]))" convert(T, UTF8String(UInt8[byt,0x80]))
            # Lead ended by non-continuation character < 0x80
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0]))" convert(T, UTF8String(UInt8[byt,0x80,0]))
            # Lead ended by non-continuation character > 0xbf
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0xc0]))" convert(T, UTF8String(UInt8[byt,0x80,0xc0]))
        end

        # 3-byte encoded surrogate character(s)
        # Single surrogate
        @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xed,0xa0,0x80]))" convert(T, UTF8String(UInt8[0xed,0xa0,0x80]))
        # Not followed by surrogate
        @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xed,0xa0,0x80,0xed,0x80,0x80]))" convert(T, UTF8String(UInt8[0xed,0xa0,0x80,0xed,0x80,0x80]))
        # Trailing surrogate first
        @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xed,0xb0,0x80,0xed,0xb0,0x80]))" convert(T, UTF8String(UInt8[0xed,0xb0,0x80,0xed,0xb0,0x80]))
        # Followed by lead surrogate
        @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xed,0xa0,0x80,0xed,0xa0,0x80]))" convert(T, UTF8String(UInt8[0xed,0xa0,0x80,0xed,0xa0,0x80]))

        # Four byte sequences
        for byt in 0xf0:0xf4
            # Lead followed by only 2 continuation bytes
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0x80]))" convert(T, UTF8String(UInt8[byt,0x80,0x80]))
            # Lead followed by non-continuation character < 0x80
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0x80,0]))" convert(T, UTF8String(UInt8[byt,0x80,0x80,0]))
            # Lead followed by non-continuation character > 0xbf
            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0x80,0xc0]))" convert(T, UTF8String(UInt8[byt,0x80,0x80,0xc0]))
        end
    catch
        # Something escaped the checks themselves: print T and the current
        # value of byt, then propagate the exception with its original
        # backtrace.
        println("Error checking $T: 0x$(hex(byt))")
        rethrow()
    end
end

# Final tally of executed and failed checks.
println("Out of $(TestConvert.counttest) tests, $(TestConvert.counterr) failed")

end
	# issue #11004 (#10959)

	module TestConvert

	# Module-wide tallies updated by the test macros in this file.
	global counttest = 0    # number of checks executed
	global counterr = 0     # number of checks that failed

	# Bump the executed-check tally and return its new value.
	function incrtest()
	    global counttest
	    counttest += 1
	    return counttest
	end

	# Bump the failed-check tally and return its new value.
	function increrr()
	    global counterr
	    counterr += 1
	    return counterr
	end

	# Module-level placeholders named like the loop variables of the
	# invalid-sequence checks.
	# NOTE(review): presumably these exist so `T` and `byt` are defined when the
	# diagnostic messages interpolate them -- confirm.
	T = ""
	byt = 0x00

	# @testn label expr
	#
	# Count one check (incrtest), evaluate `expr` at the call site, then print
	# "Pass: " or "Fail: " followed by `label`. A false result also bumps the
	# failure tally (increrr).
	#
	# Both arguments are escaped so that names used in `expr`, and any `$`
	# interpolation inside `label`, resolve where the macro is invoked rather
	# than in this module. The value of `expr` is used directly; it is not fed
	# through eval() a second time.
	macro testn(nam, arg)
	    quote
	        incrtest()
	        if !($(esc(arg)))
	            increrr()
	            print("Fail: ")
	        else
	            print("Pass: ")
	        end
	        println($(esc(nam)))
	    end
	end

	# @test_throws err label expr
	#
	# Count one check (incrtest), evaluate `expr` at the call site, and print
	# "Pass: " followed by `label` only when it throws an exception that is an
	# instance of `err`. Returning normally, or throwing some other exception
	# type, prints "Fail: " and bumps the failure tally (increrr).
	#
	# All three arguments are escaped so that the exception type, the names in
	# `expr`, and any `$` interpolation inside `label` resolve where the macro
	# is invoked. The expression is run directly instead of through eval().
	macro test_throws(err, nam, arg)
	    quote
	        incrtest()
	        try
	            $(esc(arg))
	            # Reaching this line means nothing was thrown.
	            increrr()
	            print("Fail: ")
	        catch x
	            if isa(x, $(esc(err)))
	                print("Pass: ")
	            else
	                # An exception was raised, but not the expected kind.
	                increrr()
	                print("Fail: ")
	            end
	        end
	        println($(esc(nam)))
	    end
	end

	# Create some ASCII, UTF8, UTF16, and UTF32 strings
	# Fixture names end in _8, _16 or _32; the _16 and _32 values below are
	# produced by converting the matching _8 value with utf16() / utf32().
	strAscii = "abcdefgh"
	# First eight bytes of a literal that also contains U+00FF, i.e. "abcdefgh".
	# NOTE(review): presumably this yields a UTF8String with ASCII-only content -- confirm.
	strA_8 = ("abcdefgh\uff")[1:8]
	# ASCII followed by two U+00FF characters.
	strL_8 = "abcdef\uff\uff"
	# Adds two U+07FF characters (the largest code point that fits in 2 UTF-8 bytes).
	str2_8 = "abcd\uff\uff\u7ff\u7ff"
	# Adds two U+7FFF characters (3 UTF-8 bytes each).
	str3_8 = "abcd\uff\uff\u7fff\u7fff"
	# One each of U+00FF, U+07FF, U+7FFF and U+7FFFF (the last needs 4 UTF-8 bytes).
	str4_8 = "abcd\uff\u7ff\u7fff\U7ffff"
	# Ends with ED A0 80 ED B0 80: the surrogates U+D800 and U+DC00, each written
	# as its own 3-byte sequence.
	strS_8 = UTF8String(b"abcd\xc3\xbf\xdf\xbf\xe7\xbf\xbf\xed\xa0\x80\xed\xb0\x80")
	# Same prefix ending in U+10000; the surrogate-pair checks compare against this.
	strC_8 = UTF8String(b"abcd\xc3\xbf\xdf\xbf\xe7\xbf\xbf\U10000")
	# Same prefix ending in C0 80, the overlong 2-byte form of NUL.
	strZ_8 = UTF8String(b"abcd\xc3\xbf\xdf\xbf\xe7\xbf\xbf\xc0\x80")
	# Same prefix ending in a plain NUL byte; the overlong-NUL checks compare against this.
	strz_8 = UTF8String(b"abcd\xc3\xbf\xdf\xbf\xe7\xbf\xbf\0")

	# UTF-16 versions of the fixtures.
	strA_16 = utf16(strA_8)
	strL_16 = utf16(strL_8)
	str2_16 = utf16(str2_8)
	str3_16 = utf16(str3_8)
	str4_16 = utf16(str4_8)
	strS_16 = utf16(strS_8)
	# UTF-32 versions of the fixtures.
	strA_32 = utf32(strA_8)
	strL_32 = utf32(strL_8)
	str2_32 = utf32(str2_8)
	str3_32 = utf32(str3_8)
	str4_32 = utf32(str4_8)
	strS_32 = utf32(strS_8)
	# The ASCII fixture must compare equal to itself after conversion to each encoding.
	@testn("utf8(strAscii) == strAscii", utf8(strAscii) == strAscii)
	@testn("utf16(strAscii) == strAscii", utf16(strAscii) == strAscii)
	@testn("utf32(strAscii) == strAscii", utf32(strAscii) == strAscii)

	# Walk the fixtures as (UTF-8, UTF-16, UTF-32) triples and check every
	# conversion between the three encodings.
	for (strUTF8, strUTF16, strUTF32) in zip((strA_8,  strL_8,  str2_8,  str3_8,  str4_8),
	                                         (strA_16, strL_16, str2_16, str3_16, str4_16),
	                                         (strA_32, strL_32, str2_32, str3_32, str4_32))
	    println("strUTF8 = $strUTF8, strUTF16 = $strUTF16, strUTF32 = $strUTF32")
	    @testn("utf16(strUTF8) == strUTF16", utf16(strUTF8) == strUTF16)
	    @testn("utf32(strUTF8) == strUTF32", utf32(strUTF8) == strUTF32)
	    @testn("utf8(strUTF16) == strUTF8", utf8(strUTF16) == strUTF8)
	    @testn("utf32(strUTF16) == strUTF32", utf32(strUTF16) == strUTF32)
	    @testn("utf8(strUTF32) == strUTF8", utf8(strUTF32) == strUTF8)
	    @testn("utf16(strUTF32) == strUTF16", utf16(strUTF32) == strUTF16)
	end

	# Surrogate pairs: every conversion of the strS_* fixtures must equal strC_8.
	@testn("utf16(strS_8) == strC_8", utf16(strS_8) == strC_8)
	@testn("utf32(strS_8) == strC_8", utf32(strS_8) == strC_8)
	@testn("utf8(strS_16) == strC_8", utf8(strS_16) == strC_8)
	@testn("utf32(strS_16) == strC_8", utf32(strS_16) == strC_8)
	@testn("utf8(strS_32) == strC_8", utf8(strS_32) == strC_8)
	@testn("utf16(strS_32) == strC_8", utf16(strS_32) == strC_8)

	# Overlong \0: conversions of strZ_8 must equal strz_8.
	# Marked by the original author as currently broken (in utf8.jl).
	@testn("utf8(strZ_8) == strz_8", utf8(strZ_8) == strz_8)
	@testn("utf16(strZ_8) == strz_8", utf16(strZ_8) == strz_8)
	@testn("utf32(strZ_8) == strz_8", utf32(strZ_8) == strz_8)

	# Test invalid sequences
	#
	# Each check builds a UTF8String from a malformed byte sequence and expects
	# convert() to the target type T to throw (ArgumentError is the declared
	# exception type). The label given to @test_throws spells out the call.

	for T in (UTF8String, UTF16String, UTF32String)
	    try
	        # Continuation byte not after lead
	        for byt in 0x80:0xbf
	            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt))]))" convert(T, UTF8String(UInt8[byt]))
	        end

	        # Test lead bytes
	        for byt in 0xc0:0xff
	            # Single lead byte at end of string
	            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt))]))" convert(T, UTF8String(UInt8[byt]))
	            # Lead followed by non-continuation character < 0x80
	            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0]))" convert(T, UTF8String(UInt8[byt,0]))
	            # Lead followed by non-continuation character > 0xbf
	            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0xc0]))" convert(T, UTF8String(UInt8[byt,0xc0]))
	        end

	        # Test overlong 2-byte
	        # (the 0xc0 range starts at 0x81, so C0 80 itself is not checked here)
	        for byt in 0x81:0xbf
	            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xc0,0x$(hex(byt))]))" convert(T, UTF8String(UInt8[0xc0,byt]))
	        end
	        for byt in 0x80:0xbf
	            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xc1,0x$(hex(byt))]))" convert(T, UTF8String(UInt8[0xc1,byt]))
	        end

	        # Test overlong 3-byte
	        for byt in 0x80:0x9f
	            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xe0,0x$(hex(byt)),0x80]))" convert(T, UTF8String(UInt8[0xe0,byt,0x80]))
	        end

	        # Test overlong 4-byte: lead 0xf0 with a second byte below 0x90 encodes
	        # a code point under U+10000. (0xef is a 3-byte lead, so it cannot
	        # start a 4-byte sequence.)
	        for byt in 0x80:0x8f
	            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xf0,0x$(hex(byt)),0x80,0x80]))" convert(T, UTF8String(UInt8[0xf0,byt,0x80,0x80]))
	        end

	        # Test 4-byte > 0x10ffff
	        for byt in 0x90:0xbf
	            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xf4,0x$(hex(byt)),0x80,0x80]))" convert(T, UTF8String(UInt8[0xf4,byt,0x80,0x80]))
	        end
	        for byt in 0xf5:0xf7
	            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0x80,0x80]))" convert(T, UTF8String(UInt8[byt,0x80,0x80,0x80]))
	        end

	        # Test 5-byte
	        for byt in 0xf8:0xfb
	            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0x80,0x80,0x80]))" convert(T, UTF8String(UInt8[byt,0x80,0x80,0x80,0x80]))
	        end

	        # Test 6-byte
	        for byt in 0xfc:0xfd
	            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0x80,0x80,0x80,0x80]))" convert(T, UTF8String(UInt8[byt,0x80,0x80,0x80,0x80,0x80]))
	        end

	        # Test 7-byte
	        @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xfe,0x80,0x80,0x80,0x80,0x80,0x80]))" convert(T, UTF8String(UInt8[0xfe,0x80,0x80,0x80,0x80,0x80,0x80]))

	        # Three and above byte sequences
	        for byt in 0xe0:0xef
	            # Lead followed by only 1 continuation byte
	            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80]))" convert(T, UTF8String(UInt8[byt,0x80]))
	            # Lead ended by non-continuation character < 0x80
	            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0]))" convert(T, UTF8String(UInt8[byt,0x80,0]))
	            # Lead ended by non-continuation character > 0xbf
	            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0xc0]))" convert(T, UTF8String(UInt8[byt,0x80,0xc0]))
	        end

	        # 3-byte encoded surrogate character(s)
	        # Single surrogate
	        @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xed,0xa0,0x80]))" convert(T, UTF8String(UInt8[0xed,0xa0,0x80]))
	        # Not followed by surrogate
	        @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xed,0xa0,0x80,0xed,0x80,0x80]))" convert(T, UTF8String(UInt8[0xed,0xa0,0x80,0xed,0x80,0x80]))
	        # Trailing surrogate first
	        @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xed,0xb0,0x80,0xed,0xb0,0x80]))" convert(T, UTF8String(UInt8[0xed,0xb0,0x80,0xed,0xb0,0x80]))
	        # Followed by lead surrogate
	        @test_throws ArgumentError "convert($T, UTF8String(UInt8[0xed,0xa0,0x80,0xed,0xa0,0x80]))" convert(T, UTF8String(UInt8[0xed,0xa0,0x80,0xed,0xa0,0x80]))

	        # Four byte sequences
	        for byt in 0xf0:0xf4
	            # Lead followed by only 2 continuation bytes
	            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0x80]))" convert(T, UTF8String(UInt8[byt,0x80,0x80]))
	            # Lead followed by non-continuation character < 0x80
	            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0x80,0]))" convert(T, UTF8String(UInt8[byt,0x80,0x80,0]))
	            # Lead followed by non-continuation character > 0xbf
	            @test_throws ArgumentError "convert($T, UTF8String(UInt8[0x$(hex(byt)),0x80,0x80,0xc0]))" convert(T, UTF8String(UInt8[byt,0x80,0x80,0xc0]))
	        end
	    catch
	        # Something escaped the checks themselves: print T and the current
	        # value of byt, then propagate the exception with its original
	        # backtrace.
	        println("Error checking $T: 0x$(hex(byt))")
	        rethrow()
	    end
	end

	# Final tally of executed and failed checks.
	println("Out of $(TestConvert.counttest) tests, $(TestConvert.counterr) failed")

	end