Skip to content

Instantly share code, notes, and snippets.

@ScottPJones
Created May 25, 2015 21:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ScottPJones/79ed895f05f85f333d84 to your computer and use it in GitHub Desktop.
Save ScottPJones/79ed895f05f85f333d84 to your computer and use it in GitHub Desktop.
Benchmark tests for string length, validation, and conversion
# Compatibility shim: on Julia versions before 0.4 make `AbstractString`
# an alias for `String`, so the definitions below can use a single name.
if VERSION < v"0.4-"
    typealias AbstractString String
end
# Call `length(str)` `max` times and return the result of the final call.
#
# This is the kernel that the "length" benchmarks wrap in `@time`.
#
# `max` must be at least 1: with `max < 1` the loop body never runs, the result
# variable is never assigned, and returning it raises an error.
#
# The argument is annotated directly instead of through an unused static
# parameter; dispatch is unchanged.
function tst(str::AbstractString, max::Int)
    local len
    for i in 1:max
        len = length(str)
    end
    return len
end
# Convert `str` with `utf8` `max` times and return the result of the final conversion.
#
# `max` must be at least 1: with `max < 1` the loop body never runs, the result
# variable is never assigned, and returning it raises an error.
#
# The argument is annotated directly instead of through an unused static
# parameter; dispatch is unchanged.
function cvt8(str::AbstractString, max::Int)
    local converted
    for i in 1:max
        converted = utf8(str)
    end
    return converted
end
# Convert `str` with `utf16` `max` times and return the result of the final conversion.
#
# `max` must be at least 1: with `max < 1` the loop body never runs, the result
# variable is never assigned, and returning it raises an error.
#
# The argument is annotated directly instead of through an unused static
# parameter; dispatch is unchanged.
function cvt16(str::AbstractString, max::Int)
    local converted
    for i in 1:max
        converted = utf16(str)
    end
    return converted
end
# Convert `str` with `utf32` `max` times and return the result of the final conversion.
#
# `max` must be at least 1: with `max < 1` the loop body never runs, the result
# variable is never assigned, and returning it raises an error.
#
# The argument is annotated directly instead of through an unused static
# parameter; dispatch is unchanged.
function cvt32(str::AbstractString, max::Int)
    local converted
    for i in 1:max
        converted = utf32(str)
    end
    return converted
end
# Call `is_valid_ascii(str)` `max` times and return the result of the final call.
#
# `max` must be at least 1: with `max < 1` the loop body never runs, the result
# variable is never assigned, and returning it raises an error.
#
# The argument is annotated directly instead of through an unused static
# parameter; dispatch is unchanged.
function tstva(str::AbstractString, max::Int)
    local valid
    for i in 1:max
        valid = is_valid_ascii(str)
    end
    return valid
end
# Call `is_valid_utf8(str)` `max` times and return the result of the final call.
#
# `max` must be at least 1: with `max < 1` the loop body never runs, the result
# variable is never assigned, and returning it raises an error.
#
# The argument is annotated directly instead of through an unused static
# parameter; dispatch is unchanged.
function tstv8(str::AbstractString, max::Int)
    local valid
    for i in 1:max
        valid = is_valid_utf8(str)
    end
    return valid
end
# Call `is_valid_utf16(str)` `max` times and return the result of the final call.
#
# `max` must be at least 1: with `max < 1` the loop body never runs, the result
# variable is never assigned, and returning it raises an error.
#
# The argument is annotated directly instead of through an unused static
# parameter; dispatch is unchanged.
function tstv16(str::AbstractString, max::Int)
    local valid
    for i in 1:max
        valid = is_valid_utf16(str)
    end
    return valid
end
# Call `is_valid_utf32(str)` `max` times and return the result of the final call.
#
# `max` must be at least 1: with `max < 1` the loop body never runs, the result
# variable is never assigned, and returning it raises an error.
#
# The argument is annotated directly instead of through an unused static
# parameter; dispatch is unchanged.
function tstv32(str::AbstractString, max::Int)
    local valid
    for i in 1:max
        valid = is_valid_utf32(str)
    end
    return valid
end
# Run the benchmark groups for one string given in three encodings.
#
# Arguments:
#   flg      - when true, the conversion benchmarks (`tstcvt`) are run as well
#   str      - label printed in the header line; a trailing ':' is accepted
#   n        - iteration count handed to every timed loop
#   strUTF8, strUTF16, strUTF32 - the string under test in each encoding
#
# Prints a header with the iteration count, the length of the UTF-32 string and
# the `sizeof` of each encoding, then runs the length and validation benchmarks.
# Always returns "".
function tstall(flg::Bool, str::AbstractString, n::Int, strUTF8::UTF8String, strUTF16::UTF16String, strUTF32::UTF32String)
    # Labels may already end in ':'; strip it so the header reads "ASCII: Looping"
    # rather than "ASCII:: Looping".
    label = rstrip(str, ':')
    println("\n\n$label: Looping $n times, length=$(length(strUTF32))")
    println("UTF-8: $(sizeof(strUTF8)), UTF-16: $(sizeof(strUTF16)), UTF-32: $(sizeof(strUTF32))\n")
    tstlen(n, strUTF8, strUTF16, strUTF32)
    tstval(n, strUTF8, strUTF16, strUTF32)
    flg && tstcvt(n, strUTF8, strUTF16, strUTF32)
    return ""
end
# Time the validation kernels for the UTF-8, UTF-16 and UTF-32 forms of a string.
#
# Arguments:
#   n        - iteration count handed to every timed loop
#   strUTF8, strUTF16, strUTF32 - the string under test in each encoding
#
# Each measurement is printed as a label followed by the `@time` report.
# The UTF-32 measurement is skipped, with no output, when a single probe call
# to `is_valid_utf32` raises. Always returns "".
function tstval(n::Int, strUTF8::UTF8String, strUTF16::UTF16String, strUTF32::UTF32String)
    print("UTF-8 valid: ")
    @time tstv8(strUTF8, n)
    print("UTF-16 valid: ")
    @time tstv16(strUTF16, n)
    # Best-effort probe: only whether the call completes matters, not its result.
    # The try is limited to the probe so that a failure inside the timed run
    # itself is not hidden.
    can_check32 = try
        is_valid_utf32(strUTF32)
        true
    catch
        false
    end
    if can_check32
        print("UTF-32 valid: ")
        @time tstv32(strUTF32, n)
    end
    println()
    return ""
end
# Time `length` on the UTF-8, UTF-16 and UTF-32 forms of a string.
#
# Each measurement is a label followed by the `@time` report for `n` calls,
# and a blank line closes the group. Always returns "".
function tstlen(n::Int, strUTF8::UTF8String, strUTF16::UTF16String, strUTF32::UTF32String)
    print("UTF-8 length: ");  @time tst(strUTF8, n)
    print("UTF-16 length: "); @time tst(strUTF16, n)
    print("UTF-32 length: "); @time tst(strUTF32, n)
    println()
    return ""
end
# Run every benchmark that applies to a pure-ASCII string.
#
# Prints a header with the iteration count and the string length, then times
# `length`, ASCII validation and conversion to UTF-8, UTF-16 and UTF-32,
# each over `n` iterations. Always returns "".
function tstascii(n::Int, str::ASCIIString)
    println("\nASCII: Looping $n times, length=$(length(str))")
    print("length: ");            @time tst(str, n)
    print("is_valid_ascii: ");    @time tstva(str, n)
    print("Convert to UTF-8: ");  @time cvt8(str, n)
    print("Convert to UTF-16:");  @time cvt16(str, n)
    print("Convert to UTF-32:");  @time cvt32(str, n)
    return ""
end
# Time every cross-encoding conversion between the three forms of a string.
#
# For each source encoding the two other target encodings are measured over
# `n` iterations; each measurement is a label followed by the `@time` report.
# Always returns "".
function tstcvt(n::Int, strUTF8::UTF8String, strUTF16::UTF16String, strUTF32::UTF32String)
    # Source: UTF-8
    print("UTF-8 convert to UTF-16: ");  @time cvt16(strUTF8, n)
    print("UTF-8 convert to UTF-32: ");  @time cvt32(strUTF8, n)
    # Source: UTF-16
    print("UTF-16 convert to UTF-8: ");  @time cvt8(strUTF16, n)
    print("UTF-16 convert to UTF-32: "); @time cvt32(strUTF16, n)
    # Source: UTF-32
    print("UTF-32 convert to UTF-8: ");  @time cvt8(strUTF32, n)
    print("UTF-32 convert to UTF-16: "); @time cvt16(strUTF32, n)
    return ""
end
# Run the full benchmark suite for one set of equally sized test strings.
#
# `strAscii` goes through the ASCII-only benchmarks. Each of the six UTF-8
# strings is converted to UTF-16 and UTF-32 and handed to `tstall` under its
# own label. `flg` and `n` are passed through unchanged. Returns whatever the
# final `tstall` call returns.
function tstsiz(flg,n,strAscii,strA_UTF8,strL_UTF8,str2_UTF8,str3_UTF8,str4_UTF8,strS_UTF8)
    labels   = ("ASCII:", "Latin1:", "2-byte:", "3-byte:", "4-byte:", "Surrogates:")
    as_utf8  = (strA_UTF8, strL_UTF8, str2_UTF8, str3_UTF8, str4_UTF8, strS_UTF8)
    # Every conversion is done before any output: all UTF-16 first, then all UTF-32.
    as_utf16 = map(utf16, as_utf8)
    as_utf32 = map(utf32, as_utf8)

    tstascii(n, strAscii)

    result = ""
    for (label, s8, s16, s32) in zip(labels, as_utf8, as_utf16, as_utf32)
        result = tstall(flg, label, n, s8, s16, s32)
    end
    return result
end
# Drive the whole benchmark over string sets of increasing size.
#
# Arguments:
#   n   - iteration count passed through to every timed loop
#   flg - passed through to `tstsiz`; when true (the default) the conversion
#         benchmarks are included
#
# Runs `tstsiz` on a 4-character set, an 8-character set, and then nine more
# times on a 16-character base set that is quadrupled before each run.
function dotest(n,flg::Bool = true)
    # Create some ASCII, UTF8, UTF16, and UTF32 strings
    baseascii = "abcdefghijklmnop\uff"
    binstr = b"abcdefghijk\xc3\xbf\xdf\xbf\xe7\xbf\xbf\xed\xa0\x80\xed\xb0\x80\xed\xaf\xbf\xed\xbf\xbf"

    # 4-character set
    strAscii  = "abcd"
    strA_UTF8 = baseascii[1:4]
    strL_UTF8 = "ab\uff\uff"
    str2_UTF8 = "ab\uff\u7ff"
    str3_UTF8 = "a\uff\u7ff\u7fff"
    str4_UTF8 = "a\uff\u7fff\U7ffff"
    strS_UTF8 = UTF8String(b"\xc3\xbf\xdf\xbf\xe7\xbf\xbf\xed\xa0\x80\xed\xb0\x80")
    # This set used to be overwritten by the next one without ever being measured.
    tstsiz(flg,n,strAscii,strA_UTF8,strL_UTF8,str2_UTF8,str3_UTF8,str4_UTF8,strS_UTF8)

    # 8-character set
    strAscii  = "abcdefgh"
    strA_UTF8 = baseascii[1:8]
    strL_UTF8 = "abcdef\uff\uff"
    str2_UTF8 = "abcd\uff\uff\u7ff\u7ff"
    str3_UTF8 = "abcd\uff\uff\u7fff\u7fff"
    str4_UTF8 = "abcd\uff\u7ff\u7fff\U7ffff"
    strS_UTF8 = UTF8String(b"abcd\xc3\xbf\xdf\xbf\xe7\xbf\xbf\xed\xa0\x80\xed\xb0\x80")
    tstsiz(flg,n,strAscii,strA_UTF8,strL_UTF8,str2_UTF8,str3_UTF8,str4_UTF8,strS_UTF8)

    # 16-character base set; it is only measured after being grown in the loop below.
    strAscii  = "abcdefghijklmnop"
    strA_UTF8 = baseascii[1:16]
    strL_UTF8 = "abcdefghijk\uff\uff\uff\uff\uff"
    str2_UTF8 = "abcdefghijk\uff\uff\uff\u7ff\u7ff"
    str3_UTF8 = "abcdefghijk\uff\uff\uff\u7fff\u7fff"
    str4_UTF8 = "abcdefghijk\uff\u7ff\u7fff\U7ffff\U0fffff"
    for round in 1:9
        # Quadruple every string (and the raw byte vector) before each run.
        strAscii  ^= 4
        strA_UTF8 ^= 4
        strL_UTF8 ^= 4
        str2_UTF8 ^= 4
        str3_UTF8 ^= 4
        str4_UTF8 ^= 4
        binstr = vcat(binstr,binstr,binstr,binstr)
        tstsiz(flg,n,strAscii,strA_UTF8,strL_UTF8,str2_UTF8,str3_UTF8,str4_UTF8,UTF8String(binstr))
    end
end
# Final top-level expression: the file evaluates to an empty string
# (presumably so that loading it at the REPL does not echo the last definition — TODO confirm).
""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment