Skip to content

Instantly share code, notes, and snippets.

@ilyannn
Last active August 29, 2015 14:05
Show Gist options
  • Save ilyannn/76934ffc80738685c22c to your computer and use it in GitHub Desktop.
Save ilyannn/76934ffc80738685c22c to your computer and use it in GitHub Desktop.
UTF8 conversion routines test
#!/usr/bin/env xcrun swift -O3
//
// utf.swift
//
//
// Created by Ilya Nikokoshev on 8/22/14.
//
//
import Foundation
// Code point shared by the encoders in this file (they read this global directly).
// 0x1FFFF is above 0xFFFF, so the hand-written routines take their four-byte branch.
var value:UInt32 = 0x1FFFF
// The original.
// Appends the UTF-8 bytes for the global `value` to `buffer`.
//
// Every quotient/remainder is computed in UInt32, the marker bits are added
// in UInt32 as well, and only the finished byte is narrowed to UInt8.
//
// Values in 0xD800...0xDFFF are kept out of the three-byte case and so end
// up on the four-byte path together with everything above 0xFFFF.
func utf8_1(inout buffer: [UInt8]) {
    // One byte: 0xxxxxxx
    if value <= 0x7F {
        buffer.append(UInt8(value))
        return
    }

    // Two bytes: 110xxxxx 10xxxxxx
    if value <= 0x7FF {
        buffer.append(UInt8(0xC0 + value / 0x40))
        buffer.append(UInt8(0x80 + value % 0x40))
        return
    }

    // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
    // (value is already known to be at least 0x800 here).
    let isSurrogate = 0xD800 <= value && value <= 0xDFFF
    if value <= 0xFFFF && !isSurrogate {
        buffer.append(UInt8(0xE0 + value / 0x1000))
        buffer.append(UInt8(0x80 + (value % 0x1000) / 0x40))
        buffer.append(UInt8(0x80 + value % 0x40))
        return
    }

    // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    buffer.append(UInt8(0xF0 + value / 0x40000))
    buffer.append(UInt8(0x80 + (value % 0x40000) / 0x1000))
    buffer.append(UInt8(0x80 + (value % 0x1000) / 0x40))
    buffer.append(UInt8(0x80 + value % 0x40))
}
// Universal for any 32-bit number
// Appends a UTF-8 style encoding of the global `value` to `buffer` without
// hard-coding the length of the sequence.
//
// Bytes are generated from the least significant six-bit group upwards, so
// they are collected in reverse and flipped before being added to `buffer`.
func encode8(inout buffer:[UInt8]) {
    var remaining = value
    var backwards:[UInt8] = []

    if remaining < 0x80 {
        // Seven bits or fewer: the value is its own single byte.
        backwards.append(UInt8(remaining & 0x7F))
    } else {
        // `limit` is the first value that no longer fits into the lead byte;
        // it starts at 64 and halves with every continuation byte emitted.
        var limit:UInt32 = 0x40

        // Continuation bytes, each of the form 10xxxxxx.
        while remaining >= limit {
            backwards.append(UInt8((remaining & 0x3F) | 0x80))
            remaining >>= 6
            limit >>= 1
        }

        // Lead byte: the length marker is 256 - 2 * limit, i.e. 192 after
        // one continuation byte, 224 after two, 240 after three, and so on.
        let marker:UInt32 = 0x100 - 2 * limit
        backwards.append(UInt8((remaining & 0x3F) | marker))
    }

    buffer.extend(reverse(backwards))
}
// Closure-based variant of the hand-written encoder: each byte is computed
// as a UInt32 and handed to the local `$` closure, which narrows it to UInt8
// and appends it to `buffer`. The input is the global `value`.
//
// Values in 0xD800...0xDFFF are not treated as three-byte sequences and are
// written by the final four-byte branch, like everything above 0xFFFF.
func utf8_$(inout buffer: [UInt8]) {
    // Narrows a number that fits into UInt8 and stores it in the buffer.
    let $ = { (unit:UInt32) -> () in buffer.append(UInt8(unit)) }

    let code = value
    let inSurrogateGap = 0xD800 <= code && code <= 0xDFFF

    if code < 0x80 {
        $(code)
    } else if code < 0x800 {
        $(0xC0 + code / 0x40)
        $(0x80 + code % 0x40)
    } else if code < 0x10000 && !inSurrogateGap {
        $(0xE0 + code / 0x1000)
        $(0x80 + (code % 0x1000) / 0x40)
        $(0x80 + code % 0x40)
    } else {
        $(0xF0 + code / 0x40000)
        $(0x80 + (code % 0x40000) / 0x1000)
        $(0x80 + (code % 0x1000) / 0x40)
        $(0x80 + code % 0x40)
    }
}
// "Small cast" variant of the hand-written encoder for the global `value`.
//
// Each quotient or remainder is narrowed to UInt8 first, and the marker bits
// are then added in UInt8 with the overflow operator `&+`. Every line follows
// that same pattern so the variant measures one thing only.
//
// Values in 0xD800...0xDFFF are excluded from the three-byte case and are
// written by the final four-byte branch, like everything above 0xFFFF.
func utf8_2(inout buffer: [UInt8]) {
    if value <= 0x007F {
        // One byte: the value itself.
        buffer.append(UInt8(value))
    }
    else if 0x0080 <= value && value <= 0x07FF {
        // Two bytes: 110xxxxx 10xxxxxx
        buffer.append(UInt8(value &/ 64) &+ 192)
        buffer.append(UInt8(value &% 64) &+ 128)
    }
    else if (0x0800 <= value && value <= 0xD7FF) || (0xE000 <= value && value <= 0xFFFF) {
        // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
        buffer.append(UInt8(value &/ 4096) &+ 224)
        buffer.append(UInt8((value &% 4096) &/ 64) &+ 128)
        // Narrow before adding the marker, exactly like the other lines.
        buffer.append(UInt8(value &% 64) &+ 128)
    }
    else {
        // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        buffer.append(UInt8(value &/ 262144) &+ 240)
        buffer.append(UInt8((value &% 262144) &/ 4096) &+ 128)
        buffer.append(UInt8((value &% 4096) &/ 64) &+ 128)
        buffer.append(UInt8(value &% 64) &+ 128)
    }
}
// The same code point as `value`, wrapped in a UnicodeScalar; utf8std reads this global.
var scalar = UnicodeScalar(value)
// Reference encoder: turns the global `scalar` into a String and appends
// that string's UTF-8 view to `buffer`.
func utf8std(inout buffer: [UInt8]) {
    let text = String(scalar)
    let encoded = text.utf8
    buffer.extend(encoded)
}
// Encode `scalar` once through the standard-library path and keep the bytes in buffer_std.
var buffer_std:[UInt8] = []
utf8std(&buffer_std)
// NOTE(review): nothing in the visible source reads buffer_std, buffer_test1 or
// buffer_test2 afterwards — presumably left over from an output comparison; confirm
// before removing.
var buffer_test1:[UInt8] = []
var buffer_test2:[UInt8] = []
// Calls `f` repeatedly and returns how long all the calls took, in seconds.
//
// f          - the work to measure; invoked back to back with no pause.
// iterations - how many times `f` is invoked. Defaults to 1000, the count
//              this benchmark has always used. Values below 1 run nothing.
//
// The interval is read from NSProcessInfo's systemUptime instead of NSDate,
// so an adjustment of the wall clock during the run cannot distort (or make
// negative) the reported duration.
func time(f:()->(), iterations:Int = 1000) -> NSTimeInterval {
    let processInfo = NSProcessInfo.processInfo()
    let start = processInfo.systemUptime
    for _ in 0..<max(iterations, 0) {
        f()
    }
    return processInfo.systemUptime - start
}
// Benchmarks one encoder and prints its label followed by the measured time.
//
// prefix - label printed in front of the timing.
// f      - encoder under test. It is handed the same local array on every
//          call, so the array keeps growing for the whole measurement.
func test(prefix:String, f:(inout [UInt8]) -> ()) {
    var scratch:[UInt8] = []
    let elapsed = time({ f(&scratch) })
    println("\(prefix) \(elapsed)")
}
// Run the benchmark: each call times one encoder and prints a single
// "<label> <elapsed time>" line.
test("original ", utf8_1)
test("small cast", utf8_2)
test("with $ ", utf8_$)
test("universal ", encode8)
test("standard ", utf8std)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment