Skip to content

Instantly share code, notes, and snippets.

@jepers
Last active November 29, 2015 03:07
Show Gist options
  • Save jepers/4358a1d02c94617d899f to your computer and use it in GitHub Desktop.
Save jepers/4358a1d02c94617d899f to your computer and use it in GitHub Desktop.
Testing Swift's ability to optimize code using simple custom Float4 and Float4x4 value types, compared with their SIMD counterparts. The subscript implementation turns out to make a big difference for F4x4 but not for F4 ...
//==============================================================================
// This program possibly demonstrates an issue with (or an opportunity for
// improvement of) the optimizer.
// (Tested with Xcode 7.2 beta 4, OS X 10.10.5, MacBook Pro late 2013)
// (Compiled (-O -gnone) as command line app)
//==============================================================================
//
// There is a test at the end which measures the time it takes to do some
// matrix (and vector) type operations.
//
// It does this using SIMD float4x4 vs my F4x4 (and float4 vs my F4).
//
// F4x4 is identical in structure to F4, and it has four F4 as elements.
//
// The program defines two ways of implementing the subscripts, Subs1 and Subs2.
//
// So F4x4 and F4 can be set to use one of them by conforming to Subs1 or Subs2.
//
// But here is the thing:
//
// Using Subs1 or Subs2 for F4 doesn't affect the timings of the test, BUT:
// Using Subs2 instead of Subs1 for F4x4 will produce ~ 8 times slower code.
// (The + operator for F4x4 uses F4 and its + operator and subscript.)
//
// So how come Subs1/Subs2 makes a difference for F4x4 and not for F4?
//
// (And also, using F4x4:Subs2 with safety checks disabled makes it even slower.
// Why should disabling safety checks make it slower?)
//
//------------------------------------------------------------------------------
import Cocoa
import simd
//------------------------------------------------------------------------------
// NOTE: Protocols, protocol extensions and generics are used only to make the
// code smaller and clearer. The issue(?) has nothing to do with these features.
// (I've tried and the results are the same with no protocols or generics.)
//------------------------------------------------------------------------------
/// A value made of exactly four `Element`s that can be accessed either as a
/// homogeneous tuple (`elements`) or by integer index (the subscript).
///
/// `description` is supplied by a protocol extension and prints `elements`.
protocol FourElementSubscriptable : CustomStringConvertible {
/// The type of each of the four stored values.
typealias Element
/// Reads or writes one of the four elements by position.
subscript(index: Int) -> Element { get set }
/// The four elements as a tuple; position `n` of the tuple is element `n`.
var elements: (Element, Element, Element, Element) { get set }
}
extension FourElementSubscriptable {
    /// Textual form of the value: whatever string interpolation produces for
    /// the `elements` tuple.
    var description: String {
        let tuple = elements
        return "\(tuple)"
    }
}
//------------------------------------------------------------------------------
// Subs1 and Subs2 are the two different ways of implementing subscripts:
//------------------------------------------------------------------------------
/// Marker protocol: conformers pick up the `switch`-based subscript from the
/// `Subs1` extension.
protocol Subs1 : FourElementSubscriptable {}
/// Marker protocol: conformers pick up the unsafe-pointer-based subscript from
/// the `Subs2` extension.
protocol Subs2 : FourElementSubscriptable {}
/// Subscript variant 1: selects the tuple member with a `switch` on `index`.
extension Subs1 {
/// Reads or writes the element at `index`; valid indices are 0 through 3.
/// Any other index terminates the program via `fatalError`.
subscript(index: Int) -> Element {
get {
// Map each valid index onto the matching member of the `elements` tuple.
switch index {
case 0: return elements.0
case 1: return elements.1
case 2: return elements.2
case 3: return elements.3
default: fatalError("Index out of bounds")
}
}
set {
// Same mapping as the getter, assigning `newValue` to the selected member.
switch index {
case 0: elements.0 = newValue
case 1: elements.1 = newValue
case 2: elements.2 = newValue
case 3: elements.3 = newValue
default: fatalError("Index out of bounds")
}
}
}
}
/// Subscript variant 2: treats the memory of the whole value as an array of
/// `Element` and indexes into it through an unsafe pointer.
///
/// NOTE(review): this reinterprets a pointer to `Self` as a pointer to
/// `Element`, so it assumes the conforming type's storage is four contiguous
/// `Element`s starting at offset 0. Presumably that holds for types whose only
/// stored property is the `elements` tuple — confirm before adding conformers
/// with any other stored properties.
extension Subs2 {
/// Reads or writes the element at `index`; valid indices are 0 through 3,
/// enforced by `precondition`.
subscript(index: Int) -> Element {
get {
precondition(index >= 0 && index < 4)
// `withUnsafePointer` takes its argument inout, and `self` is immutable in
// a getter, so the read goes through a mutable local copy.
var selfCopy = self; return withUnsafePointer(&selfCopy) { UnsafePointer<Element>($0)[index] }
}
set {
precondition(index >= 0 && index < 4)
// Writes in place through a pointer to `self` reinterpreted as `Element`s.
withUnsafeMutablePointer(&self) { UnsafeMutablePointer<Element>($0)[index] = newValue }
}
}
}
//==============================================================================
// F4 - To be compared to SIMD float4.
// NOTE: Using Subs1 or Subs2 for this type will not affect the test timings.
//==============================================================================
/// Four `Float` lanes stored in a homogeneous tuple; the hand-written
/// counterpart of SIMD `float4` in this benchmark.
struct F4 : Subs1 { // <--- Same timings no matter if using Subs1 or Subs2.
    var elements : (Float, Float, Float, Float)

    /// Creates a vector with all four lanes set to zero.
    init() {
        self.init((0, 0, 0, 0))
    }

    /// Creates a vector from four explicit lane values.
    init(_ elements: (Float, Float, Float, Float)) {
        self.elements = elements
    }

    /// Returns a vector whose lane `i` is `transform(self[i], other[i])`.
    func mapWith(other: F4, @noescape transform: (Float, Float) -> Float) -> F4 {
        // Start from a mutable copy of `other` and overwrite it lane by lane;
        // each lane is read before it is replaced.
        var result = other
        for lane in 0 ..< 4 {
            result[lane] = transform(self[lane], result[lane])
        }
        return result
    }
}
/// Lane-wise sum of two `F4` values.
func +(lhs: F4, rhs: F4) -> F4 {
    return lhs.mapWith(rhs) { $0 + $1 }
}
//==============================================================================
// F4x4 - To be compared to SIMD float4x4.
// NOTE: Using Subs2 instead of Subs1 will make it about 8 times slower.
//==============================================================================
/// Four `F4` values stored in a homogeneous tuple; the hand-written
/// counterpart of SIMD `float4x4` in this benchmark.
struct F4x4 : Subs1 { // <--- Try Subs1 & Subs2, NOTE: Subs2 is 8x slower, why?
    var elements: (F4, F4, F4, F4)

    /// Creates a matrix whose four `F4` elements are all default-initialized.
    init() {
        self.init((F4(), F4(), F4(), F4()))
    }

    /// Creates a matrix from four explicit `F4` elements.
    init(_ elements: (F4, F4, F4, F4)) {
        self.elements = elements
    }

    /// Returns a matrix whose element `i` is `transform(self[i], other[i])`.
    func mapWith(other: F4x4, @noescape transform: (F4, F4) -> F4) -> F4x4 {
        // Start from a mutable copy of `other` and overwrite it element by
        // element; each element is read before it is replaced.
        var result = other
        for slot in 0 ..< 4 {
            result[slot] = transform(self[slot], result[slot])
        }
        return result
    }
}
/// Element-wise sum of two `F4x4` values, using `F4`'s `+` for each element.
func +(lhs: F4x4, rhs: F4x4) -> F4x4 {
    return lhs.mapWith(rhs) { $0 + $1 }
}
//==============================================================================
// Some convenience (no effect on issue; same without protocols and generics)
//==============================================================================
/// Requirements a type must meet to be run through the generic `test` function:
/// a default value, a random-value factory, addition, and a description.
protocol Testable : CustomStringConvertible {
/// Creates the value used as the starting point of the summation.
init()
/// Returns one randomly generated value.
static func random() -> Self
/// Adds two values of the conforming type.
func +(lhs: Self, rhs: Self) -> Self
}
extension Testable {
    /// Returns an array of `num` values, each produced by its own call to
    /// `random()`.
    ///
    /// - Parameter num: Number of values to generate; must not be negative.
    static func random(num: Int) -> [Self] {
        // Build the array directly rather than filling it with `Self()`
        // placeholders and then overwriting every slot.
        return (0 ..< num).map { _ in Self.random() }
    }

    /// Returns four independently generated random values as a tuple.
    static func random4() -> (Self, Self, Self, Self) {
        return (Self.random(), Self.random(), Self.random(), Self.random())
    }
}
/// Prints a SIMD `float4` as its four lanes in parentheses.
extension float4 : CustomStringConvertible {
    public var description: String {
        let (a, b, c, d) = (x, y, z, w)
        return "(\(a), \(b), \(c), \(d))"
    }
}

/// Prints a SIMD `float4x4` as its four subscripted vectors in parentheses.
extension float4x4 : CustomStringConvertible {
    public var description: String {
        let (c0, c1, c2, c3) = (self[0], self[1], self[2], self[3])
        return "(\(c0), \(c1), \(c2), \(c3))"
    }
}
extension Float : Testable {
    /// Returns `arc4random()` divided by `UInt32.max`, computed in `Double`
    /// and then narrowed to `Float`.
    static func random() -> Float {
        let numerator = Double(arc4random())
        let denominator = Double(UInt32.max)
        return Float(numerator / denominator)
    }
}

extension float4 : Testable {
    /// Returns a `float4` built from four random `Float`s.
    static func random() -> float4 {
        let lanes: [Float] = Float.random(4)
        return float4(lanes)
    }
}

extension float4x4 : Testable {
    /// Returns a `float4x4` built from four random `float4`s.
    static func random() -> float4x4 {
        let vectors: [float4] = float4.random(4)
        return float4x4(vectors)
    }
}

extension F4 : Testable {
    /// Returns an `F4` built from four random `Float`s.
    static func random() -> F4 {
        return F4(Float.random4())
    }
}

extension F4x4 : Testable {
    /// Returns an `F4x4` built from four random `F4`s.
    static func random() -> F4x4 {
        return F4x4(F4.random4())
    }
}
//==============================================================================
// The test
//==============================================================================
/// Times the summation of ten million random `T` values, four times over, and
/// prints the elapsed seconds of each pass.
///
/// The hash of the final sum's description is printed alongside each timing so
/// the summation result is actually used.
func test<T: Testable>(_: T.Type) {
    let sampleCount = 10_000_000
    print("Preparing ... (arc4random() is slow ...)")
    let samples = T.random(sampleCount)
    print("Testing \(T.self):")
    for _ in 0 ..< 4 {
        var accumulator = T()
        let start = CACurrentMediaTime()
        // Indexed loop kept as-is: this is the operation being measured.
        for i in 0 ..< sampleCount { accumulator = accumulator + samples[i] }
        let elapsed = CACurrentMediaTime() - start
        let checksum = accumulator.description.hashValue
        print(String(format: "time: %8.6f ( deadcodeeliminationprevention: \(checksum) )", elapsed))
    }
}
/// Returns the name of the subscript variant for a type conforming to `Subs1`.
func subsVariant<T: Subs1>(_: T.Type) -> String {
    return "Subs1"
}

/// Returns the name of the subscript variant for a type conforming to `Subs2`.
func subsVariant<T: Subs2>(_: T.Type) -> String {
    return "Subs2"
}
// Matrix benchmark: SIMD float4x4 versus the hand-written F4x4.
let matrixVariant = subsVariant(F4x4)
print("Testing float4x4 and F4x4 : \(matrixVariant) <--- Edit code to make F4x4 conform to Subs1 / Subs2 to see difference.")
test(float4x4)
test(F4x4)

// Vector benchmark: SIMD float4 versus the hand-written F4.
let vectorVariant = subsVariant(F4)
print("\nMight as well run the test for float4 and F4 : \(vectorVariant) too:")
test(float4)
test(F4)
//------------------------------------------------------------------------------
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment