snowzurfer/3DPointsFromDepth.swift

## 3DPointsFromDepth.swift
import ARKit
import SceneKit

let horizontalPoints = 256 / 2
let verticalPoints = 192 / 2
var depthNodes = [SCNNode]()
var parentDebugNodes = SCNNode()
var sceneView: ARSCNView!

// Somewhere during setup
func setup() {
    let configuration = ARWorldTrackingConfiguration()
    configuration.frameSemantics = .smoothedSceneDepth
    sceneView.session.run(configuration)
    sceneView.scene.rootNode.addChildNode(parentDebugNodes)

    let sizeGeomPredictions = 0.005
    let geom = SCNBox(width: sizeGeomPredictions, height: sizeGeomPredictions, length: sizeGeomPredictions, chamferRadius: 0)
    geom.firstMaterial?.diffuse.contents = UIColor.green

    for _ in 0..<(horizontalPoints * verticalPoints) {
        let node = SCNNode(geometry: geom)
        self.parentDebugNodes.addChildNode(node)
        self.depthNodes.append(node)
    }
}


func session(_ session: ARSession, didUpdate frame: ARFrame) {
    guard let smoothedDepth = frame.smoothedSceneDepth?.depthMap else {
        return
    }
    let capturedImage = frame.capturedImage

    let lockFlags = CVPixelBufferLockFlags.readOnly
    CVPixelBufferLockBaseAddress(smoothedDepth, lockFlags)
    defer {
      CVPixelBufferUnlockBaseAddress(smoothedDepth, lockFlags)
    }

    let baseAddress = CVPixelBufferGetBaseAddressOfPlane(smoothedDepth, 0)!
    let depthByteBuffer = baseAddress.assumingMemoryBound(to: Float32.self)

    // The `.size` accessor simply read the CVPixelBuffer's width and height in pixels.
    //
    // They are the same ratio:
    // 1920 x 1440 = 1440 x 1920 = 0.75
    let depthMapSize = smoothedDepth.size
    // 192 x 256 = 0.75
    let capturedImageSize = capturedImage.size

    var cameraIntrinsics = frame.camera.intrinsics
    let depthResolution = simd_float2(x: Float(depthMapSize.x), y: Float(depthMapSize.y))
    let scaleRes = simd_float2(x: Float(capturedImageSize.x) / depthResolution.x,
                               y: Float(capturedImageSize.y) / depthResolution.y )
    // Make the camera intrinsics be with respect to Depth.
    cameraIntrinsics[0][0] /= scaleRes.x
    cameraIntrinsics[1][1] /= scaleRes.y

    cameraIntrinsics[2][0] /= scaleRes.x
    cameraIntrinsics[2][1] /= scaleRes.y

    // This will be the long size, because of the rotation
    let horizontalStep = Float(depthMapSize.x) / Float(self.horizontalPoints)
    let halfHorizontalStep = horizontalStep / 2
    // This will be the short size, because of the rotation
    let verticalStep = Float(depthMapSize.y) / Float(self.verticalPoints)
    let halfVerticalStep = verticalStep / 2

     for h in 0..<horizontalPoints {
        for v in 0..<verticalPoints {
            let x = Float(h) * horizontalStep + halfHorizontalStep
            let y = Float(v) * verticalStep + halfVerticalStep
            let depthMapPoint = simd_float2(x, y)

            // Sample depth
            let metricDepth = sampleDepthRaw(depthByteBuffer, size: depthMapSize, at: .init(depthMapPoint))

            let wp = worldPoint(depthMapPixelPoint: depthMapPoint,
                                depth: metricDepth,
                                cameraIntrinsics: cameraIntrinsics,
                                // This is crucial: you need to always use the view matrix for Landscape Right.
                                viewMatrixInverted: frame.camera.viewMatrix(for: .landscapeRight).inverse)
            let node = self.depthNodes[v * horizontalPoints + h]
            node.simdWorldPosition = wp
        }
    }
}

## Utils.swift
func sampleDepthRaw(_ pointer: UnsafeMutablePointer<Float32>, size: SIMD2<Int>, at: SIMD2<Int>) -> Float {
    let baseAddressIndex = at.y * size.x + at.x
    return Float(pointer[baseAddressIndex])
}

// This also works. Adapted from:
// https://developer.apple.com/forums/thread/676368
func worldPoint(depthMapPixelPoint: SIMD2<Float>, depth: Float, cameraIntrinsicsInverted: simd_float3x3, viewMatrixInverted: simd_float4x4) -> SIMD3<Float> {
     let localPoint = cameraIntrinsicsInverted * simd_float3(depthMapPixelPoint, 1) * -depth
     let localPointSwappedX = simd_float3(-localPoint.x, localPoint.y, localPoint.z)
     let worldPoint = viewMatrixInverted * simd_float4(localPointSwappedX, 1)
     return (worldPoint / worldPoint.w)[SIMD3(0,1,2)]
}

// This one is adapted from:
// http://nicolas.burrus.name/index.php/Research/KinectCalibration
func worldPoint(depthMapPixelPoint: SIMD2<Float>, depth: Float, cameraIntrinsics: simd_float3x3, viewMatrixInverted: simd_float4x4) -> SIMD3<Float> {
    let xrw = ((depthMapPixelPoint.x - cameraIntrinsics[2][0]) * depth / cameraIntrinsics[0][0])
    let yrw = (depthMapPixelPoint.y - cameraIntrinsics[2][1]) * depth / cameraIntrinsics[1][1]
    // Y is UP in camera space, vs it being DOWN in image space.
    let localPoint = simd_float3(xrw, -yrw, -depth)
    let worldPoint = viewMatrixInverted * simd_float4(localPoint, 1)
    return simd_float3(worldPoint.x, worldPoint.y, worldPoint.z)
}

extension CVPixelBuffer {
    var size: SIMD2<Int> {
        let width = CVPixelBufferGetWidthOfPlane(self, 0)
        let height = CVPixelBufferGetHeightOfPlane(self, 0)
        return  .init(x: width, y: height)
    }
}
	import ARKit
	import SceneKit

	let horizontalPoints = 256 / 2
	let verticalPoints = 192 / 2
	var depthNodes = [SCNNode]()
	var parentDebugNodes = SCNNode()
	var sceneView: ARSCNView!

	// Somewhere during setup
	func setup() {
	let configuration = ARWorldTrackingConfiguration()
	configuration.frameSemantics = .smoothedSceneDepth
	sceneView.session.run(configuration)
	sceneView.scene.rootNode.addChildNode(parentDebugNodes)

	let sizeGeomPredictions = 0.005
	let geom = SCNBox(width: sizeGeomPredictions, height: sizeGeomPredictions, length: sizeGeomPredictions, chamferRadius: 0)
	geom.firstMaterial?.diffuse.contents = UIColor.green

	for _ in 0..<(horizontalPoints * verticalPoints) {
	let node = SCNNode(geometry: geom)
	self.parentDebugNodes.addChildNode(node)
	self.depthNodes.append(node)
	}
	}


	func session(_ session: ARSession, didUpdate frame: ARFrame) {
	guard let smoothedDepth = frame.smoothedSceneDepth?.depthMap else {
	return
	}
	let capturedImage = frame.capturedImage

	let lockFlags = CVPixelBufferLockFlags.readOnly
	CVPixelBufferLockBaseAddress(smoothedDepth, lockFlags)
	defer {
	CVPixelBufferUnlockBaseAddress(smoothedDepth, lockFlags)
	}

	let baseAddress = CVPixelBufferGetBaseAddressOfPlane(smoothedDepth, 0)!
	let depthByteBuffer = baseAddress.assumingMemoryBound(to: Float32.self)

	// The `.size` accessor simply read the CVPixelBuffer's width and height in pixels.
	//
	// They are the same ratio:
	// 1920 x 1440 = 1440 x 1920 = 0.75
	let depthMapSize = smoothedDepth.size
	// 192 x 256 = 0.75
	let capturedImageSize = capturedImage.size

	var cameraIntrinsics = frame.camera.intrinsics
	let depthResolution = simd_float2(x: Float(depthMapSize.x), y: Float(depthMapSize.y))
	let scaleRes = simd_float2(x: Float(capturedImageSize.x) / depthResolution.x,
	y: Float(capturedImageSize.y) / depthResolution.y )
	// Make the camera intrinsics be with respect to Depth.
	cameraIntrinsics[0][0] /= scaleRes.x
	cameraIntrinsics[1][1] /= scaleRes.y

	cameraIntrinsics[2][0] /= scaleRes.x
	cameraIntrinsics[2][1] /= scaleRes.y

	// This will be the long size, because of the rotation
	let horizontalStep = Float(depthMapSize.x) / Float(self.horizontalPoints)
	let halfHorizontalStep = horizontalStep / 2
	// This will be the short size, because of the rotation
	let verticalStep = Float(depthMapSize.y) / Float(self.verticalPoints)
	let halfVerticalStep = verticalStep / 2

	for h in 0..<horizontalPoints {
	for v in 0..<verticalPoints {
	let x = Float(h) * horizontalStep + halfHorizontalStep
	let y = Float(v) * verticalStep + halfVerticalStep
	let depthMapPoint = simd_float2(x, y)

	// Sample depth
	let metricDepth = sampleDepthRaw(depthByteBuffer, size: depthMapSize, at: .init(depthMapPoint))

	let wp = worldPoint(depthMapPixelPoint: depthMapPoint,
	depth: metricDepth,
	cameraIntrinsics: cameraIntrinsics,
	// This is crucial: you need to always use the view matrix for Landscape Right.
	viewMatrixInverted: frame.camera.viewMatrix(for: .landscapeRight).inverse)
	let node = self.depthNodes[v * horizontalPoints + h]
	node.simdWorldPosition = wp
	}
	}
	}
	func sampleDepthRaw(_ pointer: UnsafeMutablePointer<Float32>, size: SIMD2<Int>, at: SIMD2<Int>) -> Float {
	let baseAddressIndex = at.y * size.x + at.x
	return Float(pointer[baseAddressIndex])
	}

	// This also works. Adapted from:
	// https://developer.apple.com/forums/thread/676368
	func worldPoint(depthMapPixelPoint: SIMD2<Float>, depth: Float, cameraIntrinsicsInverted: simd_float3x3, viewMatrixInverted: simd_float4x4) -> SIMD3<Float> {
	let localPoint = cameraIntrinsicsInverted * simd_float3(depthMapPixelPoint, 1) * -depth
	let localPointSwappedX = simd_float3(-localPoint.x, localPoint.y, localPoint.z)
	let worldPoint = viewMatrixInverted * simd_float4(localPointSwappedX, 1)
	return (worldPoint / worldPoint.w)[SIMD3(0,1,2)]
	}

	// This one is adapted from:
	// http://nicolas.burrus.name/index.php/Research/KinectCalibration
	func worldPoint(depthMapPixelPoint: SIMD2<Float>, depth: Float, cameraIntrinsics: simd_float3x3, viewMatrixInverted: simd_float4x4) -> SIMD3<Float> {
	let xrw = ((depthMapPixelPoint.x - cameraIntrinsics[2][0]) * depth / cameraIntrinsics[0][0])
	let yrw = (depthMapPixelPoint.y - cameraIntrinsics[2][1]) * depth / cameraIntrinsics[1][1]
	// Y is UP in camera space, vs it being DOWN in image space.
	let localPoint = simd_float3(xrw, -yrw, -depth)
	let worldPoint = viewMatrixInverted * simd_float4(localPoint, 1)
	return simd_float3(worldPoint.x, worldPoint.y, worldPoint.z)
	}

	extension CVPixelBuffer {
	var size: SIMD2<Int> {
	let width = CVPixelBufferGetWidthOfPlane(self, 0)
	let height = CVPixelBufferGetHeightOfPlane(self, 0)
	return .init(x: width, y: height)
	}
	}