3DPointsFromDepth.swift (forked from snowzurfer/3DPointsFromDepth.swift)
3D world points from ARKit depth
import ARKit
import SceneKit

// The ARKit depth map is 256 x 192 pixels; visualize every other point in each
// dimension, i.e. a 128 x 96 grid of debug nodes.
let horizontalPoints = 256 / 2
let verticalPoints = 192 / 2

var depthNodes = [SCNNode]()
var parentDebugNodes = SCNNode()
var sceneView: ARSCNView!
// Somewhere during setup
func setup() {
    // Smoothed scene depth requires a LiDAR-equipped device.
    let configuration = ARWorldTrackingConfiguration()
    configuration.frameSemantics = .smoothedSceneDepth
    sceneView.session.run(configuration)

    sceneView.scene.rootNode.addChildNode(parentDebugNodes)

    // Build one small green box per sampled depth point; the boxes are reused
    // and repositioned every frame.
    let sizeGeomPredictions: CGFloat = 0.005
    let geom = SCNBox(width: sizeGeomPredictions, height: sizeGeomPredictions,
                      length: sizeGeomPredictions, chamferRadius: 0)
    geom.firstMaterial?.diffuse.contents = UIColor.green
    for _ in 0..<(horizontalPoints * verticalPoints) {
        let node = SCNNode(geometry: geom)
        self.parentDebugNodes.addChildNode(node)
        self.depthNodes.append(node)
    }
}
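// A minimal sketch of the wiring the gist assumes but does not show: a view
// controller that owns `sceneView`, registers itself as the session delegate so
// that session(_:didUpdate:) below is called each frame, and runs setup().
// The class name here is illustrative, not part of the original gist.
class DepthDebugViewController: UIViewController, ARSessionDelegate {
    override func viewDidLoad() {
        super.viewDidLoad()
        sceneView = ARSCNView(frame: view.bounds)
        view.addSubview(sceneView)
        // Receive per-frame session(_:didUpdate:) callbacks.
        sceneView.session.delegate = self
        setup()
    }
}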
func session(_ session: ARSession, didUpdate frame: ARFrame) {
    guard let smoothedDepth = frame.smoothedSceneDepth?.depthMap else {
        return
    }
    let capturedImage = frame.capturedImage

    // Lock the depth buffer while reading raw values from it.
    let lockFlags = CVPixelBufferLockFlags.readOnly
    CVPixelBufferLockBaseAddress(smoothedDepth, lockFlags)
    defer {
        CVPixelBufferUnlockBaseAddress(smoothedDepth, lockFlags)
    }
    let baseAddress = CVPixelBufferGetBaseAddressOfPlane(smoothedDepth, 0)!
    let depthByteBuffer = baseAddress.assumingMemoryBound(to: Float32.self)
    // The `.size` accessor (defined in the extension below) reads the
    // CVPixelBuffer's width and height in pixels.
    //
    // Both buffers have the same aspect ratio:
    // depth map: 256 x 192 -> 192 / 256 = 0.75
    let depthMapSize = smoothedDepth.size
    // captured image: 1920 x 1440 -> 1440 / 1920 = 0.75
    let capturedImageSize = capturedImage.size

    var cameraIntrinsics = frame.camera.intrinsics
    let depthResolution = simd_float2(x: Float(depthMapSize.x), y: Float(depthMapSize.y))
    let scaleRes = simd_float2(x: Float(capturedImageSize.x) / depthResolution.x,
                               y: Float(capturedImageSize.y) / depthResolution.y)
    // The intrinsics are reported for the full-resolution captured image;
    // rescale them so they are expressed with respect to the depth map.
    cameraIntrinsics[0][0] /= scaleRes.x  // fx
    cameraIntrinsics[1][1] /= scaleRes.y  // fy
    cameraIntrinsics[2][0] /= scaleRes.x  // cx
    cameraIntrinsics[2][1] /= scaleRes.y  // cy
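    // Worked example with typical values (illustrative numbers, not from the
    // gist): captured image 1920 x 1440 and depth map 256 x 192 give
    // scaleRes = (7.5, 7.5), so an fx of ~1455 px for the full image becomes
    // ~194 px for the depth map, and a cx of ~960 px becomes ~128 px (the
    // center of the 256-px-wide depth map).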
    // This is the long side, because the buffer is delivered in landscape orientation.
    let horizontalStep = Float(depthMapSize.x) / Float(self.horizontalPoints)
    let halfHorizontalStep = horizontalStep / 2
    // This is the short side, for the same reason.
    let verticalStep = Float(depthMapSize.y) / Float(self.verticalPoints)
    let halfVerticalStep = verticalStep / 2

    for h in 0..<horizontalPoints {
        for v in 0..<verticalPoints {
            // Sample at the center of each grid cell.
            let x = Float(h) * horizontalStep + halfHorizontalStep
            let y = Float(v) * verticalStep + halfVerticalStep
            let depthMapPoint = simd_float2(x, y)

            // Sample the metric depth (in meters) at that pixel.
            let metricDepth = sampleDepthRaw(depthByteBuffer, size: depthMapSize, at: .init(depthMapPoint))

            let wp = worldPoint(depthMapPixelPoint: depthMapPoint,
                                depth: metricDepth,
                                cameraIntrinsics: cameraIntrinsics,
                                // This is crucial: always use the view matrix for Landscape Right,
                                // since the camera buffers are delivered in that orientation.
                                viewMatrixInverted: frame.camera.viewMatrix(for: .landscapeRight).inverse)
            let node = self.depthNodes[v * horizontalPoints + h]
            node.simdWorldPosition = wp
        }
    }
}
func sampleDepthRaw(_ pointer: UnsafeMutablePointer<Float32>, size: SIMD2<Int>, at: SIMD2<Int>) -> Float {
    // Row-major index into the depth buffer.
    let baseAddressIndex = at.y * size.x + at.x
    return pointer[baseAddressIndex]
}
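// sampleDepthRaw does a nearest-pixel lookup (the caller truncates the float
// coordinates). A hedged sketch of a bilinear alternative that interpolates
// between the four surrounding depth pixels; the function name is an addition,
// not part of the original gist.
func sampleDepthBilinear(_ pointer: UnsafeMutablePointer<Float32>, size: SIMD2<Int>, at point: SIMD2<Float>) -> Float {
    // Clamp so that (x0 + 1, y0 + 1) stays inside the buffer.
    let x0 = min(max(Int(point.x), 0), size.x - 2)
    let y0 = min(max(Int(point.y), 0), size.y - 2)
    let tx = min(max(point.x - Float(x0), 0), 1)
    let ty = min(max(point.y - Float(y0), 0), 1)
    let d00 = pointer[y0 * size.x + x0]
    let d10 = pointer[y0 * size.x + x0 + 1]
    let d01 = pointer[(y0 + 1) * size.x + x0]
    let d11 = pointer[(y0 + 1) * size.x + x0 + 1]
    // Lerp along x on both rows, then along y.
    let top = d00 + (d10 - d00) * tx
    let bottom = d01 + (d11 - d01) * tx
    return top + (bottom - top) * ty
}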
// An alternative unprojection; this also works. Adapted from:
// https://developer.apple.com/forums/thread/676368
func worldPoint(depthMapPixelPoint: SIMD2<Float>, depth: Float, cameraIntrinsicsInverted: simd_float3x3, viewMatrixInverted: simd_float4x4) -> SIMD3<Float> {
    // Unproject the pixel into camera space; the camera looks down -Z.
    let localPoint = cameraIntrinsicsInverted * simd_float3(depthMapPixelPoint, 1) * -depth
    // Multiplying by -depth negates both x and y. Y should stay negated
    // (image y points down, camera y points up), but x must be flipped back.
    let localPointSwappedX = simd_float3(-localPoint.x, localPoint.y, localPoint.z)
    let worldPoint = viewMatrixInverted * simd_float4(localPointSwappedX, 1)
    return (worldPoint / worldPoint.w)[SIMD3(0, 1, 2)]
}
// This one is adapted from:
// http://nicolas.burrus.name/index.php/Research/KinectCalibration
func worldPoint(depthMapPixelPoint: SIMD2<Float>, depth: Float, cameraIntrinsics: simd_float3x3, viewMatrixInverted: simd_float4x4) -> SIMD3<Float> {
    // Pinhole model: x = (u - cx) * z / fx, y = (v - cy) * z / fy.
    let xrw = (depthMapPixelPoint.x - cameraIntrinsics[2][0]) * depth / cameraIntrinsics[0][0]
    let yrw = (depthMapPixelPoint.y - cameraIntrinsics[2][1]) * depth / cameraIntrinsics[1][1]
    // Y is UP in camera space, vs. it being DOWN in image space,
    // and the camera looks down -Z.
    let localPoint = simd_float3(xrw, -yrw, -depth)
    let worldPoint = viewMatrixInverted * simd_float4(localPoint, 1)
    return simd_float3(worldPoint.x, worldPoint.y, worldPoint.z)
}
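// A quick sanity check of the unprojection above (illustrative intrinsics and
// function name, not part of the original gist): a pixel at the principal
// point (cx, cy) with depth d must land d meters straight ahead of the camera,
// i.e. at (0, 0, -d) in camera space.
func testWorldPointAtPrincipalPoint() {
    var K = matrix_identity_float3x3
    K[0][0] = 194  // fx: a ~1455 px full-image focal length over scaleRes = 7.5
    K[1][1] = 194  // fy
    K[2][0] = 128  // cx: center of the 256-px-wide depth map
    K[2][1] = 96   // cy: center of the 192-px-tall depth map
    let p = worldPoint(depthMapPixelPoint: simd_float2(128, 96),
                       depth: 2,
                       cameraIntrinsics: K,
                       viewMatrixInverted: matrix_identity_float4x4)
    assert(simd_length(p - simd_float3(0, 0, -2)) < 1e-5)
}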
extension CVPixelBuffer {
    var size: SIMD2<Int> {
        let width = CVPixelBufferGetWidthOfPlane(self, 0)
        let height = CVPixelBufferGetHeightOfPlane(self, 0)
        return .init(x: width, y: height)
    }
}
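// ARKit also publishes a per-pixel confidence map alongside the depth map
// (frame.smoothedSceneDepth?.confidenceMap, one UInt8 per pixel holding an
// ARConfidenceLevel raw value: 0 = low, 1 = medium, 2 = high). A hedged sketch
// of gating sampled points on confidence; the function name is an addition,
// not part of the original gist.
func isDepthConfident(_ frame: ARFrame, at: SIMD2<Int>) -> Bool {
    guard let confidenceMap = frame.smoothedSceneDepth?.confidenceMap else { return false }
    CVPixelBufferLockBaseAddress(confidenceMap, .readOnly)
    defer { CVPixelBufferUnlockBaseAddress(confidenceMap, .readOnly) }
    guard let base = CVPixelBufferGetBaseAddress(confidenceMap) else { return false }
    // Use bytesPerRow, since rows can be padded beyond the visible width.
    let bytesPerRow = CVPixelBufferGetBytesPerRow(confidenceMap)
    let value = base.advanced(by: at.y * bytesPerRow + at.x)
        .assumingMemoryBound(to: UInt8.self).pointee
    return value >= UInt8(ARConfidenceLevel.medium.rawValue)
}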