Skip to content

Instantly share code, notes, and snippets.

@alexdong
Last active July 18, 2023 22:40
Show Gist options
  • Save alexdong/006f2761ac2add592843bbc47489e4da to your computer and use it in GitHub Desktop.
Save alexdong/006f2761ac2add592843bbc47489e4da to your computer and use it in GitHub Desktop.
Measure the performance of loading 10,000 Embeddings with 1024 Float32 from a binary data file
import Accelerate
import Dispatch
import Foundation
import os
let logger = Logger(subsystem: "photos.keepers", category: "EmbeddingVectorSearchEngine")
typealias Embedding = [Float32]
typealias Distance = Float32
final class EmbeddingVectorSearchEngine {
static let shared = EmbeddingVectorSearchEngine()
let fileURL: URL
var uuids: [UUID] = []
var embeddings: [Embedding] = []
// private init prevents others from using the default initializer for this class
private init() {
let containerURL = FileManager.default.containerURL(forSecurityApplicationGroupIdentifier: "group.photos.keepers.app")!
fileURL = containerURL.appendingPathComponent("Embeddings.dat")
}
public func insert(_ inputs: [(UUID, Embedding)]) {
/// First, update the in-memory records, then write it to disk
uuids += inputs.map { $0.0 }
embeddings += inputs.map { $0.1 }
var data = Data()
for (uuid, embedding) in inputs {
let uuidTuple = uuid.uuid
let uuidBytes: [UInt8] = [
uuidTuple.0, uuidTuple.1, uuidTuple.2, uuidTuple.3,
uuidTuple.4, uuidTuple.5, uuidTuple.6, uuidTuple.7,
uuidTuple.8, uuidTuple.9, uuidTuple.10, uuidTuple.11,
uuidTuple.12, uuidTuple.13, uuidTuple.14, uuidTuple.15
]
data.append(contentsOf: uuidBytes)
for floatVal in embedding {
let floatValData = withUnsafeBytes(of: floatVal) { Data($0) }
data.append(floatValData)
}
}
if FileManager.default.fileExists(atPath: fileURL.path()) {
if let fileUpdater = try? FileHandle(forWritingTo: fileURL) {
fileUpdater.seekToEndOfFile()
fileUpdater.write(data)
fileUpdater.closeFile()
}
} else {
try! data.write(to: fileURL)
}
}
public func load() {
let data = try! Data(contentsOf: fileURL, options: .alwaysMapped)
let frameWidth = 16 + 1024 * 4
let totalEmbeddings = data.count / frameWidth // calculate total number of embeddings to process
// Preallocate memory with a placeholder UUID
// This function preallocates memory for performance reasons. Preallocating memory:
// 1) Helps to avoid memory fragmentation: Allocating all needed memory at once makes it more likely
// that the data will be stored contiguously in memory, improving access speed.
// 2) Avoids a synchronisation point: When appending to an array from multiple threads, we need to
// synchronize access to the array which can become a bottleneck. By preallocating, each thread
// can write to a distinct region of the array, eliminating the need for synchronization.
let placeholderUUID = UUID()
uuids = [UUID](repeating: placeholderUUID, count: totalEmbeddings)
embeddings = [Embedding](repeating: Embedding(repeating: 0.0, count: 1024), count: totalEmbeddings)
DispatchQueue.concurrentPerform(iterations: totalEmbeddings) { index in
let start = data.index(data.startIndex, offsetBy: index * frameWidth)
let uuidEnd = data.index(start, offsetBy: 16)
let uuidData = data[start..<uuidEnd]
let uuidBytes = Array(uuidData)
uuids[index] = UUID(uuid: (
uuidBytes[0], uuidBytes[1], uuidBytes[2], uuidBytes[3],
uuidBytes[4], uuidBytes[5], uuidBytes[6], uuidBytes[7],
uuidBytes[8], uuidBytes[9], uuidBytes[10], uuidBytes[11],
uuidBytes[12], uuidBytes[13], uuidBytes[14], uuidBytes[15]
))
var embedding = Embedding(repeating: 0.0, count: 1024)
var floatStart = uuidEnd
for i in 0..<1024 {
let floatEnd = data.index(floatStart, offsetBy: 4)
let floatData = data[floatStart..<floatEnd]
floatStart = floatEnd
embedding[i] = floatData.withUnsafeBytes { $0.load(as: Float32.self) }
}
embeddings[index] = embedding
}
}
public func find(input: Embedding, threshold: Distance) -> [UUID] {
// The `find` function computes the similarity score between a given vector and a list of embedding vectors.
// It runs tasks concurrently, which can result in a significant speedup. The number of
// concurrent tasks is set to be three times the number of available CPU cores.
//
// Each task is responsible for a specific chunk of the embeddings list. An embedding vector is considered a match if
// its similarity score with the input vector is greater than a given threshold.
let numOfCores = ProcessInfo.processInfo.activeProcessorCount
let numOfTasks = numOfCores * 3
let chunkSize = (embeddings.count + numOfTasks - 1) / numOfTasks
var results: [Int] = []
let resultQueue = DispatchQueue(label: "photos.keepers.vectorSearch.find", attributes: .concurrent)
DispatchQueue.concurrentPerform(iterations: numOfTasks) { taskIndex in
let startIndex = taskIndex * chunkSize
let endIndex = min(startIndex + chunkSize, embeddings.count)
for i in startIndex..<endIndex {
let distance = similarityScore(Array(input), embeddings[i])
if distance > threshold {
resultQueue.sync(flags: .barrier) {
results.append(i)
}
}
}
}
resultQueue.sync { }
return results.map { uuids[$0] }
}
private func similarityScore(_ vector1: Embedding, _ vector2: Embedding) -> Distance {
/* vDSP uses CPU's SIMD to perform the calculations. We could also use Metal Shader
and Matrix operation to avoid using a for loop to call `similarityScore` for each.
However, given the number of embeddings and the complexities of the Metal Shader,
let's stick with vDSP for now.
*/
var dotProduct: Float = 0.0
var vector1Norm: Float = 0.0
var vector2Norm: Float = 0.0
let count = vDSP_Length(vector1.count)
vDSP_dotpr(vector1, 1, vector2, 1, &dotProduct, count)
vDSP_svesq(vector1, 1, &vector1Norm, count)
vDSP_svesq(vector2, 1, &vector2Norm, count)
return dotProduct / (sqrt(vector1Norm) * sqrt(vector2Norm))
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment