Skip to content

Instantly share code, notes, and snippets.

@activcoding
Created November 1, 2023 14:46
Show Gist options
  • Save activcoding/144caf996c7553b425f60f3aefe735a7 to your computer and use it in GitHub Desktop.
Save activcoding/144caf996c7553b425f60f3aefe735a7 to your computer and use it in GitHub Desktop.
Fuzzy Search in Swift
import Foundation
import CollectionConcurrencyKit
enum FuzzySearch {
/// Searches an array of view models for occurrences of a fuzzy search query.
///
/// This function takes a fuzzy search `query` and an array of `URL`s, and returns a new array that contains only
/// those url's that match the query.
/// The function uses the `score` function to calculate a score for each url and
/// includes only those url's whose scores are greater than 0.0.
/// The resulting array is then sorted by a score, in descending order.
///
/// - Parameters:
/// - query: A `String` value representing the fuzzy search query.
/// - urls: An array of `URL`s, each representing a file, to search within.
/// - Returns: An array of `URL`s that match the fuzzy search query, sorted by score.
static func search(query: String, in urls: [URL]) async -> [URL] {
let queryTokens = query.lowercased().components(separatedBy: .whitespacesAndNewlines)
return await urls
.concurrentMap { (url: $0, score: score(tokens: queryTokens, url: $0)) }
.filter { $0.score > .zero }
.sorted { $0.score > $1.score }
.map(\.url)
}
/// Calculates the score of the fuzzy search query against a text string.
///
/// This function takes a fuzzy search `query` and a `text` string,
/// and calculates a score based on how well the `query` matches the `text`.
/// The function is case-insensitive and calculates the score by iterating through each token in the `query`,
/// finding all occurrences of the token in the `text`, and calculating a proximity score for each occurrence.
/// The final score is the average of all token scores weighted by their proximity scores.
///
/// - Parameters:
/// - query: A `String` value representing the fuzzy search query.
/// - url: A `URL` value representing the filePath to search within.
/// - Returns: A `Double` value representing the calculated score.
private static func score(tokens: [String], url: URL) -> Double {
let text = url.lastPathComponent.lowercased()
var score: Double = 0.0
for token in tokens {
let ranges = text.ranges(of: token)
if !ranges.isEmpty {
let tokenScore = Double(token.count) / Double(text.count)
let proximityScore = proximityScoreForRanges(ranges, text: text)
let levenshteinScore = Double(levenshteinDistance(from: String(token), to: text)) / Double(text.count)
score += (tokenScore * proximityScore) * (1 - levenshteinScore)
}
}
if let date = getLastModifiedDate(for: url.path) {
return (score / Double(tokens.count)) * Double(calculateDateScore(for: date))
} else {
return (score / Double(tokens.count))
}
}
/// Calculates the proximity score based on an array of ranges.
///
/// This function takes an array of `Range<String.Index>` objects and calculates a proximity score.
/// The higher the score, the closer the ranges are to each other in the original string.
///
/// - Parameter ranges: An array of `Range<String.Index>` objects representing the positions of matched substrings.
/// - Returns: A `Double` value representing the proximity score.
private static func proximityScoreForRanges(_ ranges: [Range<String.Index>], text: String) -> Double {
let sortedRanges = ranges.sorted(by: { $0.lowerBound < $1.lowerBound })
var score: Double = 1.0
for index in 1..<sortedRanges.count {
let previousRange = sortedRanges[index - 1]
let currentRange = sortedRanges[index]
let distance = currentRange.lowerBound.utf16Offset(in: text)
- previousRange.upperBound.utf16Offset(in: text)
let proximity = 1.0 / Double(distance)
score += proximity
}
return score / Double(sortedRanges.count)
}
/// Retrieve the last modification date for a given file path.
///
/// This function attempts to retrieve the last modification date of a file located at the specified file path.
/// If the file path is valid and the modification date can be retrieved,
/// the function returns a `Date` object representing the modification date.
/// If an error occurs or the file path is invalid, the function returns `nil`.
///
/// - Parameter filePath: The file path for which to retrieve the last modification date.
/// - Returns: The last modification date as a `Date?` (optional) value,
/// or `nil` if an error occurs or the file path is invalid.
private static func getLastModifiedDate(for filePath: String) -> Date? {
let fileManger = FileManager.default
do {
let attributes = try fileManger.attributesOfItem(atPath: filePath)
let modificationDate = attributes[.modificationDate] as? Date
return modificationDate
} catch {
return nil
}
}
/// Calculate the date score for a given file's modification date.
///
/// This function calculates the date score based on the time difference
/// between the current date and the file's modification date,
/// using an exponential decay function with a half-life of 3600 seconds (1 hour).
/// The score will be higher for more recently modified files.
///
/// - Parameter modificationDate: The file's modification date.
/// - Returns: The date score as a `Double` value.
private static func calculateDateScore(for modificationDate: Date) -> Double {
let now = Date()
let timeDiff = now.timeIntervalSince(modificationDate)
let halfLife: Double = 3600 // decay half-life in seconds
let decayFactor = log(2) / halfLife
let score = exp(-decayFactor * timeDiff)
return score + 0.01
}
/// Calculates the Levenshtein distance between two input strings.
///
/// - Parameters:
/// - sourceString: The source string to compare against the target string;
/// - targetString: The target string to compare against the source string.
/// - Returns: The Levenshtein distance between `sourceString` and `targetString`.
/// An integer representing the minimum number of
/// insertions, deletions, or substitutions required to transform the source string into the target string.
private static func levenshteinDistance(from sourceString: String, to targetString: String) -> Int {
let source = Array(sourceString)
let target = Array(targetString)
let sourceCount = source.count
let targetCount = target.count
guard sourceCount > 0 else {
return targetCount
}
guard targetCount > 0 else {
return sourceCount
}
var distanceMatrix = Array(repeating: Array(repeating: 0, count: targetCount + 1), count: sourceCount + 1)
for rowIndex in 0...sourceCount {
distanceMatrix[rowIndex][0] = rowIndex
}
for columnIndex in 0...targetCount {
distanceMatrix[0][columnIndex] = columnIndex
}
for rowIndex in 1...sourceCount {
for columnIndex in 1...targetCount {
let cost = source[rowIndex - 1] == target[columnIndex - 1] ? 0 : 1
let deletionCost = distanceMatrix[rowIndex - 1][columnIndex] + 1
let insertionCost = distanceMatrix[rowIndex][columnIndex - 1] + 1
let substitutionCost = distanceMatrix[rowIndex - 1][columnIndex - 1] + cost
distanceMatrix[rowIndex][columnIndex] = min(deletionCost, insertionCost, substitutionCost)
}
}
return distanceMatrix[sourceCount][targetCount]
}
}
extension String {
/// This function is case-insensitive and returns an array of `Range<String.Index>` objects representing
/// the positions of all occurrences of the `searchString` within the original string.
///
/// - Parameter searchString: A `String` value to search for within the original string.
/// - Returns: An array of `Range<String.Index>` objects representing the
/// positions of all occurrences of `searchString`.
func ranges(of searchString: String) -> [Range<String.Index>] {
var result: [Range<String.Index>] = []
var searchStartIndex = startIndex
while let range = self[searchStartIndex..<endIndex].range(of: searchString, options: .caseInsensitive) {
result.append(range)
searchStartIndex = range.upperBound
}
return result
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment