Skip to content

Instantly share code, notes, and snippets.

@nerdsupremacist
Created May 13, 2020 03:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nerdsupremacist/284a93d44f2ad456e67ac82263370b29 to your computer and use it in GitHub Desktop.
Save nerdsupremacist/284a93d44f2ad456e67ac82263370b29 to your computer and use it in GitHub Desktop.
Persistent Cache construct with Hashable Keys
import Foundation
public struct OrderedSet<E: Hashable>: Equatable, Collection {
public typealias Element = E
public typealias Index = Int
public typealias Indices = Range<Int>
private var array: [Element]
private var set: Set<Element>
public init() {
self.array = []
self.set = Set()
}
public init(_ array: [Element]) {
self.init()
append(contentsOf: array)
}
public var count: Int { return array.count }
public var isEmpty: Bool { return array.isEmpty }
public var contents: [Element] { return array }
public func contains(_ member: Element) -> Bool {
return set.contains(member)
}
@discardableResult
public mutating func append(_ newElement: Element) -> Bool {
let inserted = set.insert(newElement).inserted
if inserted {
array.append(newElement)
}
return inserted
}
public mutating func append<S: Sequence>(contentsOf sequence: S) where S.Element == Element {
for element in sequence {
append(element)
}
}
@discardableResult
public mutating func remove(_ element: Element) -> Element? {
guard let removed = set.remove(element) else { return nil }
if let index = array.firstIndex(of: element) {
array.remove(at: index)
}
return removed
}
public mutating func removeFirst() -> Element {
let firstElement = array.removeFirst()
set.remove(firstElement)
return firstElement
}
public mutating func removeLast() -> Element {
let lastElement = array.removeLast()
set.remove(lastElement)
return lastElement
}
}
extension OrderedSet: ExpressibleByArrayLiteral {
public init(arrayLiteral elements: Element...) {
self.init(elements)
}
}
extension OrderedSet: RandomAccessCollection {
public var startIndex: Int { return contents.startIndex }
public var endIndex: Int { return contents.endIndex }
public subscript(index: Int) -> Element {
return contents[index]
}
}
public func == <T>(lhs: OrderedSet<T>, rhs: OrderedSet<T>) -> Bool {
return lhs.contents == rhs.contents
}
extension OrderedSet: Hashable where Element: Hashable { }
extension OrderedSet {
static func + (lhs: OrderedSet<Element>, rhs: Element) -> OrderedSet<Element> {
var copy = lhs
copy.append(rhs)
return copy
}
static func + (lhs: OrderedSet<Element>, rhs: OrderedSet<Element>) -> OrderedSet<Element> {
var copy = lhs
copy.append(contentsOf: rhs)
return copy
}
}
extension Sequence {
func flatMap<V>(_ transform: (Element) throws -> OrderedSet<V>) rethrows -> OrderedSet<V> {
return try reduce(into: []) { $0.append(contentsOf: try transform($1)) }
}
}
extension Sequence {
func map<V: Hashable>(_ transform: (Element) throws -> V) rethrows -> OrderedSet<V> {
return try reduce(into: []) { $0.append(try transform($1)) }
}
}
import PathKit
public class PersistentCache<Key: Hashable> {
private let folder: Path
private let index: Path
private let capacity: Int
private let hasher: Hasher = Hasher.constantAccrossExecutions()
private var hashStore: OrderedSet<Int>
public init(folder: Path, capacity: Int) throws {
self.folder = folder
self.index = folder + ".index"
self.capacity = capacity
if !folder.exists {
try folder.mkpath()
}
if index.exists {
let bytes = Array(try index.read())
hashStore = OrderedSet(bytes.rebound(to: Int.self))
} else {
hashStore = []
}
}
deinit {
try! store()
}
public func load(key: Key) throws -> Data? {
let hash = computeHash(of: key)
let file = self.file(for: hash)
if file.exists {
assert(hashStore.contains(hash))
use(hash: hash)
return try file.read()
} else {
return nil
}
}
public func store(data: Data, for key: Key) throws {
let hash = computeHash(of: key)
if hashStore.contains(hash) && hashStore.count >= capacity {
try evictLeastRecentlyUsed()
}
use(hash: hash)
try file(for: hash).write(data)
}
}
extension PersistentCache {
private struct StringCannotBeStoredInCacheError : Error {
let string: String
let encoding: String.Encoding
}
public func load(key: Key, encoding: String.Encoding = .utf8) throws -> String? {
return try load(key: key).flatMap { String(data: $0, encoding: encoding) }
}
public func store(string: String, for key: Key, encoding: String.Encoding = .utf8) throws {
guard let data = string.data(using: encoding) else { throw StringCannotBeStoredInCacheError(string: string, encoding: encoding) }
try store(data: data, for: key)
}
}
extension PersistentCache {
private func file(for hash: Int) -> Path {
return folder + String(hash)
}
private func computeHash(of key: Key) -> Int {
var hasher = self.hasher
key.hash(into: &hasher)
return hasher.finalize()
}
private func use(hash: Int) {
hashStore.remove(hash)
hashStore.append(hash)
}
private func store() throws {
let data = Data(buffer: hashStore.rebound(to: UInt8.self))
try index.write(data)
}
private func evictLeastRecentlyUsed() throws {
let leastRecentlyUsed = hashStore.removeFirst()
let file = self.file(for: leastRecentlyUsed)
try file.delete()
}
}
extension Collection {
fileprivate func rebound<T>(to type: T.Type) -> UnsafeBufferPointer<T> {
let pointer = UnsafeMutableBufferPointer<Element>.allocate(capacity: count)
_ = pointer.initialize(from: self)
let rawPointer = UnsafeRawPointer(pointer.baseAddress!)
let size = count * MemoryLayout<Element>.size / MemoryLayout<T>.size
return UnsafeBufferPointer(start: rawPointer.assumingMemoryBound(to: type), count: size)
}
fileprivate func rebound<T>(to type: T.Type) -> [T] {
let pointer = self.rebound(to: type) as UnsafeBufferPointer<T>
let rebound = Array(pointer)
pointer.deallocate()
return rebound
}
}
extension Hasher {
// Stolen from https://github.com/apple/swift/blob/master/stdlib/public/core/SipHash.swift
// in order to replicate the exact format in bytes
private struct _State {
private var v0: UInt64 = 0x736f6d6570736575
private var v1: UInt64 = 0x646f72616e646f6d
private var v2: UInt64 = 0x6c7967656e657261
private var v3: UInt64 = 0x7465646279746573
private var v4: UInt64 = 0
private var v5: UInt64 = 0
private var v6: UInt64 = 0
private var v7: UInt64 = 0
}
fileprivate static func constantAccrossExecutions() -> Hasher {
let offset = MemoryLayout<Hasher>.size - MemoryLayout<_State>.size
var hasher = Hasher()
withUnsafeMutableBytes(of: &hasher) { pointer in
pointer.baseAddress!.storeBytes(of: _State(), toByteOffset: offset, as: _State.self)
}
return hasher
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment