Skip to content

Instantly share code, notes, and snippets.

@acerosalazar
Last active July 7, 2018 10:11
Show Gist options
  • Save acerosalazar/5749f338a7daf903db7266f146768b62 to your computer and use it in GitHub Desktop.
Save acerosalazar/5749f338a7daf903db7266f146768b62 to your computer and use it in GitHub Desktop.
A bidirectional, memory-efficient file reader. Good for use cases where only a portion of the file needs to be read – e.g., reading the last 10 lines of the file.
import Foundation
// MARK: -
/// A single-pass reader that splits a file into delimiter-separated strings,
/// loading the file in fixed-size chunks instead of reading it whole.
///
/// The reader is its own iterator, so it can be consumed once with `for-in`
/// or by calling `next()` directly. It can walk the file from the beginning
/// towards the end, or from the end towards the beginning.
///
/// Notes:
/// - The delimiter is always matched as its UTF-8 byte sequence, regardless of
///   `encoding`; this is only meaningful for encodings in which the delimiter
///   has the same byte representation as in UTF-8.
/// - If a piece of the file cannot be decoded with `encoding`, `next()` returns
///   `nil`, which also ends a `for-in` loop.
class FileReader: Sequence, IteratorProtocol {
    // MARK: - Private Members
    private let file: URL
    private let delimiter: Data
    private let chunkSize: Int
    private let maxChunks: Int
    private let readBackwards: Bool
    private let encoding: String.Encoding
    private let fileHandle: FileHandle
    /// Bytes that were read from the file but not yet returned to the caller.
    private var buffer: Data

    // MARK: - Initializers

    /// Opens `file` for reading and positions the read cursor.
    ///
    /// - Parameters:
    ///   - file: Location of the file to read.
    ///   - delimiter: Separator between the returned strings. Defaults to a newline.
    ///   - encoding: Encoding used to turn the bytes of each piece into a `String`.
    ///   - chunkSize: Number of bytes requested from the file per refill.
    ///   - maxChunks: Maximum number of refills attempted per call to `next()`.
    ///     When no delimiter shows up after that many refills, everything
    ///     buffered so far is returned as a single piece.
    ///   - fromEndOfFile: When `true`, pieces are produced last-to-first.
    /// - Throws: Whatever `FileHandle(forReadingFrom:)` throws when the file
    ///   cannot be opened.
    init(file: URL,
         delimiter: String = "\n",
         encoding: String.Encoding = .utf8,
         chunkSize: Int = 4_096,
         maxChunks: Int = 2,
         fromEndOfFile: Bool = false) throws {
        self.file = file
        // The UTF-8 view of a Swift string always exists, so no force-unwrap is needed.
        self.delimiter = Data(delimiter.utf8)
        self.encoding = encoding
        self.chunkSize = chunkSize
        self.maxChunks = maxChunks
        self.readBackwards = fromEndOfFile
        self.buffer = Data()
        self.fileHandle = try FileHandle(forReadingFrom: file)
        if fromEndOfFile {
            _ = self.fileHandle.seekToEndOfFile()
        } else {
            self.fileHandle.seek(toFileOffset: 0)
        }
    }

    // MARK: - IteratorProtocol

    /// Returns the next delimiter-separated piece of the file.
    ///
    /// - Returns: The decoded piece, or `nil` when the file is exhausted or the
    ///   piece cannot be decoded with the configured encoding.
    func next() -> String? {
        // Attempt 0 inspects what is already buffered; every later attempt
        // runs after one more refill, up to `maxChunks` refills in total.
        for attempt in 0..<(maxChunks + 1) {
            let options: Data.SearchOptions = readBackwards ? .backwards : []
            if let match = buffer.range(of: delimiter, options: options) {
                let beforeDelimiter = buffer[..<match.lowerBound]
                let afterDelimiter = buffer[match.upperBound...]
                let piece: Data
                if readBackwards {
                    piece = afterDelimiter
                    buffer = beforeDelimiter
                } else {
                    piece = beforeDelimiter
                    buffer = afterDelimiter
                }
                // Decode with the configured encoding, exactly like the
                // no-delimiter path below, so non-UTF-8 files work too.
                return String(data: piece, encoding: encoding)
            }

            if attempt < maxChunks {
                loadBuffer()
                // Nothing buffered and nothing left to read: end of sequence.
                guard !buffer.isEmpty else { return nil }
            } else {
                // Refill budget used up: hand back whatever has accumulated.
                let remainder = buffer
                buffer = Data()
                return String(data: remainder, encoding: encoding)
            }
        }
        return nil
    }

    // MARK: - Private API

    /// Extends `buffer` with the next chunk of the file: appended when reading
    /// forwards, prepended when reading backwards.
    private func loadBuffer() {
        guard readBackwards else {
            buffer.append(fileHandle.readData(ofLength: chunkSize))
            return
        }

        let chunkEnd = fileHandle.offsetInFile
        // Already at the beginning of the file; nothing more to prepend.
        guard chunkEnd > 0 else { return }

        let chunkStart = UInt64(Swift.max(0, Int(chunkEnd) - chunkSize))
        fileHandle.seek(toFileOffset: chunkStart)
        let chunk = fileHandle.readData(ofLength: Int(chunkEnd - chunkStart))
        // Rewind so that the next refill picks up the bytes preceding this chunk.
        fileHandle.seek(toFileOffset: chunkStart)
        buffer = chunk + buffer
    }
}
// MARK: -
extension Data {
    /// Cuts the receiver around `range`, dropping the bytes inside the range.
    ///
    /// - Parameter range: The span to cut out, expressed in the receiver's indices.
    /// - Returns: `lhs` holds everything before `range.lowerBound` and `rhs`
    ///   everything from `range.upperBound` onwards. Both are slices of the
    ///   receiver and keep its indices.
    func split(byRange range: Range<Data.Index>) -> (lhs: Data, rhs: Data) {
        return (lhs: self[..<range.lowerBound], rhs: self[range.upperBound...])
    }

    /// Decodes the receiver as text.
    ///
    /// - Parameter encoding: The encoding to interpret the bytes with; UTF-8 by default.
    /// - Returns: The decoded string, or `nil` when `String(data:encoding:)` fails.
    func string(encoding: String.Encoding = .utf8) -> String? {
        let decoded = String(data: self, encoding: encoding)
        return decoded
    }
}
// MARK: -
extension FileHandle {
    /// Symbolic positions within a file.
    enum FileOffset {
        /// The end of the file.
        case eof
        /// The beginning of the file.
        case bof
    }

    /// Moves the file pointer to one of the symbolic positions.
    ///
    /// - Parameter offset: `.bof` seeks to offset 0, `.eof` seeks to the end of the file.
    func seek(toFileOffset offset: FileOffset) {
        switch offset {
        case .bof:
            seek(toFileOffset: 0)
        case .eof:
            seekToEndOfFile()
        }
    }

    /// Reads up to `length` bytes starting at the absolute position `offset`.
    ///
    /// - Parameters:
    ///   - offset: Absolute position to start reading from.
    ///   - length: Number of bytes requested from `readData(ofLength:)`.
    ///   - rewind: When `true`, the file pointer is moved back to `offset`
    ///     after the read; otherwise it stays where the read left it.
    /// - Returns: The bytes that were read.
    func readData(fromOffset offset: UInt64, ofLength length: Int, withRewind rewind: Bool) -> Data {
        seek(toFileOffset: offset)
        defer {
            // Runs after the read below has completed.
            if rewind {
                seek(toFileOffset: offset)
            }
        }
        return readData(ofLength: length)
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment