Created
September 8, 2019 08:12
-
-
Save Jegge/597e2c75276b73a74f4104b37a385006 to your computer and use it in GitHub Desktop.
A TextReader reads single characters, full lines, or complete texts from a URL or file, using an arbitrary string encoding and line delimiter.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// TextReader.swift | |
// | |
// Created by Sebastian Boettcher on 05.09.19. | |
// Copyright © 2019 Sebastian Boettcher. All rights reserved. | |
// | |
import Foundation | |
/// Reports whether the given file handle was opened with read access.
///
/// The status flags of the underlying descriptor are queried via `fcntl(F_GETFL)`; the handle counts as
/// readable when its access mode is `O_RDONLY` or `O_RDWR`.
///
/// - Parameter fileHandle: The file handle whose descriptor is inspected.
/// - Returns: `true` if the descriptor is open for reading, `false` otherwise (including when `fcntl` fails).
private func isReadable (fileHandle: FileHandle) -> Bool {
    let statusFlags = fcntl(fileHandle.fileDescriptor, F_GETFL)
    guard statusFlags != -1 else {
        return false
    }
    let accessMode = statusFlags & O_ACCMODE
    return accessMode == O_RDONLY || accessMode == O_RDWR
}
/// A `TextReader` can read single characters, full lines or complete texts from a `URL` or file
/// with an arbitrary `String.Encoding` and line delimiter.
///
/// Most creation methods for `TextReader` cause the text reader object to take ownership of the associated file handle.
/// This means that the text reader object both creates the file handle and is responsible for closing it later,
/// when the text reader object itself is deallocated. If you want to use a text reader with a file handle that you
/// created, use the `init(fileHandle:closeOnDealloc:encoding:delimiter:)` method. Pass `false` for the `closeOnDealloc`
/// parameter if you want to close the file handle yourself.
///
/// When you supply your own file handle, it must be readable or else a precondition check will fail when creating
/// the text reader.
///
/// Lines are found by searching the raw bytes of the file for the encoded delimiter. Bytes that cannot be decoded
/// with `encoding` are decoded leniently as UTF-8 (invalid sequences become U+FFFD) instead of crashing.
/// An empty delimiter never matches, so the whole text is then treated as one single line.
///
/// NOTE(review): for encodings whose encoded form carries a byte-order mark (presumably `.utf16` / `.utf32`), the
/// encoded delimiter probably includes that mark and will not match inside the file - confirm before relying on it,
/// or use an explicit-endian encoding.
///
/// Characterwise seeking is supported, but it is very slow because there may be no one-to-one
/// relationship between the offset in the file and the offset of a character in the text. Thus seeking requires
/// scanning the file. Don't do it on large files.
public class TextReader {

    /// Number of bytes requested from the file handle per read.
    private let _chunkSize: Int = 4096
    private let _fileHandle: FileHandle
    /// Raw bytes that were read from the file but not yet split into lines.
    private var _dataBuffer: Data
    /// The delimiter encoded with `encoding`; this is what gets searched for in `_dataBuffer`.
    private let _dataDelimiter: Data
    /// Decoded characters of the current line that were not yet handed out by `read()`.
    private var _lineBuffer: String = ""
    /// `true` if `_lineBuffer` was filled from a line that was followed by a delimiter in the file
    /// (the delimiter is then part of `_lineBuffer`), `false` for an unterminated last line.
    private var _lineBufferIsTerminated: Bool = false
    private let _closeOnDealloc: Bool

    /// Gets the preferred encoding.
    public let encoding: String.Encoding

    /// Gets the delimiter used to determine line boundaries.
    public let delimiter: String

    /// Gets the current position in the text, counted in `Character`s from the beginning of the text.
    private(set) var offsetInText: UInt64 = 0

    /// Uses an existing file handle for reading.
    ///
    /// - Parameters:
    ///   - fileHandle: The file handle to use. If the handle is not readable, a precondition check will fail.
    ///   - closeOnDealloc: If set to `true`, the text reader takes ownership of the file handle and closes
    ///                     it when the text reader object itself gets deallocated.
    ///   - encoding: The desired encoding, defaults to `String.Encoding.utf8`.
    ///   - delimiter: The delimiter to separate the lines, defaults to "\n".
    ///
    /// - Precondition: `fileHandle` must be readable and `delimiter` must be representable in `encoding`.
    /// - Returns: A `TextReader` that can read text.
    init (fileHandle: FileHandle, closeOnDealloc: Bool, encoding: String.Encoding = .utf8, delimiter: String = "\n") {
        precondition(isReadable(fileHandle: fileHandle), "file handle is not readable")
        guard let encodedDelimiter = delimiter.data(using: encoding) else {
            preconditionFailure("delimiter cannot be represented in the requested encoding")
        }
        self.encoding = encoding
        self.delimiter = delimiter
        self._fileHandle = fileHandle
        self._closeOnDealloc = closeOnDealloc
        self._dataDelimiter = encodedDelimiter
        self._dataBuffer = Data(capacity: self._chunkSize)
    }

    /// Opens a URL for reading.
    ///
    /// - Parameters:
    ///   - url: The url to open for reading.
    ///   - encoding: The desired encoding, defaults to `String.Encoding.utf8`.
    ///   - delimiter: The delimiter to separate the lines, defaults to "\n".
    ///
    /// - Throws: `NSError` if the underlying `FileHandle` fails to open the file.
    /// - Returns: A `TextReader` that can read text.
    convenience init (from url: URL, encoding: String.Encoding = .utf8, delimiter: String = "\n") throws {
        let file = try FileHandle(forReadingFrom: url)
        self.init(fileHandle: file, closeOnDealloc: true, encoding: encoding, delimiter: delimiter)
    }

    /// Opens a file for reading.
    ///
    /// - Parameters:
    ///   - path: The path to the file to open for reading.
    ///   - encoding: The desired encoding, defaults to `String.Encoding.utf8`.
    ///   - delimiter: The delimiter to separate the lines, defaults to "\n".
    ///
    /// - Throws: `NSError` if the underlying `FileHandle` fails to open the file.
    /// - Returns: A `TextReader` that can read text.
    convenience init (atPath path: String, encoding: String.Encoding = .utf8, delimiter: String = "\n") throws {
        let file = try FileHandle(forReadingFrom: URL(fileURLWithPath: path))
        self.init(fileHandle: file, closeOnDealloc: true, encoding: encoding, delimiter: delimiter)
    }

    deinit {
        if self._closeOnDealloc {
            self._fileHandle.closeFile()
        }
    }

    /// Decodes raw bytes with the preferred encoding.
    /// Falls back to a lenient UTF-8 decoding (invalid sequences become U+FFFD) so that malformed input
    /// neither crashes the reader nor gets mistaken for the end of the file.
    private func decode (_ data: Data) -> String {
        if let text = String(data: data, encoding: self.encoding) {
            return text
        }
        return String(decoding: data, as: UTF8.self)
    }

    /// Extracts the next line from the data buffer, replenishing the buffer from the file as needed.
    /// - Returns: The line without its delimiter and a flag telling whether a delimiter followed the line in the
    ///            file, or `nil` if neither the buffer nor the file holds any more bytes.
    private func readLineInternal () -> (text: String, isTerminated: Bool)? {
        while true {
            // an empty delimiter can never separate lines, so do not even search for it
            if !self._dataDelimiter.isEmpty, let range = self._dataBuffer.range(of: self._dataDelimiter) {
                let start = self._dataBuffer.startIndex
                let bytes = self._dataBuffer.subdata(in: start..<range.lowerBound)
                self._dataBuffer.removeSubrange(start..<range.upperBound)
                return (self.decode(bytes), true)
            }
            let chunk = self._fileHandle.readData(ofLength: self._chunkSize)
            if !chunk.isEmpty {
                self._dataBuffer.append(chunk)
                continue
            }
            // end of file: whatever is left in the buffer is the last, unterminated line
            if self._dataBuffer.isEmpty {
                return nil
            }
            let bytes = self._dataBuffer
            self._dataBuffer.removeAll()
            return (self.decode(bytes), false)
        }
    }

    /// Serves a single character out of the line buffer and replenishes it from the data buffer if needed.
    /// - Parameter consume: If `true`, the character is removed from the buffer and `offsetInText` advances by one.
    /// - Returns: The next `Character` or `nil` if the end of the text has been reached.
    private func readInternal (consume: Bool) -> Character? {
        while true {
            if let char = self._lineBuffer.first {
                if consume {
                    self._lineBuffer.removeFirst()
                    self.offsetInText += 1
                }
                return char
            }
            guard let line = self.readLineInternal() else {
                return nil
            }
            // only re-attach the delimiter if it was really present in the file, otherwise the last
            // line would grow a delimiter character that does not exist in the text
            self._lineBuffer = line.isTerminated ? line.text + self.delimiter : line.text
            self._lineBufferIsTerminated = line.isTerminated
        }
    }

    /// Gets the next available `Character` to be read without changing the current text offset.
    /// - Returns: The next available `Character` or `nil` if no such `Character` exists.
    func peek () -> Character? {
        return self.readInternal(consume: false)
    }

    /// Reads the next available `Character`.
    /// - Returns: The next available `Character` or `nil` if no such `Character` exists.
    func read () -> Character? {
        return self.readInternal(consume: true)
    }

    /// Reads the next available line.
    /// - Returns: The next available line without delimiter as a `String` or `nil` if no such line exists.
    func readLine () -> String? {
        // first serve the request out of the line buffer, if it is not empty
        if !self._lineBuffer.isEmpty {
            // strip the complete delimiter (it may be longer than one character or already partially consumed)
            let delimiterLength = self._lineBufferIsTerminated ? self.delimiter.count : 0
            let line = String(self._lineBuffer.dropLast(delimiterLength))
            self.offsetInText += UInt64(self._lineBuffer.count)
            self._lineBuffer.removeAll()
            return line
        }
        // then serve the request out of the data buffer and replenish it from the file
        guard let line = self.readLineInternal() else {
            return nil
        }
        let delimiterLength = line.isTerminated ? self.delimiter.count : 0
        self.offsetInText += UInt64(line.text.count + delimiterLength)
        return line.text
    }

    /// Reads the remainder of the file.
    /// - Returns: A `String` containing the remainder of the file or `nil` if nothing could be read.
    func readToEndOfFile () -> String? {
        var text: String?
        // first serve the request from the line buffer (if not empty); it already contains its delimiter
        if !self._lineBuffer.isEmpty {
            text = self._lineBuffer
            self.offsetInText += UInt64(self._lineBuffer.count)
            self._lineBuffer.removeAll()
        }
        // then take the buffered bytes together with the remainder of the file and decode them in one go,
        // so a multi-byte character split between buffer and file is not lost; the buffer is emptied
        // so that its bytes cannot be handed out a second time
        var remaining = self._dataBuffer
        self._dataBuffer.removeAll()
        remaining.append(self._fileHandle.readDataToEndOfFile())
        if !remaining.isEmpty {
            let rest = self.decode(remaining)
            text = (text ?? "") + rest
            self.offsetInText += UInt64(rest.count)
        }
        return text
    }

    /// Rewinds the file to the beginning and discards all buffered content.
    func rewind () {
        self.offsetInText = 0
        self._lineBuffer.removeAll()
        self._lineBufferIsTerminated = false
        self._dataBuffer.removeAll()
        self._fileHandle.seek(toFileOffset: 0)
    }

    /// Seeks to the given character offset in the text.
    /// This is painfully slow, since the file needs to be scanned again.
    /// If the text is shorter than `offset`, the reader ends up at the end of the text.
    /// - Parameters:
    ///   - offset: the offset of the character in the text we want to seek to.
    func seek (toTextOffset offset: UInt64) {
        if offset == self.offsetInText {
            return
        }
        // going backwards is only possible by starting over from the beginning
        if offset < self.offsetInText {
            self.rewind()
        }
        while self.offsetInText < offset && self.read() != nil { } // a call to read() updates self.offsetInText
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment