Skip to content

Instantly share code, notes, and snippets.

@Jegge
Created September 8, 2019 08:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Jegge/597e2c75276b73a74f4104b37a385006 to your computer and use it in GitHub Desktop.
Save Jegge/597e2c75276b73a74f4104b37a385006 to your computer and use it in GitHub Desktop.
A TextReader reads single characters, full lines or complete texts from an URL or file with an arbitrary string encoding and line delimiter
//
// TextReader.swift
//
// Created by Sebastian Boettcher on 05.09.19.
// Copyright © 2019 Sebastian Boettcher. All rights reserved.
//
import Foundation
/// Checks whether a file handle was opened with read access.
///
/// - Parameter fileHandle: The file handle whose descriptor status flags are queried via `fcntl(F_GETFL)`.
/// - Returns: `true` if the flags could be obtained and the access mode is `O_RDONLY` or `O_RDWR`,
///   `false` otherwise.
private func isReadable (fileHandle: FileHandle) -> Bool {
    let statusFlags = fcntl(fileHandle.fileDescriptor, F_GETFL)
    // fcntl reports failure (e.g. an invalid descriptor) with -1.
    guard statusFlags != -1 else {
        return false
    }
    let accessMode = statusFlags & O_ACCMODE
    return accessMode == O_RDONLY || accessMode == O_RDWR
}
/// A `TextReader` can read single characters, full lines or complete texts from an `URL` or file
/// with an arbitrary `String.Encoding` and line delimiter.
///
/// Most creation methods for `TextReader` cause the text reader object to take ownership of the associated file handle.
/// This means that the text reader object both creates the file handle and is responsible for closing it later, which
/// happens when the text reader object itself is deallocated. If you want to use a text reader with a file handle that
/// you created, use the `init(fileHandle:closeOnDealloc:encoding:delimiter:)` initializer. Pass `false` for the
/// `closeOnDealloc` parameter if you want to close the file handle yourself.
///
/// When you supply your own file handle, it must be readable or else a precondition check will fail when creating
/// the text reader.
///
/// Character-wise seeking is supported, but it is very slow because there may be no one-to-one relationship
/// between a byte offset in the file and the offset of a character in the text. Seeking therefore requires
/// scanning the file from its beginning or from the current position. Avoid it on large files.
public class TextReader {
    /// Number of bytes requested from the file handle per read.
    private let _chunkSize: Int = 4096
    /// The file handle all data is read from.
    private let _fileHandle: FileHandle
    /// Raw bytes that were read from the file but have not been decoded yet.
    private var _dataBuffer: Data
    /// The line delimiter encoded with `encoding`; this byte sequence is searched for in `_dataBuffer`.
    ///
    /// NOTE(review): for encodings where `String.data(using:)` emits a byte-order mark (presumably `.utf16`
    /// and `.utf32`) the encoded delimiter would carry that mark and might never match inside the file —
    /// confirm before relying on such encodings.
    private let _dataDelimiter: Data
    /// Decoded characters of the current line (including its delimiter, if the line had one) that have not
    /// been consumed by `read()` yet. A `Substring` so that consuming the first character does not copy the rest.
    private var _lineBuffer: Substring = ""
    /// Whether the line `_lineBuffer` was filled from ended with a delimiter in the file.
    private var _lineBufferIsTerminated: Bool = false
    /// Whether the file handle gets closed when the text reader is deallocated.
    private let _closeOnDealloc: Bool

    /// Gets the preferred encoding.
    public let encoding: String.Encoding

    /// Gets the delimiter used to determine line boundaries.
    public let delimiter: String

    /// Gets the current position in the text, counted in `Character`s the same way `read()` delivers them.
    private(set) var offsetInText: UInt64 = 0

    /// Uses an existing file handle for reading.
    ///
    /// - Parameters:
    ///   - fileHandle: The file handle to use. If the handle is not readable, a precondition check will fail.
    ///   - closeOnDealloc: If set to `true`, the text reader takes ownership of the file handle and closes
    ///     it when the text reader object itself gets deallocated.
    ///   - encoding: The desired encoding, defaults to `String.Encoding.utf8`.
    ///   - delimiter: The delimiter to separate the lines, defaults to "\n".
    ///
    /// - Precondition: `fileHandle` must be readable.
    /// - Precondition: `delimiter` must be representable in `encoding`.
    /// - Returns: A `TextReader` that can read text.
    init (fileHandle: FileHandle, closeOnDealloc: Bool, encoding: String.Encoding = .utf8, delimiter: String = "\n") {
        precondition(isReadable(fileHandle: fileHandle), "file handle is not readable")
        // A delimiter that cannot be encoded could never be found in the file; fail with a clear message
        // instead of an anonymous force-unwrap crash.
        guard let encodedDelimiter = delimiter.data(using: encoding) else {
            preconditionFailure("delimiter cannot be represented in the requested encoding")
        }
        self.encoding = encoding
        self.delimiter = delimiter
        self._fileHandle = fileHandle
        self._closeOnDealloc = closeOnDealloc
        self._dataBuffer = Data(capacity: self._chunkSize)
        self._dataDelimiter = encodedDelimiter
    }

    /// Opens an URL for reading.
    ///
    /// - Parameters:
    ///   - url: The url to open for reading.
    ///   - encoding: The desired encoding, defaults to `String.Encoding.utf8`.
    ///   - delimiter: The delimiter to separate the lines, defaults to "\n".
    ///
    /// - Throws: `NSError` if the underlying `FileHandle` fails to open the file.
    /// - Returns: A `TextReader` that can read text.
    convenience init (from url: URL, encoding: String.Encoding = .utf8, delimiter: String = "\n") throws {
        let file = try FileHandle(forReadingFrom: url)
        self.init(fileHandle: file, closeOnDealloc: true, encoding: encoding, delimiter: delimiter)
    }

    /// Opens a file for reading.
    ///
    /// - Parameters:
    ///   - path: The path to the file to open for reading.
    ///   - encoding: The desired encoding, defaults to `String.Encoding.utf8`.
    ///   - delimiter: The delimiter to separate the lines, defaults to "\n".
    ///
    /// - Throws: `NSError` if the underlying `FileHandle` fails to open the file.
    /// - Returns: A `TextReader` that can read text.
    convenience init (atPath path: String, encoding: String.Encoding = .utf8, delimiter: String = "\n") throws {
        let file = try FileHandle(forReadingFrom: URL(fileURLWithPath: path))
        self.init(fileHandle: file, closeOnDealloc: true, encoding: encoding, delimiter: delimiter)
    }

    deinit {
        if self._closeOnDealloc {
            self._fileHandle.closeFile()
        }
    }

    /// Decodes raw bytes into text using `encoding`.
    ///
    /// If the bytes are not valid in `encoding`, they are decoded as UTF-8 with invalid sequences repaired
    /// (replacement characters) instead of crashing or silently dropping the data.
    private func decode (_ data: Data) -> String {
        if let text = String(data: data, encoding: self.encoding) {
            return text
        }
        return String(decoding: data, as: UTF8.self)
    }

    /// Extracts the next line from the data buffer, replenishing the buffer from the file as needed.
    ///
    /// - Returns: The line without its delimiter and a flag telling whether a delimiter followed the line
    ///   in the file, or `nil` if neither the buffer nor the file hold any more data.
    private func readLineInternal () -> (text: String, isTerminated: Bool)? {
        while true {
            if let range = self._dataBuffer.range(of: self._dataDelimiter) {
                let start = self._dataBuffer.startIndex
                let text = self.decode(self._dataBuffer.subdata(in: start..<range.lowerBound))
                // Drop the line together with its delimiter from the buffer.
                self._dataBuffer.removeSubrange(start..<range.upperBound)
                return (text, true)
            }
            // No delimiter buffered yet: fetch more bytes and search again.
            let chunk = self._fileHandle.readData(ofLength: self._chunkSize)
            if !chunk.isEmpty {
                self._dataBuffer.append(chunk)
                continue
            }
            // End of file: whatever is left forms a final line that has no delimiter.
            guard !self._dataBuffer.isEmpty else {
                return nil
            }
            let text = self.decode(self._dataBuffer)
            self._dataBuffer.removeAll()
            return (text, false)
        }
    }

    /// Serves a single character out of the line buffer and replenishes it from the data buffer if needed.
    ///
    /// - Parameter consume: If `true`, the character is removed from the buffer and `offsetInText` advances.
    /// - Returns: The next `Character` or `nil` at the end of the text.
    private func readInternal (consume: Bool) -> Character? {
        while true {
            if let char = self._lineBuffer.first {
                if consume {
                    // Dropping the first element of a Substring only moves its start, no copy is made.
                    self._lineBuffer = self._lineBuffer.dropFirst()
                    self.offsetInText += 1
                }
                return char
            }
            guard let line = self.readLineInternal() else {
                return nil
            }
            // Only re-attach the delimiter if the file actually contained one after this line; a final
            // line without delimiter must not produce a delimiter that is not in the file.
            self._lineBufferIsTerminated = line.isTerminated
            self._lineBuffer = Substring(line.isTerminated ? line.text + self.delimiter : line.text)
        }
    }

    /// Gets the next available `Character` to be read without changing the current file location.
    /// - Returns: The next available `Character` or `nil` if no such `Character` exists.
    func peek () -> Character? {
        return self.readInternal(consume: false)
    }

    /// Reads the next available `Character`.
    /// - Returns: The next available `Character` or `nil` if no such `Character` exists.
    func read () -> Character? {
        return self.readInternal(consume: true)
    }

    /// Reads the next available line.
    ///
    /// If some characters of the current line were already consumed by `read()`, the remainder of that
    /// line is returned.
    /// - Returns: The next available line without delimiter as a `String` or `nil` if no such line exists.
    func readLine () -> String? {
        // First serve the request out of the line buffer, if it is not empty.
        if !self._lineBuffer.isEmpty {
            let pending = self._lineBuffer
            let wasTerminated = self._lineBufferIsTerminated
            self._lineBuffer = ""
            self._lineBufferIsTerminated = false
            self.offsetInText += UInt64(pending.count)
            guard wasTerminated else {
                return String(pending)
            }
            // Strip the complete delimiter. This works on unicode scalars because the delimiter may be
            // longer than one `Character`, or may have merged with the end of the line into a single
            // `Character` (e.g. "\r" followed by "\n").
            let scalars = pending.unicodeScalars
            let delimiterLength = self.delimiter.unicodeScalars.count
            guard let cut = scalars.index(scalars.endIndex, offsetBy: -delimiterLength, limitedBy: scalars.startIndex) else {
                // The read position was inside the delimiter: nothing of the line itself is left.
                return ""
            }
            return String(pending[..<cut])
        }
        // Then serve the request out of the data buffer, which is replenished from the file.
        guard let line = self.readLineInternal() else {
            return nil
        }
        // Count characters exactly as `read()` would deliver them, so that offsets obtained after
        // `readLine()` can be revisited with `seek(toTextOffset:)`.
        let consumed = line.isTerminated ? (line.text + self.delimiter).count : line.text.count
        self.offsetInText += UInt64(consumed)
        return line.text
    }

    /// Reads the remainder of the file.
    /// - Returns: A `String` containing the remainder of the file or `nil` if nothing could be read.
    func readToEndOfFile () -> String? {
        // Characters that were already decoded into the line buffer come first; the buffer already
        // includes the delimiter of its line, if there was one.
        let pending = String(self._lineBuffer)
        self._lineBuffer = ""
        self._lineBufferIsTerminated = false

        // Decode the buffered bytes together with the rest of the file, so that a multi-byte sequence
        // split between the two is not lost, and empty the buffer so the bytes are not served twice.
        var remaining = self._dataBuffer
        self._dataBuffer.removeAll()
        remaining.append(self._fileHandle.readDataToEndOfFile())

        guard !pending.isEmpty || !remaining.isEmpty else {
            return nil
        }
        let rest = remaining.isEmpty ? "" : self.decode(remaining)
        self.offsetInText += UInt64(pending.count + rest.count)
        return pending + rest
    }

    /// Rewinds the file to the beginning and discards everything that was buffered.
    func rewind () {
        self.offsetInText = 0
        self._lineBuffer = ""
        self._lineBufferIsTerminated = false
        self._dataBuffer.removeAll()
        self._fileHandle.seek(toFileOffset: 0)
    }

    /// Seeks to the given character offset in the text.
    /// This is painfully slow, since the file needs to be scanned again.
    /// Seeking stops early at the end of the text if `offset` lies beyond it.
    /// - Parameters:
    ///   - offset: the offset of the character in the text we want to seek to.
    func seek (toTextOffset offset: UInt64) {
        guard offset != self.offsetInText else {
            return
        }
        // Going backwards is only possible by starting over from the beginning.
        if offset < self.offsetInText {
            self.rewind()
        }
        // Each successful call to read() advances self.offsetInText by one.
        while self.offsetInText < offset, self.read() != nil { }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment