Skip to content

Instantly share code, notes, and snippets.

@s-aska
Created June 23, 2016 09:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save s-aska/63dc5cf620c12d2b372cee3d517a64c8 to your computer and use it in GitHub Desktop.
Save s-aska/63dc5cf620c12d2b372cee3d517a64c8 to your computer and use it in GitHub Desktop.
Scanner
//
// DataScanner.swift
// DataScanner
//
// Created by Shinichiro Aska on 8/20/15.
// Copyright © 2015 Shinichiro Aska. All rights reserved.
//
import Foundation
/// A simple buffered scanner that splits accumulated `Data` on delimiters or line breaks.
///
/// Bytes are added with `append(_:)` and consumed from the front of `data` by the
/// `read`, `next` and `nextLine` families. Everything that is returned, together with
/// any delimiter that terminated it, is removed from the buffer.
public class DataScanner {

    /// Constants used by `nextLine()` and `hasNextLine()`.
    struct Static {
        /// A single carriage return byte (`\r`).
        static let dataCR = Data("\r".utf8)
        /// A single line feed byte (`\n`).
        static let dataLF = Data("\n".utf8)
        /// Empty search options; no longer used by the scanner itself but kept for
        /// source compatibility with code that referenced it.
        static let options = Data.SearchOptions(rawValue: 0)
    }

    /// The buffer holding the bytes that have not been consumed yet.
    public var data: Data

    /// The delimiter used by the argument-less `next()` and `hasNext()`.
    public var delimiter: Data?

    /**
     Creates a `DataScanner` with an empty buffer and no default delimiter.
     */
    public init() {
        self.data = Data()
        self.delimiter = nil
    }

    /**
     Creates a `DataScanner` with an empty buffer.
     - parameter delimiter: delimiter used by the argument-less `next()` and `hasNext()`.
     */
    public init(delimiter: Data) {
        self.data = Data()
        self.delimiter = delimiter
    }

    /**
     Creates a `DataScanner` with an empty buffer.
     - parameter delimiter: delimiter used by the argument-less `next()` and `hasNext()`;
       it is stored as its UTF-8 bytes.
     */
    public init(delimiter: String) {
        self.data = Data()
        // `String.utf8` cannot fail, so no optional needs to be unwrapped here.
        self.delimiter = Data(delimiter.utf8)
    }

    /**
     Appends the content of another `Data` value to the end of the buffer.
     - parameter data: bytes to be added to the buffer.
     */
    public func append(_ data: Data) {
        self.data.append(data)
    }

    /**
     Skips `offset` bytes, returns up to `length` bytes that follow, and removes both
     the skipped and the returned bytes from the buffer.

     When fewer than `length` bytes are available after `offset`, the remainder of the
     buffer is returned. When `offset` equals the buffer size, an empty `Data` is
     returned and the whole buffer is discarded.

     - parameter offset: number of leading bytes to skip
     - parameter length: maximum number of bytes to return
     - returns: the bytes read, or nil if `offset` or `length` is negative or `offset`
       is larger than the buffer (the buffer is left untouched in that case).
     */
    public func read(offset: Int, length: Int) -> Data? {
        // Negative arguments used to build an invalid range and trap.
        guard offset >= 0, length >= 0, offset <= data.count else {
            return nil
        }
        let start = data.startIndex + offset
        let end = start + min(length, data.count - offset)
        let chunk = data.subdata(in: start..<end)
        data.removeSubrange(data.startIndex..<end)
        return chunk
    }

    /**
     Returns up to `length` bytes from the front of the buffer and removes them.
     - parameter length: maximum number of bytes to return
     - returns: the bytes read, or nil if the buffer is empty or `length` is negative.
     */
    public func read(length: Int) -> Data? {
        guard length >= 0, !data.isEmpty else {
            return nil
        }
        let end = data.startIndex + min(length, data.count)
        let chunk = data.subdata(in: data.startIndex..<end)
        data.removeSubrange(data.startIndex..<end)
        return chunk
    }

    /**
     Returns true if the buffer contains the default delimiter.

     Calling this on a scanner created without a delimiter is a programmer error and
     terminates the process.

     - returns: true if the buffer contains the default delimiter.
     */
    public func hasNext() -> Bool {
        guard let delimiter = delimiter else {
            fatalError("hasNext() need delimiter."
                + " eg: DataScanner(delimiter: Data or String)")
        }
        return hasNext(delimiter)
    }

    /**
     Returns the bytes up to the next default delimiter and removes them, together
     with the delimiter, from the buffer. The delimiter is not part of the result.

     Calling this on a scanner created without a delimiter is a programmer error and
     terminates the process.

     - returns: the bytes before the delimiter, or nil if the buffer has no delimiter.
     */
    public func next() -> Data? {
        guard let delimiter = delimiter else {
            fatalError("next() need delimiter. eg: DataScanner(delimiter: Data or String)")
        }
        return next(delimiter)
    }

    /**
     Returns true if the buffer contains the UTF-8 bytes of `delimiter`.
     - parameter delimiter: delimiter string
     - returns: true if the buffer contains the delimiter.
     */
    public func hasNext(_ delimiter: String) -> Bool {
        return hasNext(Data(delimiter.utf8))
    }

    /**
     Returns the bytes up to the next occurrence of `delimiter` (as UTF-8) and removes
     them, together with the delimiter, from the buffer. The delimiter is not part of
     the result.
     - parameter delimiter: delimiter string
     - returns: the bytes before the delimiter, or nil if the buffer has no delimiter.
     */
    public func next(_ delimiter: String) -> Data? {
        return next(Data(delimiter.utf8))
    }

    /**
     Returns true if the buffer contains `delimiter`.
     - parameter delimiter: delimiter bytes
     - returns: true if the buffer contains the delimiter.
     */
    public func hasNext(_ delimiter: Data) -> Bool {
        return data.range(of: delimiter) != nil
    }

    /**
     Returns the bytes up to the next occurrence of `delimiter` and removes them,
     together with the delimiter, from the buffer. The delimiter is not part of the
     result.
     - parameter delimiter: delimiter bytes
     - returns: the bytes before the delimiter, or nil if the buffer has no delimiter.
     */
    public func next(_ delimiter: Data) -> Data? {
        guard let found = data.range(of: delimiter) else {
            return nil
        }
        let chunk = data.subdata(in: data.startIndex..<found.lowerBound)
        data.removeSubrange(data.startIndex..<found.upperBound)
        return chunk
    }

    /**
     Returns true if the buffer contains a line break.
     Both CRLF and LF terminate a line, so only LF needs to be searched for.
     - returns: true if the buffer contains a line break.
     */
    public func hasNextLine() -> Bool {
        return data.range(of: Static.dataLF) != nil
    }

    /**
     Returns the next line and removes it, together with its line break, from the
     buffer. Both CRLF and LF terminate a line; a lone CR does not.
     - returns: the next line without its line break, or nil if the buffer contains
       no line feed.
     */
    public func nextLine() -> Data? {
        guard let lineFeed = data.range(of: Static.dataLF) else {
            return nil
        }
        var lineEnd = lineFeed.lowerBound
        // Only look for a preceding CR when there is a byte in front of the LF;
        // a buffer that starts with "\n" used to index position -1 and crash.
        if lineEnd > data.startIndex, data[lineEnd - 1] == 0x0D {
            lineEnd -= 1
        }
        let line = data.subdata(in: data.startIndex..<lineEnd)
        data.removeSubrange(data.startIndex..<lineFeed.upperBound)
        return line
    }
}
//
// DataScannerTests.swift
// DataScannerTests
//
// Created by Shinichiro Aska on 8/20/15.
// Copyright © 2015 Shinichiro Aska. All rights reserved.
//
import XCTest
import MutableDataScanner
/// Unit and performance tests for `DataScanner`.
///
/// Results are compared through optional chaining rather than force unwrapping, so an
/// unexpected nil fails the individual assertion instead of crashing the test run.
class DataScannerTests: XCTestCase {

    // MARK: - Initialisers

    /// A scanner created without arguments has no default delimiter.
    func testInitDefault() {
        let scanner = DataScanner()
        XCTAssertNil(scanner.delimiter)
    }

    /// The `Data` initialiser stores the delimiter unchanged.
    func testInitWithDelimiterData() {
        let scanner = DataScanner(delimiter: "\t".dataValue)
        XCTAssertEqual(scanner.delimiter?.stringValue, "\t")
    }

    /// The `String` initialiser stores the delimiter as UTF-8 bytes.
    func testInitWithDelimiterString() {
        let scanner = DataScanner(delimiter: "\t")
        XCTAssertEqual(scanner.delimiter?.stringValue, "\t")
    }

    // MARK: - read(length:) / read(offset:length:)

    /// Consecutive reads consume the buffer front to back; an empty buffer yields nil.
    func testReadLength() {
        let scanner = DataScanner()
        let data = "0123456789abcdefghijklmnopqrstuvwxyz".dataValue
        scanner.append(data)
        XCTAssertEqual(scanner.read(length: 10)?.stringValue, "0123456789", "read length")
        XCTAssertEqual(scanner.read(length: 10)?.stringValue, "abcdefghij", "read length")
        XCTAssertEqual(scanner.read(length: scanner.data.count)?.stringValue,
                       "klmnopqrstuvwxyz", "read length")
        XCTAssertNil(scanner.read(length: 1))
    }

    /// Asking for more bytes than are buffered returns whatever is available.
    func testReadLengthOver() {
        let scanner = DataScanner()
        let data = "012345".dataValue
        scanner.append(data)
        XCTAssertEqual(scanner.read(length: 100)?.stringValue, "012345", "read length")
        XCTAssertNil(scanner.read(length: 1))
    }

    /// Reads with an offset drop the skipped bytes as well as the returned ones.
    func testReadOffsetLength() {
        let scanner = DataScanner()
        let data = "0123456789abcdefghijklmnopqrstuvwxyz".dataValue
        scanner.append(data)
        XCTAssertEqual(scanner.read(offset: 3, length: 7)?.stringValue, "3456789", "read length")
        XCTAssertEqual(scanner.read(offset: 3, length: 7)?.stringValue, "defghij", "read length")
        XCTAssertEqual(scanner.read(offset: 3, length: scanner.data.count)?.stringValue,
                       "nopqrstuvwxyz", "read length")
        XCTAssertNil(scanner.read(offset: 3, length: 1))
    }

    // MARK: - nextLine() / next()

    /// LF and CRLF end a line; the trailing "1\r1" has no LF and stays in the buffer.
    func testNextLine() {
        let scanner = DataScanner()
        let data = "1\n1\r\n1\r1".dataValue
        var count = 0
        scanner.append(data)
        while let line = scanner.nextLine() {
            XCTAssertEqual(line.stringValue, "1")
            count += 1
        }
        XCTAssertEqual(count, 2, "data count")
        XCTAssertEqual(scanner.data.count, 3, "buffer length")
    }

    /// With a CRLF delimiter a lone LF is ordinary content.
    func testNext() {
        let scanner = DataScanner(delimiter: "\r\n")
        let data = "012345\nabcdefg\r\n".dataValue
        var count = 0
        scanner.append(data)
        while let line = scanner.next() {
            XCTAssertEqual(line.stringValue, "012345\nabcdefg")
            count += 1
        }
        XCTAssertEqual(count, 1, "data count")
        XCTAssertEqual(scanner.data.count, 0, "buffer length")
    }

    // MARK: - Performance

    /// Measures line splitting through `nextLine()`.
    func testPerformanceAutoDelimiter() {
        self.measure {
            let scanner = DataScanner()
            let data = "12345\n12345\r\n12345\r12345\n12345\r\n12345".dataValue
            for _ in 1...10000 {
                scanner.append(data)
                // Drain every complete line; the results are irrelevant here.
                while scanner.nextLine() != nil {
                }
            }
        }
    }

    /// Measures splitting through `next()` with an explicit "\n" delimiter.
    func testPerformanceSpecificDelimiter() {
        self.measure {
            let scanner = DataScanner(delimiter: "\n")
            let data = "12345\n12345\r\n12345\r12345\n12345\r\n12345".dataValue
            for _ in 1...10000 {
                scanner.append(data)
                // Drain every complete chunk; the results are irrelevant here.
                while scanner.next() != nil {
                }
            }
        }
    }

    /// Baseline for comparison: decode the buffer to a string and split it on "\n".
    func testPerformanceSplitReader() {
        let data = "12345\n12345\r\n12345\r12345\n12345\r\n12345".dataValue
        var buffer = Data()
        self.measure {
            for _ in 1...10000 {
                buffer.append(data)
                if let string = NSString(data: buffer, encoding: String.Encoding.utf8.rawValue) {
                    var array = string.components(separatedBy: "\n")
                    // The last component is the incomplete line; carry it over.
                    buffer = array.popLast()?.data(using: String.Encoding.utf8) ?? Data()
                    // Walk the complete lines so the work matches the scanner benchmarks.
                    for _ in array {
                    }
                }
            }
        }
    }
}
private extension Data {
    /// The receiver decoded as UTF-8 text, for use in test assertions.
    ///
    /// Traps if the bytes cannot be decoded as UTF-8.
    var stringValue: NSString {
        let utf8 = String.Encoding.utf8.rawValue
        let decoded = NSString(data: self, encoding: utf8)
        return decoded!
    }
}
private extension String {
    /// The UTF-8 bytes of the string, for feeding test input into a scanner.
    var dataValue: Data {
        // The UTF-8 view cannot fail, so there is no optional to force-unwrap.
        return Data(utf8)
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment