jpsim/LineEndsFind.mm

## README.md

      
    Raw
  

              README.md
            
          
With `"👨‍👩‍👧‍👦\n1\n2\n"` as input:

```console
$ swiftc LineEndsFind.swift && ./LineEndsFind
Unicode unsafe time for 1M strings: 1.12317534297472
Unicode safe time for 1M strings: 3.4386172790546
Unicode unsafe result: [11, 13, 15]
Unicode safe result: [1, 3, 5]
$ clang++ LineEndsFind.mm -ObjC++ -std=c++14 -fobjc-arc -framework QuartzCore -o main && ./main
Unicode unsafe time for 1M strings: 0.876858
Unicode unsafe result: 11 13 15
```


## LineEndsFind.mm
#import <Foundation/Foundation.h>
#import <QuartzCore/QuartzCore.h>
#import <iostream>
#import <vector> // Needed for gist to compile.
#pragma mark - Pure Implementation Functions
const static unichar kUTF16Newline = (unichar)'\n'; // old naming habits die hard!
/**
 * Calculates an array of line end "positions" for a given string.
 * The equivalent Swift function was `(String) -> [Int]` or `(NSString) -> [Int]`
 *
 * In this context a "position" is the zero-based index of a newline
 * character in the string as if it were an array of UTF-16 code units.
 *
 * The string is walked using -length and -getCharacters:range:, so the scan
 * covers exactly the string's code units: an embedded NUL (U+0000) does not
 * end it early, and an empty string yields an empty result.
 *
 * @param s the string; must not be nil (asserted).
 * @return an array of newline positions, in ascending order.
 */
std::vector<size_t> LineEndsFind(NSString* s) {
    assert(s);

    std::vector<size_t> lineEnds;
    const NSUInteger length = [s length];

    // Copy the code units out in fixed-size chunks so the scratch buffer
    // lives on the stack regardless of how long the string is.
    static const NSUInteger kChunkSize = 256;
    unichar chunk[kChunkSize];

    for (NSUInteger offset = 0; offset < length; offset += kChunkSize) {
        const NSUInteger remaining = length - offset;
        const NSUInteger count = remaining < kChunkSize ? remaining : kChunkSize;
        [s getCharacters:chunk range:NSMakeRange(offset, count)];

        for (NSUInteger i = 0; i < count; ++i) {
            if (chunk[i] == kUTF16Newline) {
                lineEnds.push_back(offset + i);
            }
        }
    }
    return lineEnds;
}

/**
 * Benchmark driver: times one million calls to LineEndsFind on a fixed sample
 * string, prints the elapsed time, then prints the positions found for that
 * same string.
 *
 * @return 0 on completion.
 */
int main() {
    // Foundation calls may hand back autoreleased temporaries; give them a
    // pool to drain into for the lifetime of the program.
    @autoreleasepool {
        NSString *const sample = @"👨‍👩‍👧‍👦\n1\n2\n";

        auto t1 = CACurrentMediaTime();
        for (int i = 0; i < 1'000'000; ++i) {
            LineEndsFind(sample);
        }
        auto t2 = CACurrentMediaTime();
        auto duration = t2 - t1;
        std::cout << "Unicode unsafe time for 1M strings: " << duration << '\n';

        std::cout << "Unicode unsafe result: ";
        for (const auto & pos : LineEndsFind(sample)) {
            std::cout << pos << ' ';
        }
        std::cout << '\n';
    }

    return 0;
}

## LineEndsFind.swift
import Foundation
import QuartzCore

/// Fast, Unicode-unsafe scan: returns the index of every UTF-16 code unit in
/// `string` that equals 10 (line feed), in ascending order.
public func makeLineEndArray(string: NSString) -> [Int] {
    var positions: [Int] = []
    let unitCount = string.length
    var cursor = 0
    while cursor < unitCount {
        if string.character(at: cursor) == 10 {
            positions.append(cursor)
        }
        cursor += 1
    }
    return positions
}

/// Slow, Unicode-safe scan: returns the offset, counted in `Character`s, of
/// every character in `string` that equals "\n", in ascending order.
///
/// An even slower functional spelling of the same thing is
/// `string.enumerated().filter { $0.1 == "\n" }.map { $0.0 }`.
public func makeUnicodeSafeLineEndArray(string: String) -> [Int] {
    var positions: [Int] = []
    var offset = 0
    for character in string {
        if character == "\n" {
            positions.append(offset)
        }
        offset += 1
    }
    return positions
}

// Benchmark: one million runs of the UTF-16 (Unicode-unsafe) scan.
do {
    let started = CACurrentMediaTime()
    for _ in 1...1_000_000 {
        _ = makeLineEndArray(string: "👨‍👩‍👧‍👦\n1\n2\n")
    }
    let finished = CACurrentMediaTime()
    let elapsed = finished - started
    print("Unicode unsafe time for 1M strings: \(elapsed)")
}

// Benchmark: one million runs of the Character-based (Unicode-safe) scan.
do {
    let started = CACurrentMediaTime()
    for _ in 1...1_000_000 {
        _ = makeUnicodeSafeLineEndArray(string: "👨‍👩‍👧‍👦\n1\n2\n")
    }
    let finished = CACurrentMediaTime()
    let elapsed = finished - started
    print("Unicode safe time for 1M strings: \(elapsed)")
}

// Show what each variant reports for the sample input.
let unsafePositions = makeLineEndArray(string: "👨‍👩‍👧‍👦\n1\n2\n")
print("Unicode unsafe result: \(unsafePositions)")
let safePositions = makeUnicodeSafeLineEndArray(string: "👨‍👩‍👧‍👦\n1\n2\n")
print("Unicode safe result: \(safePositions)")

## LineEndsFind2.swift
    /// Second, more "Swifty" attempt that walks the string's UTF-16 view.
    ///
    /// Collects the UTF-16 offset of every code unit equal to 10 (line feed)
    /// and prints how many were found. The positions are not returned; this
    /// variant existed just for testing.
    ///
    /// - Parameter string: The text to scan. Must not be empty: an empty
    ///   string fails the precondition.
    public class func makeLineEndArray2(string: String) {
        precondition(!string.isEmpty)
        var lineEnds = [Int]()

        // Enumerate the view directly rather than constructing an index from
        // an integer offset for each element.
        for (offset, codeUnit) in string.utf16.enumerated() where codeUnit == 10 {
            lineEnds.append(offset)
        }
        print("From swift: lineEndsCount = \(lineEnds.count)")
    }
	#import <Foundation/Foundation.h>
	#import <QuartzCore/QuartzCore.h>
	#import <iostream>
	#import <vector> // Needed for gist to compile.
	#pragma mark - Pure Implementation Functions
	const static unichar kUTF16Newline = (unichar)'\n'; // old naming habits die hard!
	/**
	 * Calculates an array of line end "positions" for a given string.
	 * The equivalent Swift function was `(String) -> [Int]` or `(NSString) -> [Int]`
	 *
	 * In this context a "position" is the zero-based index of a newline
	 * character in the string as if it were an array of UTF-16 code units.
	 *
	 * The string is walked using -length and -getCharacters:range:, so the scan
	 * covers exactly the string's code units: an embedded NUL (U+0000) does not
	 * end it early, and an empty string yields an empty result.
	 *
	 * @param s the string; must not be nil (asserted).
	 * @return an array of newline positions, in ascending order.
	 */
	std::vector<size_t> LineEndsFind(NSString* s) {
		assert(s);

		std::vector<size_t> lineEnds;
		const NSUInteger length = [s length];

		// Copy the code units out in fixed-size chunks so the scratch buffer
		// lives on the stack regardless of how long the string is.
		static const NSUInteger kChunkSize = 256;
		unichar chunk[kChunkSize];

		for (NSUInteger offset = 0; offset < length; offset += kChunkSize) {
			const NSUInteger remaining = length - offset;
			const NSUInteger count = remaining < kChunkSize ? remaining : kChunkSize;
			[s getCharacters:chunk range:NSMakeRange(offset, count)];

			for (NSUInteger i = 0; i < count; ++i) {
				if (chunk[i] == kUTF16Newline) {
					lineEnds.push_back(offset + i);
				}
			}
		}
		return lineEnds;
	}

	/**
	 * Benchmark driver: times one million calls to LineEndsFind on a fixed sample
	 * string, prints the elapsed time, then prints the positions found for that
	 * same string.
	 *
	 * @return 0 on completion.
	 */
	int main() {
		// Foundation calls may hand back autoreleased temporaries; give them a
		// pool to drain into for the lifetime of the program.
		@autoreleasepool {
			NSString *const sample = @"👨‍👩‍👧‍👦\n1\n2\n";

			auto t1 = CACurrentMediaTime();
			for (int i = 0; i < 1'000'000; ++i) {
				LineEndsFind(sample);
			}
			auto t2 = CACurrentMediaTime();
			auto duration = t2 - t1;
			std::cout << "Unicode unsafe time for 1M strings: " << duration << '\n';

			std::cout << "Unicode unsafe result: ";
			for (const auto & pos : LineEndsFind(sample)) {
				std::cout << pos << ' ';
			}
			std::cout << '\n';
		}

		return 0;
	}
	import Foundation
	import QuartzCore

	/// Fast, Unicode-unsafe scan: returns the index of every UTF-16 code unit in
	/// `string` that equals 10 (line feed), in ascending order.
	public func makeLineEndArray(string: NSString) -> [Int] {
		var positions: [Int] = []
		let unitCount = string.length
		var cursor = 0
		while cursor < unitCount {
			if string.character(at: cursor) == 10 {
				positions.append(cursor)
			}
			cursor += 1
		}
		return positions
	}

	/// Slow, Unicode-safe scan: returns the offset, counted in `Character`s, of
	/// every character in `string` that equals "\n", in ascending order.
	///
	/// An even slower functional spelling of the same thing is
	/// `string.enumerated().filter { $0.1 == "\n" }.map { $0.0 }`.
	public func makeUnicodeSafeLineEndArray(string: String) -> [Int] {
		var positions: [Int] = []
		var offset = 0
		for character in string {
			if character == "\n" {
				positions.append(offset)
			}
			offset += 1
		}
		return positions
	}

	// Benchmark: one million runs of the UTF-16 (Unicode-unsafe) scan.
	do {
		let started = CACurrentMediaTime()
		for _ in 1...1_000_000 {
			_ = makeLineEndArray(string: "👨‍👩‍👧‍👦\n1\n2\n")
		}
		let finished = CACurrentMediaTime()
		let elapsed = finished - started
		print("Unicode unsafe time for 1M strings: \(elapsed)")
	}

	// Benchmark: one million runs of the Character-based (Unicode-safe) scan.
	do {
		let started = CACurrentMediaTime()
		for _ in 1...1_000_000 {
			_ = makeUnicodeSafeLineEndArray(string: "👨‍👩‍👧‍👦\n1\n2\n")
		}
		let finished = CACurrentMediaTime()
		let elapsed = finished - started
		print("Unicode safe time for 1M strings: \(elapsed)")
	}

	// Show what each variant reports for the sample input.
	let unsafePositions = makeLineEndArray(string: "👨‍👩‍👧‍👦\n1\n2\n")
	print("Unicode unsafe result: \(unsafePositions)")
	let safePositions = makeUnicodeSafeLineEndArray(string: "👨‍👩‍👧‍👦\n1\n2\n")
	print("Unicode safe result: \(safePositions)")
	/// Second, more "Swifty" attempt that walks the string's UTF-16 view.
	///
	/// Collects the UTF-16 offset of every code unit equal to 10 (line feed)
	/// and prints how many were found. The positions are not returned; this
	/// variant existed just for testing.
	///
	/// - Parameter string: The text to scan. Must not be empty: an empty
	///   string fails the precondition.
	public class func makeLineEndArray2(string: String) {
		precondition(!string.isEmpty)
		var lineEnds = [Int]()

		// Enumerate the view directly rather than constructing an index from
		// an integer offset for each element.
		for (offset, codeUnit) in string.utf16.enumerated() where codeUnit == 10 {
			lineEnds.append(offset)
		}
		print("From swift: lineEndsCount = \(lineEnds.count)")
	}