-
-
Save austinzheng/d6c674780a58cb63832c4df3f809e683 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Foundation | |
/// Number of sample lines that `generateTestData()` produces for the benchmark.
let N = 1_000_000
/// Builds the benchmark input: `N` identical copies of one comma-separated
/// sample line.
///
/// The sample mixes empty fields, fields padded with spaces, a field holding
/// only a space, and a field that starts with a no-break space (U+00A0).
///
/// - Returns: An array containing `N` copies of the sample line.
func generateTestData() -> [String] {
    let sampleLine = ",,abc, 123 ,x, , more more more,\u{A0}and yet more, "
    return (0..<N).map { _ in sampleLine }
}
/// Splits `s` on every occurrence of the UTF-16 code unit `sep` and strips
/// leading and trailing whitespace/newline code units from each field.
///
/// The string is scanned once over its UTF-16 view. Whitespace inside a field
/// (between two non-whitespace code units) is preserved. A field that contains
/// nothing but whitespace, or nothing at all, is reported as `""`.
///
/// - Parameters:
///   - s: The text to split.
///   - sep: The UTF-16 code unit that separates fields.
/// - Returns: The trimmed fields in order of appearance. There is always one
///   more field than there are separators, so an empty input yields `[""]`.
func splitAndTrim(s: String, sep: UInt16) -> [String] {
    let whitespace = NSCharacterSet.whitespaceAndNewlineCharacterSet()
    let units = s.utf16

    var fields = [String]()
    fields.reserveCapacity(10)

    // Positions of the first and last non-whitespace code units seen in the
    // field currently being scanned; nil until such a code unit is found.
    var content: (start: String.UTF16View.Index, stop: String.UTF16View.Index)? = nil

    // Text of the current field with the surrounding whitespace cut off.
    func trimmedField() -> String {
        guard let span = content else {
            return ""
        }
        return String(units[span.start..<span.stop.successor()])
    }

    for position in units.indices {
        let unit = units[position]
        if unit == sep {
            // A separator closes the current field and opens a fresh one.
            fields.append(trimmedField())
            content = nil
        } else if !whitespace.characterIsMember(unit) {
            // Keep the first non-whitespace position, always advance the last.
            content = (start: content?.start ?? position, stop: position)
        }
    }

    // Whatever follows the final separator (possibly nothing) is the last field.
    fields.append(trimmedField())
    return fields
}
/// Splits every line in `data` on commas with `splitAndTrim` and totals the
/// number of fields produced.
///
/// - Parameter data: The lines to split.
/// - Returns: The sum of the field counts over all lines.
func doSplits(data: [String]) -> Int {
    // A one-character literal always has a first UTF-16 code unit, so the
    // forced unwrap cannot fail.
    let comma = ",".utf16.first!
    return data.reduce(0) { total, line in
        total + splitAndTrim(line, sep: comma).count
    }
}
// Benchmark driver: build the input first so that only the splitting pass
// falls inside the timed region.
let data = generateTestData()

let start = NSDate()
let sum = doSplits(data)
// Wall-clock time spent in doSplits, in seconds.
let elapsed = NSDate().timeIntervalSinceDate(start)

print("elapsed: \(elapsed)")
print("sum: \(sum)")
/* | |
And here's the Java code: | |
import java.util.List; | |
import java.util.ArrayList; | |
public class ParseTest { | |
static int N = 1000000; | |
public static void main(String[] args) { | |
List<String> data = generateTestData(); | |
for (int i = 0; i < 3; ++i) { | |
long start = System.currentTimeMillis(); | |
int sum = doSplits(data); | |
System.out.println("elapsed: " + | |
((System.currentTimeMillis() - start) / 1000.0 + " seconds")); | |
System.out.println("sum: " + sum); | |
} | |
} | |
static int doSplits(List<String> a) { | |
int count = 0; | |
for (String s : a) { | |
List<String> parts = splitAndTrim(s, ','); | |
count += parts.size(); | |
} | |
return count; | |
} | |
static List<String> splitAndTrim(String s, char sep) { | |
List<String> result = new ArrayList<>(10); | |
int eos = s.length(); | |
int begin = eos; | |
int end = eos; | |
for (int i = 0; i != eos; ++i) { | |
char c = s.charAt(i); | |
if (c == sep) { | |
result.add(begin == eos ? "" : s.substring(begin, end + 1)); | |
begin = eos; | |
end = eos; | |
} else if (!Character.isSpaceChar(c)) { | |
if (begin == eos) { | |
begin = i; | |
} | |
end = i; | |
} | |
} | |
result.add(begin == eos ? "" : s.substring(begin, end + 1)); | |
return result; | |
} | |
static List<String> generateTestData() { | |
List<String> a = new ArrayList<>(); | |
for (int i = 0; i < N; i++) { | |
a.add(",,abc, 123 ,x, , more more more,\u00A0and yet more, "); | |
} | |
return a; | |
} | |
} | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment