Created
July 28, 2011 19:55
-
-
Save dcsobral/1112401 to your computer and use it in GitHub Desktop.
Faster char split
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Since this link got passed around some, I'm going to paste here the | |
// versions made by sirthias. If you want to see the old ones, | |
// look up the history. | |
// Well, actually, these are non-pimped versions. See the comments to | |
// a link where they are provided as pimped methods on String. | |
import scala.annotation.tailrec | |
def fastSplit(s: String, delimiter: Char): List[String] = { | |
@tailrec | |
def split(end: Int, elements: List[String]): List[String] = { | |
val ix = s.lastIndexOf(delimiter, end - 1) | |
if (ix < 0) | |
s.substring(0, end) :: elements | |
else | |
split(ix, s.substring(ix + 1, end) :: elements) | |
} | |
split(s.length, Nil) | |
} | |
// And this one produces a Stream, so you don't need to split any more segments than you need. | |
def lazySplit(s: String, delimiter: Char): Stream[String] = { // based on an implemented by Jed Wesley-Smith | |
def split(start: Int): Stream[String] = { | |
val ix = s.indexOf(delimiter, start) | |
if (ix < 0) | |
Stream.cons(s.substring(start), Stream.Empty) | |
else | |
Stream.cons(s.substring(start, ix), split(ix + 1)) | |
} | |
split(0) | |
} | |
// This produces an Iterator, which is faster than Stream | |
def splitIterator(s: String, delimiter: Char): Iterator[String] = new Iterator[String] { | |
private var hasNextFlag = true | |
var start = 0 | |
def hasNext = hasNextFlag | |
def next = | |
if (hasNext) { | |
val ix = s.indexOf(delimiter, start) | |
val result = if (ix < 0) { | |
hasNextFlag = false | |
s.substring(start) | |
} else s.substring(start, ix) | |
start = ix + 1 | |
result | |
} else throw new java.util.NoSuchElementException("next on empty iterator") | |
} |
You can make the eager version of this even cleaner and more efficient by searching for the delimiter from the back of the string rather than the front.
I just committed a polished version of these two implementations (with added tests) to spray:
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
didn't realise about the forking actually, nice!
yeah, this one can have nicer memory usage particularly if you don't iterate across all elements