Created
October 12, 2014 17:48
-
-
Save ghidalgo3/dbba3b5d1ccd0f611896 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package s_mach.string | |
import scala.collection.mutable.ArrayBuffer | |
import scala.util.matching.Regex | |
/**
 * Strategy for breaking a string into a sequence of words.
 */
trait WordSplitter {
  /**
   * Split a string into words.
   * @param s string to split
   * @return iterator over the words found in s
   */
  def split(s: String): Iterator[String]

  /**
   * Helper for splitters that emit an optional leading word followed by
   * every match of a regex.
   * @param optPrefix optional prefix; when defined it is the first element returned
   * @param str string to search
   * @param regex regex whose successive matches in str become the remaining elements
   * @return iterator yielding the prefix (if any) and then each regex match, in order
   */
  protected def splitterAccumulate(optPrefix: Option[String], str: String, regex: Regex): Iterator[String] =
    // Concatenating the two iterators avoids an intermediate mutable buffer;
    // matches are produced on demand as the result is consumed.
    optPrefix.iterator ++ regex.findAllIn(str)
}
/**
 * Splits a string into words separated by runs of whitespace.
 */
class WhitespaceWordSplitter extends WordSplitter {
  import WordSplitter.whiteSpace

  /**
   * @param s string to split
   * @return iterator over the non-empty tokens between whitespace runs
   */
  override def split(s: String): Iterator[String] =
    // Regex.split keeps a leading "" when s starts with a separator and returns
    // Array("") for empty input (trailing empties are already dropped); an
    // empty string is never a word, so filter those out.
    whiteSpace.split(s).iterator.filter(_.nonEmpty)
}
/**
 * Splits a string into words separated by runs of whitespace and/or underscores.
 */
class WhitespaceOrUnderscoreWordSplitter extends WordSplitter {
  import WordSplitter.whiteSpaceOrUnderscores

  /**
   * @param s string to split
   * @return iterator over the non-empty tokens between separator runs
   */
  override def split(s: String): Iterator[String] =
    // Regex.split keeps a leading "" when s starts with a separator (e.g. "_foo")
    // and returns Array("") for empty input; an empty string is never a word,
    // so filter those out.
    whiteSpaceOrUnderscores.split(s).iterator.filter(_.nonEmpty)
}
/**
 * Splits a camelCase string into its leading lower-case word (if present)
 * followed by each capitalized word.
 */
class CamelCaseWordSplitter extends WordSplitter {
  import WordSplitter.{allLowerPrefix, capitalizedWord}

  /**
   * @param s string to split
   * @return iterator yielding the lower-case prefix of s, when s starts with
   *         one, followed by every capitalized word in s
   */
  override def split(s: String): Iterator[String] = {
    // findPrefixOf only matches at the very start of s. findFirstIn would pick
    // up the first lower-case run anywhere, e.g. "oo" for "FooBar", and emit it
    // as a bogus leading word.
    splitterAccumulate(allLowerPrefix.findPrefixOf(s), s, capitalizedWord)
  }
}
/**
 * Splits a PascalCase string into its capitalized words.
 */
class PascalCaseWordSplitter extends WordSplitter {
  import WordSplitter.capitalizedWord

  /**
   * @param s string to split
   * @return iterator over every match of the capitalized-word regex in s;
   *         text that is not part of such a match is not returned
   */
  override def split(s: String): Iterator[String] =
    // No prefix word here: only the regex matches are emitted.
    splitterAccumulate(optPrefix = None, str = s, regex = capitalizedWord)
}
/**
 * Shared regexes and ready-made singleton instances of the word splitters.
 */
object WordSplitter {
  /** One or more whitespace characters. */
  val whiteSpace: Regex = """\s+""".r

  /** One or more characters that are each whitespace or an underscore. */
  val whiteSpaceOrUnderscores: Regex = """(\s|_)+""".r

  /** A run of lower-case ASCII letters and/or underscores. */
  val allLowerPrefix: Regex = """[a-z_]+""".r

  /**
   * One or more upper-case ASCII letters followed by any number of
   * lower-case letters, underscores or digits.
   */
  val capitalizedWord: Regex = """([A-Z]+[a-z_0-9]*)""".r

  /** Singleton splitter on whitespace. */
  object Whitespace extends WhitespaceWordSplitter

  /** Singleton splitter on whitespace or underscores. */
  object WhitespaceOrUnderscore extends WhitespaceOrUnderscoreWordSplitter

  /** Singleton splitter for camelCase strings. */
  object CamelCase extends CamelCaseWordSplitter

  /** Singleton splitter for PascalCase strings. */
  object PascalCase extends PascalCaseWordSplitter
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment