Created
June 13, 2012 15:48
-
-
Save tamtam180/2924907 to your computer and use it in GitHub Desktop.
なんか構造が美しくないのでリファクタしたい・・
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * String helpers: whitespace detection that also covers several non-breaking
 * and zero-width characters, trimming based on that detection, and a simple
 * tokenizer that keeps double-quoted phrases together.
 */
public class StringUtils {

    // Space-like characters for which Character.isWhitespace() returns false.
    private static final char NO_BREAK_SPACE = '\u00A0';
    private static final char NARROW_NOBREAK_SPACE = '\u202F';
    private static final char ZERO_WIDTH_NOBREAK_SPACE = '\uFEFF';
    private static final char WORD_JOINER = '\u2060';
    private static final char FIGURE_SPACE = '\u2007';

    /**
     * Tells whether a character is treated as blank by this class.
     *
     * @param c the character to test
     * @return {@code true} if {@link Character#isWhitespace(char)} accepts
     *         {@code c}, or if {@code c} is one of the extra space-like
     *         characters declared as constants in this class
     */
    public static boolean isSpace(char c) {
        if (Character.isWhitespace(c)) {
            return true;
        }
        // Blank characters that isWhitespace() cannot detect.
        switch (c) {
            case NO_BREAK_SPACE:
            case NARROW_NOBREAK_SPACE:
            case ZERO_WIDTH_NOBREAK_SPACE:
            case WORD_JOINER:
            case FIGURE_SPACE:
                return true;
            default:
                return false;
        }
    }

    /**
     * Removes leading and trailing blanks, as defined by {@link #isSpace(char)}.
     *
     * @param str the sequence to trim; may be {@code null}
     * @return {@code null} when {@code str} is {@code null}; the empty string
     *         when {@code str} is empty or consists only of blanks; {@code str}
     *         itself when nothing has to be removed; otherwise the
     *         {@link CharSequence#subSequence(int, int) subsequence} between
     *         the first and last non-blank characters
     */
    public static CharSequence trim(CharSequence str) {
        if (str == null) {
            return null;
        }
        final int len = str.length();

        int head = 0;
        while (head < len && isSpace(str.charAt(head))) {
            head++;
        }
        if (head == len) {
            return "";
        }

        // The character at 'head' is not blank, so the backward scan stops there at the latest.
        int tail = len - 1;
        while (tail > head && isSpace(str.charAt(tail))) {
            tail--;
        }

        if (head == 0 && tail == len - 1) {
            return str;
        }
        return str.subSequence(head, tail + 1);
    }

    /**
     * Splits a string into tokens separated by blanks (see {@link #isSpace(char)}).
     * A token that starts with a double quote extends to the next double quote,
     * or to the end of the input when the quote is never closed, and may contain
     * blanks. The quotes themselves are dropped and the quoted content is trimmed.
     * Tokens that end up empty are not reported.
     *
     * @param str the text to split; may be {@code null}
     * @return the tokens in order of appearance; an empty list when {@code str}
     *         is {@code null} or empty
     */
    public static List<String> tokenize(String str) {
        List<String> tokens = new ArrayList<String>();
        if (str == null || str.length() == 0) {
            return tokens;
        }

        final int len = str.length();
        int i = 0;
        while (i < len) {
            if (isSpace(str.charAt(i))) {
                i++;
                continue;
            }

            StringBuilder buffer = new StringBuilder();
            if (str.charAt(i) == '"') {
                i++; // skip the opening quote
                while (i < len && str.charAt(i) != '"') {
                    buffer.append(str.charAt(i++));
                }
                i++; // skip the closing quote (moves past the end if the quote was never closed)
            } else {
                while (i < len && !isSpace(str.charAt(i))) {
                    buffer.append(str.charAt(i++));
                }
            }

            // Quoted content may carry inner leading/trailing blanks; trimming also
            // turns an empty or all-blank phrase into "" so that it gets skipped.
            String token = trim(buffer).toString();
            if (!token.isEmpty()) {
                tokens.add(token);
            }
        }
        return tokens;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@Test | |
public void トークン分割テスト_1() { | |
List<String> tokens = StringUtils.tokenize(null); | |
assertThat(tokens.size(), is(0)); | |
} | |
@Test | |
public void トークン分割テスト_2() { | |
List<String> tokens = StringUtils.tokenize(""); | |
assertThat(tokens.size(), is(0)); | |
} | |
@Test | |
public void トークン分割テスト_3() { | |
List<String> tokens = StringUtils.tokenize("あ"); | |
assertThat(tokens.size(), is(1)); | |
assertThat(tokens.get(0), is("あ")); | |
} | |
@Test | |
public void トークン分割テスト_4() { | |
List<String> tokens = StringUtils.tokenize(" あ "); | |
assertThat(tokens.size(), is(1)); | |
assertThat(tokens.get(0), is("あ")); | |
} | |
@Test | |
public void トークン分割テスト_5() { | |
List<String> tokens = StringUtils.tokenize(" あ "); | |
assertThat(tokens.size(), is(1)); | |
assertThat(tokens.get(0), is("あ")); | |
} | |
@Test | |
public void トークン分割テスト_6() { | |
List<String> tokens = StringUtils.tokenize(" あ い "); | |
assertThat(tokens.size(), is(2)); | |
assertThat(tokens.get(0), is("あ")); | |
assertThat(tokens.get(1), is("い")); | |
} | |
@Test | |
public void トークン分割テスト_7() { | |
List<String> tokens = StringUtils.tokenize(" \"あ い "); | |
assertThat(tokens.size(), is(1)); | |
assertThat(tokens.get(0), is("あ い")); | |
} | |
@Test | |
public void トークン分割テスト_8() { | |
List<String> tokens = StringUtils.tokenize(" \"あ い \" う え お "); | |
assertThat(tokens.size(), is(4)); | |
assertThat(tokens.get(0), is("あ い")); | |
assertThat(tokens.get(1), is("う")); | |
assertThat(tokens.get(2), is("え")); | |
assertThat(tokens.get(3), is("お")); | |
} | |
@Test | |
public void トークン分割テスト_9() { | |
List<String> tokens = StringUtils.tokenize(" \"あ い \" う え お \""); | |
assertThat(tokens.size(), is(4)); | |
assertThat(tokens.get(0), is("あ い")); | |
assertThat(tokens.get(1), is("う")); | |
assertThat(tokens.get(2), is("え")); | |
assertThat(tokens.get(3), is("お")); | |
} | |
@Test | |
public void トークン分割テスト_10() { | |
List<String> tokens = StringUtils.tokenize(" \"あ い う え お \""); | |
assertThat(tokens.size(), is(1)); | |
assertThat(tokens.get(0), is("あ い う え お")); | |
} | |
@Test | |
public void トークン分割テスト_11() { | |
List<String> tokens = StringUtils.tokenize(" \" あ い う え お \" "); | |
assertThat(tokens.size(), is(1)); | |
assertThat(tokens.get(0), is("あ い う え お")); | |
} | |
@Test | |
public void トークン分割テスト_12() { | |
List<String> tokens = StringUtils.tokenize(" \"あ い う え お\"\""); | |
assertThat(tokens.size(), is(1)); | |
assertThat(tokens.get(0), is("あ い う え お")); | |
} | |
@Test | |
public void トークン分割テスト_13() { | |
List<String> tokens = StringUtils.tokenize(" \"あ い う え お\"\" "); | |
assertThat(tokens.size(), is(1)); | |
assertThat(tokens.get(0), is("あ い う え お")); | |
} |
わかりやすくするとか言っておいて、連番なので意味ないですね。
小宮氏のをいったんコピペでもってきた
処理を追加していったら衰退してしまった・・。
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
テスト追加。
テストケースのメソッド名を日本語にする事で何のテストかわかりやすくするライフハック。良い子は真似しちゃだめだとおもいます。