Skip to content

Instantly share code, notes, and snippets.

@tamtam180
Created June 13, 2012 15:48
Show Gist options
  • Save tamtam180/2924907 to your computer and use it in GitHub Desktop.
Save tamtam180/2924907 to your computer and use it in GitHub Desktop.
なんか構造が美しくないのでリファクタしたい・・
/**
 * String helpers: whitespace detection that also covers several no-break
 * characters, trimming based on that detection, and a simple tokenizer that
 * understands double-quoted segments.
 */
public class StringUtils {

    // Characters that Character.isWhitespace() reports as non-whitespace
    // but which this class still treats as spacing.
    private static final char NO_BREAK_SPACE = '\u00A0';
    private static final char NARROW_NOBREAK_SPACE = '\u202F';
    private static final char ZERO_WIDTH_NOBREAK_SPACE = '\uFEFF';
    private static final char WORD_JOINER = '\u2060';
    private static final char FIGURE_SPACE = '\u2007';

    /**
     * Tells whether a character counts as spacing.
     *
     * @param c the character to classify
     * @return {@code true} if {@link Character#isWhitespace(char)} accepts
     *         {@code c}, or if {@code c} is one of the extra no-break /
     *         joiner characters declared in this class
     */
    public static boolean isSpace(char c) {
        return Character.isWhitespace(c)
                || c == NO_BREAK_SPACE
                || c == NARROW_NOBREAK_SPACE
                || c == ZERO_WIDTH_NOBREAK_SPACE
                || c == WORD_JOINER
                || c == FIGURE_SPACE;
    }

    /**
     * Strips leading and trailing spacing, as defined by {@link #isSpace(char)}.
     *
     * @param str the sequence to trim; may be {@code null}
     * @return {@code null} for {@code null} input, the empty string when the
     *         input holds nothing but spacing, {@code str} itself when there
     *         is nothing to strip, otherwise the trimmed sub-sequence
     */
    public static CharSequence trim(CharSequence str) {
        if (str == null) {
            return null;
        }
        final int length = str.length();

        int start = 0;
        while (start < length && isSpace(str.charAt(start))) {
            start++;
        }
        if (start == length) {
            return "";
        }

        // A non-space character exists at 'start', so this scan stops there at the latest.
        int endExclusive = length;
        while (endExclusive > start && isSpace(str.charAt(endExclusive - 1))) {
            endExclusive--;
        }

        if (start == 0 && endExclusive == length) {
            return str;
        }
        return str.subSequence(start, endExclusive);
    }

    /**
     * Splits a string into tokens separated by spacing. A token that starts
     * with a double quote extends to the next double quote (or to the end of
     * the input when no closing quote exists); the quotes are dropped and the
     * content is trimmed. Tokens that end up empty are not reported.
     *
     * @param str the text to split; may be {@code null}
     * @return a new list of tokens, empty for {@code null} or empty input
     */
    public static List<String> tokenize(String str) {
        List<String> result = new ArrayList<String>();
        if (str == null || str.isEmpty()) {
            return result;
        }

        final int end = str.length();
        int pos = 0;
        while (pos < end) {
            char first = str.charAt(pos);
            if (isSpace(first)) {
                pos++;
                continue;
            }

            String raw;
            if (first == '"') {
                int open = pos + 1; // step over the opening quote
                int close = str.indexOf('"', open);
                if (close < 0) {
                    close = end; // unterminated: take everything that is left
                }
                raw = str.substring(open, close);
                pos = close + 1; // step over the closing quote
            } else {
                int stop = pos;
                while (stop < end && !isSpace(str.charAt(stop))) {
                    stop++;
                }
                raw = str.substring(pos, stop);
                pos = stop;
            }

            if (raw.isEmpty()) {
                continue;
            }
            String token = trim(raw).toString();
            if (!token.isEmpty()) {
                result.add(token);
            }
        }
        return result;
    }
}
/** Tokenizing {@code null} produces an empty list. */
@Test
public void トークン分割テスト_1() {
    assertThat(StringUtils.tokenize(null).size(), is(0));
}
/** Tokenizing the empty string produces an empty list. */
@Test
public void トークン分割テスト_2() {
    assertThat(StringUtils.tokenize("").size(), is(0));
}
/** A single character with no surrounding spacing is one token. */
@Test
public void トークン分割テスト_3() {
    final List<String> actual = StringUtils.tokenize("あ");

    assertThat(actual.size(), is(1));
    assertThat(actual.get(0), is("あ"));
}
/** Spacing around a single token is discarded. */
@Test
public void トークン分割テスト_4() {
    final List<String> actual = StringUtils.tokenize(" あ ");

    assertThat(actual.size(), is(1));
    assertThat(actual.get(0), is("あ"));
}
/** A single token surrounded by spacing yields just that token. */
@Test
public void トークン分割テスト_5() {
    final List<String> actual = StringUtils.tokenize(" あ ");

    assertThat(actual.size(), is(1));
    assertThat(actual.get(0), is("あ"));
}
/** Two tokens separated by spacing are returned in input order. */
@Test
public void トークン分割テスト_6() {
    final List<String> actual = StringUtils.tokenize(" あ い ");

    assertThat(actual.size(), is(2));
    assertThat(actual.get(0), is("あ"));
    assertThat(actual.get(1), is("い"));
}
/** An unterminated quote runs to the end of the input; its content is trimmed. */
@Test
public void トークン分割テスト_7() {
    final List<String> actual = StringUtils.tokenize(" \"あ い ");

    assertThat(actual.size(), is(1));
    assertThat(actual.get(0), is("あ い"));
}
/** A quoted segment forms one token; the unquoted words after it are split normally. */
@Test
public void トークン分割テスト_8() {
    final List<String> actual = StringUtils.tokenize(" \"あ い \" う え お ");

    assertThat(actual.size(), is(4));
    assertThat(actual.get(0), is("あ い"));
    assertThat(actual.get(1), is("う"));
    assertThat(actual.get(2), is("え"));
    assertThat(actual.get(3), is("お"));
}
/** A dangling quote at the very end of the input contributes no token. */
@Test
public void トークン分割テスト_9() {
    final List<String> actual = StringUtils.tokenize(" \"あ い \" う え お \"");

    assertThat(actual.size(), is(4));
    assertThat(actual.get(0), is("あ い"));
    assertThat(actual.get(1), is("う"));
    assertThat(actual.get(2), is("え"));
    assertThat(actual.get(3), is("お"));
}
/** Everything between a pair of quotes is one token, with trailing spacing trimmed. */
@Test
public void トークン分割テスト_10() {
    final List<String> actual = StringUtils.tokenize(" \"あ い う え お \"");

    assertThat(actual.size(), is(1));
    assertThat(actual.get(0), is("あ い う え お"));
}
/** Spacing just inside both quotes is trimmed from the quoted token. */
@Test
public void トークン分割テスト_11() {
    final List<String> actual = StringUtils.tokenize(" \" あ い う え お \" ");

    assertThat(actual.size(), is(1));
    assertThat(actual.get(0), is("あ い う え お"));
}
/** An extra quote directly after the closing quote adds no token. */
@Test
public void トークン分割テスト_12() {
    final List<String> actual = StringUtils.tokenize(" \"あ い う え お\"\"");

    assertThat(actual.size(), is(1));
    assertThat(actual.get(0), is("あ い う え お"));
}
/** An extra quote followed only by spacing adds no token. */
@Test
public void トークン分割テスト_13() {
    final List<String> actual = StringUtils.tokenize(" \"あ い う え お\"\" ");

    assertThat(actual.size(), is(1));
    assertThat(actual.get(0), is("あ い う え お"));
}
@tamtam180
Copy link
Author

小宮氏のをいったんコピペでもってきた

@tamtam180
Copy link
Author

処理を追加していったら衰退してしまった・・。

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment