Created
August 16, 2013 12:58
-
-
Save tomoyamkung/6249745 to your computer and use it in GitHub Desktop.
[Java]LineIterator の isValidLine() メソッドをオーバーライドしたサンプル
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package net.tomoyamkung.commons.io; | |
import static org.hamcrest.CoreMatchers.*; | |
import static org.junit.Assert.*; | |
import java.io.FileNotFoundException; | |
import java.io.FileReader; | |
import java.io.Reader; | |
import org.apache.commons.io.LineIterator; | |
import org.junit.Test; | |
public class LineIteratorTest { | |
@Test | |
public void コメント行を読み飛ばすLineIterator() throws FileNotFoundException { | |
// Setup | |
FileReader reader = new FileReader("testdata/commons_io/lineiteratortest/test1.csv"); | |
// Exercise | |
CommentSkipLineIterator sut = new CommentSkipLineIterator(reader); | |
// Verify | |
String actual = sut.nextLine(); | |
assertThat("1行目のコメント行が読み飛ばされること", actual, is("1, aaa, あああ, 2013/08/01 10:00:00")); | |
actual = sut.nextLine(); | |
assertThat(actual, is("2, bbb, かかか, 2013/08/02 11:00:00")); | |
actual = sut.nextLine(); | |
assertThat("4行目のコメント行が読み飛ばされること", actual, is("4, ddd, たたた, 2013/08/04 13:00:00")); | |
actual = sut.nextLine(); | |
assertThat("6行目のコメント行が読み飛ばされること", actual, is("6, fff, ははは, 2013/08/06 15:00:00")); | |
} | |
/** | |
* "#" から始まる行を読み飛ばす LineIterator クラス。 | |
* | |
* @author tomoyamkung | |
* | |
*/ | |
class CommentSkipLineIterator extends LineIterator { | |
/** | |
* コンストラクタ。 | |
* | |
* @param reader | |
* @throws IllegalArgumentException ファイルの指定が正しくない場合 | |
*/ | |
public CommentSkipLineIterator(Reader reader) | |
throws IllegalArgumentException { | |
super(reader); | |
} | |
/* (non-Javadoc) | |
* @see org.apache.commons.io.LineIterator#isValidLine(java.lang.String) | |
*/ | |
@Override | |
protected boolean isValidLine(String line) { | |
// "#" から始まる行ならば false を返す | |
return !line.startsWith("#"); | |
} | |
} | |
@Test | |
public void カラムのフォーマットが一致しない行を読み飛ばすLineIterator() throws Exception { | |
// Setup | |
FileReader reader = new FileReader("testdata/commons_io/lineiteratortest/test2.csv"); | |
// Exercise | |
ColumnFormatCheckLineIterator sut = new ColumnFormatCheckLineIterator(reader, 3); | |
// Verify | |
String actual = sut.nextLine(); | |
assertThat("1行目は取得できること", actual, is("1, aaa, あああ, 2013/08/01 10:00:00")); | |
actual = sut.nextLine(); | |
assertThat("2行目は読み飛ばされ、3行目は取得できること", actual, is("3, ccc, さささ, 2013/08/03 12:00:00")); | |
actual = sut.nextLine(); | |
assertThat("4、5行目は読み飛ばされ、6行目は取得できること", actual, is("6, fff, ははは, 2013/08/06 15:00:00")); | |
} | |
/** | |
* カラムのフォーマットが正しくない行を読み飛ばす LineIterator クラス。 | |
* | |
* @author tomoyamkung | |
* | |
*/ | |
class ColumnFormatCheckLineIterator extends LineIterator { | |
private int checkColumnPosition; | |
/** | |
* コンストラクタ。 | |
* | |
* @param reader | |
* @param checkColumnPosition フォーマットを確認するカラム位置(ゼロ始まり) | |
* @throws IllegalArgumentException | |
*/ | |
public ColumnFormatCheckLineIterator(Reader reader, int checkColumnPosition) | |
throws IllegalArgumentException { | |
super(reader); | |
this.checkColumnPosition = checkColumnPosition; | |
} | |
/* (non-Javadoc) | |
* @see org.apache.commons.io.LineIterator#isValidLine(java.lang.String) | |
*/ | |
@Override | |
protected boolean isValidLine(String line) { | |
// 4カラム目のフォーマットが "yyyy/MM/dd HH:mm:ss" になっていれば true | |
String column = line.split(",")[checkColumnPosition].trim(); | |
return column.matches("\\d{4}/\\d{2}/\\d{2} \\d{2}:\\d{2}:\\d{2}"); | |
} | |
} | |
} |
We can make this file beautiful and searchable if this error is corrected: Illegal quoting in line 1.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# "#" から始まる行はコメント扱い
1, aaa, あああ, 2013/08/01 10:00:00
2, bbb, かかか, 2013/08/02 11:00:00
# 3, ccc, さささ, 2013/08/03 12:00:00
4, ddd, たたた, 2013/08/04 13:00:00
## 5, eee, ななな, 2013/08/05 14:00:00
6, fff, ははは, 2013/08/06 15:00:00
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1, aaa, あああ, 2013/08/01 10:00:00
2, bbb, かかか, 2013-08-02 11:00:00
3, ccc, さささ, 2013/08/03 12:00:00
4, ddd, たたた, 2013/08/04 13:00:000
5, eee, ななな, 2013/08/05
6, fff, ははは, 2013/08/06 15:00:00
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment