https://github.com/hata/embulk-decoder-commons-compress
ls -1 sample.*
sample.tar.bz2
sample.tgz
sample.zip
# Minimal Embulk configuration: read local files matching a path prefix,
# pass them through the commons-compress decoder, and print to stdout.
# No parser section is defined here.
in:
  type: file
  # Matched as a filename prefix, so every sample.* archive is picked up.
  path_prefix: "/private/tmp/hoge4/sample"
  decoders:
  - type: commons-compress
out:
  type: stdout
hsato-mbp:hoge4 hsato$ cat config.yml
# Embulk configuration with a CSV parser: local archive files are decoded by
# commons-compress, parsed as CSV, and printed to stdout.
in:
  type: file
  path_prefix: /private/tmp/hoge4/sample
  decoders:
  - {type: commons-compress}
  parser:
    charset: UTF-8
    newline: CRLF
    type: csv
    delimiter: ','
    quote: '"'
    null_string: 'NULL'
    trim_if_not_quoted: false
    # NOTE(review): the run log still warns about a header row at line 6;
    # presumably each archive holds more than one CSV and only the first
    # header is skipped -- TODO confirm.
    skip_header_lines: 1
    allow_extra_columns: false
    allow_optional_columns: false
    # NOTE(review): c0-c4 look like placeholder names; the header row seen in
    # the run log reads id,account,time,purchase,comment -- consider renaming.
    columns:
    - {name: c0, type: long}
    - {name: c1, type: long}
    - {name: c2, type: timestamp, format: '%Y-%m-%d %H:%M:%S'}
    - {name: c3, type: timestamp, format: '%Y%m%d'}
    - {name: c4, type: string}
out: {type: stdout}
embulk run config.yml
2015-09-29 12:58:52.302 +0900: Embulk v0.7.4
2015-09-29 12:58:54.957 +0900 [INFO] (transaction): Loaded plugin embulk-decoder-commons-compress (0.3.0)
2015-09-29 12:58:54.987 +0900 [INFO] (transaction): Listing local files at directory '/private/tmp/hoge4' filtering filename by prefix 'sample'
2015-09-29 12:58:54.996 +0900 [INFO] (transaction): Loading files [/private/tmp/hoge4/sample.tar.bz2, /private/tmp/hoge4/sample.tgz, /private/tmp/hoge4/sample.zip]
2015-09-29 12:58:55.093 +0900 [INFO] (transaction): {done: 0 / 3, running: 0}
2015-09-29 12:58:55.392 +0900 [WARN] (task-0001): Skipped line 6 (Too few columns): ./csv/sample_02.csv000644 000766 000000 00000000355 12602404421 014560 0ustar00hsatowheel000000 000000 id,account,time,purchase,comment
2015-09-29 12:58:55.392 +0900 [WARN] (task-0000): Skipped line 6 (Too few columns): ./csv/sample_02.csv000644 000766 000000 00000000355 12602404421 014560 0ustar00hsatowheel000000 000000 id,account,time,purchase,comment
2015-09-29 12:58:55.402 +0900 [WARN] (task-0002): Skipped line 6 (java.lang.NumberFormatException: For input string: "id"): id,account,time,purchase,comment
2015-09-29 12:58:55.406 +0900 [WARN] (task-0000): Skipped line 11 (Too few columns):
1,32864,2015-01-27 19:23:49,20150127,embulk
2,14824,2015-01-27 19:01:23,20150127,embulk jruby
3,27559,2015-01-28 02:20:02,20150128,Embulk "csv" parser plugin
4,11270,2015-01-29 11:54:36,20150129,
1,32864,2015-01-27 19:23:49,20150127,embulk
2,14824,2015-01-27 19:01:23,20150127,embulk jruby
3,27559,2015-01-28 02:20:02,20150128,Embulk "csv" parser plugin
4,11270,2015-01-29 11:54:36,20150129,
2015-09-29 12:58:55.415 +0900 [INFO] (transaction): {done: 1 / 3, running: 2}
2015-09-29 12:58:55.416 +0900 [WARN] (task-0001): Skipped line 11 (Too few columns):
1,32864,2015-01-27 19:23:49,20150127,embulk
2,14824,2015-01-27 19:01:23,20150127,embulk jruby
3,27559,2015-01-28 02:20:02,20150128,Embulk "csv" parser plugin
4,11270,2015-01-29 11:54:36,20150129,
1,32864,2015-01-27 19:23:49,20150127,embulk
2,14824,2015-01-27 19:01:23,20150127,embulk jruby
3,27559,2015-01-28 02:20:02,20150128,Embulk "csv" parser plugin
4,11270,2015-01-29 11:54:36,20150129,
1,32864,2015-01-27 19:23:49,20150127,embulk
2015-09-29 12:58:55.420 +0900 [INFO] (transaction): {done: 2 / 3, running: 1}
2,14824,2015-01-27 19:01:23,20150127,embulk jruby
3,27559,2015-01-28 02:20:02,20150128,Embulk "csv" parser plugin
4,11270,2015-01-29 11:54:36,20150129,
1,32864,2015-01-27 19:23:49,20150127,embulk
2,14824,2015-01-27 19:01:23,20150127,embulk jruby
3,27559,2015-01-28 02:20:02,20150128,Embulk "csv" parser plugin
4,11270,2015-01-29 11:54:36,20150129,
2015-09-29 12:58:55.421 +0900 [INFO] (transaction): {done: 3 / 3, running: 0}
2015-09-29 12:58:55.427 +0900 [INFO] (main): Committed.
2015-09-29 12:58:55.427 +0900 [INFO] (main): Next config diff: {"in":{"last_path":"/private/tmp/hoge4/sample.zip"},"out":{}}