Created
March 23, 2013 08:43
-
-
Save royseto/5227039 to your computer and use it in GitHub Desktop.
Example output for Cascalog hfs-delimited with :strict? false
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns example.core
  ;; Keyword form (:use ...) is the canonical ns-macro syntax; the bare
  ;; symbol form happens to work in older Clojure but is non-idiomatic.
  (:use [cascalog.api]
        [cascalog.more-taps :only (hfs-delimited)])
  (:gen-class))

;; Entry point: reads the pipe-delimited file named by `in` and prints
;; the first two fields of each row (?a ?b) to stdout.
;;
;; :strict? false tells hfs-delimited not to enforce a fixed column
;; count at parse time, so a ragged row (e.g. one with 3 fields) still
;; reaches the query — where binding it to [?a ?b] then fails with a
;; Cascading TupleException, as the attached run log shows.
(defn -main [in & args]
  (?<- (stdout)
       [?a ?b]
       ((hfs-delimited in :delimiter "|" :strict? false) ?a ?b)))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
a|b
c|d
e|f|g
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Leiningen project definition for the hfs-delimited :strict? false example.
;; Hadoop is a :provided dependency: it is needed to compile/run locally but
;; is supplied by the cluster at deploy time, so it is kept out of the uberjar.
(defproject textdelimitednonstrict "0.1.0-SNAPSHOT"
  :description "Example for text-delimited with strict turned off"
  :uberjar-name "textdelimitednonstrict.jar"
  ;; AOT-compile the entry namespace so `hadoop jar` can find its main class.
  :aot [example.core]
  :main example.core
  :dependencies [[org.clojure/clojure "1.4.0"]
                 [cascalog "1.10.0"]
                 [cascalog-more-taps "0.3.0"]]
  :profiles {:provided {:dependencies [[org.apache.hadoop/hadoop-core "0.20.2-dev"]]}})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ hadoop jar target/textdelimitednonstrict.jar data/input/input.txt | |
13/03/23 01:37:46 INFO util.HadoopUtil: resolving application jar from found main method on: example.core | |
13/03/23 01:37:46 INFO planner.HadoopPlanner: using application jar: /Users/royseto/code/text-delimited-nonstrict/target/textdelimitednonstrict.jar | |
13/03/23 01:37:46 INFO property.AppProps: using app.id: F36F81B58D0E20318934FB0F1FD8C9F3 | |
13/03/23 01:37:46 INFO util.Version: Concurrent, Inc - Cascading 2.0.0 | |
13/03/23 01:37:46 INFO flow.Flow: [] starting | |
13/03/23 01:37:46 INFO flow.Flow: [] source: Hfs["TextDelimited[[UNKNOWN]->[ALL]]"]["data/input/input.txt"]"] | |
13/03/23 01:37:46 INFO flow.Flow: [] sink: StdoutTap["SequenceFile[[UNKNOWN]->['?a', '?b']]"]["/var/folders/z9/w5by_fyj6nl7hhq2ch2hw18r0000gn/T/temp92114624513592247321364027866242401000"]"] | |
13/03/23 01:37:46 INFO flow.Flow: [] parallel execution is enabled: false | |
13/03/23 01:37:46 INFO flow.Flow: [] starting jobs: 1 | |
13/03/23 01:37:46 INFO flow.Flow: [] allocating threads: 1 | |
13/03/23 01:37:46 INFO flow.FlowStep: [] starting step: (1/1) ...2247321364027866242401000 | |
13/03/23 01:37:46 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable | |
13/03/23 01:37:47 WARN snappy.LoadSnappy: Snappy native library not loaded | |
13/03/23 01:37:47 INFO mapred.FileInputFormat: Total input paths to process : 1 | |
13/03/23 01:37:47 INFO flow.FlowStep: [] submitted hadoop job: job_local_0001 | |
13/03/23 01:37:47 INFO mapred.Task: Using ResourceCalculatorPlugin : null | |
13/03/23 01:37:47 INFO hadoop.TupleSerialization: using default comparator: cascalog.hadoop.DefaultComparator | |
13/03/23 01:37:47 INFO io.MultiInputSplit: current split input path: file:/Users/royseto/code/text-delimited-nonstrict/data/input/input.txt | |
13/03/23 01:37:47 INFO mapred.MapTask: numReduceTasks: 0 | |
13/03/23 01:37:47 INFO hadoop.TupleSerialization: using default comparator: cascalog.hadoop.DefaultComparator | |
13/03/23 01:37:47 INFO hadoop.FlowMapper: sourcing from: Hfs["TextDelimited[[UNKNOWN]->[ALL]]"]["data/input/input.txt"]"] | |
13/03/23 01:37:47 INFO hadoop.FlowMapper: sinking to: StdoutTap["SequenceFile[[UNKNOWN]->['?a', '?b']]"]["/var/folders/z9/w5by_fyj6nl7hhq2ch2hw18r0000gn/T/temp92114624513592247321364027866242401000"]"] | |
13/03/23 01:37:47 ERROR stream.TrapHandler: caught Throwable, no trap available, rethrowing | |
cascading.tuple.TupleException: operation added the wrong number of fields, expected: ['?a', '?b'], got result size: 3 | |
at cascading.tuple.TupleEntryCollector.add(TupleEntryCollector.java:82) | |
at cascading.operation.Identity.operate(Identity.java:110) | |
at cascading.flow.stream.FunctionEachStage.receive(FunctionEachStage.java:86) | |
at cascading.flow.stream.FunctionEachStage.receive(FunctionEachStage.java:38) | |
at cascading.flow.stream.SourceStage.map(SourceStage.java:102) | |
at cascading.flow.stream.SourceStage.run(SourceStage.java:58) | |
at cascading.flow.hadoop.FlowMapper.run(FlowMapper.java:124) | |
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:436) | |
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:372) | |
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:212) | |
13/03/23 01:37:47 ERROR stream.SourceStage: caught throwable | |
cascading.tuple.TupleException: operation added the wrong number of fields, expected: ['?a', '?b'], got result size: 3 | |
at cascading.tuple.TupleEntryCollector.add(TupleEntryCollector.java:82) | |
at cascading.operation.Identity.operate(Identity.java:110) | |
at cascading.flow.stream.FunctionEachStage.receive(FunctionEachStage.java:86) | |
at cascading.flow.stream.FunctionEachStage.receive(FunctionEachStage.java:38) | |
at cascading.flow.stream.SourceStage.map(SourceStage.java:102) | |
at cascading.flow.stream.SourceStage.run(SourceStage.java:58) | |
at cascading.flow.hadoop.FlowMapper.run(FlowMapper.java:124) | |
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:436) | |
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:372) | |
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:212) | |
13/03/23 01:37:47 WARN mapred.LocalJobRunner: job_local_0001 | |
cascading.tuple.TupleException: operation added the wrong number of fields, expected: ['?a', '?b'], got result size: 3 | |
at cascading.tuple.TupleEntryCollector.add(TupleEntryCollector.java:82) | |
at cascading.operation.Identity.operate(Identity.java:110) | |
at cascading.flow.stream.FunctionEachStage.receive(FunctionEachStage.java:86) | |
at cascading.flow.stream.FunctionEachStage.receive(FunctionEachStage.java:38) | |
at cascading.flow.stream.SourceStage.map(SourceStage.java:102) | |
at cascading.flow.stream.SourceStage.run(SourceStage.java:58) | |
at cascading.flow.hadoop.FlowMapper.run(FlowMapper.java:124) | |
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:436) | |
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:372) | |
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:212) | |
13/03/23 01:37:47 WARN flow.FlowStep: [] task completion events identify failed tasks | |
13/03/23 01:37:47 WARN flow.FlowStep: [] task completion events count: 0 | |
13/03/23 01:37:47 INFO flow.Flow: [] stopping all jobs | |
13/03/23 01:37:47 INFO flow.FlowStep: [] stopping: (1/1) ...2247321364027866242401000 | |
13/03/23 01:37:47 INFO flow.Flow: [] stopped all jobs | |
13/03/23 01:37:47 INFO util.Hadoop18TapUtil: deleting temp path /var/folders/z9/w5by_fyj6nl7hhq2ch2hw18r0000gn/T/temp92114624513592247321364027866242401000/_temporary | |
Exception in thread "main" cascading.flow.FlowException: local step failed | |
at cascading.flow.planner.FlowStepJob.blockOnJob(FlowStepJob.java:191) | |
at cascading.flow.planner.FlowStepJob.start(FlowStepJob.java:137) | |
at cascading.flow.planner.FlowStepJob.call(FlowStepJob.java:122) | |
at cascading.flow.planner.FlowStepJob.call(FlowStepJob.java:42) | |
at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:303) | |
at java.util.concurrent.FutureTask.run(FutureTask.java:138) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:895) | |
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:918) | |
at java.lang.Thread.run(Thread.java:680) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment