Skip to content

Instantly share code, notes, and snippets.

Created September 2, 2015 20:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/b02a82401a30a69f1cff to your computer and use it in GitHub Desktop.
Save anonymous/b02a82401a30a69f1cff to your computer and use it in GitHub Desktop.
Exception with BloomFilterJoinStrategy: CrunchRuntimeException ("Can't find local cache file") thrown from FilterKeysWithBloomFilterFn.initialize when running a bloom-filter join on the Crunch Spark runtime
org.apache.crunch.CrunchRuntimeException: Can't find local cache file for '/tmp/crunch-1758064214/p1'
at org.apache.crunch.io.impl.ReadableDataImpl.getCacheFilePath(ReadableDataImpl.java:81)
at org.apache.crunch.io.impl.ReadableDataImpl.access$000(ReadableDataImpl.java:42)
at org.apache.crunch.io.impl.ReadableDataImpl$1.apply(ReadableDataImpl.java:93)
at org.apache.crunch.io.impl.ReadableDataImpl$1.apply(ReadableDataImpl.java:90)
at com.google.common.collect.Lists$TransformingRandomAccessList.get(Lists.java:451)
at java.util.AbstractList$Itr.next(AbstractList.java:358)
at com.google.common.collect.Iterables$3.next(Iterables.java:508)
at com.google.common.collect.Iterables$3.next(Iterables.java:501)
at com.google.common.collect.Iterators$5.hasNext(Iterators.java:544)
at org.apache.crunch.lib.join.BloomFilterJoinStrategy$FilterKeysWithBloomFilterFn.initialize(BloomFilterJoinStrategy.java:233)
at org.apache.crunch.impl.spark.SparkRuntimeContext.initialize(SparkRuntimeContext.java:86)
at org.apache.crunch.impl.spark.fn.FlatMapIndexFn.call(FlatMapIndexFn.java:46)
at org.apache.crunch.impl.spark.fn.FlatMapIndexFn.call(FlatMapIndexFn.java:33)
at org.apache.spark.api.java.JavaPairRDD$$anonfun$toScalaFunction2$1.apply(JavaPairRDD.scala:996)
at org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:90)
at org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:90)
at org.apache.spark.rdd.RDD$$anonfun$15.apply(RDD.scala:647)
at org.apache.spark.rdd.RDD$$anonfun$15.apply(RDD.scala:647)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
at org.apache.spark.rdd.UnionRDD.compute(UnionRDD.scala:87)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
at org.apache.spark.scheduler.Task.run(Task.scala:64)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:203)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment