
import org.apache.spark.sql.functions.{col, count}

val res = spark
  .range(1000L * 1000 * 1000)
  .filter(x => x % 2 == 0) // note: the condition is now a Scala lambda, not a Column expression
  .select(count(col("id")))
  .first()
  .getAs[Long](0)
val res = spark
  .range(1000L * 1000 * 1000)
  .rdd
  .filter(_ % 2 == 0)
  .count()
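
The two variants above can be timed side by side in a spark-shell session with SparkSession's built-in time helper, which prints the elapsed time of a block and returns its result. A minimal sketch (actual numbers depend on your hardware and cluster):

// Sketch: rough wall-clock comparison of the Dataset and RDD counts.
spark.time {
  spark.range(1000L * 1000 * 1000).filter(x => x % 2 == 0).count()
}
spark.time {
  spark.range(1000L * 1000 * 1000).rdd.filter(_ % 2 == 0).count()
}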
public Object generate(Object[] references) {
  return new GeneratedIteratorForCodegenStage1(references);
}

/*wsc_codegenStageId*/
final class GeneratedIteratorForCodegenStage1 extends org.apache.spark.sql.execution.BufferedRowIterator {
  private Object[] references;
  private scala.collection.Iterator[] inputs;
  private boolean agg_initAgg_0;
  private boolean agg_bufIsNull_0;
  // ... (rest of the generated class omitted)
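
Generated Java like the excerpt above can be printed for any query with Spark's debug helpers. A sketch, using the Column-expression variant of the count as the query (df is just an illustrative name):

import org.apache.spark.sql.execution.debug._
import org.apache.spark.sql.functions.{col, count}

val df = spark
  .range(1000L * 1000 * 1000)
  .filter(col("id") % 2 === 0)
  .select(count(col("id")))

df.debugCodegen() // prints the Java source generated for each whole-stage-codegen subtree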
// Pseudocode for how count() consumes a partition: compute() hands back an
// iterator, and counting is just draining it (split/context elided here).
def pseudo_rdd_count[T](rdd: RDD[T]): Long = {
  val iter = rdd.compute(split, context)
  var result = 0L
  while (iter.hasNext) { iter.next(); result += 1 }
  result
}
abstract class RDD[T: ClassTag] {
  def compute(split: Partition, context: TaskContext): Iterator[T]
}
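
To make that contract concrete, here is a hypothetical minimal RDD (RangeLikeRDD and n are illustrative names, not from the original) with a single partition whose compute() simply streams a numeric range; this iterator is exactly what count() drains:

import org.apache.spark.{Partition, SparkContext, TaskContext}
import org.apache.spark.rdd.RDD

// Hypothetical single-partition RDD: no data is materialized up front,
// compute() just returns an iterator over the range.
class RangeLikeRDD(sc: SparkContext, n: Long) extends RDD[Long](sc, Nil) {
  override protected def getPartitions: Array[Partition] =
    Array(new Partition { override def index: Int = 0 })

  override def compute(split: Partition, context: TaskContext): Iterator[Long] =
    (0L until n).iterator
}

// Usage: new RangeLikeRDD(spark.sparkContext, 1000L).count()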
val res = spark
  .range(1000L * 1000 * 1000)
  .filter(col("id") % 2 === 0) // Column expression: the predicate stays inside codegen
  .select(count(col("id")))
  .first()
  .getAs[Long](0)
val res = spark.sparkContext
  .range(0L, 1000L * 1000 * 1000)
  .filter(_ % 2 == 0)
  .count()
vzayaz / CountNaively.scala
Count naively
var res: Long = 0L
var i: Long = 0L
while (i < 1000L * 1000 * 1000) {
  if (i % 2 == 0) res += 1
  i += 1L
}
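
For a rough wall-clock comparison of this plain JVM loop against the Spark variants, it can be wrapped in a simple System.nanoTime timer. A sketch; timed is a helper introduced here for illustration, not part of the original:

// Hypothetical helper: run a block, print elapsed seconds, return its result.
def timed[T](label: String)(body: => T): T = {
  val t0 = System.nanoTime()
  val result = body
  println(f"$label took ${(System.nanoTime() - t0) / 1e9}%.3f s")
  result
}

val res = timed("naive count") {
  var acc = 0L
  var i = 0L
  while (i < 1000L * 1000 * 1000) {
    if (i % 2 == 0) acc += 1
    i += 1L
  }
  acc
}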