Skip to content

Instantly share code, notes, and snippets.

View mateiz's full-sized avatar

Matei Zaharia mateiz

View GitHub Profile
diff --git a/python/pyspark/shuffle.py b/python/pyspark/shuffle.py
index 06b1544..e9a66bd 100644
--- a/python/pyspark/shuffle.py
+++ b/python/pyspark/shuffle.py
@@ -15,6 +15,7 @@
# limitations under the License.
#
+import gc
import os
// BEFORE AppendOnlyMap change: ////
matei@mbp:~/workspace/spark-perf$ tail -f `ls -t | head -1`
scala-agg-by-key, --num-trials=8 --inter-trial-wait=2 --num-partitions=32 --reduce-tasks=32 --random-seed=5 --persistent-type=memory --num-records=16000000 --unique-keys=1600 --key-length=10 --unique-values=80000 --value-length=10, 1.188, 0.113, 1.16, 1.188, 1.160
scala-agg-by-key, --num-trials=8 --inter-trial-wait=2 --num-partitions=32 --reduce-tasks=32 --random-seed=5 --persistent-type=memory --num-records=16000000 --unique-keys=240000 --key-length=10 --unique-values=80000 --value-length=10, 4.629, 0.151, 4.583, 4.713, 4.583
scala-agg-by-key-int, --num-trials=8 --inter-trial-wait=2 --num-partitions=64 --reduce-tasks=64 --random-seed=5 --persistent-type=memory --num-records=32000000 --unique-keys=3200 --key-length=10 --unique-values=160000 --value-length=10, 2.12, 0.085, 2.027, 2.120, 2.134
scala-agg-by-key-int, --num-trials=8 --inter-trial-wait=2 --num-partitions=64 --reduce-tasks=64 --random-seed=5 --persis