Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# hanging executor on Spark 2.1.0 and Python 2.7
from pyspark import SparkContext
class BadEncodedException(Exception):
    """Exception whose message is the string form of the given reason.

    The message is stored on ``self.msg`` and also passed to the base
    ``Exception`` so that ``str(exc)`` and ``exc.args`` carry it.
    """

    def __init__(self, reason):
        """Build the exception from *reason*.

        Args:
            reason: Any object; it is converted with ``str()`` before use.
        """
        self.msg = str(reason)
        # Explicit two-argument super() keeps this runnable on Python 2.7,
        # the interpreter named in the file's header comment.
        super(BadEncodedException, self).__init__(self.msg)
def emit_word(word):
    """Map a word to a ``(word, 1)`` pair for counting.

    Args:
        word: The word to emit.

    Returns:
        A tuple ``(word, 1)``.

    Raises:
        BadEncodedException: If *word* equals ``'foo'``. The message
            contains the high bytes 0x8b 0x8e 0x8b (non-ASCII).
    """
    if word == 'foo':
        # NOTE(review): the non-ASCII bytes in this message are presumably
        # what triggers the hang named in the file header -- keep them
        # exactly as written; confirm before changing.
        raise BadEncodedException('foo <[\x8b\x8e\x8b]>')
    return word, 1
# Driver: count words in a tiny RDD. The 'foo' element makes emit_word raise
# inside the map step.
sc = SparkContext()
try:
    rdd = sc.parallelize(['abc', 'foo', 'def'])
    # Pair every word with 1, sum the 1s per word, and bring results back
    # to the driver.
    output = rdd.map(emit_word).reduceByKey(lambda x, y: x + y).collect()
    for (word, count) in output:
        print("%s: %i" % (word, count))
finally:
    # Stop the context even when the job raises, so it is not left running.
    sc.stop()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment