Skip to content

Instantly share code, notes, and snippets.

@sebastian-nagel
Created April 27, 2017 16:01
Show Gist options
  • Save sebastian-nagel/310a5a5f39cc668fb71b6ace208706f7 to your computer and use it in GitHub Desktop.
Save sebastian-nagel/310a5a5f39cc668fb71b6ace208706f7 to your computer and use it in GitHub Desktop.
# Minimal script demonstrating a hanging executor on Spark 2.1.0 and Python 2.7.
from pyspark import SparkContext
class BadEncodedException(Exception):
    """Exception whose message is the string form of ``reason``.

    The message is kept on ``self.msg`` and is also passed to the base
    ``Exception`` constructor, so ``str(exc)`` yields the same text.
    """

    def __init__(self, reason):
        # Coerce to str up front so any object can be given as the reason.
        self.msg = str(reason)
        # Explicit two-argument super() keeps this runnable on Python 2.7.
        super(BadEncodedException, self).__init__(self.msg)
def emit_word(word):
    """Map a word to a ``(word, 1)`` pair for counting.

    Raises:
        BadEncodedException: when ``word`` is ``'foo'``. The message
            deliberately embeds the escapes ``\\x8b\\x8e\\x8b``.
    """
    if word == 'foo':
        # NOTE(review): presumably these non-ASCII escapes in the exception
        # message are what trigger the hang named in the file header --
        # confirm before changing the literal.
        raise BadEncodedException('foo <[\x8b\x8e\x8b]>')
    return word, 1
# Driver: count three words with map + reduceByKey. The word 'foo' makes
# emit_word raise while the job is being evaluated by collect().
sc = SparkContext()
rdd = sc.parallelize(['abc', 'foo', 'def'])
output = rdd.map(emit_word).reduceByKey(lambda x, y: x + y).collect()
# Only reached if collect() returns; prints one "word: count" line per key.
for (word, count) in output:
    print("%s: %i" % (word, count))
sc.stop()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment