Skip to content

Instantly share code, notes, and snippets.

@smartkiwi
Created February 20, 2016 06:26
Show Gist options
  • Save smartkiwi/2610b575f56007f67de3 to your computer and use it in GitHub Desktop.
Save smartkiwi/2610b575f56007f67de3 to your computer and use it in GitHub Desktop.
Report exceptions to Sentry
import logging
from pyspark import SparkContext, SparkConf, BasicProfiler
from raven import Client
from raven.context import Context
class RavenContextProfiler(BasicProfiler):
    """Profiler subclass that runs the profiled callable inside a raven ``Context``.

    ``profile`` is overridden so that, instead of the base-class behavior, the
    callable executes inside ``Context(client, tags={"environment": ...})``.
    NOTE(review): this presumably relies on raven's ``Context`` context manager
    reporting uncaught exceptions to Sentry on exit -- confirm against the
    installed raven version.

    ``dsn`` and ``environment`` are read from attributes that default to
    ``None`` on the class; assign them before the profiler is used.
    """

    # Cached raven Client; created on first call to get_lazy_raven_client().
    raven_client = None
    # Sentry DSN handed to ``Client``.
    dsn = None
    # Value sent as the "environment" tag on the raven context.
    environment = None

    def profile(self, func, *args, **kwargs):
        """Run ``func`` inside a raven ``Context`` and return its result.

        Extra positional and keyword arguments are forwarded to ``func`` and
        its return value is passed back, so the method works both when it is
        called as ``profile(func)`` and as ``profile(func, *args, **kwargs)``.
        """
        with Context(self.get_lazy_raven_client(), tags={"environment": self.environment}):
            return func(*args, **kwargs)

    def get_lazy_raven_client(self):
        """Return the raven ``Client`` for ``self.dsn``, creating it on first use."""
        if self.raven_client is None:
            self.raven_client = Client(self.dsn)
        return self.raven_client
# Sentry settings live on the profiler class itself; set them before the
# SparkContext is created with that class.
RavenContextProfiler.environment = "dev"
RavenContextProfiler.dsn = "https://<you id>@app.getsentry.com/34960"

# SparkConf setters return the conf object, so the options can be chained.
conf = (
    SparkConf()
    .setAppName("RavenContext")
    # The console progress bar gets interleaved with the rest of the log output.
    .set('spark.ui.showConsoleProgress', False)
    .set('spark.shuffle.memoryFraction', '0.6')
    .set('spark.storage.memoryFraction', '0.3')
    # Turn on Python profiling (used together with profiler_cls below).
    .set('spark.python.profile', "true")
)
sc = SparkContext(conf=conf, profiler_cls=RavenContextProfiler)

# Only let py4j log errors.
logging.getLogger("py4j").setLevel(logging.ERROR)
def some_mapper_function(e):
    """Mapper that always fails, used to demonstrate exception reporting.

    Raises:
        ZeroDivisionError: unconditionally, whatever ``e`` is.
    """
    # Deliberate failure. Raised explicitly instead of via ``raise 1 / 0``,
    # which only failed because the division blew up before ``raise`` ran;
    # the unreachable ``return e`` that followed is dropped.
    raise ZeroDivisionError("division by zero")
# Run the always-failing mapper over ten integers; collect() triggers the job.
(
    sc.parallelize(range(10))
    .map(some_mapper_function)
    .collect()
)
@samueltc
Copy link

samueltc commented Apr 7, 2016

To get it to work, I needed to use the RequestsHTTPTransport.

Thanks for that amazing gist.

@smartkiwi
Copy link
Author

@samueltc I'd be curious to see your version.
I guess the version of Sentry I was using for the experiment picked some transport on its own.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment