Skip to content

Instantly share code, notes, and snippets.

@eddies
Created November 4, 2015 07:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save eddies/86ae0961813b041175e9 to your computer and use it in GitHub Desktop.
Save eddies/86ae0961813b041175e9 to your computer and use it in GitHub Desktop.
py.test sanity test of Spark
import pytest
def test_spark_sanity_check(sc, tmpdir):
    """Check that the Spark context can read a file and count its lines.

    A single-line CSV file is written into a fresh sub-directory of
    ``tmpdir`` (named after this module), loaded through ``sc.textFile``,
    and the resulting line count is asserted to be exactly 1.

    Args:
        sc: the Spark context supplied by the ``sc`` fixture.
        tmpdir: pytest's per-test temporary directory fixture.
    """
    one_liner = tmpdir.mkdir(__name__).join('one_liner.csv')
    one_liner.write("foo,bar,baz")

    # textFile takes a path string, so convert the path object explicitly.
    assert sc.textFile(str(one_liner)).count() == 1
@pytest.fixture(scope='session')
def sc(request):
    """Create one SparkContext shared by every test in the session.

    Spark's Python bindings are located through the ``SPARK_HOME``
    environment variable: ``$SPARK_HOME/python`` and the py4j source zip
    found under ``$SPARK_HOME/python/lib`` are appended to ``sys.path``
    before ``pyspark`` is imported.

    Args:
        request: pytest's fixture request object, used to register the
            finalizer that stops the context.

    Returns:
        The ``SparkContext``, with its application name set to ``__name__``.

    Raises:
        SystemExit: with status 1 when ``SPARK_HOME`` is not set or when
            importing ``pyspark`` fails.
    """
    import glob
    import os
    import sys

    # Keep the try body down to the one lookup that can raise KeyError.
    try:
        spark_home = os.environ['SPARK_HOME']
    except KeyError:
        print("SPARK_HOME not set")
        sys.exit(1)

    spark_python = os.path.join(spark_home, "python")
    sys.path.append(spark_python)

    # The bundled py4j zip carries a version number in its file name, so
    # look it up by pattern instead of pinning one specific version. If
    # nothing matches, the import below reports the problem.
    py4j_pattern = os.path.join(spark_python, "lib", "py4j-*-src.zip")
    sys.path.extend(sorted(glob.glob(py4j_pattern)))

    try:
        from pyspark import SparkContext
        from pyspark import SparkConf
    except ImportError as e:
        print("Error importing Spark modules", e)
        sys.exit(1)

    conf = SparkConf().setAppName(__name__)
    sc = SparkContext(conf=conf)

    def teardown():
        # Stop the context once the test session is over.
        if sc:
            sc.stop()

    request.addfinalizer(teardown)
    return sc
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment