Skip to content

Instantly share code, notes, and snippets.

@yatsu
Last active August 29, 2015 14:22
Show Gist options
  • Save yatsu/19249664d2bef73ad0e5 to your computer and use it in GitHub Desktop.
Save yatsu/19249664d2bef73ad0e5 to your computer and use it in GitHub Desktop.
from glob import glob
import os
import sys
# Bootstrap PySpark in a plain Python session: point the interpreter at a
# local Spark distribution, then import Spark's shell module and re-export
# the objects it exposes.

# Location of the unpacked Spark distribution. This assignment overrides any
# SPARK_HOME value already present in the process environment.
os.environ['SPARK_HOME'] = 'C:\\Users\\MyName\\Documents\\spark-1.4.0-bin-hadoop2.6'
spark_home = os.environ.get('SPARK_HOME')

# Put the distribution's `python` directory at the front of sys.path so that
# `import pyspark` resolves to the copy shipped with this Spark build.
pyspark_path = os.path.join(spark_home, 'python')
sys.path.insert(0, pyspark_path)

# Also prepend every py4j zip archive found under `python/lib`. The glob
# avoids hard-coding the py4j version in the file name.
for z in glob(os.path.join(pyspark_path, 'lib', 'py4j*.zip')):
    sys.path.insert(0, z)

# NOTE(review): presumably read by Spark's launcher when pyspark.shell starts
# the JVM side -- confirm against the Spark version in use.
os.environ['SPARK_SCALA_VERSION'] = '2.10'

# This import must stay below the environment / sys.path setup above;
# it is not resolvable (via these paths) before that setup has run.
import pyspark.shell as ps

# Re-export the shell module's `sc` and `sqlCtx` attributes at module level.
# NOTE(review): presumably the SparkContext and SQLContext -- confirm.
sc = ps.sc
sqlCtx = ps.sqlCtx
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment