Skip to content

Instantly share code, notes, and snippets.

@RajatGoyal
Last active January 11, 2017 14:17
Show Gist options
  • Save RajatGoyal/a175da772d7e6158ccb13782a49eb15b to your computer and use it in GitHub Desktop.
import argparse
import os
import sys
SPARK_HOME = '/usr/share/dse/spark'
import findspark
findspark.init(spark_home=SPARK_HOME)
import pyspark
from pyspark import SparkContext
from pyspark.sql import SQLContext
if __name__ == "__main__":
    # Parse the Cassandra keyspace/table to query from the command line.
    # NOTE(review): the original also read sys.argv[1]/[2] directly after
    # argparse, unconditionally clobbering `keyspace` with the literal flag
    # string ("--table") — that override is removed; argparse is the single
    # source of truth. Both flags are required: omitting them previously
    # crashed later with a NameError.
    parser = argparse.ArgumentParser(
        description="Count the rows of a Cassandra table via Spark SQL.")
    parser.add_argument("--table", required=True, help="table name")
    parser.add_argument("--keyspace", required=True, help="keyspace")
    parser.add_argument("--sql", help="sql")
    args = parser.parse_args()
    table_name = args.table
    keyspace = args.keyspace

    # Executor properties must be set before the SparkContext is created.
    SparkContext.setSystemProperty('spark.executor.cores', '1')
    SparkContext.setSystemProperty('spark.executor.memory', '1g')
    sc = SparkContext(master='spark://172.31.23.27:7077',
                      appName="cassandra_testing", sparkHome=SPARK_HOME)
    print(sc)
    sqlContext = SQLContext(sc)
    print(sqlContext)

    # Load the table through the Spark Cassandra connector and register it
    # as a temp table so it can be queried with plain SQL. The temp-table
    # name is prefixed with the keyspace to avoid collisions.
    df = sqlContext.read.format("org.apache.spark.sql.cassandra") \
        .options(table=table_name, keyspace=keyspace).load()
    new_table_name = keyspace + "__" + table_name
    df.registerTempTable(new_table_name)
    print(sqlContext.sql("select count(*) from " + new_table_name).collect())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment