Skip to content

Instantly share code, notes, and snippets.

@dgadiraju
Created August 21, 2018 00:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dgadiraju/7817fdd69bf8b4f15fe3225c5d8897bf to your computer and use it in GitHub Desktop.
Save dgadiraju/7817fdd69bf8b4f15fe3225c5d8897bf to your computer and use it in GitHub Desktop.
from pyspark.sql import SparkSession
# Build (or reuse) the SparkSession for this script, using the 'local' master.
spark = (
    SparkSession.builder
    .master('local')
    .appName('Create Dataframe over JDBC')
    .getOrCreate()
)
# Load retail_db.orders through the generic 'jdbc' data source, configuring
# the connection one option at a time, then print a preview of the rows.
orders = (
    spark.read
    .format('jdbc')
    .option('url', 'jdbc:mysql://ms.itversity.com')
    .option('dbtable', 'retail_db.orders')
    .option('user', 'retail_user')
    .option('password', 'itversity')
    .load()
)
orders.show()
# Read retail_db.order_items over JDBC as a partitioned read split on
# order_item_order_id, then write the result out as JSON.
#
# The partitioning settings go through jdbc()'s dedicated arguments rather
# than being mixed into `properties`, which is meant for JDBC connection
# arguments (here: just the credentials).
orderItems = spark.read.jdbc(
    "jdbc:mysql://ms.itversity.com",
    "retail_db.order_items",
    column="order_item_order_id",
    lowerBound=10000,
    upperBound=20000,
    numPartitions=4,
    properties={
        "user": "retail_user",
        "password": "itversity",
    },
)
# NOTE: per the Spark JDBC documentation, lowerBound/upperBound only decide
# the partition stride; they do not filter rows out of the table.
orderItems.write.json('/user/training/bootcamp/pyspark/orderItemsJDBC')
# An aliased, parenthesised subquery is passed where jdbc() expects a table
# name, so the DataFrame is built from the per-status order counts.
query = "(select order_status, count(1) from retail_db.orders group by order_status) t"
queryData = spark.read.jdbc(
    "jdbc:mysql://ms.itversity.com",
    query,
    properties={"user": "retail_user", "password": "itversity"},
)
queryData.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment