from pyspark.sql.types import StructType
# Create an empty DataFrame with an empty schema
schema = StructType([])
empty_df = spark.createDataFrame(spark.sparkContext.emptyRDD(), schema)
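To get an empty DataFrame that already has named, typed columns, fill in the schema instead of leaving it empty. A minimal sketch; the column names and types here are illustrative:

from pyspark.sql.types import StructField, StringType, IntegerType

typed_schema = StructType([
    StructField("name", StringType(), True),   # nullable string column
    StructField("age", IntegerType(), True),   # nullable integer column
])
empty_typed_df = spark.createDataFrame(spark.sparkContext.emptyRDD(), typed_schema)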
# Create a small dataset with SparkContext
# Each row must be a tuple (or Row); a flat list would be treated as single-column rows
data = [("Owen", 22)]
rdd = spark.sparkContext.parallelize(data)
df = spark.createDataFrame(rdd, ["name", "age"])
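The same small DataFrame can also be built without an explicit RDD by passing rows straight to createDataFrame. A minimal sketch using the same illustrative record:

from pyspark.sql import Row

# Rows can be plain tuples plus a column list, or Row objects with named fields
df = spark.createDataFrame([Row(name="Owen", age=22)])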
import pandas as pd

# Read data into a pandas DataFrame first
path = "https://gist.githubusercontent.com/fyyying/4aa5b471860321d7b47fd881898162b7/raw/e8606de9a82e13ca6215b340ce260dad60469cba/titanic_dataset.csv"
pd_df = pd.read_csv(path)
# Be careful: Spark can fail to infer a type for pandas "object" columns
# (for example columns mixing strings and NaN), so explicitly cast them to string first
obj_cols = pd_df.select_dtypes(include="object").columns
pd_df[obj_cols] = pd_df[obj_cols].astype(str)
df = spark.createDataFrame(pd_df)
PassengerId | Survived | Pclass | Name                                                | Sex    | Age  | SibSp | Parch | Ticket           | Fare    | Cabin | Embarked
1           | 0        | 3      | Braund, Mr. Owen Harris                             | male   | 22.0 | 1     | 0     | A/5 21171        | 7.25    |       | S
2           | 1        | 1      | Cumings, Mrs. John Bradley (Florence Briggs Thayer) | female | 38.0 | 1     | 0     | PC 17599         | 71.2833 | C85   | C
3           | 1        | 3      | Heikkinen, Miss. Laina                              | female | 26.0 | 0     | 0     | STON/O2. 3101282 | 7.925   |       | S
4           | 1        | 1      | Futrelle, Mrs. Jacques Heath (Lily May Peel)        | female | 35.0 | 1     | 0     | 113803           | 53.1    | C123  | S
5           | 0        | 3      | Allen, Mr. William Henry                            | male   | 35.0 | 0     | 0     | 373450           | 8.05    |       | S
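A preview like the one above can be printed from the converted DataFrame; the exact call is not shown here, but something like the following works (a minimal sketch):

# Show the first five rows without truncating long values such as passenger names
df.show(5, truncate=False)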
summary | Survived | SibSp | Pclass | Parch | Fare
count   | 891.0    | 891.0 | 891.0  | 891.0 | 891.0
mean    | 0.384    | 0.523 | 2.309  | 0.382 | 32.204
stddev  | 0.487    | 1.103 | 0.836  | 0.806 | 49.693
min     | 0.0      | 0.0   | 1.0    | 0.0   | 0.0
max     | 1.0      | 8.0   | 3.0    | 6.0   | 512.329
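Likewise, summary statistics like the table above can be produced with describe; the column selection here is an assumption based on the columns shown:

# count/mean/stddev/min/max for the numeric columns
df.describe("Survived", "SibSp", "Pclass", "Parch", "Fare").show()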
path = "https://gist.githubusercontent.com/fyyying/4aa5b471860321d7b47fd881898162b7/raw/e8606de9a82e13ca6215b340ce260dad60469cba/titanic_dataset.csv"
# read in the csv file
df = spark.read.format('csv').load(SparkFiles.get("titanic_dataset.csv"), header=True, inferSchema=True)
# One can read in data from csv/partquet/json... if the path is linked to a parquet or json file
df = spark.read.format('json').load(SparkFiles.get("titanic_dataset.json"), header=True, inferSchema=True)
df = spark.read.format('parquet').load(SparkFiles.get("titanic_dataset.parquet"), header=True, inferSchema=True)
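The json and parquet reads above assume files with those names already exist. One way to produce them is to write the CSV-backed DataFrame back out; the output paths below are illustrative:

# Write the DataFrame as JSON and Parquet so the readers above have something to load
df.write.mode("overwrite").json("titanic_dataset_json")
df.write.mode("overwrite").parquet("titanic_dataset_parquet")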
from pyspark import SparkFiles

# Path on gist
path = "https://gist.githubusercontent.com/fyyying/4aa5b471860321d7b47fd881898162b7/raw/e8606de9a82e13ca6215b340ce260dad60469cba/titanic_dataset.csv"
# Read from a local file
df = spark.read.csv("titanic_dataset.csv", header=True, inferSchema=True)
# Read from a URL: one extra step is needed to register the URL so Spark downloads the file first
spark.sparkContext.addFile(path)
df = spark.read.csv(SparkFiles.get("titanic_dataset.csv"), header=True, inferSchema=True)
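Whichever way the file is loaded, the result can be sanity-checked quickly (a minimal sketch):

df.printSchema()   # column names and the types picked by inferSchema
print(df.count())  # 891 rows for the Titanic training set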