Skip to content

Instantly share code, notes, and snippets.

PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.25 S
2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Thayer) female 38.0 1 0 PC 17599 71.2833 C85 C
3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.925 S
4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1 C123 S
5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.05 S
6 0 3 Moran, Mr. James male 0 0 330877 8.4583 Q
7 0 1 McCarthy, Mr. Timothy J male 54.0 0 0 17463 51.8625 E46 S
8 0 3 Palsson, Master. Gosta Leonard male 2.0 3 1 349909 21.075 S
9 1 3 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 2 347742 11.1333 S
from pyspark.sql.types import StructType

# An empty StructType (no fields) describes a DataFrame with zero columns,
# so pairing it with an empty RDD yields a completely empty DataFrame.
empty_schema = StructType([])
spark.createDataFrame(spark.sparkContext.emptyRDD(), empty_schema)
# Create a small dataset with SparkContext
# Fix: each record must be a tuple (or Row) so that the two column names
# below can be matched positionally. The original flat list ["Owen", 22]
# would be interpreted as two single-value records and break schema
# inference in createDataFrame.
data = [("Owen", 22)]
rdd = spark.sparkContext.parallelize(data)
df = spark.createDataFrame(rdd, ["name", "age"])
# Create a small dataset with SparkContext
# Fix: the original line `data = ["Owen", 22}]` was a syntax error (stray
# `}`), and a flat list would not match the two column names anyway —
# each record must be a tuple so "name" and "age" bind positionally.
data = [("Owen", 22)]
rdd = spark.sparkContext.parallelize(data)
df = spark.createDataFrame(rdd, ["name", "age"])
# Implementation with Pandas
import pandas as pd

# Load the cash-flow table and the interest-rate table from CSV.
cashflow_df = pd.read_csv(path_cashflow)
rate_df = pd.read_csv(path_rate)

# Discount factor per row: DF = 1 / (1 + r)^t, where r is the interest
# rate and t the year. Compute the growth term first for readability.
growth = (1 + rate_df["Interest rate"]) ** rate_df["Year"]
rate_df["Discount factor"] = 1 / growth
import logging
import os
import azure.functions as func
import json
import stripe
# This is your real test secret API key.
# Read the Stripe secret from the environment so it is never hard-coded
# in source; this raises KeyError at import time if STRIPE_API_KEY is
# not set, which surfaces a misconfigured deployment immediately.
stripe.api_key = os.environ["STRIPE_API_KEY"]
import React, { useState, useEffect } from "react";
import {
CardElement,
useStripe,
useElements
} from "@stripe/react-stripe-js";
export default function CheckoutForm() {
const [succeeded, setSucceeded] = useState(false);
const [error, setError] = useState(null);
# Fix: SparkFiles was used below without being imported.
from pyspark import SparkFiles

# Path on gist
path = "https://gist.githubusercontent.com/fyyying/4aa5b471860321d7b47fd881898162b7/raw/e8606de9a82e13ca6215b340ce260dad60469cba/titanic_dataset.csv"
# Read from local
df = spark.read.csv("titanic_dataset.csv", header=True, inferSchema=True)
# Read from url
# One more step required to add the url into file
# addFile downloads the remote file to every node; SparkFiles.get then
# resolves its local path by file name.
spark.sparkContext.addFile(path)
df = spark.read.csv(SparkFiles.get("titanic_dataset.csv"), header=True, inferSchema=True)
# Fix: SparkFiles was used below without being imported.
from pyspark import SparkFiles

path = "https://gist.githubusercontent.com/fyyying/4aa5b471860321d7b47fd881898162b7/raw/e8606de9a82e13ca6215b340ce260dad60469cba/titanic_dataset.csv"
# Fix: the file must be registered with addFile before SparkFiles.get can
# resolve it — this step was missing from the original snippet, so the
# reads below would fail with a file-not-found error.
spark.sparkContext.addFile(path)
# read in the csv file
df = spark.read.format('csv').load(SparkFiles.get("titanic_dataset.csv"), header=True, inferSchema=True)
# One can read in data from csv/parquet/json... if the path is linked to a parquet or json file
df = spark.read.format('json').load(SparkFiles.get("titanic_dataset.json"), header=True, inferSchema=True)
df = spark.read.format('parquet').load(SparkFiles.get("titanic_dataset.parquet"), header=True, inferSchema=True)
# Read data from a pandas dataframe
path = "https://gist.githubusercontent.com/fyyying/4aa5b471860321d7b47fd881898162b7/raw/e8606de9a82e13ca6215b340ce260dad60469cba/titanic_dataset.csv"
# Be careful the object type in pandas can not be understood
# Explicitly change to string type
pd_df = pd.read_csv(path)
# Fix: the conversion the comment above describes was missing from the
# original code. Cast every object-dtype column to str so Spark can infer
# the schema. NOTE(review): astype(str) renders missing values as the
# literal "nan" — confirm that is acceptable for downstream use.
object_cols = pd_df.columns[pd_df.dtypes == object]
pd_df = pd_df.astype({col: str for col in object_cols})
df = spark.createDataFrame(pd_df)