Skip to content

Instantly share code, notes, and snippets.

View zhuchangzhan's full-sized avatar

Zhuchang Zhan zhuchangzhan

View GitHub Profile
@zhuchangzhan
zhuchangzhan / TPCX_Blog_Spark_Code.py
Created August 23, 2021 13:52
TPCX Blog Spark Code
import numpy as np
import pyspark
import pyspark.sql.functions as f
from pyspark.sql import SparkSession
from pyspark.storagelevel import StorageLevel
def tpcx_bb_q26():
    """Register the ``store_sales`` parquet data set as a Spark SQL view.

    Obtains (or creates) a SparkSession whose application name is
    ``'ipython'``, reads the store_sales parquet data, and exposes the
    resulting DataFrame to Spark SQL as the temporary view ``store_sales``.
    """
    # NOTE(review): only the table-loading prologue is visible in this
    # excerpt; the rest of the query is presumably defined elsewhere -- confirm.
    spark = SparkSession.builder.appName('ipython').getOrCreate()
    # The parquet path is a placeholder in the source and is kept verbatim.
    store_sales = spark.read.parquet('s3://...')
    # Replaces any existing temp view of the same name in this session.
    store_sales.createOrReplaceTempView('store_sales')
@zhuchangzhan
zhuchangzhan / TPCX_Blog_bodo_code.py
Last active August 23, 2021 13:49
TPCX Blog Bodo Code
import bodo
import pandas as pd
import numpy as np
@bodo.jit
def tpcx_bb_q26():
store_sales = pd.read_parquet('s3://...')
item = pd.read_parquet('s3://...')
item2 = item[item['i_category'] == 'Books']
sale_items = pd.merge(