Last active
November 8, 2022 08:11
-
-
Save Shinichi-Nakagawa/01b1e1583cb8ba0c2e24abe2b77cdfac to your computer and use it in GitHub Desktop.
PyCon JP 2022資料用スニペット
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
5. BigQuery読込 | |
""" | |
from pyspark.sql import SparkSession | |
from pyspark.sql import DataFrame as SparkDataFrame | |
from pyspark.sql.utils import AnalysisException | |
# Same session setup as in the previous snippets.
spark: SparkSession = (
    SparkSession.builder
    .appName('your app')
    .config('spark.jars', 'gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-0.25.2.jar')
    .config('spark.sql.debug.maxToStringFields', 2000)
    .getOrCreate()
)
# NOTE(review): GCS_BUCKET is not defined in this snippet; it is presumably
# declared earlier in the full script -- confirm before running.
spark.conf.set('temporaryGcsBucket', GCS_BUCKET)
# Be careful: reading a BigQuery view raises an error unless view reads are
# enabled here.
spark.conf.set("viewsEnabled", "true")
def read_bq() -> SparkDataFrame:
    """
    Read the dashboard data from a BigQuery view into a Spark DataFrame.

    The module-level ``spark`` session is used with the ``bigquery`` data
    source format.

    Returns:
        The loaded DataFrame, or ``None`` when Spark raises
        ``AnalysisException`` during the load. The failure is logged rather
        than silently discarded; callers must check for ``None`` before
        using the result.
    """
    # Local import so the block does not depend on the file's import header.
    import logging

    try:
        df: SparkDataFrame = spark.read.format('bigquery') \
            .option('project', 'your project') \
            .option('table', 'your_project.view_baseball') \
            .load()
    except AnalysisException:
        # Best-effort read: keep returning None as before, but leave a trace
        # (including the traceback) so the failure can be diagnosed.
        logging.getLogger(__name__).warning(
            'Failed to load the BigQuery view into a DataFrame',
            exc_info=True,
        )
        # The declared return type is kept unchanged for existing callers.
        return None  # type: ignore
    return df
# Load the BigQuery view once at module level; read_bq() yields None when the
# load raised AnalysisException.
sdf: SparkDataFrame = read_bq()
""" | |
6. GCS保存 | |
""" | |
from pyspark.sql import DataFrame as SparkDataFrame | |
def save_json(
    sdf: SparkDataFrame,
    *,
    path: str = "gs://your-gcs-bucket/filepath/hoge",
) -> None:
    """
    Save a Spark DataFrame as a JSON dataset.

    Args:
        sdf: DataFrame to write.
        path: Destination handed to the writer's ``path`` option. Defaults
            to the location that used to be hard-coded here, so existing
            ``save_json(sdf)`` calls behave exactly as before.

    The writer runs in ``overwrite`` mode.
    """
    (
        sdf.write
        .format("json")
        .mode("overwrite")
        .option("path", path)
        .save()
    )
# read_bq() returns None when the BigQuery load fails; stop with an explicit
# error instead of the AttributeError that `sdf.write` would raise inside
# save_json().
if sdf is None:
    raise RuntimeError('No DataFrame was loaded from BigQuery; nothing to save.')
save_json(sdf)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment