Skip to content

Instantly share code, notes, and snippets.

@velotiotech
Created May 22, 2024 10:33
Show Gist options
  • Save velotiotech/2ab591115d9486d74bfee0096d93f869 to your computer and use it in GitHub Desktop.
Save velotiotech/2ab591115d9486d74bfee0096d93f869 to your computer and use it in GitHub Desktop.
# Build a Spark session with an Iceberg catalog named "demo" whose warehouse
# is an S3 location reached through the s3a connector, then hand the AWS
# credentials to the session's Hadoop configuration.
#
# NOTE(review): `pyspark` is referenced below but no import is visible in this
# snippet -- confirm that `import pyspark` (or `import pyspark.sql`) runs
# before this code.
import os

# Credentials come from the environment when set, so real keys never have to
# be written into the source; the original placeholders remain the fallback.
AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID", "XXXXXXXXXXX")
AWS_SECRET_ACCESS_KEY = os.environ.get(
    "AWS_SECRET_ACCESS_KEY", "XXXXXXXXXXX+XXXXXXXXXXX"
)

# Comma-separated Maven coordinates passed to `spark.jars.packages`:
# the AWS Java SDK, hadoop-aws, and the Iceberg Spark runtime.
spark_jars_packages = (
    "com.amazonaws:aws-java-sdk:1.12.246,"
    "org.apache.hadoop:hadoop-aws:3.2.2,"
    "org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.1.0"
)

# Parenthesised chain instead of backslash continuations: a stray trailing
# space can no longer break the statement, and inline comments are allowed.
spark = (
    pyspark.sql.SparkSession.builder
    .config("spark.jars.packages", spark_jars_packages)
    # Enable Iceberg's SQL extensions and register the "demo" catalog.
    .config("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
    .config("spark.sql.catalog.demo", "org.apache.iceberg.spark.SparkCatalog")
    .config("spark.sql.catalog.demo.warehouse", "s3a://abhishek-test-01012023/iceberg-sample-data/")
    .config("spark.sql.catalog.demo.type", "hadoop")
    # Credentials provider that uses the fs.s3a.access.key / fs.s3a.secret.key
    # values set on the Hadoop configuration below.
    .config("spark.hadoop.fs.s3a.aws.credentials.provider", "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider")
    # Driver heap plus off-heap memory settings.
    .config("spark.driver.memory", "20g")
    .config("spark.memory.offHeap.enabled", "true")
    .config("spark.memory.offHeap.size", "8g")
    .getOrCreate()
)

# The keys are written straight onto the context's Hadoop configuration after
# the session has been obtained. `_jsc` is an underscore-prefixed (non-public)
# attribute of the SparkContext; it is looked up once and reused for both keys.
hadoop_conf = spark.sparkContext._jsc.hadoopConfiguration()
hadoop_conf.set("fs.s3a.access.key", AWS_ACCESS_KEY_ID)
hadoop_conf.set("fs.s3a.secret.key", AWS_SECRET_ACCESS_KEY)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment